
B_Kc           @   si   d  Z  d d k Z d d k l Z d d k l Z d d k Td d k Td e f d     YZ	 d   Z
 d S(	   s  
The Carnegie Mellon Pronouncing Dictionary [cmudict.0.6]
ftp://ftp.cs.cmu.edu/project/speech/dict/
Copyright 1998 Carnegie Mellon University

File Format: Each line consists of an uppercased word, a counter
(for alternative pronunciations), and a transcription.  Vowels are
marked for stress (1=primary, 2=secondary, 0=no stress).  E.g.:
NATURAL 1 N AE1 CH ER0 AH0 L

The dictionary contains 127069 entries.  Of these, 119400 words are assigned
a unique pronunciation, 6830 words have two pronunciations, and 839 words have
three or more pronunciations.  Many of these are fast-speech variants.

Phonemes: There are 39 phonemes, as shown below:
    
Phoneme Example Translation    Phoneme Example Translation
------- ------- -----------    ------- ------- -----------
AA      odd     AA D           AE      at      AE T
AH      hut     HH AH T        AO      ought   AO T
AW      cow     K AW           AY      hide    HH AY D
B       be      B IY           CH      cheese  CH IY Z
D       dee     D IY           DH      thee    DH IY
EH      Ed      EH D           ER      hurt    HH ER T
EY      ate     EY T           F       fee     F IY
G       green   G R IY N       HH      he      HH IY
IH      it      IH T           IY      eat     IY T
JH      gee     JH IY          K       key     K IY
L       lee     L IY           M       me      M IY
N       knee    N IY           NG      ping    P IH NG
OW      oat     OW T           OY      toy     T OY
P       pee     P IY           R       read    R IY D
S       sea     S IY           SH      she     SH IY
T       tea     T IY           TH      theta   TH EY T AH
UH      hood    HH UH D        UW      two     T UW
V       vee     V IY           W       we      W IY
Y       yield   Y IY L D       Z       zee     Z IY
ZH      seizure S IY ZH ER
iN(   t
   deprecated(   t   Index(   t   *t   CMUDictCorpusReaderc           B   sw   e  Z d    Z d   Z d   Z d   Z e d  d d d   Z e d  d d	   Z e d
  d d   Z	 RS(   c      	   C   sF   t  g  } |  i t t  D]" \ } } | t | t d | q ~  S(   su   
        @return: the cmudict lexicon as a list of entries
        containing (word, transcriptions) tuples.
        t   encoding(   t   concatt   abspathst   Nonet   Truet   StreamBackedCorpusViewt   read_cmudict_block(   t   selft   _[1]t   fileidt   enc(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyt   entries9   s    
c         C   sk   | t  j o |  i } n t | t  o | g } n t g  } | D] } | |  i |  i   qE ~  S(   s?   
        @return: the cmudict lexicon as a raw string.
        (   R   t   _fileidst
   isinstancet
   basestringR   t   opent   read(   R   t   fileidsR   t   f(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyt   rawB   s    c         C   s1   g  } |  i    D] \ } } | | i   q ~ S(   sN   
        @return: a list of all words defined in the cmudict lexicon.
        (   R   t   lower(   R   R   t   wordt   _(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyt   wordsJ   s    c         C   s   t  t |  i     S(   s   
        @return: the cmudict lexicon as a dictionary, whose keys are
        lowercase words and whose values are lists of pronunciations.
        (   t   dictR   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyR   P   s    s,   Use .entries() or .transcriptions() instead.t   cmudictt   listedc         C   sP   | d j o |  i  |  Sn | d j o |  i |  Sn t d |   d  S(   NR   t
   dictionarys   bad format %r(   R   R   t
   ValueError(   R   t   itemst   format(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyR   X   s    s   Use .dict() instead.c         C   s   |  i  |  S(   N(   R   (   R   R!   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyR   ]   s    s   Use .entries() instead.c         C   s   |  i  |  S(   N(   R   (   R   R!   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyR   _   s    (
   t   __name__t
   __module__R   R   R   R   R    R   R   R   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyR   8   s   								c         C   ss   g  } xf t  |  d j  oR |  i   } | d j o | Sn | i   } | i | d i   | d f  q	 W| S(   Nid   t    i    i   (   t   lent   readlinet   splitt   appendR   (   t   streamR   t   linet   pieces(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pyR
   c   s    &(   t   __doc__t   codecst   nltk.internalsR    t	   nltk.utilR   t   utilt   apit   CorpusReaderR   R
   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/cmudict.pys   <module>.   s   

+