³ò
4ÒÇIc           @   s`   d  d k  Z  d  d k l Z d  d k Td  d k Td e f d „  ƒ  YZ d e f d „  ƒ  YZ d S(   iÿÿÿÿN(   t	   str2tuple(   t   *t   SwitchboardUtterancec           B   s    e  Z d  Z d „  Z d „  Z RS(   sC  
    A specialized list object used to encode switchboard utterances.
    The elements of the list are the words in the utterance; and two
    attributes, C{speaker} and C{id}, are provided to retrieve the
    spearker identifier and utterance id.  Note that utterance ids
    are only unique within a given discourse.
    c         C   s,   t  i |  | ƒ | |  _ t | ƒ |  _ d  S(   N(   t   listt   __init__t   speakert   intt   id(   t   selft   wordsR   R   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR      s    	c         C   st   t  |  ƒ d j o
 d } nA t |  d t ƒ o d i d „  |  Dƒ ƒ } n d i |  ƒ } d |  i |  i | f S(   Ni    t    t    c         s   s   x |  ] } d  | Vq Wd S(   s   %s/%sN(    (   t   .0t   w(    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pys	   <genexpr>   s    s   <%s.%s: %r>(   t   lent
   isinstancet   tuplet   joinR   R   (   R   t   text(    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyt   __repr__   s    
(   t   __name__t
   __module__t   __doc__R   R   (    (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR      s   	t   SwitchboardCorpusReaderc           B   s¤   e  Z d  g Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z d „  Z e i d ƒ Z d Z d „  Z RS(   t   taggedc         C   s   t  i |  | |  i ƒ d  S(   N(   t   CorpusReaderR   t   _FILES(   R   t   root(    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR   )   s    c         C   s   t  |  i d ƒ |  i ƒ S(   NR   (   t   StreamBackedCorpusViewt   abspatht   _words_block_reader(   R   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR	   ,   s    c         C   s   t  |  i d ƒ |  i ƒ S(   NR   (   R   R   t   _tagged_words_block_reader(   R   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyt   tagged_words0   s    c         C   s   t  |  i d ƒ |  i ƒ S(   NR   (   R   R   t   _utterances_block_reader(   R   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyt
   utterances4   s    c         C   s   t  |  i d ƒ |  i ƒ S(   NR   (   R   R   t   _tagged_utterances_block_reader(   R   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyt   tagged_utterances8   s    c         C   s   t  |  i d ƒ |  i ƒ S(   NR   (   R   R   t   _discourses_block_reader(   R   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyt
   discourses<   s    c         C   s   t  |  i d ƒ |  i ƒ S(   NR   (   R   R   t   _tagged_discourses_block_reader(   R   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyt   tagged_discourses@   s    c         C   s^   g  } t  | ƒ D]C } | i d ƒ D]- } | i ƒ  o | |  i | d t ƒq$ q$ q ~ g S(   Ns   
t   include_tag(   t   read_blankline_blockt   splitt   stript   _parse_utterancet   False(   R   t   streamt   _[1]t   bt   u(    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR%   D   s    c         C   s^   g  } t  | ƒ D]C } | i d ƒ D]- } | i ƒ  o | |  i | d t ƒq$ q$ q ~ g S(   Ns   
R)   (   R*   R+   R,   R-   t   True(   R   R/   R0   R1   R2   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR'   J   s    c         C   s   |  i  | ƒ d S(   Ni    (   R%   (   R   R/   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR!   P   s    c         C   s   |  i  | ƒ d S(   Ni    (   R'   (   R   R/   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR#   S   s    c         C   s   t  |  i | ƒ d g  ƒ S(   Ni    (   t   sumR%   (   R   R/   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR   V   s    c         C   s   t  |  i | ƒ d g  ƒ S(   Ni    (   R4   R'   (   R   R/   (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR   Y   s    s   (\w+)\.(\d+)\:\s+(.*)t   /c         C   s½   |  i  i | ƒ } | d  j o t d | ƒ ‚ n | i ƒ  \ } } } g  } | i ƒ  D] } | t | |  i ƒ qY ~ }	 | p+ g  }
 |	 D] \ } } |
 | q ~
 }	 n t |	 | | ƒ S(   Ns   Bad utterance %r(	   t   _UTTERANCE_REt   matcht   Nonet
   ValueErrort   groupsR+   R    t   _SEPR   (   R   t	   utteranceR)   t   mR   R   R   R0   t   sR	   t   _[2]R   t   t(    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR-   ^   s    3+(   R   R   R   R   R	   R    R"   R$   R&   R(   R%   R'   R!   R#   R   R   t   ret   compileR6   R;   R-   (    (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pyR   $   s"   														(	   RA   t   nltk.tagR    t   utilt   apiR   R   R   R   (    (    (    s4   /p/zhu/06/nlp/nltk/nltk/corpus/reader/switchboard.pys   <module>   s
   

