³ò
B_Kc        	   @   sê   d  d k  Z  d  d k Z d  d k l Z d  d k l Z d  d k Td  d k Td e f d „  ƒ  YZ	 d e
 f d „  ƒ  YZ d	 e
 f d
 „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e
 f d „  ƒ  YZ d S(   iÿÿÿÿN(   t   Tree(   t   ElementTree(   t   *t   PropbankCorpusReaderc           B   s_   e  Z d  Z d e e e e d „ Z e d „ Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 RS(	   s—  
    Corpus reader for the propbank corpus, which augments the Penn
    Treebank with information about the predicate argument structure
    of every verb instance.  The corpus consists of two parts: the
    predicate-argument annotations themselves, and a set of X{frameset
    files} which define the argument labels used by the annotations,
    on a per-verb basis.  Each X{frameset file} contains one or more
    predicates, such as C{'turn'} or C{'turn_on'}, each of which is
    divided into coarse-grained word senses called X{rolesets}.  For
    each X{roleset}, the frameset file provides descriptions of the
    argument roles, along with examples.
    t    c         C   s€   t  | t ƒ o t | | ƒ } n t | ƒ } t i |  | | | g | | ƒ | |  _ | |  _ | |  _ | |  _	 | |  _
 d S(   sÛ  
        @param root: The root directory for this corpus.
        @param propfile: The name of the file containing the predicate-
            argument annotations (relative to C{root}).
        @param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        @param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        @param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by propbank.
        N(   t
   isinstancet
   basestringt   find_corpus_fileidst   listt   CorpusReadert   __init__t	   _propfilet   _framefilest
   _verbsfilet   _parse_fileid_xformt   _parse_corpus(   t   selft   roott   propfilet
   framefilest	   verbsfilet   parse_fileid_xformt   parse_corpust   encoding(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR
      s    				c         C   sk   | t  j o |  i } n t | t ƒ o | g } n t g  } | D] } | |  i | ƒ i ƒ  qE ~ ƒ S(   sV   
        @return: the text contents of the given fileids, as a single string.
        (   t   Nonet   _fileidsR   R   t   concatt   opent   read(   R   t   fileidst   _[1]t   f(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyt   raw>   s    c         C   s.   t  |  i |  i ƒ |  i d |  i |  i ƒ ƒS(   s…   
        @return: a corpus view that acts as a list of
        L{PropbankInstance} objects, one for each verb in the corpus.
        R   (   t   StreamBackedCorpusViewt   abspathR   t   _read_instance_blockR   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyt	   instancesF   s    	c         C   s+   t  |  i |  i ƒ t d |  i |  i ƒ ƒS(   s   
        @return: a corpus view that acts as a list of strings, one for
        each line in the predicate-argument annotation file.  
        R   (   R!   R"   R   t   read_line_blockR   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyt   linesO   s    c         C   sµ   | i  d ƒ d } d | } | |  i j o t d | ƒ ‚ n t i |  i | ƒ i ƒ  ƒ i ƒ  } xI | i d ƒ D]" } | i	 d | j o | Squ qu Wt d | | f ƒ ‚ d S(	   sE   
        @return: the xml description for the given roleset.
        t   .i    s   frames/%s.xmls   Frameset file for %s not founds   predicate/rolesett   ids   Roleset %s not found in %sN(
   t   splitR   t
   ValueErrorR   t   parseR"   R   t   getroott   findallt   attrib(   R   t
   roleset_idt   lemmat	   framefilet   etreet   roleset(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR3   X   s    
$ c         C   s+   t  |  i |  i ƒ t d |  i |  i ƒ ƒS(   s   
        @return: a corpus view that acts as a list of all verb lemmas
        in this corpus (from the verbs.txt file).
        R   (   R!   R"   R   R%   R   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyt   verbsl   s    c         C   s`   g  } xS t  d ƒ D]E } | i ƒ  i ƒ  } | o& | i t i | |  i |  i ƒ ƒ q q W| S(   Nid   (   t   ranget   readlinet   stript   appendt   PropbankInstanceR+   R   R   (   R   t   streamt   blockt   it   line(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR#   u   s     	(   t   __name__t
   __module__t   __doc__R   R
   R    R$   R&   R3   R4   R#   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR      s   							R9   c           B   sV   e  Z e d  „ Z d „  Z d „  Z d „  Z e e d d ƒZ e	 e e d „ ƒ Z
 RS(   c
   
      C   s[   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ t | ƒ |  _ |	 |  _	 d  S(   N(
   t   fileidt   sentnumt   wordnumt   taggerR3   t
   inflectiont	   predicatet   tuplet	   argumentsR   (
   R   RA   RB   RC   RD   R3   RE   RF   RH   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR
   ˆ   s    								c         C   s   d |  i  |  i |  i f S(   Ns(   <PropbankInstance: %s, sent %s, word %s>(   RA   RB   RC   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyt   __repr__´   s    c         C   s|   d |  i  |  i |  i |  i |  i |  i f } |  i |  i d f f } x. t | ƒ D]  \ } } | d | | f 7} qT W| S(   Ns   %s %s %s %s %s %st   rels    %s-%s(	   RA   RB   RC   RD   R3   RE   RH   RF   t   sorted(   R   t   st   itemst   argloct   argid(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyt   __str__¸   s     c         C   sS   |  i  d  j o d  Sn |  i |  i  i ƒ  j o d  Sn |  i  i |  i ƒ |  i S(   N(   R   R   RA   R   t   parsed_sentsRB   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyt	   _get_treeÀ   s    !t   docsv   
        The parse tree corresponding to this instance, or C{None} if
        the corresponding tree is not available.c         C   s°  |  i  ƒ  } t | ƒ d j  o t d |  ƒ ‚ n | d  \ } } } } } }	 g  }
 | d D]! } | i d ƒ o |
 | q^ q^ ~
 } g  } | d D]! } | i d ƒ p | | q— q— ~ } t | ƒ d j o t d |  ƒ ‚ n | d  j	 o | | ƒ } n t | ƒ } t | ƒ } t i |	 ƒ }	 t i | d d  ƒ } g  } xB | D]: } | i  d d ƒ \ } } | i	 t i | ƒ | f ƒ qPWt
 | | | | | |	 | | | ƒ	 S(	   Ni   s!   Badly formatted propbank line: %ri   s   -reli   i    iüÿÿÿt   -(   R)   t   lenR*   t   endswithR   t   intt   PropbankInflectionR+   t   PropbankTreePointerR8   R9   (   RL   R   R   t   piecesRA   RB   RC   RD   R3   RE   R   t   pRJ   t   _[2]t   argsRF   RH   t   argRN   RO   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR+   È   s,    99  	(   R>   R?   R   R
   RI   RP   RR   t   propertyt   treet   staticmethodR+   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR9   †   s   +					t   PropbankPointerc           B   s   e  Z d  Z d „  Z RS(   sn  
    A pointer used by propbank to identify one or more constituents in
    a parse tree.  C{PropbankPointer} is an abstract base class with
    three concrete subclasses:

      - L{PropbankTreePointer} is used to point to single constituents.
      - L{PropbankSplitTreePointer} is used to point to 'split'
        constituents, which consist of a sequence of two or more
        C{PropbankTreePointer}s.
      - L{PropbankChainTreePointer} is used to point to entire trace
        chains in a tree.  It consists of a sequence of pieces, which
        can be C{PropbankTreePointer}s or C{PropbankSplitTreePointer}s.
    c         C   s$   |  i  t j o t d ƒ ‚ n d  S(   Ns)   PropbankPointer is an abstract base class(   t	   __class__t   PropbankPoitnert   AssertionError(   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR
   ý   s    (   R>   R?   R@   R
   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRb   ï   s   t   PropbankChainTreePointerc           B   s,   e  Z d  „  Z d „  Z d „  Z d „  Z RS(   c         C   s   | |  _  d  S(   N(   RZ   (   R   RZ   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR
     s    	c         C   s   d i  d „  |  i Dƒ ƒ S(   NR   c         s   s   x |  ] } d  | Vq Wd S(   s   %sN(    (   t   .0R[   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pys	   <genexpr>	  s    (   t   joinRZ   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRP     s    c         C   s   d |  S(   Ns   <PropbankChainTreePointer: %s>(    (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRI   
  s    c         C   sQ   | d  j o t d ƒ ‚ n t d g  } |  i D] } | | i | ƒ q1 ~ ƒ S(   Ns   Parse tree not avaialables   *CHAIN*(   R   R*   R    RZ   t   select(   R   R`   R   R[   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRi     s    (   R>   R?   R
   RP   RI   Ri   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRf     s   			t   PropbankSplitTreePointerc           B   s,   e  Z d  „  Z d „  Z d „  Z d „  Z RS(   c         C   s   | |  _  d  S(   N(   RZ   (   R   RZ   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR
     s    	c         C   s   d i  d „  |  i Dƒ ƒ S(   Nt   ,c         s   s   x |  ] } d  | Vq Wd S(   s   %sN(    (   Rg   R[   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pys	   <genexpr>  s    (   Rh   RZ   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRP     s    c         C   s   d |  S(   Ns   <PropbankSplitTreePointer: %s>(    (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRI     s    c         C   sQ   | d  j o t d ƒ ‚ n t d g  } |  i D] } | | i | ƒ q1 ~ ƒ S(   Ns   Parse tree not avaialables   *SPLIT*(   R   R*   R    RZ   Ri   (   R   R`   R   R[   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRi     s    (   R>   R?   R
   RP   RI   Ri   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRj     s   			RY   c           B   sS   e  Z d  Z d „  Z e d „  ƒ Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 RS(   sD   
    wordnum:height*wordnum:height*...
    wordnum:height,
    
    c         C   s   | |  _  | |  _ d  S(   N(   RC   t   height(   R   RC   Rl   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR
   $  s    	c      	   C   sÿ   |  i  d ƒ } t | ƒ d j o2 t g  } | D] } | t i | ƒ q0 ~ ƒ Sn |  i  d ƒ } t | ƒ d j o2 t g  } | D] } | t i | ƒ q„ ~ ƒ Sn |  i  d ƒ } t | ƒ d j o t d |  ƒ ‚ n t t | d ƒ t | d ƒ ƒ S(   NR   i   Rk   t   :i   s   bad propbank pointer %ri    (   R)   RU   Rf   RY   R+   Rj   R*   RW   (   RL   RZ   R   t   eltR\   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR+   (  s    
(
('c         C   s   d |  i  |  i f S(   Ns   %s:%s(   RC   Rl   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRP   ;  s    c         C   s   d |  i  |  i f S(   Ns   PropbankTreePointer(%d, %d)(   RC   Rl   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRI   >  s    c         C   s   x( t  | t t f ƒ o | i d } q Wt  | t ƒ p t t |  ƒ t | ƒ ƒ Sn t |  i |  i f | i | i f ƒ S(   Ni    (	   R   Rf   Rj   RZ   RY   t   cmpR(   RC   Rl   (   R   t   other(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyt   __cmp__A  s    c         C   s.   | d  j o t d ƒ ‚ n | |  i | ƒ S(   Ns   Parse tree not avaialable(   R   R*   t   treepos(   R   R`   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRi   L  s    c         C   s*  | d j o t d ƒ ‚ n | g } g  } d } xñ t oé t | d t ƒ oŠ t | ƒ t | ƒ j  o | i d ƒ n | d c d 7<| d t | d ƒ j  o | i | d | d ƒ q!| i ƒ  | i ƒ  q5 | |  i j o# t	 | t | ƒ |  i
 d  ƒ Sq5 | d 7} | i ƒ  q5 Wd S(   s}   
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        s   Parse tree not avaialablei    iÿÿÿÿi   N(   R   R*   t   TrueR   R    RU   R8   t   popRC   RG   Rl   (   R   R`   t   stackRr   RC   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRr   P  s"    	

#
(   R>   R?   R@   R
   Ra   R+   RP   RI   Rq   Ri   Rr   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRY     s   					RX   c           B   s¤   e  Z d  Z d Z d Z d Z d Z d Z d Z d Z	 d Z
 d Z d Z d	 Z d Z d
 Z d
 d
 d
 d
 d
 d „ Z d „  Z d „  Z e i d ƒ Z e d „  ƒ Z RS(   R<   t   gR[   t   vR   t   nt   ot   bt   3t   aRT   c         C   s1   | |  _  | |  _ | |  _ | |  _ | |  _ d  S(   N(   t   formt   tenset   aspectt   persont   voice(   R   R}   R~   R   R€   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR
   Š  s
    				c         C   s#   |  i  |  i |  i |  i |  i S(   N(   R}   R~   R   R€   R   (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRP   ‘  s    c         C   s   d |  S(   Ns   <PropbankInflection: %s>(    (   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRI   ”  s    s"   [igpv\-][fpn\-][pob\-][3\-][ap\-]$c         C   se   t  |  t ƒ p t d ƒ ‚ n t |  ƒ d j p t i i |  ƒ o t d |  ƒ ‚ n t |  Œ  S(   Ns   expected a stringi   s!   Bad propbank inflection string %r(   R   R   t	   TypeErrorRU   RX   t	   _VALIDATEt   matchR*   (   RL   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyR+   ™  s    (   R>   R?   t
   INFINITIVEt   GERUNDt
   PARTICIPLEt   FINITEt   FUTUREt   PASTt   PRESENTt   PERFECTt   PROGRESSIVEt   PERFECT_AND_PROGRESSIVEt   THIRD_PERSONt   ACTIVEt   PASSIVEt   NONER
   RP   RI   t   ret   compileRƒ   Ra   R+   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pyRX   s  s&   		(   R“   t   codecst	   nltk.treeR    t
   nltk.etreeR   t   utilt   apiR	   R   t   objectR9   Rb   Rf   Rj   RY   RX   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/propbank.pys   <module>   s   

uiU