³ò
B_Kc        	   @   sÔ   d  d k  Z  d  d k Z d  d k l Z d  d k l Z d  d k Td  d k Td e f d „  ƒ  YZ	 d e
 f d „  ƒ  YZ d	 e
 f d
 „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d S(   iÿÿÿÿN(   t   Tree(   t   ElementTree(   t   *t   NombankCorpusReaderc           B   s_   e  Z d  Z d e e e e d „ Z e d „ Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 RS(	   s–  
    Corpus reader for the nombank corpus, which augments the Penn
    Treebank with information about the predicate argument structure
    of every noun instance.  The corpus consists of two parts: the
    predicate-argument annotations themselves, and a set of X{frameset
    files} which define the argument labels used by the annotations,
    on a per-noun basis.  Each X{frameset file} contains one or more
    predicates, such as C{'turn'} or C{'turn_on'}, each of which is
    divided into coarse-grained word senses called X{rolesets}.  For
    each X{roleset}, the frameset file provides descriptions of the
    argument roles, along with examples.
    t    c         C   s€   t  | t ƒ o t | | ƒ } n t | ƒ } t i |  | | | g | | ƒ | |  _ | |  _ | |  _ | |  _	 | |  _
 d S(   sÙ  
        @param root: The root directory for this corpus.
        @param nomfile: The name of the file containing the predicate-
            argument annotations (relative to C{root}).
        @param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        @param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        @param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by nombank.
        N(   t
   isinstancet
   basestringt   find_corpus_fileidst   listt   CorpusReadert   __init__t   _nomfilet   _framefilest
   _nounsfilet   _parse_fileid_xformt   _parse_corpus(   t   selft   roott   nomfilet
   framefilest	   nounsfilet   parse_fileid_xformt   parse_corpust   encoding(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR
      s    				c         C   sk   | t  j o |  i } n t | t ƒ o | g } n t g  } | D] } | |  i | ƒ i ƒ  qE ~ ƒ S(   sV   
        @return: the text contents of the given fileids, as a single string.
        (   t   Nonet   _fileidsR   R   t   concatt   opent   read(   R   t   fileidst   _[1]t   f(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyt   raw?   s    c         C   s.   t  |  i |  i ƒ |  i d |  i |  i ƒ ƒS(   s„   
        @return: a corpus view that acts as a list of
        L{NombankInstance} objects, one for each noun in the corpus.
        R   (   t   StreamBackedCorpusViewt   abspathR   t   _read_instance_blockR   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyt	   instancesG   s    	c         C   s+   t  |  i |  i ƒ t d |  i |  i ƒ ƒS(   s   
        @return: a corpus view that acts as a list of strings, one for
        each line in the predicate-argument annotation file.  
        R   (   R!   R"   R   t   read_line_blockR   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyt   linesP   s    c         C   sµ   | i  d ƒ d } d | } | |  i j o t d | ƒ ‚ n t i |  i | ƒ i ƒ  ƒ i ƒ  } xI | i d ƒ D]" } | i	 d | j o | Squ qu Wt d | | f ƒ ‚ d S(	   sE   
        @return: the xml description for the given roleset.
        t   .i    s   frames/%s.xmls   Frameset file for %s not founds   predicate/rolesett   ids   Roleset %s not found in %sN(
   t   splitR   t
   ValueErrorR   t   parseR"   R   t   getroott   findallt   attrib(   R   t
   roleset_idt   lemmat	   framefilet   etreet   roleset(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR3   Y   s    
$ c         C   s+   t  |  i |  i ƒ t d |  i |  i ƒ ƒS(   s‰   
        @return: a corpus view that acts as a list of all noun lemmas
        in this corpus (from the nombank.1.0.words file).
        R   (   R!   R"   R   R%   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyt   nounsm   s    c         C   s`   g  } xS t  d ƒ D]E } | i ƒ  i ƒ  } | o& | i t i | |  i |  i ƒ ƒ q q W| S(   Nid   (   t   ranget   readlinet   stript   appendt   NombankInstanceR+   R   R   (   R   t   streamt   blockt   it   line(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR#   v   s     	(   t   __name__t
   __module__t   __doc__R   R
   R    R$   R&   R3   R4   R#   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR      s   							R9   c           B   se   e  Z e d  „ Z e d „  ƒ Z d „  Z d „  Z d „  Z e e d d ƒZ	 e
 e e d „ ƒ Z RS(   c
   
      C   s[   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ t | ƒ |  _ |	 |  _	 d  S(   N(
   t   fileidt   sentnumt   wordnumt   baseformt   sensenumbert	   predicatet   predidt   tuplet	   argumentsR   (
   R   RA   RB   RC   RD   RE   RF   RG   RI   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR
   ‰   s    								c         C   s   d |  i  |  i f S(   s«   The name of the roleset used by this instance's predicate.
        Use L{nombank.roleset() <NombankCorpusReader.roleset>} to
        look up information about the roleset.s   %s.%s(   RD   RE   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR3   ±   s    c         C   s   d |  i  |  i |  i f S(   Ns'   <NombankInstance: %s, sent %s, word %s>(   RA   RB   RC   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyt   __repr__¸   s    c         C   sv   d |  i  |  i |  i |  i |  i f } |  i |  i d f f } x. t | ƒ D]  \ } } | d | | f 7} qN W| S(   Ns   %s %s %s %s %st   rels    %s-%s(   RA   RB   RC   t   basenameRE   RI   RF   t   sorted(   R   t   st   itemst   argloct   argid(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyt   __str__¼   s     c         C   sS   |  i  d  j o d  Sn |  i |  i  i ƒ  j o d  Sn |  i  i |  i ƒ |  i S(   N(   R   R   RA   R   t   parsed_sentsRB   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyt	   _get_treeÄ   s    !t   docsv   
        The parse tree corresponding to this instance, or C{None} if
        the corresponding tree is not available.c         C   s‘  |  i  ƒ  } t | ƒ d j  o t d |  ƒ ‚ n | d  \ } } } } } | d }	 g  }
 t |	 ƒ D]- \ } } d | j o |
 |	 i | ƒ qg qg ~
 } t | ƒ d j o t d |  ƒ ‚ n | d  j	 o | | ƒ } n t | ƒ } t | ƒ } | d i  d d ƒ \ } } t i | ƒ } g  } xB |	 D]: } | i  d d ƒ \ } } | i	 t i | ƒ | f ƒ q1Wt
 | | | | | | | | | ƒ	 S(   Ni   s    Badly formatted nombank line: %ri   s   -reli   i    t   -(   R)   t   lenR*   t	   enumeratet   popR   t   intt   NombankTreePointerR+   R8   R9   (   RN   R   R   t   piecesRA   RB   RC   RD   RE   t   argsR   R<   t   pRK   t   predlocRG   RF   RI   t   argRP   RQ   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR+   Ì   s*    
G  (   R>   R?   R   R
   t   propertyR3   RJ   RR   RT   t   treet   staticmethodR+   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR9   ‡   s   '					t   NombankPointerc           B   s   e  Z d  Z d „  Z RS(   sf  
    A pointer used by nombank to identify one or more constituents in
    a parse tree.  C{NombankPointer} is an abstract base class with
    three concrete subclasses:

      - L{NombankTreePointer} is used to point to single constituents.
      - L{NombankSplitTreePointer} is used to point to 'split'
        constituents, which consist of a sequence of two or more
        C{NombankTreePointer}s.
      - L{NombankChainTreePointer} is used to point to entire trace
        chains in a tree.  It consists of a sequence of pieces, which
        can be C{NombankTreePointer}s or C{NombankSplitTreePointer}s.
    c         C   s$   |  i  t j o t d ƒ ‚ n d  S(   Ns(   NombankPointer is an abstract base class(   t	   __class__t   NombankPoitnert   AssertionError(   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR
      s    (   R>   R?   R@   R
   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRd   ò   s   t   NombankChainTreePointerc           B   s,   e  Z d  „  Z d „  Z d „  Z d „  Z RS(   c         C   s   | |  _  d  S(   N(   R\   (   R   R\   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR
     s    	c         C   s   d i  d „  |  i Dƒ ƒ S(   NR   c         s   s   x |  ] } d  | Vq Wd S(   s   %sN(    (   t   .0R^   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pys	   <genexpr>  s    (   t   joinR\   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRR     s    c         C   s   d |  S(   Ns   <NombankChainTreePointer: %s>(    (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRJ     s    c         C   sQ   | d  j o t d ƒ ‚ n t d g  } |  i D] } | | i | ƒ q1 ~ ƒ S(   Ns   Parse tree not avaialables   *CHAIN*(   R   R*   R    R\   t   select(   R   Rb   R   R^   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRk     s    (   R>   R?   R
   RR   RJ   Rk   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRh     s   			t   NombankSplitTreePointerc           B   s,   e  Z d  „  Z d „  Z d „  Z d „  Z RS(   c         C   s   | |  _  d  S(   N(   R\   (   R   R\   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR
     s    	c         C   s   d i  d „  |  i Dƒ ƒ S(   Nt   ,c         s   s   x |  ] } d  | Vq Wd S(   s   %sN(    (   Ri   R^   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pys	   <genexpr>  s    (   Rj   R\   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRR     s    c         C   s   d |  S(   Ns   <NombankSplitTreePointer: %s>(    (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRJ     s    c         C   sQ   | d  j o t d ƒ ‚ n t d g  } |  i D] } | | i | ƒ q1 ~ ƒ S(   Ns   Parse tree not avaialables   *SPLIT*(   R   R*   R    R\   Rk   (   R   Rb   R   R^   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRk     s    (   R>   R?   R
   RR   RJ   Rk   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRl     s   			R[   c           B   sS   e  Z d  Z d „  Z e d „  ƒ Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 RS(   sD   
    wordnum:height*wordnum:height*...
    wordnum:height,
    
    c         C   s   | |  _  | |  _ d  S(   N(   RC   t   height(   R   RC   Rn   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR
   '  s    	c      	   C   sÿ   |  i  d ƒ } t | ƒ d j o2 t g  } | D] } | t i | ƒ q0 ~ ƒ Sn |  i  d ƒ } t | ƒ d j o2 t g  } | D] } | t i | ƒ q„ ~ ƒ Sn |  i  d ƒ } t | ƒ d j o t d |  ƒ ‚ n t t | d ƒ t | d ƒ ƒ S(   NR   i   Rm   t   :i   s   bad nombank pointer %ri    (   R)   RW   Rh   R[   R+   Rl   R*   RZ   (   RN   R\   R   t   eltt   _[2](    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR+   +  s    
(
('c         C   s   d |  i  |  i f S(   Ns   %s:%s(   RC   Rn   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRR   >  s    c         C   s   d |  i  |  i f S(   Ns   NombankTreePointer(%d, %d)(   RC   Rn   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRJ   A  s    c         C   s   x( t  | t t f ƒ o | i d } q Wt  | t ƒ p t t |  ƒ t | ƒ ƒ Sn t |  i |  i f | i | i f ƒ S(   Ni    (	   R   Rh   Rl   R\   R[   t   cmpR(   RC   Rn   (   R   t   other(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyt   __cmp__D  s    c         C   s.   | d  j o t d ƒ ‚ n | |  i | ƒ S(   Ns   Parse tree not avaialable(   R   R*   t   treepos(   R   Rb   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRk   O  s    c         C   s*  | d j o t d ƒ ‚ n | g } g  } d } xñ t oé t | d t ƒ oŠ t | ƒ t | ƒ j  o | i d ƒ n | d c d 7<| d t | d ƒ j  o | i | d | d ƒ q!| i ƒ  | i ƒ  q5 | |  i j o# t	 | t | ƒ |  i
 d  ƒ Sq5 | d 7} | i ƒ  q5 Wd S(   s}   
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        s   Parse tree not avaialablei    iÿÿÿÿi   N(   R   R*   t   TrueR   R    RW   R8   RY   RC   RH   Rn   (   R   Rb   t   stackRu   RC   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyRu   S  s"    	

#
(   R>   R?   R@   R
   Rc   R+   RR   RJ   Rt   Rk   Ru   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pyR[   !  s   					(   t   ret   codecst	   nltk.treeR    t
   nltk.etreeR   t   nltk.corpus.reader.utilt   nltk.corpus.reader.apiR	   R   t   objectR9   Rd   Rh   Rl   R[   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/nombank.pys   <module>	   s   

uk