³ò
B_Kc        
   @   sj   d  d k  Z  d  d k Z d  d k Td  d k l Z d  d k Td  d k Td  d k Td e f d „  ƒ  YZ	 d S(   iÿÿÿÿN(   t   *(   t
   deprecatedt   VerbnetCorpusReaderc           B   s%  e  Z e d  „ Z e i d ƒ Z e i d ƒ Z e i d ƒ Z e	 d „ Z
 e	 d „ Z e	 e	 e	 e	 d „ Z d „  Z e	 d „ Z e d	 ƒ e	 d
 „ ƒ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d d „ Z d d „ Z d d „ Z d d „ Z d d „ Z d d „ Z d d „ Z RS(   c         C   sT   t  i |  | | | ƒ t t ƒ |  _ t t ƒ |  _ h  |  _ h  |  _ |  i ƒ  d  S(   N(	   t   XMLCorpusReadert   __init__t   defaultdictt   listt   _lemma_to_classt   _wordnet_to_classt   _class_to_fileidt   _shortid_to_longidt   _quick_index(   t   selft   roott   fileidst
   wrap_etree(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyR      s    		s   ([^\-\.]*)-([\d+.\-]+)$s
   [\d+.\-]+$sH   <MEMBER name="\??([^"]+)" wn="([^"]*)"[^>]+>|<VNSUBCLASS ID="([^"]+)"/?>c         C   sh   | d j o t |  i i ƒ  ƒ SnA |  i | ƒ } g  } | i d ƒ D] } | | i d ƒ qG ~ Sd S(   sw   
        Return a list of all verb lemmas that appear in any class, or
        in the C{classid} if specified.
        s   MEMBERS/MEMBERt   nameN(   t   Nonet   sortedR   t   keyst   vnclasst   findallt   get(   R   t   classidR   t   _[1]t   member(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyt   lemmas7   s
    c         C   sz   | d j o t |  i i ƒ  ƒ SnS |  i | ƒ } t g  } | i d ƒ D] } | | i d d ƒ i ƒ  qJ ~ g  ƒ Sd S(   s{   
        Return a list of all wordnet identifiers that appear in any
        class, or in C{classid} if specified.
        s   MEMBERS/MEMBERt   wnt    N(	   R   R   R   R   R   t   sumR   R   t   split(   R   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyt
   wordnetidsD   s
    
c         C   sK  t  g  } | | | | g D] } | d j	 o | | q q ~ ƒ d j o t d ƒ ‚ n | d j	 oC g  } |  i i ƒ  D]$ \ } }	 |	 | j o | | q| q| ~ Sn | d j	 o |  i | Sn | d j	 o |  i | Sne | d j	 oD |  i | ƒ }
 g  } |
 i d ƒ D] } | | i	 d ƒ q~ Sn t
 |  i i ƒ  ƒ Sd S(   sS  
        Return a list of the verbnet class identifiers.  If a file
        identifier is specified, then return only the verbnet class
        identifiers for classes (and subclasses) defined by that file.
        If a lemma is specified, then return only verbnet class
        identifiers for classes that contain that lemma as a member.
        If a wordnetid is specified, then return only identifiers for
        classes that contain that wordnetid as a member.  If a classid
        is specified, then return only identifiers for subclasses of
        the specified verbnet class.
        i   s:   Specify at most one of: fileid, wordnetid, fileid, classids   SUBCLASSES/VNSUBCLASSt   IDN(   t   lenR   t
   ValueErrorR	   t   itemsR   R   R   R   R   R   R   (   R   t   lemmat	   wordnetidt   fileidR   R   t   xt   _[2]t   ct   ft   xmltreet   _[3]t   subclass(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyt   classidsQ   s     + #.c         C   sá   | |  i  j o |  i | ƒ Sn |  i | ƒ } | |  i j o |  i |  i | ƒ } |  i | ƒ } | | i d ƒ j o | SqÝ xW | i d ƒ D]$ } | | i d ƒ j o | Sq“ q“ Wt p t ‚ n t d | ƒ ‚ d S(   s‚  
        Return an ElementTree containing the xml for the specified
        verbnet class.

        @param fileid_or_classid: An identifier specifying which class
        should be returned.  Can be a file identifier (such as
        C{'put-9.1.xml'}), or a verbnet class identifier (such as
        C{'put-9.1'}) or a short verbnet class identifier (such as
        C{'9.1'}).
        R    s   .//VNSUBCLASSs   Unknown identifier %sN(	   t   _fileidst   xmlt   longidR	   R   R   t   Falset   AssertionErrorR"   (   R   t   fileid_or_classidR   R&   t   treeR-   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyR   o   s     c         C   sv   | d j o |  i Sn[ t | t ƒ o |  i |  i | ƒ g Sn0 g  } | D] } | |  i |  i | ƒ qN ~ Sd S(   s»   
        Return a list of fileids that make up this corpus.  If
        C{vnclass_ids} is specified, then return the fileids that make
        up the specified verbnet class(es).
        N(   R   R/   t
   isinstancet
   basestringR	   R1   (   R   t   vnclass_idsR   t
   vnclass_id(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyR      s    s   Use corpus.fileids() insteadc         C   s   |  i  | ƒ S(   N(   R   (   R   R8   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyt   filesž   s    c         C   s1   x* |  i  D] } |  i |  i | ƒ | ƒ q
 Wd S(   s(  
        Initialize the indexes L{_lemma_to_class},
        L{_wordnet_to_class}, and L{_class_to_fileid} by scanning
        through the corpus fileids.  This is fast with cElementTree
        (<0.1 secs), but quite slow (>10 secs) with the python
        implementation of ElementTree.
        N(   R/   t   _index_helperR0   (   R   R&   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyt   _index§   s    
 c         C   sÎ   | i  d ƒ } | |  i | <| |  i |  i | ƒ <xk | i d ƒ D]Z } |  i | i  d ƒ i | ƒ x4 | i  d d ƒ i ƒ  D] } |  i | i | ƒ q~ WqB Wx' | i d ƒ D] } |  i	 | | ƒ q° Wd S(   s   Helper for L{_index()}R    s   MEMBERS/MEMBERR   R   R   s   SUBCLASSES/VNSUBCLASSN(
   R   R	   R
   t   shortidR   R   t   appendR   R   R;   (   R   R+   R&   R   R   R   R-   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyR;   ²   s       c         C   s0  x)|  i  D]} | d  } | |  i | <| |  i |  i | ƒ <xè |  i i |  i | ƒ i ƒ  ƒ D]Å } | i ƒ  } | d d j	 oK |  i
 | d i | ƒ x‡ | d i ƒ  D] } |  i | i | ƒ q« Wq_ | d d j	 o5 | |  i | d <| d } | |  i |  i | ƒ <q_ t p
 t d ‚ q_ Wq
 Wd S(   sª  
        Initialize the indexes L{_lemma_to_class},
        L{_wordnet_to_class}, and L{_class_to_fileid} by scanning
        through the corpus fileids.  This doesn't do proper xml parsing,
        but is good enough to find everything in the standard verbnet
        corpus -- and it runs about 30 times faster than xml parsing
        (with the python ElementTree; only 2-3 times faster with
        cElementTree).
        iüÿÿÿi    i   i   s   unexpected match conditionN(   R/   R	   R
   R=   t	   _INDEX_REt   finditert   opent   readt   groupsR   R   R>   R   R   R2   R3   (   R   R&   R   t   mRC   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyR   ¾   s$    
 
"  
c         C   s{   |  i  i | ƒ o | Sn( |  i i | ƒ p t d | ƒ ‚ n y |  i | SWn# t j
 o t d | ƒ ‚ n Xd S(   s¨   Given a short verbnet class identifier (eg '37.10'), map it
        to a long id (eg 'confess-37.10').  If C{shortid} is already a
        long id, then return it as-iss   vnclass identifier %r not foundN(   t
   _LONGID_REt   matcht   _SHORTID_RER"   R
   t   KeyError(   R   R=   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyR1   ß   s    c         C   sY   |  i  i | ƒ o | Sn |  i i | ƒ } | o | i d ƒ Sn t d | ƒ ‚ d S(   s©   Given a long verbnet class identifier (eg 'confess-37.10'),
        map it to a short id (eg '37.10').  If C{longid} is already a
        short id, then return it as-is.i   s   vnclass identifier %r not foundN(   RG   RF   RE   t   groupR"   (   R   R1   RD   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyR=   ì   s    c            sÑ   t  | t ƒ o ˆ  i | ƒ } n | i d ƒ d } | ˆ  i | d d ƒd 7} | ˆ  i | d d ƒd 7} | d 7} | ˆ  i | d d ƒd 7} | d 7} | d i ‡  f d †  | i d	 ƒ Dƒ ƒ 7} | S(
   s÷   
        Return a string containing a pretty-printed representation of
        the given verbnet class.
        
        @param vnclass: A verbnet class identifier; or an ElementTree
        containing the xml contents of a verbnet class.
        R    s   
t   indents     s     Thematic roles:
s       s
     Frames:
c         3   s(   x! |  ] } ˆ  i  | d  d ƒVq Wd S(   RJ   s       N(   t   pprint_frame(   t   .0t   vnframe(   R   (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pys	   <genexpr>  s   	s   FRAMES/FRAME(	   R6   R7   R   R   t   pprint_subclassest   pprint_memberst   pprint_themrolest   joinR   (   R   R   t   s(    (   R   s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyt   pprintü   s    

R   c      
   C   s   t  | t ƒ o |  i | ƒ } n g  } | i d ƒ D] } | | i d ƒ q7 ~ } | p d g } n d d i | ƒ } t i | d d | d | d	 ƒS(
   s  
        Return a string containing a pretty-printed representation of
        the given verbnet class's subclasses.
        
        @param vnclass: A verbnet class identifier; or an ElementTree
        containing the xml contents of a verbnet class.
        s   SUBCLASSES/VNSUBCLASSR    s   (none)s   Subclasses: t    iF   t   initial_indentt   subsequent_indents     (   R6   R7   R   R   R   RQ   t   textwrapt   fill(   R   R   RJ   R   R-   t
   subclassesRR   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyRN     s    ,c      
   C   s   t  | t ƒ o |  i | ƒ } n g  } | i d ƒ D] } | | i d ƒ q7 ~ } | p d g } n d d i | ƒ } t i | d d | d | d	 ƒS(
   s  
        Return a string containing a pretty-printed representation of
        the given verbnet class's member verbs.
        
        @param vnclass: A verbnet class identifier; or an ElementTree
        containing the xml contents of a verbnet class.
        s   MEMBERS/MEMBERR   s   (none)s	   Members: RT   iF   RU   RV   s     (   R6   R7   R   R   R   RQ   RW   RX   (   R   R   RJ   R   R   t   membersRR   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyRO   #  s    ,c   	      C   sÇ   t  | t ƒ o |  i | ƒ } n g  } xŽ | i d ƒ D]} } | d | i d ƒ } g  } | i d ƒ D] } | d | i qj ~ } | o | d d i | ƒ 7} n | i | ƒ q9 Wd i | ƒ S(	   s  
        Return a string containing a pretty-printed representation of
        the given verbnet class's thematic roles.
        
        @param vnclass: A verbnet class identifier; or an ElementTree
        containing the xml contents of a verbnet class.
        s   THEMROLES/THEMROLEs   * t   types   SELRESTRS/SELRESTRs   %(Value)s%(type)ss   [%s]RT   s   
(   R6   R7   R   R   R   t   attribRQ   R>   (	   R   R   RJ   t   piecest   themrolet   pieceR   t   restrt	   modifiers(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyRP   5  s     *c         C   s`   |  i  | | ƒ d } | |  i | | d ƒ d 7} | | d 7} | |  i | | d ƒ 7} | S(   sØ   
        Return a string containing a pretty-printed representation of
        the given verbnet frame.
        
        @param vnframe: An ElementTree containing the xml contents of
        a verbnet frame.
        s   
s
     Syntax: s     Semantics:
s       (   t   pprint_descriptiont   pprint_syntaxt   pprint_semantics(   R   RM   RJ   RR   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyRK   K  s
    c         C   sR   | i  d ƒ } | | i d } | i d d ƒ o | d | i d ƒ 7} n | S(   sä   
        Return a string containing a pretty-printed representation of
        the given verbnet frame description.
        
        @param vnframe: An ElementTree containing the xml contents of
        a verbnet frame.
        t   DESCRIPTIONt   primaryt	   secondaryR   s    (%s)(   t   findR\   R   (   R   RM   RJ   t   descrRR   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyRb   Y  s
    c   	      C   sÛ   g  } xÁ | i  d ƒ D]° } | i } g  } d | i j o | i | i d ƒ ƒ n | g  } | i d ƒ | i d ƒ D] } | d | i qy ~ 7} | o | d d i | ƒ 7} n | i | ƒ q W| d i | ƒ S(   sß   
        Return a string containing a pretty-printed representation of
        the given verbnet frame syntax.
        
        @param vnframe: An ElementTree containing the xml contents of
        a verbnet frame.
        t   SYNTAXt   values   SELRESTRS/SELRESTRs   SYNRESTRS/SYNRESTRs   %(Value)s%(type)ss   [%s]RT   (   Rh   t   tagR\   R>   R   R   RQ   (	   R   RM   RJ   R]   t   eltR_   Ra   R   R`   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyRc   g  s     	
,c   
   	   C   s®   g  } xs | i  d ƒ D]b } g  } | i  d ƒ D] } | | i d ƒ q0 ~ } | i d | i d ƒ d i | ƒ f ƒ q Wd i g  } | D] }	 | d | |	 f q ~ ƒ S(   sâ   
        Return a string containing a pretty-printed representation of
        the given verbnet frame semantics.
        
        @param vnframe: An ElementTree containing the xml contents of
        a verbnet frame.
        s   SEMANTICS/PREDs   ARGS/ARGRk   s   %s(%s)s   , s   
s   %s* %s(   R   R   R>   RQ   (
   R   RM   RJ   R]   t   predR   t   argt   argsR(   R_   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyRd   ~  s     3-(   t   __name__t
   __module__R2   R   t   ret   compileRE   RG   R?   R   R   R   R.   R   R   R   R:   R<   R;   R   R1   R=   RS   RN   RO   RP   RK   Rb   Rc   Rd   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pyR      s0   	 				!			(
   Rs   RW   t   nltk.compatt   nltk.internalsR   t   utilt   apit   xmldocsR   R   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/verbnet.pys   <module>   s   



