
B_Kc        +   @   s  d  d k  Z  d  d k Z d  d k l Z l Z d  d k l Z d  d k l Z d  d k	 l
 Z d  d k l Z d Z d d	 d
 d d f \ Z Z Z Z Z e e e e g Z e d d d d d d d d d d d d d d d d d d d d  d! d" d# d$ d% d& d' d( d) d* d+ d, d- d. d/ f$ Z d0 e f d1     YZ d2 e f d3     YZ d4 e f d5     YZ d6 e f d7     YZ d8 e f d9     YZ d: e f d;     YZ e d<  Z  e i  i! e  _! e d=  Z" e i" i! e" _! e d>  Z# e i# i! e# _! e d?  Z$ e i$ i! e$ _! e d@  Z% e i% i! e% _! e dA  Z& e i& i! e& _! e dB  Z' e dC  Z( dD   Z) dE   Z* dF   Z+ e, dG j o e+   n d S(H   iN(   t   islicet   chain(   t   defaultdict(   t   CorpusReader(   t   binary_search_file(   t   FreqDistgu <7~t   at   st   rt   nt   vs   Something %ss   Somebody %ss   It is %sings   Something is %sing PPs%   Something %s something Adjective/Nouns   Something %s Adjective/Nouns   Somebody %s Adjectives   Somebody %s somethings   Somebody %s somebodys   Something %s somebodys   Something %s somethings   Something %s to somebodys   Somebody %s on somethings   Somebody %s somebody somethings!   Somebody %s something to somebodys#   Somebody %s something from somebodys#   Somebody %s somebody with somethings!   Somebody %s somebody of somethings!   Somebody %s something on somebodys   Somebody %s somebody PPs   Somebody %s something PPs   Somebody %s PPs   Somebody's (body part) %ss"   Somebody %s somebody to INFINITIVEs   Somebody %s somebody INFINITIVEs   Somebody %s that CLAUSEs   Somebody %s to somebodys   Somebody %s to INFINITIVEs   Somebody %s whether INFINITIVEs)   Somebody %s somebody into V-ing somethings$   Somebody %s something with somethings   Somebody %s INFINITIVEs   Somebody %s VERB-ings   It %s that CLAUSEs   Something %s INFINITIVEt   WordNetErrorc           B   s   e  Z d  Z RS(   s.   An exception class for wordnet-related errors.(   t   __name__t
   __module__t   __doc__(    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR   X   s   t   _WordNetObjectc           B   s   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 d   Z
 d	   Z d
   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z RS(   s+   A common base class for lemmas and synsets.c         C   s   |  i  d  S(   Nt   @(   t   _related(   t   self(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt	   hypernyms_   s    c         C   s   |  i  d  S(   Ns   @i(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   instance_hypernymsb   s    c         C   s   |  i  d  S(   Nt   ~(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   hyponymse   s    c         C   s   |  i  d  S(   Ns   ~i(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   instance_hyponymsh   s    c         C   s   |  i  d  S(   Ns   #m(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   member_holonymsk   s    c         C   s   |  i  d  S(   Ns   #s(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   substance_holonymsn   s    c         C   s   |  i  d  S(   Ns   #p(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   part_holonymsq   s    c         C   s   |  i  d  S(   Ns   %m(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   member_meronymst   s    c         C   s   |  i  d  S(   Ns   %s(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   substance_meronymsw   s    c         C   s   |  i  d  S(   Ns   %p(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   part_meronymsz   s    c         C   s   |  i  d  S(   Nt   =(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt
   attributes}   s    c         C   s   |  i  d  S(   Nt   *(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   entailments   s    c         C   s   |  i  d  S(   Nt   >(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   causes   s    c         C   s   |  i  d  S(   Nt   ^(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt	   also_sees   s    c         C   s   |  i  d  S(   Nt   $(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   verb_groups   s    c         C   s   |  i  d  S(   Nt   &(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   similar_tos   s    c         C   s   t  |  i  S(   N(   t   hasht   name(   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   __hash__   s    c         C   s   |  i  | i  j S(   N(   R+   (   R   t   other(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   __eq__   s    c         C   s   |  i  | i  j S(   N(   R+   (   R   R-   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   __ne__   s    (   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R!   R#   R%   R'   R)   R,   R.   R/   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR   \   s(   																		t   Lemmac           B   sM   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 RS(   sD  
    The lexical entry for a single morphological form of a
    sense-disambiguated word.

    Create a Lemma from a "<word>.<pos>.<number>.<lemma>" string where:
    <word> is the morphological stem identifying the synset
    <pos> is one of the module attributes ADJ, ADJ_SAT, ADV, NOUN or VERB
    <number> is the sense number, counting from 0.
    <lemma> is the morphological form of interest

    Note that <word> and <lemma> can be different, e.g. the Synset
    'salt.n.03' has the Lemmas 'salt.n.03.salt', 'salt.n.03.saltiness' and
    'salt.n.03.salinity'.

    Lemma attributes
    ----------------
    name - The canonical name of this lemma.
    synset - The synset that this lemma belongs to.
    syntactic_marker - For adjectives, the WordNet string identifying the
        syntactic position relative modified noun. See:
            http://wordnet.princeton.edu/man/wninput.5WN.html#sect10
        For all other parts of speech, this attribute is None.

    Lemma methods
    -------------
    Lemmas have the following methods for retrieving related Lemmas. They
    correspond to the names for the pointer symbols defined here:
        http://wordnet.princeton.edu/man/wninput.5WN.html#sect3
    These methods all return lists of Lemmas.

    antonyms
    hypernyms
    instance_hypernyms
    hyponyms
    instance_hyponyms
    member_holonyms
    substance_holonyms
    part_holonyms
    member_meronyms
    substance_meronyms
    part_meronyms
    attributes
    derivationally_related_forms
    entailments
    causes
    also_sees
    verb_groups
    similar_tos
    pertainyms
    c         C   sU   | |  _  | |  _ | |  _ | |  _ g  |  _ g  |  _ | |  _ | |  _ d  |  _	 d  S(   N(
   t   _wordnet_corpus_readerR+   t   syntactic_markert   synsett   frame_stringst	   frame_idst   _lexname_indext   _lex_idt   Nonet   key(   R   t   wordnet_corpus_readerR3   R+   t   lexname_indext   lex_idR2   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   __init__   s    								c         C   s)   t  |   i |  i i |  i f } d | S(   Ns   %s('%s.%s')(   t   typeR   R3   R+   (   R   t   tup(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   __repr__   s    !c         C   sW   |  i  i } g  } |  i i |  i | f D]& \ } } } | | | |  i | q* ~ S(   N(   R1   t   _synset_from_pos_and_offsetR3   t   _lemma_pointersR+   t   lemmas(   R   t   relation_symbolt
   get_synsett   _[1]t   post   offsett   lemma_index(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR      s    c         C   s   |  i  i |   S(   s)   Return the frequency count for this Lemma(   R1   t   lemma_count(   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   count   s    c         C   s   |  i  d  S(   Nt   !(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   antonyms   s    c         C   s   |  i  d  S(   Nt   +(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   derivationally_related_forms   s    c         C   s   |  i  d  S(   Ns   \(   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt
   pertainyms   s    (
   R   R   R   R=   R@   R   RK   RM   RO   RP   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR0      s   2						t   Synsetc           B   s   e  Z d  Z d   Z d   Z d   Z d   Z d d  Z d   Z d   Z	 d	   Z
 d
 d  Z d   Z d e d  Z e d  Z e d  Z e d  Z e d  Z e d  Z e d  Z d   Z d   Z d   Z RS(   s]  Create a Synset from a "<lemma>.<pos>.<number>" string where:
    <lemma> is the word's morphological stem
    <pos> is one of the module attributes ADJ, ADJ_SAT, ADV, NOUN or VERB
    <number> is the sense number, counting from 0.

    Synset attributes
    -----------------
    name - The canonical name of this synset, formed using the first lemma
        of this synset. Note that this may be different from the name
        passed to the constructor if that string used a different lemma to
        identify the synset.
    pos - The synset's part of speech, matching one of the module level
        attributes ADJ, ADJ_SAT, ADV, NOUN or VERB.
    lemmas - A list of the Lemma objects for this synset.
    definition - The definition for this synset.
    examples - A list of example strings for this synset.
    offset - The offset in the WordNet dict file of this synset.
    #lexname - The name of the lexicographer file containing this synset.

    Synset methods
    --------------
    Synsets have the following methods for retrieving related Synsets.
    They correspond to the names for the pointer symbols defined here:
        http://wordnet.princeton.edu/man/wninput.5WN.html#sect3
    These methods all return lists of Synsets.

    hypernyms
    instance_hypernyms
    hyponyms
    instance_hyponyms
    member_holonyms
    substance_holonyms
    part_holonyms
    member_meronyms
    substance_meronyms
    part_meronyms
    attributes
    entailments
    causes
    also_sees
    verb_groups
    similar_tos

    Additionally, Synsets support the following methods specific to the
    hypernym relation:

    root_hypernyms
    common_hypernyms
    lowest_common_hypernyms

    Note that Synsets do not support the following relations because
    these are defined by WordNet as lexical relations:

    antonyms
    derivationally_related_forms
    pertainyms
    c         C   s   | |  _  d  |  _ d  |  _ d  |  _ g  |  _ g  |  _ g  |  _ g  |  _ d  |  _	 g  |  _
 d  |  _ t t  |  _ t t  |  _ d  S(   N(   R1   R8   RG   RH   R+   R5   RC   t   lemma_namest   lemma_infost
   definitiont   examplest   lexnameR   t   sett	   _pointersRB   (   R   R:   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR=   .  s    											c         C   s   g  } t    } |  g } xq | oi | i   } | | j oL | i |  | i   | i   } | p | i |  q | i |  q q W| S(   s4   Get the topmost hypernyms of this synset in WordNet.(   RW   t   popt   addR   R   t   appendt   extend(   R   t   resultt   seent   todot   next_synsett   next_hypernyms(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   root_hypernymsA  s    		
	c         C   sb   d |  i  j oK |  i   |  i   } | p d |  _ q[ d t d   | D  |  _ n |  i S(   sh   
        @return: The length of the longest hypernym path from this
        synset to the root.
        t
   _max_depthi    i   c         s   s   x |  ] } | i    Vq Wd  S(   N(   t	   max_depth(   t   .0t   h(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>f  s    (   t   __dict__R   R   Rc   t   max(   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRd   [  s    !c         C   sb   d |  i  j oK |  i   |  i   } | p d |  _ q[ d t d   | D  |  _ n |  i S(   si   
        @return: The length of the shortest hypernym path from this
        synset to the root.
        t
   _min_depthi    i   c         s   s   x |  ] } | i    Vq Wd  S(   N(   t	   min_depth(   Re   Rf   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>t  s    (   Rg   R   R   Ri   t   min(   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRj   i  s    !ic         c   sw   d d k  l } g  } xZ | |  | |  D]F } | i |  i j o- | i | j o | i | i  | Vqo q) q) Wd S(   s  Return the transitive closure of source under the rel
        relationship, breadth-first

        >>> from nltk.corpus import wordnet as wn
        >>> dog = wn.synset('dog.n.01')
        >>> hyp = lambda s:s.hypernyms()
        >>> list(dog.closure(hyp))
        [Synset('domestic_animal.n.01'), Synset('canine.n.02'),
        Synset('animal.n.01'), Synset('carnivore.n.01'),
        Synset('organism.n.01'), Synset('placental.n.01'),
        Synset('living_thing.n.01'), Synset('mammal.n.01'),
        Synset('whole.n.02'), Synset('vertebrate.n.01'),
        Synset('object.n.01'), Synset('chordate.n.01'),
        Synset('physical_entity.n.01'), Synset('entity.n.01')]
        i(   t   breadth_firstN(   t	   nltk.utilRl   RH   R[   (   R   t   relt   depthRl   t   synset_offsetsR3   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   closurew  s     c         C   s{   g  } |  i    } t |  d j o |  g g } n x? | D]7 } x. | i   D]  } | i |   | i |  qO Wq< W| S(   s$  
        Get the path(s) from this synset to the root, where each path is a
        list of the synset nodes traversed on the way to the root.

        @return: A list of lists, where each list gives the node sequence
           connecting the initial L{Synset} node and a root node.
        i    (   R   t   lent   hypernym_pathsR[   (   R   t   pathsR   t   hypernymt   ancestor_list(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRs     s      c         C   sK   t  d   |  i   D  } t  d   | i   D  } t | i |   S(   s   
        Find all synsets that are hypernyms of this synset and the
        other synset.

        @type  other: L{Synset}
        @param other: other input synset.
        @return: The synsets that are hypernyms of both synsets.
        c         s   s*   x# |  ] } x | D] } | Vq Wq Wd  S(   N(    (   Re   t   self_synsetst   self_synset(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s   	c         s   s*   x# |  ] } x | D] } | Vq Wq Wd  S(   N(    (   Re   t   other_synsetst   other_synset(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s   	(   RW   t   _iter_hypernym_listst   listt   intersection(   R   R-   Rw   Ry   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   common_hypernyms  s
    			c   	      C   s   |  i    } | i    } t d   | D  } t d   | D  } | i |  yP t d   | D  } g  } | D]$ } | i   | j o | | qu qu ~ SWn t j
 o g  Sn Xd S(   s;   Get the lowest synset that both synsets have as a hypernym.c         s   s*   x# |  ] } x | D] } | Vq Wq Wd  S(   N(    (   Re   t   synsetsR   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s    c         s   s*   x# |  ] } x | D] } | Vq Wq Wd  S(   N(    (   Re   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s    c         s   s   x |  ] } | i    Vq Wd  S(   N(   Rj   (   Re   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s    N(   R{   RW   t   intersection_updateRh   Rj   t
   ValueError(	   R   R-   t   self_hypernymst   other_hypernymsR   t   othersRd   RF   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   lowest_common_hypernyms  s    :i    c         C   sQ   t  |  | f g  } x5 |  i   |  i   D] } | | i | d  O} q, W| S(   s  
        Get the path(s) from this synset to the root, counting the distance
        of each node from the initial node on the way. A set of
        (synset, distance) tuples is returned.

        @type  distance: C{int}
        @param distance: the distance (number of edges) from this hypernym to
            the original hypernym L{Synset} on which this method was called.
        @return: A set of (L{Synset}, int) tuples where each L{Synset} is
           a hypernym of the first L{Synset}.
        i   (   RW   R   R   t   hypernym_distances(   R   t   distancet	   distancesRu   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s
     c         C   s2  |  | j o d Sn t  } |  i   } h  } | i   } h  } xw | | f | | f g D]] \ } } xN | D]F \ }	 }
 |	 | j o# |
 | |	 j  o |
 | |	 <q qk |
 | |	 <qk WqX Wxr | i   D]d } x[ | i   D]M } | | j o: | | | | } | d j  p | | j  o
 | } q&q q Wq W| S(   s  
        Returns the distance of the shortest path linking the two synsets (if
        one exists). For each synset, all the ancestor nodes and their
        distances are recorded and compared. The ancestor node common to both
        synsets that can be reached with the minimum number of traversals is
        used. If no ancestor nodes are common, None is returned. If a node is
        compared with itself 0 is returned.

        @type  other: L{Synset}
        @param other: The Synset to which the shortest path will be found.
        @return: The number of edges in the shortest path connecting the two
            nodes, or None if no path exists.
        i    (   R8   R   t   keys(   R   R-   t   path_distancet
   dist_list1t
   dist_dict1t
   dist_list2t
   dist_dict2t   lt   dR9   t   valuet   synset1t   synset2t   new_distance(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   shortest_path_distance  s0        c         C   st   |  g } | d j oB | g  } | |   D]  } | | i  | | d |  q* ~ 7} n | o | | g 7} n | S(   sJ  
        >>> from nltk.corpus import wordnet as wn
        >>> dog = wn.synset('dog.n.01')
        >>> hyp = lambda s:s.hypernyms()
        >>> from pprint import pprint
        >>> pprint(dog.tree(hyp))
        [Synset('dog.n.01'),
         [Synset('domestic_animal.n.01'),
          [Synset('animal.n.01'),
           [Synset('organism.n.01'),
            [Synset('living_thing.n.01'),
             [Synset('whole.n.02'),
              [Synset('object.n.01'),
               [Synset('physical_entity.n.01'), [Synset('entity.n.01')]]]]]]]],
         [Synset('canine.n.02'),
          [Synset('carnivore.n.01'),
           [Synset('placental.n.01'),
            [Synset('mammal.n.01'),
             [Synset('vertebrate.n.01'),
              [Synset('chordate.n.01'),
               [Synset('animal.n.01'),
                [Synset('organism.n.01'),
                 [Synset('living_thing.n.01'),
                  [Synset('whole.n.02'),
                   [Synset('object.n.01'),
                    [Synset('physical_entity.n.01'),
                     [Synset('entity.n.01')]]]]]]]]]]]]]]
        i    i   (   t   tree(   R   Rn   Ro   t   cut_markR   RF   t   x(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    	Bc         C   s4   |  i  |  } | d j o d | d Sn d Sd S(   sf  
        Path Distance Similarity:
        Return a score denoting how similar two word senses are, based on the
        shortest path that connects the senses in the is-a (hypernym/hypnoym)
        taxonomy. The score is in the range 0 to 1, except in those cases where
        a path cannot be found (will only be true for verbs as there are many
        distinct verb taxonomies), in which case None is returned. A score of
        1 represents identity i.e. comparing a sense with itself will return 1.

        @type  other: L{Synset}
        @param other: The L{Synset} that this L{Synset} is being compared to.

        @return: A score denoting the similarity of the two L{Synset}s,
            normally between 0 and 1. None is returned if no connecting path
            could be found. 1 is returned if a L{Synset} is compared with
            itself.
        i    g      ?i   N(   R   R8   (   R   R-   t   verboseR   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   path_similarity.  s    c         C   s   |  i  | i  j o t d d |  | f   n |  i  |  i i j o |  i i |  i   n |  i i |  i  } |  i |  } | d j o t i | d d |  Sn d Sd S(   s%  
        Leacock Chodorow Similarity:
        Return a score denoting how similar two word senses are, based on the
        shortest path that connects the senses (as above) and the maximum depth
        of the taxonomy in which the senses occur. The relationship is given as
        -log(p/2d) where p is the shortest path length and d is the taxonomy
        depth.

        @type  other: L{Synset}
        @param other: The L{Synset} that this L{Synset} is being compared to.

        @return: A score denoting the similarity of the two L{Synset}s,
            normally greater than 0. None is returned if no connecting path
            could be found. If a L{Synset} is compared with itself, the
            maximum score is returned, which varies depending on the taxonomy
            depth.
        s&   Computing the lch similarity requires s*   %s and %s to have the same part of speech.i    i   g       @N(	   RG   R   R1   Rc   t   _compute_max_depthR   t   matht   logR8   (   R   R-   R   Ro   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   lch_similarityG  s    c         C   s   |  i  |  } t |  d j o t Sn | d } | i   d } | i t j o | d 7} n |  i |  } | i |  } | t j p | t j o t Sn | | 7} | | 7} d | | | S(   s  
        Wu-Palmer Similarity:
        Return a score denoting how similar two word senses are, based on the
        depth of the two senses in the taxonomy and that of their Least Common
        Subsumer (most specific ancestor node). Note that at this time the
        scores given do _not_ always agree with those given by Pedersen's Perl
        implementation of WordNet Similarity.

        The LCS does not necessarily feature in the shortest path connecting
        the two senses, as it is by definition the common ancestor deepest in
        the taxonomy, not closest to the two senses. Typically, however, it
        will so feature. Where multiple candidates for the LCS exist, that
        whose shortest path to the root node is the longest will be selected.
        Where the LCS has multiple paths to the root, the longer path is used
        for the purposes of the calculation.

        @type  other: L{Synset}
        @param other: The L{Synset} that this L{Synset} is being compared to.
        @return: A float score denoting the similarity of the two L{Synset}s,
            normally greater than zero. If no connecting path between the two
            senses can be found, None is returned.
        i    i   g       @(   R   Rr   R8   Rd   RG   t   NOUNR   (   R   R-   R   t	   subsumerst   subsumerRo   t   len1t   len2(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   wup_similarityj  s    


c         C   s   t  |  | |  \ } } } | S(   s  
        Resnik Similarity:
        Return a score denoting how similar two word senses are, based on the
        Information Content (IC) of the Least Common Subsumer (most specific
        ancestor node).

        @type  other: L{Synset}
        @param other: The L{Synset} that this L{Synset} is being compared to.
        @type  ic: C{dict}
        @param ic: an information content object (as returned by L{load_ic()}).
        @return: A float score denoting the similarity of the two L{Synset}s.
            Synsets whose LCS is the root node of the taxonomy will have a
            score of 0 (e.g. N['dog'][0] and N['table'][0]).
        (   t   _lcs_ic(   R   R-   t   icR   t   ic1t   ic2t   lcs_ic(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   res_similarity  s    c         C   s   |  | j o t  Sn t |  | |  \ } } } | d j p | d j o d Sn | | d | } | d j o t  Sn d | S(   s  
        Jiang-Conrath Similarity:
        Return a score denoting how similar two word senses are, based on the
        Information Content (IC) of the Least Common Subsumer (most specific
        ancestor node) and that of the two input Synsets. The relationship is
        given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)).

        @type  other: L{Synset}
        @param other: The L{Synset} that this L{Synset} is being compared to.
        @type  ic: C{dict}
        @param ic: an information content object (as returned by L{load_ic()}).
        @return: A float score denoting the similarity of the two L{Synset}s.
        i    i   i   (   t   _INFR   (   R   R-   R   R   R   R   R   t   ic_difference(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   jcn_similarity  s    c         C   s+   t  |  | |  \ } } } d | | | S(   s  
        Lin Similarity:
        Return a score denoting how similar two word senses are, based on the
        Information Content (IC) of the Least Common Subsumer (most specific
        ancestor node) and that of the two input Synsets. The relationship is
        given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).

        @type  other: L{Synset}
        @param other: The L{Synset} that this L{Synset} is being compared to.
        @type  ic: C{dict}
        @param ic: an information content object (as returned by L{load_ic()}).
        @return: A float score denoting the similarity of the two L{Synset}s,
            in the range 0 to 1.
        g       @(   R   (   R   R-   R   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   lin_similarity  s    c         c   s   |  g } t    } x~ | ov x | D] } | i |  q# W| Vg  } | D]; } | i   | i   D] } | | j o | | qd qd qJ ~ } q Wd S(   s   
        @return: An iterator over L{Synset}s that are either proper
        hypernyms or instance of hypernyms of the synset.
        N(   RW   RZ   R   R   (   R   R_   R^   R3   RF   Ru   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR{     s    		
 
	c         C   s   d t  |   i |  i f S(   Ns   %s(%r)(   R>   R   R+   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR@     s    c         C   sG   |  i  i } |  i | } g  } | D] \ } } | | | |  q$ ~ S(   N(   R1   RA   RX   (   R   RD   RE   t   pointer_tuplesRF   RG   RH   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    (   R   R   R   R=   Rb   Rd   Rj   Rq   Rs   R~   R   R   R   R8   R   t   FalseR   R   R   R   R   R   R{   R@   R   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRQ      s*   9								3&#4 		t   WordNetCorpusReaderc           B   sv  e  Z d  Z e Z dO \ Z Z Z Z Z	 h  d e <d e <d e <d	 e	 <Z
 h  d
 e <d e	 <d e <d e <d e <Z e d   e i   D  Z dP Z d   Z d    Z d!   Z d"   Z d#   Z d$   Z d%   Z d&   Z d'   Z d(   Z e d)  Z e d*  Z e d+  Z e d,  Z e d-  Z d.   Z e  d/  Z! e" i! i e! _ e  d0  Z# e" i# i e# _ e  d1  Z$ e" i$ i e$ _ e  d2  Z% e" i% i e% _ e  d3  Z& e" i& i e& _ e  d4  Z' e" i' i e' _ e d5  Z( h  dQ dR dS dT dU dV dW dX dY g	 e <dZ d[ d\ d] d^ d_ d` da g e	 <db dc dd de g e <g  e <Z) dL   Z* e  dM dN  Z+ RS(f   sA   
    A corpus reader used to access wordnet or its variants.
    R   R   R   R	   R
   t   adjt   advt   nount   verbi   i   i   i   i   c         c   s&   x |  ] } | d  d  d  Vq Wd  S(   Ni(    (   Re   R?   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s    s   cntlist.revt   lexnamess   index.senses	   index.adjs	   index.advs
   index.nouns
   index.verbs   data.adjs   data.advs	   data.nouns	   data.verbs   adj.excs   adv.excs   noun.excs   verb.excc         C   s   t  i |  | |  i d |  i t t  |  _ t t  |  _ t t  |  _ h  |  _	 h  |  _
 g  |  _ d |  _ d |  _ xb t |  i d   D]K \ } } | i   \ } } } t |  | j p t  |  i i |  q W|  i   |  i   d S(   s_   
        Construct a new wordnet corpus reader, with the given root
        directory.
        t   encodingR   N(   R   R=   t   _FILESt	   _ENCODINGR   t   dictt   _lemma_pos_offset_mapt   _synset_offset_cacheRc   t   _data_file_mapt   _exception_mapt	   _lexnamesR8   t   _key_count_filet   _key_synset_filet	   enumeratet   opent   splitt   intt   AssertionErrorR[   t   _load_lemma_pos_offset_mapt   _load_exception_map(   R   t   roott   it   linet   indexRV   t   _(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR=     s"    
					 
c         C   s  x|  i  i   D]} xt |  i d |   D]\ } } | i d  o q0 n t | i    i } y |   } |   } t |    } | d j p t	  t |    } g  }	 t
 |  D] }
 |	 |   q ~	 }
 t |    } | | j p t	  t |    }
 g  } t
 |  D] }
 | t |    q~ } WnB t	 t f j
 o0 } d | | d | f } t d |   n X| |  i | | <| t j o | |  i | t <q0 q0 Wq Wd  S(   Ns   index.%st    i    i   s   file %s, line %i: %s(   t   _FILEMAPt   valuesR   R   t
   startswitht   iterR   t   nextR   R   t   xrangeR   R   R   t   ADJt   ADJ_SAT(   R   t   suffixR   R   R   t   lemmaRG   t	   n_synsetst
   n_pointersRF   R   t   n_sensest   _[2]Rp   t   eR?   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR   F  s0      		*4c         C   s   xm |  i  i   D]\ \ } } h  |  i | <x@ |  i d |  D]+ } | i   } | d |  i | | d <q= Wq W|  i t |  i t <d  S(   Ns   %s.exci   i    (   R   t   itemsR   R   R   R   R   (   R   RG   R   R   t   terms(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR   q  s      !c         C   se   d } xK |  i  |  D]: } y t | | i    } Wq t j
 o | GHq Xq W| |  i | <d S(   sy   
        Compute the max depth for the given part of speech.  This is
        used by the lch similarity metric.
        i    N(   t   all_synsetsRh   Rd   t   RuntimeErrorRc   (   R   RG   Ro   t   ii(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR   z  s     c         C   sm   | i  d d  \ } } |  i |  } x) | i D] } | i | j o | Sq1 q1 Wt d | | f   d  S(   Nt   .i   s   no lemma %r in %r(   t   rsplitR3   RC   R+   R   (   R   R+   t   synset_namet
   lemma_nameR3   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    
 c         C   s  | i    } | i d  \ } } | i d  \ } } } } } |  i t |  } |  i d  j o |  i d  |  _ n t |  i |  }	 |	 p t d |   n t |	 i   d  }
 |  i	 | |
  } x) | i
 D] } | i | j o | Sq q Wt d |   d  S(   Nt   %t   :s   index.senses   No synset found for key %ri   s   No lemma found for for key %r(   t   lowerR   t
   _pos_namesR   R   R8   R   t   _binary_search_fileR   RA   RC   R9   (   R   R9   R   t	   lex_senset
   pos_numberR;   R<   R   RG   t   synset_lineRH   R3   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   lemma_from_key  s     
 c         C   sq  | i    i d d  \ } } } t |  d } y |  i | | | } Wn t j
 o# d } t | | | f   nv t j
 oi t |  i | |  } d } | d j o | | | d f }	 n | | | d f }	 t | |	   n X|  i | |  }
 | d j o* |
 i	 d	 j o d
 } t | |   n |
 i	 | j p$ | d	 j o |
 i	 d j p t
  |
 S(   NR   i   i   s"   no lemma %r with part of speech %rs.   lemma %r with part of speech %r has only %i %st   senset   sensesR   R   sI   adjective satellite requested but only plain adjective found for lemma %r(   R   R   R   R   t   KeyErrorR   t
   IndexErrorRr   RA   RG   R   (   R   R+   R   RG   t   synset_index_strt   synset_indexRH   t   messageR   R?   R3   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR3     s(    !4c         C   sf   | t  j o
 t } n |  i i |  t j o+ d |  i | } |  i |  |  i | <n |  i | S(   se   
        Return an open file pointer for the data file for the given
        part of speech.
        s   data.%s(   R   R   R   t   getR8   R   R   (   R   RG   t   fileid(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt
   _data_file  s    
c         C   s   | |  i  | j o |  i  | | Sn |  i |  } | i |  | i   } |  i | |  } | i | j p t  | |  i  | | <| S(   N(   R   R   t   seekt   readlinet   _synset_from_pos_and_lineRH   R   (   R   RG   RH   t	   data_filet   data_file_lineR3   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRA     s    c   %      C   s  t  |   } y| i d  \ } } | i   } g  } x] | i d  D]L } | i   } | i d  o | i i | i d   qF | i |  qF Wd i |  | _ t | i    i	 } t
 |    | _ t
 |    }	 |  i |	 | _ |   | _ t
 |   d  }
 x t |
  D] } |   } t
 |   d  } t i d |  } | i   \ } } t |  | | |	 | |  } | i i |  | i i | i  qWt
 |    } x t |  D] } |   } t
 |    } |   } |   } | d j o | i | i | | f  qt
 | d  d  d	 } t
 | d d  d	 } | i | i } | i } | | | f } | i | | | f  qWy t
 |    } Wn t j
 o n Xx t |  D] } |   d
 j p t  t
 |    } t | } t
 |   d  } | d j oO | i i |  xt | i D]- } | i i |  | i i | | i  q-Wq| i | d	 } | i i |  | i i | | i  qWWn+ t j
 o } t  d | | f   n Xx | i D] } | i t! j o1 | i"   d i d } | i }  d | i# }! n d }  }! | i t$ i% | i | i& | i# |  |! f }" d |" i'   | _( qW| i d i i'   } |  i) | | i }# |# i* | i  }$ | | i |$ d	 f }" d |" | _ | S(   Nt   |t   ;t   "s   ; i   s   (.*?)(\(.*\))?$t   0000i   i   RN   i    s   line %r: %ss   %02dt    s   %s%%%d:%02d:%02d:%s:%ss
   %s.%s.%02i(+   RQ   R   t   stripR   RU   R[   t   joinRT   R   R   R   RH   R   RV   RG   R   t   ret   matcht   groupsR0   RC   RR   R+   RX   RZ   RB   t   StopIterationR   t   VERB_FRAME_STRINGSR5   R4   R   R   R   R)   R7   R   t   _pos_numbersR6   R   R9   R   R   (%   R   RG   R   R3   t   columns_strt   glosst   definitionst
   gloss_partR   R;   t   n_lemmasR   R   R<   t   mt   syn_markR   R   t   symbolRH   t   lemma_ids_strt   source_indext   target_indext   source_lemma_namet   lemma_pointerst   tupst   frame_countt   frame_numbert   frame_string_fmtt   lemma_numberR   t
   head_lemmat	   head_namet   head_idR?   t   offsetst   sense_index(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s      	 				 

 
 	
c         C   s   | i    } |  i } |  i } | t j o
 t } n g  } | D]? } | |  i | |  i | g   D] } | | | |  qf q@ ~ S(   s   Load all synsets with a given lemma and part of speech tag.
        If no pos is specified, all synsets for all parts of speech
        will be loaded.
        (   R   RA   R   R8   t   POS_LISTt   morphyR   (   R   R   RG   RE   R   RF   t   pRH   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR   a  s    		

c         C   sO   g  } |  i  | |  D]1 } | i D]! } | i | j o | | q$ q$ q ~ S(   N(   R   RC   R+   (   R   R   RG   RF   R3   t	   lemma_obj(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRC   q  s    c         C   s+   g  } |  i  |  D] } | | i q ~ S(   N(   RC   R+   (   R   RG   RF   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   wordsw  s    c            s<    d j o t   i  Sn    f d     i D Sd S(   s   Return all lemma names for all synsets for the given
        part of speech tag. If not pos is specified, all synsets
        for all parts of speech will be used.
        c         3   s1   x* |  ]# }    i  | j o	 | Vq q Wd  S(   N(   R   (   Re   R   (   R   RG   (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s   	N(   R8   R   R   (   R   RG   (    (   R   RG   s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   all_lemma_namesz  s    c         c   so  | d j o |  i i   } n
 | g } |  i } |  i } x-| D]%} | t j o
 t } n d |  i | } |  i |  } y | i   } | i	   }	 x |	 o |	 d i
   ps | | | j o | | | }
 n | | |	  }
 |
 | | | <| t j o |
 i | j o	 |
 Vq$q(|
 Vn | i   } | i	   }	 q WWn | i     qB X| i   qB Wd S(   s   Iterate over all synsets with a given part of speech tag.
        If no pos is specified, all synsets for all parts of speech
        will be loaded.
        s   data.%si    N(   R8   R   R   R   R   R   R   R   t   tellR   t   isspaceRG   t   close(   R   RG   t   pos_tagst   cachet   from_pos_and_linet   pos_tagR   R   RH   R   R3   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s<    			 

	
c         C   sh   |  i  d j o |  i d  |  _  n t |  i  | i  } | o t | i d d  d  Sn d Sd S(   s)   Return the frequency count for this Lemmas   cntlist.revR   i   iN(   R   R8   R   R   R9   R   R   (   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRJ     s    c         C   s   | i  | |  S(   N(   R   (   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   | i  | |  S(   N(   R   (   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   | i  | |  S(   N(   R   (   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   | i  | | |  S(   N(   R   (   R   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   | i  | | |  S(   N(   R   (   R   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   | i  | | |  S(   N(   R   (   R   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c            s   | d j o, |  i  t    f d   t D  } n |  i   |  } t t | d   } t |  d j o | d Sn d Sd S(   s  
        Find a possible base form for the given form, with the given
        part of speech, by checking WordNet's list of exceptional
        forms, and by recursively stripping affixes for this part of
        speech until a form in WordNet is found.

        >>> from nltk.corpus import wordnet as wn
        >>> wn.morphy('dogs')
        'dog'
        >>> wn.morphy('churches')
        'church'
        >>> wn.morphy('aardwolves')
        'aardwolf'
        >>> wn.morphy('abaci')
        'abacus'
        >>> wn.morphy('hardrock', ADV)
        >>> wn.morphy('book', wn.NOUN)
        'book'
        >>> wn.morphy('book', wn.ADJ)
        c         3   s3   x, |  ]% } x    |  D] } | Vq Wq Wd  S(   N(    (   Re   R%  R   (   t   formR$  (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s    i   i    N(   R8   t   _morphyR   R#  R|   R    Rr   (   R   R0  RG   t   analysest   first(    (   R0  R$  s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR$    s    	#R  t   sest   vest   ft   xesR   t   zest   zt   chest   cht   shest   sht   ment   mant   iest   yt   esR   t   edt   ingt   ert   estc         #   s     i   }   i        f d    | | j o x | | D] } | VqJ Wn  t j o$ | i d  o d } | d  } n d } x  |  D] } | | Vq Wd  S(   Nc         3   s   |    i  j o    i  |  j o	 |  Vn xS  D]K \ } } |  i |  o/ x,  |  t |   |  D] } | Vql Wq4 q4 Wd  S(   N(   R   t   endswithRr   (   R0  t   oldt   newR6  (   R   RG   t   substitutionst   try_substitutions(    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRK    s    	  t   fuliR  (   R   t   MORPHOLOGICAL_SUBSTITUTIONSR   RG  (   R   R0  RG   t
   exceptionsR6  R   (    (   R   RG   RJ  RK  s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR1    s    
  g      ?c         C   s  t    } x! | i   D] } | i |  q Wh  } x t D] } t t  | | <q: W| d j oL xI |  i   D]7 } | i }	 |	 t j o
 t	 }	 n | | |	 | i
 <qn Wn x | D] } |  i |  }
 t |
  d j o q n t | |  } | p | t t |
   :} n x |
 D]y } | i }	 |	 t j o
 t	 }	 n x< | i   D]. } x% | D] } | |	 | i
 c | 7<q[WqNW| |	 d c | 7<qWq W| S(   s  
        Creates an information content lookup dictionary from a corpus.

        @type corpus: L{CorpusReader}
        @param corpus: The corpus from which we create an information
        content dictionary.
        @type weight_senses_equally: L{bool}
        @param weight_senses_equally: If this is True, gives all
        possible senses equal weight rather than dividing by the
        number of possible senses.  (If a word has 3 synses, each
        sense gets 0.3333 per appearance when this is False, 1.0 when
        it is true.)
        @param smoothing: How much do we smooth synset counts (default is 1.0)
        @type smoothing: L{float}
        @return: An information content dictionary
        g        i    (   R   R'  t   incR#  R   t   floatR   RG   R   R   RH   R   Rr   R{   (   R   t   corpust   weight_senses_equallyt	   smoothingt   countst   wwR   t   ppt   ssRG   t   possible_synsetst   weightt   levelt   hh(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR   6  sF    	   	
  	
  (   R   R   R   R	   R
   (   s   cntlist.revs   lexnamess   index.senses	   index.adjs	   index.advs
   index.nouns
   index.verbs   data.adjs   data.advs	   data.nouns	   data.verbs   adj.excs   adv.excs   noun.excs   verb.exc(   R   R  (   R4  R   (   R5  R6  (   R7  R   (   R8  R9  (   R:  s   ch(   R<  s   sh(   R>  s   man(   R@  RA  (   R   R  (   R@  RA  (   s   esR   (   s   esR  (   s   edR   (   s   edR  (   RD  R   (   RD  R  (   RE  R  (   RF  R  (   RE  R   (   RF  R   (,   R   R   R   R8   R   R   R   t   ADVR   t   VERBR   R  R   R   R   R   R=   R   R   R   R   R   R3   R   RA   R   R   RC   R'  R(  R   RJ   R   R   RQ   R   R   R   R   R   R$  RM  R1  R   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     sb   *3   	)	+						!			y>	#		t   WordNetICCorpusReaderc           B   s    e  Z d  Z d   Z d   Z RS(   sE   
    A corpus reader for the WordNet information content corpus.
    c         C   s   t  i |  | |  d  S(   N(   R   R=   (   R   R   t   fileids(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR=   v  s    c   	      C   s   h  } t  t  | t <t  t  | t <x t |  i |   D] \ } } | d j o q< n | i   } t | d d   } t | d  } t | d  } t	 |  d j o) | d d j o | | d c | 7<n | d j o | | | | <q< q< W| S(   s  
        Load an information content file from the wordnet_ic corpus
        and return a dictionary.  This dictionary has just two keys,
        NOUN and VERB, whose values are dictionaries that map from
        synsets to information content values.

        @type  icfile: L{str}
        @param icfile: The name of the wordnet_ic file (e.g. "ic-brown.dat")
        @return: An information content dictionary
        i    ii   i   i   t   ROOT(
   R   RP  R   R]  R   R   R   R   t   _get_posRr   (	   R   t   icfileR   t   numR   t   fieldsRH   R   RG   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR   }  s      $(   R   R   R   R=   R   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR^  q  s   	c         C   s   |  i  | |  S(   N(   R   (   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   |  i  | |  S(   N(   R   (   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   |  i  | |  S(   N(   R   (   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   |  i  | |  S(   N(   R   (   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   |  i  | |  S(   N(   R   (   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c         C   s   |  i  | |  S(   N(   R   (   R   R   R   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    c      	   C   s}  t  } d } |  i |  } | o d G| GHn t   } d   } xF | D]> } x5 | D]- }	 |	 | i |  j o | i |	  qU qU WqH W| o d G| GHn g  }
 | D] } | | j o |
 | q q ~
 } | o d G| GHn x~ | D]v } | i   } d } x> | D]6 } | d j  p t |  | j  o t |  } q
q
W| | j o | } | } q q W| o d G| GHn | S(   s  
    Finds the least common subsumer of two synsets in a WordNet taxonomy,
    where the least common subsumer is defined as the ancestor node common
    to both input synsets whose shortest path to the root node is the longest.

    @type  synset1: L{Synset}
    @param synset1: First input synset.
    @type  synset2: L{Synset}
    @param synset2: Second input synset.
    @return: The ancestor synset common to both input synsets which is also the
    LCS.
    is   > Subsumers1:c         S   s
   |  i    S(    (   R   (   R   (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   <lambda>  s    s   > Eliminated:s   > Subsumers2:i    s   > LCS Subsumer by depth:(   R8   R~   RW   Rq   RZ   Rs   Rr   (   R   R   R   R   t   max_min_path_lengthR   t
   eliminatedt   hypernym_relationt   s1t   s2RF   R   t	   candidatet   paths_to_roott   min_path_lengtht   path(    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   _lcs_by_depth  s@    		  2   c            s   |  i  | i  j o t d d |  | f   n t |     } t |    } |  i |  } t |  d j o
 d } n t   f d   | D  } | o d G| GHn | | | f S(   s  
    Get the information content of the least common subsumer that has
    the highest information content value.  If two nodes have no
    explicit common subsumer, assume that they share an artificial
    root node that is the hypernym of all explicit roots.

    @type  synset1: L{Synset}
    @param synset1: First input synset.
    @type  synset2: L{Synset}
    @param synset2: Second input synset.  Must be the same part of
    speech as the first synset.
    @type  ic: C{dict}
    @param ic: an information content object (as returned by L{load_ic()}).
    @return: The information content of the two synsets and their most
    informative subsumer
    s-   Computing the least common subsumer requires s*   %s and %s to have the same part of speech.i    c         3   s"   x |  ] } t  |    Vq Wd  S(   N(   t   information_content(   Re   R   (   R   (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys	   <genexpr>  s    s   > LCS Subsumer by content:(   RG   R   Rp  R~   Rr   Rh   (   R   R   R   R   R   R   R   t   subsumer_ic(    (   R   s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyR     s    
c         C   s|   y | |  i  } Wn, t j
 o  d } t | |  i    n X| |  i } | d j o t Sn t i | | d  Sd  S(   Ns>   Information content file has no entries for part-of-speech: %si    (   RG   R   R   RH   R   R   R   (   R3   R   t   icpost   msgRT  (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRp  #  s    c         C   sH   |  d d j o t  Sn, |  d d j o t Sn d } t |   d  S(   NiR	   R
   s?   Unidentified part of speech in WordNet Information Content file(   R   R]  R   (   t   fieldRs  (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyRa  4  s    c             s  d d  k  }  d GHt |  i i d   } d GH| i   | i } d GH  d  } | i G| i G| i GH| i	 GH| i
 GH| i GHd g } d d	 d
 d g }   f d   } | |  } | |  } t | |  }	 | GH| GHd GH  d  i   GH  d  i   GH  d  i   GH| d  i   GH| d  i   GH| d  i   GH| d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH  d  i   GH| d   i   GH| d!  i   GH  d"  i   GH  d#  i   GH  d$  i   GH  d#  i   GH| d%  i   GH  d&  i   GH  d'  i   GH  d(  i   GH  d&  i   d   GH  d  i   d)   GH  d  i    d)   GH  d  i!   d)   GHt" |  i i d*  d+  }
 |
 i# d,  }   d  i$   d)  |  GH|
 i# d-  }   d  i%   d)  |  GHd  S(.   Nis   loading wordnets   corpora/wordnets   done loadings   getting a synset for gos   go.v.21s   zap.n.01s   zap.v.01s   zap.v.02s	   nuke.v.01s   microwave.v.01c            s%   g  } |  D] } |   |  q ~ S(   N(    (   t   synset_stringsRF   R3   (   t   S(    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   _get_synsetsT  s    s   Navigations:s   travel.v.01s   travel.v.02s   travel.v.03s   zap.v.03.nukes   zap.v.03.atomizes   zap.v.03.atomises   zap.v.03.zaps   dog.n.01s   breakfast.n.1s   meal.n.1s
   Austen.n.1s   composer.n.1s   faculty.n.2s   copilot.n.1s	   table.n.2s
   course.n.7s	   water.n.1s   gin.n.1s   leader.n.1.leaders   increase.v.1.increases	   snore.v.1s	   heavy.a.1s	   light.a.1s   English.a.1.Englishs   person.n.01s	   sail.v.01s	   fall.v.12s   cat.n.01s   corpora/wordnet_ics   .*\.dats   ic-brown.dats   ic-semcor.dat(&   t   nltkR   t   datat   findR3   R   R+   RG   RV   RR   RT   RU   RW   R   RO   R   R   R   R   R   R   R   R   R   RM   R!   R)   R   RP   Rb   R   R   R   R   R^  R   R   R   (   Rx  t   wnt   Lt   move_synsett   zap_nt   zap_vRw  t   zap_n_synsetst   zap_v_synsetst   zap_synsetst   wnicR   (    (   Rv  s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pyt   demoB  sx    				t   __main__(-   R   R  t	   itertoolsR    R   t   nltk.compatR   t   nltk.corpus.readerR   Rm   R   R   t   nltk.probabilityR   R   R   R   R\  R   R]  R#  R8   R
  t	   ExceptionR   t   objectR   R0   RQ   R   R^  R   R   R   R   R   R   R   R   Ro  R   Rp  Ra  R  R   (    (    (    s0   /p/zhu/06/nlp/nltk/nltk/corpus/reader/wordnet.pys   <module>
   s   $	=Z    q3;&			S