# Natural Language Toolkit: Texts
#
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT

from math import log
import re

from nltk.probability import FreqDist, LidstoneProbDist
from nltk.probability import ConditionalFreqDist as CFD
from nltk.compat import defaultdict
from nltk.util import ngrams, tokenwrap, LazyConcatenation
from nltk.model import NgramModel
from nltk.metrics import f_measure
from nltk.collocations import BigramCollocationFinder, bigram_measures


class ContextIndex(object):
    """
    A bidirectional index between words and their 'contexts' in a text.
    The context of a word is usually defined to be the words that occur
    in a fixed window around the word; but other definitions may also
    be used by providing a custom context function.
    """
    @staticmethod
    def _default_context(tokens, i):
        """One left token and one right token, normalized to lowercase"""
        left = (tokens[i-1].lower() if i != 0 else '*START*')
        right = (tokens[i+1].lower() if i != len(tokens)-1 else '*END*')
        return (left, right)

    def __init__(self, tokens, context_func=None, filter=None, key=lambda x:x):
        self._key = key
        self._tokens = tokens
        if not context_func:
            context_func = self._default_context
        self._context_func = context_func
        if filter:
            tokens = [t for t in tokens if filter(t)]
        self._word_to_contexts = CFD((self._key(w), self._context_func(tokens, i))
                                     for i, w in enumerate(tokens))
        self._context_to_words = CFD((self._context_func(tokens, i), self._key(w))
                                     for i, w in enumerate(tokens))

    def tokens(self):
        """
        @rtype: C{list} of token
        @return: The document that this context index was
            created from.
        """
        return self._tokens

    def word_similarity_dict(self, word):
        """
        Return a dictionary mapping from words to 'similarity scores',
        indicating how often these two words occur in the same
        context.
        """
        word = self._key(word)
        word_contexts = set(self._word_to_contexts[word])

        scores = {}
        for w, w_contexts in self._word_to_contexts.items():
            scores[w] = f_measure(word_contexts, set(w_contexts))

        return scores

    def similar_words(self, word, n=20):
        scores = defaultdict(int)
        for c in self._word_to_contexts[self._key(word)]:
            for w in self._context_to_words[c]:
                if w != word:
                    scores[w] += (self._context_to_words[c][word] *
                                  self._context_to_words[c][w])
        return sorted(scores, key=scores.get, reverse=True)[:n]

    def common_contexts(self, words, fail_on_unknown=False):
        """
        Find contexts where the specified words can all appear; and
        return a frequency distribution mapping each context to the
        number of times that context was used.

        @param words: The words used to seed the similarity search
        @type words: C{str}
        @param fail_on_unknown: If true, then raise a value error if
            any of the given words do not occur at all in the index.
        """
        words = [self._key(w) for w in words]
        contexts = [set(self._word_to_contexts[w]) for w in words]
        empty = [words[i] for i in range(len(words)) if not contexts[i]]
        common = reduce(set.intersection, contexts)
        if empty and fail_on_unknown:
            raise ValueError("The following word(s) were not found:",
                             " ".join(words))
        elif not common:
            # Nothing in common -- just return an empty freqdist.
            return FreqDist()
        else:
            fd = FreqDist(c for w in words
                          for c in self._word_to_contexts[w]
                          if c in common)
            return fd
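
# A minimal usage sketch for ContextIndex (an illustration, not part of
# the original module; the token list is hypothetical).  Indexing both
# word->context and context->word lets us ask which contexts two words
# share:
#
#     >>> tokens = ['the', 'cat', 'sat', 'on', 'the', 'mat', '.',
#     ...           'the', 'dog', 'sat', 'on', 'the', 'rug', '.']
#     >>> ci = ContextIndex(tokens, key=lambda s: s.lower())
#     >>> ci.common_contexts(['cat', 'dog']).keys()
#     [('the', 'sat')]
#
# Both words occur exactly once, between 'the' and 'sat', so that pair
# is their single common context.
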
class ConcordanceIndex(object):
    """
    An index that can be used to look up the offset locations at which
    a given word occurs in a document.
    """
    def __init__(self, tokens, key=lambda x:x):
        """
        Construct a new concordance index.

        @param tokens: The document (list of tokens) that this
            concordance index was created from.  This list can be used
            to access the context of a given word occurrence.
        @param key: A function that maps each token to a normalized
            version that will be used as a key in the index.  E.g., if
            you use C{key=str.lower}, then the index will be
            case-insensitive.
        """
        self._tokens = tokens
        self._key = key
        self._offsets = defaultdict(list)
        # Initialize the index (self._offsets)
        for index, word in enumerate(tokens):
            word = self._key(word)
            self._offsets[word].append(index)

    def tokens(self):
        """
        @rtype: C{list} of token
        @return: The document that this concordance index was
            created from.
        """
        return self._tokens

    def offsets(self, word):
        """
        @rtype: C{list} of C{int}
        @return: A list of the offset positions at which the given
            word occurs.  If a key function was specified for the
            index, then given word's key will be looked up.
        """
        word = self._key(word)
        return self._offsets[word]

    def __repr__(self):
        return '<ConcordanceIndex for %d tokens (%d types)>' % (
            len(self._tokens), len(self._offsets))

    def print_concordance(self, word, width=75, lines=25):
        """
        Print a concordance for C{word} with the specified context window.

        @param word: The target word
        @type word: C{str}
        @param width: The width of each line, in characters (default=75)
        @type width: C{int}
        @param lines: The number of lines to display (default=25)
        @type lines: C{int}
        """
        half_width = (width - len(word) - 2) / 2
        context = width / 4  # approx number of words of context

        offsets = self.offsets(word)
        if offsets:
            lines = min(lines, len(offsets))
            print "Displaying %s of %s matches:" % (lines, len(offsets))
            for i in offsets:
                left = (' ' * half_width +
                        ' '.join(self._tokens[i-context:i]))
                right = ' '.join(self._tokens[i+1:i+context])
                left = left[-half_width:]
                right = right[:half_width]
                print left, word, right
                lines -= 1
                if lines <= 0:
                    break
        else:
            print "No matches"
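
# A minimal usage sketch for ConcordanceIndex (an illustration, not
# part of the original module; the token list is hypothetical).  With
# key=str.lower the index is case-insensitive:
#
#     >>> ci = ConcordanceIndex(['The', 'cat', 'saw', 'the', 'dog'],
#     ...                       key=str.lower)
#     >>> ci.offsets('The')
#     [0, 3]
#     >>> ci
#     <ConcordanceIndex for 5 tokens (4 types)>
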
	t   TokenSearcherc           B   s    e  Z d  Z d „  Z d „  Z RS(   sÝ  
    A class that makes it easier to use regular expressions to search
    over tokenized strings.  The tokenized string is converted to a
    string where tokens are marked with angle brackets -- e.g.,
    C{'<the><window><is><still><open>'}.  The regular expression
    passed to the L{findall()} method is modified to treat angle
    brackets as nongrouping parentheses, in addition to matching the
    token boundaries; and to have C{'.'} not match the angle brackets.
    c         C   s    d i  d „  | Dƒ ƒ |  _ d  S(   Nt    c         s   s!   x |  ] } d  | d Vq Wd S(   t   <t   >N(    (   R   R   (    (    s   /p/zhu/06/nlp/nltk/nltk/text.pys	   <genexpr>Ë   s    (   R9   t   _raw(   R   R   (    (    s   /p/zhu/06/nlp/nltk/nltk/text.pyR$   Ê   s    c         C   sà   t  i d d | ƒ } t  i d d | ƒ } t  i d d | ƒ } t  i d d | ƒ } t  i | |  i ƒ } x? | D]7 } | i d ƒ o  | i d ƒ o t d	 ƒ ‚ qp qp Wg  } | D] } | | d
 d !i d ƒ q¶ ~ } | S(   s¯  
        Find instances of the regular expression in the text.
        The text is a list of tokens, and a regexp pattern to match
        a single token must be surrounded by angle brackets.  E.g.
        
        >>> ts.findall("<.*><.*><bro>")
        ['you rule bro', ['telling you bro; u twizted bro
        >>> ts.findall("<a>(<.*>)<man>")
        monied; nervous; dangerous; white; white; white; pious; queer; good;
        mature; white; Cape; great; wise; wise; butterless; white; fiendish;
        pale; furious; better; certain; complete; dismasted; younger; brave;
        brave; brave; brave
        >>> text9.findall("<th.*>{3,}")
        thread through those; the thought that; that the thing; the thing
        that; that that thing; through these than through; them that the;
        through the thick; them that they; thought that the
        
        @param regexp: A regular expression
        @type regexp: C{str}
        s   \sRV   RW   s   (?:<(?:RX   s   )>)s	   (?<!\\)\.s   [^>]s$   Bad regexp for TokenSearcher.findalli   iÿÿÿÿs   ><(   t   ret   subt   findallRY   t
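
# A sketch of the regexp rewriting done by findall() above (an
# illustration, not part of the original module).  '<' becomes
# '(?:<(?:' and '>' becomes ')>)', so each bracketed unit is a
# non-grouping group anchored to one token, and an unescaped '.'
# becomes '[^>]' so it cannot match across a token boundary:
#
#     >>> ts = TokenSearcher(['you', 'rule', 'bro'])
#     >>> ts._raw
#     '<you><rule><bro>'
#     >>> ts.findall('<.*><bro>')
#     [['rule', 'bro']]
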
class Text(object):
    """
    A wrapper around a sequence of simple (string) tokens, which is
    intended to support initial exploration of texts (via the
    interactive console).  Its methods perform a variety of analyses
    on the text's contexts (e.g., counting, concordancing, collocation
    discovery), and display the results.  If you wish to write a
    program which makes use of these analyses, then you should bypass
    the C{Text} class, and use the appropriate analysis function or
    class directly instead.

    C{Text}s are typically initialized from a given document or
    corpus.  E.g.:

    >>> moby = Text(nltk.corpus.gutenberg.words('melville-moby_dick.txt'))
    """
    # This defeats lazy loading, but makes things faster.
    _COPY_TOKENS = True

    def __init__(self, tokens, name=None):
        """
        Create a Text object.

        @param tokens: The source text.
        @type tokens: C{sequence} of C{str}
        """
        if self._COPY_TOKENS:
            tokens = list(tokens)
        self.tokens = tokens

        if name:
            self.name = name
        elif ']' in tokens[:20]:
            end = tokens[:20].index(']')
            self.name = " ".join(map(str, tokens[1:end]))
        else:
            self.name = " ".join(map(str, tokens[:8])) + "..."

    def __getitem__(self, i):
        if isinstance(i, slice):
            return self.tokens[i.start:i.stop]
        else:
            return self.tokens[i]

    def __len__(self):
        return len(self.tokens)

    def concordance(self, word, width=79, lines=25):
        """
        Print a concordance for C{word} with the specified context window.
        @seealso: L{ConcordanceIndex}
        """
        if '_concordance_index' not in self.__dict__:
            print "Building index..."
            self._concordance_index = ConcordanceIndex(self.tokens,
                                                       key=lambda s: s.lower())
        self._concordance_index.print_concordance(word, width, lines)

    def collocations(self, num=20):
        """
        Print collocations derived from the text.
        @seealso: L{find_collocations}
        """
        if '_collocations' not in self.__dict__:
            print "Building collocations list"
            from nltk.corpus import stopwords
            ignored_words = stopwords.words('english')
            finder = BigramCollocationFinder.from_words(self.tokens)
            finder.apply_freq_filter(2)
            finder.apply_word_filter(lambda w: len(w) < 3
                                     or w.lower() in ignored_words)
            self._collocations = finder.nbest(bigram_measures.likelihood_ratio,
                                              num)
        colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations]
        print tokenwrap(colloc_strings, separator="; ")

    def count(self, word):
        """
        Count the number of times this word appears in the text.
        """
        return self.tokens.count(word)

    def index(self, word):
        """
        Find the index of the first occurrence of the word in the text.
        """
        return self.tokens.index(word)

    def readability(self, method):
        raise NotImplementedError

    def generate(self, length=100):
        """
        Print random text, generated using a trigram language model.

        @param length: The length of text to generate (default=100)
        @type length: C{int}
        @seealso: L{NgramModel}
        """
        if '_trigram_model' not in self.__dict__:
            print "Building ngram index..."
            estimator = lambda fdist, bins: LidstoneProbDist(fdist, 0.2)
            self._trigram_model = NgramModel(3, self, estimator)
        text = self._trigram_model.generate(length)
        print tokenwrap(text)

    def search(self, pattern):
        """
        Search for instances of the regular expression pattern in the text.

        @seealso: L{TokenSearcher}
        """
        if '_token_searcher' not in self.__dict__:
            print "Loading data..."
            self._token_searcher = TokenSearcher(self.tokens)
        return self._token_searcher.findall(pattern)

    def similar(self, word, num=20):
        """
        Distributional similarity: find other words which appear in the
        same contexts as the specified word; list most similar words first.

        @param word: The word used to seed the similarity search
        @type word: C{str}
        @param num: The number of words to generate (default=20)
        @type num: C{int}
        @seealso: L{ContextIndex.similar_words()}
        """
        if '_word_context_index' not in self.__dict__:
            print 'Building word-context index...'
            self._word_context_index = ContextIndex(self.tokens,
                                                    filter=lambda x: x.isalpha(),
                                                    key=lambda s: s.lower())

        word = word.lower()
        wci = self._word_context_index._word_to_contexts
        if word in wci.conditions():
            contexts = set(wci[word])
            fd = FreqDist(w for w in wci.conditions() for c in wci[w]
                          if c in contexts and not w == word)
            words = fd.keys()[:num]
            print tokenwrap(words)
        else:
            print "No matches"

    def common_contexts(self, words, num=20):
        """
        Find contexts where the specified words appear; list
        most frequent common contexts first.

        @param words: The words used to seed the similarity search
        @type words: C{list} of C{str}
        @param num: The number of contexts to display (default=20)
        @type num: C{int}
        @seealso: L{ContextIndex.common_contexts()}
        """
        if '_word_context_index' not in self.__dict__:
            print 'Building word-context index...'
            self._word_context_index = ContextIndex(self.tokens,
                                                    key=lambda s: s.lower())

        try:
            fd = self._word_context_index.common_contexts(words, True)
            if not fd:
                print "No common contexts were found"
            else:
                ranked_contexts = fd.keys()[:num]
                print tokenwrap(w1 + "_" + w2 for w1, w2 in ranked_contexts)
        except ValueError, e:
            print e

    def dispersion_plot(self, words):
        """
        Produce a plot showing the distribution of the words through the text.
        Requires pylab to be installed.

        @param words: The words to be plotted
        @type words: C{list} of C{str}
        @seealso: L{nltk.draw.dispersion_plot()}
        """
        from nltk.draw import dispersion_plot
        dispersion_plot(self, words)

    def plot(self, *args):
        """
        See documentation for FreqDist.plot()
        @seealso: L{nltk.prob.FreqDist.plot()}
        """
        self.vocab().plot(*args)

    def vocab(self):
        """
        @seealso: L{nltk.prob.FreqDist}
        """
        if "_vocab" not in self.__dict__:
            print "Building vocabulary index..."
            self._vocab = FreqDist(self)
        return self._vocab

    def findall(self, regexp):
        """
        Find instances of the regular expression in the text.
        The text is a list of tokens, and a regexp pattern to match
        a single token must be surrounded by angle brackets.  E.g.

        >>> text5.findall("<.*><.*><bro>")
        you rule bro; telling you bro; u twizted bro
        >>> text1.findall("<a>(<.*>)<man>")
        monied; nervous; dangerous; white; white; white; pious; queer; good;
        mature; white; Cape; great; wise; wise; butterless; white; fiendish;
        pale; furious; better; certain; complete; dismasted; younger; brave;
        brave; brave; brave
        >>> text9.findall("<th.*>{3,}")
        thread through those; the thought that; that the thing; the thing
        that; that that thing; through these than through; them that the;
        through the thick; them that they; thought that the

        @param regexp: A regular expression
        @type regexp: C{str}
        """
        if "_token_searcher" not in self.__dict__:
            self._token_searcher = TokenSearcher(self)

        hits = self._token_searcher.findall(regexp)
        hits = [' '.join(h) for h in hits]
        print tokenwrap(hits, "; ")

    _CONTEXT_RE = re.compile(r'\w+|[\.\!\?]')

    def _context(self, tokens, i):
        """
        One left & one right token, both case-normalized.  Skip over
        non-sentence-final punctuation.  Used by the L{ContextIndex}
        that is created for L{similar()} and L{common_contexts()}.
        """
        # Left context: back up over non-matching tokens.
        j = i - 1
        while j >= 0 and not self._CONTEXT_RE.match(tokens[j]):
            j -= 1
        left = (tokens[j].lower() if j >= 0 else '*START*')

        # Right context: skip ahead over non-matching tokens.
        j = i + 1
        while j < len(tokens) and not self._CONTEXT_RE.match(tokens[j]):
            j += 1
        right = (tokens[j].lower() if j < len(tokens) else '*END*')

        return (left, right)

    def __repr__(self):
        """
        @return: A string representation of this C{Text}.
        @rtype: string
        """
        return '<Text: %s>' % self.name
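
# A minimal interactive sketch for Text (an illustration, not part of
# the original module; it assumes the gutenberg corpus data is
# installed):
#
#     >>> from nltk.corpus import gutenberg
#     >>> moby = Text(gutenberg.words('melville-moby_dick.txt'))
#     >>> moby.concordance('monstrous')
#     >>> moby.similar('whale')
#     >>> moby.common_contexts(['whale', 'ship'])
#
# Each method builds its supporting index on first use and caches it
# in self.__dict__, so only the first call pays the indexing cost.
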
class TextCollection(Text):
    """A collection of texts, which can be loaded with list of texts, or
    with a corpus consisting of one or more texts, and which supports
    counting, concordancing, collocation discovery, etc.  Initialize a
    TextCollection as follows:

    >>> gutenberg = TextCollection(nltk.corpus.gutenberg)
    >>> mytexts = TextCollection([text1, text2, text3])

    Iterating over a TextCollection produces all the tokens of all the
    texts in order.
    """
    def __init__(self, source, name=None):
        if hasattr(source, 'words'):  # bridge to the text corpus reader
            source = [source.words(f) for f in source.files()]
        self._texts = source
        Text.__init__(self, LazyConcatenation(source))

    def tf(self, term, text, method=None):
        return float(text.count(term)) / len(text)

    def df(self, term, method=None):
        return (sum(1 for text in self._texts if term in text)
                / float(len(self._texts)))

    def tf_idf(self, term, text):
        return self.tf(term, text) * log(1.0 / self.df(term))
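
# A worked tf-idf sketch for TextCollection (an illustration, not part
# of the original module; the two toy texts are hypothetical).  With
# two texts and a term occurring in only one of them, df = 0.5, so
# tf_idf = tf * log(1/df) = 0.5 * log(2):
#
#     >>> col = TextCollection([['a', 'whale'], ['a', 'cat']])
#     >>> col.df('whale')
#     0.5
#     >>> col.tf('whale', ['a', 'whale'])
#     0.5
#     >>> round(col.tf_idf('whale', ['a', 'whale']), 5)
#     0.34657
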
def demo():
    from nltk.corpus import brown
    text = Text(brown.words(categories='news'))
    print text
    print
    print "Concordance:"
    text.concordance('news')
    print
    print "Distributionally similar words:"
    text.similar('news')
    print
    print "Collocations:"
    text.collocations()
    print
    print "Automatically generated text:"
    text.generate()
    print
    print "Dispersion plot:"
    text.dispersion_plot(['news', 'report', 'said', 'announced'])
    print
    print "Vocabulary plot:"
    text.plot(50)
    print
    print "Indexing:"
    print "text[3]:", text[3]
    print "text[3:5]:", text[3:5]
    print "text.vocab()['news']:", text.vocab()['news']

if __name__ == '__main__':
    demo()

__all__ = ["ContextIndex", "ConcordanceIndex", "TokenSearcher",
           "Text", "TextCollection"]