³ò
4ÒÇIc           @   s6  d  Z  d d k Td d k Td d k Td d k Td d k Td d k l Z d d k Z d d d d d	 d
 d d d d d d d d d d d d d d g Z	 d „  Z
 e ƒ  i Z d „  Z e d ƒ d „  ƒ Z e d ƒ d „  ƒ Z e d ƒ d „  ƒ Z e d  ƒ d! „  ƒ Z e d" ƒ d# „  ƒ Z e d$ ƒ d% „  ƒ Z d S(&   sK   
Functions for X{tokenizing}, i.e., dividing text strings into
substrings.
iÿÿÿÿ(   t   *(   t
   deprecatedNt   WhitespaceTokenizert   SpaceTokenizert   TabTokenizert   LineTokenizert   RegexpTokenizert   BlanklineTokenizert   WordPunctTokenizert   WordTokenizert   blankline_tokenizet   wordpunct_tokenizet   regexp_tokenizet   word_tokenizet   SExprTokenizert   sexpr_tokenizet   line_tokenizet   PunktWordTokenizert   PunktSentenceTokenizert   TreebankWordTokenizert   sent_tokenizec         C   s   t  i i d ƒ } | i |  ƒ S(   sŸ   
    Use NLTK's currently recommended sentence tokenizer to tokenize
    sentences in the given text.  Currently, this uses
    L{PunktSentenceTokenizer}.
    s   tokenizers/punkt/english.pickle(   t   nltkt   datat   loadt   tokenize(   t   textt	   tokenizer(    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyR       s    c         C   s
   t  |  ƒ S(   sÙ   
    Use NLTK's currently recommended word tokenizer to tokenize words
    in the given sentence.  Currently, this uses
    L{TreebankWordTokenizer}.  This tokenizer should be fed a single
    sentence at a time.
    (   t   _word_tokenize(   R   (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyR   +   s    sA   Use nltk.blankline_tokenize() or nltk.BlanklineTokenizer instead.c         C   s   t  ƒ  i |  ƒ S(   N(   R   R   (   R   (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyt	   blankline8   s    sA   Use nltk.wordpunct_tokenize() or nltk.WordPunctTokenizer instead.c         C   s   t  ƒ  i |  ƒ S(   N(   R   R   (   R   (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyt	   wordpunct=   s    s4   Use str.split() or nltk.WhitespaceTokenizer instead.c         C   s   t  ƒ  i |  ƒ S(   N(   R   R   (   R   (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyt
   whitespaceB   s    s7   Use nltk.word_tokenize() or nltk.WordTokenizer instead.c         C   s   t  ƒ  i |  ƒ S(   N(   R	   R   (   R   (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyt   wordF   s    s7   Use nltk.line_tokenize() or nltk.LineTokenizer instead.c         C   s   t  ƒ  i |  ƒ S(   N(   R   R   (   R   (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyt   lineK   s    s7   Use method of nltk.tokenize.PunktWordTokenizer instead.c         C   s   t  ƒ  i |  ƒ S(   N(   R   R   (   R   (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyt   punkt_word_tokenizeP   s    (   t   __doc__t   simplet   regexpt   punktt   sexprt   treebankt   nltk.internalsR   R   t   __all__R   R   R   R   R   R   R   R   R   R    R!   (    (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pys   <module>   s.   




						
	