³ò
RÔjKc           @   så   d  Z  d d k Td d k Td d k Td d k Td d k Td d k Z d d d d d d	 d
 d d d d d d d d d d d d d g Z y d d k Z Wn e	 j
 o n Xd d k
 Te d g 7Z d „  Z e ƒ  i Z d „  Z d S(   sK   
Functions for X{tokenizing}, i.e., dividing text strings into
substrings.
iÿÿÿÿ(   t   *Nt   WhitespaceTokenizert   SpaceTokenizert   TabTokenizert   LineTokenizert   RegexpTokenizert   BlanklineTokenizert   WordPunctTokenizert   WordTokenizert   blankline_tokenizet   wordpunct_tokenizet   regexp_tokenizet   word_tokenizet   SExprTokenizert   sexpr_tokenizet   line_tokenizet   PunktWordTokenizert   PunktSentenceTokenizert   TreebankWordTokenizert   sent_tokenizet   TextTilingTokenizerc         C   s   t  i i d ƒ } | i |  ƒ S(   sŸ   
    Use NLTK's currently recommended sentence tokenizer to tokenize
    sentences in the given text.  Currently, this uses
    L{PunktSentenceTokenizer}.
    s   tokenizers/punkt/english.pickle(   t   nltkt   datat   loadt   tokenize(   t   textt	   tokenizer(    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyR   %   s    c         C   s
   t  |  ƒ S(   sÙ   
    Use NLTK's currently recommended word tokenizer to tokenize words
    in the given sentence.  Currently, this uses
    L{TreebankWordTokenizer}.  This tokenizer should be fed a single
    sentence at a time.
    (   t   _word_tokenize(   R   (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pyR   0   s    (   t   __doc__t   simplet   regexpt   punktt   sexprt   treebankR   t   __all__t   numpyt   ImportErrort
   texttilingR   R   R   R   R   (    (    (    s,   /p/zhu/06/nlp/nltk/nltk/tokenize/__init__.pys   <module>   s(   




					
	
