³ò
4ÒÇIc           @   s0   d  Z  d d k l Z d e f d „  ƒ  YZ d S(   s   
Tokenizer Interface
iÿÿÿÿ(   t
   overriddent
   TokenizerIc           B   s    e  Z d  Z d „  Z d „  Z RS(   sÈ   
    A procesing interface for I{tokenizing} a string, or dividing it
    into a list of substrings.
    
    Subclasses must define:
      - either L{tokenize()} or L{batch_tokenize()} (or both)
    c         C   s5   t  |  i ƒ o |  i | g ƒ d Sn
 t ƒ  ‚ d S(   sp   
        Divide the given string into a list of substrings.
        
        @return: C{list} of C{str}
        i    N(   R    t   batch_tokenizet   NotImplementedError(   t   selft   s(    (    s'   /p/zhu/06/nlp/nltk/nltk/tokenize/api.pyt   tokenize   s    c         C   s(   g  } | D] } | |  i  | ƒ q ~ S(   sº   
        Apply L{self.tokenize()} to each element of C{strings}.  I.e.:

            >>> return [self.tokenize(s) for s in strings]

        @rtype: C{list} of C{list} of C{str}
        (   R   (   R   t   stringst   _[1]R   (    (    s'   /p/zhu/06/nlp/nltk/nltk/tokenize/api.pyR       s    (   t   __name__t
   __module__t   __doc__R   R   (    (    (    s'   /p/zhu/06/nlp/nltk/nltk/tokenize/api.pyR      s   	N(   R   t   nltk.internalsR    t   objectR   (    (    (    s'   /p/zhu/06/nlp/nltk/nltk/tokenize/api.pys   <module>
   s   