łň
4ŇÇIc           @   sQ   d  d k  Z  d  d k Td e f d     YZ d   Z e d j o e   n d S(   i˙˙˙˙N(   t   *t   RegexpStemmerc           B   s,   e  Z d  Z d d  Z d   Z d   Z RS(   s    
    A stemmer that uses regular expressions to identify morphological
    affixes.  Any substrings that match the regular expressions will
    be removed.
    i    c         C   s9   t  | d  p t i |  } n | |  _ | |  _ d S(   s  
        Create a new regexp stemmer.

        @type regexp: C{string} or C{regexp}
        @param regexp: The regular expression that should be used to
            identify morphological affixes.
        @type min: int
        @param min: The minimum length of string to stem
        t   patternN(   t   hasattrt   ret   compilet   _regexpt   _min(   t   selft   regexpt   min(    (    s&   /p/zhu/06/nlp/nltk/nltk/stem/regexp.pyt   __init__   s    	c         C   s5   t  |  |  i j  o | Sn |  i i d |  Sd  S(   Nt    (   t   lenR   R   t   sub(   R   t   word(    (    s&   /p/zhu/06/nlp/nltk/nltk/stem/regexp.pyt   stem$   s    c         C   s   d |  i  i S(   Ns   <RegexpStemmer: %r>(   R   R   (   R   (    (    s&   /p/zhu/06/nlp/nltk/nltk/stem/regexp.pyt   __repr__*   s    (   t   __name__t
   __module__t   __doc__R   R   R   (    (    (    s&   /p/zhu/06/nlp/nltk/nltk/stem/regexp.pyR      s   	c          C   sp   d d k  l }  l } | i d d d } d } | i   } | GHx& | D] } d | | i |  f GHqI WHd  S(   Ni˙˙˙˙(   t   tokenizeR   s
   ing$|s$|e$R
   i   s   John was eating icecreams
   %20s => %s(   t   nltkR   R   R   t   split(   R   R   t   stemmert   textt   tokensR   (    (    s&   /p/zhu/06/nlp/nltk/nltk/stem/regexp.pyt   demo-   s     t   __main__(   R   t   apit   StemmerIR   R   R   (    (    (    s&   /p/zhu/06/nlp/nltk/nltk/stem/regexp.pys   <module>
   s   
	