³ò
B_Kc           @   sv   d  Z  d d k Z d d k l Z d d k l Z d d k Td d k Td e f d „  ƒ  YZ	 d e
 f d	 „  ƒ  YZ d S(
   sí   
Indian Language POS-Tagged Corpus
Collected by A Kumaran, Microsoft Research, India
Distributed with permission

Contents:
  - Bangla: IIT Kharagpur
  - Hindi: Microsoft Research India
  - Marathi: IIT Bombay
  - Telugu: IIIT Hyderabad
iÿÿÿÿN(   t
   deprecated(   t	   str2tuple(   t   *t   IndianCorpusReaderc           B   s’   e  Z d  Z e d „ Z e e d „ Z e d „ Z e e d „ Z e d „ Z	 e
 d ƒ d d „ ƒ Z e
 d	 ƒ d
 „  ƒ Z e
 d ƒ d „  ƒ Z RS(   s@   
    List of words, one per line.  Blank lines are ignored.
    c      	   C   sF   t  g  } |  i | t ƒ D]" \ } } | t | | t t ƒ q ~ ƒ S(   N(   t   concatt   abspathst   Truet   IndianCorpusViewt   False(   t   selft   fileidst   _[1]t   fileidt   enc(    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyt   words!   s    
c      
   C   sc   | o |  i  } n d  } t g  } |  i | t ƒ D]% \ } } | t | | t t | ƒ q4 ~ ƒ S(   N(   t   _tag_mapping_functiont   NoneR   R   R   R   R   (   R	   R
   t   simplify_tagst   tag_mapping_functionR   R   R   (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyt   tagged_words&   s
    
c      	   C   sF   t  g  } |  i | t ƒ D]" \ } } | t | | t t ƒ q ~ ƒ S(   N(   R   R   R   R   R   (   R	   R
   R   R   R   (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyt   sents/   s    
c      
   C   sc   | o |  i  } n d  } t g  } |  i | t ƒ D]% \ } } | t | | t t | ƒ q4 ~ ƒ S(   N(   R   R   R   R   R   R   (   R	   R
   R   R   R   R   R   (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyt   tagged_sents4   s
    
c         C   sk   | d  j o |  i } n t | t ƒ o | g } n t g  } | D] } | |  i | ƒ i ƒ  qE ~ ƒ S(   N(   R   t   _fileidst
   isinstancet
   basestringR   t   opent   read(   R	   R
   R   t   f(    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyt   raw=   s    s2   Use .raw() or .words() or .tagged_words() instead.t   taggedc         C   sn   | d j o |  i  | ƒ Sn | d j o |  i | ƒ Sn | d j o |  i | ƒ Sn t d | ƒ ‚ d  S(   NR   t	   tokenizedR   s   bad format %r(   R   R   R   t
   ValueError(   R	   t   itemst   format(    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyR   C   s    s   Use .words() instead.c         C   s   |  i  | ƒ S(   N(   R   (   R	   R    (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyR   I   s    s   Use .tagged_words() instead.c         C   s   |  i  | ƒ S(   N(   R   (   R	   R    (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyR   L   s    (   t   __name__t
   __module__t   __doc__R   R   R   R   R   R   R   R    R   R   R   (    (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyR      s   			R   c           B   s   e  Z e d  „ Z d „  Z RS(   c         C   s5   | |  _  | |  _ | |  _ t i |  | d | ƒd  S(   Nt   encoding(   t   _taggedt   _group_by_sentR   t   StreamBackedCorpusViewt   __init__(   R	   t   corpus_fileR%   R   t   group_by_sentR   (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyR)   R   s    			c   
      C   sí   | i  ƒ  } | i d ƒ o g  Sn g  } | i ƒ  D] } | t | d d ƒq5 ~ } |  i o: g  } | D]" \ } } | | |  i | ƒ f ql ~ } n |  i p+ g  }	 | D] \ } } |	 | q° ~	 } n |  i o | g Sn | Sd  S(   Nt   <t   sept   _(   t   readlinet
   startswitht   splitR   R   R&   R'   (
   R	   t   streamt   lineR   t   wordt   sentt   _[2]t   wt   tt   _[3](    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyt
   read_blockY   s    3
:5
(   R"   R#   R   R)   R:   (    (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pyR   Q   s   (   R$   t   codecst   nltk.internalsR    t   nltk.tag.utilR   t   utilt   apit   CorpusReaderR   R(   R   (    (    (    s/   /p/zhu/06/nlp/nltk/nltk/corpus/reader/indian.pys   <module>   s   

4