ģō
B_Kc        	   @   s·   d  Z  d d k Z d d k Z d d k l Z d d k Td d k Th  d d <d d <d	 d
 <d d <d d <d d <Z e e  Z	 d d d     YZ
 d e f d     YZ d S(   s$  
Corpus reader for the Information Extraction and Entity Recognition Corpus.

NIST 1999 Information Extraction: Entity Recognition Evaluation
http://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm

This corpus contains the NEWSWIRE development test data for the
NIST 1999 IE-ER Evaluation.  The files were taken from the
subdirectory: /ie_er_99/english/devtest/newswire/*.ref.nwt
and filenames were shortened.

The corpus contains the following files: APW_19980314, APW_19980424,
APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
iĸĸĸĸN(   t
   deprecated(   t   *s&   Associated Press Weekly, 14 March 1998t   APW_19980314s&   Associated Press Weekly, 24 April 1998t   APW_19980424s&   Associated Press Weekly, 29 April 1998t   APW_19980429s   New York Times, 15 March 1998t   NYT_19980315s   New York Times, 3 April 1998t   NYT_19980403s   New York Times, 7 April 1998t   NYT_19980407t   IEERDocumentc           B   s&   e  Z e e e d  d  Z d   Z RS(   t    c         C   s1   | |  _  | |  _ | |  _ | |  _ | |  _ d  S(   N(   t   textt   docnot   doctypet	   date_timet   headline(   t   selfR
   R   R   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   __init__/   s
    				c         C   s§   |  i  o d i |  i  i    } nQ d i g  } |  i i   D]" } | d  d j o | | q@ q@ ~ d   d } |  i d  j	 o d |  i | f Sn	 d | Sd  S(   Nt    i   t   <i   s   ...s   <IEERDocument %s: %r>s   <IEERDocument: %r>(   R   t   joint   leavesR
   R   t   None(   R   R   t   _[1]t   w(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   __repr__6   s    
 0(   t   __name__t
   __module__R   R   R   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR   .   s   t   IEERCorpusReaderc           B   sz   e  Z d  Z e d  Z e d  Z e d  Z d   Z d   Z d   Z	 e
 d  d d	   Z e
 d
  d    Z RS(   s   
    c         C   sk   | d  j o |  i } n t | t  o | g } n t g  } | D] } | |  i |  i   qE ~  S(   N(   R   t   _fileidst
   isinstancet
   basestringt   concatt   opent   read(   R   t   fileidsR   t   f(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   rawD   s    c      	   C   sI   t  g  } |  i | t  D]% \ } } | t | |  i d | q ~  S(   Nt   encoding(   R   t   abspathst   Truet   StreamBackedCorpusViewt   _read_block(   R   R"   R   t   fileidt   enc(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   docsI   s    
c      	   C   sI   t  g  } |  i | t  D]% \ } } | t | |  i d | q ~  S(   NR%   (   R   R&   R'   R(   t   _read_parsed_block(   R   R"   R   R*   R+   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   parsed_docsN   s    
c         C   sN   g  } |  i  |  D]3 } |  i |  i d  j	 o | |  i |  q q ~ S(   N(   R)   t   _parseR   R   (   R   t   streamR   t   doc(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR-   T   s    c         C   sD   t  i i | d d } t | t  o t |   Sn t |  Sd  S(   Nt   top_nodet   DOCUMENT(   t   nltkt   chunkt   ieerstr2treeR   t   dictR   (   R   R1   t   val(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR/   Y   s    c         C   sŪ   g  } x< t  o4 | i   } | p Pn | i   d j o Pq	 q	 W| i |  xI t  oA | i   } | p Pn | i |  | i   d j o PqU qU Wd i |  g S(   Ns   <DOC>s   </DOC>s   
(   R'   t   readlinet   stript   appendR   (   R   R0   t   outt   line(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR)   `   s    

s0   Use .parsed_docs() or .raw() or .docs() instead.t   parsedc         C   sn   | d j o |  i  |  Sn | d j o |  i |  Sn | d j o |  i |  Sn t d |   d  S(   NR>   R$   R,   s   bad format %r(   R.   R$   R,   t
   ValueError(   R   t   itemst   format(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR!   r   s    s   Use .parsed_docs() instead.c         C   s   |  i  |  S(   N(   R.   (   R   R@   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR>   x   s    (   R   R   t   __doc__R   R$   R,   R.   R-   R/   R)   R    R!   R>   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR   A   s   				(    (   RB   t   codecsR4   t   nltk.internalsR    t   apit   utilt   titlest   sortedt	   documentsR   t   CorpusReaderR   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pys   <module>   s   

					