³ò
4ÒÇIc        	   @   s»   d  Z  d d k Z d d k l Z d d k l Z d d k Td d k Th  d d <d d	 <d
 d <d d <d d <d d <Z e	 e ƒ Z
 d d d „  ƒ  YZ d e f d „  ƒ  YZ d S(   s$  
Corpus reader for the Information Extraction and Entity Recognition Corpus.

NIST 1999 Information Extraction: Entity Recognition Evaluation
http://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm

This corpus contains the NEWSWIRE development test data for the
NIST 1999 IE-ER Evaluation.  The files were taken from the
subdirectory: /ie_er_99/english/devtest/newswire/*.ref.nwt
and filenames were shortened.

The corpus contains the following files: APW_19980314, APW_19980424,
APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
iÿÿÿÿN(   t   chunk(   t
   deprecated(   t   *s&   Associated Press Weekly, 14 March 1998t   APW_19980314s&   Associated Press Weekly, 24 April 1998t   APW_19980424s&   Associated Press Weekly, 29 April 1998t   APW_19980429s   New York Times, 15 March 1998t   NYT_19980315s   New York Times, 3 April 1998t   NYT_19980403s   New York Times, 7 April 1998t   NYT_19980407t   IEERDocumentc           B   s&   e  Z e e e d  d „ Z d „  Z RS(   t    c         C   s1   | |  _  | |  _ | |  _ | |  _ | |  _ d  S(   N(   t   textt   docnot   doctypet	   date_timet   headline(   t   selfR   R   R   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   __init__/   s
    				c         C   s§   |  i  o d i |  i  i ƒ  ƒ } nQ d i g  } |  i i ƒ  D]" } | d  d j o | | q@ q@ ~ d  ƒ d } |  i d  j	 o d |  i | f Sn	 d | Sd  S(   Nt    i   t   <i   s   ...s   <IEERDocument %s: %r>s   <IEERDocument: %r>(   R   t   joint   leavesR   R   t   None(   R   R   t   _[1]t   w(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   __repr__6   s    
 0(   t   __name__t
   __module__R   R   R   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR	   .   s   t   IEERCorpusReaderc           B   sz   e  Z d  Z e d „ Z e d „ Z e d „ Z d „  Z d „  Z d „  Z	 e
 d ƒ d d	 „ ƒ Z e
 d
 ƒ d „  ƒ Z RS(   s   
    c         C   sk   | d  j o |  i } n t | t ƒ o | g } n t g  } | D] } | |  i | ƒ i ƒ  qE ~ ƒ S(   N(   R   t   _fileidst
   isinstancet
   basestringt   concatt   opent   read(   R   t   fileidsR   t   f(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   rawD   s    c      	   C   sI   t  g  } |  i | t ƒ D]% \ } } | t | |  i d | ƒq ~ ƒ S(   Nt   encoding(   R    t   abspathst   Truet   StreamBackedCorpusViewt   _read_block(   R   R#   R   t   fileidt   enc(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   docsI   s    
c      	   C   sI   t  g  } |  i | t ƒ D]% \ } } | t | |  i d | ƒq ~ ƒ S(   NR&   (   R    R'   R(   R)   t   _read_parsed_block(   R   R#   R   R+   R,   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyt   parsed_docsN   s    
c         C   sN   g  } |  i  | ƒ D]3 } |  i | ƒ i d  j	 o | |  i | ƒ q q ~ S(   N(   R*   t   _parseR   R   (   R   t   streamR   t   doc(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR.   T   s    c         C   sA   t  i | d d ƒ} t | t ƒ o t |   Sn t | ƒ Sd  S(   Nt   top_nodet   DOCUMENT(   R    t   ieerstr2treeR   t   dictR	   (   R   R2   t   val(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR0   Y   s    c         C   s®   g  } x< t  o4 | i ƒ  } | p Pn | i ƒ  d j o Pq	 q	 W| i | ƒ xI t  oA | i ƒ  } | p Pn | i | ƒ | i ƒ  d j o PqU qU Wd i | ƒ g S(   Ns   <DOC>s   </DOC>s   
(   R(   t   readlinet   stript   appendR   (   R   R1   t   outt   line(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR*   `   s    

s0   Use .parsed_docs() or .raw() or .docs() instead.t   parsedc         C   sn   | d j o |  i  | ƒ Sn | d j o |  i | ƒ Sn | d j o |  i | ƒ Sn t d | ƒ ‚ d  S(   NR=   R%   R-   s   bad format %r(   R/   R%   R-   t
   ValueError(   R   t   itemst   format(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR"   r   s    s   Use .parsed_docs() instead.c         C   s   |  i  | ƒ S(   N(   R/   (   R   R?   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR=   x   s    (   R   R   t   __doc__R   R%   R-   R/   R.   R0   R*   R   R"   R=   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pyR   A   s   				(    (   RA   t   codecst   nltkR    t   nltk.internalsR   t   apit   utilt   titlest   sortedt	   documentsR	   t   CorpusReaderR   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ieer.pys   <module>   s   

					