
B_Kc           @   sc   d  Z  d d k Z d d k l Z d d k Td d k Td d	 d     YZ d e f d     YZ d S(
   s  
Read lines from the Prepositional Phrase Attachment Corpus.

The PP Attachment Corpus contains several files having the format:

sentence_id verb noun1 preposition noun2 attachment

For example:

42960 gives authority to administration V
46742 gives inventors of microchip N

The PP attachment is to the verb phrase (V) or noun phrase (N), i.e.:

(VP gives (NP authority) (PP to administration))
(VP gives (NP inventors (PP of microchip)))

The corpus contains the following files:

training:   training set
devset:     development test set, used for algorithm development.
test:       test set, used to report results
bitstrings: word classes derived from Mutual Information Clustering for the Wall Street Journal.

Ratnaparkhi, Adwait (1994). A Maximum Entropy Model for Prepositional
Phrase Attachment.  Proceedings of the ARPA Human Language Technology
Conference.  [http://www.cis.upenn.edu/~adwait/papers/hlt94.ps]

The PP Attachment Corpus is distributed with NLTK with the permission
of the author.
iN(   t
   deprecated(   t   *t   PPAttachmentc           B   s   e  Z d    Z d   Z RS(   c         C   s:   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ d  S(   N(   t   sentt   verbt   noun1t   prept   noun2t
   attachment(   t   selfR   R   R   R   R   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyt   __init__1   s    					c         C   s,   d |  i  |  i |  i |  i |  i |  i f S(   NsJ   PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, noun2=%r, attachment=%r)(   R   R   R   R   R   R   (   R	   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyt   __repr__9   s    (   t   __name__t
   __module__R
   R   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyR   0   s   	t   PPAttachmentCorpusReaderc           B   sV   e  Z d  Z d   Z d   Z e d  Z d   Z d   Z e	 d  d d   Z
 RS(	   s=   
    sentence_id verb noun1 preposition noun2 attachment
    c      	   C   sI   t  g  } |  i | t  D]% \ } } | t | |  i d | q ~  S(   Nt   encoding(   t   concatt   abspathst   Truet   StreamBackedCorpusViewt   _read_obj_block(   R	   t   fileidst   _[1]t   fileidt   enc(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyt   attachmentsC   s    
c      	   C   sI   t  g  } |  i | t  D]% \ } } | t | |  i d | q ~  S(   NR   (   R   R   R   R   t   _read_tuple_block(   R	   R   R   R   R   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyt   tuplesH   s    
c         C   sk   | d  j o |  i } n t | t  o | g } n t g  } | D] } | |  i |  i   qE ~  S(   N(   t   Nonet   _fileidst
   isinstancet
   basestringR   t   opent   read(   R	   R   R   t   f(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyt   rawM   s    c         C   s2   | i    } | o t | i    g Sn g  Sd  S(   N(   t   readlinet   tuplet   split(   R	   t   streamt   line(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyR   R   s    c         C   s2   | i    } | o t | i     g Sn g  Sd  S(   N(   R$   R   R&   (   R	   R'   R(   (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyR   Y   s    s2   Use .tuples() or .raw() or .attachments() instead.R%   c         C   sP   | d j o |  i  |  Sn | d j o |  i |  Sn t d |   d  S(   NR%   R#   s   bad format %r(   R   R#   t
   ValueError(   R	   t   itemst   format(    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyR!   a   s    (   R   R   t   __doc__R   R   R   R#   R   R   R    R!   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pyR   ?   s   					(    (	   R,   t   codecst   nltk.internalsR    t   utilt   apiR   t   CorpusReaderR   (    (    (    s1   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ppattach.pys   <module>'   s   

