
4Ic           @   sR  d  Z  d d k Z d d k Z d d k l Z d d k l Z d d k l Z d d k	 l
 Z
 d d k l Z d d k Td d k Td	 e f d
     YZ d e f d     YZ d e f d     YZ h  d d <d d <d d <d d <d d <d d <d d <d d <d d  <d! d" <d# d$ <d% d& <d' d( <d) d* <d+ d, <d- d. <d/ d0 <d1 d2 <d3 d4 <d5 d6 <d7 d8 <d9 d: <d; d< <d= d> <d? d@ <dA dB <dC dD <dE dF <dG dH <dI dJ <dK dL <dM dN <dO dP <dO dQ <dR dS <dT dU <dV dW <dX dY <dZ d[ <d\ d] <d^ d_ <d` da <db dc <dd de <df dg <dh di <dj dk <dl dm <dn do <dp dq <dr ds <dt du <dv dw <dx dy <dz d{ <d| d} <d~ d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <Z d S(   s  
Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
English Prose (YCOE), a 1.5 million word syntactically-annotated
corpus of Old English prose texts. The corpus is distributed by the
Oxford Text Archive: http://www.ota.ahds.ac.uk/ It is not included
with NLTK.

The YCOE corpus is divided into 100 files, each representing
an Old English prose text. Tags used within each text complies
to the YCOE standard: http://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm
iN(   t   RegexpTokenizer(   t   BracketParseCorpusReader(   t   TaggedCorpusReader(   t   split(   t
   deprecated(   t   *t   YCOECorpusReaderc           B   s
  e  Z d  Z e d  Z e d  Z e d  Z d   Z e d  Z e d  Z	 e d  Z
 e d  Z e d	  Z e d
  Z e d  Z e d  e d d   Z e d  e d   Z e d  e d   Z e d  e d   Z e d  e d   Z RS(   s   
    Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
    English Prose (YCOE), a 1.5 million word syntactically-annotated
    corpus of Old English prose texts.
    c      	   C   s0  t  i |  | g  |  t |  i i d  d d d | |  _ t |  i i d  d d d | |  _ t d   |  i i	   D  } t d   |  i i	   D  | j o t
 d	   n t g  } | D] } | d
 | q ~ g  } | D] } | d | q ~  } t  i |  | | |  t |  |  _ d  S(   Nt   psds   .*s   .psdt   encodingt   poss   .posc         s   s   x |  ] } | d   Vq Wd S(   iN(    (   t   .0t   f(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pys	   <genexpr>1   s    c         s   s   x |  ] } | d   Vq Wd S(   iN(    (   R
   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pys	   <genexpr>2   s    s5   Items in "psd" and "pos" subdirectories do not match.s   %s.psds   %s.pos(   t   CorpusReadert   __init__t   YCOEParseCorpusReadert   roott   joint   _psd_readert   YCOETaggedCorpusReadert   _pos_readert   sett   fileidst
   ValueErrort   sortedt
   _documents(   t   selfR   R   t	   documentst   _[1]t   doct   _[2]R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR   (   s    $$&%)c         C   s   | t  j o |  i Sn t | t  o | g } n x2 | D]* } | |  i j o t d |   q< q< Wt t d   | D   S(   s   
        Return a list of document identifiers for all documents in
        this corpus, or for the documents with the given file(s) if
        specified.
        s   File id %s not foundc         s   s   x |  ] } | d   Vq Wd S(   iN(    (   R
   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pys	   <genexpr>I   s    (   t   NoneR   t
   isinstancet
   basestringt   _fileidst   KeyErrorR   R   (   R   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR   ;   s     c      
   C   s   | t  j o |  i Sn t | t  o | g } n t t g  } | D] } | d | qF ~ g  } | D] } | d | qh ~   S(   s   
        Return a list of file identifiers for the files that make up
        this corpus, or that store the given document(s) if specified.
        s   %s.poss   %s.psd(   R   R!   R   R    R   R   (   R   R   R   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR   K   s    (c         C   s   | t  j o |  i } nt t | t  o | g } n xS | D]K } | |  i j o5 | d d j o t d   q t d |   q> q> Wg  } | D] } | d | | f q ~ S(   s   
        Helper that selects the appropriate fileids for a given set of
        documents from a given subcorpus (pos or psd).
        is   .poss   .psdsv   Expected a document identifier, not a file identifier.  (Use corpus.documents() to get a list of document identifiers.s    Document identifier %s not founds   %s.%s(   s   .poss   .psd(   R   R   R   R    R   (   R   R   t	   subcorpust   documentR   t   d(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyt   _getfileidsW   s     c         C   s   |  i  i |  i | d   S(   NR	   (   R   t   wordsR&   (   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR'   n   s    c         C   s   |  i  i |  i | d   S(   NR	   (   R   t   sentsR&   (   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR(   p   s    c         C   s   |  i  i |  i | d   S(   NR	   (   R   t   parasR&   (   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR)   r   s    c         C   s   |  i  i |  i | d   S(   NR	   (   R   t   tagged_wordsR&   (   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR*   t   s    c         C   s   |  i  i |  i | d   S(   NR	   (   R   t   tagged_sentsR&   (   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR+   v   s    c         C   s   |  i  i |  i | d   S(   NR	   (   R   t   tagged_parasR&   (   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR,   x   s    c         C   s   |  i  i |  i | d   S(   NR   (   R   t   parsed_sentsR&   (   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR-   z   s    sE   Use .raw() or .words() or .tagged_words() or .parsed_sents() instead.t   parsedc         C   s   | d j o |  i  |  Sn | d j o |  i |  Sn | d j o |  i |  Sn | d j o |  i |  Sn | d j o t d   n t d |   d  S(   NR.   t   rawt	   tokenizedt   taggedt   chunkeds   no longer supporteds   bad format %r(   R-   R/   R'   R*   R   (   R   t   itemst   format(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyt   read~   s    s   Use .parsed_sents() instead.c         C   s   |  i  |  S(   N(   R-   (   R   R3   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR.      s    s   Use .words() instead.c         C   s   |  i  |  S(   N(   R'   (   R   R3   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR0      s    s   Use .tagged_words() instead.c         C   s   |  i  |  S(   N(   R*   (   R   R3   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR1      s    s   Operation no longer supported.c         C   s   t  d   d  S(   Ns$   format "chunked" no longer supported(   R   (   R   R3   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR2      s    (   t   __name__t
   __module__t   __doc__R   R   R   R   R&   R'   R(   R)   R*   R+   R,   R-   R   R5   R.   R0   R1   R2   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR   "   s,   						R   c           B   s   e  Z d  Z d   Z RS(   sr   Specialized version of the standard bracket parse corpus reader
    that strips out (CODE ...) and (ID ...) nodes.c         C   s@   t  i d d |  } t  i d |  o d  Sn t i |  |  S(   Ns   (?u)\((CODE|ID)[^\)]*\)t    s   \s*\(\s*\)\s*$(   t   ret   subt   matchR   R   t   _parse(   R   t   t(    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR=      s    (   R6   R7   R8   R=   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR      s   R   c           B   s   e  Z e d   Z RS(   c      
   C   sA   d } t  | d t } t i |  | | d d d | d | d  S(   Ns,   (?u)\(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*t   gapst   sept   _t   sent_tokenizerR   (   R    t   TrueR   R   (   R   R   R3   R   t   gaps_reRB   (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR      s
    (   R6   R7   R   R   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pyR      s   s   Adrian and Ritheuss   coadrian.o34s   lfric, Supplemental Homiliess   coaelhom.o3s   lfric's Lives of Saintss   coaelive.o3s   Alcuin De virtutibus et vitiist   coalcuins   Alexander's Letter to Aristotles
   coalex.o23s   Apollonius of Tyres   coapollo.o3t	   Augustinet   coaugusts$   Bede's History of the English Churchs	   cobede.o2s   Benedictine Rules   cobenrul.o3s   Blickling Homiliess   coblick.o23s#   Boethius' Consolation of Philosophys
   coboeth.o2s   Byrhtferth's Manuals   cobyrhtf.o3s   Canons of Edgar (D)t	   cocanedgDs   Canons of Edgar (X)t	   cocanedgXs   lfric's Catholic Homilies Is   cocathom1.o3s   lfric's Catholic Homilies IIs   cocathom2.o3s
   Saint Chads
   cochad.o24s   Chrodegang of Metz, Rulet   cochdruls   Saint Christophert   cochristophs   Anglo-Saxon Chronicle As   cochronA.o23s   Anglo-Saxon Chronicle Ct   cochronCs   Anglo-Saxon Chronicle Dt   cochronDs   Anglo-Saxon Chronicle Es   cochronE.o34s   Cura Pastoraliss	   cocura.o2s   Cura Pastoralis (Cotton)t   cocuraCs   Dicts of Catos   codicts.o34s   Documents 1 (O1)s
   codocu1.o1s   Documents 2 (O1/O2)s   codocu2.o12s   Documents 2 (O2)s
   codocu2.o2s   Documents 3 (O2/O3)s   codocu3.o23s   Documents 3 (O3)s
   codocu3.o3s   Documents 4 (O2/O4)s   codocu4.o24s    Honorius of Autun, Elucidarium 1t   coeluc1t   coeluc2s   lfric's Epilogue to Genesiss   coepigen.o3s   Saint Euphrosynet   coeuphrs    Saint Eustace and his companionst   coeusts
   Exodus (P)t	   coexodusPs   Genesis (C)t	   cogenesiCs   Gregory's Dialogues (C)s   cogregdC.o24s   Gregory's Dialogues (H)s   cogregdH.o23s   Pseudo-Apuleius, Herbariumt   coherbars"   Wulfstan's Institute of Polity (D)s   coinspolD.o34s"   Wulfstan's Institute of Polity (X)t	   coinspolXs   Saint Jamest   cojamest   Lacnungas   colacnu.o23t	   Leechdomss
   colaece.o2s   Laws, Cnut Is   colaw1cn.o3s   Laws, Cnut IIs   colaw2cn.o3s   Laws, thelred Vs   colaw5atr.o3s   Laws, thelred VIs   colaw6atr.o3s   Laws, Alfreds
   colawaf.o2s   Alfred's Introduction to Lawss   colawafint.o2s   Laws, Gerefas   colawger.o34s	   Laws, Ines   colawine.ox2s   Northumbra Preosta Lagus   colawnorthu.o3s   Laws, William I, Lads   colawwllad.o4t   Leofrics   coleofri.o4s   lfric's Letter to Sigefyrths   colsigef.o3s    lfric's Letter to Sigeweard (B)t	   colsigewBs    lfric's Letter to Sigeweard (Z)s   colsigewZ.o34s   lfric's Letter to Wulfgeatt   colwgeats   lfric's Letter to Wulfsige (T)t	   colwsigeTs    lfric's Letter to Wulfsige (Xa)s   colwsigeXa.o34s   lfric's Letter to Wulfstan Is   colwstan1.o3s   lfric's Letter to Wulfstan IIs   colwstan2.o3s   Saint Margaret (C)s   comargaC.o34s   Saint Margaret (T)t   comargaTs   Martyrology, It   comart1s   Martyrology, IIt   comart2s   Martyrology, IIIs   comart3.o23s   Marvels of the Easts   comarvel.o23s   Mary of Egyptt   comarys
   Saint Neott   coneots   Gospel of Nicodemus (A)t   conicodAs   Gospel of Nicodemus (C)t   conicodCs   Gospel of Nicodemus (D)t   conicodDs   Gospel of Nicodemus (E)t   conicodEt   Orosiuss   coorosiu.o2t
   Heptateuchs
   cootest.o3s'   lfric's Preface to Catholic Homilies Is   coprefcath1.o3s(   lfric's Preface to Catholic Homilies IIs   coprefcath2.o3s   Preface to the Cura Pastoraliss   coprefcura.o2s   lfric's Preface to Genesiss   coprefgen.o3s#   lfric's Preface to Lives of Saintss   copreflives.o3s"   Preface to Augustine's Soliloquiest   coprefsolilos*   Pseudo-Apuleius, Medicina de quadrupedibuss   coquadru.o23s   History of the Holy Rood-Treet   coroods   Seven Sleeperst	   cosevensls   St. Augustine's Soliloquiest   cosolilos   Solomon and Saturn Is   cosolsat1.o4s   Solomon and Saturn IIt	   cosolsat2s   lfric's De Temporibus Annis
   cotempo.o3s   Vercelli Homiliest   coverhoms   Vercelli Homilies (E)t	   coverhomEs   Vercelli Homilies (L)t	   coverhomLs   Saint Vincent (Bodley 343)t   covinceBs   Vindicta Salvatorist   covinsals   West-Saxon Gospelss   cowsgosp.o3s   Wulfstan's Homiliess
   cowulf.o34(   R8   t   osR:   t   nltk.tokenizeR    t    nltk.corpus.reader.bracket_parseR   t   nltk.corpus.reader.taggedR   t   stringR   t   nltk.internalsR   t   utilt   apiR   R   R   R   R   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/corpus/reader/ycoe.pys   <module>   s   

s																																																																																																				