³ò
B_Kc           @   s“   d  d k  Z  d  d k l Z l Z d  d k Td  d k Te i d ƒ Z e i d ƒ Z	 e i d ƒ Z
 d e f d „  ƒ  YZ d	 e f d
 „  ƒ  YZ d S(   iÿÿÿÿN(   t   bracket_parset   Tree(   t   *s   \(([^\s()]+) ([^\s()]+)\)s   \([^\s()]+ ([^\s()]+)\)s
   \s*\(\s*\(t   BracketParseCorpusReaderc           B   sS   e  Z d  Z e d e e d „ Z d „  Z d „  Z d „  Z e d „ Z	 d „  Z
 RS(   sT   
    Reader for corpora that consist of parenthesis-delineated parse
    trees.
    t   unindented_parenc         C   s5   t  i |  | | | ƒ | |  _ | |  _ | |  _ d S(   s  
        @param root: The root directory for this corpus.
        @param fileids: A list or regexp specifying the fileids in this corpus.
        @param comment_char: The character which can appear at the start of
            a line to indicate that the rest of the line is a comment.
        @param detect_blocks: The method that is used to find blocks
          in the corpus; can be 'unindented_paren' (every unindented
          parenthesis starts a new parse) or 'sexpr' (brackets are
          matched).
        N(   t   CorpusReadert   __init__t   _comment_chart   _detect_blockst   _tag_mapping_function(   t   selft   roott   fileidst   comment_chart   detect_blockst   encodingt   tag_mapping_function(    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyR      s    		c         C   sÒ   |  i  d j o t | d |  i ƒSn¨ |  i  d j o t | ƒ SnŠ |  i  d j oh t | d d ƒ} |  i oD g  } | D], } | t i d t i |  i ƒ d | ƒ q| ~ } n | Sn d	 p
 t d
 ‚ d  S(   Nt   sexprR   t	   blanklineR   t   start_res   ^\(s	   (?m)^%s.*t    i    s   bad block type(	   R   t   read_sexpr_blockR   t   read_blankline_blockt   read_regexp_blockt   ret   subt   escapet   AssertionError(   R
   t   streamt   tokst   _[1]t   tok(    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyt   _read_block0   s    
=c         C   sU   t  i | ƒ o | i ƒ  d d !} n t i d d | ƒ } t i d d | ƒ } | S(   Ni   iÿÿÿÿs   \((.)\)s   (\1 \1)s"   \(([^\s()]+) ([^\s()]+) [^\s()]+\)s   (\1 \2)(   t   EMPTY_BRACKETSt   matcht   stripR   R   (   R
   t   t(    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyt
   _normalizeA   s
    c      	   C   sæ   y t  |  i | ƒ ƒ SWnÈ t j
 o¼ } t i i d ƒ | i d	 j om xj t d d ƒ D]U } y9 t  |  i | d | ƒ ƒ } t i i d | ƒ | SWqZ t j
 o qZ XqZ Wn t i i d ƒ t d |  i	 | ƒ ƒ Sn Xd  S(
   Ns(   Bad tree detected; trying to recover...
s   mismatched parensi   i   t   )s(     Recovered by adding %d close paren(s)
s'     Recovered by returning a flat parse.
t   S(   s   mismatched parens(
   R    R%   t
   ValueErrort   syst   stderrt   writet   argst   rangeR   t   _tag(   R
   R$   t   et   nt   v(    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyt   _parseL   s     c         C   s„   g  } t  i |  i | ƒ ƒ D] \ } } | | | f q ~ } | o: g  } | D]" \ } } | | |  i | ƒ f qQ ~ } n | S(   N(   t   TAGWORDt   findallR%   R	   (   R
   R$   t   simplify_tagsR   t   wt   tagged_sentt   _[2](    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyR.   _   s
    ?3c         C   s   t  i |  i | ƒ ƒ S(   N(   t   WORDR4   R%   (   R
   R$   (    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyt   _wordf   s    (   t   __name__t
   __module__t   __doc__t   NoneR   R    R%   R2   t   FalseR.   R:   (    (    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyR      s   			t   AlpinoCorpusReaderc           B   s&   e  Z d  Z e e d „ Z d „  Z RS(   s/   
    Reader for the Alpino Dutch Treebank.
    c      
   C   s)   t  i |  | d d d d | d | ƒd  S(   Ns   alpino\.xmlR   R   R   R   (   R   R   (   R
   R   R   R   (    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyR   m   s    c         C   s†   | d  d j o d Sn t  i d d | ƒ } t  i d d | ƒ } t  i d d	 | ƒ } t  i d
 d | ƒ } t  i d d | ƒ } | S(   Ni
   s
   <alpino_dsR   s     <node .*? cat="(\w+)".*>s   (\1s-     <node .*? pos="(\w+)".*? word="([^"]+)".*/>s   (\1 \2)s	     </node>R&   s   <sentence>.*</sentence>s   </?alpino_ds.*>(   R   R   (   R
   R$   (    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyR%   s   s    (   R;   R<   R=   R>   R   R%   (    (    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pyR@   i   s   (   R)   t	   nltk.treeR    R   t   utilt   apiR   t   compileR3   R9   R!   t   SyntaxCorpusReaderR   R@   (    (    (    s6   /p/zhu/06/nlp/nltk/nltk/corpus/reader/bracket_parse.pys   <module>	   s   

P