ģō
B_Kc        	   @   sĪ   d  Z  d d k Z d d k Z d d k Z d d k l Z d d k Td d k Te i d  Z	 e i d  Z
 e i d  Z e i d  Z d	 e f d
     YZ d S(   sĖ  
Sinica Treebank Corpus Sample

http://rocling.iis.sinica.edu.tw/CKIP/engversion/treebank.htm

10,000 parsed sentences, drawn from the Academia Sinica Balanced
Corpus of Modern Chinese.  Parse tree notation is based on
Information-based Case Grammar.  Tagset documentation is available
at http://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html

Language and Knowledge Processing Group, Institute of Information
Science, Academia Sinica

It is distributed with the Natural Language Toolkit under the terms of
the Creative Commons Attribution-NonCommercial-ShareAlike License
[http://creativecommons.org/licenses/by-nc-sa/2.5/].

References:

Feng-Yi Chen, Pi-Fang Tsai, Keh-Jiann Chen, and Chu-Ren Huang (1999)
The Construction of Sinica Treebank. Computational Linguistics and
Chinese Language Processing, 4, pp 87-104.

Huang Chu-Ren, Keh-Jiann Chen, Feng-Yi Chen, Keh-Jiann Chen, Zhao-Ming
Gao, and Kuang-Yu Chen. 2000. Sinica Treebank: Design Criteria,
Annotation Guidelines, and On-line Interface. Proceedings of 2nd
Chinese Language Processing Workshop, Association for Computational
Linguistics.

Chen Keh-Jiann and Yu-Ming Hsieh (2004) Chinese Treebanks and Grammar
Extraction, Proceedings of IJCNLP-04, pp560-565.
iĸĸĸĸN(   t
   deprecated(   t   *s   ^#\S+\ss   (?<=\))#.*$s   :([^:()|]+):([^:()|]+)s   :[^:()|]+:([^:()|]+)t   SinicaTreebankCorpusReaderc           B   s5   e  Z d  Z d   Z d   Z e d  Z d   Z RS(   s)   
    Reader for the sinica treebank.
    c         C   s7   | i    } t i d |  } t i d |  } | g S(   Nt    (   t   readlinet
   IDENTIFIERt   subt   APPENDIX(   t   selft   streamt   sent(    (    s8   /p/zhu/06/nlp/nltk/nltk/corpus/reader/sinica_treebank.pyt   _read_block;   s    c         C   s   t  i i |  S(   N(   t   nltkt   treet   sinica_parse(   R   R
   (    (    s8   /p/zhu/06/nlp/nltk/nltk/corpus/reader/sinica_treebank.pyt   _parseA   s    c         C   s{   g  } t  i |  D] \ } } | | | f q ~ } | o: g  } | D]" \ } } | | |  i |  f qH ~ } n | S(   N(   t   TAGWORDt   findallt   _tag_mapping_function(   R   R
   t   simplify_tagst   _[1]t   tt   wt   tagged_sentt   _[2](    (    s8   /p/zhu/06/nlp/nltk/nltk/corpus/reader/sinica_treebank.pyt   _tagD   s
    63c         C   s   t  i |  S(   N(   t   WORDR   (   R   R
   (    (    s8   /p/zhu/06/nlp/nltk/nltk/corpus/reader/sinica_treebank.pyt   _wordK   s    (   t   __name__t
   __module__t   __doc__R   R   t   NoneR   R   (    (    (    s8   /p/zhu/06/nlp/nltk/nltk/corpus/reader/sinica_treebank.pyR   7   s
   		(   R   t   ost   reR   t   nltk.internalsR    t   utilt   apit   compileR   R   R   R   t   SyntaxCorpusReaderR   (    (    (    s8   /p/zhu/06/nlp/nltk/nltk/corpus/reader/sinica_treebank.pys   <module>'   s   

