
dhKc        
   @   s   d  Z  d d k l Z l Z l Z d d k l Z l Z d d k l	 Z	 d d k
 l Z l Z d d k Z d d d d d  Z d	 e f d
     YZ d d  Z d S(   s    
Utility functions for parsers.
i(   t   ContextFreeGrammart   FeatureGrammart   WeightedGrammar(   t   Chartt   ChartParser(   t   InsideChartParser(   t   FeatureChartt   FeatureChartParserNi    c         K   s  t  i i |  |  } t | t  p t d   n t | t  o1 | d j o
 t } n | | d | d | Sn t | t	  oH | d j o
 t
 } n | d j o
 t } n | | d | d | SnE | d j o
 t } n | d j o
 t } n | | d | d | Sd S(   s  
    Load a grammar from a file, and build a parser based on that grammar. 
    The parser depends on the grammar format, and might also depend
    on properties of the grammar itself.
    
    The following grammar formats are currently supported:
      - C{'cfg'}  (CFGs: L{ContextFreeGrammar})
      - C{'pcfg'} (probabilistic CFGs: L{WeightedGrammar})
      - C{'fcfg'} (feature-based CFGs: L{ContextFreeGrammar})

    @type grammar_url: C{str}
    @param grammar_url: A URL specifying where the grammar is located.
        The default protocol is C{"nltk:"}, which searches for the file 
        in the the NLTK data package.
    @type trace: C{int}
    @param trace: The level of tracing that should be used when
        parsing a text.  C{0} will generate no tracing output;
        and higher numbers will produce more verbose tracing output.
    @param parser: The class used for parsing; should be L{ChartParser}
        or a subclass.
        If C{None}, the class depends on the grammar format.
    @param chart_class: The class used for storing the chart;
        should be L{Chart} or a subclass. 
        Only used for CFGs and feature CFGs.
        If C{None}, the chart class depends on the grammar format.
    @type beam_size: C{int}
    @param beam_size: The maximum length for the parser's edge queue.
        Only used for probabilistic CFGs.
    @param load_args: Keyword parameters used when loading the grammar.
        See L{data.load} for more information.
    s@   The grammar must be a ContextFreeGrammar, or a subclass thereof.t   tracet	   beam_sizet   chart_classN(   t   nltkt   datat   loadt
   isinstanceR    t
   ValueErrorR   t   NoneR   R   R   R   R   R   (   t   grammar_urlR   t   parserR
   R	   t	   load_argst   grammar(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/util.pyt   load_parser   s$    "




t   TestGrammarc           B   s)   e  Z d  Z e e d  Z e d  Z RS(   s   
    Unit tests for  CFG.
    c         C   s=   | |  _  t | d d |  _ | |  _ | |  _ | |  _ d  S(   NR   i    (   t   test_grammarR   t   cpt   suitet   _acceptt   _reject(   t   selfR   R   t   acceptt   reject(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/util.pyt   __init__V   s
    			c   
      C   s  x |  i  D] } | d d Gx d d g D] } x | | D] } | i   } |  i i |  } | o' | o  H| GHx | D] } | GHqy Wn | d j o( | g  j o t d |  q t } q: | o t d |  q: t }	 q: Wq) W| o |	 o	 d GHq
 q
 Wd S(	   s|  
        Sentences in the test suite are divided into two classes:
         - grammatical (C{accept}) and
         - ungrammatical (C{reject}). 
        If a sentence should parse accordng to the grammar, the value of
        C{trees} will be a non-empty list. If a sentence should be rejected
        according to the grammar, then the value of C{trees} will be C{None}.
        t   doct   :R   R   s   Sentence '%s' failed to parse's   Sentence '%s' received a parse's   All tests passed!N(   R   t   splitR   t   parseR   t   True(
   R   t
   show_treest   testt   keyt   sentt   tokenst   treest   treet   acceptedt   rejected(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/util.pyt   run_   s0    	
    
(   t   __name__t
   __module__t   __doc__R   R   t   FalseR.   (    (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/util.pyR   R   s   	s   #%;c         C   s   g  } x |  i  d  D] } | d j p | d | j o q n | i  d d  } t } t |  d j oM | d d j o | d d j } | d } q t | d  } | d } n | i    } | g  j o q n | | | f g 7} q W| S(   sg  
    Parses a string with one test sentence per line.
    Lines can optionally begin with:
      - a C{bool}, saying if the sentence is grammatical or not, or
      - an C{int}, giving the number of parse trees is should have,
    The result information is followed by a colon, and then the sentence.
    Empty lines and lines beginning with a comment char are ignored.

    @return: a C{list} of C{tuple} of sentences and expected results,
        where a sentence is a C{list} of C{str},
        and a result is C{None}, or C{bool}, or C{int}

    @param comment_chars: L{str} of possible comment characters.
    s   
t    i    R!   i   i   R$   t   trueR2   t   false(   s   Trues   trues   Falses   false(   s   Trues   true(   R"   R   t   lent   int(   t   stringt   comment_charst	   sentencest   sentencet
   split_infot   resultR)   (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/util.pyt   extract_test_sentences   s      %(   R1   t   nltk.grammarR    R   R   t   chartR   R   t   pchartR   t   featurechartR   R   t	   nltk.dataR   R   R   t   objectR   R>   (    (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/util.pys   <module>   s   </