³ò
4ÒÇIc           @   s  d  Z  d d k Z d d k Z d d k l Z l Z l Z l Z d d k l	 Z	 d d k
 l Z l Z l Z l Z d d k l Z d d k Z
 d d k Td d k Td e e e d d d	 „ Z d
 e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ d e  f d „  ƒ  YZ! d e f d „  ƒ  YZ" d e# f d „  ƒ  YZ$ d e$ f d „  ƒ  YZ% d e f d „  ƒ  YZ& e' d d „ Z( d „  Z) e* d j oU e( ƒ  He d  d! d" ƒZ+ d# Z, e, i- ƒ  Z. e+ i/ e. ƒ Z0 x e0 D] Z1 e1 GHqêWn d S($   s`   
Extension of chart parsing implementation to handle grammars with
feature structures as nodes.
iÿÿÿÿN(   t
   FeatStructt   unifyt   FeatStructParsert   TYPE(   t   logic(   t   Nonterminalt
   Productiont   ContextFreeGrammart   defaultdict(   t   FeatStructNonterminal(   t   *i    c      
   C   s@   t  i i |  d | d | d | d | ƒ} t | d | d | ƒS(   s¾  
    Load a grammar from a file, and build an Earley feature parser based on
    that grammar.

    You can optionally specify a tracing level, for how much output you
    want to see:

    0: No output.
    1: Show edges from scanner and completer rules (not predictor).
    2 (default): Show all edges as they are added to the chart.
    3: Show all edges, plus the results of successful unifications.
    4: Show all edges, plus the results of all attempted unifications.
    5: Show all edges, plus the results of all attempted unifications,
    including those with cached results.

    If C{verbose} is set to C{True}, then more diagnostic information about
    grammar-loading is displayed.
    t   cachet   verboset   logic_parsert   fstruct_parsert   tracet   chart_class(   t   nltkt   datat   loadt   FeatureEarleyChartParser(   t   filenameR   R   R   R   R   R   t   grammar(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyt   load_earley   s
    	t   FeatureTreeEdgec           B   sS   e  Z d  Z d e d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 RS(	   s  
    A specialized tree edge that allows shared variable bindings
    between nonterminals on the left-hand side and right-hand side.

    Each C{FeatureTreeEdge} contains a set of C{bindings}, i.e., a
    dictionary mapping from variables to values.  If the edge is not
    complete, then these bindings are simply stored.  However, if the
    edge is complete, then the constructor applies these bindings to
    every nonterminal in the edge whose symbol implements the
    interface L{SubstituteBindingsI}.
    i    c         C   s    | d j o
 h  } n | t | ƒ j oP | oI |  i | | ƒ } g  } | D] } | |  i | | ƒ qN ~ } h  } n t i |  | | | | ƒ | |  _ d S(   su  
        Construct a new edge.  If the edge is incomplete (i.e., if
        C{dot<len(rhs)}), then store the bindings as-is.  If the edge
        is complete (i.e., if C{dot==len(rhs)}), then apply the
        bindings to all nonterminals in C{lhs} and C{rhs}, and then
        clear the bindings.  See L{TreeEdge} for a description of
        the other arguments.
        N(   t   Nonet   lent   _bindt   TreeEdget   __init__t	   _bindings(   t   selft   spant   lhst   rhst   dott   bindingst   _[1]t   elt(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR   B   s    	-
c         C   s%   t  | t ƒ p | Sn | i | ƒ S(   N(   t
   isinstanceR	   t   substitute_bindings(   R   t   ntR$   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR   [   s    c         C   s   |  i  |  i ƒ  |  i ƒ S(   N(   R   t   nextR   (   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyt   next_with_bindings_   s    c         C   s   |  i  i ƒ  S(   sC   
        Return a copy of this edge's bindings dictionary.
        (   R   t   copy(   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR$   b   s    c         C   se   |  i  ƒ  o t i |  ƒ SnD d d i d „  t |  i i ƒ  ƒ Dƒ ƒ } d t i |  ƒ | f Sd  S(   Ns   {%s}s   , c         s   s   x |  ] } d  | Vq Wd S(   s   %s: %rN(    (   t   .0t   item(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pys	   <genexpr>l   s    s   %s %s(   t   is_completeR   t   __str__t   joint   sortedR   t   items(   R   R$   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR0   h   s
    c         C   sd   |  i  | i  j o d Sn t |  i |  i |  i |  i |  i f | i | i | i | i | i f ƒ S(   Niÿÿÿÿ(   t	   __class__t   cmpt   _spant   _lhst   _rhst   _dotR   (   R   t   other(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyt   __cmp__q   s
    c         C   s4   t  |  i |  i |  i |  i t t |  i ƒ ƒ f ƒ S(   N(   t   hashR7   R8   R6   R9   t   tupleR2   R   (   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyt   __hash__x   s    (   t   __name__t
   __module__t   __doc__R   R   R   R+   R$   R0   R;   R>   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR   6   s   						t   FeatureFundamentalRulec           B   s   e  Z d  Z d „  Z RS(   s¤  
    A specialized version of the fundamental rule that operates on
    nonterminals whose symbols are C{FeatStructNonterminal}s.  Rather
    tha simply comparing the nonterminals for equality, they are
    unified.  Variable bindings from these unifications are collected
    and stored in the chart using a L{FeatureTreeEdge}.  When a
    complete edge is generated, these bindings are applied to all
    nonterminals in the edge.

    The fundamental rule states that:
        - [AS{->}S{alpha}*B1S{beta}][i:j]
        - [B2S{->}S{gamma}*][j:k]
    licenses the edge:
        - [AS{->}S{alpha}B3*S{beta}][i:j]
    assuming that B1 and B2 can be unified to generate B3.
    c   
      c   sh  | i  ƒ  | i ƒ  j oX | i ƒ  oK | i ƒ  o> t | t ƒ o. t | t ƒ o | i ƒ  t | i ƒ  t j p d  Sn | i	 ƒ  } t
 | i ƒ  | i ƒ  | d t ƒ} | d  j o d  Sn t d | i ƒ  | i  ƒ  f d | i ƒ  d | i ƒ  d | i ƒ  d d | ƒ } t } x; | i | ƒ D]* }	 | i | |	 | f ƒ o
 t } q&q&W| o	 | Vn d  S(   Nt   rename_varsR    R!   R"   R#   i   R$   (   t   endt   startt   is_incompleteR/   R'   R   R*   R   R!   R$   R   t   FalseR   R   R"   R#   t   child_pointer_listst   insertt   True(
   R   t   chartR   t	   left_edget
   right_edgeR$   t   resultt   new_edget   changed_chartt   cpl1(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyt
   apply_iterŽ   s(    ! (   R?   R@   RA   RR   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRB   }   s   t   FeatureTopDownExpandRulec           B   s   e  Z d  Z d „  Z RS(   sÎ  
    A specialized version of the top down expand rule that operates on
    nonterminals whose symbols are C{FeatStructNonterminal}s.  Rather
    tha simply comparing the nonterminals for equality, they are
    unified.

    The top down expand rule states that:
        - [AS{->}S{alpha}*B1S{beta}][i:j]
    licenses the edge:
        - [B2S{->}*S{gamma}][j:j]
    for each grammar production C{B2S{->}S{gamma}}, assuming that B1
    and B2 can be unified.
    c      
   c   s  | i  ƒ  o d  Sn xð | i ƒ  D]â } t | i ƒ  ƒ d j o! t | i ƒ  d t ƒ o q" n | i ƒ  t | i ƒ  t j o t	 | i ƒ  | i
 ƒ  d t ƒo_ t d | i ƒ  | i ƒ  f d | i ƒ  d | i ƒ  d d ƒ } | i | d ƒ o	 | Vqq" q" Wd  S(	   Ni   i    RC   R    R!   R"   R#   (    (   R/   t   productionsR   R"   R'   t   strR!   R   R*   R   R+   RJ   R   RD   RI   (   R   RK   R   t   edget   prodRO   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRR   »   s     :!"(   R?   R@   RA   RR   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRS   ­   s   t   FeatureCompleterRulec           B   s    e  Z d  Z e ƒ  Z d „  Z RS(   s  
    A specialized version of the completer rule that operates on
    nonterminals whose symbols are C{FeatStructNonterminal}s.  Rather
    tha simply comparing the nonterminals for equality, they are
    unified.  See L{CompleterRule} for more information.
    c         c   s^   |  i  } xN | i d | i ƒ  d t ƒ D]. } x% | i | | | | ƒ D] } | VqG Wq( Wd  S(   NRD   R/   (   t   _fundamental_rulet   selectRE   RG   RR   (   R   RK   R   t   edge1t   frt   edge2RO   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRR   Ø   s    	  (   R?   R@   RA   RB   RY   RR   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRX   Ï   s   	t   FeatureScannerRulec           B   s   e  Z d  „  Z RS(   c         c   s2  | i  ƒ  p | i ƒ  | i ƒ  j o d  Sn | i ƒ  } | i | ƒ } xâ g  } | i d | ƒ D] } | | i ƒ  qc ~ D]­ } | t | i ƒ  t j oŒ t | | i	 ƒ  d t
 ƒop t | | ƒ }	 | i |	 d ƒ o	 |	 Vn t | | d f | | g d ƒ }
 | i |
 |	 f ƒ o	 |
 Vq*q} q} Wd  S(   NR"   RC   i   (    (   R/   RD   t
   num_leavest   leafRT   R!   R   R*   R   R+   RJ   t   LeafEdgeRI   R   (   R   RK   t   gramarRV   t   indexR`   R%   RW   t   post   new_leaf_edget   new_pos_edge(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRR   ß   s    .4 	(   R?   R@   RR   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR^   Þ   s   t   FeaturePredictorRulec           B   s   e  Z RS(    (   R?   R@   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRg   ï   s    t   EarleyChartParserc           B   sb   e  Z d  Z e Z e Z e Z d e	 d „ Z
 d „  Z d Z e e e d „ Z d „  Z d „  Z RS(   sƒ  
    A chart parser implementing the Earley parsing algorithm:

        - For each index I{end} in [0, 1, ..., N]:
          - For each I{edge} s.t. I{edge}.end = I{end}:
            - If I{edge} is incomplete, and I{edge}.next is not a part
              of speech:
                - Apply PredictorRule to I{edge}
            - If I{edge} is incomplete, and I{edge}.next is a part of
              speech:
                - Apply ScannerRule to I{edge}
            - If I{edge} is complete:
                - Apply CompleterRule to I{edge}
        - Return any complete parses in the chart

    @ivar _predictor_class, _completer_class, _scanner_class: The
    classes that are used to implement the three rules used by the
    Earley algorithm,  Replacement rules can be specified by
    subclasses (such as L{FeatureEarleyChartParser
    <nltk.parse.featurechar.FeatureEarleyChartParser>}).
    i    c         C   s?   t  | t ƒ o t d ƒ ‚ n | |  _ | |  _ | |  _ d S(   s7  
        Create a new Earley chart parser, that uses C{grammar} to
        parse texts.
        
        @type grammar: C{ContextFreeGrammar}
        @param grammar: The grammar used to parse texts.
        @type trace: C{int}
        @param trace: The level of tracing that should be used when
            parsing a text.  C{0} will generate no tracing output;
            and higher numbers will produce more verbose tracing
            output.
        @param chart_class: The class that should be used to create
            the charts used by this parser.
        st   Earley parser no longer takes a lexicon as a separate parameter; the lexicon is calculated from the grammar instead.N(   R'   t   dictt
   ValueErrort   _grammart   _tracet   _chart_class(   R   R   R   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR     s
    		c         C   s   |  i  S(   N(   Rk   (   R   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR   *  s    i(   c      
   C   s£  | d  j	 o | |  _ n t | ƒ } |  i i | ƒ |  i | ƒ } |  i } t d |  i | i ƒ  d ƒ } |  i d j o d G| i	 | ƒ GHn | i
 |  i | i ƒ  ƒ d ƒ |  i ƒ  } |  i ƒ  }	 |  i ƒ  }
 x—t | i ƒ  d ƒ D]} |  i d j o d | GHn |  i d j o! t i i d ƒ t i i ƒ  n x(| i d | ƒ D]} | i ƒ  oM xJ |	 i | | | ƒ D]/ } |  i d j o d G| i | | ƒ GHq~q~Wn | i ƒ  oM xJ | i | | | ƒ D]/ } |  i d j o d	 G| i | | ƒ GHqØqØWn | i ƒ  oM xJ |
 i | | | ƒ D]/ } |  i d j o d
 G| i | | ƒ GHq2q2WqUqUWqî W|  i d j o Hn |  i | | i ƒ  | ƒ |  S(   Ni   i   t    i	   s   Processing queue %dt   .RD   t	   Completert	   Predictors	   Scanner  s	            (    (   R   Rl   t   listRk   t   check_coverageRm   t   maxt   _trace_chart_widthR_   t	   pp_leavesRI   t   _starter_edgeRE   t   _predictor_classt   _completer_classt   _scanner_classt   ranget   syst   stdoutt   writet   flushRZ   R/   RR   t   pp_edgeRF   t   _parses(   R   t   tokenst   nt
   tree_classR   RK   R   t   wt	   predictort	   completert   scannerRD   RV   t   e(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyt   nbest_parse1  sF    	 & !  ! ! )c         C   s   t  d ƒ } t d | | f ƒ S(   s9   Return a 'starter edge' that expands to the start symbol.s   [INIT]i    (   i    i    (   R   R   (   R   t	   start_symt   root(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRw   ]  s    c         C   s   | i  | d | ƒS(   s+   Return a list of parses in the given chart.R„   (   t   parses(   R   RK   R‹   R„   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR   d  s    (   R?   R@   RA   t   PredictorRuleRx   t   CompleterRuleRy   t   ScannerRuleRz   t   ChartR   R   Ru   R   t   TreeRŠ   Rw   R   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRh   ù   s   	,	R   c           B   s8   e  Z d  Z e Z e Z e Z d Z	 d „  Z
 d „  Z RS(   sÅ   
    A chart parser implementing the Earley parsing algorithm, allowing
    nonterminals that have features (known as L{FeatStructNonterminal}s).
    See L{EarleyChartParser} for more details.
    i
   c         C   s"   t  d ƒ } t d | | f d ƒ S(   Ns   [*type*="[INIT]"]i    (   i    i    (   R	   R   (   R   RE   RŒ   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRw   s  s    c         C   sž   g  } x‘ | i  d d | i ƒ  f ƒ D]q } t | t ƒ oZ | i ƒ  t | t j o? t | i ƒ  | d t ƒo# | | i | d t d | ƒ7} q% q% W| S(   NR    i    RC   t   completeR„   (	   RZ   R_   R'   Ra   R!   R   R   RJ   t   trees(   R   RK   RE   R„   R”   RV   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR   w  s     (   R?   R@   RA   Rg   Rx   RX   Ry   R^   Rz   Ru   Rw   R   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR   h  s   	t   InstantiateVarsChartc           B   s2   e  Z d  Z d „  Z d „  Z d „  Z d „  Z RS(   s>  
    A specialized chart that 'instantiates' variables whose names
    start with '@', by replacing them with unique new variables.
    In particular, whenever a complete edge is added to the chart, any
    variables in the edge's C{lhs} whose names start with '@' will be
    replaced by unique new L{Variable}s.
    c         C   s    t  i |  | ƒ t ƒ  |  _ d  S(   N(   R‘   R   t   sett   _instantiated(   R   R‚   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR   Ž  s    c         C   s:   | |  i  j o t Sn |  i | ƒ } t i |  | | ƒ S(   N(   R—   RG   t   instantiate_edgeR‘   RI   (   R   RV   t   child_pointer_list(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyRI   ’  s    c         C   s¶   t  | t ƒ p | Sn | i ƒ  p | Sn | |  i j o | Sn |  i | ƒ } | p | Sn |  i i | ƒ | i ƒ  i | ƒ } t | i	 ƒ  | | i
 ƒ  | i ƒ  | i ƒ  ƒ S(   N(   R'   R   R/   t   _edge_to_cplst	   inst_varsR—   t   addR!   R(   R    R"   R#   R$   (   R   RV   R›   R!   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR˜   —  s    c         C   s    t  d „  | i ƒ  i ƒ  Dƒ ƒ S(   Nc         s   s<   x5 |  ]. } | i  i d  ƒ o | t i ƒ  f Vq q Wd S(   t   @N(   t   namet
   startswithR   t   unique_variable(   R-   t   var(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pys	   <genexpr>ª  s   	(   Ri   R!   t	   variables(   R   RV   (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR›   ©  s    	(   R?   R@   RA   R   RI   R˜   R›   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyR•   †  s
   			i   c         C   sb  d d  k  } d d  k } t d ƒ } t d ƒ } t d ƒ } t d ƒ } t d ƒ } t d ƒ }	 t d ƒ }
 t d	 ƒ } t d
 ƒ } t d ƒ } t d ƒ } t d ƒ } t d ƒ } t | | | f ƒ t | |
 | f ƒ t | | | f ƒ t | | | f ƒ t | | | f ƒ t | | f ƒ t | | | f ƒ t | | | f ƒ t | d ƒ t | d ƒ t | d  ƒ t | d! ƒ t | d" ƒ t | d# ƒ t | d$ ƒ t | d% ƒ t | d& ƒ t |
 d' ƒ t |
 d( ƒ g } t | | ƒ } | GHHd } d G| GHH| i ƒ  } | i ƒ  } t | d | ƒ} | i | ƒ } H|  o d | i ƒ  | GHn x | D] } | GHqOWd  S()   Niÿÿÿÿt   St   VPt   NPt   PPt   Vt   Nt   Pt   Namet   Dets   Det[-pl]s   Det[+pl]s   N[-pl]s   N[+pl]t   Johnt   It   thet   myt   at   dogt   cookiet   atet   sawt   witht   unders$   I saw John with a dog with my cookies	   Sentence:R   s   Time: %s(   s   John(   R­   (   s   the(   s   my(   R°   (   s   dog(   s   cookie(   s   ate(   s   saw(   s   with(   s   under(   R|   t   timeR	   R   R   t   splitR   RŠ   (   t   should_print_timesR   R|   R·   R£   R¤   R¥   R¦   R§   R¨   R©   Rª   R«   t   DetSgt   DetPlt   NSgt   NPlRT   t   earley_grammart   sentR‚   t   tt   cpR”   t   tree(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyt   demo³  sR    $$! c          C   sy   d d  k  }  |  i d d ƒ d d  k } | i d ƒ } | i ƒ  i d d ƒ i d ƒ | i ƒ  i d d ƒ i d ƒ d  S(   Niÿÿÿÿs   for i in range(1): demo()s   /tmp/profile.outR·   t   cumi<   (   t   profilet   runt   pstatst   Statst
   strip_dirst
   sort_statst   print_stats(   RÅ   RÇ   t   p(    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pyt   run_profileå  s    t   __main__s!   grammars/book_grammars/feat0.fcfgR   i   s   Kim likes children(2   RA   t   yamlR|   t   nltk.featstructR    R   R   R   t   nltk.semR   R   R   R   R   R   t   nltk.grammarR	   t	   nltk.datat   apiRK   RG   R‘   R   R   R   R   t   FundamentalRuleRB   t   TopDownExpandRuleRS   R   RX   R   R^   Rg   t   ParserIRh   R   R•   RJ   RÃ   RÍ   R?   RÁ   R¿   R¸   R‚   RŠ   R”   RÂ   (    (    (    s-   /p/zhu/06/nlp/nltk/nltk/parse/featurechart.pys   <module>   s>   ""

	G0"
o-2	 