
3Ic        	   @   s   d  Z  d d k Z d d k Z d d k Z d d k Z d d k l Z l Z l Z l	 Z	 d d k
 Td e f d     YZ d   Z e d j o e   n d	 g Z d S(
   s  
A graphical tool for exploring the regular expression based chunk
parser (L{RegexpChunkParser<nltk.chunk.regex.RegexpChunkParser>}).

@todo: Add a way to select the development set from the menubar.  This
    might just need to be a selection box (conll vs treebank etc) plus
    configuration parameters to select what's being chunked (eg VP vs NP)
    and what part of the data is being used as the development set.
iN(   t   corpust   Treet   chunkt   in_idle(   t   *t   RegexpChunkAppc           B   s  e  Z d  Z h  d d <d d <d d <d d <d	 d
 <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d <d d  <d! d" <d# d$ <d% d& <d' d( <d) d* <d+ d, <d- d. <d/ d0 <d1 d2 <d3 d4 <d5 d6 <d7 d8 <d9 d: <d; d< <d= d> <d? d@ <dA dB <dC dD <dE dF <dG dH <dI dJ <dK dL <dM dN <dO dP <dQ dR <dS dT <dU dV <dW dX <dY dZ <Z d d d dd db de df dg dh f g Z di e dj dk  f dl e dj dm  f dn e do dp  f dq e dq e  f dr e dq e  f ds e dt du dv du  f dw e dt dx dv dy  f dz e dj d{  f d| e dj d}  f d~ e dj d  f g
 Z d Z d Z	 d Z
 d Z d Z e d d d d do d d d d d d d d d d d  Z e d d d d do d d d dj d d d d d d d d d  	Z e d d d d do d d d d d d d d d d d d d  	Z e do d d d d d  Z e d d d d  Z e do d d d d d d d  Z e do d d d d d d d d d d d d d  Z e do d d d d d  Z d Z d Z e do d  Z e do d  Z d Z d   Z d e d d e d  Z d   Z d   Z d   Z d   Z  d Z! e" Z# d   Z$ e" Z% d   Z& d   Z' d   Z( e" Z) d   Z* d   Z+ d   Z, d   Z- d   Z. d   Z/ d   Z0 d   Z1 d   Z2 e d  Z3 d   Z4 d   Z5 d   Z6 d   Z7 e d  Z8 d   Z9 d   Z: d   Z; d Z< e d  Z= e d  Z> e d  Z? d   Z@ e d  ZA e d  ZB d   ZC RS(   s   
    A graphical tool for exploring the regular expression based chunk
    parser (L{RegexpChunkParser<nltk.chunk.regex.RegexpChunkParser>}).
    
    See L{HELP} for instructional text.
    s   Coordinating conjunctiont   CCs   Possessive pronouns   PRP$s   Cardinal numbert   CDt   Adverbt   RBt
   Determinert   DTs   Adverb, comparativet   RBRs   Existential theret   EXs   Adverb, superlativet   RBSs   Foreign wordt   FWt   Particlet   RPt	   Adjectivet   JJt   tot   TOs   Adjective, comparativet   JJRt   Interjectiont   UHs   Adjective, superlativet   JJSs   Verb, base formt   VBs   List item markert   LSs   Verb, past tenset   VBDt   Modalt   MDs   Noun, pluralt   NNSs   Noun, singular or maspst   NNs   Verb, past participlet   VBNs   Verb,3rd ps. sing. presentt   VBZs   Proper noun, singulart   NNPs   Proper noun pluralt   NNPSs   wh-determinert   WDTt   Predeterminert   PDTs
   wh-pronount   WPs   Possessive endingt   POSs   Possessive wh-pronouns   WP$s   Personal pronount   PRPs	   wh-adverbt   WRBs   open parenthesist   (s   close parenthesist   )s
   open quotes   ``t   commat   ,s   close quotes   ''t   periodt   .s   pound sign (currency marker)t   #s   dollar sign (currency marker)t   $s   Preposition/subord. conjunctiont   INs#   Symbol (mathematical or scientific)t   SYMs   Verb, gerund/present participlet   VBGs   Verb, non-3rd ps. sing. presentt   VBPt   colont   :t   Helpt   20s1  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their geometric average (the 'f-score'), are displayed in the status bar at the bottom of the window.t   Rulest   10s  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Chink rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
t   Regexpss   10 60sX  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
	<regexp><\#><CD> # This is a comment...</regexp>
		Matches <match>"#/# 100/CD"</match>
</hangindent>t   Tagss   <h1>Part of Speech Tags:</h1>
s   <hangindent>s
   <<TAGSET>>s   </hangindent>
t   redt
   foregrounds   #a00t   greens   #080t	   highlightt
   backgrounds   #dddt	   underlinet   h1t   indentt   lmargin1i   t   lmargin2t
   hangindenti    i<   t   vars   #88ft   regexps   #ba7t   matchs   #6a6i   i   g?g{Gz?g{Gz?t   widthi(   t   heighti   s   #efet   highlightbackgroundt   highlightthicknesst   relieft   groovet   borderi   t   wrapt   words   #555iF   i
   s   #eeft   tabsi   s   #9bbt   familyt	   helveticat   sizeis   #777t   padxt   padyi   i,  i  t   activebackgrounds   #abai   c         C   sd   t  i d d |  } t  i d d |  } t  i d d |  } | i   } t  i d d |  } | S(	   Ns   ((\\.|[^#])*)(#.*)?s   \1s    +t    s   
\s+s   
s	   ([^\\])\$s   \1\\$(   t   ret   subt   strip(   t   selft   grammar(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyt   normalize_grammar   s    t	   conll2000t    t   NPc         C   s  | |  _  | d j o |  i } n | |  _ | d j oW | d j o t i i d  } q | d j o t i i   } q t d |   n d |  _	 | |  _
 d |  _ d |  _ | |  _ | |  _ d |  _ d |  _ g  |  _ d |  _ d |  _ d |  _ d |  _ t i d |  |  _ t   } |  _ | i d  | i d	  | i d
 |  i  t |  |  _  |  i  i! d  |  i" |  |  i# |  |  i$ |  |  i% |  |  i& i'   | o. |  i& i( d | d  |  i& i) d d  n |  i* d  |  i+   d S(   s  
        @param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        @param devset: A list of chunked sentences
        @param grammar: The initial grammar to display.
        @param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to C{self.TAGSET}.
        Re   s	   train.txtt   treebanks   Unknown development set %si    it
   chunk_nodes   +50+50s   Regexp Chunk Parser Apps   <Control-q>id   t   ends   
t   inserts   1.0N(,   t   _chunk_nodet   Nonet   TAGSETt   tagsetR    Re   t   chunked_sentst   treebank_chunkt
   ValueErrort   chunkerRc   t   normalized_grammart   grammar_changedt   devsett   devset_namet   devset_indext   _last_keypresst   _historyt   _history_indext   _eval_grammart   _eval_normalized_grammart   _eval_indexR   t
   ChunkScoret   _eval_scoret   Tkt   topt   geometryt   titlet   bindt   destroyt   IntVart   _devset_sizet   sett   _init_fontst   _init_widgetst   _init_bindingst   _init_menubart
   grammarboxt   focusRk   t   mark_sett   show_devsett   update(   Rb   Rw   Rv   Rc   Ri   Ro   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyt   __init__   sN    															c            s   | i  d   i  | i  d   i  | i  d   i  | i  d   i  | i  d   f d    | i  d   f d      i i  d   i    i i  d   i    i i  d   i    i i  d	   i  d  S(
   Ns   <Control-n>s   <Control-p>s   <Control-t>s
   <KeyPress>s   <Control-s>c            s
     i    S(    (   t   save_grammar(   t   e(   Rb   (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyt   <lambda>f  s    s   <Control-o>c            s
     i    S(    (   t   load_grammar(   R   (   Rb   (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR   g  s    s   <Configure>(   R   t   _devset_nextt   _devset_prevt   toggle_show_traceR   R   t   evalboxt
   _eval_plot(   Rb   R   (    (   Rb   s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR   a  s    c         C   su   t  |  |  _ |  i i d  t i d d d |  i i    |  _ t i d d d |  i i   d d  |  _ d  S(   Ni   RX   RY   RZ   i   (   R   t   _sizeR   t   tkFontt   Fontt   gett   _fontt
   _smallfont(   Rb   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR   o  s    c         C   s*  t  |  } t  | d d } | i d d d d d |  i  | i d d d d d d	 d |  i  | i d d
 d d d d d |  i  | i d d d d d |  i  | i d d d d d |  i d d  | i d d d d d |  t  | d d } | i d d d |  i	 d d d d d |  i
  | i d d d |  i	 d d d d d |  i
  | i d d d |  i	 d d d d d |  i
  | i d d d |  i	 d d d d d |  i
  | i d d d |  i	 d d d d d |  i
  | i d d d d d |  t  | d d } | i d d  d |  i d d! d |  i  | i d d" d |  i d d# d |  i  | i d d$ d |  i d d% d |  i  | i d d& d |  i d d' d |  i  | i d d( d d d |  t  | d d } | i d d) d d d |  i  | i d d* d d d |  | i d |  d  S(+   Nt   tearoffi    t   labels   Reset ApplicationRE   t   commands   Save Current Grammart   accelerators   Ctrl-ss   Load Grammars   Ctrl-os   Save Grammar Historyi   t   Exiti   s   Ctrl-qt   Filet   menut   Tinyt   variablet   valuei
   t   Smalli   t   Mediumi   t   Largei   t   Hugei"   t   Views   50 sentencesi2   s   100 sentencesid   s   200 sentencesi   s   500 sentencesi  s   Development-Sett   AboutR:   (   t   Menut   add_commandt   resetR   R   t   save_historyR   t   add_cascadet   add_radiobuttonR   t   resizeR   t   set_devset_sizet   aboutt   config(   Rb   t   parentt   menubart   filemenut   viewmenut
   devsetmenut   helpmenu(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR   x  s^    



				
c         G   s&   |  i  o |  i   n |  i   d S(   Nt   break(   t   _showing_traceR   t
   show_trace(   Rb   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    

i   c         O   s  | i  d |  i i    } | i  d |  i i    } |  i i d  |  i i d | d d d d d d	 d
 d } |  i i |  d d | d } } |  i i | | | d | d d d d
 d d d } d |  i i |  d d } }	 |  i d }
 |  i i |  i i	 d d | d d d |
 d |
  |  i i |  i i	 d |	 d d d d |
 d |
  |  i
 i    ot |  i  d j o d } } d } } x t d t t |  i  |  i d   D]\ } |  i | \ } } } } t | |  } t | |  } t | |  } t | |  } qWt | d d  } t | d d  } t | d d  } t | d d  } n d } } d } } x t d  D] } | | | | d | | | } |	 |	 | | d | | | } | | j  o
 | j  n o# |  i i | | | |	 d d n | | j  o
 |	 j  n o# |  i i | | | | d d qqW|  i i | | | |	  |  i i | |	 | |	  |  i i | d |	 d d d d d
 d d | |  i i | d | d d d d d
 d d | |  i i | |	 d d d d d  d
 d d | |  i i | |	 d d d d d d
 d d | d  } } xut |  i  D]d\ } \ } } } } | | | | | | | } |	 |	 | | | | | } | |  i j of |  i i | d | d | d | d d d! d d" d# | d d$ | d d% | d |  i d
 <nB |  i i |  i i | d | d | d | d d d& d d'  | d  j	 o? |  i i    o/ |  i i |  i i | | | | d d'  n | | } } q}Wd  S((   NRN   RO   t   alli
   i   t   justifyt   leftt   anchort   wt   textt	   Precisioni   t   st   Recallt   centeri   RD   i    i  t   fillt   outlineg{Gz?i   g      $@s   #888i   t   rightt   ses   %d%%id   t   net   nws   #0f0s   #000s   Precision: %.2f%%	s   Recall: %.2f%%	s   F-score: %.2f%%s   #afas   #8c8(   R   R   t   winfo_widtht   winfo_heightt   deletet   create_textt   bboxt   _EVALBOX_PARAMSt   lowert   create_rectanglet
   _autoscalet   lenRz   t   ranget   mint   _SCALE_Nt   maxt   create_lineRm   t	   enumerateR{   t   create_ovalt   statust   _eval_lines(   Rb   R   R   RN   RO   t   tagR   R   R   t   bott   bgt   max_precisiont
   max_recallt   min_precisiont
   min_recallt   iRc   t	   precisiont   recallt   fmeasuret   xt   yt   prev_xt   prev_yt   _t   fscore(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s     %"!%%&

) 

 #'				
 (	(	c   	   
   C   s  |  i  d  j o d  Sn |  i d  j o t |  _ d  Sn t i   } t i   |  i |  i j  oC |  i |  i	 j o0 t
 |  _ |  i  i t |  i d  |  i  Sn |  i |  i	 j o x |  i D] \ } } } } |  i |  i |  j oV |  i i | | | | f  t |  i  d |  _ |  i   t |  _ d  |  _	 d  Sq q Wd |  _ t i d |  i  |  _ |  i |  _ |  i |  _	 n |  i i   d j o t |  _ d  Sn x_ |  i |  i t |  i |  i |  i  i!    !D]. } |  i" | i#    } |  i i$ | |  qW|  i |  i 7_ |  i |  i  i!   j op |  i i |  i |  i i%   |  i i&   |  i i'   f  t |  i  d |  _ |  i   t |  _ d  |  _	 no d |  i |  i  i!   } d | |  i( d <t
 |  _ |  i) t i   |  |  i  i t |  i d  |  i  d  S(	   Ni  i   i    Ri   Rf   id   s$   Evaluating on Development Set (%d%%)R   (*   R   Rm   Rs   t   Falset   _eval_demon_runningt   timeRy   t   _EVAL_DELAYRt   R}   t   Truet   aftert   intt
   _EVAL_FREQt   _eval_demonRz   Rd   t   appendR   R{   R   R~   R   R   Rl   R   Rc   R|   Ra   Rv   R   t   _EVAL_CHUNKR   R   t   _chunkparset   leavest   scoreR   R   t	   f_measureR   t   _adaptively_modify_eval_chunk(	   Rb   t   t0t   gt   pt   rt   ft   goldt   guesst   progress(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR  "  sb    		'
 
					 
		c         C   s   | |  i  j oQ |  i d j oA t |  i d t t |  i |  i  |  |  i d   |  _ nR | |  i j  oA t |  i d t t |  i |  i |  |  i d   |  _ n d S(   s   
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.
        
        @param t: The amount of time that the eval demon took.
        i   i   i
   N(   t   _EVAL_DEMON_MAXR  R   R   R   t   _EVAL_DEMON_MIN(   Rb   t   t(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR	  f  s     c            s	  t  |   i  } | i d d d | i d d d | i d d d | i d d d t | d   i   i   _ t | d   i d	 d
 d d d   i d   _	   i	 i
 d d d d d d    i i
 d d d d d d  t | d   i i } | i
 d d d d d d    i i d | i    i d } t  | d | } | i
 d d d d d d  t | d	 d d   i   i i d d  t | d	 d d   i   i i d d  t | d   i   i   _   i i
 d d d d d d  h    _   i d } t  | d | } | i
 d d d d d d  x t   i  D] \ } \ } }	 }
 t | d	 | d   i } | i
 d | d d d d d  | i d |   f d   |   i | <t  | d d d   i d | i
 d | d d d d  qWW  i   i d d i d   i    i i d  d  t x.   i D]# \ } }   i i d! | |  qRW  i    i d d  t | d   i i }   i i d | i  | i
 d d d d d d  t  | d   i d } t | d   i   i!   _"   i" i d" t d# d$  t | d   i d	 d% d& d' d   i! d   _#   i# i
 d d d d d d  | i
 d d d d d d  t | d   i$   _%   i% i
 d d d d d d  t | d   i" i& d( d)   _'   i' i   i" d* <  i' i d d+ d# d,    i d } t  | d | } | i
 d d d d- d d  t | d	 d. d   i(   i i d d  t | d	 d/ d   i)   i i d d  t | d	 d0 d   i* d1 d2   i   _+   i+ i d d'  t | d	 d3 d   i,   i   _-   i- i d d'  t. |   i/    _0 t | d   i d	 d4 d& d' d   i/ d } | i
 d d d d d d    i0 i
 d d d d d d d5 d    i d } t  | d | } | i
 d d d d- d d  t1   i2    _3   i3 i t4  t5 | d6   i3 d   i6 d	 d7   i i d d  t1   i2    _7   i7 i t4  t5 | d6   i7 d   i6 d	 d8   i i d d  t | d	 d9   i i d d'  t | d   i   i8   _9   i9 i
 d d d d: d d; d< d d= d d5 d  d2   i d1 <d2   i" d1 <  i d } t  | d d> d d d | i
 d d d d  t  | d d d d> d | i
 d d d d  t  | d d? d d d | i
 d d d d@  | i d# d$ d" t    i" i dA d dB dC dD   i" i dE dC dD dF dG   i" i dH d dI   i" i dJ dF dK dL dM   i" i dN dO dP dL dM   i" i dQ dF dG   i i dQ d dR   i i dS dF dT   i i dU dF dV   i i dW dF dX   i i dY dZ d dO d[ d  S(\   Ni    t   weighti   i   i   i   i   t   fontR   s   Grammar:t   highlightcolort   blackRD   t   columnt   rowt   stickyt   SWt   NEWSR   t   NWSt   yscrollcommandt   EWs   Prev Grammart   sideR   s   Next Grammart   Ss   <ButtonPress>c            s     i  |  S(    (   t	   show_help(   R   t   tab(   Rb   (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    RO   RN   t   elides   tag-%st   expandR   t   boths   Development Set:R   R   t   orientt   horizt   xscrollcommandt   bottomR   i   s   Prev Example (Ctrl-p)s   Next Example (Ctrl-n)s   Show examplet   statet   disableds
   Show traces   Evaluation:t
   columnspanR   t   Zoomt   Linest   Historyi	   t   NEWR[   R\   i
   i   i   s   true-poss   #afaRE   R   s	   false-negRA   s   #800s	   false-poss   #faat   traces   #666RU   t   nonet
   wrapindentRI   i   t   errors   #fect   comments   #840t   angles   #00ft   braces   #0a0RJ   RH   i(   (:   t   Framet   _FRAME_PARAMSt   grid_columnconfiguret   grid_rowconfiguret   TextR   t   _GRAMMARBOX_PARAMSR   t   Labelt   grammarlabelt   gridt	   Scrollbart   yviewR   R   t   Buttont   _history_prevt   _BUTTON_PARAMSt   packt   _history_nextR   t   _HELPBOX_PARAMSt   helpboxt   helptabsR   t   HELPR   t   _HELPTAB_SPACERt	   configuret
   tag_configR   t   HELP_AUTOTAGR#  t   _DEVSETBOX_PARAMSt	   devsetboxt   devsetlabelt   _devset_scrollt   devset_scrollt   xviewt   devset_xscrollR   R   R   t   devset_buttonR   t   trace_buttont   CanvasR   R   R   R   R   R   t   CheckbuttonR   R   t   _STATUS_PARAMSR   (   Rb   R   t   frame0t   grammar_scrollbarR   t   frame3t   helptab_frameR   R$  t   tabstopsR   R   R   t   paramst   help_scrollbart   frame4t   frame1t   frame2(    (   Rb   s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR   v  s    	  %%
 	%*...c            sG  t  |  _ d |  i d <d |  i d <d |  i d <|  i i d d  d |  i d |  i i   f |  i	 d <|  i
 d  j o1 |  i i d d	  |  i i d
 d d  d  Sn |  i |  i } |  i
 i   } d } d g   xG t | i    D]3 \ } \ } } | d | 7}   i t |   q Wt   f d   t t |  d  D  |  _ t d   t t |  d  D  |  _ x}t t |  d  D]e} | d j o- |  i i d d  |  i i d d d  n6 |  i i d d | | d  |  i i d d d  |  i i d | d  |  i i d d d  |	 i | |   }
 |  i | i    } |  i |  } |  i |  } x* | i |  D] }	 |  i | |	 d  qWx% | | D] }	 |  i | |	 d  qWx% | | D] }	 |  i | |	 d  qWqW|  i i d d  |  i i d d d  |  i i d |  i i  d d  d  S(   NR-  R,  t   normals   1.0Rj   s   Development Set (%d/%d)i   R   s#   Trace: waiting for a valid grammar.R6  s   	s   %s c         3   sF   x? |  ]8 } x/ t  t     D] } | | f   | f Vq Wq Wd  S(   N(   R   R   (   t   .0R   t   j(   t   charnum(    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pys	   <genexpr>+  s   	c         s   s'   x  |  ] } | | d  d  f Vq Wd S(   i   N(    (   Ri  R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pys	   <genexpr>.  s    i    s   Start:
R3  s   end -2c linestarts   end -2cs
   Apply %s:
s   
R5  s   true-poss	   false-negs	   false-poss
   Finished.
id   g333333?(!   R   R   RZ  RY  RS  R   Rx   R   R   RT  Rs   Rm   Rk   t   tag_addRv   t   rulesR   R  R  R   t   dictR   Rk  t   linenumt   RegexpChunkParserR  t   _chunkst   intersectiont   _color_chunkR   R   RX  R   (   Rb   R   t	   gold_treeRm  t   tagseqt   wordnumRV   t   posR   R   Rs   t	   test_treet   gold_chunkst   test_chunks(    (   Rk  s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s^    	$	  )    c   
   	   C   s  d |  i  d <|  i  i d d  x|  i D]\ } } } | | j oc| i d d i d   t |  i i   d d	   D   } |  i | i	 |  i
   |  i  i	 d
 |  |  i  i d | d  d } x |  i D] \ } } d | | f } x t i | |  D] }	 |  i  i d | |	 i d  | |	 i d   |  i  i d | | |	 i d  | |	 i d   |  i  i d | |	 i d  | |	 i d   qWq Wq* |  i | i	 |  i   q* Wd |  i  d <d  S(   NRh  R,  s   1.0Rj   s
   <<TAGSET>>s   
c         s   s   x |  ] } d  | Vq Wd S(   s   	%s	%sN(    (   Ri  t   item(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pys	   <genexpr>T  s    t   keyc         S   s6   |  \ } } t  i d  |  o d | f p
 d | f S(   s   \w+i    i   (   R_   RM   (   Ri  R  R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR   U  s    RW   i   s   1.0 + %d charss   (?s)(<%s>)(.*?)(</%s>)R%  i   s   tag-%si   i   R-  s   



















(   RK  R   RM  t   replacet   joint   sortedRo   t   itemsRL  R   t   _HELPTAB_FG_PARAMSRk   RQ  R_   t   finditerRl  t   startRj   t   _HELPTAB_BG_PARAMS(
   Rb   R$  t   nameRb  R   t   CR   Rc  t   patternt   m(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR#  N  s2    
 
  $$0c         G   s   |  i  |  i d  d S(   Ni   R   (   t   _view_historyR{   (   Rb   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyRF  h  s    c         G   s   |  i  |  i d  d S(   Ni   R   (   R  R{   (   Rb   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyRI  l  s    c         C   s  t  d t t |  i  d |   } |  i p d  Sn | |  i j o d  Sn d |  i d <|  i i d d  |  i i d |  i | d  |  i i d d  | |  _ |  i	 |  i | d  |  i
 |  i | d  |  _ |  i o@ g  } |  i i d  D] } | t i i i |  q ~ } n g  } t i |  |  _ |  i   |  i   |  i o |  i   n |  i t |  i  d j  o+ d	 |  i d t |  i  f |  i d
 <n d |  i d
 <d  S(   Ni    i   Rh  R,  s   1.0Rj   Rk   s   
s   Grammar %s/%s:R   s   Grammar:(   R   R   R   Rz   R{   R   R   Rk   R   t   _syntax_highlight_grammarRd   Rt   t   splitR   RL   t   RegexpChunkRulet   parseRp  Rs   R   t   _highlight_devsetR   R   RA  (   Rb   t   indext   _[1]t   lineRm  (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR  p  s0    %	
9

(c         G   s   |  i  d d d  d S(   Nt   scrolli   t   pageR   (   RU  (   Rb   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    c         G   s   |  i  d d d  d S(   NR  iR  R   (   RU  (   Rb   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    c         G   s2   |  i  d  j o d  Sn |  i  i   d  |  _  d  S(   N(   R   Rm   R   (   Rb   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    c         G   s  d } |  i  } | d j o6 | d i d  o" |  i |  i t | d   n | d j o: | d i d  o& |  i |  i | t | d   nW | d j o. |  i t t | d  |  i i     n d p t d | | f  | o |  i	   n d  S(   Ni   R  t   uniti    R  t   movetos   bad scroll command %s %s(
   R   t
   startswithR   Rx   R   t   floatR   R   t   AssertionErrorR   (   Rb   R   t   argst   Nt   showing_trace(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyRU    s    	!"!&.c      	   C   sF  | d  j o |  i } n t t d |  |  i i   d  } | |  i j o |  i o d  Sn | |  _ t |  _ d |  i d <d |  i	 d <d |  i
 d <d |  i
 d <|  i
 i d d	  d
 |  i d |  i i   f |  i d <|  i |  i |  i d !} h  |  _ h  d d <|  _ x t |  D] \ } } d } xp t | i    D]\ \ } \ } } t |  |  i | | f <| d | | f 7} t |  |  i | | d f <qIW|  i
 i d	 | d  d  q$W|  i d  j	 o |  i   n d |  i
 d <t |  i  |  i i   }	 t |  i d  |  i i   }
 |  i i |	 |
  d  S(   Ni    i   Rh  R,  R-  RV   RU   s   1.0Rj   s   Development Set (%d/%d)R   Rf   s   %s/%s is   

i   (   Rm   Rx   R   R   R   R   R   R   RZ  RY  RS  R   RT  Rv   Rk  Ro  R   R  R   Rk   Rs   R  R  RV  R   (   Rb   R  t   samplet   sentnumt   sentt   linestrRv  RV   Rw  t   firstt   last(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s<    %#		$	  ! c         C   s   t    } d } xp | D]h } t | t  oH | i |  i j o! | i | | t |  f  n | t |  7} q | d 7} q W| S(   Ni    i   (   R   t
   isinstanceR   t   nodeRl   t   addR   (   Rb   t   treet   chunksRv  t   child(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyRq    s    	 !c         C   s  |  i  d  j o d  Sn |  i i d d d  |  i i d d d  |  i i d d d  |  i i d d d  x{t | i d   D]d\ } } | i   p q n t i	 d |  } d  } | i
 d	  oc | i d	  } d
 | d | i d	  f } d
 | d | i d	  f } |  i i d | |  n x t i d |  D] } | d  j	 o | i   | j o Pn d
 | d | i   f } d
 | d | i   f } | i
   d j o |  i i d | |  qD|  i i d | |  qDWq Wd  S(   NR7  s   1.0Rj   R8  R9  RJ   s   
s   (\\.|[^#])*(#.*)?i   s   %d.%di   s   [<>{}]s   <>(   R   Rm   R   t
   tag_removeRl  R   R  Ra   R_   RM   t   groupR  Rj   R  (   Rb   Rc   t   linenoR  R  t   comment_startR   R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR    s0       c         C   s   |  i  d  j o d  Sn |  i i d d d  g  |  _ x t | i d   D] \ } } t i d d |  } | i	   } | oY y t
 i i i |  Wq t j
 o/ } |  i i d d | d d	 | d  q XqM qM Wd
 |  i d <d  S(   NR6  s   1.0Rj   s   
s   ((\\.|[^#])*)(#.*)?s   \1s   %s.0i   s   %s.0 lineendRf   R   (   R   Rm   R   R  t   _grammarcheck_errsR   R  R_   R`   Ra   R   RL   R  R  Rr   Rl  R   (   Rb   Rc   R  R  R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyt   _grammarcheck  s    	 c      	   G   s  | o t  i    |  _ n |  i i d d  |  _ } |  i |  } | |  i j o d  Sn
 | |  _ |  i t |  i	  d j  o d |  i
 d <n |  i |  yN | o= g  } | i d  D] } | t i i i |  q ~ } n g  } Wn/ t j
 o# } |  i |  d  |  _ d  Sn Xt i |  |  _ |  i i d d d  t  i    |  _ |  i o |  i   n |  i   |  i p |  i   n d  S(   Ns   1.0Rj   i   s   Grammar:R   s   
R6  (   R   Ry   R   R   Rc   Rd   Rt   R{   R   Rz   RA  R  R  R   RL   R  R  Rr   R  Rm   Rs   Rp  R  Ru   R   R   R  R   R  (   Rb   t   eventRc   Rt   R  R  Rm  R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s6    	6
		


c         C   s>  | d  j o |  i |  i |  i d !} n |  i i d d d  |  i i d d d  |  i i d d d  x t |  D] \ } } |  i | i    } |  i |  } |  i |  } x* | i	 |  D] } |  i
 | | d  q Wx% | | D] } |  i
 | | d  q Wx% | | D] } |  i
 | | d  qWqz Wd  S(   Ni   s   true-poss   1.0Rj   s	   false-negs	   false-pos(   Rm   Rv   Rx   RS  R  R   R  R  Rq  Rr  Rs  (   Rb   R  R  Rt  Rx  Ry  Rz  R   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR  >  s&        c         C   sP   y |  i  i |  SWn5 t t f j
 o# } |  i i d d d  | Sn Xd  S(   NR6  s   1.0Rj   (   Rs   R  Rr   t
   IndexErrorR   Rl  (   Rb   t   wordsR   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR  U  s
    c         C   sf   | \ } } |  i  i | d |  i | |  i | | f f d |  i | |  i | | f d f  d  S(   Ns   %s.%si   (   RS  Rl  Ro  Rk  (   Rb   R  R   R   R  Rj   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyRs  `  s    !c         C   sd   d  |  _ d  |  _ d  |  _ d |  _ g  |  _ d |  _ |  i i d d  |  i	 d  |  i
   d  S(   Ni    s   1.0Rj   (   Rm   Rs   Rc   Rt   Ru   Rz   R{   R   R   R   R   (   Rb   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR   f  s    						s   # Regexp Chunk Parsing Grammar
# Saved %(date)s
#
# Development set: %(devset)s
#   Precision: %(precision)s
#   Recall:    %(recall)s
#   F-score:   %(fscore)s

%(grammar)s
c   	      C   sG  | p7 d d g } t  i d | d d  } | p d  Sq> n |  i oe |  i |  i |  i d d  j oA g  } |  i d d	 D] } | d
 d | q ~ \ } } } n1 |  i d  j o d } } } n d } } } t | d  } | i |  i	 t
 d t i   d |  i d | d | d | d |  i i     | i   d  S(   Ns   Chunk Gramamrs   .chunks	   All filesR   t	   filetypest   defaultextensionii    i   s   %.2f%%id   s   Grammar not well formeds   Not finished evaluation yetR   t   dateRv   R   R   R   Rc   (   s   Chunk Gramamrs   .chunk(   s	   All filesR   (   t   tkFileDialogt   asksaveasfilenameRz   Rt   Rd   Rs   Rm   t   opent   writet   SAVE_GRAMMAR_TEMPLATERn  R   t   ctimeRw   Rc   Ra   t   close(	   Rb   t   filenamet   ftypesR  t   vR   R   R   t   out(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR   ~  s&    		:c         C   s   | p7 d d g } t  i d | d d  } | p d  Sq> n |  i i d d  |  i   t |  i   } t i d	 d
 |  i	   } |  i i
 d |  |  i   d  S(   Ns   Chunk Gramamrs   .chunks	   All filesR   R  R  s   1.0Rj   s2   ^\# Regexp Chunk Parsing Grammar[\s\S]*F-score:.*
Rf   (   s   Chunk Gramamrs   .chunk(   s	   All filesR   (   R  t   askopenfilenameR   R   R   R  t   readR_   R`   t   lstripRk   (   Rb   R  R  Rc   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    		
	c   
      C   s  | p7 d d g } t  i d | d d  } | p d  Sq> n t | d  } | i d  | i d	 t i    | i d
 |  i  x t |  i  D] \ } \ } } } } d | d t	 |  i  | d | d | d f }	 | i d |	  | i d i
 d   | i   i   D   q W|  i o! |  i |  i |  i d d  j pa |  i d  j o | i d  n | i d  | i d i
 d   |  i i   i   D   n | i   d  S(   Ns   Chunk Gramamr Historys   .txts	   All filesR   R  R  R   s'   # Regexp Chunk Parsing Grammar History
s   # Saved %s
s   # Development set: %s
s>   Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, fscore=%.2f%%)i   id   s   
%s
Rf   c         s   s   x |  ] } d  | Vq Wd S(   s     %s
N(    (   Ri  R  (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pys	   <genexpr>  s    ii    s#   
Current Grammar (not well-formed)
s!   
Current Grammar (not evaluated)
c         s   s   x |  ] } d  | Vq Wd S(   s     %s
N(    (   Ri  R  (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pys	   <genexpr>  s    (   s   Chunk Gramamr Historys   .txt(   s	   All filesR   (   R  R  R  R  R   R  Rw   R   Rz   R   R~  Ra   R  Rt   Rd   Rs   Rm   Rc   R  (
   Rb   R  R  R  R   R  R  R  R  t   hdr(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s2    		 0!c      	   G   s^   d d } d } y- d d k  l } | d | d |  i   Wn t |  i | |  n Xd  S(   Ns%   NLTK RegExp Chunk Parser Application
s   Written by Edward Lopers2   About: Regular Expression Chunk Parser Applicationi(   t   Messaget   messageR   (   t   tkMessageBoxR  t   showt   ShowTextR   (   Rb   R   t   ABOUTt   TITLER  (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    c         C   sj   | d  j	 o |  i i |  n |  i i t t |  i  |  i i     |  i d  |  i d  d  S(   Ni   i    (   Rm   R   R   R   R   Rv   R   R   (   Rb   RZ   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    !+c         C   sy   | d  j	 o |  i i |  n |  i i   } |  i i d t |   |  i i d t d t |  d d   d  S(   NRZ   ii   i   (	   Rm   R   R   R   R   RO  t   absR   R   (   Rb   RZ   (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR     s    !c         O   s)   t    o d Sn |  i i | |   d S(   s   
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        N(   R   R   t   mainloop(   Rb   R  t   kwargs(    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR    s    (   s   HelpR;   s1  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their geometric average (the 'f-score'), are displayed in the status bar at the bottom of the window.(   R<   s   10s  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Chink rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
(   R>   s   10 60sX  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
	<regexp><\#><CD> # This is a comment...</regexp>
		Matches <match>"#/# 100/CD"</match>
</hangindent>(   i   (D   t   __name__t
   __module__t   __doc__Rn   RM  Rn  R   RQ  R   R  R  R  R  R?  RJ  RR  R]  t   _FONT_PARAMSR;  R   RG  t   _HELPTAB_BG_COLORt   _HELPTAB_FG_COLORR  R  RN  Rd   Rm   R   R   R   R   R   R   R   t   _DRAW_LINESR   R   R  R	  R   R   R   R#  RF  RI  R  R   R   R   RU  R   Rq  R  R  R   R  R  Rs  R   R  R   R   R   R   R   R   R  (    (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyR      s   							 $  !			e				8		g	D			<				$				*				1					
c           C   s   t    i   d  S(   N(   R   R  (    (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pyt   app  s    t   __main__R  (   R  R   t   textwrapR_   t   randomt   nltkR    R   R   R   t   nltk.draw.utilt   objectR   R  R  t   __all__(    (    (    s.   /p/zhu/06/nlp/nltk/nltk/app/chunkparser_app.pys   <module>   s   "
    	