³ò
4ÒÇIc           @   sÛ   d  d k  Z  d  d k Z d  d k Z d  d k Z d  d k l Z d  d k l Z d  d k l Z d  d k l	 Z	 d  d k
 l Z d  d k l Z d  d k l Z d	 e f d
 „  ƒ  YZ d „  Z e d j o e ƒ  n d S(   iÿÿÿÿN(   t   add(   t   data(   t   tokenize(   t   tag(   t   ParserI(   t   DependencyGraph(   t   find_binaryt
   MaltParserc           B   sS   e  Z e d  „ Z e e d „ Z e d „ Z e d „ Z e d „ Z e d „ Z	 RS(   c         C   sg   |  i  ƒ  d |  _ t |  _ | d  j	 o | |  _ n. t i d d d d d d d d d g	 ƒ |  _ d  S(   Nt	   malt_temps   ^-?[0-9]+(.[0-9]+)?$t   CDs   (The|the|A|a|An|an)$t   ATs   .*able$t   JJs   .*ness$t   NNs   .*ly$t   RBs   .*s$t   NNSs   .*ing$t   VBGs   .*ed$t   VBDs   .*(   s   ^-?[0-9]+(.[0-9]+)?$R	   (   s   (The|the|A|a|An|an)$s   AT(   s   .*able$R   (   s   .*ness$R   (   s   .*ly$R   (   s   .*s$R   (   s   .*ing$R   (   s   .*ed$R   (   s   .*R   (   t   config_maltt   mcot   Falset   _trainedt   Nonet   taggerR   t   RegexpTagger(   t   selfR   (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyt   __init__   s    
		c         C   sg   d d d d d d d g } t  t t t i | ƒ ƒ } t d | d | d	 d
 g d d d | ƒ|  _ d S(   s“  
        Configure NLTK's interface to the C{malt} package.  This
        searches for a directory containing the malt jar
        
        @param bin: The full path to the C{malt} binary.  If not
            specified, then nltk will search the system for a C{malt}
            binary; and if one is not found, it will raise a
            C{LookupError} exception.
        @type bin: C{string}
        t   .s   /usr/lib/malt-1*s   /usr/local/bins   /usr/local/malt-1*s   /usr/local/bin/malt-1*s   /usr/local/share/malt-1*s   malt.jart
   searchpatht   env_varst   MALTPARSERHOMEt   urls/   http://w3.msi.vxu.se/~jha/maltparser/index.htmlt   verboseN(   t   reduceR    t   mapt   globR   t	   _malt_bin(   R   t   binR   t
   _malt_patht	   malt_path(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyR   +   s    	c         C   s®  |  i  p t d ƒ ‚ n |  i p t d ƒ ‚ n t i i t i ƒ  d ƒ } t i i t i ƒ  d ƒ } d } | p' | d t i i t i ƒ  d ƒ 7} n d } zí t	 | d ƒ } xe t
 |  i i | i ƒ  ƒ ƒ D]E \ } \ } }	 | i d	 | d
 | d |	 |	 d d d d d f
 ƒ qÕ W| i d ƒ | i ƒ  d d |  i  d t i ƒ  d |  i d | d | d g }
 |  i |
 d | ƒ t i | ƒ SWd | o | i ƒ  n Xd S(   så   
        Use MaltParser to parse a sentence
        
        @param sentence: Input sentence to parse
        @type sentence: L{str}
        @return: C{DependencyGraph} the dependency graph representation of the sentence
        sA   MaltParser location is not configured.  Call config_malt() first.s1   Parser has not been trained.  Call train() first.s   malt_input.conlls   malt_output.conlls-   java -jar %s -w %s -c %s -i %s -o %s -m parses    > s   malt.outt   ws   %s	%s	%s	%s	%s	%s	%s	%s	%s	%s
i   t   _t   0t   as   
t   javas   -jar %ss   -w %ss   -c %ss   -i %ss   -o %ss   -m parset   parseN(   R#   t	   ExceptionR   t   ost   patht   joint   tempfilet
   gettempdirR   t   opent	   enumerateR   R   t   splitt   writet   closeR   t   _executeR   t   load(   R   t   sentenceR   t
   input_filet   output_filet   execute_stringt   ft   it   wordR   t   cmd(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyR,   J   s.    

' 	.
!c      
   C   sž   t  i i t i ƒ  d ƒ } d } zM t | d ƒ } | i d i g  } | D] } | | i d ƒ qJ ~ ƒ ƒ Wd | o | i	 ƒ  n X|  i
 | d | ƒd S(   sŸ   
        Train MaltParser from a list of C{DependencyGraph}s
        
        @param depgraphs: C{list} of C{DependencyGraph}s for training input data
        s   malt_train.conllR'   s   
i
   NR   (   R.   R/   R0   R1   R2   R   R3   R6   t   to_conllR7   t   train_from_file(   R   t	   depgraphsR   R;   R>   t   _[1]t   dg(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyt   trainp   s    >c         C   sn   |  i  p t d ƒ ‚ n d d |  i  d t i ƒ  d |  i d | d g } |  i | d | ƒ t |  _ d	 S(
   s…   
        Train MaltParser from a file
        
        @param conll_file: C{str} for the filename of the training input data
        sA   MaltParser location is not configured.  Call config_malt() first.R+   s   -jar %ss   -w %ss   -c %ss   -i %ss   -m learnRG   N(   R#   R-   R1   R2   R   R8   t   TrueR   (   R   t
   conll_fileR   RA   (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyRC      s    
c         C   s]   | p: t  i i t i ƒ  d ƒ } | i d | | f d ƒ n t  i d i | ƒ ƒ } d  S(   Nt    s!    > %smalt_%s.out 2> %smalt_%s.erri   t    (   R.   R/   R0   R1   R2   t   appendt   system(   R   RA   t   typeR   t   temp_dirt	   malt_exit(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyR8   –   s    (
   t   __name__t
   __module__R   R   R   R   R,   RG   RC   R8   (    (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyR      s   &c          C   s„   t  d ƒ }  t  d ƒ } t } t ƒ  } | i |  | g d | ƒ| i d d | ƒi ƒ  i ƒ  GH| i d d | ƒi ƒ  i ƒ  GHd  S(   NsI  1    John    _    NNP   _    _    2    SUBJ    _    _
                             2    sees    _    VB    _    _    0    ROOT    _    _
                             3    a       _    DT    _    _    4    SPEC    _    _
                             4    dog     _    NN    _    _    2    OBJ     _    _
                          s£   1    John    _    NNP   _    _    2    SUBJ    _    _
                             2    walks   _    VB    _    _    0    ROOT    _    _
                          R   s   John sees Marys
   a man runs(   R   R   R   RG   R,   t   treet   pprint(   t   dg1t   dg2R   t
   maltParser(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyt   demo   s    			 t   __main__(   R.   R1   t
   subprocessR"   t   operatorR    t   nltkR   R   R   t   apiR   t   dependencygraphR   t   nltk.internalsR   R   RX   RQ   (    (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pys   <module>   s   ˆ	