³ò
dhKc           @   s·   d  d k  Z  d  d k Z d  d k Z d  d k Z d  d k l Z d  d k Z d  d k l Z d  d k	 l
 Z
 d  d k l Z d e f d „  ƒ  YZ d „  Z e d	 j o e ƒ  n d S(
   iÿÿÿÿN(   t   add(   t   ParserI(   t   DependencyGraph(   t   find_binaryt
   MaltParserc           B   sS   e  Z e d  „ Z e e d „ Z e d „ Z e d „ Z e d „ Z e d „ Z	 RS(   c         C   sj   |  i  ƒ  d |  _ t |  _ | d  j	 o | |  _ n1 t i i d d d d d d d d d g	 ƒ |  _ d  S(   Nt	   malt_temps   ^-?[0-9]+(.[0-9]+)?$t   CDs   (The|the|A|a|An|an)$t   ATs   .*able$t   JJs   .*ness$t   NNs   .*ly$t   RBs   .*s$t   NNSs   .*ing$t   VBGs   .*ed$t   VBDs   .*(   s   ^-?[0-9]+(.[0-9]+)?$R   (   s   (The|the|A|a|An|an)$s   AT(   s   .*able$R   (   s   .*ness$R	   (   s   .*ly$R
   (   s   .*s$R   (   s   .*ing$R   (   s   .*ed$R   (   s   .*R	   (	   t   config_maltt   mcot   Falset   _trainedt   Nonet   taggert   nltkt   tagt   RegexpTagger(   t   selfR   (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyt   __init__   s    
			c         C   sj   d d d d d d d d g } t  t t t i | ƒ ƒ } t d | d	 | d
 d g d d d | ƒ|  _ d S(   s“  
        Configure NLTK's interface to the C{malt} package.  This
        searches for a directory containing the malt jar
        
        @param bin: The full path to the C{malt} binary.  If not
            specified, then nltk will search the system for a C{malt}
            binary; and if one is not found, it will raise a
            C{LookupError} exception.
        @type bin: C{string}
        t   .s   /usr/lib/malt-1*s   /usr/share/malt-1*s   /usr/local/bins   /usr/local/malt-1*s   /usr/local/bin/malt-1*s   /usr/local/share/malt-1*s   malt.jart
   searchpatht   env_varst   MALTPARSERHOMEt   urls/   http://w3.msi.vxu.se/~jha/maltparser/index.htmlt   verboseN(   t   reduceR    t   mapt   globR   t	   _malt_bin(   R   t   binR   t
   _malt_patht	   malt_path(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyR   *   s    	c         C   s®  |  i  p t d ƒ ‚ n |  i p t d ƒ ‚ n t i i t i ƒ  d ƒ } t i i t i ƒ  d ƒ } d } | p' | d t i i t i ƒ  d ƒ 7} n d } zí t	 | d ƒ } xe t
 |  i i | i ƒ  ƒ ƒ D]E \ } \ } }	 | i d	 | d
 | d |	 |	 d d d d d f
 ƒ qÕ W| i d ƒ | i ƒ  d d |  i  d t i ƒ  d |  i d | d | d g }
 |  i |
 d | ƒ t i | ƒ SWd | o | i ƒ  n Xd S(   så   
        Use MaltParser to parse a sentence
        
        @param sentence: Input sentence to parse
        @type sentence: L{str}
        @return: C{DependencyGraph} the dependency graph representation of the sentence
        sA   MaltParser location is not configured.  Call config_malt() first.s1   Parser has not been trained.  Call train() first.s   malt_input.conlls   malt_output.conlls-   java -jar %s -w %s -c %s -i %s -o %s -m parses    > s   malt.outt   ws   %s	%s	%s	%s	%s	%s	%s	%s	%s	%s
i   t   _t   0t   as   
t   javas   -jar %ss   -w %ss   -c %ss   -i %ss   -o %ss   -m parset   parseN(   R"   t	   ExceptionR   t   ost   patht   joint   tempfilet
   gettempdirR   t   opent	   enumerateR   R   t   splitt   writet   closeR   t   _executeR   t   load(   R   t   sentenceR   t
   input_filet   output_filet   execute_stringt   ft   it   wordR   t   cmd(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyR+   J   s.    

' 	.
!c      
   C   sž   t  i i t i ƒ  d ƒ } d } zM t | d ƒ } | i d i g  } | D] } | | i d ƒ qJ ~ ƒ ƒ Wd | o | i	 ƒ  n X|  i
 | d | ƒd S(   sŸ   
        Train MaltParser from a list of C{DependencyGraph}s
        
        @param depgraphs: C{list} of C{DependencyGraph}s for training input data
        s   malt_train.conllR&   s   
i
   NR   (   R-   R.   R/   R0   R1   R   R2   R5   t   to_conllR6   t   train_from_file(   R   t	   depgraphsR   R:   R=   t   _[1]t   dg(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyt   trainp   s    >c         C   sá   |  i  p t d ƒ ‚ n d } t | d ƒ o] | } t i i t i ƒ  d ƒ } | i	 ƒ  i
 ƒ  } t	 | d ƒ } | i | ƒ | i ƒ  n d d |  i  d t i ƒ  d |  i d	 | d
 g } |  i | d | ƒ t |  _ d S(   s…   
        Train MaltParser from a file
        
        @param conll_file: C{str} for the filename of the training input data
        sA   MaltParser location is not configured.  Call config_malt() first.t   zipfiles   malt_train.conllR&   R*   s   -jar %ss   -w %ss   -c %ss   -i %ss   -m learnRF   N(   R"   R,   R   t   hasattrR-   R.   R/   R0   R1   R2   t   readR5   R6   R   R7   t   TrueR   (   R   t
   conll_fileR   R=   t   zip_conll_filet	   conll_strR@   (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyRB      s    
c         C   s]   | p: t  i i t i ƒ  d ƒ } | i d | | f d ƒ n t  i d i | ƒ ƒ } d  S(   Nt    s!    > %smalt_%s.out 2> %smalt_%s.erri   t    (   R-   R.   R/   R0   R1   t   appendt   system(   R   R@   t   typeR   t   temp_dirt	   malt_exit(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyR7       s    (
   t   __name__t
   __module__R   R   R   R   R+   RF   RB   R7   (    (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyR      s    &c          C   s„   t  d ƒ }  t  d ƒ } t } t ƒ  } | i |  | g d | ƒ| i d d | ƒi ƒ  i ƒ  GH| i d d | ƒi ƒ  i ƒ  GHd  S(   NsI  1    John    _    NNP   _    _    2    SUBJ    _    _
                             2    sees    _    VB    _    _    0    ROOT    _    _
                             3    a       _    DT    _    _    4    SPEC    _    _
                             4    dog     _    NN    _    _    2    OBJ     _    _
                          s£   1    John    _    NNP   _    _    2    SUBJ    _    _
                             2    walks   _    VB    _    _    0    ROOT    _    _
                          R   s   John sees Marys
   a man runs(   R   R   R   RF   R+   t   treet   pprint(   t   dg1t   dg2R   t
   maltParser(    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pyt   demo§   s    			 t   __main__(   R-   R0   t
   subprocessR!   t   operatorR    R   t   apiR   t   dependencygraphR   t   nltk.internalsR   R   R\   RU   (    (    (    s%   /p/zhu/06/nlp/nltk/nltk/parse/malt.pys   <module>	   s   “	