³ς
B_Kc           @   s;   d  e  f d     YZ d   Z e d j o e   n d S(   t   ConfusionMatrixc           B   sS   e  Z d  Z e d  Z d   Z d   Z d   Z e e e	 e d  Z
 d   Z RS(   s|  
    The confusion matrix between a list of reference values and a
    corresponding list of test values.  Entry [M{r},M{t}] of this
    matrix is a count of the number of times that the reference value
    M{r} corresponds to the test value M{t}.  E.g.:

        >>> ref  = 'DET NN VB DET JJ NN NN IN DET NN'.split()
        >>> test = 'DET VB VB DET NN NN NN IN DET NN'.split()
        >>> cm = ConfusionMatrix(ref, test)
        >>> print cm['NN', 'NN']
        3

    Note that the diagonal entries (M{Ri}=M{Tj}) of this matrix
    corresponds to correct values; and the off-diagonal entries
    correspond to incorrect values.
    c            s«  t  |  t  |  j o t d   n | oJ t |    t |      f d   } t t | |  d | } n t t | |   } t d   t |  D  } g  } | D]( } | g  }	 | D] } |	 d qΛ ~	 q· ~  d }
 xX t | |  D]G \ } }  | | | | c d 7<t |
  | | | |  }
 qώ W| |  _	 | |  _
  |  _ |
 |  _ t  |  |  _ t  f d   t t  |   D  |  _ d S(	   sΣ  
        Construct a new confusion matrix from a list of reference
        values and a corresponding list of test values.
        
        @type reference: C{list}
        @param reference: An ordered list of reference values.
        @type test: C{list}
        @param test: A list of values to compare against the
            corresponding reference values.
        @raise ValueError: If C{reference} and C{length} do not have
            the same length.
        s    Lists must have the same length.c            s    |    |  S(   N(    (   t   v(   t
   test_fdistt	   ref_fdist(    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyt   key/   s    R   c         s   s%   x |  ] \ } } | | f Vq Wd  S(   N(    (   t   .0t   it   val(    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pys	   <genexpr>5   s    i    i   c         3   s!   x |  ] }   | | Vq Wd  S(   N(    (   R   R   (   t	   confusion(    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pys	   <genexpr>I   s    N(   t   lent
   ValueErrort   FreqDistt   sortedt   sett   dictt	   enumeratet   zipt   maxt   _valuest   _indicest
   _confusiont	   _max_conft   _totalt   sumt   ranget   _correct(   t   selft	   referencet   testt   sort_by_countR   t   valuest   indicest   _[1]R   t   _[2]t   max_conft   wt   g(    (   R   R   R   s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyt   __init__   s*     < #				c         C   s5   | \ } } |  i  | } |  i  | } |  i | | S(   s   
        @return: The number of times that value C{li} was expected and
        value C{lj} was given.
        @rtype: C{int}
        (   R   R   (   R   t   .1t   lit   ljR   t   j(    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyt   __getitem__K   s   c         C   s   d |  i  |  i f S(   Ns    <ConfusionMatrix: %s/%s correct>(   R   R   (   R   (    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyt   __repr__U   s    	c         C   s
   |  i    S(   N(   t   pp(   R   (    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyt   __str__Y   s    c            s    i  }   i } | o t | d   f d   } n | o | |  } n | o+ g  } | D] } | t |  q_ ~ }	 n8 g  }
 t t |   D] } |
 t | d  q ~
 }	 t d   |	 D  } d | d } | o d } d } d	 } n2 t   i  } d | d
 } d | d d } d } x t |  D] } | d | d 7} xd |	 D]\ } | | t |  j o- | | | | t |  i | d  7} q[| d | d 7} q[W| d 7} q<W| d d | d | d t |  f 7} xt	 |	 |  D]ό \ } }   i
 | } | | | 7} xΘ | D]ΐ }   i
 | } | | | d j o | | 7} nC | o% | | d | | |   i 7} n | | | | | 7} | | j o1 | i d  } | |  d | | d d } q1| d 7} q1W| d 7} qW| d d | d | d t |  f 7} | d 7} | pC | d 7} x6 t |  D]$ \ } } | d | d | f 7} qUWn | S(   s  
        @return: A multi-line string representation of this confusion
        matrix.
        @type truncate: int
        @param truncate: If specified, then only show the specified
            number of values.  Any sorting (e.g., sort_by_count)
            will be performed before truncation.
        @param sort_by_count: If true, then sort by the count of each
            label in the reference data.  I.e., labels that occur more
            frequently in the reference label will be towards the left
            edge of the matrix, and labels that occur less frequently
            will be towards the right edge.
        @todo: add marginals?
        R   c            s   t    i   i |   S(    (   R   R   R   (   R   (   R   (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyt   <lambda>p   s    i   c         s   s   x |  ] } t  |  Vq Wd  S(   N(   R	   (   R   R   (    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pys	   <genexpr>|   s    t   %s   s | i   s   %5.1f%%s        .t   dt    t   .t    s    |s    |
s	   %s-+-%s+
t   -i    g      Y@t   <t   >s   |
s   (row = reference; col = test)
s   Value key:
s   %6d: %s
(   R   R   R   t   strR   R	   R   R   t   rjustR   R   R   t   rfindR   (   R   t   show_percentst   values_in_chartt   truncateR   R   R   R    R   t   value_stringsR!   t   nt   valuelent   value_formatt   entrylent   entry_formatt   zerostrt   sR   R'   R(   R)   t	   prevspacet   value(    (   R   s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyR,   \   sl    		+7
  -*  %"*

  c         C   sn   |  i  } d } t t |  d  } d | d } x2 t t |   D] } | | | | | f 7} qH W| S(   Ns   Value key:
i   s     %s   d: %s
(   R   R	   R   (   R   R   R7   t   indexlent
   key_formatR   (    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyR   ΄   s    	 (   t   __name__t
   __module__t   __doc__t   FalseR%   R*   R+   R-   t   Truet   NoneR,   R   (    (    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyR    	   s   0	
		Wc          C   s[   d i    }  d i    } d G|  GHd G| GHd GHt |  |  GHt |  |  i d t  GHd  S(   Ns    DET NN VB DET JJ NN NN IN DET NNs    DET VB VB DET NN NN NN IN DET NNs   Reference =s	   Test    =s   Confusion matrix:R   (   t   splitR    R,   RM   (   R   R   (    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pyt   demoΎ   s    		t   __main__N(   t   objectR    RP   RI   (    (    (    s2   /p/zhu/06/nlp/nltk/nltk/metrics/confusionmatrix.pys   <module>	   s   ΅		