³ò
4ÒÇIc           @   s¤   d  d k  l Z d „  Z d „  Z e d ƒ d „  ƒ Z d „  Z d „  Z d „  Z d	 „  Z d
 „  Z	 d „  Z
 d „  Z d „  Z d „  Z e d j o e ƒ  n d S(   iÿÿÿÿ(   t
   deprecatedc         C   s   g  } x( t  |  ƒ D] } | i d g | ƒ q Wx" t  |  ƒ D] } | | | d <q> Wx" t  | ƒ D] } | | d | <qc W| S(   Ni    (   t   ranget   append(   t   len1t   len2t   levt   it   j(    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   _edit_dist_init   s       c         C   sj   |  | d | d } |  | d | d | | j } |  | | d d } t  | | | ƒ |  | | <d  S(   Ni   (   t   min(   R   R   R   t   c1t   c2t   at   bt   c(    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   _edit_dist_step   s     s   Use edit_distance() instead.c         C   s   t  |  | ƒ S(   N(   t   edit_distance(   t   s1t   s2(    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt	   edit_dist   s    c      	   C   s   t  |  ƒ } t  | ƒ } t | d | d ƒ } xQ t | ƒ D]C } x: t | ƒ D], } t | | d | d |  | | | ƒ qO Wq< W| | | S(   sC  
    Calculate the Levenshtein edit-distance between two strings.
    The edit distance is the number of characters that need to be
    substituted, inserted, or deleted, to transform s1 into s2.  For
    example, transforming "rain" to "shine" requires three steps,
    consisting of two substitutions and one insertion:
    "rain" -> "sain" -> "shin" -> "shine".  These operations could have
    been done in other orders, but at least three steps are needed.

    @param s1, s2: The strings to be analysed
    @type s1: C{string}
    @type s2: C{string}
    @rtype C{int}
    i   (   t   lenR   R   R   (   R   R   R   R   R   R   R   (    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyR   !   s      .c         C   s   |  | j o d Sn d Sd S(   s§   Simple equality test.

    0.0 if the labels are identical, 1.0 if they are different.

    >>> binary_distance(1,1)
    0.0

    >>> binary_distance(1,3)
    1.0
    g        g      ð?N(    (   t   label1t   label2(    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   binary_distance;   s    c         C   s?   t  |  i | ƒ ƒ t  |  i | ƒ ƒ t t  |  i | ƒ ƒ ƒ S(   s/   Distance metric comparing set-similarity.

    (   R   t   uniont   intersectiont   float(   R   R   (    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   jaccard_distanceM   s    c         C   s<   d t  t |  i | ƒ ƒ ƒ t  t t |  ƒ t | ƒ ƒ ƒ S(   s  Distance metric that takes into account partial agreement when multiple
    labels are assigned.

    >>> masi_distance(set([1,2]),set([1,2,3,4]))
    0.5

    Passonneau 2005, Measuring Agreement on Set-Valued Items (MASI) for Semantic and Pragmatic Annotation.
    i   (   R   R   R   t   max(   R   R   (    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   masi_distanceT   s    
c         C   s(   y t  |  | d ƒ SWn d GHn Xd S(   s¡   Krippendorff'1 interval distance metric

    >>> interval_distance(1,10)
    81

    Krippendorff 1980, Content Analysis: An Introduction to its Methodology
    i   s7   non-numeric labels not supported with interval distanceN(   t   pow(   R   R   (    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   interval_distancea   s    c            s   ‡  f d †  S(   s=   Higher-order function to test presence of a given label

    c            s   d  ˆ  |  j ˆ  | j j S(   g      ð?(    (   t   xt   y(   t   label(    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   <lambda>t   s    (    (   R"   (    (   R"   s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   presencep   s    c            s   ‡  f d †  S(   Nc            sÅ   t  t d  t |  ƒ ƒ t d  t | ƒ ƒ ƒ ˆ  |  j o
 ˆ  | j p} d ˆ  |  j o
 ˆ  | j p_ t  t d  t |  ƒ ƒ ƒ ˆ  |  j o
 ˆ  | j p+ t d  t | ƒ ƒ ˆ  |  j o
 ˆ  | j S(   g      ð?g        (   t   absR   R   (   R    R!   (   R"   (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyR#   x   s    (    (   R"   (    (   R"   s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   fractional_presencew   s    c            s‚   h  ‰  xl t  |  ƒ D]^ } | i ƒ  i d ƒ \ } } } t | g ƒ } t | g ƒ } t | ƒ ˆ  t | | g ƒ <q W‡  f d †  S(   Ns   	c            s   ˆ  t  |  | g ƒ S(    (   t	   frozenset(   R    R!   (   t   data(    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyR#   ‚   s    (   t   opent   stript   splitR'   R   (   t   filet   lt   labelAt   labelBt   dist(    (   R(   s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   custom_distance{   s      c          C   s—   d }  d } d Gt  |  | ƒ GHt d d d d g ƒ }  t d d d g ƒ } d	 G|  GHd
 G| GHd Gt |  | ƒ GHd Gt |  | ƒ GHd Gt |  | ƒ GHd  S(   Nt   raint   shines$   Edit distance between '%s' and '%s':i   i   i   i   i   s   s1:s   s2:s   Binary distance:s   Jaccard distance:s   MASI distance:(   R   t   setR   R   R   (   R   R   (    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pyt   demo‰   s    		t   __main__N(   t   nltk.internalsR    R   R   R   R   R   R   R   R   R$   R&   R1   R5   t   __name__(    (    (    s+   /p/zhu/06/nlp/nltk/nltk/metrics/distance.pys   <module>   s   	
										