³ò
ŒB_Kc           @   s’   d  d k  Z  d  d k Z d  d k Z d  d k Z d  d k Td e f d „  ƒ  YZ d „  Z d „  Z d e	 f d „  ƒ  YZ
 d	 e	 f d
 „  ƒ  YZ d S(   iÿÿÿÿN(   t   *t   VectorSpaceClustererc           B   sk   e  Z d  Z e e d „ Z e e d „ Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z d „  Z d	 „  Z RS(
   s©   
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    c         C   s   d |  _ | |  _ | |  _ d S(   s*  
        @param normalise:       should vectors be normalised to length 1
        @type normalise:        boolean
        @param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        @type svd_dimensions:   int 
        N(   t   Nonet   _Ttt   _should_normaliset   _svd_dimensions(   t   selft	   normaliset   svd_dimensions(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   __init__   s    		c         C   sm  t  | ƒ d j p t ‚ |  i o t |  i | ƒ } n |  i oÖ |  i t  | d ƒ j  o¼ t i i t i	 t
 | ƒ ƒ ƒ \ } } } | |  i  t i |  i t i ƒ } | d  d  … d  |  i … f } | d  |  i … d  d  … f }	 t i	 t i | |	 ƒ ƒ } t i	 | ƒ |  _ n |  i | | ƒ | o8 |  i G| GHg  }
 | D] } |
 |  i | ƒ qH~
 Sn d  S(   Ni    (   t   lent   AssertionErrorR   t   mapt
   _normaliseR   t   numpyt   linalgt   svdt	   transposet   arrayt   identityt   Float64t   matrixmultiplyR   t   cluster_vectorspacet   classify(   R   t   vectorst   assign_clusterst   tracet   ut   dt   vtt   St   Tt   Dtt   _[1]t   vector(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   cluster!   s    
$*
c         C   s   t  ƒ  ‚ d S(   sD   
        Finds the clusters using the given set of vectors.
        N(   R   (   R   R   R   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR   :   s    c         C   sb   |  i  o |  i | ƒ } n |  i d  j o t i |  i | ƒ } n |  i | ƒ } |  i | ƒ S(   N(   R   R   R   R   R   R   t   classify_vectorspacet   cluster_name(   R   R"   R#   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR   @   s    
c         C   s   t  ƒ  ‚ d S(   sN   
        Returns the index of the appropriate cluster for the vector.
        N(   R   (   R   R"   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR$   H   s    c         C   sV   |  i  o |  i | ƒ } n |  i d  j o t i |  i | ƒ } n |  i | | ƒ S(   N(   R   R   R   R   R   R   t   likelihood_vectorspace(   R   R"   t   label(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt
   likelihoodN   s
    
c         C   s,   |  i  | ƒ } | | j o d Sn d Sd S(   sP   
        Returns the likelihood of the vector belonging to the cluster.
        g      ð?g        N(   R$   (   R   R"   R#   t	   predicted(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR&   U   s    c         C   sJ   |  i  o |  i | ƒ } n |  i t j o t i |  i | ƒ } n | S(   sU   
        Returns the vector after normalisation and dimensionality reduction
        (   R   R   R   R   R   R   (   R   R"   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR"   ]   s
    
c         C   s   | t  i t i | | ƒ ƒ S(   s7   
        Normalises the vector to unit length.
        (   t   matht   sqrtR   t   dot(   R   R"   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR   g   s    (   t   __name__t
   __module__t   __doc__t   FalseR   R	   R#   R   R   R$   R(   R&   R"   R   (    (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR      s   						
c         C   s#   |  | } t  i t i | | ƒ ƒ S(   s}   
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    (   R*   R+   R   R,   (   R   t   vt   diff(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   euclidean_distancem   s    
c         C   sB   t  i |  | ƒ t i t  i |  |  ƒ ƒ t i t  i | | ƒ ƒ S(   se   
    Returns the cosine of the angle between vectors v and u. This is equal to
    u.v / |u||v|.
    (   R   R,   R*   R+   (   R   R1   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   cosine_distanceu   s    t   _DendrogramNodec           B   s,   e  Z d  Z d „  Z e d „ Z d „  Z RS(   s    Tree node of a dendrogram. c         G   s   | |  _  | |  _ d  S(   N(   t   _valuet	   _children(   R   t   valuet   children(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR	      s    	c         C   sb   |  i  o8 g  } x' |  i  D] } | i | i | ƒ ƒ q W| Sn | o |  i g Sn |  g Sd  S(   N(   R7   t   extendt   leavesR6   (   R   t   valuesR;   t   child(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR;   ƒ   s    

 c         C   sê   |  i  |  f g } x¡ t | ƒ | j  o | i ƒ  \ } } | i p | i | | f ƒ Pn xH | i D]= } | i o | i | i  | f ƒ qf | i d | f ƒ qf W| i ƒ  q Wg  } x' | D] \ } } | i | i ƒ  ƒ qÃ W| S(   Ni    (   R6   R
   t   popR7   t   pusht   appendt   sortR;   (   R   t   nt   queuet   priorityt   nodeR=   t   groups(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyRF   Ž   s"    

 
 (   R-   R.   R/   R	   t   TrueR;   RF   (    (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR5   |   s   	t
   Dendrogramc           B   s>   e  Z d  Z g  d „ Z d „  Z d „  Z d „  Z d „  Z RS(   s  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c         C   sL   g  } | D] } | t  | ƒ q ~ |  _ t i |  i ƒ |  _ d |  _ d S(   ss   
        @param  items: the items at the leaves of the dendrogram
        @type   items: sequence of (any)
        i   N(   R5   t   _itemst   copyt   _original_itemst   _merge(   R   t   itemsR!   t   item(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyR	   «   s    *c         G   s‘   t  | ƒ d j p t ‚ t |  i g  } | D] } | |  i | q. ~ Œ } |  i d 7_ | |  i | d <x | d D] } |  i | =qy Wd S(   s=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        @param  indices: indices of the items to merge (at least two)
        @type   indices: seq of int
        i   i   i    N(   R
   R   R5   RL   RI   (   R   t   indicesR!   t   iRE   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   merge´   s    	4 c         C   sI   t  |  i ƒ d j o t |  i |  i Œ } n |  i d } | i | ƒ S(   s’   
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        @param  n: number of groups
        @type   n: int
        i   i    (   R
   RI   R5   RL   RF   (   R   RB   t   root(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyRF   Ä   s    c            s  d \ } } } t  |  i ƒ d j o t |  i |  i Œ } n |  i d } |  i } g  } | D] } | t | i ƒ q_ ~ } t t t  | ƒ ƒ d ‰ ˆ d ‰ ˆ ˆ d ‰  d d ‡  ‡ f d † }	 d	 „  }
 | i | f g } g  } | D] } | |	 d ƒ qí ~ } xÅ| o½| i	 ƒ  \ } } t d
 „  | i
 ƒ } t | i | ƒ } | o t | ƒ } t | ƒ } n xé t t  | ƒ ƒ D]Õ } | | | j ox | | j o |
 |	 | d | ƒ ƒ n> | | j o |
 |	 | | d ƒ ƒ n |
 |	 | | | ƒ ƒ |	 | ƒ | | <q‚| | j o
 | j n o |
 |	 | | | ƒ ƒ q‚|
 | | ƒ q‚W|
 d ƒ x5 | i
 D]* } | i
 o | i | i | f ƒ qoqoW| i ƒ  x | D] } |
 | ƒ q®W|
 d ƒ qW|
 d i ‡ f d †  | Dƒ ƒ ƒ |
 d ƒ d S(   sD   
        Print the dendrogram in ASCII art to standard out.
        t   +t   -t   |i   i    i   t    c            s   d ˆ | |  | ˆ  f S(   Ns   %s%s%s(    (   t   centret   leftt   right(   t   rhalft   lhalf(    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   formatæ   s    c         S   s   t  i i |  ƒ d  S(   N(   t   syst   stdoutt   write(   t   str(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   displayè   s    c         S   s   |  i  t ƒ d  S(   i    (   R;   R0   (   t   c(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   <lambda>ð   s    s   
t    c         3   s"   x |  ] } | i  ˆ  ƒ Vq Wd  S(   N(   t   center(   t   .0RN   (   t   width(    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pys	   <genexpr>
  s    N(   RS   RT   RU   (   R
   RI   R5   RL   RK   R`   R6   t   maxR   R>   R7   t   indext   mint   rangeR@   RA   t   join(   R   t   JOINt   HLINKt   VLINKRR   R;   R!   t   leaft   last_rowR\   Ra   RC   t   _[2]t	   verticalsRD   RE   t   child_left_leafRO   t   min_idxt   max_idxRP   R=   t   vertical(    (   RZ   Rg   R[   s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   showÐ   sT    	*
	'
 ''

 

 #c         C   sY   t  |  i ƒ d j o t |  i |  i Œ } n |  i d } | i t ƒ } d t  | ƒ S(   Ni   i    s   <Dendrogram with %d leaves>(   R
   RI   R5   RL   R;   R0   (   R   RR   R;   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyt   __repr__  s
    (   R-   R.   R/   R	   RQ   RF   Rx   Ry   (    (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pyRH   £   s   				=(   RJ   R]   R*   R   t   apit   ClusterIR   R3   R4   t   objectR5   RH   (    (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/util.pys   <module>   s   
^		'