³ò
ŒB_Kc           @   sk   d  d k  Z  d  d k l Z d  d k Td  d k Td e f d „  ƒ  YZ d „  Z e d j o e ƒ  n d S(   iÿÿÿÿN(   t
   deprecated(   t   *t   GAAClustererc           B   s†   e  Z d  Z d e e d „ Z e e d „ Z e d „ Z d „  Z	 d „  Z
 e d ƒ d „  ƒ Z d	 „  Z d
 „  Z d „  Z d „  Z RS(   sN  
    The Group Average Agglomerative starts with each of the N vectors as singleton
    clusters. It then iteratively merges pairs of clusters which have the
    closest centroids.  This continues until there is only one cluster. The
    order of merges gives rise to a dendrogram: a tree with the earlier merges
    lower than later merges. The membership of a given number of clusters c, 1
    <= c <= N, can be found by cutting the dendrogram at depth c.

    This clusterer uses the cosine similarity metric only, which allows for
    efficient speed-up in the clustering process. 
    i   c         C   s2   t  i |  | | ƒ | |  _ d  |  _ d  |  _ d  S(   N(   t   VectorSpaceClusterert   __init__t   _num_clusterst   Nonet   _dendrogramt   _groups_values(   t   selft   num_clusterst	   normaliset   svd_dimensions(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyR      s    		c         C   sO   t  g  } | D] } | t i | t i ƒ q ~ ƒ |  _ t i |  | | | ƒ S(   N(   t
   Dendrogramt   numpyt   arrayt   float64R   R   t   cluster(   R	   t   vectorst   assign_clusterst   tracet   _[1]t   vector(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyR   "   s    6c         C   s’  g  } | D] } | | g q ~ } t  i  | ƒ } xHt | ƒ t |  i d ƒ j o(d  } xž t t | ƒ ƒ D]Š } x t | d t | ƒ ƒ D]f }	 |  i | | t | | ƒ | |	 t | |	 ƒ ƒ }
 | p |
 | d j o |
 | |	 f } qŽ qŽ Wqn W| d \ } }	 | | | |	 } | o d | |	 f GHn | | | <| |	 =| | | |	 | | <| |	 =|  i i | |	 ƒ q6 W|  i	 |  i ƒ d  S(   Ni   i    s   merging %d and %d(
   t   copyt   lent   maxR   R   t   ranget   _average_similarityR   t   merget   update_clusters(   R	   R   R   R   R   t   clusterst
   vector_sumt   bestt   it   jt   simt   sum(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyt   cluster_vectorspace(   s,    $"  
c         C   sö   |  i  i | ƒ } g  |  _ xÂ | D]º } t | ƒ d j p t ‚ |  i o |  i | d ƒ } n t i | d ƒ } x= | d D]1 } |  i o | |  i | ƒ 7} q | | 7} q W| t	 t | ƒ ƒ :} |  i i
 | ƒ q" Wt |  i ƒ |  _ d  S(   Ni    i   (   R   t   groupst
   _centroidsR   t   AssertionErrort   _should_normaliset
   _normaliseR   R   t   floatt   appendR   (   R	   R
   R   R   t   centroidR   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyR   I   s     	 
 
c         C   sv   d  } xe t |  i ƒ D]T } |  i | } |  i | d | d ƒ } | p | | d j o | | f } q q W| d S(   Ni   i    (   R   R   R   R'   R   (   R	   R   R    R!   R-   R#   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyt   classify_vectorspace[   s     s$   Use GAAClusterer.dendrogram instead.c         C   s
   t  |  ƒ S(   N(   t
   dendrogram(   R	   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyt	   dendogramd   s    c         C   s   |  i  S(   si   
        @return: The dendrogram representing the current clustering
        @rtype:  Dendrogram
        (   R   (   R	   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyR/   h   s    c         C   s   |  i  S(   N(   R   (   R	   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyR
   o   s    c         C   s4   | | } | | } t  i | | ƒ | | | d S(   Ni   (   R   t   dot(   R	   t   v1t   l1t   v2t   l2R$   t   length(    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyR   r   s    

c         C   s   d |  i  S(   Ns*   <GroupAverageAgglomerative Clusterer n=%d>(   R   (   R	   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyt   __repr__w   s    (   t   __name__t
   __module__t   __doc__t   TrueR   R   t   FalseR   R%   R   R.   R    R0   R/   R
   R   R7   (    (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyR      s   !						c       	   C   sí   d d k  l }  g  } d d g d d g d d g d d g d d g d d g g D] } | t i | ƒ qQ ~ } |  i d ƒ } | i | t ƒ } d G| GHd	 G| GHd
 G| GHH| i ƒ  i ƒ  t i d d g ƒ } d | G| i | ƒ GHHd S(   sO   
    Non-interactive demonstration of the clusterers with simple 2-D data.
    iÿÿÿÿ(   R   i   i   i   i   i    s
   Clusterer:s
   Clustered:s   As:s   classify(%s):N(	   t   nltkR   R   R   R   R;   R/   t   showt   classify(   R   R   t   fR   t	   clustererR   R   (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pyt   demoz   s    `			t   __main__(	   R   t   nltk.internalsR    t   apit   utilR   R   RB   R8   (    (    (    s'   /p/zhu/06/nlp/nltk/nltk/cluster/gaac.pys   <module>   s   

k	