HashMap.hpp

Go to the documentation of this file.
00001 #ifndef wali_HASH_MAP_GUARD
00002 #define wali_HASH_MAP_GUARD 1
00003 
00004 /**
00005  * @author Nicholas Kidd
00006  */
00007 
00008 // Disable name truncation in Visual Studio
00009 #if defined(_WIN32) && _MSC_VER > 1000
00010 #   pragma warning(disable: 4786)
00011 #endif
00012 
00013 #include <climits> // ULONG_MAX
00014 #include <utility>  // std::pair
00015 #include <functional>
00016 #include <iostream>
00017 #include "wali/hm_hash.hpp"
00018 #define HASHMAP_GROWTH_FRACTION 0.75
00019 #define HASHMAP_SHRINK_FRACTION 0.25
00020 
00021 
00022 /*
00023  * TODO??:  make erase shrink the number of buckets
00024  * TODO??:  make GROWTH|HASHMAP_SHRINK_FRACTION member vars vs. static
00025  */
00026 
00027 namespace wali
00028 {
00029 
00030 #if defined(PI_STATS_DETAIL) && PI_STATS_DETAIL
00031   // totalHashMapnumBuckets of all HashMaps
00032   extern long totalHashMapnumBuckets;
00033 #endif /* PI_STATS_DETAIL */
00034 
00035   // Pre decls
00036   template< typename Key,
00037     typename Data,
00038     typename HashFunc,
00039     typename EqualFunc > class HashMap;
00040 
00041   template< typename Key,
00042     typename Data,
00043     typename HashFunc,
00044     typename EqualFunc > class HashMapIterator;
00045 
00046   template< typename Key,
00047     typename Data,
00048     typename HashFunc,
00049     typename EqualFunc > class HashMapConstIterator;
00050 
00051   // FIXME
00052   template< typename Value > struct Bucket
00053   {
00054     Bucket( const Value& v, Bucket *n=0 )
00055       : value(v),next(n) {}
00056     Value value;
00057     Bucket *next;
00058   };
00059 
00060   /**
00061    * One should always use:
00062    *      HashMap< a,b,c,d >::iterator
00063    *              or
00064    *      HashMap< a,b,c,d >::const_iterator
00065    *
00066    * This keeps with abstraction.
00067    */
00068   template< typename Key,
00069     typename Data,
00070     typename HashFunc,
00071     typename EqualFunc >
00072       class HashMapIterator
00073       {
00074         public:
00075           friend class HashMap< Key,Data,HashFunc,EqualFunc >;
00076           friend class HashMapConstIterator< Key,Data,HashFunc,EqualFunc >;
00077 
00078         public:
00079           typedef HashMapIterator< Key,Data,HashFunc,EqualFunc >      iterator;
00080           typedef HashMapConstIterator< Key,Data,HashFunc,EqualFunc > const_iterator;
00081           typedef HashMap< Key,Data,HashFunc,EqualFunc >              hashmap_type;
00082           typedef std::pair< Key,Data >                               pair_type;
00083           typedef pair_type                                           value_type;
00084           typedef size_t                                              size_type;
00085           typedef Bucket< value_type >                                bucket_type;
00086 
00087 
00088           HashMapIterator() : bucket(0),hashMap(0) {}
00089 
00090           HashMapIterator( bucket_type *bkt,hashmap_type *hmap )
00091             : bucket( bkt ),hashMap(hmap) {}
00092 
00093           inline value_type *operator->()
00094           {
00095             return &(bucket->value);
00096           }
00097 
00098           inline value_type& operator*()
00099           {
00100             return bucket->value;
00101           }
00102 
00103           inline bool operator==( const iterator& right )
00104           {
00105             return right.bucket == bucket;
00106           }
00107 
00108           inline bool operator!=( const iterator& right )
00109           {
00110             return right.bucket != bucket;
00111           }
00112 
00113           inline iterator operator++()
00114           {
00115             return this->operator++(60);
00116           }
00117 
00118           HashMapIterator operator++( int )
00119           {
00120             bucket_type *old = bucket;
00121             bucket = bucket->next;
00122             if( !bucket ) {
00123               size_type bktNum = hashMap->bucketFromValue( old->value );
00124               while( !bucket && ++bktNum < hashMap->numBuckets ) {
00125                 bucket = hashMap->buckets[bktNum];
00126               }
00127             }
00128             return *this;
00129           }
00130 
00131         protected:
00132           bucket_type  *bucket;
00133           hashmap_type *hashMap;
00134 
00135       };
00136 
00137   template< typename Key,
00138     typename Data,
00139     typename HashFunc,
00140     typename EqualFunc >
00141       class HashMapConstIterator
00142       {
00143         friend class HashMap< Key,Data,HashFunc,EqualFunc >;
00144         friend class HashMapIterator< Key,Data,HashFunc,EqualFunc >;
00145 
00146         public:
00147           typedef HashMapIterator< Key,Data,HashFunc,EqualFunc >      iterator;
00148           typedef HashMapConstIterator< Key,Data,HashFunc,EqualFunc > const_iterator;
00149           typedef HashMap< Key,Data,HashFunc,EqualFunc >              hashmap_type;
00150           typedef std::pair< Key,Data >                               pair_type;
00151           typedef pair_type                                           value_type;
00152           typedef size_t                                              size_type;
00153           typedef Bucket< value_type >                                bucket_type;
00154 
00155           HashMapConstIterator() : bucket(0),hashMap(0) {}
00156 
00157           HashMapConstIterator( const bucket_type *bkt,const hashmap_type *hmap )
00158             : bucket( bkt ),hashMap(hmap) {}
00159 
00160           HashMapConstIterator( const iterator& it )
00161             : bucket( it.bucket ),hashMap( it.hashMap ) {}
00162 
00163           inline const value_type *operator->()
00164           {
00165             return &(bucket->value);
00166           }
00167 
00168           inline const value_type& operator*()
00169           {
00170             return bucket->value;
00171           }
00172 
00173           inline bool operator==( const const_iterator& right )
00174           {
00175             return right.bucket == bucket;
00176           }
00177 
00178           inline bool operator!=( const const_iterator& right )
00179           {
00180             return right.bucket != bucket;
00181           }
00182 
00183           const_iterator operator++()
00184           {
00185             return this->operator++(60);
00186           }
00187 
00188           HashMapConstIterator operator++( int )
00189           {
00190             const bucket_type *old = bucket;
00191             bucket = bucket->next;
00192             if( !bucket ) {
00193               size_type bktNum = hashMap->bucketFromValue( old->value );
00194               while( !bucket && ++bktNum < hashMap->numBuckets ) {
00195                 bucket = hashMap->buckets[bktNum];
00196               }
00197             }
00198             return *this;
00199           }
00200 
00201         protected:
00202           const bucket_type  *bucket;
00203 
00204           const hashmap_type *hashMap;
00205       };
00206 
00207 
00208   /**
00209    * class HashMap
00210    *
00211    * Notes:
00212    *      equal_to is part of the STL.
00213    */
00214   template< typename Key,
00215     typename Data,
00216     typename HashFunc = hm_hash< Key >,
00217     typename EqualFunc = hm_equal< Key > >
00218       class HashMap 
00219       {
00220 
00221         public:     // typedef
00222           typedef HashMapIterator< Key,Data,HashFunc,EqualFunc >      iterator;
00223           typedef HashMapConstIterator< Key,Data,HashFunc,EqualFunc > const_iterator;
00224           typedef HashMap< Key,Data,HashFunc,EqualFunc >              hashmap_type;
00225           typedef std::pair< Key,Data >                               pair_type;
00226           typedef pair_type                                           value_type;
00227           typedef size_t                                              size_type;
00228           typedef Bucket< value_type >                                bucket_type;
00229 
00230           // friend iterator
00231           friend class HashMapIterator<Key,Data,HashFunc,EqualFunc>;
00232           friend class HashMapConstIterator<Key,Data,HashFunc,EqualFunc>;
00233 
00234           enum enum_size_type_max { SIZE_TYPE_MAX = ULONG_MAX };
00235 
00236         public:     // con/destructor
00237           HashMap( size_type the_size=47 )
00238             : numValues(0),numBuckets(the_size), 
00239               growthFactor( static_cast<double>(the_size) * HASHMAP_GROWTH_FRACTION ), 
00240               shrinkFactor( static_cast<double>(the_size) * HASHMAP_SHRINK_FRACTION )
00241         { initBuckets(); }
00242 
00243           HashMap( const HashMap& hm )
00244           {
00245             operator=(hm);
00246           }
00247 
00248           HashMap& operator=( const HashMap& hm ) {
00249             clear();
00250             for( const_iterator it = hm.begin() ; it != hm.end() ; it++ ) {
00251               insert(key(it),value(it));
00252             }
00253             return *this;
00254           }
00255 
00256           ~HashMap() {
00257             clear();
00258             releaseBuckets();
00259           }
00260 
00261         public:        // inline methods
00262           void clear()
00263           {
00264             for(size_type i=0;i<numBuckets;i++) {
00265               while( buckets[i] ) {
00266                 bucket_type *tmp = buckets[i];
00267                 buckets[i] = tmp->next;
00268                 releaseBucket( tmp );
00269               }
00270             }
00271             numValues = 0;
00272           }
00273 
00274           inline size_type size() const
00275           {
00276             return numValues;
00277           }
00278 
00279           inline size_type capacity() const
00280           {
00281             return numBuckets;
00282           }
00283 
00284           inline std::pair<iterator,bool> insert( const Key& k, const Data& d )
00285           {
00286             return insert( pair_type(k,d) );
00287           }
00288 
00289           void erase( const Key& key_to_erase )
00290           {
00291             iterator it = find( key_to_erase );
00292             if( it != end() )
00293               erase( it );
00294           }
00295 
00296           iterator begin()
00297           {
00298             for( size_type n = 0 ; n < numBuckets ; n++ )
00299               if( buckets[n] )
00300                 return iterator(buckets[n],this);
00301             return end();
00302           }
00303 
00304           inline iterator end()
00305           {
00306             return iterator( 0,this );
00307           }
00308 
00309           const_iterator begin() const
00310           {
00311             for( size_type n = 0 ; n < numBuckets ; n++ )
00312               if( buckets[n] )
00313                 return const_iterator(buckets[n],this);
00314             return end();
00315           }
00316 
00317           inline const_iterator end() const
00318           {
00319             return const_iterator(0,this);
00320           }
00321 
00322           Key & key( iterator & it )
00323           {
00324             return it->first;
00325           }
00326 
00327           const Key & key( const_iterator & it ) const
00328           {
00329             return it->first;
00330           }
00331 
00332           Data & value( iterator & it )
00333           {
00334             return it->second;
00335           }
00336 
00337           const Data & value( const_iterator & it ) const
00338           {
00339             return it->second;
00340           }
00341 
00342           Data & data( iterator & it )
00343           {
00344             return it->second;
00345           }
00346 
00347           const Data & data( const_iterator & it ) const
00348           {
00349             return it->second;
00350           }
00351 
00352           void print_stats( std::ostream & o = std::cout ) const 
00353           {
00354             size_type i= 0;
00355             int activebuckets= 0;
00356             int bucket_count=0;
00357             int max_bucket_count=0;
00358             int min_bucket_count = 10000000;
00359             bucket_type *bkt;
00360             for( ; i< numBuckets; i++ ) {
00361               bkt = buckets[i];
00362               if( bkt ) {
00363                 activebuckets++;
00364                 bucket_count = 0;
00365                 while( bkt ) {
00366                   bucket_count++;
00367                   bkt = bkt->next;
00368                 }
00369                 if( bucket_count > max_bucket_count )
00370                   max_bucket_count = bucket_count;
00371                 if( bucket_count < min_bucket_count )
00372                   min_bucket_count = bucket_count;
00373               }
00374             }
00375             o << "Stats:\n";
00376             o << "\tNumber of Values   : " << numValues << std::endl;
00377             o << "\tNumber of Buckets  : " << numBuckets << std::endl;
00378             o << "\tActive buckets     : " << activebuckets << std::endl;
00379             o << "\tAverage bucket size: " << (numValues / activebuckets)  << std::endl;
00380             o << "\tMax bucket count   : " << max_bucket_count << std::endl;
00381             o << "\tMin bucket count   : " << min_bucket_count << std::endl;
00382           }
00383 
00384         public:        // methods
00385           std::pair<iterator,bool> insert( const value_type& );
00386           iterator find( const Key& );
00387           const_iterator find( const Key& ) const;
00388           void erase( iterator it );
00389           Data & operator[](const Key & k) {
00390             return (*((insert(value_type(k, Data()))).first)).second;
00391           }
00392 
00393         public:        // vars
00394 
00395         private:    // inline methods
00396           /*************  Get Bucket location from various objects **********/
00397           inline size_type _bucketFromHash( size_type hash, size_type the_size ) const
00398           {
00399             return hash % the_size;
00400           }
00401           inline size_type _bucketFromKey( const Key& the_key,size_type the_size ) const
00402           {
00403             return _bucketFromHash( hashFunc(the_key),the_size );
00404           }
00405           inline size_type _bucketFromValue( const value_type& v,size_type s ) const
00406           {
00407             return _bucketFromKey( v.first,s );
00408           }
00409           inline size_type bucketFromHash( size_type hash ) const
00410           {
00411             return _bucketFromHash( hash,numBuckets );
00412           }
00413           inline size_type bucketFromKey( const Key& the_key ) const
00414           {
00415             return bucketFromHash( hashFunc(the_key) );
00416           }
00417           inline size_type bucketFromValue( const value_type& the_value ) const
00418           {
00419             return bucketFromKey( the_value.first );
00420           }
00421 
00422           /************ Initialize and release buckets *************************/
00423           inline void initBuckets()
00424           {
00425             buckets = _makeBuckets( numBuckets );
00426           }
00427 
00428           inline bucket_type **  _makeBuckets( size_type the_size )
00429           {
00430 #if defined(PI_STATS_DETAIL) && PI_STATS_DETAIL
00431             totalHashMapnumBuckets += the_size;
00432 #endif /* PI_STATS_DETAIL */
00433             bucket_type **bktptr = new bucket_type*[the_size];
00434             memset(bktptr,0,sizeof(bucket_type*)*the_size);
00435             //for(the_size_type s = 0;s<the_size;s++)
00436             //bktptr[s] = 0;
00437             return bktptr;
00438           }
00439 
00440           inline void releaseBuckets()
00441           {
00442 #if defined(PI_STATS_DETAIL) && PI_STATS_DETAIL
00443             totalHashMapnumBuckets -= numBuckets;
00444 #endif /* PI_STATS_DETAIL */
00445             delete[] buckets;
00446           }
00447 
00448           inline void releaseBucket( bucket_type *bkt )
00449           {
00450             delete bkt;
00451           }
00452 
00453         private:    // methods
00454           void resize( size_type the_size );
00455 
00456         private:    // variables
00457           bucket_type **buckets;
00458           size_type numValues;
00459           size_type numBuckets;
00460           double growthFactor;
00461           double shrinkFactor;
00462           HashFunc hashFunc;
00463           EqualFunc equalFunc;
00464       };
00465 
00466   template< typename Key,
00467     typename Data,
00468     typename HashFunc,
00469     typename EqualFunc >
00470       HashMapIterator< Key,Data,HashFunc,EqualFunc >
00471       HashMap< Key,Data,HashFunc,EqualFunc>::find( const Key& the_key )
00472       {
00473         size_type bktNum = bucketFromKey( the_key );
00474         for( bucket_type *bkt = buckets[bktNum]; bkt; bkt = bkt->next )
00475           if( equalFunc( the_key,bkt->value.first) )
00476             return iterator( bkt,this );
00477         return end();
00478       }
00479 
00480   template< typename Key,
00481     typename Data,
00482     typename HashFunc,
00483     typename EqualFunc >
00484       HashMapConstIterator< Key,Data,HashFunc,EqualFunc >
00485       HashMap< Key,Data,HashFunc,EqualFunc>::find( const Key& the_key ) const
00486       {
00487         size_type bktNum = bucketFromKey( the_key );
00488         for( bucket_type *bkt = buckets[bktNum]; bkt; bkt = bkt->next )
00489           if( equalFunc( the_key,bkt->value.first) )
00490             return const_iterator( bkt,this );
00491         return end();
00492       }
00493 
00494   template< typename Key,
00495     typename Data,
00496     typename HashFunc,
00497     typename EqualFunc >
00498       void HashMap<Key,Data,HashFunc,EqualFunc>::erase(
00499           typename HashMap<Key,Data,HashFunc,EqualFunc>::iterator it )
00500       {
00501         /* We can't just erase the bucket in the iterator b/c we
00502          * need to fix the "pointers" in the bucket list.
00503          *
00504          * ie   prev -> it.bkt -> next  ==> prev -> next
00505          *
00506          * This would be simple if we were using doubly linked list..
00507          */
00508         bucket_type *bkt = it.bucket;
00509         if( bkt ) {
00510           // get the first bucket in chain
00511           size_type bktNum = bucketFromValue( it.bucket->value );
00512           bucket_type *cur = buckets[bktNum];
00513           if( cur == bkt ) {
00514             buckets[bktNum] = cur->next;
00515             releaseBucket( cur );
00516             numValues--;
00517           }
00518           else {
00519             bucket_type *next = cur->next;
00520             while( next ) {
00521               if( next == bkt ) {
00522                 cur->next = next->next;
00523                 releaseBucket( next );
00524                 numValues--;
00525                 break;
00526               }
00527               else {
00528                 cur = next;
00529                 next = cur->next;
00530               }
00531             }
00532           }
00533         }
00534       }
00535 
00536   template< typename Key,
00537     typename Data,
00538     typename HashFunc,
00539     typename EqualFunc >
00540       std::pair< HashMapIterator< Key,Data,HashFunc,EqualFunc >,bool >
00541       HashMap<Key,Data,HashFunc,EqualFunc>::insert( const value_type& the_value )
00542       {
00543         typedef std::pair< iterator,bool > RPair;
00544         resize( numValues+1 );
00545         size_type bktNum = bucketFromValue( the_value );
00546         bucket_type *bktptr = buckets[bktNum];
00547         for( bucket_type *tmp = bktptr; tmp ; tmp = tmp->next ) {
00548           if( equalFunc( the_value.first,tmp->value.first ) ) {
00549             return RPair( iterator(tmp,this),false );
00550           }
00551         }
00552         bktptr = new bucket_type( the_value,bktptr );
00553         buckets[bktNum] = bktptr;
00554         numValues++;
00555         return RPair( iterator(bktptr,this),true );
00556       }
00557 
00558   template< typename Key,
00559     typename Data,
00560     typename HashFunc,
00561     typename EqualFunc >
00562       void HashMap<Key,Data,HashFunc,EqualFunc>::resize( size_type needed )
00563       {
00564         if( needed < growthFactor )
00565           return;
00566         size_type new_size = numBuckets * 2;
00567         if( new_size >= SIZE_TYPE_MAX )
00568           return;
00569 
00570 #ifdef DBGHASHMAP
00571         printf("DBG HashMap : Resizing to %lu buckets\n",new_size);
00572 #endif
00573 
00574         // alloc new array of buckets
00575         bucket_type **tmp = _makeBuckets( new_size );
00576 
00577         // copy from old array to new
00578         size_type newBktNum;
00579         for(size_type i=0;i<numBuckets;i++) {
00580           /*
00581           // is this ok?
00582           if( buckets[i] ) {
00583           bucket_type *bktptr = buckets[i];
00584           newBktNum = _bucketFromValue( bktptr->value,new_size );
00585           tmp[newBktNum] = bktptr;
00586           buckets[i] = 0;
00587           }
00588           */
00589           while( buckets[i] ) {
00590             bucket_type *old = buckets[i];
00591             // get new bucket num from hashing
00592             newBktNum = _bucketFromValue( old->value,new_size );
00593             // store old's next ptr
00594             buckets[i] = old->next;
00595             // set up old's new bktNum
00596             old->next = tmp[newBktNum];
00597             // put buck in new array
00598             tmp[newBktNum] = old;
00599           }
00600         }
00601 
00602         // release old array
00603         releaseBuckets();
00604 
00605         // set buckets to new array
00606         buckets = tmp;
00607         numBuckets = new_size;
00608         // set up new growth|shrink factors
00609         growthFactor = static_cast<double>(numBuckets) * HASHMAP_GROWTH_FRACTION;
00610         shrinkFactor = static_cast<double>(numBuckets) * HASHMAP_SHRINK_FRACTION;
00611       }
00612 
00613 } // namespace wali
00614 
00615 #endif  // wali_HASH_MAP_GUARD
00616