00001 #ifndef wali_HASH_MAP_GUARD
00002 #define wali_HASH_MAP_GUARD 1
00003
00004
00005
00006
00007
00008
00009 #if defined(_WIN32) && _MSC_VER > 1000
00010 # pragma warning(disable: 4786)
00011 #endif
00012
00013 #include <climits>
00014 #include <utility>
00015 #include <functional>
00016 #include <iostream>
00017 #include "wali/hm_hash.hpp"
00018 #define HASHMAP_GROWTH_FRACTION 0.75
00019 #define HASHMAP_SHRINK_FRACTION 0.25
00020
00021
00022
00023
00024
00025
00026
00027 namespace wali
00028 {
00029
00030 #if defined(PI_STATS_DETAIL) && PI_STATS_DETAIL
00031
00032 extern long totalHashMapnumBuckets;
00033 #endif
00034
00035
00036 template< typename Key,
00037 typename Data,
00038 typename HashFunc,
00039 typename EqualFunc > class HashMap;
00040
00041 template< typename Key,
00042 typename Data,
00043 typename HashFunc,
00044 typename EqualFunc > class HashMapIterator;
00045
00046 template< typename Key,
00047 typename Data,
00048 typename HashFunc,
00049 typename EqualFunc > class HashMapConstIterator;
00050
00051
00052 template< typename Value > struct Bucket
00053 {
00054 Bucket( const Value& v, Bucket *n=0 )
00055 : value(v),next(n) {}
00056 Value value;
00057 Bucket *next;
00058 };
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068 template< typename Key,
00069 typename Data,
00070 typename HashFunc,
00071 typename EqualFunc >
00072 class HashMapIterator
00073 {
00074 public:
00075 friend class HashMap< Key,Data,HashFunc,EqualFunc >;
00076 friend class HashMapConstIterator< Key,Data,HashFunc,EqualFunc >;
00077
00078 public:
00079 typedef HashMapIterator< Key,Data,HashFunc,EqualFunc > iterator;
00080 typedef HashMapConstIterator< Key,Data,HashFunc,EqualFunc > const_iterator;
00081 typedef HashMap< Key,Data,HashFunc,EqualFunc > hashmap_type;
00082 typedef std::pair< Key,Data > pair_type;
00083 typedef pair_type value_type;
00084 typedef size_t size_type;
00085 typedef Bucket< value_type > bucket_type;
00086
00087
00088 HashMapIterator() : bucket(0),hashMap(0) {}
00089
00090 HashMapIterator( bucket_type *bkt,hashmap_type *hmap )
00091 : bucket( bkt ),hashMap(hmap) {}
00092
00093 inline value_type *operator->()
00094 {
00095 return &(bucket->value);
00096 }
00097
00098 inline value_type& operator*()
00099 {
00100 return bucket->value;
00101 }
00102
00103 inline bool operator==( const iterator& right )
00104 {
00105 return right.bucket == bucket;
00106 }
00107
00108 inline bool operator!=( const iterator& right )
00109 {
00110 return right.bucket != bucket;
00111 }
00112
00113 inline iterator operator++()
00114 {
00115 return this->operator++(60);
00116 }
00117
00118 HashMapIterator operator++( int )
00119 {
00120 bucket_type *old = bucket;
00121 bucket = bucket->next;
00122 if( !bucket ) {
00123 size_type bktNum = hashMap->bucketFromValue( old->value );
00124 while( !bucket && ++bktNum < hashMap->numBuckets ) {
00125 bucket = hashMap->buckets[bktNum];
00126 }
00127 }
00128 return *this;
00129 }
00130
00131 protected:
00132 bucket_type *bucket;
00133 hashmap_type *hashMap;
00134
00135 };
00136
00137 template< typename Key,
00138 typename Data,
00139 typename HashFunc,
00140 typename EqualFunc >
00141 class HashMapConstIterator
00142 {
00143 friend class HashMap< Key,Data,HashFunc,EqualFunc >;
00144 friend class HashMapIterator< Key,Data,HashFunc,EqualFunc >;
00145
00146 public:
00147 typedef HashMapIterator< Key,Data,HashFunc,EqualFunc > iterator;
00148 typedef HashMapConstIterator< Key,Data,HashFunc,EqualFunc > const_iterator;
00149 typedef HashMap< Key,Data,HashFunc,EqualFunc > hashmap_type;
00150 typedef std::pair< Key,Data > pair_type;
00151 typedef pair_type value_type;
00152 typedef size_t size_type;
00153 typedef Bucket< value_type > bucket_type;
00154
00155 HashMapConstIterator() : bucket(0),hashMap(0) {}
00156
00157 HashMapConstIterator( const bucket_type *bkt,const hashmap_type *hmap )
00158 : bucket( bkt ),hashMap(hmap) {}
00159
00160 HashMapConstIterator( const iterator& it )
00161 : bucket( it.bucket ),hashMap( it.hashMap ) {}
00162
00163 inline const value_type *operator->()
00164 {
00165 return &(bucket->value);
00166 }
00167
00168 inline const value_type& operator*()
00169 {
00170 return bucket->value;
00171 }
00172
00173 inline bool operator==( const const_iterator& right )
00174 {
00175 return right.bucket == bucket;
00176 }
00177
00178 inline bool operator!=( const const_iterator& right )
00179 {
00180 return right.bucket != bucket;
00181 }
00182
00183 const_iterator operator++()
00184 {
00185 return this->operator++(60);
00186 }
00187
00188 HashMapConstIterator operator++( int )
00189 {
00190 const bucket_type *old = bucket;
00191 bucket = bucket->next;
00192 if( !bucket ) {
00193 size_type bktNum = hashMap->bucketFromValue( old->value );
00194 while( !bucket && ++bktNum < hashMap->numBuckets ) {
00195 bucket = hashMap->buckets[bktNum];
00196 }
00197 }
00198 return *this;
00199 }
00200
00201 protected:
00202 const bucket_type *bucket;
00203
00204 const hashmap_type *hashMap;
00205 };
00206
00207
00208
00209
00210
00211
00212
00213
00214 template< typename Key,
00215 typename Data,
00216 typename HashFunc = hm_hash< Key >,
00217 typename EqualFunc = hm_equal< Key > >
00218 class HashMap
00219 {
00220
00221 public:
00222 typedef HashMapIterator< Key,Data,HashFunc,EqualFunc > iterator;
00223 typedef HashMapConstIterator< Key,Data,HashFunc,EqualFunc > const_iterator;
00224 typedef HashMap< Key,Data,HashFunc,EqualFunc > hashmap_type;
00225 typedef std::pair< Key,Data > pair_type;
00226 typedef pair_type value_type;
00227 typedef size_t size_type;
00228 typedef Bucket< value_type > bucket_type;
00229
00230
00231 friend class HashMapIterator<Key,Data,HashFunc,EqualFunc>;
00232 friend class HashMapConstIterator<Key,Data,HashFunc,EqualFunc>;
00233
00234 enum enum_size_type_max { SIZE_TYPE_MAX = ULONG_MAX };
00235
00236 public:
00237 HashMap( size_type the_size=47 )
00238 : numValues(0),numBuckets(the_size),
00239 growthFactor( static_cast<double>(the_size) * HASHMAP_GROWTH_FRACTION ),
00240 shrinkFactor( static_cast<double>(the_size) * HASHMAP_SHRINK_FRACTION )
00241 { initBuckets(); }
00242
00243 HashMap( const HashMap& hm )
00244 {
00245 operator=(hm);
00246 }
00247
00248 HashMap& operator=( const HashMap& hm ) {
00249 clear();
00250 for( const_iterator it = hm.begin() ; it != hm.end() ; it++ ) {
00251 insert(key(it),value(it));
00252 }
00253 return *this;
00254 }
00255
00256 ~HashMap() {
00257 clear();
00258 releaseBuckets();
00259 }
00260
00261 public:
00262 void clear()
00263 {
00264 for(size_type i=0;i<numBuckets;i++) {
00265 while( buckets[i] ) {
00266 bucket_type *tmp = buckets[i];
00267 buckets[i] = tmp->next;
00268 releaseBucket( tmp );
00269 }
00270 }
00271 numValues = 0;
00272 }
00273
00274 inline size_type size() const
00275 {
00276 return numValues;
00277 }
00278
00279 inline size_type capacity() const
00280 {
00281 return numBuckets;
00282 }
00283
00284 inline std::pair<iterator,bool> insert( const Key& k, const Data& d )
00285 {
00286 return insert( pair_type(k,d) );
00287 }
00288
00289 void erase( const Key& key_to_erase )
00290 {
00291 iterator it = find( key_to_erase );
00292 if( it != end() )
00293 erase( it );
00294 }
00295
00296 iterator begin()
00297 {
00298 for( size_type n = 0 ; n < numBuckets ; n++ )
00299 if( buckets[n] )
00300 return iterator(buckets[n],this);
00301 return end();
00302 }
00303
00304 inline iterator end()
00305 {
00306 return iterator( 0,this );
00307 }
00308
00309 const_iterator begin() const
00310 {
00311 for( size_type n = 0 ; n < numBuckets ; n++ )
00312 if( buckets[n] )
00313 return const_iterator(buckets[n],this);
00314 return end();
00315 }
00316
00317 inline const_iterator end() const
00318 {
00319 return const_iterator(0,this);
00320 }
00321
00322 Key & key( iterator & it )
00323 {
00324 return it->first;
00325 }
00326
00327 const Key & key( const_iterator & it ) const
00328 {
00329 return it->first;
00330 }
00331
00332 Data & value( iterator & it )
00333 {
00334 return it->second;
00335 }
00336
00337 const Data & value( const_iterator & it ) const
00338 {
00339 return it->second;
00340 }
00341
00342 Data & data( iterator & it )
00343 {
00344 return it->second;
00345 }
00346
00347 const Data & data( const_iterator & it ) const
00348 {
00349 return it->second;
00350 }
00351
00352 void print_stats( std::ostream & o = std::cout ) const
00353 {
00354 size_type i= 0;
00355 int activebuckets= 0;
00356 int bucket_count=0;
00357 int max_bucket_count=0;
00358 int min_bucket_count = 10000000;
00359 bucket_type *bkt;
00360 for( ; i< numBuckets; i++ ) {
00361 bkt = buckets[i];
00362 if( bkt ) {
00363 activebuckets++;
00364 bucket_count = 0;
00365 while( bkt ) {
00366 bucket_count++;
00367 bkt = bkt->next;
00368 }
00369 if( bucket_count > max_bucket_count )
00370 max_bucket_count = bucket_count;
00371 if( bucket_count < min_bucket_count )
00372 min_bucket_count = bucket_count;
00373 }
00374 }
00375 o << "Stats:\n";
00376 o << "\tNumber of Values : " << numValues << std::endl;
00377 o << "\tNumber of Buckets : " << numBuckets << std::endl;
00378 o << "\tActive buckets : " << activebuckets << std::endl;
00379 o << "\tAverage bucket size: " << (numValues / activebuckets) << std::endl;
00380 o << "\tMax bucket count : " << max_bucket_count << std::endl;
00381 o << "\tMin bucket count : " << min_bucket_count << std::endl;
00382 }
00383
00384 public:
00385 std::pair<iterator,bool> insert( const value_type& );
00386 iterator find( const Key& );
00387 const_iterator find( const Key& ) const;
00388 void erase( iterator it );
00389 Data & operator[](const Key & k) {
00390 return (*((insert(value_type(k, Data()))).first)).second;
00391 }
00392
00393 public:
00394
00395 private:
00396
00397 inline size_type _bucketFromHash( size_type hash, size_type the_size ) const
00398 {
00399 return hash % the_size;
00400 }
00401 inline size_type _bucketFromKey( const Key& the_key,size_type the_size ) const
00402 {
00403 return _bucketFromHash( hashFunc(the_key),the_size );
00404 }
00405 inline size_type _bucketFromValue( const value_type& v,size_type s ) const
00406 {
00407 return _bucketFromKey( v.first,s );
00408 }
00409 inline size_type bucketFromHash( size_type hash ) const
00410 {
00411 return _bucketFromHash( hash,numBuckets );
00412 }
00413 inline size_type bucketFromKey( const Key& the_key ) const
00414 {
00415 return bucketFromHash( hashFunc(the_key) );
00416 }
00417 inline size_type bucketFromValue( const value_type& the_value ) const
00418 {
00419 return bucketFromKey( the_value.first );
00420 }
00421
00422
00423 inline void initBuckets()
00424 {
00425 buckets = _makeBuckets( numBuckets );
00426 }
00427
00428 inline bucket_type ** _makeBuckets( size_type the_size )
00429 {
00430 #if defined(PI_STATS_DETAIL) && PI_STATS_DETAIL
00431 totalHashMapnumBuckets += the_size;
00432 #endif
00433 bucket_type **bktptr = new bucket_type*[the_size];
00434 memset(bktptr,0,sizeof(bucket_type*)*the_size);
00435
00436
00437 return bktptr;
00438 }
00439
00440 inline void releaseBuckets()
00441 {
00442 #if defined(PI_STATS_DETAIL) && PI_STATS_DETAIL
00443 totalHashMapnumBuckets -= numBuckets;
00444 #endif
00445 delete[] buckets;
00446 }
00447
00448 inline void releaseBucket( bucket_type *bkt )
00449 {
00450 delete bkt;
00451 }
00452
00453 private:
00454 void resize( size_type the_size );
00455
00456 private:
00457 bucket_type **buckets;
00458 size_type numValues;
00459 size_type numBuckets;
00460 double growthFactor;
00461 double shrinkFactor;
00462 HashFunc hashFunc;
00463 EqualFunc equalFunc;
00464 };
00465
00466 template< typename Key,
00467 typename Data,
00468 typename HashFunc,
00469 typename EqualFunc >
00470 HashMapIterator< Key,Data,HashFunc,EqualFunc >
00471 HashMap< Key,Data,HashFunc,EqualFunc>::find( const Key& the_key )
00472 {
00473 size_type bktNum = bucketFromKey( the_key );
00474 for( bucket_type *bkt = buckets[bktNum]; bkt; bkt = bkt->next )
00475 if( equalFunc( the_key,bkt->value.first) )
00476 return iterator( bkt,this );
00477 return end();
00478 }
00479
00480 template< typename Key,
00481 typename Data,
00482 typename HashFunc,
00483 typename EqualFunc >
00484 HashMapConstIterator< Key,Data,HashFunc,EqualFunc >
00485 HashMap< Key,Data,HashFunc,EqualFunc>::find( const Key& the_key ) const
00486 {
00487 size_type bktNum = bucketFromKey( the_key );
00488 for( bucket_type *bkt = buckets[bktNum]; bkt; bkt = bkt->next )
00489 if( equalFunc( the_key,bkt->value.first) )
00490 return const_iterator( bkt,this );
00491 return end();
00492 }
00493
00494 template< typename Key,
00495 typename Data,
00496 typename HashFunc,
00497 typename EqualFunc >
00498 void HashMap<Key,Data,HashFunc,EqualFunc>::erase(
00499 typename HashMap<Key,Data,HashFunc,EqualFunc>::iterator it )
00500 {
00501
00502
00503
00504
00505
00506
00507
00508 bucket_type *bkt = it.bucket;
00509 if( bkt ) {
00510
00511 size_type bktNum = bucketFromValue( it.bucket->value );
00512 bucket_type *cur = buckets[bktNum];
00513 if( cur == bkt ) {
00514 buckets[bktNum] = cur->next;
00515 releaseBucket( cur );
00516 numValues--;
00517 }
00518 else {
00519 bucket_type *next = cur->next;
00520 while( next ) {
00521 if( next == bkt ) {
00522 cur->next = next->next;
00523 releaseBucket( next );
00524 numValues--;
00525 break;
00526 }
00527 else {
00528 cur = next;
00529 next = cur->next;
00530 }
00531 }
00532 }
00533 }
00534 }
00535
00536 template< typename Key,
00537 typename Data,
00538 typename HashFunc,
00539 typename EqualFunc >
00540 std::pair< HashMapIterator< Key,Data,HashFunc,EqualFunc >,bool >
00541 HashMap<Key,Data,HashFunc,EqualFunc>::insert( const value_type& the_value )
00542 {
00543 typedef std::pair< iterator,bool > RPair;
00544 resize( numValues+1 );
00545 size_type bktNum = bucketFromValue( the_value );
00546 bucket_type *bktptr = buckets[bktNum];
00547 for( bucket_type *tmp = bktptr; tmp ; tmp = tmp->next ) {
00548 if( equalFunc( the_value.first,tmp->value.first ) ) {
00549 return RPair( iterator(tmp,this),false );
00550 }
00551 }
00552 bktptr = new bucket_type( the_value,bktptr );
00553 buckets[bktNum] = bktptr;
00554 numValues++;
00555 return RPair( iterator(bktptr,this),true );
00556 }
00557
00558 template< typename Key,
00559 typename Data,
00560 typename HashFunc,
00561 typename EqualFunc >
00562 void HashMap<Key,Data,HashFunc,EqualFunc>::resize( size_type needed )
00563 {
00564 if( needed < growthFactor )
00565 return;
00566 size_type new_size = numBuckets * 2;
00567 if( new_size >= SIZE_TYPE_MAX )
00568 return;
00569
00570 #ifdef DBGHASHMAP
00571 printf("DBG HashMap : Resizing to %lu buckets\n",new_size);
00572 #endif
00573
00574
00575 bucket_type **tmp = _makeBuckets( new_size );
00576
00577
00578 size_type newBktNum;
00579 for(size_type i=0;i<numBuckets;i++) {
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589 while( buckets[i] ) {
00590 bucket_type *old = buckets[i];
00591
00592 newBktNum = _bucketFromValue( old->value,new_size );
00593
00594 buckets[i] = old->next;
00595
00596 old->next = tmp[newBktNum];
00597
00598 tmp[newBktNum] = old;
00599 }
00600 }
00601
00602
00603 releaseBuckets();
00604
00605
00606 buckets = tmp;
00607 numBuckets = new_size;
00608
00609 growthFactor = static_cast<double>(numBuckets) * HASHMAP_GROWTH_FRACTION;
00610 shrinkFactor = static_cast<double>(numBuckets) * HASHMAP_SHRINK_FRACTION;
00611 }
00612
00613 }
00614
00615 #endif // wali_HASH_MAP_GUARD
00616