00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 #include "w.h"
00055 #include "mem_block.h"
00056 #include "atomic_ops.h"
00057 #include <cstdlib>
00058 #include <stdio.h>
00059 #include <algorithm>
00060 #ifdef __linux
00061 #include <malloc.h>
00062 #endif
00063
00064
00065 #undef assert
00066 void assert_failed(const char *desc, const char *f, int l) {
00067 fprintf(stdout, "Assertion failed: %s at line %d file %s ", desc, l,f);
00068 w_assert0(0);
00069 }
00070 #define assert(x) if (!(x)) assert_failed(#x, __FILE__, __LINE__);
00071
00072 #define TEMPLATE_ARGS chip_size, chip_count, block_size
00073
00074 #ifdef __GNUC__
00075 #if defined(__x86_64) || defined(i386) || defined(__i386__)
00076 #define NEED_POPC64 0
00077 #elif defined(__sparcv9)
00078 #define NEED_POPC64 0
00079 #else
00080 #define NEED_POPC64 1
00081 #endif
00082 #else // !defined(__GNUC__)
00083 #define NEED_POPC64 1
00084 #endif
00085
00086 namespace memory_block {
00087 #if 0
00088 }
00089 #endif
00090
00091 #if NEED_POPC64==1
00092
00093 typedef unsigned long long u64;
00094 static inline
00095 long popc64(u64 x) {
00096 u64 k1 = 0x5555555555555555ull;
00097 u64 k2 = 0x3333333333333333ull;
00098 u64 k4 = 0x0f0f0f0f0f0f0f0full;
00099 u64 kf = 0x0101010101010101ull;
00100 x = x - ((x >> 1) & k1);
00101 x = (x & k2) + ((x >> 2) & k2);
00102 x = (x + (x >> 4)) & k4 ;
00103 x = (x * kf) >> 56;
00104 return x;
00105 }
00106 #endif
00107
00108 size_t block_bits::_popc(bitmap bm) {
00109 #if NEED_POPC64==1
00110 #warning "using home-brew popc routine"
00111 return popc64(bm);
00112 #elif defined(__x86_64) || defined(i386) || defined(__i386__)
00113
00114 return __builtin_popcountll(bm);
00115 #elif defined(__sparcv9)
00116 #warning "Using gcc inline asm to access sparcv9 'popc' instruction"
00117 long rval;
00118 __asm__("popc %[in], %[out]" : [out] "=r"(rval) : [in] "r"(x));
00119 return rval;
00120 #else
00121 #error "NEED_POPC64: Platform not supported"
00122 #endif
00123 }
00124
00125 block_bits::block_bits(size_t chip_count)
00126 : _usable_chips(create_mask(chip_count))
00127 , _zombie_chips(0)
00128 {
00129 assert(chip_count <= 8*sizeof(bitmap));
00130 }
00131
00132 size_t block_bits::acquire_contiguous(size_t chip_count) {
00133 (void) chip_count;
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144 bitmap one_bit = _usable_chips &- _usable_chips;
00145 size_t index = _popc(one_bit-1);
00146 if(index < 8*sizeof(bitmap)) {
00147
00148 assert(index < chip_count);
00149 _usable_chips ^= one_bit;
00150 }
00151 else {
00152
00153 assert(index == 8*sizeof(bitmap));
00154 }
00155
00156 return index;
00157 }
00158
00159 void block_bits::release_contiguous(size_t index, size_t chip_count) {
00160
00161 (void) chip_count;
00162 assert(index < chip_count);
00163 bitmap to_free = bitmap(1) << index;
00164 assert(! (to_free & *usable_chips()));
00165 membar_exit();
00166 bitmap volatile* ptr = &_zombie_chips;
00167 bitmap ov = *ptr;
00168 while(1) {
00169 bitmap nv = ov | to_free;
00170 bitmap cv = atomic_cas_64(ptr, ov, nv);
00171 if(cv == ov)
00172 break;
00173 ov = cv;
00174 }
00175 bitmap was_free = ov;
00176 (void) was_free;
00177
00178 assert( ! (was_free & to_free));
00179 }
00180
00181 block_bits::bitmap block_bits::create_mask(size_t bits_set) {
00182
00183 return ~bitmap(0) >> (8*sizeof(bitmap) - bits_set);
00184 }
00185
00186 void block_bits::recycle() {
00187
00188
00189
00190
00191
00192
00193
00194
00195 bitmap newly_usable = *&_zombie_chips;
00196 _usable_chips |= newly_usable;
00197 membar_exit();
00198 bitmap volatile* ptr = &_zombie_chips;
00199 bitmap ov = *ptr;
00200 while(1) {
00201 bitmap nv = ov ^ newly_usable;
00202 bitmap cv = atomic_cas_64(ptr, ov, nv);
00203 if(cv == ov)
00204 break;
00205 ov = cv;
00206 }
00207 }
00208
00209 void* block::acquire_chip(size_t chip_size, size_t chip_count, size_t ) {
00210 size_t index = _bits.acquire_contiguous(chip_count);
00211 return (index < chip_count)? _get(index, chip_size) : 0;
00212 }
00213
00214 void block::release_chip(void* ptr, size_t chip_size, size_t chip_count, size_t block_size)
00215 {
00216
00217
00218
00219
00220
00221
00222 union { void* v; size_t n; block* b; char* c; } u = {ptr}, v=u;
00223 u.n &= -block_size;
00224 size_t offset = v.c - u.b->_data;
00225 size_t idx = offset/chip_size;
00226 assert(u.b->_data + idx*chip_size == ptr);
00227 u.b->_bits.release_contiguous(idx, chip_count);
00228 }
00229
00230 char* block::_get(size_t index, size_t chip_size) {
00231 return _data + index*chip_size;
00232 }
00233
00234 block::block(size_t chip_size, size_t chip_count, size_t block_size)
00235 : _bits(chip_count)
00236 , _owner(0)
00237 , _next(0)
00238 {
00239
00240 char* end_of_block = _get(0, chip_size)-sizeof(this)+block_size;
00241 char* end_of_chips = _get(chip_count, chip_size);
00242 (void) end_of_block;
00243 (void) end_of_chips;
00244 assert(end_of_chips <= end_of_block);
00245
00246
00247
00248
00249
00250
00251 }
00252
00253 void* block_list::acquire(size_t chip_size, size_t chip_count, size_t block_size)
00254 {
00255 if(void* ptr = _tail->acquire_chip(TEMPLATE_ARGS)) {
00256 _hit_count++;
00257 return ptr;
00258 }
00259
00260
00261 return _slow_acquire(TEMPLATE_ARGS);
00262 }
00263
00264
00265 block_list::block_list(block_pool* pool, size_t chip_size, size_t chip_count, size_t block_size)
00266 : _fake_block(TEMPLATE_ARGS)
00267 , _tail(&_fake_block)
00268 , _pool(pool)
00269 , _hit_count(0)
00270 , _avg_hit_rate(0)
00271 {
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282 _fake_block._bits._usable_chips = 0;
00283 _fake_block._bits._zombie_chips = 0;
00284 }
00285
00286
00287 void* block_list::_slow_acquire(size_t chip_size,
00288 size_t chip_count, size_t block_size)
00289 {
00290 _change_blocks(TEMPLATE_ARGS);
00291 return acquire(TEMPLATE_ARGS);
00292 }
00293
00294 block* block_list::acquire_block(size_t block_size)
00295 {
00296 union { block* b; uintptr_t n; } u = {_pool->acquire_block(this)};
00297 (void) block_size;
00298 assert((u.n & -block_size) == u.n);
00299 return u.b;
00300
00301 }
00302 void block_list::_change_blocks(size_t chip_size,
00303 size_t chip_count, size_t block_size)
00304 {
00305 (void) chip_size;
00306
00307
00308 if(_tail == &_fake_block) {
00309 _tail = acquire_block(block_size);
00310 _tail->_next = _tail;
00311 return;
00312 }
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327 static double const decay_rate = 1./5;
00328
00329 size_t const max_available = chip_count - std::max((int)(.1*chip_count), 1);
00330
00331 size_t const min_allocated = (chip_count+1)/2;
00332
00333 _avg_hit_rate = _hit_count*(1-decay_rate) + _avg_hit_rate*decay_rate;
00334 if(_hit_count < min_allocated && _avg_hit_rate < min_allocated) {
00335
00336 block* new_block = acquire_block(block_size);
00337 new_block->_next = _tail->_next;
00338 _tail = _tail->_next = new_block;
00339 }
00340 else {
00341
00342 block* prev = 0;
00343 block* cur = _tail;
00344 block* next;
00345 while(1) {
00346 next = cur->_next;
00347 next->recycle();
00348 if(next->_bits.usable_count() <= max_available)
00349 break;
00350
00351
00352 if(prev) {
00353 assert(prev != cur);
00354 assert(cur->_bits.usable_count() > max_available);
00355 assert(next->_bits.usable_count() > max_available);
00356 prev->_next = next;
00357 _pool->release_block(cur);
00358 cur = prev;
00359 }
00360
00361
00362 if(next == _tail) break;
00363
00364 prev = cur;
00365 cur = next;
00366 }
00367
00368
00369 _tail = cur;
00370 }
00371
00372 _hit_count = 0;
00373 }
00374
00375 block_list::~block_list() {
00376
00377 if(_tail == &_fake_block) return;
00378
00379
00380 block* cur = _tail->_next;
00381 _tail->_next = 0;
00382
00383
00384 while( (cur=_pool->release_block(cur)) ) ;
00385 }
00386
00387
00388 }