usermode/library/mcore/src/hal/pcm.c

00001 /*
00002     Copyright (C) 2011 Computer Sciences Department, 
00003     University of Wisconsin -- Madison
00004 
00005     ----------------------------------------------------------------------
00006 
00007     This file is part of Mnemosyne: Lightweight Persistent Memory, 
00008     originally developed at the University of Wisconsin -- Madison.
00009 
00010     Mnemosyne was originally developed primarily by Haris Volos
00011     with contributions from Andres Jaan Tack.
00012 
00013     ----------------------------------------------------------------------
00014 
00015     Mnemosyne is free software; you can redistribute it and/or
00016     modify it under the terms of the GNU General Public License
00017     as published by the Free Software Foundation, version 2
00018     of the License.
00019  
00020     Mnemosyne is distributed in the hope that it will be useful,
00021     but WITHOUT ANY WARRANTY; without even the implied warranty of
00022     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023     GNU General Public License for more details.
00024 
00025     You should have received a copy of the GNU General Public License
00026     along with this program; if not, write to the Free Software
00027     Foundation, Inc., 51 Franklin Street, Fifth Floor, 
00028     Boston, MA  02110-1301, USA.
00029 
00030 ### END HEADER ###
00031 */
00032 
00033 #include <stdint.h>
00034 #include <stdlib.h>
00035 #include <pthread.h>
00036 #include <mmintrin.h>
00037 #include <list.h>
00038 #include <spinlock.h>
00039 #include "cuckoo_hash/PointerHashInline.h"
00040 #include "pcm_i.h"
00041 
00042 
00043 /* Private types */
00044 
00045 typedef uint64_t cacheline_bitmask_t;
00046 
00047 typedef struct pcm_storeset_list_s pcm_storeset_list_t; 
00048 
00049 struct pcm_storeset_list_s {
00050         uint32_t          count;
00051         struct list_head  list;
00052         volatile uint8_t  outstanding_crash;
00053         pthread_mutex_t   lock;
00054         pthread_cond_t    cond_halt;
00055         pthread_cond_t    cond_wait_storesets_halt;
00056         volatile uint32_t waiters;
00057 };
00058 
00059                                                                                   
00060 /* 
00061  * The CDF that there will be a partial crash after we write the N first bytes
00062  * of a cacheline given there is a crash. SUM(cdf[0..8]) must be 1000.
00063  * cdf[I] means: crash after writing (I+1)*sizeof(word) bytes
00064  *
00065  * Example: For a cache line of size 64 bytes and word size of 8 bytes: 
00066  * cdf[0] --> crash after writing 0 bytes
00067  * cdf[1] --> crash after writing 8 bytes
00068  * ...
00069  * cdf[7] --> crash after writing 56 bytes
00070  * cdf[8] --> crash after writing 64 bytes (no partial crash)
00071  */
00072 typedef struct cacheline_crash_cdf_s cacheline_crash_cdf_t;
00073 
00074 struct cacheline_crash_cdf_s {
00075         int word[CACHELINE_SIZE/sizeof(pcm_word_t)+1];
00076 };
00077 
00078 
00079 typedef struct cacheline_s cacheline_t;
00080 
00081 struct cacheline_s {
00082         cacheline_bitmask_t bitmask;
00083         uint8_t             bytes[CACHELINE_SIZE];
00084 };
00085 
00086 
00087 struct cacheline_tbl_s {
00088         cacheline_t   *cachelines;
00089         unsigned int  cachelines_count;
00090         unsigned int  cachelines_size;
00091 };
00092 
00093 
00094 /* Dynamic configuration */
00095 
00096 pcm_storeset_list_t pcm_storeset_list = { 0, 
00097                                           LIST_HEAD_INIT(pcm_storeset_list.list), 
00098                                           0, 
00099                                           PTHREAD_MUTEX_INITIALIZER, 
00100                                           PTHREAD_COND_INITIALIZER, 
00101                                           PTHREAD_COND_INITIALIZER, 
00102                                           0 };
00103 
00104 
00105 /* CDF for a cacheline partial crash. */
00106 
00107 #define NO_PARTIAL_CRASH {{0,0,0,0,0,0,0,0,1000000}}
00108 
00109 cacheline_crash_cdf_t cacheline_crash_cdf = NO_PARTIAL_CRASH;
00110 
00111 
00112 /* Likelihood a store will block wait. */
00113 unsigned int pcm_likelihood_store_blockwaits = 1000;  
00114 
00115 /* Likelihood a cacheline has been evicted. */
00116 unsigned int pcm_likelihood_evicted_cacheline = 10000;  
00117 
00118 volatile arch_spinlock_t ticket_lock = {0};
00119 
00120 
00121 __thread pcm_storeset_t* _thread_pcm_storeset;
00122 
00123 
00124 static inline
00125 cacheline_t *
00126 cacheline_alloc()
00127 {
00128         cacheline_t *l;
00129 
00130         l = (cacheline_t*) malloc(sizeof(cacheline_t));
00131         memset(l, 0x0, sizeof(cacheline_t));
00132         return l;
00133 }
00134 
00135 static inline
00136 void
00137 cacheline_free(cacheline_t *l)
00138 {
00139         free(l);
00140 }
00141 
00142 
00143 int
00144 pcm_storeset_create(pcm_storeset_t **setp)
00145 {
00146         pcm_storeset_t *set;
00147 
00148         set = (pcm_storeset_t *) malloc(sizeof(pcm_storeset_t));
00149         pthread_mutex_lock(&pcm_storeset_list.lock);
00150         pcm_storeset_list.count++;
00151         list_add(&set->list, &pcm_storeset_list.list);
00152         pthread_mutex_unlock(&pcm_storeset_list.lock);
00153         set->hashtbl = PointerHash_new();
00154         memset(set->wcbuf_hashtbl, 0, WCBUF_HASHTBL_SIZE);
00155         set->wcbuf_hashtbl_count = 0;
00156         set->seqstream_len = 0;
00157         set->in_crash_emulation_code = 0;
00158         /* Initialize reentrant random generator */
00159         set->rand_seed = pthread_self();
00160         rand_int(&set->rand_seed);
00161         *setp = set;
00162         return 0;
00163 }
00164 
00165 
00166 void
00167 pcm_storeset_destroy(pcm_storeset_t *set)
00168 {
00169         pthread_mutex_lock(&pcm_storeset_list.lock);
00170         pcm_storeset_list.count--;
00171         list_del(&set->list);
00172         pthread_mutex_unlock(&pcm_storeset_list.lock);
00173         PointerHash_free(set->hashtbl);
00174         free(set);
00175 }
00176 
00177 
00178 pcm_storeset_t*
00179 pcm_storeset_get(void)
00180 {
00181         pcm_storeset_t *set = _thread_pcm_storeset;
00182 
00183         if (set) {
00184                 return set;
00185         }
00186 
00187         pcm_storeset_create(&set);
00188         _thread_pcm_storeset = set;
00189 
00190         return set;
00191 }
00192 
00193 
00194 void
00195 pcm_storeset_put(void)
00196 {
00197         pcm_storeset_t *set = _thread_pcm_storeset;
00198 
00199         if (set) {
00200                 pcm_storeset_destroy(set);
00201                 _thread_pcm_storeset = NULL;
00202         }
00203 }
00204 
00205 
00206 
00207 void 
00208 pcm_check_crash(pcm_storeset_t *set)
00209 {
00210         int was_in_crash_emulation_code;
00211 
00212         if (pcm_storeset_list.outstanding_crash) {
00213                 was_in_crash_emulation_code = set->in_crash_emulation_code;
00214                 set->in_crash_emulation_code = 0;
00215                 pthread_mutex_lock(&pcm_storeset_list.lock);
00216                 if (pcm_storeset_list.outstanding_crash) {
00217                         pcm_storeset_list.waiters++;
00218                         pthread_cond_signal(&pcm_storeset_list.cond_wait_storesets_halt);
00219                         while(pcm_storeset_list.outstanding_crash) {
00220                                 pthread_cond_wait(&pcm_storeset_list.cond_halt, &pcm_storeset_list.lock);
00221                         }
00222                 }       
00223                 pthread_mutex_unlock(&pcm_storeset_list.lock);
00224                 set->in_crash_emulation_code = was_in_crash_emulation_code;
00225         }
00226 }
00227 
00228 
00229 static inline
00230 void
00231 crash_save_oldvalue(pcm_storeset_t *set, volatile pcm_word_t *addr)
00232 {
00233         uintptr_t           byte_addr;
00234         uintptr_t           block_byte_addr;
00235         uintptr_t           index_byte_addr;
00236         int                 i;
00237         cacheline_t         *cacheline;
00238         cacheline_bitmask_t bitmask;
00239         uint8_t             byte_oldvalue;
00240 
00241         assert(set->in_crash_emulation_code);
00242 
00243         for (i=0; i<sizeof(pcm_word_t); i++) {
00244                 byte_addr = (uintptr_t) addr + i;
00245                 block_byte_addr = (uintptr_t) BLOCK_ADDR(byte_addr);
00246                 index_byte_addr = (uintptr_t) INDEX_ADDR(byte_addr);
00247                 cacheline = (cacheline_t *) PointerHash_at_(set->hashtbl, (void *) block_byte_addr);
00248                 if (!cacheline) {
00249                         cacheline = cacheline_alloc();
00250                         PointerHash_at_put_(set->hashtbl, (void *) block_byte_addr, (void *) cacheline);
00251                 }
00252                 bitmask = 1ULL << index_byte_addr;
00253                 if (!(cacheline->bitmask & bitmask)) {
00254                         byte_oldvalue = *((uint8_t *) byte_addr);
00255                         cacheline->bytes[index_byte_addr] = byte_oldvalue;
00256                         cacheline->bitmask |= bitmask;
00257                 }
00258         }
00259 
00260 }
00261 
00262 
00263 static inline
00264 void
00265 crash_restore_unflushed_values(pcm_storeset_t *set)
00266 {
00267         uintptr_t           byte_addr;
00268         uintptr_t           block_byte_addr;
00269         int                 i;
00270         int                 j;
00271         cacheline_t         *cacheline;
00272         cacheline_bitmask_t bitmask;
00273 
00274         assert(set->in_crash_emulation_code);
00275 
00276         for(i = 0; i < set->hashtbl->size; i++) {
00277                 PointerHashRecord *r = PointerHashRecords_recordAt_(set->hashtbl->records, i);
00278                 if ((block_byte_addr = (uintptr_t) r->k)) {
00279                         cacheline = (cacheline_t *) r->v;
00280                         for (j=0; j<CACHELINE_SIZE; j++) {
00281                                 byte_addr = block_byte_addr + j;
00282                                 bitmask = 1ULL << j;
00283                                 if (cacheline->bitmask & bitmask) {
00284                                         *((uint8_t*)byte_addr) = cacheline->bytes[j];
00285                                 }
00286                         }
00287                 }
00288         }
00289 }
00290 
00291 
00292 /*
00293  * Flush a cacheline by removing the line from the list of outstanding stores.
00294  */
00295 void
00296 crash_flush_cacheline(pcm_storeset_t *set, volatile pcm_word_t *addr, int allow_partial_crash)
00297 {
00298         uintptr_t           byte_addr;
00299         uintptr_t           block_byte_addr;
00300         int                 random_number;
00301         int                 i;
00302         int                 sum;
00303         int                 successfully_flushed_words_num=0;
00304         cacheline_t         *cacheline;
00305         cacheline_bitmask_t bitmask;
00306 
00307         assert(set->in_crash_emulation_code);
00308 
00309         byte_addr = (uintptr_t) addr;
00310         block_byte_addr = (uintptr_t) BLOCK_ADDR(addr);
00311 
00312 flush_cacheline:
00313         cacheline = (cacheline_t *) PointerHash_at_(set->hashtbl, (void *) block_byte_addr);
00314         if (!cacheline) {
00315                 /* Cache line already flushed. */
00316                 return;
00317         }
00318         
00319         /* If there is an outstanding failure then the currently flushed line
00320          * can be treaded as not flushed, partially flushed, or completely
00321          * flushed based on some distribution.
00322          * Remove from the outstanding stores just the part of the cache line
00323          * that has been successfully flushed.
00324          */
00325         if (pcm_storeset_list.outstanding_crash && allow_partial_crash) {
00326                 random_number = rand_int(&set->rand_seed) % TOTAL_OUTCOMES_NUM;
00327                 for (i=0, sum=0; i<CACHELINE_SIZE/sizeof(pcm_word_t)+1; i++) {
00328                         sum += cacheline_crash_cdf.word[i];
00329                         if (random_number <= sum) {
00330                                 successfully_flushed_words_num = i;
00331                                 break;
00332                         }
00333                 }
00334         } else {
00335                 successfully_flushed_words_num = CACHELINE_SIZE/sizeof(pcm_word_t);
00336         }       
00337         if (successfully_flushed_words_num == CACHELINE_SIZE/sizeof(pcm_word_t)) {
00338                 bitmask = 0x0;
00339         } else {
00340                 //FIXME: what is the correct bitmask here?
00341                 //bitmask = ((~(1ULL << CACHELINE_SIZE - 1))
00342                 //           << (successfully_flushed_words_num * sizeof(pcm_word_t))) & (~(1ULL << CACHELINE_SIZE - 1));
00343                 bitmask = (((uint64_t) -1)
00344                            << (successfully_flushed_words_num * sizeof(pcm_word_t))) & ((uint64_t) -1);
00345         }       
00346         cacheline->bitmask = cacheline->bitmask & bitmask;
00347 
00348         if (cacheline->bitmask == 0x0) {
00349                 cacheline_free(cacheline);
00350                 PointerHash_removeKey_(set->hashtbl, (void *) block_byte_addr);
00351         }
00352 
00353         /* The word could fall in two cachelines. */
00354         byte_addr += sizeof(pcm_word_t)-1;
00355         block_byte_addr += sizeof(pcm_word_t)-1;
00356         goto flush_cacheline;
00357 
00358 }
00359 
00360 
00361 /*
00362  * Either flush all cachelines, or randomly flush some based on a given
00363  * probability distribution.
00364  */
00365 static inline
00366 void
00367 crash_flush_cachelines(pcm_storeset_t *set, int all, int likelihood_flush_cacheline)
00368 {
00369         int                 i;
00370         int                 count;
00371         int                 random_number;
00372         int                 do_flush;
00373         PointerHashRecord   *ra[1024];
00374 
00375         assert(set->in_crash_emulation_code);
00376 
00377         for(i = 0, count=0, do_flush=0; i < set->hashtbl->size; i++) {
00378                 PointerHashRecord *r = PointerHashRecords_recordAt_(set->hashtbl->records, i);
00379                 if (r->k) {
00380                         if (!all) {
00381                                 random_number = rand_int(&set->rand_seed) % TOTAL_OUTCOMES_NUM;
00382                                 if (random_number < likelihood_flush_cacheline) {
00383                                         do_flush = 1;
00384                                 } else {
00385                                         do_flush = 0;
00386                                 }
00387                         }
00388                         if (all || (!all && do_flush)) {
00389                                 assert(count < 1024);
00390                                 ra[count++] = r;
00391                         }       
00392                 }
00393         }
00394 
00395         for(i = 0; i < count; i++) {
00396                 crash_flush_cacheline(set, (pcm_word_t *) ra[i]->k, 0);
00397         }       
00398 }
00399 
00400 
00401 
00402 void 
00403 pcm_trigger_crash(pcm_storeset_t *set, int wait_storesets_halt)
00404 {
00405         int             waiters;
00406         pcm_storeset_t  *set_iter;
00407 
00408 
00409         pthread_mutex_lock(&pcm_storeset_list.lock);
00410         if (pcm_storeset_list.outstanding_crash) {
00411                 /* Someone else has already triggered a crash */
00412                 pthread_mutex_unlock(&pcm_storeset_list.lock);
00413                 pcm_check_crash(set);
00414                 return;
00415         }
00416         pcm_storeset_list.outstanding_crash = 1;
00417         pcm_storeset_list.waiters = 0;
00418 
00419         if (wait_storesets_halt) {
00420                 /* Wait for all storesets to halt. */
00421                 waiters = pcm_storeset_list.count; 
00422                 if (set) {
00423                         /* Called in the context of a storeset, so make sure I don't
00424                          * wait for myself */
00425                         waiters--;
00426                 }       
00427                 while(pcm_storeset_list.waiters < waiters) {
00428                         pthread_cond_wait(&pcm_storeset_list.cond_wait_storesets_halt, &pcm_storeset_list.lock);
00429                 }       
00430         } {
00431                 /* If I don't wait for store sets to halt, at least make sure
00432                  * that there is no concurrent action on a store set.
00433                  */
00434                 list_for_each_entry(set_iter, &pcm_storeset_list.list, list)             
00435                 {
00436                         while (set_iter->in_crash_emulation_code);
00437                 }
00438                  
00439         }
00440 
00441         /* We are at a safe point; either all store sets are halted or 
00442          * there is no concurrent activity on any store set. Proceed with crash. 
00443          * To emulate evictions that might had happened, first randomly flush some 
00444          * cache lines based on a probability distribution, and then restore
00445          * old values of any outstanding stores.
00446          */
00447         list_for_each_entry(set_iter, &pcm_storeset_list.list, list)             
00448         {
00449                 crash_flush_cachelines(set, 0, pcm_likelihood_evicted_cacheline);
00450                 crash_restore_unflushed_values(set_iter);
00451         }
00452         
00453         /* Done with crash. Release anyone that has been halted. */
00454         pcm_storeset_list.outstanding_crash = 0;
00455         pthread_cond_broadcast(&pcm_storeset_list.cond_halt);
00456         pthread_mutex_unlock(&pcm_storeset_list.lock);
00457 }
00458 
00459 
00460 /* 
00461  * We emulate the write-combining buffers using a hash table that never contains 
00462  * more than WRITE_COMBINING_BUFFERS_NUM keys.
00463  */
00464 static inline
00465 void
00466 nt_flush_buffers(pcm_storeset_t *set)
00467 {
00468         int                 i;
00469         int                 count;
00470         PointerHashRecord   *ra[WRITE_COMBINING_BUFFERS_NUM];
00471 
00472         assert(set->in_crash_emulation_code);
00473 
00474         for(i = 0, count=0; i < set->hashtbl->size; i++) {
00475                 PointerHashRecord *r = PointerHashRecords_recordAt_(set->hashtbl->records, i);
00476                 if (r->k) {
00477                         assert(count < WRITE_COMBINING_BUFFERS_NUM);
00478                         ra[count++] = r;
00479                 }
00480         }
00481 
00482         for(i = 0; i < count; i++) {
00483                 crash_flush_cacheline(set, (pcm_word_t *) ra[i]->k, 1);
00484         }       
00485 }
00486 
00487 
00488 void
00489 pcm_wb_store_emulate_crash(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val)
00490 {
00491         if (!set) {
00492                 return;
00493         }
00494         pcm_check_crash(set);
00495         set->in_crash_emulation_code = 1;
00496         crash_save_oldvalue(set, addr);
00497         set->in_crash_emulation_code = 0;
00498 }
00499 
00500 
00501 void
00502 pcm_wb_flush_emulate_crash(pcm_storeset_t *set, volatile pcm_word_t *addr)
00503 {
00504         set->in_crash_emulation_code = 1;
00505         crash_flush_cacheline(set, addr, 1);
00506         set->in_crash_emulation_code = 0;
00507         pcm_check_crash(set);
00508 }
00509 
00510 
00511 void
00512 pcm_nt_store_emulate_crash(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val)
00513 {
00514         unsigned int active_buffers_count;
00515         uintptr_t    byte_addr;
00516         uintptr_t    block_byte_addr1;
00517         uintptr_t    block_byte_addr2;
00518         cacheline_t  *cacheline;
00519         int          buffers_needed = 0;
00520 
00521         pcm_check_crash(set);
00522         set->in_crash_emulation_code = 1;
00523         byte_addr = (uintptr_t) addr;
00524 
00525         /* Check if a buffer already holds the cacheline containing addr.
00526          * If not then find out how many empty buffers we need. 
00527          */
00528         block_byte_addr1 = (uintptr_t) BLOCK_ADDR(byte_addr);
00529         block_byte_addr2 = (uintptr_t) BLOCK_ADDR(byte_addr+sizeof(pcm_word_t)-1);
00530         if (!(cacheline = PointerHash_at_(set->hashtbl, (void *) block_byte_addr1))) {
00531                 buffers_needed++;
00532         }
00533         if (block_byte_addr1 != block_byte_addr2 /* Word spans more than one cacheline. */
00534                 && (!(cacheline = PointerHash_at_(set->hashtbl, (void *) block_byte_addr2)))) 
00535         {
00536                 buffers_needed++;
00537         }
00538 
00539         /* Check if there is space. If not then flush buffers */
00540         active_buffers_count = PointerHash_count(set->hashtbl);
00541         if (active_buffers_count + buffers_needed > WRITE_COMBINING_BUFFERS_NUM) {
00542                 nt_flush_buffers(set);
00543         }
00544         crash_save_oldvalue(set, addr);
00545         set->in_crash_emulation_code = 0;
00546 }
00547 
00548 
00549 void
00550 pcm_nt_flush_emulate_crash(pcm_storeset_t *set)
00551 {
00552         pcm_check_crash(set);
00553         set->in_crash_emulation_code = 1;
00554         nt_flush_buffers(set);
00555         set->in_crash_emulation_code = 0;
00556 }

Generated on Sat Apr 23 11:43:35 2011 for Mnemosyne by  doxygen 1.4.7