usermode/library/mcore/src/hal/pcm_i.h

/*
    Copyright (C) 2011 Computer Sciences Department,
    University of Wisconsin -- Madison

    ----------------------------------------------------------------------

    This file is part of Mnemosyne: Lightweight Persistent Memory,
    originally developed at the University of Wisconsin -- Madison.

    Mnemosyne was originally developed primarily by Haris Volos
    with contributions from Andres Jaan Tack.

    ----------------------------------------------------------------------

    Mnemosyne is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation, version 2
    of the License.

    Mnemosyne is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA  02110-1301, USA.

### END HEADER ###
*/

#ifndef _PCM_INTERNAL_H
#define _PCM_INTERNAL_H

#include <stdint.h>
#include <stdlib.h>    /* RAND_MAX */
#include <string.h>    /* memset */
#include <mmintrin.h>
#include <list.h>
#include <spinlock.h>
#include "cuckoo_hash/PointerHashInline.h"


#ifdef __cplusplus
extern "C" {
#endif


/* When defined, individual write-back stores may block and pay the PCM
 * write latency (see PCM_WB_STORE below). */
//#define M_PCM_EMULATE_LATENCY_BLOCKING_STORES 0x1
#undef M_PCM_EMULATE_LATENCY_BLOCKING_STORES


/* When defined, use RDTSCP to measure the latency of CLFLUSH so that only
 * the remaining PCM write latency needs to be emulated. */
//#define HAS_RDTSCP
#undef HAS_RDTSCP

/* Number of write-combining buffers emulated per thread. */
#define WRITE_COMBINING_BUFFERS_NUM 8

/* Size of the hash table used to track the cachelines held in the emulated
 * write-combining buffers. */
#define WCBUF_HASHTBL_SIZE (WRITE_COMBINING_BUFFERS_NUM*4)

#define MEMORY_BANKING_FACTOR 8


/*
 * Probabilities are derived using a total number of outcomes equal to
 * TOTAL_OUTCOMES_NUM.
 */
#define TOTAL_OUTCOMES_NUM 1000000

#if (RAND_MAX < TOTAL_OUTCOMES_NUM)
# error "RAND_MAX must be at least equal to TOTAL_OUTCOMES_NUM."
#endif


#define NS2CYCLE(__ns) ((__ns) * M_PCM_CPUFREQ / 1000)
#define CYCLE2NS(__cycles) ((__cycles) * 1000 / M_PCM_CPUFREQ)
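/* M_PCM_CPUFREQ is supplied externally at build time and, given the
 * conversions above, is presumably the CPU frequency in MHz:
 * cycles = ns * MHz / 1000. */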


#define likely(x)       __builtin_expect(!!(x), 1)
#define unlikely(x)     __builtin_expect(!!(x), 0)


/* Memory Pages */

#define PAGE_SIZE 4096

/* Returns the number of pages needed to hold size bytes */
#define NUM_PAGES(size) ((((size) % PAGE_SIZE) == 0? 0 : 1) + (size)/PAGE_SIZE)

/* Returns the size rounded up to page granularity */
#define SIZEOF_PAGES(size) (NUM_PAGES((size)) * PAGE_SIZE)

/* Rounds an address up to the next page boundary */
#define PAGE_ALIGN(addr) (NUM_PAGES((addr)) * PAGE_SIZE)
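/* Example: with PAGE_SIZE 4096, NUM_PAGES(5000) == 2, SIZEOF_PAGES(5000) == 8192,
 * and PAGE_ALIGN(0x1234) == 0x2000. */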


/* Hardware Cache */

#ifdef __x86_64__
# define CACHELINE_SIZE     64
# define CACHELINE_SIZE_LOG 6
#else
# define CACHELINE_SIZE     32
# define CACHELINE_SIZE_LOG 5
#endif

#define BLOCK_ADDR(addr) ( (pcm_word_t *) (((pcm_word_t) (addr)) & ~(CACHELINE_SIZE - 1)) )
#define INDEX_ADDR(addr) ( (pcm_word_t *) (((pcm_word_t) (addr)) & (CACHELINE_SIZE - 1)) )
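/* BLOCK_ADDR yields the cacheline-aligned base of addr; INDEX_ADDR yields the
 * byte offset of addr within its cacheline (returned as a pcm_word_t *). */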


/* Public types */

typedef uintptr_t pcm_word_t;

typedef uint64_t pcm_hrtime_t;

typedef struct pcm_storeset_s pcm_storeset_t;
typedef struct cacheline_tbl_s cacheline_tbl_t;


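/*
 * Per-thread store set: bookkeeping used by the PCM emulation layer. It holds
 * the per-thread random seed for probabilistic latency emulation, the state
 * used by the crash-emulation code (hashtbl, cacheline_tbl -- defined
 * elsewhere), a small hash table modeling the occupancy of the hardware
 * write-combining buffers, and the timestamps used to emulate bandwidth for
 * sequential streams.
 */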
struct pcm_storeset_s {
        uint32_t              id;
        uint32_t              state;
        unsigned int          rand_seed;
        PointerHash           *hashtbl;
        uint16_t              wcbuf_hashtbl[WCBUF_HASHTBL_SIZE];
        uint16_t              wcbuf_hashtbl_count;
        uint32_t              seqstream_len;
        cacheline_tbl_t       *cacheline_tbl;
        struct list_head      list;
        volatile unsigned int in_crash_emulation_code;
        uint64_t              seqstream_write_TS_array[8]; /* timestamp of writes */
        int                   seqstream_write_TS_index;
};

/*
 * Locally defined global variables.
 */

/*
 * Externally defined global variables.
 */

extern unsigned int pcm_likelihood_store_blockwaits;
extern volatile arch_spinlock_t ticket_lock;

/*
 * Prototypes
 */

int pcm_storeset_create(pcm_storeset_t **setp);
void pcm_storeset_destroy(pcm_storeset_t *set);
pcm_storeset_t* pcm_storeset_get(void);
void pcm_storeset_put(void);
void pcm_wb_store_emulate_crash(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val);
void pcm_wb_flush_emulate_crash(pcm_storeset_t *set, volatile pcm_word_t *addr);
void pcm_nt_store_emulate_crash(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val);
void pcm_nt_flush_emulate_crash(pcm_storeset_t *set);


/*
 * Helper functions.
 */

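/* CPUID is a serializing instruction; PCM_SEQSTREAM_FLUSH below uses it,
 * presumably to keep later instructions from being reordered around the
 * emulated delay. */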
static inline void asm_cpuid() {
        asm volatile( "cpuid" :::"rax", "rbx", "rcx", "rdx");
}

#if defined(__i386__)

static inline unsigned long long asm_rdtsc(void)
{
        unsigned long long int x;
        __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
        return x;
}

static inline unsigned long long asm_rdtscp(void)
{
        unsigned hi, lo;
        __asm__ __volatile__ ("rdtscp" : "=a"(lo), "=d"(hi)::"ecx");
        return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}

#elif defined(__x86_64__)

static inline unsigned long long asm_rdtsc(void)
{
        unsigned hi, lo;
        __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
        return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}

static inline unsigned long long asm_rdtscp(void)
{
        unsigned hi, lo;
        __asm__ __volatile__ ("rdtscp" : "=a"(lo), "=d"(hi)::"rcx");
        return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}
#else
#error "What architecture is this???"
#endif


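/* Streams one cacheline (eight pcm_word_t words) to addr using non-temporal
 * MOVNTI stores that bypass the cache; addr is expected to be
 * cacheline-aligned. */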
static inline void asm_sse_write_block64(volatile pcm_word_t *addr, pcm_word_t *val)
{
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*&addr[0]): "r" (val[0]));
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*&addr[1]): "r" (val[1]));
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*&addr[2]): "r" (val[2]));
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*&addr[3]): "r" (val[3]));
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*&addr[4]): "r" (val[4]));
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*&addr[5]): "r" (val[5]));
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*&addr[6]): "r" (val[6]));
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*&addr[7]): "r" (val[7]));
}


static inline void asm_movnti(volatile pcm_word_t *addr, pcm_word_t val)
{
        __asm__ __volatile__ ("movnti %1, %0" : "=m"(*addr): "r" (val));
}


static inline void asm_clflush(volatile pcm_word_t *addr)
{
        __asm__ __volatile__ ("clflush %0" : : "m"(*addr));
}


static inline void asm_mfence(void)
{
        __asm__ __volatile__ ("mfence");
}


static inline void asm_sfence(void)
{
        __asm__ __volatile__ ("sfence");
}


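/* Simple linear congruential generator over a caller-supplied seed; intended
 * as a cheap per-thread alternative to rand() for the probabilistic latency
 * emulation below. */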
static inline
int rand_int(unsigned int *seed)
{
    *seed=*seed*196314165+907633515;
    return *seed;
}


# ifdef _EMULATE_LATENCY_USING_NOPS
/* So you think nops are more accurate? You might be surprised. */
static inline void asm_nop10() {
        asm volatile("nop");
        asm volatile("nop");
        asm volatile("nop");
        asm volatile("nop");
        asm volatile("nop");
        asm volatile("nop");
        asm volatile("nop");
        asm volatile("nop");
        asm volatile("nop");
        asm volatile("nop");
}

static inline
void
emulate_latency_ns(int ns)
{
        int          i;
        pcm_hrtime_t cycles;

        cycles = NS2CYCLE(ns);
        for (i=0; i<cycles; i+=5) {
                asm_nop10(); /* each nop is 1 cycle */
        }
}

# else

static inline
void
emulate_latency_ns(int ns)
{
        pcm_hrtime_t cycles;
        pcm_hrtime_t start;
        pcm_hrtime_t stop;

        start = asm_rdtsc();
        cycles = NS2CYCLE(ns);

        do {
                /* RDTSC doesn't necessarily wait for previous instructions to complete,
                 * so a serializing instruction is usually used to ensure previous
                 * instructions have completed. However, in our case this is a desirable
                 * property since we want to overlap the latency we emulate with the
                 * actual latency of the emulated instruction.
                 */
                stop = asm_rdtsc();
        } while (stop - start < cycles);
}

# endif

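/*
 * Write val to the word at addr, but only the bytes selected by mask. A full
 * mask takes the fast path of a plain word store; otherwise the contiguous
 * run of selected bytes (mask is assumed to be a non-zero, contiguous byte
 * mask) is copied byte by byte.
 */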
static inline
void
write_aligned_masked(pcm_word_t *addr, pcm_word_t val, pcm_word_t mask)
{
        uintptr_t a;
        int       i;
        int       trailing_0bytes;
        int       leading_0bytes;

        union convert_u {
                pcm_word_t w;
                uint8_t    b[sizeof(pcm_word_t)];
        } valu;

        /* Complete write? */
        if (mask == ((pcm_word_t) -1)) {
                *addr = val;
        } else {
                valu.w = val;
                a = (uintptr_t) addr;
                trailing_0bytes = __builtin_ctzll(mask) >> 3;
                leading_0bytes = __builtin_clzll(mask) >> 3;
                for (i = trailing_0bytes; i < 8 - leading_0bytes; i++) {
                        *((uint8_t *) (a+i)) = valu.b[i];
                }
        }
}


/*
 * WRITE BACK CACHE MODE
 */


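/*
 * Write-back mode store: performs a normal cached store. When crash emulation
 * is enabled the store is first recorded, and when blocking-store latency
 * emulation is enabled the store pays the PCM write latency with probability
 * pcm_likelihood_store_blockwaits / TOTAL_OUTCOMES_NUM.
 */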
static inline
void
PCM_WB_STORE(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val)
{
        //printf("PCM_WB_STORE: (0x%lx, %lx)\n", addr, val);
#ifdef M_PCM_EMULATE_CRASH
        pcm_wb_store_emulate_crash(set, addr, val);
#endif

        *addr = val;

#ifdef M_PCM_EMULATE_LATENCY
# ifdef M_PCM_EMULATE_LATENCY_BLOCKING_STORES
        if (pcm_likelihood_store_blockwaits > 0) {
                int random_number = rand_int(&set->rand_seed) % TOTAL_OUTCOMES_NUM;
                if (random_number < pcm_likelihood_store_blockwaits) {
                        emulate_latency_ns(M_PCM_LATENCY_WRITE);
                }
        }
# endif
#endif
}


static inline
void
PCM_WB_STORE_MASKED(pcm_storeset_t *set,
                    volatile pcm_word_t *addr,
                    pcm_word_t val,
                    pcm_word_t mask)
{
        //printf("PCM_WB_STORE_MASKED: (0x%lx, %lx, %lx)\n", addr, val, mask);
#ifdef M_PCM_EMULATE_CRASH
        pcm_wb_store_emulate_crash(set, addr, val);
#endif

        write_aligned_masked((pcm_word_t *) addr, val, mask);

#ifdef M_PCM_EMULATE_LATENCY
# ifdef M_PCM_EMULATE_LATENCY_BLOCKING_STORES
        if (pcm_likelihood_store_blockwaits > 0) {
                int random_number = rand_int(&set->rand_seed) % TOTAL_OUTCOMES_NUM;
                if (random_number < pcm_likelihood_store_blockwaits) {
                        emulate_latency_ns(M_PCM_LATENCY_WRITE);
                }
        }
# endif
#endif
}


static inline
void
PCM_WB_STORE_ALIGNED_MASKED(pcm_storeset_t *set,
                            volatile pcm_word_t *addr,
                            pcm_word_t val,
                            pcm_word_t mask)
{
        PCM_WB_STORE_MASKED(set, addr, val, mask);
}


static inline
void
PCM_WB_FENCE(pcm_storeset_t *set)
{
        asm_mfence();
}

/*
 * Flush the cacheline containing address addr.
 */
static inline
void
PCM_WB_FLUSH(pcm_storeset_t *set, volatile pcm_word_t *addr)
{
#ifdef M_PCM_EMULATE_CRASH
        pcm_wb_flush_emulate_crash(set, addr);
#endif

        /*
         * An mfence would normally be needed first to ensure that previous
         * stores are included in the write-back. But because this interface is
         * used by transactions that may issue many clflushes, we require the
         * caller to have issued the mfence itself; otherwise we would
         * unnecessarily issue multiple mfences.
         */
#ifdef M_PCM_EMULATE_LATENCY
        {
#ifdef HAS_RDTSCP
                /* Measure the latency of a clflush and add an additional delay to
                 * meet the latency to write to PCM */
                pcm_hrtime_t start;
                pcm_hrtime_t stop;

                start = asm_rdtscp();
                asm_clflush(addr);
                stop = asm_rdtscp();
                emulate_latency_ns(M_PCM_LATENCY_WRITE - CYCLE2NS(stop-start));
#else
                asm_clflush(addr);
                emulate_latency_ns(M_PCM_LATENCY_WRITE);
#endif
                asm_mfence();
        }

#else /* !M_PCM_EMULATE_LATENCY */
        asm_clflush(addr);
        asm_mfence();
#endif /* !M_PCM_EMULATE_LATENCY */

}


/*
 * NON-TEMPORAL STREAM MODE
 *
 * Stores are non-cacheable but go through the write-combining buffers instead.
 */

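/*
 * Non-temporal store: the value is streamed with MOVNTI. For latency
 * emulation, the cachelines currently held in the modeled write-combining
 * buffers are tracked in wcbuf_hashtbl; once more than
 * WRITE_COMBINING_BUFFERS_NUM distinct lines are buffered, the model drains
 * them and charges M_PCM_LATENCY_WRITE per buffered line. PCM_NT_FLUSH
 * charges for whatever is still buffered and clears the table.
 */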
static inline
void
PCM_NT_STORE(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val)
{
#ifdef M_PCM_EMULATE_CRASH
        pcm_nt_store_emulate_crash(set, addr, val);
#endif

        asm_movnti(addr, val);

#ifdef M_PCM_EMULATE_LATENCY
        uint16_t  i;
        uint16_t  index_addr;
        uint16_t  index_i;
        uintptr_t byte_addr;
        uintptr_t block_byte_addr;

        byte_addr = (uintptr_t) addr;
        block_byte_addr = (uintptr_t) BLOCK_ADDR(byte_addr);
        index_addr = (uint16_t) ((block_byte_addr >> CACHELINE_SIZE_LOG) & ((uint16_t) (-1)));

retry:
        if (set->wcbuf_hashtbl_count < WRITE_COMBINING_BUFFERS_NUM) {
                for (i=0; i<WCBUF_HASHTBL_SIZE; i++) {
                        index_i = (index_addr + i) & (WCBUF_HASHTBL_SIZE-1);
                        if (set->wcbuf_hashtbl[index_i] == index_addr) {
                                /* hit -- do nothing */
                                break;
                        } else if (set->wcbuf_hashtbl[index_i] == 0) {
                                set->wcbuf_hashtbl[index_i] = index_addr;
                                set->wcbuf_hashtbl_count++;
                                break;
                        }
                }
        } else {
                memset(set->wcbuf_hashtbl, 0, sizeof(set->wcbuf_hashtbl));
                emulate_latency_ns(M_PCM_LATENCY_WRITE * set->wcbuf_hashtbl_count);
                set->wcbuf_hashtbl_count = 0;
                goto retry;
        }

#endif
}


static inline
void
PCM_NT_FLUSH(pcm_storeset_t *set)
{
#ifdef M_PCM_EMULATE_CRASH
        pcm_nt_flush_emulate_crash(set);
#endif

        asm_sfence();
#ifdef M_PCM_EMULATE_LATENCY
        emulate_latency_ns(M_PCM_LATENCY_WRITE * set->wcbuf_hashtbl_count);
        memset(set->wcbuf_hashtbl, 0, sizeof(set->wcbuf_hashtbl));
        set->wcbuf_hashtbl_count = 0;
#endif
}


/*
 * NON-TEMPORAL SEQUENTIAL STREAM MODE
 *
 * Used when we know that stream accesses are sequential, so that we can
 * emulate bandwidth and hide some latency. For example, stores to the log
 * are sequential.
 */


static inline
void
PCM_SEQSTREAM_INIT(pcm_storeset_t *set)
{
#ifdef M_PCM_EMULATE_CRASH

#endif

#ifdef M_PCM_EMULATE_LATENCY
        set->seqstream_len = 0;
        set->seqstream_write_TS_index = 0;
#endif
}


static inline
void
PCM_SEQSTREAM_STORE(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val)
{
        //printf("PCM_SEQSTREAM_STORE: set=%p, addr=%p, val=%llX\n", set, addr, val);
#ifdef M_PCM_EMULATE_CRASH
#endif

        asm_movnti(addr, val);

#ifdef M_PCM_EMULATE_LATENCY
        set->seqstream_len = set->seqstream_len + 8;

        /* NOTE: Well, we want to always set the TS of the first write of a
         * sequence. We could use an if-statement, but this adds a branch.
         * I am not entirely convinced that the branch predictor will work
         * well, as whether the branch is taken really depends on the length
         * of the sequence of stores, which varies. I prefer to use a trick
         * that relies on two stores that are likely to hit the L1-D cache.
         */
        set->seqstream_write_TS_array[set->seqstream_write_TS_index] = asm_rdtsc();
        set->seqstream_write_TS_index |= 1;
#endif
}


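/*
 * The _64B_FIRST_WORD/_64B_NEXT_WORD pair streams a 64-byte line one word at
 * a time: the first word accounts for the full 64 bytes and records the start
 * timestamp, while subsequent words only issue the MOVNTI.
 * PCM_SEQSTREAM_STORE_64B streams the whole line at once.
 */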
static inline
void
PCM_SEQSTREAM_STORE_64B_FIRST_WORD(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val)
{
        //printf("PCM_SEQSTREAM_STORE: set=%p, addr=%p, val=%llX\n", set, addr, val);
#ifdef M_PCM_EMULATE_CRASH
#endif

        asm_movnti(addr, val);

#ifdef M_PCM_EMULATE_LATENCY
        set->seqstream_len = set->seqstream_len + 64;

        /* NOTE: Well, we want to always set the TS of the first write of a
         * sequence. We could use an if-statement, but this adds a branch.
         * I am not entirely convinced that the branch predictor will work
         * well, as whether the branch is taken really depends on the length
         * of the sequence of stores, which varies. I prefer to use a trick
         * that relies on two stores that are likely to hit the L1-D cache.
         */
        set->seqstream_write_TS_array[set->seqstream_write_TS_index] = asm_rdtsc();
        set->seqstream_write_TS_index |= 1;
#endif
}


static inline
void
PCM_SEQSTREAM_STORE_64B_NEXT_WORD(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t val)
{
        //printf("PCM_SEQSTREAM_STORE: set=%p, addr=%p, val=%llX\n", set, addr, val);
#ifdef M_PCM_EMULATE_CRASH
#endif

        asm_movnti(addr, val);
}


static inline
void
PCM_SEQSTREAM_STORE_64B(pcm_storeset_t *set, volatile pcm_word_t *addr, pcm_word_t *val)
{
        //printf("PCM_SEQSTREAM_STORE: set=%p, addr=%p, val=%llX\n", set, addr, val);
#ifdef M_PCM_EMULATE_CRASH
#endif

        asm_sse_write_block64(addr, val);

#ifdef M_PCM_EMULATE_LATENCY
        set->seqstream_len = set->seqstream_len + 64;
        /* HACK: Well, we want to always set the TS of the first write of a
         * sequence. We could use an if-statement, but this adds a branch.
         * I am not entirely convinced that the branch predictor will work
         * well, as whether the branch is taken really depends on the length
         * of the sequence of stores, which varies. I prefer to use a trick
         * that relies on two stores that are likely to hit the L1-D cache.
         */
        set->seqstream_write_TS_array[set->seqstream_write_TS_index] = asm_rdtsc();
        set->seqstream_write_TS_index |= 1;
#endif
}

#define RAM_SYSTEM_PEAK_BANDWIDTH_MB 7000


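/*
 * Flush for the sequential-stream mode. For streams longer than 64 bytes the
 * emulated delay is the time the stream would take at the PCM bandwidth minus
 * the time it takes at the RAM peak bandwidth (bandwidth_MB/1000 is treated
 * as bytes/ns), less whatever time has already elapsed since the first store
 * of the sequence. The extra delay is emulated while holding ticket_lock,
 * presumably so that concurrent flushes serialize and model contention for
 * the shared memory bandwidth.
 */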
static inline
void
PCM_SEQSTREAM_FLUSH(pcm_storeset_t *set)
{
#ifdef M_PCM_EMULATE_CRASH

#endif

#ifdef M_PCM_EMULATE_LATENCY
        int          pcm_bandwidth_MB = M_PCM_BANDWIDTH_MB;
        int          ram_system_peak_bandwidth_MB = RAM_SYSTEM_PEAK_BANDWIDTH_MB;
        int          size;
        pcm_hrtime_t handicap_latency;
        pcm_hrtime_t extra_latency;
        pcm_hrtime_t elapsed_time_ns;
        pcm_hrtime_t elapsed_time_cycles;
        pcm_hrtime_t current_TS;

        if ((size = set->seqstream_len) > 64) {
                current_TS = asm_rdtsc();
                elapsed_time_cycles = current_TS - set->seqstream_write_TS_array[0];
                elapsed_time_ns = CYCLE2NS(elapsed_time_cycles);

                handicap_latency = (int) size *
                    (1 - (float) (((float) pcm_bandwidth_MB)/1000) / (((float) ram_system_peak_bandwidth_MB)/1000)) /
                    (((float) pcm_bandwidth_MB)/1000);
                if (handicap_latency > elapsed_time_ns) {
                        extra_latency = handicap_latency - elapsed_time_ns;
                        asm_sfence();
                        __ticket_spin_lock(&ticket_lock);
                        emulate_latency_ns(extra_latency);
                        __ticket_spin_unlock(&ticket_lock);
                        asm_cpuid();
                } else {
                        asm_sfence();
                        emulate_latency_ns(M_PCM_LATENCY_WRITE);
                }
        } else {
                asm_sfence();
                emulate_latency_ns(M_PCM_LATENCY_WRITE);
        }
        set->seqstream_write_TS_index = 0;
        set->seqstream_len = 0;
#else
        asm_sfence();
#endif
}

#ifdef __cplusplus
}
#endif

#endif /* _PCM_INTERNAL_H */
