usermode/library/mcore/src/log/phlog_tornbit.h

Go to the documentation of this file.
00001 /*
00002     Copyright (C) 2011 Computer Sciences Department, 
00003     University of Wisconsin -- Madison
00004 
00005     ----------------------------------------------------------------------
00006 
00007     This file is part of Mnemosyne: Lightweight Persistent Memory, 
00008     originally developed at the University of Wisconsin -- Madison.
00009 
00010     Mnemosyne was originally developed primarily by Haris Volos
00011     with contributions from Andres Jaan Tack.
00012 
00013     ----------------------------------------------------------------------
00014 
00015     Mnemosyne is free software; you can redistribute it and/or
00016     modify it under the terms of the GNU General Public License
00017     as published by the Free Software Foundation, version 2
00018     of the License.
00019  
00020     Mnemosyne is distributed in the hope that it will be useful,
00021     but WITHOUT ANY WARRANTY; without even the implied warranty of
00022     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023     GNU General Public License for more details.
00024 
00025     You should have received a copy of the GNU General Public License
00026     along with this program; if not, write to the Free Software
00027     Foundation, Inc., 51 Franklin Street, Fifth Floor, 
00028     Boston, MA  02110-1301, USA.
00029 
00030 ### END HEADER ###
00031 */
00032 
00073 /*
00074  * FIXME: Tornbit currently appears to be working correctly and passing our 
00075  *        tests. Developing it was quite painful mainly due to some
00076  *        optimizations (to avoid branches), which is not clear whether 
00077  *        they paid off. Thus, the author fills a simpler, more robust 
00078  *        implementation with good performance should be possible. 
00079  *        Reconsider a simpler implementation...
00080  *        
00081  *
00082  */
00083 
00084 
00085 #ifndef _PHYSICAL_LOG_TORNBIT_H
00086 #define _PHYSICAL_LOG_TORNBIT_H
00087 
00088 /* System header files */
00089 #include <stdio.h>
00090 #include <stdint.h>
00091 #include <stdbool.h>
00092 /* Mnemosyne common header files */
00093 #include <result.h>
00094 #include <list.h>
00095 #include "../hal/pcm_i.h"
00096 #include "log_i.h"
00097 
00098 uint64_t load_nt_word(void *addr);
00099 
00100 #undef _DEBUG_THIS 
00101 //#define _DEBUG_THIS 1
00102 
00103 #ifdef __cplusplus
00104 extern "C" {
00105 #endif
00106 
00107 
00108 #define CHUNK_SIZE              64
00109 
00110 
00111 #define TORN_MASK               0x8000000000000000LLU
00112 #define TORN_MASKC              0x7FFFFFFFFFFFFFFFLLU /* complement of TORN_MASK */
00113 #define TORNBIT_ZERO            0x0000000000000000LLU
00114 #define TORNBIT_ONE             0x8000000000000000LLU
00115 
00116 #define LF_TORNBIT              TORN_MASK
00117 #define LF_HEAD_MASK            0x00000000FFFFFFFFLLU
00118 
00119 
00120 
00121 typedef struct m_phlog_tornbit_s      m_phlog_tornbit_t;
00122 typedef struct m_phlog_tornbit_nvmd_s m_phlog_tornbit_nvmd_t;
00123 
00124 struct m_phlog_tornbit_nvmd_s {
00125         pcm_word_t generic_flags;
00126         pcm_word_t flags;                         
00127         pcm_word_t reserved3;                     
00128         pcm_word_t reserved4;                     
00129 };
00130 
00131 
00132 
00133 
00142 struct m_phlog_tornbit_s {
00143         uint64_t                buffer[CHUNK_SIZE/sizeof(uint64_t)];    
00144         uint64_t                buffer_count;                           
00145         uint64_t                write_remainder;                        
00146         uint64_t                write_remainder_nbits;                  
00147         uint64_t                read_remainder;                         
00148         uint64_t                read_remainder_nbits;                   
00149         uint64_t                *nvphlog;                               
00150         m_phlog_tornbit_nvmd_t  *nvmd;                                  
00151         uint64_t                head;
00152         uint64_t                tail;
00153         uint64_t                stable_tail;                            
00154         uint64_t                read_index;
00155         uint64_t                tornbit;
00156         //uint64_t              tornbit[CHUNK_SIZE/sizeof(uint64_t)];
00157         
00158         /* statistics */
00159         uint64_t                pad1[8];                                
00160         uint64_t                stat_wait_for_trunc;                    
00161         uint64_t                stat_wait_time_for_trunc;               
00162 };
00163 
00164 
00165 static 
00166 void 
00167 print_binary64(uint64_t val)
00168 {
00169         int i;
00170         uint64_t mask = 0x8000000000000000LLU; 
00171         for (i=0; i<64; i++){
00172                 if ((mask & val) > 0)
00173                         printf("1");
00174                 else
00175                         printf("0");
00176                 mask >>= 1;
00177         }
00178 }
00179 
00180 
00187 static inline
00188 void
00189 tornbit_write_buffer2log(pcm_storeset_t *set, m_phlog_tornbit_t *log)
00190 {
00191         /* 
00192          * Modulo arithmetic is implemented using the most efficient equivalent:
00193          * (log->tail + k) % PHYSICAL_LOG_NUM_ENTRIES == (log->tail + k) & (PHYSICAL_LOG_NUM_ENTRIES-1)
00194          */
00195 #ifdef _DEBUG_THIS              
00196         printf("tornbit_write_buffer2log: log->tail = %llu\n", log->tail);       
00197 #endif  
00198 
00199         PCM_SEQSTREAM_STORE_64B_FIRST_WORD(set, (volatile pcm_word_t *) &log->nvphlog[(log->tail+0)], 
00200                                            log->tornbit | (pcm_word_t) log->buffer[0]);
00201         PCM_SEQSTREAM_STORE_64B_NEXT_WORD(set, (volatile pcm_word_t *) &log->nvphlog[(log->tail+1)], 
00202                                           log->tornbit | (pcm_word_t) log->buffer[1]);
00203         PCM_SEQSTREAM_STORE_64B_NEXT_WORD(set, (volatile pcm_word_t *) &log->nvphlog[(log->tail+2)], 
00204                                           log->tornbit | (pcm_word_t) log->buffer[2]);
00205         PCM_SEQSTREAM_STORE_64B_NEXT_WORD(set, (volatile pcm_word_t *) &log->nvphlog[(log->tail+3)], 
00206                                           log->tornbit | (pcm_word_t) log->buffer[3]);
00207         PCM_SEQSTREAM_STORE_64B_NEXT_WORD(set, (volatile pcm_word_t *) &log->nvphlog[(log->tail+4)], 
00208                                           log->tornbit | (pcm_word_t) log->buffer[4]);
00209         PCM_SEQSTREAM_STORE_64B_NEXT_WORD(set, (volatile pcm_word_t *) &log->nvphlog[(log->tail+5)], 
00210                                           log->tornbit | (pcm_word_t) log->buffer[5]);
00211         PCM_SEQSTREAM_STORE_64B_NEXT_WORD(set, (volatile pcm_word_t *) &log->nvphlog[(log->tail+6)], 
00212                                           log->tornbit | (pcm_word_t) log->buffer[6]);
00213         PCM_SEQSTREAM_STORE_64B_NEXT_WORD(set, (volatile pcm_word_t *) &log->nvphlog[(log->tail+7)], 
00214                                           log->tornbit | (pcm_word_t) log->buffer[7]);
00215 
00216         log->buffer_count=0;
00217         log->tail = (log->tail+8) & (PHYSICAL_LOG_NUM_ENTRIES-1);
00218 
00219         /* Flip tornbit if wrap around */
00220         if (log->tail == 0x0) {
00221                 log->tornbit = ~(log->tornbit) & TORN_MASK;
00222         }
00223 }
00224 
00225 
00233 static inline
00234 m_result_t
00235 m_phlog_tornbit_write(pcm_storeset_t *set, m_phlog_tornbit_t *log, pcm_word_t value)
00236 {
00237         /* 
00238          * Check there is space in the buffer and in the log before writing the
00239          * new value. This duplicates some code but doesn't require unrolling state
00240          * in case of any error. We could also avoid duplication by putting some
00241          * extra branches but we prefer as few branches as possible in the critical
00242          * path.
00243          */
00244 
00245 #ifdef _DEBUG_THIS
00246         printf("buffer_count = %lu, write_remainder_nbits = %lu, log->head=%lu, log->tail=%lu\n", log->buffer_count, log->write_remainder_nbits, log->head, log->tail);
00247         printf("value = 0x%llX\n", value);
00248         printf("[T: %x] &log->tail= %p\n", pthread_self(), &log->tail);
00249 #endif
00250         /* Will new write flush buffer out to log? */
00251         if (log->buffer_count+1 > CHUNK_SIZE/sizeof(pcm_word_t)-1) {
00252                 /* UNCOMMON PATH */
00253                 /* Will log overflow? */
00254                 if (((log->tail + CHUNK_SIZE/sizeof(pcm_word_t)) & (PHYSICAL_LOG_NUM_ENTRIES-1))
00255                     == log->head)
00256                 {
00257 #ifdef _DEBUG_THIS
00258                         printf("LOG OVERFLOW!!!\n");
00259                         printf("tail: %lu\n", log->tail);
00260                         printf("head: %lu\n", log->head);
00261 #endif                  
00262                         return M_R_FAILURE;
00263                 } else {
00264 #ifdef _DEBUG_THIS              
00265                         printf("FLUSH BUFFER OUT\n");
00266 #endif                  
00267                         log->buffer[log->buffer_count] = ~TORN_MASK & 
00268                                                                                          (log->write_remainder | 
00269                                                                                           (value << log->write_remainder_nbits));
00270                         /* the new remainder is the part of the value not written to the buffer */                                                                
00271                         log->write_remainder = value >> (64 - (log->write_remainder_nbits+1));
00272                         log->write_remainder_nbits = (log->write_remainder_nbits + 1) & (64 - 1); /* efficient form of (log->write_remainder_nbits + 1) % 64 */
00273                         //log->buffer_count++; /* do we need this inc? write_buffer2log sets this to zero */
00274 
00275                         tornbit_write_buffer2log(set, log); /* write_buffer2log resets buffer_count to zero */
00276                         /* 
00277                          * If write_remainder_nbits wrapped around then we are actually left 
00278                          * with an overflow remainder of 64 bits, not zero. Write a complete 
00279                          * buffer entry using 63 of the 64 bits. 
00280                          */
00281                         if (log->write_remainder_nbits == 0) {
00282                                 log->buffer[0] = ~TORN_MASK & 
00283                                                  (log->write_remainder);
00284                                 log->buffer_count++;
00285                                 //log->write_remainder = value >> 63;
00286                                 /* the new remainder is the 64th bit that didn't make it */
00287                                 log->write_remainder = log->write_remainder >> 63;
00288                                 log->write_remainder_nbits = 1;
00289                         }
00290                 }
00291         } else {
00292                 /* COMMON PATH */
00293                 /* 
00294                  * Store the remainder bits in the lowest part of the buffer word
00295                  * and fill the highest part of the buffer word with a fragment of
00296                  * the value.
00297                  *
00298                  * +-+--------------------------------+------------------------+
00299                  * |T| value << write_remainder_nbits | write_remainder        |
00300                  * +-+--------------------------------+------------------------+
00301                  *
00302                  */
00303                 log->buffer[log->buffer_count] = ~TORN_MASK & 
00304                                                                                  (log->write_remainder | 
00305                                                                                   (value << log->write_remainder_nbits));
00306                 /* 
00307                  * Invariant: Remainder bits cannot be more than 63. So one buffer 
00308                  * slot should suffice. 
00309                  */
00310                 log->write_remainder_nbits = (log->write_remainder_nbits + 1) & (64 - 1); 
00311                 log->write_remainder = value >> (64 - log->write_remainder_nbits);
00312                 log->buffer_count++;
00313                 /* 
00314                  * Note: It would be easier to check whether there are 63 remaining
00315                  * bits and write them to the buffer. However this would add an 
00316                  * extra branch in the common path. We avoid this by doing the check
00317                  * in the uncommon path above instead. This is correct because we go 
00318                  * through the uncommon path every 8th write, so when we are left with 
00319                  * 64 bits to write out (i.e 63 bits we had to write + 1 new bit), we 
00320                  * will be in the uncommon path already.
00321                  */
00322         }
00323         return M_R_SUCCESS;
00324 }
00325 
00326 
00334 static inline
00335 m_result_t
00336 m_phlog_tornbit_flush(pcm_storeset_t *set, m_phlog_tornbit_t *log)
00337 {
00338 #ifdef _DEBUG_THIS              
00339         printf("m_phlog_flush\n");
00340         printf("nvmd       : %p\n", log->nvmd);
00341         printf("nvphlog    : %p\n", log->nvphlog);
00342 #endif  
00343         if (log->write_remainder_nbits > 0) {
00344                 /* Will log overflow? */
00345                 if (((log->tail + CHUNK_SIZE/sizeof(pcm_word_t)) & (PHYSICAL_LOG_NUM_ENTRIES-1))
00346                     == log->head) 
00347                 {
00348 #ifdef _DEBUG_THIS              
00349                         printf("FLUSH: LOG OVERFLOW!!!\n");
00350                         printf("tail: %lu\n", log->tail);
00351                         printf("head: %lu\n", log->head);
00352                         printf("stable_tail: %lu\n", log->stable_tail);
00353 #endif                  
00354                         return M_R_FAILURE;
00355                 } else {
00356                         /* 
00357                          * Invariant: After each log_write we check whether buffer is full. 
00358                          * Therefore when we get here we should have at least one slot free.
00359                          *
00360                          * Invariant: Remainder bits cannot be more than 63. So one buffer
00361                          * slot should suffice.
00362                          */
00363                         log->buffer[log->buffer_count] = ~TORN_MASK & 
00364                                                                                          (log->write_remainder);
00365                         log->buffer_count++;
00366                         log->write_remainder_nbits = 0;
00367                         log->write_remainder = 0;
00368                         /* 
00369                          * Write the buffer out to log even if it is not completely full. 
00370                          * Simplifies and makes bound checking faster: no extra branches, 
00371                          * no memory-fences.
00372                          */
00373                         tornbit_write_buffer2log(set, log);
00374                 }       
00375         }
00376         log->stable_tail = log->tail;
00377         PCM_SEQSTREAM_FLUSH(set);
00378 #ifdef _DEBUG_THIS              
00379         printf("phlog_tornbit_flush: log->tail = %llu, log->stable_tail = %llu\n", log->tail, log->stable_tail);
00380 #endif  
00381         return M_R_SUCCESS;
00382 }
00383 
00384 
00391 #if  0
00392 static inline
00393 m_result_t
00394 m_phlog_tornbit_read(m_phlog_tornbit_t *log, uint64_t *valuep)
00395 {
00396         uint64_t value;
00397 
00398 #ifdef _DEBUG_THIS              
00399         printf("log_read: %lu %lu %lu\n", log->read_index, (uint64_t) log->read_remainder_nbits, log->stable_tail);
00400 #endif
00401         /* Are there any stable data to read? */
00402         if (log->read_index != log->stable_tail && 
00403             log->read_index + 1 != log->stable_tail) 
00404         {
00405 #ifdef _DEBUG_THIS              
00406                 printf("[%04lu]: 0x%016llX\n", log->read_index, ~TORN_MASK & log->nvphlog[log->read_index]);
00407 #endif          
00408                 value = (~TORN_MASK & log->nvphlog[log->read_index]) >> log->read_remainder_nbits;
00409                 log->read_index = (log->read_index + 1) & (PHYSICAL_LOG_NUM_ENTRIES - 1);
00410 #ifdef _DEBUG_THIS              
00411                 printf("[%04lu]: 0x%016llX\n", log->read_index, ~TORN_MASK & log->nvphlog[log->read_index]);
00412 #endif          
00413                 value |= log->nvphlog[log->read_index] << (63 - log->read_remainder_nbits);
00414                 log->read_remainder_nbits = (log->read_remainder_nbits + 1) & (64 - 1);
00415                 if (log->read_remainder_nbits == 63) {
00416                         log->read_index = (log->read_index + 1) & (PHYSICAL_LOG_NUM_ENTRIES - 1);
00417                         log->read_remainder_nbits = 0;
00418                 }
00419                 *valuep = value;
00420 #ifdef _DEBUG_THIS              
00421                 printf("\t read value: 0x%016lX\n", value);
00422 #endif          
00423                 return M_R_SUCCESS;
00424         }
00425 #ifdef _DEBUG_THIS              
00426         printf("NO DATA\n");
00427 #endif          
00428         return M_R_FAILURE;
00429 }
00430 #else
00431 
00432 static inline
00433 m_result_t
00434 m_phlog_tornbit_read(m_phlog_tornbit_t *log, uint64_t *valuep)
00435 {
00436         uint64_t value;
00437         uint64_t tmp;
00438 
00439 #ifdef _DEBUG_THIS              
00440         printf("log_read: %lu %lu %lu\n", log->read_index, (uint64_t) log->read_remainder_nbits, log->stable_tail);
00441 #endif
00442         /* Are there any stable data to read? */
00443         if (log->read_index != log->stable_tail && 
00444             log->read_index + 1 != log->stable_tail) 
00445         {
00446 #ifdef _DEBUG_THIS              
00447                 printf("[%04lu]: 0x%016llX\n", log->read_index, ~TORN_MASK & log->nvphlog[log->read_index]);
00448 #endif          
00449                 tmp = load_nt_word(&log->nvphlog[log->read_index]);
00450                 value = (~TORN_MASK & tmp) >> log->read_remainder_nbits;
00451                 log->read_index = (log->read_index + 1) & (PHYSICAL_LOG_NUM_ENTRIES - 1);
00452 #ifdef _DEBUG_THIS              
00453                 printf("[%04lu]: 0x%016llX\n", log->read_index, ~TORN_MASK & log->nvphlog[log->read_index]);
00454 #endif          
00455                 tmp = load_nt_word(&log->nvphlog[log->read_index]);
00456                 value |= tmp << (63 - log->read_remainder_nbits);
00457                 log->read_remainder_nbits = (log->read_remainder_nbits + 1) & (64 - 1);
00458                 if (log->read_remainder_nbits == 63) {
00459                         log->read_index = (log->read_index + 1) & (PHYSICAL_LOG_NUM_ENTRIES - 1);
00460                         log->read_remainder_nbits = 0;
00461                 }
00462                 *valuep = value;
00463 #ifdef _DEBUG_THIS              
00464                 printf("\t read value: 0x%016lX\n", value);
00465 #endif          
00466                 return M_R_SUCCESS;
00467         }
00468 #ifdef _DEBUG_THIS              
00469         printf("NO DATA\n");
00470 #endif          
00471         return M_R_FAILURE;
00472 }
00473 #endif
00474 
00481 static inline
00482 bool
00483 m_phlog_tornbit_stable_exists(m_phlog_tornbit_t *log)
00484 {
00485         if (log->read_index != log->stable_tail) {
00486                 return true;
00487         }
00488         return false;
00489 }
00490 
00491 
00492 
00498 static inline
00499 void
00500 m_phlog_tornbit_next_chunk(m_phlog_tornbit_t *log)
00501 {
00502         uint64_t read_index; 
00503 
00504         if (log->read_remainder_nbits > 0) {
00505                 log->read_remainder_nbits = 0;
00506                 read_index = log->read_index & ~(CHUNK_SIZE/sizeof(pcm_word_t) - 1);
00507                 log->read_index = (read_index + CHUNK_SIZE/sizeof(pcm_word_t)) & (PHYSICAL_LOG_NUM_ENTRIES-1); 
00508         } else {
00509                 log->read_remainder_nbits = 0;
00510                 read_index = log->read_index & ~(CHUNK_SIZE/sizeof(pcm_word_t) - 1);
00511                 /* 
00512                  * If current log->read_index points to the beginning of a chunk
00513                  * then we are already in the next chunk so we don't need to advance.
00514                  */
00515                 if (read_index != log->read_index) {
00516                         log->read_index = (read_index + CHUNK_SIZE/sizeof(pcm_word_t)) & (PHYSICAL_LOG_NUM_ENTRIES-1); 
00517                 }
00518         }
00519 }
00520 
00521 
00528 static inline
00529 m_result_t
00530 m_phlog_tornbit_checkpoint_readindex(m_phlog_tornbit_t *log, uint64_t *readindex)
00531 {
00532         if (log->read_remainder_nbits > 0) {
00533                 return M_R_FAILURE;
00534         }
00535         *readindex = log->read_index;
00536 
00537         return M_R_SUCCESS;
00538 }
00539 
00540 
00547 static inline
00548 void
00549 m_phlog_tornbit_restore_readindex(m_phlog_tornbit_t *log, uint64_t readindex)
00550 {
00551         log->read_index = readindex;
00552         log->read_remainder_nbits = 0;
00553 }
00554 
00555 
00556 static inline
00557 m_result_t
00558 m_phlog_tornbit_truncate_sync(pcm_storeset_t *set, m_phlog_tornbit_t *phlog) 
00559 {
00560         phlog->head = phlog->tail;
00561 
00562         //FIXME: do we need a flush? PCM_NT_FLUSH(set);
00563         PCM_NT_STORE(set, (volatile pcm_word_t *) &phlog->nvmd->flags, (pcm_word_t) (phlog->head | phlog->tornbit));
00564         PCM_NT_FLUSH(set);
00565         
00566         return M_R_SUCCESS;
00567 }
00568 
00569 m_result_t m_phlog_tornbit_format (pcm_storeset_t *set, m_phlog_tornbit_nvmd_t *nvmd, pcm_word_t *nvphlog, int type);
00570 m_result_t m_phlog_tornbit_alloc (m_phlog_tornbit_t **phlog_tornbitp);
00571 m_result_t m_phlog_tornbit_init (m_phlog_tornbit_t *phlog, m_phlog_tornbit_nvmd_t *nvmd, pcm_word_t *nvphlog);
00572 m_result_t m_phlog_tornbit_check_consistency(m_phlog_tornbit_nvmd_t *nvmd, pcm_word_t *nvphlog, uint64_t *stable_tail);
00573 m_result_t m_phlog_tornbit_prepare_truncate(m_log_dsc_t *log_dsc);
00574 m_result_t m_phlog_tornbit_truncate_async(pcm_storeset_t *set, m_phlog_tornbit_t *phlog);
00575 
00576 
00577 #ifdef __cplusplus
00578 }
00579 #endif
00580 
00581 #endif /* _PHYSICAL_LOG_TORNBIT_H */

Generated on Sat Apr 23 11:43:36 2011 for Mnemosyne by  doxygen 1.4.7