usermode/library/mcore/src/log/mgr.c

00001 /*
00002     Copyright (C) 2011 Computer Sciences Department, 
00003     University of Wisconsin -- Madison
00004 
00005     ----------------------------------------------------------------------
00006 
00007     This file is part of Mnemosyne: Lightweight Persistent Memory, 
00008     originally developed at the University of Wisconsin -- Madison.
00009 
00010     Mnemosyne was originally developed primarily by Haris Volos
00011     with contributions from Andres Jaan Tack.
00012 
00013     ----------------------------------------------------------------------
00014 
00015     Mnemosyne is free software; you can redistribute it and/or
00016     modify it under the terms of the GNU General Public License
00017     as published by the Free Software Foundation, version 2
00018     of the License.
00019  
00020     Mnemosyne is distributed in the hope that it will be useful,
00021     but WITHOUT ANY WARRANTY; without even the implied warranty of
00022     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023     GNU General Public License for more details.
00024 
00025     You should have received a copy of the GNU General Public License
00026     along with this program; if not, write to the Free Software
00027     Foundation, Inc., 51 Franklin Street, Fifth Floor, 
00028     Boston, MA  02110-1301, USA.
00029 
00030 ### END HEADER ###
00031 */
00032 
#include <sys/mman.h>
#include <pthread.h>
#include <malloc.h>
#include <stdint.h>
#include <stdlib.h>
#include <result.h>
#include <debug.h>
#include <list.h>
#include "log_i.h"
#include "logtrunc.h"
#include "staticlogs.h"
#include "../segment.h"
#include "../pregionlayout.h"
#include "phlog_tornbit.h"
00053 
00054 __attribute__ ((section("PERSISTENT"))) pcm_word_t log_pool = 0x0;
00055 
00056 #define LOG_NUM 32
00057 
00058 
00059 typedef struct m_logtype_entry_s m_logtype_entry_t;
00060 struct m_logtype_entry_s {
00061         int              type;
00062         m_log_ops_t      *ops;
00063         struct list_head list;
00064 };
00065 
00066 
00067 static pthread_mutex_t      logmgr_init_lock = PTHREAD_MUTEX_INITIALIZER;
00068 static m_logmgr_t           *logmgr = NULL;
00069 static volatile char        logmgr_initialized = 0; /* reads and writes to single-byte memory locations are guaranteed to be atomic. Don't need to bother with alignment. */
00070 
00071 #define NULL_LOG_OPS { NULL, NULL, NULL, NULL, NULL}
00072 
00078 static m_log_ops_t static_log_ops[LF_TYPE_VALIDVALUES] =
00079 {
00080         NULL_LOG_OPS
00081 };
00082 
00083 
00084 static m_result_t register_static_logtypes(m_logmgr_t *mgr);
00085 static m_result_t do_recovery(pcm_storeset_t *set, m_logmgr_t *mgr);
00086 
00087 
00096 static
00097 m_result_t
00098 create_log_pool(pcm_storeset_t *set, m_logmgr_t *mgr)
00099 {
00100         uintptr_t        metadata_start_addr;
00101         uintptr_t        logs_start_addr;
00102         int              metadata_section_size;
00103         int              physical_log_size;
00104         void             *addr;
00105         m_log_dsc_t      *log_dscs;
00106         m_segidx_entry_t *segidx_entry;
00107         int              i;
00108 
00109         if (!log_pool) {
00110                 /* 
00111                  * Check whether the segment already exists. This is possible if
00112                  * there was a crash right after segment was created but before 
00113                  * log_pool was written.
00114                  */
00115                 if (m_segment_find_using_addr((void *) LOG_POOL_START, &segidx_entry) 
00116                     != M_R_SUCCESS) 
00117                 {
00118                         addr = m_pmap2((void *) LOG_POOL_START, LOG_POOL_SIZE, 
00119                                        PROT_READ|PROT_WRITE, MAP_FIXED);
00120                         if (addr == MAP_FAILED) {
00121                                 M_INTERNALERROR("Could not allocate logs pool segment.\n");
00122                         }
00123                 }
00124                 PCM_NT_STORE(set, (volatile pcm_word_t *) &log_pool, (pcm_word_t) addr);
00125                 PCM_NT_FLUSH(set);
00126         }
00127         
00128         /* 
00129          * Now read the non-volatile log metadata and non-volatile physical logs.
00130          * 
00131          * Physical logs should be page aligned to get maximum bandwidth from the 
00132          * system. Since sizeof(metadata) much smaller than sizeof(PAGE) we 
00133          * aggregate all the metadata together.
00134          */
00135         metadata_start_addr = LOG_POOL_START; /* this is already page aligned */
00136         metadata_section_size = PAGE_ALIGN(LOG_NUM * sizeof(m_log_nvmd_t));
00137         logs_start_addr = metadata_start_addr + metadata_section_size;
00138         physical_log_size = PAGE_ALIGN(PHYSICAL_LOG_SIZE);
00139         assert(metadata_section_size + LOG_NUM*physical_log_size <= LOG_POOL_SIZE);
00140         log_dscs = (m_log_dsc_t *) calloc(LOG_NUM, sizeof(m_log_dsc_t));
00141         for (i=0; i<LOG_NUM; i++) {
00142                 log_dscs[i].nvmd = (m_log_nvmd_t *) (metadata_start_addr + 
00143                                                         sizeof(m_log_nvmd_t)*i);
00144                 log_dscs[i].nvphlog = (pcm_word_t *) (logs_start_addr + 
00145                                                          physical_log_size*i);
00146                 log_dscs[i].log = NULL;
00147                 log_dscs[i].ops = NULL;
00148                 log_dscs[i].logorder = INV_LOG_ORDER;
00149                 if ((log_dscs[i].nvmd->generic_flags & LF_TYPE_MASK) == 
00150                     LF_TYPE_FREE) 
00151                 {
00152                         list_add_tail(&(log_dscs[i].list), &(mgr->free_logs_list));
00153                 } else {
00154                         list_add_tail(&(log_dscs[i].list), &(mgr->pending_logs_list));
00155                 }
00156         }
00157 
00158         return M_R_SUCCESS;
00159 }
00160 
00161 
00166 static
00167 m_result_t
00168 logmgr_init(pcm_storeset_t *set)
00169 {
00170         m_result_t rv = M_R_FAILURE;
00171         m_logmgr_t *mgr;
00172 
00173         pthread_mutex_lock(&logmgr_init_lock);
00174         if (logmgr_initialized) {
00175                 rv = M_R_SUCCESS;
00176                 goto out;
00177         }
00178 
00179         if (!(mgr = (m_logmgr_t *) malloc(sizeof(m_logmgr_t)))) {
00180                 rv = M_R_NOMEMORY;
00181                 goto out;
00182         }
00183         pthread_mutex_init(&(mgr->mutex), NULL);
00184         INIT_LIST_HEAD(&(mgr->known_logtypes_list));
00185         INIT_LIST_HEAD(&(mgr->free_logs_list));
00186         INIT_LIST_HEAD(&(mgr->active_logs_list));
00187         INIT_LIST_HEAD(&(mgr->pending_logs_list));
00188         create_log_pool(set, mgr);
00189         register_static_logtypes(mgr);
00190         do_recovery(set, mgr); /* will recover any known log types so far. */
00191 
00192         /* 
00193          * Be careful, order matters. 
00194          * 
00195          * x86 does not reorder STORE ops so we know that if someone sees variable
00196          * 'logmgr_initialized' set then it is guaranteed to see the assignment 
00197          * to 'logmgr'.
00198          */
00199         logmgr = mgr;
00200         logmgr_initialized = 1; 
00201 
00202         m_logtrunc_init((m_logmgr_t *) logmgr);
00203         rv = M_R_SUCCESS;
00204 
00205 out:
00206         pthread_mutex_unlock(&logmgr_init_lock);
00207         return rv;
00208 }
00209 
00210 
00211 m_result_t
00212 m_logmgr_init(pcm_storeset_t *set)
00213 {
00214         return logmgr_init(set);
00215 }
00216 
00217 
00218 
00225 m_result_t
00226 m_logmgr_fini(void)
00227 {
00228 #ifdef _M_STATS_BUILD
00229         m_logmgr_stat_print();
00230         printf("total_trunc_time  %llu (ns)\n", logmgr->trunc_time);
00231         printf("total_trunc_count %llu\n", logmgr->trunc_count);
00232         if (logmgr->trunc_count>0) {
00233                 printf("avg_trunc_time    %llu (ns)\n", logmgr->trunc_time/logmgr->trunc_count);
00234         }       
00235 #endif
00236         return M_R_SUCCESS;
00237 }
00238 
00239 
00240 
00241 static
00242 m_result_t
00243 register_logtype(m_logmgr_t *mgr, int type, m_log_ops_t *ops, int lock)
00244 {
00245         m_result_t        rv = M_R_FAILURE;
00246         m_logtype_entry_t *logtype_entry;
00247         m_log_dsc_t       *log_dsc;
00248 
00249         if (lock) {
00250                 pthread_mutex_lock(&(mgr->mutex));
00251         }
00252         /* first check that the type is not already registered. */
00253         list_for_each_entry(logtype_entry, &(mgr->known_logtypes_list), list) {
00254                 if (logtype_entry->type == type) {
00255                         /* already registered, nothing need to be done */
00256                         rv = M_R_SUCCESS;
00257                         goto out;
00258                 }
00259         }
00260         logtype_entry = NULL;
00261         if (!(logtype_entry = malloc(sizeof(m_logtype_entry_t)))) {
00262                 rv = M_R_NOMEMORY;
00263                 goto out;
00264         }
00265         logtype_entry->type = type;
00266         logtype_entry->ops = ops;
00267         list_add_tail(&(logtype_entry->list), &(mgr->known_logtypes_list));
00268         /* Update the ops field of any pending log of the newly registered type and allocate a log. */
00269         list_for_each_entry(log_dsc, &(mgr->pending_logs_list), list) {
00270                 if ((log_dsc->nvmd->generic_flags & LF_TYPE_MASK)  == type) {
00271                         log_dsc->ops = ops;
00272                         assert(log_dsc->ops->alloc(log_dsc) == M_R_SUCCESS);
00273                 }       
00274         }
00275 
00276         rv = M_R_SUCCESS;
00277 out:
00278         if (lock) {
00279                 pthread_mutex_unlock(&(mgr->mutex));
00280         }
00281         return rv;
00282 }
00283 
00284 
00285 static
00286 m_result_t
00287 register_static_logtypes(m_logmgr_t *mgr)
00288 {
00289         int i;
00290 
00291         for (i=1; i<LF_TYPE_VALIDVALUES; i++) {
00292                 assert(register_logtype(mgr, i, &static_log_ops[i], 0) == M_R_SUCCESS);
00293         }
00294         
00295         return M_R_SUCCESS;
00296 }
00297 
00298 
00299 m_result_t
00300 m_logmgr_register_logtype(pcm_storeset_t *set, int type, m_log_ops_t *ops)
00301 {
00302         if (!logmgr_initialized) {
00303                 logmgr_init(set);
00304         }
00305         return register_logtype((m_logmgr_t *)logmgr, type, ops, 1);
00306 }
00307 
00308 
00313 static
00314 m_result_t
00315 do_recovery(pcm_storeset_t *set, m_logmgr_t *mgr)
00316 {
00317         m_log_dsc_t        *log_dsc;
00318         m_log_dsc_t        *log_dsc_tmp;
00319         m_log_dsc_t        *log_dsc_to_recover;
00320         struct list_head   recovery_list;
00321         unsigned int       nlogfragments_recovered;
00322 #ifdef _M_STATS_BUILD
00323         struct timeval     start_time;
00324         struct timeval     stop_time;
00325         unsigned long long op_time;
00326 #endif
00327 
00328 
00329         /* 
00330          * First collect all logs which are to be recovered and prepare
00331          * each log for recovery. After a log is prepared, it might pass 
00332          * back a recovery order number if it cares about the order 
00333          * the recovery is performed with respect to other logs.
00334          */
00335         /* FIXME: Collect and recover logs by type. */
00336         INIT_LIST_HEAD(&recovery_list);
00337         list_for_each_entry_safe(log_dsc, log_dsc_tmp, &(mgr->pending_logs_list), list) {
00338                 if (log_dsc->ops && log_dsc->ops->recovery_init) {
00339                         log_dsc->ops->recovery_init(set, log_dsc);
00340                         list_del_init(&(log_dsc->list));
00341                         list_add(&(log_dsc->list), &recovery_list);
00342                 }
00343         }
00344 
00345 #ifdef _M_STATS_BUILD
00346         gettimeofday(&start_time, NULL);
00347 #endif
00348 
00349         /* 
00350          * Find the next log to recover, recover it, update its recovery
00351          * order, and repeat until there are no more logs to recover.
00352          */
00353         nlogfragments_recovered = 0;
00354         do {
00355                 log_dsc_to_recover = NULL; 
00356                 list_for_each_entry(log_dsc, &recovery_list, list) {
00357                         if (log_dsc->logorder == INV_LOG_ORDER) {
00358                                 continue;
00359                         }
00360                         if (log_dsc_to_recover == NULL) {
00361                                 log_dsc_to_recover = log_dsc;
00362                         } else {
00363                                 if (log_dsc_to_recover->logorder > log_dsc->logorder) {
00364                                         log_dsc_to_recover = log_dsc;
00365                                 }
00366                         }
00367                 }
00368                 if (log_dsc_to_recover) {
00369                         assert(log_dsc_to_recover->ops);
00370                         assert(log_dsc_to_recover->ops->recovery_do);
00371                         assert(log_dsc_to_recover->ops->recovery_prepare_next);
00372                         log_dsc_to_recover->ops->recovery_do(set, log_dsc_to_recover);
00373                         log_dsc_to_recover->ops->recovery_prepare_next(set, log_dsc_to_recover);
00374                         nlogfragments_recovered++;
00375                 }       
00376         } while(log_dsc_to_recover);
00377 
00378         /* Make the recovered logs available for reuse */
00379         list_splice(&recovery_list, &(mgr->free_logs_list));
00380 
00381 #ifdef _M_STATS_BUILD
00382         gettimeofday(&stop_time, NULL);
00383 #endif
00384 #ifdef _M_STATS_BUILD
00385         gettimeofday(&stop_time, NULL);
00386         op_time = 1000000 * (stop_time.tv_sec - start_time.tv_sec) +
00387                              stop_time.tv_usec - start_time.tv_usec;
00388         fprintf(stderr, "log_recovery_latency    = %llu (us)\n", op_time);
00389         fprintf(stderr, "nlogfragments_recovered = %u \n", nlogfragments_recovered);
00390 #endif
00391         return M_R_SUCCESS;
00392 }
00393 
00394 
00395 m_result_t 
00396 m_logmgr_do_recovery(pcm_storeset_t *set)
00397 {
00398         return do_recovery(set, logmgr);
00399 }
00400 
00401 
00405 m_result_t
00406 m_logmgr_alloc_log(pcm_storeset_t *set, int type, uint64_t flags, m_log_dsc_t **log_dscp)
00407 {
00408         m_result_t        rv = M_R_FAILURE;
00409         m_log_dsc_t       *log_dsc;
00410         m_log_dsc_t       *free_log_dsc = NULL;
00411         m_log_dsc_t       *free_log_dsc_notype = NULL;
00412         m_logtype_entry_t *logtype_entry;
00413 
00414         pthread_mutex_lock(&(logmgr->mutex));
00415         list_for_each_entry(log_dsc, &(logmgr->free_logs_list), list) {
00416                 if (((log_dsc->nvmd->generic_flags & LF_TYPE_MASK) ==  type) &&
00417                     free_log_dsc == NULL) 
00418                 {
00419                         free_log_dsc = log_dsc;
00420                 }
00421                 if (((log_dsc->nvmd->generic_flags & LF_TYPE_MASK) ==  LF_TYPE_FREE) &&
00422                     free_log_dsc_notype == NULL) 
00423                 {
00424                         free_log_dsc_notype = log_dsc;
00425                 }
00426         }
00427         /* Prefer using a log descriptor of the same type */
00428         if (free_log_dsc) {
00429                 log_dsc = free_log_dsc;
00430         } else if (free_log_dsc_notype) {
00431                 /* assign the operations specific for this log type */
00432                 log_dsc = free_log_dsc_notype;
00433                 list_for_each_entry(logtype_entry, &(logmgr->known_logtypes_list), list) {
00434                         if (logtype_entry->type == type) {
00435                                 log_dsc->ops = logtype_entry->ops;
00436                                 assert(log_dsc->ops->alloc(log_dsc) == M_R_SUCCESS);
00437                                 break;
00438                         }
00439                 }
00440                 if (!log_dsc->ops) {
00441                         /* unknown type */
00442                         rv = M_R_FAILURE;
00443                         goto out;
00444                 }
00445         } else {
00446                 /* 
00447                  * TODO: there might be an available log in the free list but 
00448                  * be of different type. Need to get one out of the free list 
00449                  * and clean it.
00450                  */
00451                 rv = M_R_FAILURE;
00452                 goto out;
00453         }
00454 
00455         list_del_init(&(log_dsc->list));
00456         list_add_tail(&(log_dsc->list), &(logmgr->active_logs_list));
00457 
00458         /* Finally, initialize the log */
00459         log_dsc->flags = flags;
00460         assert(log_dsc->ops && log_dsc->ops->init);
00461         assert(log_dsc->ops->init(set, log_dsc->log, log_dsc) == M_R_SUCCESS);
00462         PCM_NT_STORE(set, (volatile pcm_word_t *) &(log_dsc->nvmd->generic_flags), 
00463                      (pcm_word_t) ((log_dsc->nvmd->generic_flags & ~LF_TYPE_MASK) | type));
00464         PCM_NT_FLUSH(set);
00465 
00466         *log_dscp = log_dsc;
00467         rv = M_R_SUCCESS;
00468 out:
00469         pthread_mutex_unlock(&logmgr->mutex);
00470         return rv;
00471 }
00472 
00473 
00477 m_result_t 
00478 m_logmgr_free_log(m_log_dsc_t *log_dsc)
00479 {
00480         //TODO
00481         return M_R_SUCCESS;
00482 }
00483 
00484 void
00485 m_logmgr_stat_print()
00486 {
00487         FILE *fout = stdout;
00488 
00489         m_log_dsc_t       *log_dsc;
00490 
00491         fprintf(fout, "PER LOG STATISTICS\n");
00492         list_for_each_entry(log_dsc, &(logmgr->active_logs_list), list) {
00493                 log_dsc->ops->report_stats(log_dsc);
00494         }
00495         fprintf(fout, "\n");
00496         fprintf(fout, "TRUNCATION THREAD STATISTICS\n");
00497 }

Generated on Sat Apr 23 11:43:35 2011 for Mnemosyne by  doxygen 1.4.7