00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00040 #include <sys/mman.h>
00041 #include <pthread.h>
00042 #include <malloc.h>
00043 #include <stdint.h>
00044 #include <result.h>
00045 #include <debug.h>
00046 #include <list.h>
00047 #include "log_i.h"
00048 #include "logtrunc.h"
00049 #include "staticlogs.h"
00050 #include "../segment.h"
00051 #include "../pregionlayout.h"
00052 #include "phlog_tornbit.h"
00053
00054 __attribute__ ((section("PERSISTENT"))) pcm_word_t log_pool = 0x0;
00055
00056 #define LOG_NUM 32
00057
00058
00059 typedef struct m_logtype_entry_s m_logtype_entry_t;
00060 struct m_logtype_entry_s {
00061 int type;
00062 m_log_ops_t *ops;
00063 struct list_head list;
00064 };
00065
00066
00067 static pthread_mutex_t logmgr_init_lock = PTHREAD_MUTEX_INITIALIZER;
00068 static m_logmgr_t *logmgr = NULL;
00069 static volatile char logmgr_initialized = 0;
00070
00071 #define NULL_LOG_OPS { NULL, NULL, NULL, NULL, NULL}
00072
00078 static m_log_ops_t static_log_ops[LF_TYPE_VALIDVALUES] =
00079 {
00080 NULL_LOG_OPS
00081 };
00082
00083
00084 static m_result_t register_static_logtypes(m_logmgr_t *mgr);
00085 static m_result_t do_recovery(pcm_storeset_t *set, m_logmgr_t *mgr);
00086
00087
00096 static
00097 m_result_t
00098 create_log_pool(pcm_storeset_t *set, m_logmgr_t *mgr)
00099 {
00100 uintptr_t metadata_start_addr;
00101 uintptr_t logs_start_addr;
00102 int metadata_section_size;
00103 int physical_log_size;
00104 void *addr;
00105 m_log_dsc_t *log_dscs;
00106 m_segidx_entry_t *segidx_entry;
00107 int i;
00108
00109 if (!log_pool) {
00110
00111
00112
00113
00114
00115 if (m_segment_find_using_addr((void *) LOG_POOL_START, &segidx_entry)
00116 != M_R_SUCCESS)
00117 {
00118 addr = m_pmap2((void *) LOG_POOL_START, LOG_POOL_SIZE,
00119 PROT_READ|PROT_WRITE, MAP_FIXED);
00120 if (addr == MAP_FAILED) {
00121 M_INTERNALERROR("Could not allocate logs pool segment.\n");
00122 }
00123 }
00124 PCM_NT_STORE(set, (volatile pcm_word_t *) &log_pool, (pcm_word_t) addr);
00125 PCM_NT_FLUSH(set);
00126 }
00127
00128
00129
00130
00131
00132
00133
00134
00135 metadata_start_addr = LOG_POOL_START;
00136 metadata_section_size = PAGE_ALIGN(LOG_NUM * sizeof(m_log_nvmd_t));
00137 logs_start_addr = metadata_start_addr + metadata_section_size;
00138 physical_log_size = PAGE_ALIGN(PHYSICAL_LOG_SIZE);
00139 assert(metadata_section_size + LOG_NUM*physical_log_size <= LOG_POOL_SIZE);
00140 log_dscs = (m_log_dsc_t *) calloc(LOG_NUM, sizeof(m_log_dsc_t));
00141 for (i=0; i<LOG_NUM; i++) {
00142 log_dscs[i].nvmd = (m_log_nvmd_t *) (metadata_start_addr +
00143 sizeof(m_log_nvmd_t)*i);
00144 log_dscs[i].nvphlog = (pcm_word_t *) (logs_start_addr +
00145 physical_log_size*i);
00146 log_dscs[i].log = NULL;
00147 log_dscs[i].ops = NULL;
00148 log_dscs[i].logorder = INV_LOG_ORDER;
00149 if ((log_dscs[i].nvmd->generic_flags & LF_TYPE_MASK) ==
00150 LF_TYPE_FREE)
00151 {
00152 list_add_tail(&(log_dscs[i].list), &(mgr->free_logs_list));
00153 } else {
00154 list_add_tail(&(log_dscs[i].list), &(mgr->pending_logs_list));
00155 }
00156 }
00157
00158 return M_R_SUCCESS;
00159 }
00160
00161
00166 static
00167 m_result_t
00168 logmgr_init(pcm_storeset_t *set)
00169 {
00170 m_result_t rv = M_R_FAILURE;
00171 m_logmgr_t *mgr;
00172
00173 pthread_mutex_lock(&logmgr_init_lock);
00174 if (logmgr_initialized) {
00175 rv = M_R_SUCCESS;
00176 goto out;
00177 }
00178
00179 if (!(mgr = (m_logmgr_t *) malloc(sizeof(m_logmgr_t)))) {
00180 rv = M_R_NOMEMORY;
00181 goto out;
00182 }
00183 pthread_mutex_init(&(mgr->mutex), NULL);
00184 INIT_LIST_HEAD(&(mgr->known_logtypes_list));
00185 INIT_LIST_HEAD(&(mgr->free_logs_list));
00186 INIT_LIST_HEAD(&(mgr->active_logs_list));
00187 INIT_LIST_HEAD(&(mgr->pending_logs_list));
00188 create_log_pool(set, mgr);
00189 register_static_logtypes(mgr);
00190 do_recovery(set, mgr);
00191
00192
00193
00194
00195
00196
00197
00198
00199 logmgr = mgr;
00200 logmgr_initialized = 1;
00201
00202 m_logtrunc_init((m_logmgr_t *) logmgr);
00203 rv = M_R_SUCCESS;
00204
00205 out:
00206 pthread_mutex_unlock(&logmgr_init_lock);
00207 return rv;
00208 }
00209
00210
00211 m_result_t
00212 m_logmgr_init(pcm_storeset_t *set)
00213 {
00214 return logmgr_init(set);
00215 }
00216
00217
00218
00225 m_result_t
00226 m_logmgr_fini(void)
00227 {
00228 #ifdef _M_STATS_BUILD
00229 m_logmgr_stat_print();
00230 printf("total_trunc_time %llu (ns)\n", logmgr->trunc_time);
00231 printf("total_trunc_count %llu\n", logmgr->trunc_count);
00232 if (logmgr->trunc_count>0) {
00233 printf("avg_trunc_time %llu (ns)\n", logmgr->trunc_time/logmgr->trunc_count);
00234 }
00235 #endif
00236 return M_R_SUCCESS;
00237 }
00238
00239
00240
00241 static
00242 m_result_t
00243 register_logtype(m_logmgr_t *mgr, int type, m_log_ops_t *ops, int lock)
00244 {
00245 m_result_t rv = M_R_FAILURE;
00246 m_logtype_entry_t *logtype_entry;
00247 m_log_dsc_t *log_dsc;
00248
00249 if (lock) {
00250 pthread_mutex_lock(&(mgr->mutex));
00251 }
00252
00253 list_for_each_entry(logtype_entry, &(mgr->known_logtypes_list), list) {
00254 if (logtype_entry->type == type) {
00255
00256 rv = M_R_SUCCESS;
00257 goto out;
00258 }
00259 }
00260 logtype_entry = NULL;
00261 if (!(logtype_entry = malloc(sizeof(m_logtype_entry_t)))) {
00262 rv = M_R_NOMEMORY;
00263 goto out;
00264 }
00265 logtype_entry->type = type;
00266 logtype_entry->ops = ops;
00267 list_add_tail(&(logtype_entry->list), &(mgr->known_logtypes_list));
00268
00269 list_for_each_entry(log_dsc, &(mgr->pending_logs_list), list) {
00270 if ((log_dsc->nvmd->generic_flags & LF_TYPE_MASK) == type) {
00271 log_dsc->ops = ops;
00272 assert(log_dsc->ops->alloc(log_dsc) == M_R_SUCCESS);
00273 }
00274 }
00275
00276 rv = M_R_SUCCESS;
00277 out:
00278 if (lock) {
00279 pthread_mutex_unlock(&(mgr->mutex));
00280 }
00281 return rv;
00282 }
00283
00284
00285 static
00286 m_result_t
00287 register_static_logtypes(m_logmgr_t *mgr)
00288 {
00289 int i;
00290
00291 for (i=1; i<LF_TYPE_VALIDVALUES; i++) {
00292 assert(register_logtype(mgr, i, &static_log_ops[i], 0) == M_R_SUCCESS);
00293 }
00294
00295 return M_R_SUCCESS;
00296 }
00297
00298
00299 m_result_t
00300 m_logmgr_register_logtype(pcm_storeset_t *set, int type, m_log_ops_t *ops)
00301 {
00302 if (!logmgr_initialized) {
00303 logmgr_init(set);
00304 }
00305 return register_logtype((m_logmgr_t *)logmgr, type, ops, 1);
00306 }
00307
00308
00313 static
00314 m_result_t
00315 do_recovery(pcm_storeset_t *set, m_logmgr_t *mgr)
00316 {
00317 m_log_dsc_t *log_dsc;
00318 m_log_dsc_t *log_dsc_tmp;
00319 m_log_dsc_t *log_dsc_to_recover;
00320 struct list_head recovery_list;
00321 unsigned int nlogfragments_recovered;
00322 #ifdef _M_STATS_BUILD
00323 struct timeval start_time;
00324 struct timeval stop_time;
00325 unsigned long long op_time;
00326 #endif
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336 INIT_LIST_HEAD(&recovery_list);
00337 list_for_each_entry_safe(log_dsc, log_dsc_tmp, &(mgr->pending_logs_list), list) {
00338 if (log_dsc->ops && log_dsc->ops->recovery_init) {
00339 log_dsc->ops->recovery_init(set, log_dsc);
00340 list_del_init(&(log_dsc->list));
00341 list_add(&(log_dsc->list), &recovery_list);
00342 }
00343 }
00344
00345 #ifdef _M_STATS_BUILD
00346 gettimeofday(&start_time, NULL);
00347 #endif
00348
00349
00350
00351
00352
00353 nlogfragments_recovered = 0;
00354 do {
00355 log_dsc_to_recover = NULL;
00356 list_for_each_entry(log_dsc, &recovery_list, list) {
00357 if (log_dsc->logorder == INV_LOG_ORDER) {
00358 continue;
00359 }
00360 if (log_dsc_to_recover == NULL) {
00361 log_dsc_to_recover = log_dsc;
00362 } else {
00363 if (log_dsc_to_recover->logorder > log_dsc->logorder) {
00364 log_dsc_to_recover = log_dsc;
00365 }
00366 }
00367 }
00368 if (log_dsc_to_recover) {
00369 assert(log_dsc_to_recover->ops);
00370 assert(log_dsc_to_recover->ops->recovery_do);
00371 assert(log_dsc_to_recover->ops->recovery_prepare_next);
00372 log_dsc_to_recover->ops->recovery_do(set, log_dsc_to_recover);
00373 log_dsc_to_recover->ops->recovery_prepare_next(set, log_dsc_to_recover);
00374 nlogfragments_recovered++;
00375 }
00376 } while(log_dsc_to_recover);
00377
00378
00379 list_splice(&recovery_list, &(mgr->free_logs_list));
00380
00381 #ifdef _M_STATS_BUILD
00382 gettimeofday(&stop_time, NULL);
00383 #endif
00384 #ifdef _M_STATS_BUILD
00385 gettimeofday(&stop_time, NULL);
00386 op_time = 1000000 * (stop_time.tv_sec - start_time.tv_sec) +
00387 stop_time.tv_usec - start_time.tv_usec;
00388 fprintf(stderr, "log_recovery_latency = %llu (us)\n", op_time);
00389 fprintf(stderr, "nlogfragments_recovered = %u \n", nlogfragments_recovered);
00390 #endif
00391 return M_R_SUCCESS;
00392 }
00393
00394
00395 m_result_t
00396 m_logmgr_do_recovery(pcm_storeset_t *set)
00397 {
00398 return do_recovery(set, logmgr);
00399 }
00400
00401
00405 m_result_t
00406 m_logmgr_alloc_log(pcm_storeset_t *set, int type, uint64_t flags, m_log_dsc_t **log_dscp)
00407 {
00408 m_result_t rv = M_R_FAILURE;
00409 m_log_dsc_t *log_dsc;
00410 m_log_dsc_t *free_log_dsc = NULL;
00411 m_log_dsc_t *free_log_dsc_notype = NULL;
00412 m_logtype_entry_t *logtype_entry;
00413
00414 pthread_mutex_lock(&(logmgr->mutex));
00415 list_for_each_entry(log_dsc, &(logmgr->free_logs_list), list) {
00416 if (((log_dsc->nvmd->generic_flags & LF_TYPE_MASK) == type) &&
00417 free_log_dsc == NULL)
00418 {
00419 free_log_dsc = log_dsc;
00420 }
00421 if (((log_dsc->nvmd->generic_flags & LF_TYPE_MASK) == LF_TYPE_FREE) &&
00422 free_log_dsc_notype == NULL)
00423 {
00424 free_log_dsc_notype = log_dsc;
00425 }
00426 }
00427
00428 if (free_log_dsc) {
00429 log_dsc = free_log_dsc;
00430 } else if (free_log_dsc_notype) {
00431
00432 log_dsc = free_log_dsc_notype;
00433 list_for_each_entry(logtype_entry, &(logmgr->known_logtypes_list), list) {
00434 if (logtype_entry->type == type) {
00435 log_dsc->ops = logtype_entry->ops;
00436 assert(log_dsc->ops->alloc(log_dsc) == M_R_SUCCESS);
00437 break;
00438 }
00439 }
00440 if (!log_dsc->ops) {
00441
00442 rv = M_R_FAILURE;
00443 goto out;
00444 }
00445 } else {
00446
00447
00448
00449
00450
00451 rv = M_R_FAILURE;
00452 goto out;
00453 }
00454
00455 list_del_init(&(log_dsc->list));
00456 list_add_tail(&(log_dsc->list), &(logmgr->active_logs_list));
00457
00458
00459 log_dsc->flags = flags;
00460 assert(log_dsc->ops && log_dsc->ops->init);
00461 assert(log_dsc->ops->init(set, log_dsc->log, log_dsc) == M_R_SUCCESS);
00462 PCM_NT_STORE(set, (volatile pcm_word_t *) &(log_dsc->nvmd->generic_flags),
00463 (pcm_word_t) ((log_dsc->nvmd->generic_flags & ~LF_TYPE_MASK) | type));
00464 PCM_NT_FLUSH(set);
00465
00466 *log_dscp = log_dsc;
00467 rv = M_R_SUCCESS;
00468 out:
00469 pthread_mutex_unlock(&logmgr->mutex);
00470 return rv;
00471 }
00472
00473
00477 m_result_t
00478 m_logmgr_free_log(m_log_dsc_t *log_dsc)
00479 {
00480
00481 return M_R_SUCCESS;
00482 }
00483
00484 void
00485 m_logmgr_stat_print()
00486 {
00487 FILE *fout = stdout;
00488
00489 m_log_dsc_t *log_dsc;
00490
00491 fprintf(fout, "PER LOG STATISTICS\n");
00492 list_for_each_entry(log_dsc, &(logmgr->active_logs_list), list) {
00493 log_dsc->ops->report_stats(log_dsc);
00494 }
00495 fprintf(fout, "\n");
00496 fprintf(fout, "TRUNCATION THREAD STATISTICS\n");
00497 }