sm_base.h

Go to the documentation of this file.
00001 /* -*- mode:C++; c-basic-offset:4 -*-
00002      Shore-MT -- Multi-threaded port of the SHORE storage manager
00003    
00004                        Copyright (c) 2007-2009
00005       Data Intensive Applications and Systems Labaratory (DIAS)
00006                Ecole Polytechnique Federale de Lausanne
00007    
00008                          All Rights Reserved.
00009    
00010    Permission to use, copy, modify and distribute this software and
00011    its documentation is hereby granted, provided that both the
00012    copyright notice and this permission notice appear in all copies of
00013    the software, derivative works or modified versions, and any
00014    portions thereof, and that both notices appear in supporting
00015    documentation.
00016    
00017    This code is distributed in the hope that it will be useful, but
00018    WITHOUT ANY WARRANTY; without even the implied warranty of
00019    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
00020    DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
00021    RESULTING FROM THE USE OF THIS SOFTWARE.
00022 */
00023 
00024 /*<std-header orig-src='shore' incl-file-exclusion='SM_BASE_H'>
00025 
00026  $Id: sm_base.h,v 1.158 2010/12/08 17:37:43 nhall Exp $
00027 
00028 SHORE -- Scalable Heterogeneous Object REpository
00029 
00030 Copyright (c) 1994-99 Computer Sciences Department, University of
00031                       Wisconsin -- Madison
00032 All Rights Reserved.
00033 
00034 Permission to use, copy, modify and distribute this software and its
00035 documentation is hereby granted, provided that both the copyright
00036 notice and this permission notice appear in all copies of the
00037 software, derivative works or modified versions, and any portions
00038 thereof, and that both notices appear in supporting documentation.
00039 
00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY
00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS
00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND
00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
00044 
00045 This software was developed with support by the Advanced Research
00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by
00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518.
00048 Further funding for this work was provided by DARPA through
00049 Rome Research Laboratory Contract No. F30602-97-2-0247.
00050 
00051 */
00052 
00053 #ifndef SM_BASE_H
00054 #define SM_BASE_H
00055 
00056 #include "w_defines.h"
00057 
00058 /*  -- do not edit anything above this line --   </std-header>*/
00059 
00060 /**\file sm_base.h
00061  * \ingroup Macros
00062  */
00063 
00064 #ifdef __GNUG__
00065 #pragma interface
00066 #endif
00067 
00068 #include <climits>
00069 #ifndef OPTION_H
00070 #include "option.h"
00071 #endif
00072 #ifndef __opt_error_def_gen_h__
00073 #include "opt_error_def_gen.h"
00074 #endif
00075 
00076 
00077 class ErrLog;            // forward declarations: this header names these types only through pointers/references
00078 class sm_stats_info_t;
00079 class xct_t;
00080 class xct_i;
00081 
00082 class device_m;
00083 class io_m;
00084 class bf_m;
00085 class comm_m;
00086 class log_m;
00087 class lock_m;
00088 
00089 class tid_t;
00090 class option_t;
00091 
00092 #ifndef        SM_EXTENTSIZE
00093 #define        SM_EXTENTSIZE        8  /* default, used only if not already defined; becomes smlevel_0::ext_sz */
00094 #endif
00095 #ifndef        SM_LOG_PARTITIONS
00096 #define        SM_LOG_PARTITIONS        8  /* default, used only if not already defined; becomes smlevel_0::max_openlog */
00097 #endif
00098 
00099 typedef   w_rc_t        rc_t;  // short alias for w_rc_t
00100 
00101 
00102 /**\cond skip
00103  * This structure records the compensated-operation depth on construction
00104  * and checks that it matches the depth on destruction; this
00105  * is to ensure that we haven't forgotten to release
00106  * an anchor somewhere.
00107  *
00108  * We're defining the CHECK_NESTING_VARIABLES macro b/c
00109  * this work is spread out and we want to have 1 place to
00110  * determine whether it's turned on or off; don't want to
00111  * make the mistake of changing the debug level (on which
00112  * it depends) in only one of several places.
00113  *
00114  * NOTE: this doesn't work in a multi-threaded xct context.
00115  * That's b/c the check is too late -- once the count goes
00116  * to zero, another thread can change it and throw off all the
00117  * counts. To be sure, we'd have to use a TLS copy as well
00118  * as the common copy of these counts.
00119  *
00120  * This was on for debug level > 0 but it's been stable
00121  * enough to change it to > 2
00122  */
00123 #if W_DEBUG_LEVEL > 2
00124 #define CHECK_NESTING_VARIABLES 1
00125 #else
00126 #define CHECK_NESTING_VARIABLES 0
00127 #endif
00128 struct check_compensated_op_nesting {
00129 #if CHECK_NESTING_VARIABLES
00130     xct_t* _xd;      // transaction to check; when null, no check is performed
00131     int _depth;      // depth recorded at construction (0 when _xd is null)
00132     int _line;       // line number supplied by the creator; echoed in the mismatch message
00133     const char *const _file;  // file name supplied by the creator; echoed in the mismatch message
00134     // static methods are so we can avoid having to
00135     // include xct.h here.
00136     static int compensated_op_depth(xct_t* xd, int dflt);
00137 
00138     check_compensated_op_nesting(xct_t* xd, int line, const char *const file)  // records the current depth of xd
00139     : _xd(xd), 
00140     _depth(_xd? compensated_op_depth(_xd, 0) : 0),  // reads _xd, which is declared (hence initialized) before _depth
00141     _line(line),
00142     _file(file)
00143     {
00144     }
00145 
00146     ~check_compensated_op_nesting() {  // re-reads the depth and asserts it equals the recorded one
00147         if(_xd) {
00148             if( _depth != compensated_op_depth(_xd, _depth) ) {  // mismatch: report on stderr before asserting
00149                 fprintf(stderr, 
00150                     "th.%d check_compensated_op_nesting(%d,%s) depth was %d is %d\n",
00151                     sthread_t::me()->id,
00152                     _line, _file, _depth, compensated_op_depth(_xd, _depth));
00153             }
00154 
00155 
00156             w_assert0(_depth == compensated_op_depth(_xd, _depth));  // depth is queried again here, not cached
00157         }
00158     }
00159 #else
00160     check_compensated_op_nesting(xct_t*, int, const char *const) { }  // checking disabled: arguments are ignored
00161 #endif
00162 };
00163 
00164 
00165 /**\brief Encapsulates a few types used in the API */
00166 class smlevel_0 : public w_base_t {
00167 public:
00168     // Give these enums names for doxygen purposes:
00169     enum error_constant_t { eNOERROR = 0, eFAILURE = -1 };
00170     enum sm_constant_t { 
00171         page_sz = SM_PAGESIZE,        // page size (SM_PAGESIZE is set by makemake)
00172         ext_sz = SM_EXTENTSIZE,        // extent size
00173         max_exts = max_int4,        // max no. extents, must fit extnum_t
00174 #if defined(_POSIX_PATH_MAX)
00175         max_devname = _POSIX_PATH_MAX,        // max length of unix path name
00176     // BEWARE: this might be larger than you want.  Array sizes depend on it.
00177     // The default might be small enough, e.g., 256; getconf() yields the upper
00178     // bound on this value.
00179 #elif defined(MAXPATHLEN)
00180         max_devname = MAXPATHLEN,
00181 #else
00182         max_devname = 1024,        // fallback when neither _POSIX_PATH_MAX nor MAXPATHLEN is defined
00183 #endif
00184         max_vols = 20,                // max mounted volumes
00185         max_xct_thread = 20,        // max threads in a xct
00186         max_servers = 15,       // max servers to be connected with
00187         max_keycomp = 20,        // max key component (for btree)
00188         max_openlog = SM_LOG_PARTITIONS,        // max # log partitions
00189         max_dir_cache = max_vols * 10,
00190 
00191         /* XXX I want to propagate sthread_t::iovec_max here, but
00192            it doesn't work because of sm_app.h not including
00193            the thread package. */
00194         max_many_pages = 8,
00195 
00196         srvid_map_sz = (max_servers - 1) / 8 + 1,
00197         ext_map_sz_in_bytes = ((ext_sz + 7) / 8),
00198 
00199         dummy = 0
00200     };
00201 
00202     enum {
00203         max_rec_len = max_uint4
00204     };
00205 
00206     typedef sthread_base_t::fileoff_t fileoff_t;
00207     /*
00208      * Sizes-in-Kbytes for things like volumes and devices.
00209      * A KB is assumed to be 1024 bytes.
00210      * Note: a different type was used for added type checking.
00211      */
00212     typedef sthread_t::fileoff_t smksize_t;
00213     typedef w_base_t::base_stat_t base_stat_t; 
00214 
00215     /**\endcond skip */
00216 
00217     /*
00218      * rather than automatically aborting the transaction, when the
00219      * _log_warn_percent is exceeded, this callback is made, with a
00220      * pointer to the xct that did the writing, and with the
00221      * expectation that the result will be one of:
00222      * - return value == RCOK --> proceed
00223      * - return value == eUSERABORT --> victim to abort is given in the argument
00224      *
00225      * The server has the responsibility for choosing a victim and 
00226      * for aborting the victim transaction. 
00227      *
00228      */
00229 
00230     /**\brief Log space warning callback function type.  
00231      *
00232      * For more details of how this is used, see the constructor ss_m::ss_m().
00233      *
00234      * Storage manager methods check the available log space. 
00235      * If the log is in danger of filling to the point that it will be
00236      * impossible to abort a transaction, a
00237      * callback is made to the server.  The callback function is of this type.
00238      * The danger point is a threshold determined by the option sm_log_warn. 
00239      *
00240      * The callback
00241      * function is meant to choose a victim xct and 
00242      * tell if the xct should be
00243      * aborted by returning RC(eUSERABORT).  
00244      *
00245      * Any other RC value is returned to the server through the call stack.
00246      *
00247      * The arguments:
00248      * @param[in] iter    Pointer to an iterator over all xcts.
00249      * @param[in,out] victim    Victim will be returned here. This is an in/out
00250      * parameter and is initially populated with the transaction that is
00251      * attached to the running thread.
00252      * @param[in] curr    Bytes of log consumed by active transactions.
00253      * @param[in] thresh   Threshold just exceeded. 
00254      * @param[in] logfile   Character string name of oldest file to archive.
00255      *                     
00256      *  This function must be careful not to return the same victim more
00257      *  than once, even though the callback may be called many 
00258      *  times before the victim is completely aborted.
00259      *
00260      *  When this function has archived the given log file, it needs
00261      *  to notify the storage manager of that fact by calling
00262      *  ss_m::log_file_was_archived(logfile)
00263      */
00264     typedef w_rc_t (*LOG_WARN_CALLBACK_FUNC) (
00265             xct_i*      iter,     
00266             xct_t *&    victim, 
00267             fileoff_t   curr, 
00268             fileoff_t   thresh, 
00269             const char *logfile
00270         );
00271     /**\brief Callback function type for restoring an archived log file.
00272      *
00273      * @param[in] fname   Original file name (with path).
00274      * @param[in] num   Partition number of the file needed.
00275      *
00276      *  An alternative to aborting a transaction (when the log fills)
00277      *  is to archive log files.
00278      *  The server can use the log directory name to locate these files,
00279      *  and may use the iterator and the static methods of xct_t to 
00280      *  determine which log file(s) to archive.
00281      *
00282      *  Archiving and removing the older log files will work only if
00283      *  the server also provides a LOG_ARCHIVED_CALLBACK_FUNC 
00284      *  to restore the
00285      *  archived log files when the storage manager needs them for
00286      *  rollback.
00287      *  This is the function type used for that purpose.
00288      *
00289      *  The function must locate the archived log file for the
00290      *  partition number \a num, which was a suffix of the original log file's
00291      *  name.
00292      *  The log file must be restored with its original name.  
00293      */
00294     typedef    w_base_t::uint4_t partition_number_t; 
00295     typedef w_rc_t (*LOG_ARCHIVED_CALLBACK_FUNC) (
00296             const char *fname,
00297             partition_number_t num
00298         );
00299 
00300 /**\cond skip */
00301     enum switch_t {
00302         ON = 1,
00303         OFF = 0
00304     };
00305 /**\endcond skip */
00306 
00307     /**\brief Comparison types used in scan_index_i
00308      * \enum cmp_t
00309      * Shorthand for CompareOp.
00310      */
00311     enum cmp_t { bad_cmp_t=badOp, eq=eqOp,
00312                  gt=gtOp, ge=geOp, lt=ltOp, le=leOp };
00313 
00314 
00315     /* used by lock escalation routines */
00316     enum escalation_options {
00317         dontEscalate        = max_int4_minus1,
00318         dontEscalateDontPassOn,
00319         dontModifyThreshold        = -1
00320     };
00321 
00322     /**\brief Types of stores.
00323      * \enum store_t
00324      */
00325     enum store_t { 
00326         t_bad_store_t, 
00327         /// a b-tree or r-tree index
00328         t_index, 
00329         /// a file of records
00330         t_file, 
00331         /// t_lgrec is used for storing large record pages 
00332         /// and is always associated with some t_file store
00333         t_lgrec 
00334     };
00335     
00336     // types of indexes
00337 
00338     /**\brief Index types */
00339     enum ndx_t { 
00340         t_bad_ndx_t,             // illegal value
00341         t_btree,                 // B+tree with duplicates
00342         t_uni_btree,             // Unique-key btree
00343         t_rtree                  // R*tree
00344     };
00345 
00346     /**\enum concurrency_t 
00347      * \brief 
00348      * Lock granularities 
00349      * \details
00350      * - t_cc_bad Illegal
00351      * - t_cc_none No locking
00352      * - t_cc_record Record-level locking for files & records
00353      * - t_cc_page Page-level locking for files & records 
00354      * - t_cc_file File-level locking for files & records 
00355      * - t_cc_vol Volume-level locking for files and indexes 
00356      * - t_cc_kvl Key-value locking for B+-Tree indexes
00357      * - t_cc_im Aries IM locking for B+-Tree indexes : experimental
00358      * - t_cc_modkvl Modified key-value locking: experimental
00359      * - t_cc_append Used internally \todo true?
00360      */
00361     enum concurrency_t {
00362         t_cc_bad,                // this is an illegal value
00363         t_cc_none,                // no locking
00364         t_cc_record,                // record-level
00365         t_cc_page,                // page-level
00366         t_cc_file,                // file-level
00367         t_cc_vol,                // volume-level
00368         t_cc_kvl,                // key-value
00369         t_cc_im,                 // ARIES IM, not supported yet
00370         t_cc_modkvl,                 // modified ARIES KVL, for paradise use
00371         t_cc_append                 // append-only with scan_file_i
00372     };
00373 
00374     /**\enum pg_policy_t 
00375      * \brief 
00376      * File-compaction policy for creating records.
00377      * \details
00378      * - t_append : append new record to file (preserve order)
00379      * - t_cache  : look in cache for pages with space for new record (does
00380      *              not preserve order)
00381      * - t_compact: keep file compact even if it means searching the file
00382      *              for space in which to create the record (does not preserve
00383      *              order)
00384      *
00385      * These are masks - the following combinations are sensible:
00386      *
00387      * - t_append                        -- preserve sort order
00388      * - t_cache | t_append              -- check the cache first, 
00389      *                                      append if no luck
00390      * - t_cache | t_compact | t_append  -- append to file as a last resort
00391      */
00392     enum pg_policy_t {
00393         t_append        = 0x01, // retain sort order (cache 0 pages)
00394         t_cache        = 0x02, // look in n cached pgs 
00395         t_compact        = 0x04 // scan file for space in pages 
00396         
00397     };
00398 
00399 /**\cond skip */
00400 
00401     /* 
00402      * smlevel_0::operating_mode is always set to 
00403      * ONE of these, but the function in_recovery() tests for
00404      * any of them, so we'll give them bit-mask values
00405      */
00406     enum operating_mode_t {
00407         t_not_started = 0, 
00408         t_in_analysis = 0x1,
00409         t_in_redo = 0x2,
00410         t_in_undo = 0x4,
00411         t_forward_processing = 0x8
00412     };
00413 
00414     static concurrency_t cc_alg;        // concurrency control algorithm
00415     static bool          cc_adaptive;        // is PS-AA (adaptive) algorithm used?
00416 
00417 #include "e_error_enum_gen.h"
00418 
00419     static const w_error_info_t error_info[];
00420     static void init_errorcodes();
00421 
00422     static void  add_to_global_stats(const sm_stats_info_t &from);
00423     static void  add_from_global_stats(sm_stats_info_t &to);
00424 
00425     static device_m* dev;
00426     static io_m* io;
00427     static bf_m* bf;
00428     static lock_m* lm;
00429 
00430     static log_m* log;
00431     static tid_t* redo_tid;
00432 
00433     static LOG_WARN_CALLBACK_FUNC log_warn_callback;
00434     static LOG_ARCHIVED_CALLBACK_FUNC log_archived_callback;
00435     static fileoff_t              log_warn_trigger; 
00436     static int                    log_warn_exceed_percent; 
00437 
00438     static int    dcommit_timeout; // to convey option to coordinator,
00439                                    // if it is created by VAS
00440 
00441     static ErrLog* errlog;
00442 
00443     static bool        shutdown_clean;
00444     static bool        shutting_down;
00445     static bool        logging_enabled;
00446     static bool        lock_caching_default;
00447     static bool        do_prefetch;
00448 
00449     static operating_mode_t operating_mode;
00450     static bool in_recovery() {   // true in analysis, redo or undo
00451         return ((operating_mode & 
00452                 (t_in_redo | t_in_undo | t_in_analysis)) !=0); }
00453     static bool in_recovery_analysis() { 
00454         return ((operating_mode & t_in_analysis) !=0); }
00455     static bool in_recovery_undo() { 
00456         return ((operating_mode & t_in_undo ) !=0); }
00457     static bool in_recovery_redo() { 
00458         return ((operating_mode & t_in_redo ) !=0); }
00459 
00460     // These variables are the default values for lock escalation counts
00461     static w_base_t::int4_t defaultLockEscalateToPageThreshold;
00462     static w_base_t::int4_t defaultLockEscalateToStoreThreshold;
00463     static w_base_t::int4_t defaultLockEscalateToVolumeThreshold;
00464 
00465     // These variables control the size of the log.
00466     static fileoff_t max_logsz; // max log file size
00467 
00468     // This variable controls checkpoint frequency.
00469     // Checkpoints are taken every chkpt_displacement bytes
00470     // written to the log.
00471     static fileoff_t chkpt_displacement;
00472 
00473     // The volume_format_version is used to test compatibility
00474     // of software with a volume.  Whenever a change is made
00475     // to the SM software that makes it incompatible with
00476     // previously formatted volumes, this version number should
00477     // be incremented.  The value is set in sm.cpp.
00478     static w_base_t::uint4_t volume_format_version;
00479 
00480     // This is a zeroed page for use wherever initialized memory
00481     // is needed.
00482     static char zero_page[page_sz];
00483 
00484     // option for controlling background buffer flush thread
00485     static option_t* _backgroundflush;
00486 
00487 
00488     /*
00489      * Pre-defined store IDs -- see also vol.h
00490      * 0 -- is reserved for the extent map and the store map
00491      * 1 -- directory (see dir.cpp)
00492      * 2 -- root index (see sm.cpp)
00493      */
00494     enum {
00495         store_id_extentmap = 0,
00496         store_id_directory = 1,
00497         store_id_root_index = 2 
00498     };
00499 
00500     enum {
00501             eINTERNAL = fcINTERNAL,
00502             eOS = fcOS,
00503             eOUTOFMEMORY = fcOUTOFMEMORY,
00504             eNOTFOUND = fcNOTFOUND,
00505             eNOTIMPLEMENTED = fcNOTIMPLEMENTED
00506     };
00507 
00508     enum store_flag_t {
00509         // NB: this had better match sm_store_property_t (sm_int_3.h) !!!
00510         // or at least be converted properly every time we come through the API
00511         st_bad            = 0x0,
00512         st_regular        = 0x01, // fully logged
00513         st_tmp            = 0x02, // space logging only, 
00514                                   // file destroy on dismount/restart
00515         st_load_file      = 0x04, // not stored in the stnode_t, 
00516                             // only passed down to
00517                             // io_m and then converted to tmp and added to the
00518                             // list of load files for the xct.
00519                             // no longer needed
00520         st_insert_file     = 0x08,        // stored in stnode, but not on page.
00521                             // new pages are saved as tmp, old pages as regular.
00522         st_empty           = 0x100 // store might be empty - used ONLY
00523                             // as a function argument, NOT stored
00524                             // persistently.  Nevertheless, it's
00525                             // defined here to be sure that if other
00526                             // store flags are added, this doesn't
00527                             // conflict with them.
00528     };
00529 
00530     /* 
00531      * for use by set_store_deleting_log; 
00532      * type of operation to perform on the stnode 
00533      */
00534     enum store_operation_t {
00535             t_delete_store, 
00536             t_create_store, 
00537             t_set_deleting, 
00538             t_set_store_flags, 
00539             t_set_first_ext};
00540 
00541     enum store_deleting_t  {
00542             t_not_deleting_store = 0,  // must be 0: code assumes it
00543             t_deleting_store, 
00544             t_store_freeing_exts, 
00545             t_unknown_deleting};
00546 /**\endcond skip */
00547 };
00548 
00549 /**\cond skip */
00550 ostream&
00551 operator<<(ostream& o, smlevel_0::store_flag_t flag);  // stream-insertion overload for store_flag_t; declaration only in this header
00552 
00553 ostream&
00554 operator<<(ostream& o, const smlevel_0::store_operation_t op);  // stream-insertion overload for store_operation_t; declaration only
00555 
00556 ostream&
00557 operator<<(ostream& o, const smlevel_0::store_deleting_t value);  // stream-insertion overload for store_deleting_t; declaration only
00558 
00559 /**\endcond skip */
00560 
00561 /*<std-footer incl-file-exclusion='SM_BASE_H'>  -- do not edit anything below this line -- */
00562 
00563 #endif          /*</std-footer>*/

Generated on Mon Jan 2 15:13:57 2012 for Shore Storage Manager by  doxygen 1.4.7