00001 /* -*- mode:C++; c-basic-offset:4 -*- 00002 Shore-MT -- Multi-threaded port of the SHORE storage manager 00003 00004 Copyright (c) 2007-2009 00005 Data Intensive Applications and Systems Labaratory (DIAS) 00006 Ecole Polytechnique Federale de Lausanne 00007 00008 All Rights Reserved. 00009 00010 Permission to use, copy, modify and distribute this software and 00011 its documentation is hereby granted, provided that both the 00012 copyright notice and this permission notice appear in all copies of 00013 the software, derivative works or modified versions, and any 00014 portions thereof, and that both notices appear in supporting 00015 documentation. 00016 00017 This code is distributed in the hope that it will be useful, but 00018 WITHOUT ANY WARRANTY; without even the implied warranty of 00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS 00020 DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER 00021 RESULTING FROM THE USE OF THIS SOFTWARE. 00022 */ 00023 00024 /*<std-header orig-src='shore' incl-file-exclusion='PIN_H'> 00025 00026 $Id: pin.h,v 1.92 2010/08/23 14:28:18 nhall Exp $ 00027 00028 SHORE -- Scalable Heterogeneous Object REpository 00029 00030 Copyright (c) 1994-99 Computer Sciences Department, University of 00031 Wisconsin -- Madison 00032 All Rights Reserved. 00033 00034 Permission to use, copy, modify and distribute this software and its 00035 documentation is hereby granted, provided that both the copyright 00036 notice and this permission notice appear in all copies of the 00037 software, derivative works or modified versions, and any portions 00038 thereof, and that both notices appear in supporting documentation. 00039 00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY 00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS 00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND 00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 00044 00045 This software was developed with support by the Advanced Research 00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by 00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518. 00048 Further funding for this work was provided by DARPA through 00049 Rome Research Laboratory Contract No. F30602-97-2-0247. 00050 00051 */ 00052 00053 #ifndef PIN_H 00054 #define PIN_H 00055 00056 #include "w_defines.h" 00057 00058 /* -- do not edit anything above this line -- </std-header>*/ 00059 00060 #ifdef __GNUG__ 00061 #pragma interface 00062 #endif 00063 00064 #ifndef FILE_S_H 00065 #include <file_s.h> 00066 #endif /* FILE_S_H */ 00067 00068 #include <page_alias.h> 00069 00070 /* DOXYGEN Documentation */ 00071 /**\addtogroup SSMPIN 00072 * You may pin (force to remain in the buffer pool at a fixed location) 00073 * portions (no larger than a page) of a record for short periods of time 00074 * while you operate on them. You may step through a large record pinning 00075 * a sequence of such portions. 00076 * 00077 * \b Use \b of \b the \b pin_i \b requires \b care. 00078 * \b Take \b care \b to \b observe \b the \b following \b constraints: 00079 * 00080 * - You may not operate on the in-buffer-pool copy directly, as the 00081 * only storage manager knows the format of these data. You may 00082 * operate on these pinned data through the class pin_i. 00083 * 00084 * - Do not hold page latches (keep a page pinned) for 00085 * long periods (while a thread sleeps, awaits I/O, or otherwise 00086 * blocks long-term. Operating system scheduling 00087 * of threads is not under your control for this purpose). 00088 * Latches are meant to be short-term. Holding a latch for 00089 * a long time interferes with other aspects of the storage manager, 00090 * including buffer-pool cleaning. 00091 * 00092 * - A latch is held by a thread, not by a transaction. 00093 * Under no circumstances can a pin_i be passed from thread to thread. 00094 * 00095 * - It is dangerous to operate on a record through the static 00096 * storage manager methods (such as append_rec) while holding records pinned 00097 * through pin_i. This can lead to invalid pin_i with undefined results 00098 * when they are used; doing so with concurrent threads in a single transaction 00099 * can lead to undetectable deadlocks (latch-latch deadlocks, for example). 00100 */ 00101 00102 00103 class file_p; 00104 class lgdata_p; 00105 class record_t; 00106 00107 /*********************************************************************** 00108 The pin_i class (located in pin.h) is used to 00109 You mahn pin ranges of bytes in 00110 a record. The amount pinned (of the record body) can be determined 00111 with the start_byte() and length() functions. Access to the pinned 00112 region is via the body() function. The header is always pinned if 00113 any region is pinned and can be accessed via hdr(). 00114 00115 next_bytes() is used to get access to the next pinnable 00116 region of the record. 00117 00118 ~pin_i() will unpin the record. Pin() and unpin() can also 00119 be used to change which record is pinned (pin() will unpin the 00120 currently pinned record()). 00121 00122 For large records, data pages will not actually be pinned until 00123 body() is called. Therefore, to just read record 00124 headers, pinning with start 0 will not cause any additional IO. 00125 00126 The repin function efficiently re-pins a previously unpinned record 00127 and efficiently repins a record even while it is pinned. This is 00128 useful after append_rec and truncate_rec calls to repin the record 00129 since its location may have changed. 00130 00131 NOTE ON LOCK MODE PARAMETERS: 00132 The pin_i, pin, repin functions all take a lock mode parameter 00133 that specifies how the record should initially be locked. The 00134 options are SH and EX. EX should be used when 00135 the pinned record will be eventually updated (through update_rec, 00136 unpdate_rec_hdr, append_rec, or truncate_rec). Using EX in these 00137 cases will improve performance and reduce the risk of deadlock, 00138 but is not necessary for correctness. 00139 00140 WARNING: 00141 The pin_i structure for a pinned record is no longer valid after 00142 any append, truncate, create operation for ANY record on the page 00143 that is pinned. To enforce this a debugging check is made that 00144 compares the page's current lsn with its value when the record was 00145 pinned. Therefore, update_rec calls must also have a repin call 00146 performed. 00147 00148 For efficiency (to avoid repinning), the 00149 ss_m::update_rec and ss_m::update_rec_hdr functions are also 00150 provided by pin_i. These can be called on any pinned record 00151 regardless of where and how much is pinned. If a pin_i was 00152 previously pinned and then upinned, a call to 00153 pin_i::update_rec[_hdr] will temporarily repin the record and then 00154 unpin it. Therefore, after any pin_i call that updates the record, 00155 the state of the pin_i (either pinned or not) remains the same. 00156 00157 **********************************************************************/ 00158 /**\brief Pin records in the buffer pool and operate on them. 00159 * \ingroup SSMPIN 00160 * \details 00161 * Certain operations on the records referenced by a pin_i may invalidate 00162 * the pin_i. For example, if you pin a record, then truncate it or 00163 * append to it while holding a pin_i, the pin_i must be considered 00164 * invalidated because appending to the record might necessarily require 00165 * moving it. 00166 * 00167 * The pin functions take a lock mode parameter that tells the 00168 * storage manager how to lock the record initially. 00169 * The options are SH and EX. 00170 * EX should be used when the pinned record will be 00171 * updated (through update_rec, unpdate_rec_hdr, append_rec, 00172 * or truncate_rec). 00173 * Using EX in these cases will improve performance and 00174 * reduce the risk of deadlock, but it is not necessary for correctness. 00175 * 00176 * If you pin with SH, and subsequently modify the record through pin_i, 00177 * the pin_i method(s) will 00178 * upgrade locks as necessary to maintain ACID properties. 00179 * 00180 * These methods will not perform needless unfix/refix operations: you 00181 * may pin many small records on the same page in sequence and avoid 00182 * unfixing the page between pins. 00183 */ 00184 class pin_i : public smlevel_top { 00185 friend class scan_file_i; 00186 public: 00187 /**\cond skip */ 00188 enum flags_t { 00189 pin_empty = 0x0, 00190 pin_rec_pinned = 0x01, 00191 pin_hdr_only = 0x02, 00192 pin_separate_data = 0x04, 00193 pin_lg_data_pinned = 0x08 // large data page is pinned 00194 }; 00195 /**\endcond skip */ 00196 00197 /// Constructor. Does not pin anything until pin() is called. 00198 NORET pin_i() {_init_constructor();} 00199 00200 /// Destructor. Unpins anything currently pinned. 00201 NORET ~pin_i(); 00202 00203 // These methods pin portions of a record beginning at start 00204 // the actual location pinned (returned by start_byte), may 00205 // be <= start. 00206 // (They are smart enough not to unfix/refix the page 00207 // if the prior state has a record pinned on the same page 00208 // as the indicated record.) 00209 // 00210 /**\brief Pin a portion of the record starting at a given location. 00211 * \details 00212 * @param[in] rid ID of the record of interest 00213 * @param[in] start Offset of the first byte of interest. 00214 * @param[in] lmode Lock mode to use. 00215 * Pin the page containing the first byte of interest. 00216 * A record lock in the given mode is acquired (if it is not 00217 * already subsumed by a coarser lock or by a higher lock mode). 00218 * 00219 * Only the slotted page containing the record header is fixed at 00220 * this point. Its latch mode is inferred from the lock mode. 00221 * If any part of the record is pinned, the slotted 00222 * page containing the header is also fixed. 00223 * Thus, if the record is large (or very large), data pages won't 00224 * be fixed in the buffer pool until the body() method is called. 00225 */ 00226 rc_t pin( 00227 const rid_t & rid, 00228 smsize_t start, 00229 lock_mode_t lmode = SH); 00230 00231 /**\brief Pin a portion of the record starting at a given location. 00232 * \details 00233 * Pin a record with the given lock mode and latch mode. 00234 * See pin(rid, start, lock_mode); 00235 */ 00236 rc_t pin( 00237 const rid_t & rid, 00238 smsize_t start, 00239 lock_mode_t lock_mode, 00240 latch_mode_t latch_mode); 00241 00242 /**\brief Unpin whatever record was pinned. */ 00243 void unpin(); 00244 00245 /**\brief True if the running thread owns this pin_i */ 00246 bool is_mine() const; // only if owning thread 00247 00248 /**\brief 00249 * Set the reference bit to use for the buffer frame containing 00250 * the pinned body page when the page is unpinned. 00251 * \details 00252 * @param[in] value 0 or greater. 00253 * A value of 0 is a "hate" hint indicating that 00254 * the frame can be reused as soon as necessary. 00255 * By default, a value of 1 is used indicating the page will be cached 00256 * until at least 1 sweep of the buffer clock hand has passed. 00257 * Higher values cause the page to remain cached longer. 00258 */ 00259 void set_ref_bit(int value); 00260 00261 /**\brief Efficiently repin a record after is size has changed or 00262 * after it has been unpinned. 00263 * \details 00264 * @param[in] lmode SH or EX 00265 */ 00266 rc_t repin(lock_mode_t lmode = SH); 00267 00268 00269 /**\brief Pin the next range of bytes in the record 00270 * \details 00271 * @param[out] eof Set to true if there are no more bytes to pin. 00272 * When eof is reached, the previously pinned range remains pinned. 00273 */ 00274 rc_t next_bytes(bool& eof); 00275 00276 /**\brief True if something currently pinned. */ 00277 bool pinned() const { return _flags & pin_rec_pinned; } 00278 00279 /**\brief True if the entire record pinned. */ 00280 bool pinned_all() const 00281 { return pinned() && _start==0 && _len==body_size();} 00282 00283 /**\brief True if record is pinned and the pin_i is valid 00284 * \details 00285 * The pin_i is valid if it is up-to-date with the LSN on 00286 * the page. In other words, use this to verify that the page has not been 00287 * updated since it was pinned by this pin_i 00288 */ 00289 bool up_to_date() const 00290 { return pinned() && (_hdr_lsn == _get_hdr_lsn());} 00291 00292 /**\brief Return the byte-offset (within the record) the of the pinned portion */ 00293 smsize_t start_byte() const { _check_lsn(); return _start;} 00294 /**\brief Return the length of the pinned portion */ 00295 smsize_t length() const { _check_lsn(); return _len;} 00296 /**\brief Return the size of the pinned record's header */ 00297 smsize_t hdr_size() const { _check_lsn(); return _rec->hdr_size();} 00298 /**\brief Return the size of the pinned record's body */ 00299 smsize_t body_size() const { _check_lsn(); return _rec->body_size();} 00300 /**\brief True if the record is too large to fit on a file page */ 00301 bool is_large() const { _check_lsn(); return _rec->is_large();} 00302 /**\brief True if the record is small enough to fit on a file page */ 00303 bool is_small() const { _check_lsn(); return _rec->is_small();} 00304 /**\brief The kind of large-record implementation used for the pinned 00305 * record 00306 * \details 00307 * Values returned are: 0, 1, or 2. 00308 * - 0 means "large" : ~8KB - ~21 GB 00309 * - 1 means "1-level index" - up to ~16.GB 00310 * - 2 means "2-level index" - up to ~33 GB 00311 * - 3-level and deeper indexes are not supported. 00312 */ 00313 int large_impl() const { _check_lsn(); return _rec->large_impl();} 00314 00315 /**\brief Return the record ID of the pinned record */ 00316 const rid_t& rid() const {_check_lsn(); return _rid;} 00317 00318 /**\brief Return a pointer to the pinned record's header in the buffer pool. 00319 * \details 00320 * \attention 00321 * Do NOT update anything directly in the buffer pool. This returns a 00322 * const string because it is for the purpose of reading or copy-out. 00323 */ 00324 const char* hdr() const 00325 { _check_lsn(); return pinned() ? _rec->hdr() : 0;} 00326 00327 /**\brief Return a pointer into the pinned-record-portion in the buffer pool. 00328 * \details 00329 * \attention 00330 * Do NOT update anything directly in the buffer pool. This returns a 00331 * const string because it is for the purpose of reading or copy-out. 00332 */ 00333 const char* body(); 00334 00335 // These record update functions duplicate those in class ss_m 00336 // and are more efficient. They can be called on any pinned record 00337 // regardless of where and how much is pinned. 00338 /**\brief Overwrite a portion of the pinned record with new data. 00339 * \details 00340 * @param[in] start The offset from the beginning of the record of the 00341 * place to perform the update. 00342 * @param[in] data A vector containing the data to place in the record 00343 * at location \a start. 00344 * @param[out] old_value deprecated 00345 * The portion of the record containing the start byte need not 00346 * be pinned before this is called. 00347 */ 00348 rc_t update_rec(smsize_t start, const vec_t& data, int* old_value = 0 00349 #ifdef SM_DORA 00350 , const bool bIgnoreLocks = false 00351 #endif 00352 ); 00353 00354 /**\brief Update the pinned record's header. 00355 * \details 00356 * @param[in] start The offset from the beginning of the header of the 00357 * place to perform the update. 00358 * @param[in] hdr A vector containing the data to place in the header 00359 * at location \a start. 00360 */ 00361 rc_t update_rec_hdr(smsize_t start, const vec_t& hdr 00362 #ifdef SM_DORA 00363 , const bool bIgnoreLocks = false 00364 #endif 00365 ); 00366 00367 /**\brief Append to a pinned record. 00368 * \details 00369 * @param[in] data A vector containing the data to append to the record's 00370 * body. 00371 * The end of the record need not be pinned before this is called. 00372 */ 00373 rc_t append_rec(const vec_t& data); 00374 00375 /**\brief Shorten a record. 00376 * \details 00377 * @param[in] amount Number of bytes to chop off the end of the 00378 * pinned record's body. 00379 * The end of the record need not be pinned before this is called. 00380 */ 00381 rc_t truncate_rec(smsize_t amount); 00382 00383 const record_t* rec() const { _check_lsn(); return _rec;} 00384 00385 /**\brief Return a pointer to the page containing the record. 00386 * \details 00387 * This allows you to read the entire page. 00388 * \attention 00389 * Do NOT update anything directly in the buffer pool. This returns a 00390 * const string because it is for the purpose of reading or copy-out. 00391 */ 00392 const char* hdr_page_data(); 00393 00394 /**\brief Return the ID of the page containing the given byte of the record 00395 * \details 00396 * @param[in] offset The offset from the beginning of the record of the 00397 * byte of interest 00398 * @param[out] start_byte The offset from the beginning of the page of 00399 * the byte of interest 00400 * \return The page ID of the page containing the 00401 * byte of interest. 00402 */ 00403 lpid_t page_containing(smsize_t offset, smsize_t& start_byte) const; 00404 00405 private: 00406 00407 void _init_constructor(); // companion to constructor 00408 00409 rc_t _pin_data(); 00410 00411 const char* _body_large(); 00412 00413 rc_t _pin(const rid_t &rid, smsize_t start, lock_mode_t m, 00414 latch_mode_t l); 00415 00416 rc_t _pin(const rid_t &rid, smsize_t start, lock_mode_t m); 00417 00418 rc_t _repin(lock_mode_t lmode, int* old_value = 0 00419 #ifdef SM_DORA 00420 , const bool bIgnoreLocks = false 00421 #endif 00422 ); 00423 00424 file_p* _get_hdr_page_no_lsn_check() const { 00425 return pinned() ? &_hdr_page() : 0;} 00426 file_p* _get_hdr_page() const { 00427 _check_lsn(); return _get_hdr_page_no_lsn_check();} 00428 00429 // NOTE: if the _check_lsn assert fails, it usually indicates that 00430 // you tried to access a pinned record after having updated the 00431 // page, but before calling repin. 00432 // The _set_lsn() function is used to reset the lsn to the page's 00433 // new value, after an update operation. 00434 // 00435 #if W_DEBUG_LEVEL > 1 00436 void _check_lsn() const {w_assert2(up_to_date());} 00437 // these are in scan.cpp and pin.cpp respectively 00438 // so that pin.h is #include-able by client code, and 00439 // these are inlined in those files that use them. 00440 void _set_lsn(); 00441 void _set_lsn_for_scan(); 00442 #else 00443 void _check_lsn() const {} 00444 void _set_lsn() {} 00445 void _set_lsn_for_scan() {} 00446 #endif 00447 00448 const lsn_t& _get_hdr_lsn() const; 00449 00450 rid_t _rid; 00451 smsize_t _len; 00452 smsize_t _start; 00453 record_t* _rec; 00454 w_base_t::uint4_t _flags; // this cannot be flags_t since it uses 00455 // | to generate new flags not in the enum 00456 // _hdr_lsn is used to record the lsn on the page when 00457 // the page is pinned. When compiled with -DDEBUG, all pin_i 00458 // operations check that the hdr page's _lsn1 has not changed 00459 // (ie. to verify that the pinned record has not moved) 00460 lsn_t _hdr_lsn; 00461 lock_mode_t _lmode; // current locked state 00462 00463 /* 00464 * Originally pin_i contained the _hdr_page and _hdr_page data 00465 * members commented out below. This required that users #include 00466 * sm_int.h (ie. the whole world), generating large .o's. 00467 * Instead, we have the corresponding "alias" byte arrays and 00468 * member functions which cast these to the correct page type. 00469 * Only pin.cpp uses these functions. This greatly reduces the 00470 * number of .h files users need to include. 00471 * 00472 * Asserts in pin_i constructors verify that the _alias members 00473 * are large enough to hold file_p and lgdata_p. 00474 */ 00475 //file_p _hdr_page; 00476 //lgdata_p _data_page; 00477 file_p& _hdr_page() const; 00478 lgdata_p& _data_page() const; 00479 /* see comment above 4 reason 4 alias */ 00480 char _hdr_page_alias[PAGE_ALIAS_FILE]; 00481 char _data_page_alias[PAGE_ALIAS_LGDATA]; 00482 00483 // disable 00484 NORET pin_i(const pin_i&); 00485 NORET pin_i& operator=(const pin_i&); 00486 00487 public: 00488 /**\cond skip */ 00489 // Put inside pin_i only for the purpose of namescoping. 00490 static latch_mode_t lock_to_latch(lock_mode_t m); 00491 /**\endcond skip */ 00492 }; 00493 00494 /**\cond skip */ 00495 inline latch_mode_t pin_i::lock_to_latch(lock_mode_t m) { 00496 switch(m) { 00497 case SH: 00498 case UD: 00499 case NL: 00500 return LATCH_SH; 00501 case EX: 00502 return LATCH_EX; 00503 00504 default: 00505 W_FATAL(smlevel_0::eNOTIMPLEMENTED); 00506 } 00507 return LATCH_NL; // never gets here 00508 } 00509 /**\endcond skip */ 00510 00511 /*<std-footer incl-file-exclusion='PIN_H'> -- do not edit anything below this line -- */ 00512 00513 #endif /*</std-footer>*/