00001 /* -*- mode:C++; c-basic-offset:4 -*- 00002 Shore-MT -- Multi-threaded port of the SHORE storage manager 00003 00004 Copyright (c) 2007-2009 00005 Data Intensive Applications and Systems Labaratory (DIAS) 00006 Ecole Polytechnique Federale de Lausanne 00007 00008 All Rights Reserved. 00009 00010 Permission to use, copy, modify and distribute this software and 00011 its documentation is hereby granted, provided that both the 00012 copyright notice and this permission notice appear in all copies of 00013 the software, derivative works or modified versions, and any 00014 portions thereof, and that both notices appear in supporting 00015 documentation. 00016 00017 This code is distributed in the hope that it will be useful, but 00018 WITHOUT ANY WARRANTY; without even the implied warranty of 00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS 00020 DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER 00021 RESULTING FROM THE USE OF THIS SOFTWARE. 00022 */ 00023 00024 /*<std-header orig-src='shore' incl-file-exclusion='SCAN_H'> 00025 00026 $Id: scan.h,v 1.94 2010/12/08 17:37:43 nhall Exp $ 00027 00028 SHORE -- Scalable Heterogeneous Object REpository 00029 00030 Copyright (c) 1994-99 Computer Sciences Department, University of 00031 Wisconsin -- Madison 00032 All Rights Reserved. 00033 00034 Permission to use, copy, modify and distribute this software and its 00035 documentation is hereby granted, provided that both the copyright 00036 notice and this permission notice appear in all copies of the 00037 software, derivative works or modified versions, and any portions 00038 thereof, and that both notices appear in supporting documentation. 00039 00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY 00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS 00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND 00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 00044 00045 This software was developed with support by the Advanced Research 00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by 00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518. 00048 Further funding for this work was provided by DARPA through 00049 Rome Research Laboratory Contract No. F30602-97-2-0247. 00050 00051 */ 00052 00053 #ifndef SCAN_H 00054 #define SCAN_H 00055 00056 #include "w_defines.h" 00057 00058 /* -- do not edit anything above this line -- </std-header>*/ 00059 00060 #ifdef __GNUG__ 00061 #pragma interface 00062 #endif 00063 00064 #ifndef XCT_DEPENDENT_H 00065 #include <xct_dependent.h> 00066 #endif /* XCT_DEPENDENT_H */ 00067 00068 #include <page_alias.h> 00069 00070 /**\addtogroup SSMSCAN 00071 * Scans can be performed on B+tree and R-tree indexes and on files 00072 * of records. Iterator classes scan_index_i, scan_rt_i and scan_file_i are 00073 * used for Btree's, Rtree's and files, respectively. 00074 * 00075 * Scans begin by creating an iterator to specify what 00076 * to scan and the range of the scan. 00077 * 00078 * An iterator's next() function is used to retrieve values from 00079 * the scan (including the first). Next() will set the eof 00080 * parameter to true only when no value can be retrieved for the 00081 * current call, so 00082 * if a file contains 2 records and next() has been called 00083 * twice, eof will return false on the first and second calls, but 00084 * true on the third. 00085 * 00086 * The eof() function reports the value of eof returned 00087 * from the \b last call to next(). This means you might find 00088 * eof to be false, call next(), and get nothing back from next() 00089 * (eof is now true). 00090 * 00091 * The scan destructor frees and un-fixes (un-pins) all resources 00092 * used by the scan. 00093 */ 00094 /**\addtogrup SSMSCAN 00095 * 00096 * The finish() function frees (and un-fixes) resources used 00097 * by the scan. finish() is called by the destructor, 00098 * but it is sometimes useful to call it early for scan objects 00099 * on the stack when the destructor might not be called until 00100 * the end of the function. 00101 * 00102 * All of the scan constructors have a concurrency control 00103 * parameter. This controls the granularity of locking 00104 * done by the scan. The possible values of cc are: 00105 * 00106 * - t_cc_none: IS lock the file/index and 00107 * obtain no other locks during the scan 00108 * - t_cc_kvl: IS lock the index and obtain SH key-value locks 00109 * on every entry as encountered; used only for btrees (scan_index_i) 00110 * - t_cc_record:IS lock the file and obtain SH locks on every 00111 * record as encountered 00112 * - t_cc_page: IS lock the file/index and obtain SH locks on pages 00113 * (leaf pages, used only for rtrees) 00114 * - t_cc_file: SH lock the file/index 00115 * 00116 */ 00117 00118 class bt_cursor_t; 00119 00120 /**\brief Iterator over an index. 00121 * \details 00122 * \ingroup SSMSCANI 00123 * To iterate over the {key,value} pairs in an index, 00124 * construct an instance of this class, 00125 * and use its next() method to advance the cursor and the curr() method 00126 * to copy out keys and values into server-space. 00127 * It is unwise to delete or insert associations while you have a scan open on 00128 * the index (in the same transaction). 00129 * 00130 * Example code: 00131 * \code 00132 * stid_t fid(1,7); 00133 * scan_index_i scan(fid, 00134 scan_index_i::ge, vec_t::neg_inf, 00135 scan_index_i::le, vec_t::pos_inf, false, 00136 ss_m::t_cc_kvl); 00137 * bool eof(false); 00138 * do { 00139 * w_rc_t rc = scan.next(eof); 00140 * if(rc.is_error()) { 00141 * // handle error 00142 * ... 00143 * } 00144 * if(eof) break; 00145 * 00146 * // get the key len and element len 00147 * W_DO(scan.curr(NULL, klen, NULL, elen)); 00148 * 00149 * // Create vectors for the given lengths. 00150 * vec_t key(keybuf, klen); 00151 * vec_t elem(&info, elen); 00152 * 00153 * // Get the key and element value 00154 * W_DO(scan.curr(&key, klen, &elem, elen)); 00155 * ... 00156 * } while (1); 00157 * \endcode 00158 * 00159 */ 00160 class scan_index_i : public smlevel_top, public xct_dependent_t { 00161 public: 00162 /**\brief Construct an iterator. 00163 * \details 00164 * @param[in] stid ID of the B-Tree to be scanned. 00165 * @param[in] c1 Comparison type to be used in the scan for 00166 * lower bound comparison : 00167 * eq, gt, ge, lt, le 00168 * @param[in] bound1 Lower bound 00169 * @param[in] c2 Comparison type to be used in the scan for 00170 * upper bound comparison : 00171 * eq, gt, ge, lt, le 00172 * @param[in] bound2 Upper bound 00173 * @param[in] include_nulls If true, we will consider null keys 00174 * as satisfying the condition. 00175 * @param[in] cc Can be used to override the concurrency_t stored in 00176 * the store descriptor (see ss_m::get_store_info and sm_store_info_t)) 00177 * in a few situations. 00178 * @param[in] mode Can be used to override the mode in which the 00179 * store is locked. 00180 * 00181 * The following table describes the way these two values are used. 00182 * The columns represent the values of this parameter, cc. 00183 * The rows represent the value with which the store was created, 00184 * and may be determined with ss_m::get_store_info. 00185 * The table entries tell what kinds of locks are acquired on the store 00186 * and on the key-value pairs. 00187 * \verbatim 00188 cc | t_cc_none | t_cc_modkvl | t_cc_im | t_cc_kvl | t_cc_file 00189 Created | 00190 ------------------------------------------------------------------- 00191 t_cc_none | IS/none | IS/modkvl | IS/im | IS/kvl | SH/none 00192 ------------------------------------------------------------------- 00193 t_cc_modkvl| IS/none | IS/none | IS/modkvl | IS/modkvl | SH/none 00194 ------------------------------------------------------------------- 00195 t_cc_im | IS/im | IS/im | IS/im | IS/im | SH/file 00196 ------------------------------------------------------------------- 00197 t_cc_kvl | IS/kvl | IS/kvl | IS/kvl | IS/kvl | SH/file 00198 ------------------------------------------------------------------- 00199 t_cc_file | error | error | error | error | SH/none 00200 ------------------------------------------------------------------- 00201 \endverbatim 00202 * 00203 * \anchor MODKVL 00204 * The protocol t_cc_modkvl is a modified key-value locking protocol 00205 * created for the Paradise project, and behaves as follows: 00206 * -only allow "eq" comparsons : bound1 : eq X and boudn2: eq X 00207 * -grabs an SH lock on the bound X, whether or not the index is unique 00208 * 00209 * \anchor IM 00210 * The protocol t_cc_im treats the value part of a key-value pair as 00211 * a record id, and it acquires a lock on the record rather than on 00212 * they key-value pair. Otherwise it is like normal key-value locking. 00213 * 00214 * Although they are called "lower bound" and "upper bound", 00215 * technically the two conditions can be reversed, for example, 00216 * c1= le, bound1= vec_t::pos_inf 00217 * and 00218 * c2= ge, bound2= vec_t::neg_inf. 00219 */ 00220 NORET scan_index_i( 00221 const stid_t& stid, 00222 cmp_t c1, 00223 const cvec_t& bound1, 00224 cmp_t c2, 00225 const cvec_t& bound2, 00226 bool include_nulls = false, 00227 concurrency_t cc = t_cc_kvl, 00228 lock_mode_t mode = SH 00229 ); 00230 00231 00232 NORET ~scan_index_i(); 00233 00234 /**brief Get the key and value where the cursor points. 00235 * 00236 * @param[out] key Pointer to vector supplied by caller. curr() writes into this vector. 00237 * A null pointer indicates that the caller is not interested in the key. 00238 * @param[out] klen Pointer to sm_size_t variable. Length of key is written here. 00239 * @param[out] el Pointer to vector supplied by caller. curr() writes into this vector. 00240 * A null pointer indicates that the caller is not interested in the value. 00241 * @param[out] elen Pointer to sm_size_t variable. Length of value is written here. 00242 */ 00243 rc_t curr( 00244 vec_t* key, 00245 smsize_t& klen, 00246 vec_t* el, 00247 smsize_t& elen) { 00248 return _fetch(key, &klen, el, &elen, false); 00249 } 00250 00251 /**brief Move to the next key-value pair in the index. 00252 * 00253 * @param[out] eof True is returned if there are no more pairs in the index. 00254 * If false, curr() may be called. 00255 */ 00256 rc_t next(bool& eof) { 00257 rc_t rc = _fetch(0, 0, 0, 0, true); 00258 eof = _eof; 00259 return rc.reset(); 00260 } 00261 00262 /// Free the resources used by this iterator. Called by desctructor if 00263 /// necessary. 00264 void finish(); 00265 00266 /// If false, curr() may be called. 00267 bool eof() { return _eof; } 00268 00269 /// If false, curr() may be called. 00270 tid_t xid() const { return tid; } 00271 ndx_t ndx() const { return ntype; } 00272 const rc_t & error_code() const { return _error_occurred; } 00273 00274 private: 00275 stid_t _stid; 00276 tid_t tid; 00277 ndx_t ntype; 00278 cmp_t cond2; 00279 bool _eof; 00280 00281 w_rc_t _error_occurred; 00282 bt_cursor_t* _btcursor; 00283 bool _finished; 00284 bool _skip_nulls; 00285 concurrency_t _cc; 00286 lock_mode_t _mode; 00287 00288 rc_t _fetch( 00289 vec_t* key, 00290 smsize_t* klen, 00291 vec_t* el, 00292 smsize_t* elen, 00293 bool skip); 00294 00295 void _init( 00296 cmp_t cond, 00297 const cvec_t& bound, 00298 cmp_t c2, 00299 const cvec_t& b2, 00300 lock_mode_t mode = SH); 00301 00302 void xct_state_changed( 00303 xct_state_t old_state, 00304 xct_state_t new_state); 00305 00306 // disabled 00307 NORET scan_index_i(const scan_index_i&); 00308 scan_index_i& operator=(const scan_index_i&); 00309 }; 00310 00311 00312 // R-Tree Scanning 00313 class rt_cursor_t; 00314 00315 /**\brief Iterator for scanning an R*-Tree. 00316 *\ingroup SSMSCANRT 00317 *\details 00318 * To iterate over the {key,value} pairs in a spatial index, 00319 * construct an instance of this class, 00320 * and use its next() method to advance the cursor and the curr() method 00321 * to copy out keys and values into server-space. 00322 * It is unwise to delete or insert associations while you have a scan open on 00323 * the index (in the same transaction). 00324 * 00325 *\sa SSMRTREE 00326 */ 00327 class scan_rt_i : public smlevel_top, public xct_dependent_t { 00328 public: 00329 00330 /// Store id of the R-Tree. 00331 stid_t stid; 00332 /// Transaction ID of the transaction that initialized this iterator. 00333 tid_t tid; 00334 /// Type of this index. 00335 ndx_t ntype; 00336 00337 /**\brief Construct an iterator. 00338 * \details 00339 * @param[in] stid ID of the R-Tree to be scanned. 00340 * @param[in] c Comparison type to be used in the scan : t_exact, 00341 * t_overlap, t_cover, t_inside. 00342 * @param[in] box Box to reference for the comparison above. 00343 * @param[in] include_nulls If true, we will consider null keys 00344 * as satisfying the condition. 00345 * @param[in] cc Must be t_cc_none, t_cc_page or t_cc_file. 00346 * In the first two cases, an IS lock is acquired; in the 00347 * last, an SH lock is acquired; this lock applies to the 00348 * entire index. 00349 */ 00350 NORET scan_rt_i( 00351 const stid_t& stid, 00352 nbox_t::sob_cmp_t c, 00353 const nbox_t& box, 00354 bool include_nulls = false, 00355 concurrency_t cc = t_cc_page); 00356 00357 NORET ~scan_rt_i(); 00358 00359 rc_t next( 00360 nbox_t& key, 00361 void* el, 00362 smsize_t& elen, 00363 bool& eof) { 00364 return _fetch(key, el, elen, eof, true); 00365 } 00366 00367 /* 00368 curr(nbox_t& key, void* el, smsize_t& elen, bool& eof) { 00369 return _fetch(key, el, elen, eof, false); 00370 } 00371 */ 00372 void finish(); 00373 00374 bool eof() { return _eof; } 00375 const rc_t & error_code() const { return _error_occurred; } 00376 private: 00377 bool _eof; 00378 rc_t _error_occurred; 00379 rt_cursor_t* _cursor; 00380 bool _finished; 00381 bool _skip_nulls; 00382 concurrency_t _cc; 00383 00384 rc_t _fetch( 00385 nbox_t& key, 00386 void* el, 00387 smsize_t& elen, 00388 bool& eof, 00389 bool skip); 00390 void _init( 00391 nbox_t::sob_cmp_t c, 00392 const nbox_t& qbox); 00393 00394 void xct_state_changed( 00395 xct_state_t old_state, 00396 xct_state_t new_state); 00397 00398 // disabled 00399 NORET scan_rt_i(const scan_rt_i&); 00400 scan_rt_i& operator=(const scan_rt_i&); 00401 }; 00402 00403 class bf_prefetch_thread_t; 00404 00405 00406 /** \brief Iterator over a file of records. 00407 * \ingroup SSMSCANF 00408 * \details 00409 * To iterate over the records in a file, construct an instance of this class, 00410 * and use its next, next_page and eof methods to located the 00411 * records in the file. 00412 * The methods next and next_page return a pin_i, 00413 * which lets you manipulate the record (pin it in the buffer pool while you 00414 * use it). 00415 * It is unwise to delete or insert records while you have a scan open on 00416 * the file (in the same transaction). 00417 * 00418 * \code 00419 * stid_t fid(1,7); 00420 * scan_file_i scan(fid); 00421 * pin_i* cursor(NULL); 00422 * bool eof(false); 00423 * do { 00424 * w_rc_t rc = scan.next(cursor, 0, eof); 00425 * if(rc.is_error()) { 00426 * // handle error 00427 * ... 00428 * } 00429 * if(eof) break; 00430 * // handle record 00431 * ... 00432 * const char *body = cursor->body(); 00433 * ... 00434 * } while (1); 00435 * \endcode 00436 */ 00437 class scan_file_i : public smlevel_top, public xct_dependent_t { 00438 public: 00439 stid_t stid; 00440 rid_t curr_rid; 00441 00442 /**\brief Construct an iterator over the given store (file). 00443 * 00444 * @param[in] stid ID of the file over which to iterate. 00445 * @param[in] start ID of the first record of interest, can be 00446 * used to start a scan in the middle of the file. 00447 * @param[in] cc Locking granularity to be used. See discussion 00448 * for other constructor. 00449 * @param[in] prefetch If true, a background thread will pre-fetch 00450 * pages in support of this scan. For this to work 00451 * the server option sm_prefetch must be enabled. 00452 * @param[in] ignored \e not used 00453 * 00454 * Record-level locking is not supported here because there are problems 00455 * with phantoms if another transaction is altering the file while 00456 * this scan is going on. Thus, if you try to use record-level 00457 * locking, you will get page-level locking instead. 00458 */ 00459 NORET scan_file_i( 00460 const stid_t& stid, 00461 const rid_t& start, 00462 concurrency_t cc = t_cc_file, 00463 bool prefetch=false, 00464 lock_mode_t ignored = SH); 00465 00466 /**\brief Construct an iterator over the given store (file). 00467 * 00468 * @param[in] stid ID of the file over which to iterate. 00469 * @param[in] cc Locking granularity to be used. The following are 00470 * permissible, and their implied store, page and record 00471 * lock modes are given: 00472 * - t_cc_none : store - IS page - none record - none 00473 * - t_cc_record : store - IS page - SH record - none 00474 * - t_cc_page : store - IS page - SH record - none 00475 * - t_cc_append : store - IX page - EX record - EX 00476 * - t_cc_file : store - SH page - none record - none 00477 * @param[in] prefetch If true, a background thread will pre-fetch 00478 * pages in support of this scan. For this to work 00479 * the server option sm_prefetch must be enabled. 00480 * @param[in] ignored \e not used 00481 * 00482 * Record-level locking is not supported here because there are problems 00483 * with phantoms if another transaction is altering the file while 00484 * this scan is going on. Thus, if you try to use record-level 00485 * locking, you will get page-level locking instead. 00486 */ 00487 NORET scan_file_i( 00488 const stid_t& stid, 00489 concurrency_t cc = t_cc_file, 00490 bool prefetch=false, 00491 lock_mode_t ignored = SH); 00492 00493 NORET ~scan_file_i(); 00494 00495 /* needed for tcl scripts */ 00496 /**\brief Return the pin_i that represents the scan state. 00497 * @param[out] pin_ptr Populate the caller's pointer. 00498 * @param[out] eof False if the pin_i points to a record, 00499 * true if there were no next record to pin. 00500 * 00501 */ 00502 void cursor( 00503 pin_i*& pin_ptr, 00504 bool& eof 00505 ) { pin_ptr = &_cursor; eof = _eof; } 00506 00507 /**\brief Advance to the next record of the file. 00508 * 00509 * @param[out] pin_ptr Populate the caller's pointer. 00510 * @param[in] start_offset Pin the next record at this offset. Meaningful 00511 * only for large records. 00512 * @param[out] eof False if the pin_i points to a record, 00513 * true if there were no next record to pin. 00514 * 00515 * This must be called once to get the first record of the file. 00516 */ 00517 rc_t next( 00518 pin_i*& pin_ptr, 00519 smsize_t start_offset, 00520 bool& eof); 00521 00522 /**\brief Advance to the first record on the next page in the file. 00523 * 00524 * @param[out] pin_ptr Populate the caller's pointer. 00525 * This pin_i represents the state of this scan. 00526 * @param[in] start_offset Pin the next record at this offset. 00527 * This is meaningful only for large records. 00528 * @param[out] eof False if the pin_i points to a record, 00529 * true if there were no next record to pin. 00530 * 00531 * If next_page is 00532 * called after the scan_file_i is initialized it will advance 00533 * the cursor to the first record in the file, just as 00534 * next() would do. 00535 */ 00536 rc_t next_page( 00537 pin_i*& pin_ptr, 00538 smsize_t start_offset, 00539 bool& eof); 00540 00541 /**\brief Free resources acquired by this iterator. Useful if 00542 * you are finished with the scan but not ready to delete it. 00543 */ 00544 void finish(); 00545 /**\brief End of file was reached. Cursor is not usable.*/ 00546 bool eof() { return _eof; } 00547 /**\brief Error code returned from last method call. */ 00548 const rc_t & error_code() const { return _error_occurred; } 00549 /**\brief ID of the transaction that created this iterator */ 00550 tid_t xid() const { return tid; } 00551 00552 protected: 00553 tid_t tid; 00554 bool _eof; 00555 w_rc_t _error_occurred; 00556 pin_i _cursor; 00557 lpid_t _next_pid; 00558 concurrency_t _cc; // concurrency control 00559 lock_mode_t _page_lock_mode; 00560 lock_mode_t _rec_lock_mode; 00561 00562 rc_t _init(bool for_append=false); 00563 00564 rc_t _next( 00565 pin_i*& pin_ptr, 00566 smsize_t start_offset, 00567 bool& eof); 00568 00569 void xct_state_changed( 00570 xct_state_t old_state, 00571 xct_state_t new_state); 00572 00573 private: 00574 bool _do_prefetch; 00575 bf_prefetch_thread_t* _prefetch; 00576 00577 // disabled 00578 NORET scan_file_i(const scan_file_i&); 00579 scan_file_i& operator=(const scan_file_i&); 00580 }; 00581 00582 #include <cstring> 00583 #ifndef SDESC_H 00584 #include "sdesc.h" 00585 #endif 00586 00587 /**\brief Pseudo-iterator used to append to a file. 00588 * \details 00589 * Just creating a record doesn't ensure that it appears at the 00590 * end of the file; when new pages are allocated, they could be 00591 * in extents in the middle of the file, and unused slots in 00592 * pages in the middle of the file can be scavenged for create_rec. 00593 * 00594 * If you want to ensure that the records are appended to a file, 00595 * use this. 00596 * 00597 * \attention 00598 * Clearly, this cannot be used in a meaningful way when other 00599 * threads are updating the same file. The constructor acquires 00600 * an exclusive lock on the file to protect it from updates by 00601 * other transactions, but this does not protect it from updates 00602 * from other threads in the same transaction. 00603 * The server must ensure that no more than one thread attached 00604 * to the locking transaction is using the file, and that the only use 00605 * of the file is through the append_file_i while the instance 00606 * exists. 00607 */ 00608 class append_file_i : public scan_file_i 00609 { 00610 public: 00611 /**\brief Construct an append_file_i for a given file. 00612 * @param[in] stid ID of the file in which to create records. 00613 */ 00614 NORET append_file_i( const stid_t& stid); 00615 00616 NORET ~append_file_i(); 00617 00618 /**\brief Place-holder method. Returns error. 00619 * 00620 * You cannot scan with an append_file_i. 00621 */ 00622 rc_t next( 00623 pin_i*& pin_ptr, 00624 smsize_t start_offset, 00625 bool& eof); 00626 00627 /**\brief Append a new record to the end of the file. 00628 * \ingroup SSMFILE 00629 * @param[in] hdr The client-defined record header. 00630 * @param[in] len_hint The length of the record, more or less. More 00631 * accuracy here helps the sm reduce its work. 00632 * @param[in] data The client-defined record contents. 00633 * @param[out] rid The identifier of the new record. 00634 */ 00635 rc_t create_rec( 00636 const vec_t& hdr, 00637 smsize_t len_hint, 00638 const vec_t& data, 00639 rid_t& rid); 00640 private: 00641 void _init_constructor(); 00642 00643 // See comments in pin.h, which does the same thing 00644 // file_p page; 00645 file_p& _page(); 00646 char _page_alias[PAGE_ALIAS_FILE]; 00647 sdesc_t _cached_sdesc; 00648 }; 00649 00650 /*<std-footer incl-file-exclusion='SCAN_H'> -- do not edit anything below this line -- */ 00651 00652 #endif /*</std-footer>*/