00001 /* -*- mode:C++; c-basic-offset:4 -*- 00002 Shore-MT -- Multi-threaded port of the SHORE storage manager 00003 00004 Copyright (c) 2007-2009 00005 Data Intensive Applications and Systems Labaratory (DIAS) 00006 Ecole Polytechnique Federale de Lausanne 00007 00008 All Rights Reserved. 00009 00010 Permission to use, copy, modify and distribute this software and 00011 its documentation is hereby granted, provided that both the 00012 copyright notice and this permission notice appear in all copies of 00013 the software, derivative works or modified versions, and any 00014 portions thereof, and that both notices appear in supporting 00015 documentation. 00016 00017 This code is distributed in the hope that it will be useful, but 00018 WITHOUT ANY WARRANTY; without even the implied warranty of 00019 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS 00020 DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER 00021 RESULTING FROM THE USE OF THIS SOFTWARE. 00022 */ 00023 00024 /*<std-header orig-src='shore' incl-file-exclusion='SORT_S_H'> 00025 00026 $Id: sort_s.h,v 1.36 2010/08/23 14:28:18 nhall Exp $ 00027 00028 SHORE -- Scalable Heterogeneous Object REpository 00029 00030 Copyright (c) 1994-99 Computer Sciences Department, University of 00031 Wisconsin -- Madison 00032 All Rights Reserved. 00033 00034 Permission to use, copy, modify and distribute this software and its 00035 documentation is hereby granted, provided that both the copyright 00036 notice and this permission notice appear in all copies of the 00037 software, derivative works or modified versions, and any portions 00038 thereof, and that both notices appear in supporting documentation. 00039 00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY 00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS 00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND 00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 00044 00045 This software was developed with support by the Advanced Research 00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by 00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518. 00048 Further funding for this work was provided by DARPA through 00049 Rome Research Laboratory Contract No. F30602-97-2-0247. 00050 00051 */ 00052 00053 #ifndef SORT_S_H 00054 #define SORT_S_H 00055 00056 #include "w_defines.h" 00057 00058 /* -- do not edit anything above this line -- </std-header>*/ 00059 00060 #ifdef __GNUG__ 00061 #pragma interface 00062 #endif 00063 00064 // forward 00065 class file_p; 00066 class record_t; 00067 00068 #define OLDSORT_COMPATIBILITY 00069 00070 /**\brief A namespace to contain certain types used for ss_m::sort_file. */ 00071 namespace ssm_sort { 00072 00073 /**\brief Descriptor for sort key, used with sort_stream_i 00074 * 00075 * This structure was used in the original implementation 00076 * of sort_file (now deprecated). Unfortunately, the 00077 * API for sort_stream_i has not been updated to use the 00078 * alternative key descriptor structure, sort_keys_t. 00079 */ 00080 struct key_info_t { 00081 typedef sortorder::keytype key_type_t; 00082 00083 /**\cond skip */ 00084 /// for backward compatibility only: should use keytype henceforth 00085 enum dummy_t { t_char=sortorder::kt_u1, 00086 t_int=sortorder::kt_i4, 00087 t_float=sortorder::kt_f4, 00088 t_string=sortorder::kt_b, 00089 t_spatial=sortorder::kt_spatial}; 00090 /**\endcond skip */ 00091 00092 key_type_t type; // key type 00093 /// For sorting on spatial keys 00094 nbox_t universe; 00095 /**\brief Is a derived key. 00096 * If true, the key must be the only item in rec 00097 * header, and the header will not be copied to 00098 * the result record (allows the user to store derived 00099 * key in the header temporarily, for sorting purposes). 00100 */ 00101 bool derived; 00102 00103 /**\enum where_t 00104 **\brief Describes where the key is found: in a record header or body. 00105 * For file sort. 00106 * 00107 * - t_hdr : key is in the record header 00108 * - t_body : key is in the record body 00109 */ 00110 enum where_t { t_hdr = 0, t_body }; 00111 /**\brief Where the key resides: header or body */ 00112 where_t where; 00113 /**\brief Offset from beginning of header or body where the key starts */ 00114 w_base_t::uint4_t offset; 00115 /**\brief Length of key */ 00116 w_base_t::uint4_t len; 00117 /**\brief Estimated record length. A hint. */ 00118 w_base_t::uint4_t est_reclen; 00119 00120 key_info_t() { 00121 type = sortorder::kt_i4; 00122 where = t_body; 00123 derived = false; 00124 offset = 0; 00125 len = sizeof(int); 00126 est_reclen = 0; 00127 } 00128 }; 00129 00130 /**\brief Behavioral options for sort_stream_i. 00131 * 00132 * This structure was used in the original implementation 00133 * of sort_file (now deprecated). Unfortunately, the 00134 * API for sort_stream_i has not been updated to use the 00135 * alternative behavioral options structure, sort_keys_t. 00136 */ 00137 struct sort_parm_t { 00138 /// Number of pages for each run. 00139 uint2_t run_size; 00140 /// Volume on which scratch files will reside. 00141 vid_t vol; 00142 /// True -> duplicates will be removed. 00143 bool unique; 00144 /// Sort in ascending order? 00145 bool ascending; 00146 /// Destroy the input file when done? 00147 bool destructive; 00148 00149 /// Logging level of scratch files 00150 smlevel_3::sm_store_property_t property; 00151 00152 sort_parm_t() : run_size(10), unique(false), ascending(true), 00153 destructive(false), 00154 property(smlevel_3::t_regular) {} 00155 }; 00156 00157 00158 /* 00159 * For new sort: 00160 */ 00161 00162 /**\brief Input, output argument to CSKF, MOF, UMOF callbacks. 00163 * \ingroup SSMSORT 00164 * \details 00165 * Since some of the callbacks need variously an integer or a 00166 * pointer, this class 00167 * puns an integer and a void *. 00168 * 00169 * See ss_m::sort_file 00170 */ 00171 class key_cookie_t { 00172 void * c; // datum stored as a void*, punned by 00173 // the extraction methods 00174 public: 00175 explicit key_cookie_t () : c(NULL) { } 00176 00177 /// Create a cookie from an integer. 00178 explicit key_cookie_t (int i) { 00179 union { int _i; void *v; } _pun = {i}; 00180 c = _pun.v; 00181 } 00182 /// Create a cookie from a pointer. 00183 explicit key_cookie_t (void *v):c(v) { } 00184 00185 /// Extract integer value. 00186 int make_int() const { return operator int(); } 00187 00188 /// Extract smsize_t value. 00189 int make_smsize_t() const { return operator smsize_t(); } 00190 00191 /// Extract ptr value. 00192 void *make_ptr() const { return c; } 00193 00194 static key_cookie_t null; // newsort.cpp 00195 00196 private: 00197 operator int () const { 00198 union { void *v; int _i; } _pun = {c}; 00199 return _pun._i; 00200 } 00201 00202 operator smsize_t () const { 00203 union { void *v; int _i; } _pun = {c}; 00204 smsize_t t = _pun._i & 0xffffffff; 00205 #ifdef ARCH_LP64 00206 w_assert1((_pun._i & 0xffffffff00000000) == 0); 00207 #endif 00208 return t; 00209 } 00210 }; 00211 00212 /**\brief A memory allocator (abstract base class) used by ss_m::sort_file and its adjuncts. 00213 * \ingroup SSMSORT 00214 * \details 00215 * The ssm_sort::CSKF, ssm_sort::MOF and ssm_sort::UMOF callbacks 00216 * may need to allocate memory, in which case they 00217 * should use the factory passed in as an argument; this 00218 * allows the storage manager to free the memory using the same 00219 * allocator. 00220 * 00221 * These allocators are used with object_t (record handles) and 00222 * skey_t (key handles). 00223 * 00224 * The default allocators used by ss_m::sort_file are 00225 * - factory_t::none : does not allocate or free, used for statically- 00226 * allocated space. 00227 * - factory_t::cpp_vector : a factory that allocates byte-strings that 00228 * start on a double-aligned boundary and frees them. 00229 * 00230 * Users must write their own factories that inherit from 00231 * factory_t. 00232 */ 00233 class factory_t 00234 { 00235 /* users are meant to write their own factories 00236 * that inherit from this 00237 */ 00238 public: 00239 factory_t(); 00240 virtual NORET ~factory_t(); 00241 00242 virtual void* allocfunc(smsize_t)=0; 00243 virtual void freefunc(const void *, smsize_t)=0; 00244 00245 // none: causes no delete - used for statically allocated space 00246 static factory_t* none; 00247 00248 // cpp_vector - simply calls delete[] 00249 static factory_t* cpp_vector; 00250 00251 void freefunc(vec_t&v) { 00252 for(int i=v.count()-1; i>=0; i--) { 00253 DBG(<<"freefuncVEC(ptr=" << (void*)v.ptr(i) << " len=" << v.len(i)); 00254 freefunc((void *)v.ptr(i), v.len(i)); 00255 } 00256 } 00257 }; 00258 00259 /* XXX move virtual functions to a .cpp */ 00260 inline factory_t::factory_t() {} 00261 inline NORET factory_t::~factory_t() {} 00262 00263 /**\brief Descriptor for fixed-location keys 00264 * \ingroup SSMSORT 00265 * \details 00266 * A sort_keys_t contains an instance of this for each key. 00267 */ 00268 class key_location_t 00269 { 00270 public: 00271 /// Key is in record header 00272 bool _in_hdr; 00273 /// Offset from start of header/body of start of key 00274 smsize_t _off; 00275 /// Length of key 00276 smsize_t _length; 00277 00278 key_location_t() : _in_hdr(false), _off(0), _length(0) {} 00279 00280 key_location_t(const key_location_t &old) : 00281 _in_hdr(old._in_hdr), 00282 _off(old._off), _length(old._length) {} 00283 00284 /// Key is in record header 00285 bool is_in_hdr() const { return _in_hdr; } 00286 }; 00287 00288 class skey_t; // forward 00289 00290 /**\brief Handle on a record in the buffer pool or in scratch memory. 00291 * \ingroup SSMSORT 00292 * \details 00293 * The ss_m::sort_file function calls a variety of user-defined 00294 * callback functions to process records in the buffer pool 00295 * or in-memory copies of records and to produce more of the same. 00296 * 00297 * This class provides a generic handle to reference the records or copies of 00298 * records, since the callbacks need only to 00299 * - know the header length 00300 * - find the data at an offset in the header 00301 * - know the body length 00302 * - find the data at an offset in the body 00303 * - copy a generic handle 00304 * - copy out a portion of the header or body to a buffer 00305 * - create a new generic handle from a pair of buffer 00306 * 00307 * An object_t created from buffers must also know how the buffers were 00308 * allocated so it can free the buffers to the same heap, so it keeps 00309 * a reference to the factory_t used by the callback function 00310 * to create the buffers. 00311 */ 00312 class object_t : public smlevel_top 00313 { 00314 friend class skey_t; 00315 static const object_t& none; 00316 protected: 00317 void _construct(file_p&, slotid_t); 00318 void _construct( 00319 const void *hdr, smsize_t hdrlen, factory_t *, 00320 const void *body, smsize_t bodylen, factory_t *); 00321 00322 void _replace(const object_t&); // copy over 00323 NORET object_t(); 00324 public: 00325 NORET object_t(const object_t&o) 00326 : 00327 _valid(false), 00328 _in_bp(false), 00329 _rec(0), 00330 _fp(0), 00331 _hdrfact(factory_t::none), 00332 _hdrlen(0), 00333 _hdrbuf(0), 00334 _bodyfact(factory_t::none), 00335 _bodylen(0), 00336 _bodybuf(0) 00337 { _replace(o); } 00338 00339 NORET object_t( 00340 const void *hdr, 00341 smsize_t hdrlen, 00342 factory_t& hf, 00343 const void *body, 00344 smsize_t bodylen, 00345 factory_t& bf) 00346 : 00347 _valid(false), 00348 _in_bp(false), 00349 _rec(0), 00350 _fp(0), 00351 _hdrfact(&hf), 00352 _hdrlen(hdrlen), 00353 _hdrbuf(hdr), 00354 _bodyfact(&bf), 00355 _bodylen(bodylen), 00356 _bodybuf(body) 00357 { } 00358 00359 NORET ~object_t(); 00360 00361 bool is_valid() const { return _valid; } 00362 bool is_in_buffer_pool() const { return is_valid() && _in_bp; } 00363 smsize_t hdr_size() const { return _hdrlen; } 00364 smsize_t body_size() const { return _bodylen; } 00365 00366 const void *hdr(smsize_t offset) const; 00367 const void *body(smsize_t offset) const; 00368 void freespace(); 00369 void assert_nobuffers() const; 00370 smsize_t contig_body_size() const; // pinned amt 00371 00372 w_rc_t copy_out( 00373 bool in_hdr, 00374 smsize_t offset, 00375 smsize_t length, 00376 vec_t&dest) const; 00377 00378 00379 private: // data 00380 bool _valid; 00381 bool _in_bp; // in buffer pool or in memory 00382 00383 // for in_buffer_pool: 00384 record_t* _rec; 00385 file_p* _fp; 00386 00387 // for in_memory: 00388 factory_t* _hdrfact; 00389 smsize_t _hdrlen; 00390 const void* _hdrbuf; 00391 00392 factory_t* _bodyfact; 00393 smsize_t _bodylen; 00394 const void* _bodybuf; 00395 00396 protected: 00397 int _save_pin_count; 00398 // for callback 00399 void _callback_prologue() const { 00400 # if W_DEBUG_LEVEL > 2 00401 /* 00402 * leaving SM 00403 */ 00404 // escape const-ness of the method 00405 int *save_pin_count = (int *)&_save_pin_count; 00406 *save_pin_count = me()->pin_count(); 00407 w_assert3(_save_pin_count == 0); 00408 me()->check_pin_count(0); 00409 me()->in_sm(false); 00410 # endif 00411 } 00412 void _callback_epilogue() const { 00413 # if W_DEBUG_LEVEL > 2 00414 /* 00415 * re-entering SM 00416 */ 00417 me()->check_actual_pin_count(_save_pin_count); 00418 me()->in_sm(true); 00419 # endif 00420 } 00421 void _invalidate() { 00422 _valid=false; 00423 _in_bp=false; 00424 _rec = 0; 00425 _fp=0; 00426 _hdrfact=factory_t::none; 00427 _hdrlen=0; 00428 _hdrbuf=0; 00429 _bodyfact=factory_t::none; 00430 _bodylen=0; 00431 _bodybuf=0; 00432 _save_pin_count=0; 00433 } 00434 }; 00435 00436 class run_mgr; // forward 00437 00438 /**\brief The result of a CSKF function. 00439 * \ingroup SSMSORT 00440 * \details 00441 * This is a handle on a key. 00442 * The key might reside in scratch memory or in the buffer pool. 00443 * This class provides a generic API for using such a key. 00444 * 00445 * The attributes are: 00446 * - pointer to start of key (method ptr()) 00447 * - length of key (method size()) 00448 * - length of contiguous portion of key (method contig_length()) 00449 * - location (in buffer pool or scratch memory) (method is_in_obj()) 00450 * - sub-location (in buffer pool: in record header or body) (method is_in_hdr()) 00451 * - the heap allocator (factory_t) used to allocate buffers if this 00452 * refers to a key in scratch memory. 00453 * 00454 * Generally only the constructor is used by a CSKF callback function, 00455 * and the idiom used (indeed, the only way it can 00456 * be populated) is placement new as follows: 00457 * \code 00458 using namespace ssm_sort; 00459 w_rc_t 00460 exampleCSKF( 00461 const rid_t& rid, 00462 const object_t& obj, 00463 key_cookie_t , 00464 factory_t& f, 00465 skey_t* out 00466 ) 00467 { 00468 { 00469 // this shows how to create an skey_t from the entire header of the 00470 // incoming object_t, whether the object is in the buffer pool or 00471 // in scratch memory: 00472 smsize_t length = obj.hdr_size(); 00473 smsize_t offset = 0; 00474 bool in_hdr = true; 00475 new(out) skey_t(obj, offset, length, in_hdr); 00476 } 00477 00478 { 00479 // this shows how to create an skey_t from the last 3 bytes of the 00480 // body of the incoming object_t, whether the 00481 // object is in the buffer pool or in scratch memory: 00482 smsize_t length = 3; 00483 smsize_t offset = obj.body_size() - length; 00484 bool in_hdr = false; 00485 new(out) skey_t(obj, offset, length, in_hdr); 00486 } 00487 00488 { 00489 // this shows how to create an skey_t derived from the record id 00490 int val = rid.pid.page; 00491 smsize_t length = sizeof(val); 00492 smsize_t offset = 0; 00493 void *buf = f.allocfunc(length); 00494 memcpy(&buf, &val, sizeof(val)); 00495 new(out) skey_t(buf, offset, length, f); 00496 } 00497 00498 return RCOK; 00499 } 00500 \endcode 00501 */ 00502 class skey_t 00503 { 00504 // To let run_mgr construct empty skey_t 00505 friend class ssm_sort::run_mgr; 00506 public: 00507 /**\brief Construct from a key in the buffer pool 00508 * @param[in] o structure describing the key location 00509 * @param[in] offset offset from record header or body where key starts 00510 * @param[in] len length of entire key 00511 * @param[in] in_hdr true if key is in record header, false if in body 00512 */ 00513 NORET skey_t( 00514 const object_t& o, 00515 smsize_t offset, 00516 smsize_t len, 00517 bool in_hdr) 00518 00519 : _valid(true), _in_obj(true), 00520 _length(len), 00521 _obj(&o), _offset(offset), 00522 _in_hdr(in_hdr), 00523 _fact(factory_t::none), _buf(0) 00524 { 00525 } 00526 /**\brief Construct from a key in scratch memory 00527 * @param[in] buf scratch memory buffer 00528 * @param[in] offset offset from buf where key starts 00529 * @param[in] len length of entire key 00530 * @param[in] f factory used to allocate the buffer 00531 */ 00532 NORET skey_t( 00533 void *buf, 00534 smsize_t offset, 00535 smsize_t len, // KEY len, NOT NECESSARILY BUF len 00536 factory_t& f 00537 ) 00538 : _valid(true), _in_obj(false), 00539 _length(len), 00540 _obj(&object_t::none), 00541 _offset(offset), 00542 _in_hdr(false), 00543 _fact(&f), _buf(buf) 00544 { 00545 } 00546 00547 NORET ~skey_t() {} 00548 00549 /// Length of entire key 00550 smsize_t size() const { return _length; } 00551 00552 /// True if this structure is populated (points to a key) 00553 bool is_valid() const { return _valid; } 00554 00555 /// True if key is in the buffer pool 00556 bool is_in_obj() const { return is_valid() && _in_obj; } 00557 00558 /**\brief True unless this key is for an object other than \a o 00559 * 00560 * Used for assertions in sort code. 00561 */ 00562 bool consistent_with_object(const object_t&o ) const { 00563 return ((_obj == &o) || !_in_obj); } 00564 00565 /// True if key is in buffer pool and is in record header 00566 bool is_in_hdr() const { return is_in_obj() && _in_hdr; } 00567 00568 /// Pointer into byte at \a offset in key 00569 const void *ptr(smsize_t offset) const; // key 00570 00571 /// Using its factory, free space allocated for the in-scratch-memory buffer 00572 void freespace(); 00573 00574 /// Asserts that no heap memory is held by this or its object_t. 00575 void assert_nobuffers()const; 00576 00577 /// Pinned amount of the key 00578 smsize_t contig_length() const; 00579 00580 /// Copies key into vector (which must have pre-allocated space) 00581 w_rc_t copy_out(vec_t&dest) const; 00582 00583 private: 00584 bool _valid; 00585 bool _in_obj; // else in mem 00586 smsize_t _length; 00587 protected: 00588 // for in_obj case; 00589 const object_t* _obj; 00590 smsize_t _offset; // into buf or object of start of key 00591 private: 00592 bool _in_hdr; 00593 // for !in_obj but valid 00594 factory_t* _fact; 00595 const void* _buf; // buf to be deallocated -- key 00596 // might not start at offset 0 00597 protected: 00598 void _invalidate(); 00599 NORET skey_t() : 00600 _valid(false), _in_obj(false), _length(0), 00601 _obj(&object_t::none), 00602 _offset(0), _in_hdr(false), 00603 _fact(factory_t::none), _buf(0) 00604 { } 00605 void _construct( 00606 const object_t *o, smsize_t off, smsize_t l, bool h) { 00607 _valid = true; 00608 _in_obj = true; 00609 _obj = o; 00610 _offset = off; 00611 _length = l; 00612 _in_hdr = h; 00613 _fact = factory_t::none; 00614 } 00615 void _construct( 00616 const void *buf, smsize_t off, smsize_t l, factory_t* f) { 00617 _valid = true; 00618 _obj = 0; 00619 _in_obj = false; 00620 _offset = off; 00621 _length = l; 00622 _fact = f; 00623 _buf = buf; 00624 } 00625 void _replace(const skey_t&); // copy over 00626 void _replace_relative_to_obj(const object_t &, const skey_t&); // copy over 00627 }; // skey_t 00628 00629 00630 /** 00631 **\typedef CSKF 00632 * \ingroup SSMSORT 00633 *\brief Create-Sort-Key Function. 00634 * @param[in] rid Record ID of the record containing the key. 00635 * @param[in] in_obj Handle (object_t) on the record whose key we need. 00636 * @param[in] cookie Describes the location and type of key in the source 00637 * record. 00638 * @param[in] f Class for managing allocated space. 00639 * @param[out] out Result is written to this object_t. 00640 * \details 00641 * 00642 * This type of callback function fills in the skey_t \a out for 00643 * a key in an object (record). It is called only when the object 00644 * is first encountered in its run in a sort. 00645 * 00646 * A set of predefined callbacks are provided, q.v.: 00647 * - sort_keys_t::noCSKF : vacuous - does nothing 00648 * - sort_keys_t::generic_CSKF : copies or lexifies a key based on its 00649 * key_cookie_t argument. 00650 * 00651 * See generic_CSKF_cookie for an example of a simple user-defined CSKF. 00652 * 00653 * The skey_t \a out is pre-allocated and must be populated by 00654 * this callback function. 00655 * The factory_t \a f may be used for this allocation, or a user-defined 00656 * factory_t may be used instead. Whatever factory is used to allocate 00657 * the buffer to hold a key, that same factory must be placed in 00658 * the skey_t via the placement-new constructor call. Examples are 00659 * given in the description for skey_t. 00660 * 00661 * This callback must not free any space for \a in_obj. 00662 */ 00663 typedef w_rc_t (*CSKF)( 00664 const rid_t& rid, 00665 const object_t& in_obj, 00666 key_cookie_t cookie, // type info 00667 factory_t& f, 00668 skey_t* out 00669 ); 00670 00671 /**\typedef MOF 00672 * \ingroup SSMSORT 00673 *\brief Marshal Object Function 00674 * @param[in] rid Record id of the source record to marshal 00675 * @param[in] obj_in Handle on the source record (object_t) 00676 * @param[in] cookie Describes location and type of key in the source record 00677 * @param[out] obj_out Result is to be written to this object_t. 00678 *\details 00679 * The \a obj_out is already allocated; this function must populate it. 00680 * The \a obj_out has no buffers or factory associated with it. The 00681 * MOF must populate the \a obj_out from its own factory (it may use 00682 * factory_t::cpp_vector, which uses the global heap, or it may 00683 * use the factory from the \a obj_in, but ONLY if that factory is 00684 * \b not factory_t::none. 00685 * The \a obj_out 00686 * object carries its factory along with it throughout the sort process. 00687 * 00688 * This callback must not free any space for \a obj_in. 00689 * 00690 * This type of callback is used 00691 * - when a record is first encountered in the input file 00692 * - if sort_keys_t::carry_obj() is true, 00693 * it's also called when the object is read back 00694 * from a temporary file. 00695 * 00696 * A predefined callback is provided: 00697 * - ssm_sort::sort_keys_t::noMOF : vacuous - does nothing 00698 * 00699 * The sort code checks for 00700 * \code 00701 sort_keys_t::marshal_func() == sort_keys_t::noMOF 00702 \endcode 00703 * in which case, it does not call a marshal function at all. (This 00704 * is less work than allocating the output object_t to call a 00705 * vacuous function.) 00706 */ 00707 typedef w_rc_t (*MOF)( 00708 const rid_t& rid, // record id 00709 const object_t& obj_in, 00710 key_cookie_t cookie, // type info 00711 // func must allocate obj_out, 00712 object_t* obj_out // SM allocates, MOF initializes, SM 00713 // frees its buffers 00714 ); 00715 00716 /**\typedef UMOF 00717 * \ingroup SSMSORT 00718 * \brief Un-marshal Object Function 00719 * @param[in] rid Record id of the source record to marshal 00720 * @param[in] obj_in Handle on the source record (object_t) 00721 * @param[in] cookie Describes location and type of key in the destination 00722 * record 00723 * @param[out] obj_out Result is to be written to this object_t. 00724 * 00725 * \details 00726 * 00727 * The \a obj_out is already allocated; this function must populate it. 00728 * The \a obj_out has no buffers or factory associated with it. The 00729 * MOF must populate the \a obj_out from its own factory (it may use 00730 * factory_t::cpp_vector, which uses the global heap, or it may 00731 * use the factory from the \a obj_in, but ONLY if that factory is 00732 * \b not factory_t::none. 00733 * The \a obj_out 00734 * object carries its factory along with it throughout the sort process. 00735 * 00736 * This callback must not free any space for \a obj_in. 00737 * 00738 * This type of callback is used 00739 * - when an object is written to a temp file 00740 * - when the final object is written to the result file (if the 00741 * output is a copy of the input file). 00742 * 00743 * A predefined callback is provided: 00744 * - ssm_sort::sort_keys_t::noUMOF : vacuous - does nothing 00745 * 00746 * The sort code checks for 00747 * \code 00748 sort_keys_t::unmarshal_func() == sort_keys_t::noUMOF 00749 \endcode 00750 * in which case, it does not call a marshal function at all. (This 00751 * is less work than allocating the output object_t to call a 00752 * vacuous function.) 00753 * 00754 */ 00755 typedef w_rc_t (*UMOF)( 00756 const rid_t& rid, // orig record id of object in buffer 00757 const object_t& obj_in, 00758 key_cookie_t cookie, // type info 00759 object_t* obj_out // SM allocates, UMOF initializes, 00760 // SM will copy to disk, free mem 00761 ); 00762 00763 /**\typedef CF 00764 * \ingroup SSMSORT 00765 * \brief key Comparison Function 00766 * @param[in] length1 Length of first key in bytes 00767 * @param[in] key1 Pointer to start of first key 00768 * @param[in] length2 Length of second key in bytes 00769 * @param[in] key2 Pointer to start of second key 00770 * \retval int Negative if key1 < key2, 0 if equal, Positive if key1 > key2 00771 * \details 00772 * Used on every key comparison. 00773 * \note It is up to the server to determine if the keys are properly 00774 * aligned for comparison as fundamental types. Alignment is determined by 00775 * the combined behavior of the CSKF and MOF callbacks, as well as of 00776 * the location of the keys in the original records. 00777 */ 00778 typedef int (*CF) ( 00779 w_base_t::uint4_t length1, 00780 const void * key1, 00781 w_base_t::uint4_t length2, 00782 const void * key2 00783 ); 00784 00785 /**\typedef LEXFUNC 00786 * \ingroup SSMSORT 00787 * \brief Lexify key function 00788 * @param[in] source Pointer to start of key 00789 * @param[in] len Length of key 00790 * @param[out] sink Pointer to output buffer (preallocated, of length len) 00791 * \details 00792 * 00793 * This type of function is called by the generic_CSKF. It may be useful 00794 * for user-defined CKSF callbacks. Its purpose is to reformat keys 00795 * into lexicographic form so that simple string-type (byte-by-byte) 00796 * comparisons yield the same results as typed comparisons would. 00797 * An alternative to lexifying keys and using string comparisons is to 00798 * use typed comparisons, however, that has its disadvantages, particularly 00799 * for keys that might be spread across page boundaries or are not aligned. 00800 */ 00801 typedef w_rc_t (*LEXFUNC) (const void *source, smsize_t len, void *sink); 00802 00803 /**\brief A cookie passed to generic_CSKF callback must point to one of these. 00804 * \ingroup SSMSORT 00805 * \details 00806 * This struct may be used by user-defined CSKF callback functions. 00807 * For example: 00808 \code 00809 w_rc_t 00810 example_stringCSKF( 00811 const rid_t& , // not used 00812 const object_t& obj_in, 00813 key_cookie_t cookie, // generic_CSKF_cookie 00814 factory_t& , // not used 00815 skey_t* out 00816 ) 00817 { 00818 // Cast the cookie to a pointer to a generic_CSKF_cookie. 00819 generic_CSKF_cookie& K = *(generic_CSKF_cookie*)(cookie.make_ptr()); 00820 bool is_in_header = false; 00821 00822 // Populate the output : point to key in body of record/object 00823 new(out) skey_t(obj_in, K.offset, K.length, is_in_header); 00824 return RCOK; 00825 } 00826 \endcode 00827 */ 00828 struct generic_CSKF_cookie 00829 { 00830 /// value == noLEXFUNC means don't lexify 00831 LEXFUNC func; 00832 /// True if the key is in the record header, false if in body. 00833 bool in_hdr; 00834 /// Offset from start of record header or body. 00835 smsize_t offset; 00836 /// Key length. 00837 smsize_t length; 00838 00839 void clear() { length = 0; offset = 0; in_hdr = false; func = NULL ; } 00840 }; 00841 00842 /**\brief Parameter to control behavior of sort_file. 00843 * \ingroup SSMSORT 00844 * \details 00845 * This class determines how the sort will behave, and it holds descriptions 00846 * of the keys to be used for a sort. 00847 * For details of the effect on a sort, see \ref SORTOUTPUT. 00848 * For details about sort keys, see \ref SORTKEYS. 00849 * 00850 * He who creates this data structure determines 00851 * what is "most significant" key, next-most significant key, etc. 00852 * Key 0 gets compared first, 1 next, and so on. 00853 * 00854 * For related types, see the ssm_sort namespace. 00855 */ 00856 class sort_keys_t 00857 { 00858 public: 00859 typedef ssm_sort::LEXFUNC LEXFUNC; 00860 00861 /**\brief Lexify callback function that does a simple memory copy 00862 * @param[in] source Pointer to start of key 00863 * @param[in] len Length of key 00864 * @param[out] sink Pointer to output buffer 00865 * 00866 * \details 00867 * Does no reformatting; simply copies from source to sink. 00868 */ 00869 static w_rc_t noLEXFUNC (const void *source, smsize_t len, void *sink); 00870 00871 /**\brief Vacuous callback function, does nothing. 00872 * @param[in] rid Ignored. 00873 * @param[in] obj Ignored. 00874 * @param[in] cookie Ignored. 00875 * @param[in] f Ignored. 00876 * @param[out] out Ignored. 00877 * \details 00878 * This function should never be used. It is a default value. 00879 * The sort_file checks for 00880 * \code sort_keys_t::lexify() == sort_keys_t::noCSKF 00881 * \endcode 00882 * and if so, bypasses any code connected with key creation, 00883 * using the object_t it would have passed in to this function 00884 * as if it were the output of this function. 00885 * This comparison and bypass is faster than executing the 00886 * prologue and epilogue code to acquire space and release it, 00887 * needed when a CSKF is called. 00888 */ 00889 static w_rc_t noCSKF( 00890 const rid_t& rid, 00891 const object_t& obj, 00892 key_cookie_t cookie, // type info 00893 factory_t& f, 00894 skey_t* out 00895 ); 00896 00897 /**\brief Either copies or lexifies a key. 00898 * @param[in] rid Record ID of the record containing the key. 00899 * @param[in] in_obj This refers to the record containing the key. 00900 * @param[in] cookie Must be a pointer to a generic_CSKF_cookie, 00901 * which tells it which \ref LEXFUNC function to call 00902 * (noLEXFUNC indicates straight copy), 00903 * and also tells it the length and location (offset) 00904 * of the key. 00905 * @param[in] f A heap manager for allocating space. 00906 * @param[out] out Result is written here. 00907 * 00908 * \details 00909 * One normally expects the user to provide the entire 00910 * function for this, but we have this generic version just 00911 * for simplifying the handling of basic types for backward 00912 * compatibility. 00913 */ 00914 static w_rc_t generic_CSKF( 00915 const rid_t& rid, 00916 const object_t& in_obj, 00917 key_cookie_t cookie, // type info 00918 factory_t& f, 00919 skey_t* out 00920 ); 00921 00922 /**\brief Vacuous Marshal Object Function 00923 * \details 00924 * This function is never called; rather, the 00925 * sort code checks for 00926 * \code sort_keys_t::marshal_func() ==sort_keys_t::noMOF 00927 * \endcode and if so, bypasses any code specific to 00928 * marshalling. This is done because the preparatory work 00929 * for calling a marshal function includes allocating space for 00930 * the results, and it is cheaper to bypass it altogether. 00931 */ 00932 static w_rc_t noMOF ( 00933 const rid_t& , 00934 const object_t& , 00935 key_cookie_t , 00936 object_t* 00937 ); 00938 00939 /**\brief Vacuous Unmarshal Object Function 00940 * \details 00941 * This function is never called; rather, the 00942 * sort code checks for 00943 * \code sort_keys_t::unmarshal_func() == sort_keys_t::noMOF 00944 * \endcode and if so, bypasses any code specific to 00945 * unmarshalling. This is done because the preparatory work 00946 * for calling an unmarshal function includes allocating space for 00947 * the results, and it is cheaper to bypass it altogether. 00948 */ 00949 static w_rc_t noUMOF ( 00950 const rid_t& , 00951 const object_t& , 00952 key_cookie_t , 00953 object_t* 00954 ); 00955 00956 /* 00957 * Default comparision functions for in-buffer-pool 00958 * comparisons. No byte-swapping is done; alignment 00959 * requirements must already be met before calling these. 00960 * 00961 * Template is instantiated for {uint,int}[8421]_t. 00962 */ 00963 static int string_cmp(w_base_t::uint4_t , const void* , 00964 w_base_t::uint4_t , const void*); 00965 00966 static int uint8_cmp(w_base_t::uint4_t , const void* , 00967 w_base_t::uint4_t , const void* ); 00968 static int int8_cmp(w_base_t::uint4_t , const void* , 00969 w_base_t::uint4_t , const void* ); 00970 static int uint4_cmp(w_base_t::uint4_t , const void* , 00971 w_base_t::uint4_t , const void* ); 00972 static int int4_cmp(w_base_t::uint4_t , const void* , 00973 w_base_t::uint4_t , const void* ); 00974 static int uint2_cmp(w_base_t::uint4_t , const void* , 00975 w_base_t::uint4_t , const void* ); 00976 static int int2_cmp(w_base_t::uint4_t , const void* , 00977 w_base_t::uint4_t , const void* ); 00978 static int uint1_cmp(w_base_t::uint4_t , const void* , 00979 w_base_t::uint4_t , const void* ); 00980 static int int1_cmp(w_base_t::uint4_t , const void* , 00981 w_base_t::uint4_t , const void* ); 00982 static int f4_cmp(w_base_t::uint4_t , const void* , 00983 w_base_t::uint4_t , const void* ); 00984 static int f8_cmp(w_base_t::uint4_t , const void* , 00985 w_base_t::uint4_t , const void* ); 00986 00987 public: 00988 //@{ 00989 /** @name LEXFUNCs 00990 * LEXFUNC (q.v.) functions for fundamental types. 00991 */ 00992 static w_rc_t f8_lex(const void *source, smsize_t len, void *sink); 00993 static w_rc_t f4_lex(const void *source, smsize_t len, void *sink); 00994 static w_rc_t u8_lex(const void *source, smsize_t len, void *sink); 00995 static w_rc_t i8_lex(const void *source, smsize_t len, void *sink); 00996 static w_rc_t u4_lex(const void *source, smsize_t len, void *sink); 00997 static w_rc_t i4_lex(const void *source, smsize_t len, void *sink); 00998 static w_rc_t u2_lex(const void *source, smsize_t len, void *sink); 00999 static w_rc_t i2_lex(const void *source, smsize_t len, void *sink); 01000 static w_rc_t u1_lex(const void *source, smsize_t len, void *sink); 01001 static w_rc_t i1_lex(const void *source, smsize_t len, void *sink); 01002 //@} 01003 01004 01005 private: 01006 01007 /* Metadata about a key -- info about the key 01008 * as it appears in all objects, not per-object 01009 * key info. 01010 */ 01011 class key_meta_t { 01012 public: 01013 /* 01014 * callbacks: 01015 */ 01016 CF _cb_keycmp; // comparison 01017 CSKF _cb_keyinfo; // get location/copy/derived 01018 key_cookie_t _cookie; 01019 w_base_t::int1_t _mask; 01020 fill1 _dummy1; 01021 fill2 _dummy2; 01022 key_meta_t() : _cb_keycmp(0), _cb_keyinfo(0), 01023 _cookie(0), 01024 _mask(t_none) {} 01025 key_meta_t(const key_meta_t &old) : 01026 _cb_keycmp(old._cb_keycmp), 01027 _cb_keyinfo(old._cb_keyinfo), 01028 _cookie(old._cookie), 01029 _mask(old._mask) {} 01030 }; 01031 int _nkeys; // constructor 01032 int _spaces; // _grow 01033 key_meta_t* _meta; 01034 key_location_t* _locs; 01035 bool _stable; // set_stable, is_stable 01036 bool _for_index; // set_for_index, is_for_index 01037 bool _remove_dups; // set_unique 01038 bool _remove_dup_nulls; // set_null_unique 01039 bool _ascending; // ascending, set_ascending 01040 bool _deep_copy; // set_for_index, set_for_file 01041 bool _keep_orig_file;// set_for_index, set_for_file, set_keep_orig 01042 bool _carry_obj; // set_for_file, carry_obj, set_carry_obj 01043 01044 CSKF _cb_lexify_index_key; // set_for_index, index key lexify 01045 key_cookie_t _cb_lexify_index_key_cookie; // set_for_index 01046 01047 MOF _cb_marshal; // set_object_marshal 01048 UMOF _cb_unmarshal; // set_object_marshal 01049 key_cookie_t _cb_marshal_cookie; // set_object_marshal 01050 01051 void _grow(int i); 01052 01053 void _prep(int key); 01054 void _set_loc(int key, smsize_t off, smsize_t len) { 01055 key_location_t &t = _locs[key]; 01056 t._off = off; 01057 t._length = len; 01058 } 01059 void _set_mask(int key, 01060 bool fixed, 01061 bool aligned, 01062 bool lexico, 01063 bool in_header, 01064 CSKF gfunc, 01065 CF cfunc, 01066 key_cookie_t cookie 01067 ) { 01068 key_meta_t &t = _meta[key]; 01069 if(aligned) t._mask |= t_aligned; 01070 else t._mask &= ~t_aligned; 01071 if(fixed) t._mask |= t_fixed; 01072 else t._mask &= ~t_fixed; 01073 if(lexico) t._mask |= t_lexico; 01074 else t._mask &= ~t_lexico; 01075 if(in_header) t._mask |= sort_keys_t::t_hdr; 01076 else t._mask &= ~sort_keys_t::t_hdr; 01077 t._cb_keycmp = cfunc; 01078 t._cb_keyinfo = gfunc; 01079 w_assert3(cfunc); 01080 if( ! fixed) { 01081 w_assert3(gfunc); 01082 } 01083 t._cookie = cookie; 01084 } 01085 01086 void _copy(const sort_keys_t &old); 01087 01088 /* 01089 * mask: 01090 * t_fixed: key location is fixed for all records: no need 01091 * to call CSKF for each record 01092 * t_aligned: key location adequately aligned (relative 01093 * to the beginning of the record, which is 4-byte aligned) for 01094 * in-buffer-pool comparisons with the given comparison 01095 * function. Copy to a contiguous buffer is unnecessary iff 01096 * the entire key happens to be on one page (which is always 01097 * the case with small records). 01098 * t_lexico: Key is already in lexicographic order and 01099 * can be spread across pages, and the comparison 01100 * function (usually string-compare or byte-compare) 01101 * can be called on successive segments of the key -- 01102 * copying they key to a contiguous buffer is unnecessary. 01103 * Implies key is adequately aligned, but does not imply t_fixed. 01104 * 01105 * t_hdr: key is at offset in hdr rather than in body 01106 */ 01107 01108 enum mask { t_none = 0x0, 01109 t_fixed = 0x1, 01110 t_aligned =0x2, 01111 t_lexico=0x4, 01112 t_hdr = 0x40 // key found in hdr 01113 }; 01114 01115 public: 01116 /// Create a structure that's ready to be populated with \a nkeys keys. 01117 NORET sort_keys_t(int nkeys); 01118 01119 NORET ~sort_keys_t() { 01120 delete[] _meta; 01121 delete[] _locs; 01122 } 01123 01124 /// Copy operator. 01125 NORET sort_keys_t(const sort_keys_t &old); 01126 01127 /// Return number of keys. 01128 int nkeys() const { return _nkeys; } 01129 01130 /**\brief Output is to be suitable for index bulk-load, index key is sort 01131 * key. 01132 * 01133 * Call this if you want the output file to be written with objects 01134 * of the form 01135 * * hdr == key, body==rid 01136 * and the input file not to be destroyed. 01137 * This file is suitable for bulk-loading an index, and 01138 * the index key is the sort key. 01139 * 01140 * You \b must provide conversion functions for the sort key 01141 * to be converted to a lexicographic format string if it is not 01142 * already in such format in the original record, if the 01143 * index key (being the sort key) is to be used in a B+-Tree. 01144 * 01145 * Only one sort key is supported when sorting for index bulk-load, but 01146 * the key may be derived, and so the CSKF callback can combine 01147 * multiple keys, and lexifying them ensures that they can be 01148 * sorted as one. This is not entirely sufficient to cover all 01149 * cases of multiple keys, but it will do for many cases, particularly 01150 * where the sub-keys are of fixed length. 01151 */ 01152 int set_for_index() { 01153 _keep_orig_file = true; 01154 _deep_copy = false; 01155 _for_index = true; 01156 if(_nkeys > 1) { 01157 return 1; // BAD 01158 // we only support single-key btrees 01159 } 01160 _cb_lexify_index_key = sort_keys_t::noCSKF; 01161 _cb_lexify_index_key_cookie = key_cookie_t(0); 01162 return 0; 01163 } 01164 01165 /**\brief Output is to be suitable for index bulk-load, index key is 01166 * different from the sort key. 01167 * @param[in] lfunc Key creation/location function for the index key. 01168 * @param[in] ck Datum for \a lfunc 01169 * \details 01170 * 01171 * Only one sort key can be used. 01172 * 01173 * Call this if you want the output file 01174 * to be written with objects of the form 01175 * hdr == key, body==rid 01176 * and the input file not to be destroyed, 01177 * and you wish the index key to be different from the sort key. 01178 * 01179 * The \a lfunc argument must produce an index key 01180 * in \ref LEXICOFORMAT "lexicographic format" if the index is to 01181 * be a B+-Tree. This function is called when the record is 01182 * first encountered (reading the input file), since the record is 01183 * already pinned to gather a sort key. 01184 * 01185 * Only one sort key is supported when bulk-loading for indexes, but 01186 * the key may be derived, and so the CSKF callback can combine 01187 * multiple keys, and lexifying them ensures that they can be 01188 * sorted as one. This is not entirely sufficient to cover all 01189 * cases of multiple keys, but it will do for many cases, particularly 01190 * where the sub-keys are of fixed length. 01191 */ 01192 int set_for_index(CSKF lfunc, key_cookie_t ck) { 01193 set_for_index(); 01194 if(!lfunc) lfunc = sort_keys_t::noCSKF; 01195 _cb_lexify_index_key = lfunc; 01196 _cb_lexify_index_key_cookie = ck; 01197 return 0; 01198 } 01199 01200 /// Return true if set_for_index() was called. 01201 bool is_for_index() const { return _for_index; } 01202 01203 /// Return true if set_for_file() was called. 01204 bool is_for_file() const { return !_for_index; } 01205 01206 /// Return true if set_stable() 01207 bool is_stable() const { return _stable; } 01208 01209 /// Ensure stable sort. Cannot be used with set_for_index. 01210 void set_stable(bool val) { _stable = val; } 01211 01212 /// Return the function that creates or locates and/or lexifies the key. 01213 CSKF lexify_index_key() const { return _cb_lexify_index_key; } 01214 01215 /// Pointer to input datum for the associated lexify() CSKF. 01216 key_cookie_t lexify_index_key_cookie() const { return _cb_lexify_index_key_cookie; } 01217 01218 /**\brief Output is to be a the input records, sorted 01219 * @param[in] deepcopy Use true if you want a deep copy. 01220 * @param[in] keeporig Use true if you want to retain the input file. 01221 * @param[in] carry_obj Use true if you want to carry along the entire 01222 * objects through the scratch files and to the 01223 * output file. Used only for is_for_file(). 01224 * 01225 * Call this if you want the output file 01226 * to contain copies of the input file records, undulterated, 01227 * but in sorted order. 01228 * 01229 * Multiple keys are supported. Use of a CSKF is not needed if 01230 * the keys are embedded in the records, suitably aligned, and do 01231 * not cross page boundaries (string comparisons excepted, of course, 01232 * as string-comparison methods can be called repeatedly on successive 01233 * corresponding portions of string keys). 01234 */ 01235 int set_for_file(bool deepcopy, bool keeporig, bool carry_obj) { 01236 _for_index = false; 01237 (void) set_carry_obj(carry_obj); 01238 (void) set_deep_copy(deepcopy); 01239 return set_keep_orig(keeporig); 01240 } 01241 01242 /**\brief Ensure marshaling and unmarshaling of the objects. 01243 * @param[in] marshal MOF to be used when reading records from disk. 01244 * @param[in] unmarshal UMOF to be used to write records to disk, 01245 * @param[in] c Arguemtn to \a marshal and \a unmarshal 01246 * 01247 * Call this if the objects in the file need to be byte-swapped 01248 * or otherwise marshaled before use, and if they need to be 01249 * unmarshaled before the output file is written. 01250 * This may be used with set_for_index or set_for_file. 01251 */ 01252 int set_object_marshal( MOF marshal, UMOF unmarshal, key_cookie_t c) { 01253 _cb_marshal = marshal; 01254 _cb_unmarshal = unmarshal; 01255 _cb_marshal_cookie = c; 01256 return 0; 01257 } 01258 01259 /**\brief Return the marshal function or noMOF if none was given */ 01260 MOF marshal_func() const { return _cb_marshal; } 01261 /**\brief Return the unmarshal function or noUMOF if none was given */ 01262 UMOF unmarshal_func() const { return _cb_unmarshal; } 01263 01264 /**\brief Pointer to datum for marshal and unmarshal functions */ 01265 key_cookie_t marshal_cookie() const { return _cb_marshal_cookie; } 01266 01267 /**\brief True if sort will be in ascending order */ 01268 bool is_ascending() const { return _ascending; } 01269 01270 /**\brief Ensure that sort will be in ascending order */ 01271 int set_ascending( bool value = true) { _ascending = value; return 0; } 01272 01273 /**\brief True if duplicate keys and their records will be removed. 01274 * 01275 * When duplicates are encountered, they are sorted by record-id, 01276 * and the larger of the two (per umemcmp) is removed. 01277 * */ 01278 bool is_unique() const { return _remove_dups; } 01279 01280 /**\brief Ensure that duplicate keys and their records will be removed 01281 * */ 01282 int set_unique( bool value = true) { _remove_dups = value; return 0; } 01283 01284 /**\brief True if duplicate null keys and their records will be removed 01285 * 01286 * When duplicates are encountered, they are sorted by record-id, 01287 * and the larger of the two (per umemcmp) is removed. 01288 * */ 01289 bool null_unique() const { return _remove_dup_nulls; } 01290 01291 /**\brief Ensure that duplicate null keys and their records will be removed 01292 */ 01293 int set_null_unique( bool value = true) { _remove_dup_nulls = value; 01294 return 0; } 01295 01296 /**\brief True if the sort will copy the entire objects through each 01297 * phase. 01298 * \details 01299 * Used when is_for_file only. 01300 */ 01301 bool carry_obj() const { return _carry_obj; } 01302 01303 /**\brief Control whether the sort will copy the entire objects through each 01304 * phase. 01305 * @param[in] value If true, ensure keep_orig(). 01306 * \details 01307 * Used when is_for_file only. 01308 * This is useful if the keys are fixed and consume most of the 01309 * original objects, in which case there is no need for the 01310 * sort code to duplicate the key as well as the object 01311 * in the temporary output files, or to re-pin the original 01312 * records to copy them to the output file. 01313 */ 01314 int set_carry_obj(bool value = true) { 01315 return _carry_obj = value; 01316 return 0; 01317 } 01318 01319 /**\brief True if the sort will copy the entire objects to the 01320 * result file. 01321 */ 01322 bool deep_copy() const { return _deep_copy; } 01323 /**\brief Control whether the sort will copy the entire objects to the 01324 * result file. 01325 * @param[in] value If true, ensure deep_copy(). 01326 * \details 01327 * Used when is_for_file only. 01328 * When large objects appear in the input file and the input (original) 01329 * file is not to be kept, sort can copy only the metadata for the 01330 * large objects and reassign the large-object store to the result file. 01331 * This eliminates a lot of object creation and logging. 01332 */ 01333 int set_deep_copy(bool value = true ) { 01334 _deep_copy = value; 01335 return 0; 01336 } 01337 01338 /**\brief Return true if the sort will not destroy the input file. 01339 * \details 01340 * Used when is_for_file only. 01341 * This is turned on automatically when set_for_index. 01342 */ 01343 bool keep_orig() const { return _keep_orig_file; } 01344 /**\brief Control whether the sort will not destroy the input file. 01345 * @param[in] value If true, ensure keep_orig(). 01346 * \details 01347 * Used when is_for_file only. 01348 * This is turned on automatically when set_for_index. 01349 */ 01350 int set_keep_orig(bool value = true ) { 01351 if(value && !_deep_copy) return 1; 01352 _keep_orig_file = value; 01353 return 0; 01354 } 01355 01356 /**\brief Set attributes of key. 01357 * @param[in] keyindex The ordinal number of the index whose 01358 * attributes are to be set 01359 * @param[in] off Offset from beginning 01360 * of record header or body where the key is to be found 01361 * @param[in] len Length of key in recordd 01362 * @param[in] in_header True indicates that the key is 01363 * to be found in the record header rather than in the body. 01364 * @param[in] aligned True indicates that the key, as found in the record, 01365 * is suitably aligned for key comparisons with the 01366 * CF (key-comparison function) to be used. False means 01367 * that sort has to make an aligned copy before 01368 * doing a key comparison. 01369 * @param[in] lexico True indicates that the key, as found in the record, 01370 * is already in \ref LEXICOFORMAT "lexicographic format" 01371 * @param[in] cfunc Key comparison function to use on this key. 01372 * 01373 * \retval 0 if OK, 1 if error. 01374 * \details 01375 * You must call this or set_sortkey_derived for each of the keys. 01376 * 01377 * There must be implicit agreement between what the \a cfunc 01378 * expects and the arguments \a aligned and \a lexico. 01379 */ 01380 int set_sortkey_fixed( 01381 int keyindex, // Key number origin 0 01382 smsize_t off, // offset from beginning of hdr or body 01383 smsize_t len, // length 01384 bool in_header,// header or body 01385 bool aligned, // is aligned 01386 bool lexico, // needs lexifying 01387 CF cfunc // comparison func 01388 ); 01389 01390 /**\brief Set attributes of key. 01391 * @param[in] keyindex The ordinal number of the index whose 01392 * attributes are to be set 01393 * @param[in] gfunc Key-creation (lexify) function. 01394 * @param[in] cookie Datum for \a gfunc. 01395 * @param[in] in_header True indicates that the key is 01396 * to be found in the record header rather than in the body. 01397 * @param[in] aligned True indicates that the key, as found in the 01398 * result of \a gfunc, 01399 * is suitably aligned for key comparisons with the 01400 * CF (key-comparison function) to be used. False means 01401 * that sort has to make an aligned copy before 01402 * doing a key comparison. 01403 * @param[in] lexico True indicates that the key, as found in the result 01404 * of \a gfunc, 01405 * is in \ref LEXICOFORMAT "lexicographic format". 01406 * @param[in] cfunc Key comparison function to use on this key. 01407 * 01408 * \retval 0 if OK, 1 if error. 01409 * \details 01410 * You must call this or set_sortkey_fixed for each of the keys. 01411 * 01412 * There must be implicit agreement between what the \a cfunc 01413 * expects and the arguments \a aligned and \a lexico. 01414 */ 01415 int set_sortkey_derived( 01416 int keyindex, 01417 CSKF gfunc, 01418 key_cookie_t cookie, 01419 bool in_header, 01420 bool aligned, 01421 bool lexico, 01422 CF cfunc 01423 ); 01424 01425 /**\brief Return the key-location information for a given fixed-location key. 01426 * @param[in] i The ordinal number of the index of interest. 01427 * \details 01428 * Only for fixed-location keys. 01429 */ 01430 key_location_t& get_location(int i) { return _locs[i]; } 01431 01432 /**\brief Return the offset for a given fixed-location key. 01433 * @param[in] i The ordinal number of the index of interest. 01434 * \details 01435 * Only for fixed-location keys. 01436 */ 01437 smsize_t offset(int i) const { 01438 return _locs[i]._off; 01439 } 01440 /**\brief Return the offset for a given fixed-location key. 01441 * @param[in] i The ordinal number of the index of interest. 01442 * \details 01443 * Only for fixed-location keys. 01444 */ 01445 smsize_t length(int i) const { 01446 return _locs[i]._length; 01447 } 01448 /**\brief Return the CSKF function for a given derived key. 01449 * @param[in] i The ordinal number of the index of interest. 01450 * \details 01451 * Only for derived keys. 01452 */ 01453 CSKF keycreate(int i) const { 01454 return _meta[i]._cb_keyinfo; 01455 } 01456 01457 /**\brief Return the argument to the CSKF function for a given derived key. 01458 * @param[in] i The ordinal number of the index of interest. 01459 * \details 01460 * Only for derived keys. 01461 */ 01462 key_cookie_t cookie(int i) const { 01463 return _meta[i]._cookie; 01464 } 01465 01466 /**\brief Return the key-comparison function for a given key. 01467 * @param[in] i The ordinal number of the index of interest. 01468 * \details 01469 * For fixed-location and derived keys. 01470 */ 01471 CF keycmp(int i) const { 01472 return _meta[i]._cb_keycmp; 01473 } 01474 01475 /// Return true if key \a i is in lexicographic format in the input record 01476 bool is_lexico(int i) const { 01477 return (_meta[i]._mask & t_lexico)?true:false; 01478 } 01479 /// Return true if key \a i is in a fixed location in all input records 01480 bool is_fixed(int i) const { 01481 return (_meta[i]._mask & t_fixed)?true:false; 01482 } 01483 /// Return true if key \a i is in suitably aligned in the input record for the key-comparison function 01484 bool is_aligned(int i) const { 01485 return (_meta[i]._mask & t_aligned)?true:false; 01486 } 01487 /// True if the key or the source of a derived key is to be found in the record header 01488 bool in_hdr(int i) const { 01489 return (_meta[i]._mask & t_hdr)?true:false; 01490 } 01491 }; 01492 01493 } // end namespace ssm_sort 01494 01495 inline void 01496 ssm_sort::sort_keys_t::_grow(int i) 01497 { 01498 // realloc it to accommodate i more entries 01499 { 01500 key_location_t* tmp = new key_location_t[_spaces + i]; 01501 if(!tmp) W_FATAL(fcOUTOFMEMORY); 01502 if(_locs) { 01503 memcpy(tmp, _locs, nkeys() * sizeof(key_location_t)); 01504 delete[] _locs; 01505 } 01506 _locs = tmp; 01507 } 01508 { 01509 key_meta_t* tmp = new key_meta_t[_spaces + i]; 01510 if(!tmp) W_FATAL(fcOUTOFMEMORY); 01511 if(_meta) { 01512 memcpy(tmp, _meta, nkeys() * sizeof(key_meta_t)); 01513 delete[] _meta; 01514 } 01515 _meta = tmp; 01516 } 01517 _spaces += i; 01518 // don't change nkeys 01519 } 01520 inline NORET 01521 ssm_sort::sort_keys_t::sort_keys_t(int nkeys): 01522 _nkeys(0), 01523 _spaces(0), 01524 _meta(0), 01525 _locs(0), 01526 _stable(false), _for_index(false), 01527 _remove_dups(false), _remove_dup_nulls(false), 01528 _ascending(true), _deep_copy(false), _keep_orig_file(false), 01529 _carry_obj(false), 01530 _cb_lexify_index_key(sort_keys_t::noCSKF), 01531 _cb_lexify_index_key_cookie(0), 01532 _cb_marshal(sort_keys_t::noMOF), 01533 _cb_unmarshal(sort_keys_t::noUMOF), 01534 _cb_marshal_cookie(0) 01535 { 01536 _grow(nkeys); 01537 } 01538 01539 inline void 01540 ssm_sort::sort_keys_t::_prep(int key) 01541 { 01542 if(key >= _spaces) { 01543 _grow(5); 01544 } 01545 if(key >= _nkeys) { 01546 // NB: hazard if not all of these filled in! 01547 _nkeys = key+1; 01548 } 01549 } 01550 01551 inline void 01552 ssm_sort::sort_keys_t::_copy(const sort_keys_t &old) 01553 { 01554 _prep(old.nkeys()-1); 01555 int i; 01556 for(i=0; i< old.nkeys(); i++) { 01557 _locs[i] = old._locs[i]; 01558 _meta[i] = old._meta[i]; 01559 } 01560 w_assert3(nkeys() == old.nkeys()); 01561 w_assert3(_spaces >= old._spaces); 01562 for(i=0; i< old.nkeys(); i++) { 01563 w_assert3(_meta[i]._mask == old._meta[i]._mask); 01564 } 01565 } 01566 01567 inline NORET 01568 ssm_sort::sort_keys_t::sort_keys_t(const sort_keys_t &old) : 01569 _nkeys(0), _spaces(0), 01570 _meta(0), _locs(0) 01571 { 01572 _copy(old); 01573 } 01574 01575 inline int 01576 ssm_sort::sort_keys_t::set_sortkey_fixed( 01577 int key, 01578 smsize_t off, 01579 smsize_t len, 01580 bool in_header, 01581 bool aligned, 01582 bool lexico, 01583 CF cfunc 01584 ) 01585 { 01586 DBG(<<"set_sortkey_fixed " << key); 01587 if(is_for_index() && key > 0) { 01588 return 1; 01589 } 01590 _prep(key); 01591 _set_mask(key, true, aligned, lexico, 01592 in_header, noCSKF, cfunc, key_cookie_t::null); 01593 _set_loc(key, off, len); 01594 return 0; 01595 } 01596 01597 inline int 01598 ssm_sort::sort_keys_t::set_sortkey_derived( 01599 int key, 01600 CSKF gfunc, 01601 key_cookie_t cookie, 01602 bool in_header, 01603 bool aligned, 01604 bool lexico, 01605 CF cfunc 01606 ) 01607 { 01608 DBG(<<"set_sortkey_derived " << key); 01609 if(is_for_index() && key > 0) { 01610 return 1; 01611 } 01612 _prep(key); 01613 _set_mask(key, false, aligned, lexico, 01614 in_header, 01615 gfunc, cfunc, 01616 cookie); 01617 return 0; 01618 } 01619 01620 01621 /*<std-footer incl-file-exclusion='SORT_S_H'> -- do not edit anything below this line -- */ 01622 01623 #endif /*</std-footer>*/