sort_s.h

00001 /* -*- mode:C++; c-basic-offset:4 -*-
00002      Shore-MT -- Multi-threaded port of the SHORE storage manager
00003    
00004                        Copyright (c) 2007-2009
00005       Data Intensive Applications and Systems Labaratory (DIAS)
00006                Ecole Polytechnique Federale de Lausanne
00007    
00008                          All Rights Reserved.
00009    
00010    Permission to use, copy, modify and distribute this software and
00011    its documentation is hereby granted, provided that both the
00012    copyright notice and this permission notice appear in all copies of
00013    the software, derivative works or modified versions, and any
00014    portions thereof, and that both notices appear in supporting
00015    documentation.
00016    
00017    This code is distributed in the hope that it will be useful, but
00018    WITHOUT ANY WARRANTY; without even the implied warranty of
00019    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
00020    DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
00021    RESULTING FROM THE USE OF THIS SOFTWARE.
00022 */
00023 
00024 /*<std-header orig-src='shore' incl-file-exclusion='SORT_S_H'>
00025 
00026  $Id: sort_s.h,v 1.36 2010/08/23 14:28:18 nhall Exp $
00027 
00028 SHORE -- Scalable Heterogeneous Object REpository
00029 
00030 Copyright (c) 1994-99 Computer Sciences Department, University of
00031                       Wisconsin -- Madison
00032 All Rights Reserved.
00033 
00034 Permission to use, copy, modify and distribute this software and its
00035 documentation is hereby granted, provided that both the copyright
00036 notice and this permission notice appear in all copies of the
00037 software, derivative works or modified versions, and any portions
00038 thereof, and that both notices appear in supporting documentation.
00039 
00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY
00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS
00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND
00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
00044 
00045 This software was developed with support by the Advanced Research
00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by
00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518.
00048 Further funding for this work was provided by DARPA through
00049 Rome Research Laboratory Contract No. F30602-97-2-0247.
00050 
00051 */
00052 
00053 #ifndef SORT_S_H
00054 #define SORT_S_H
00055 
00056 #include "w_defines.h"
00057 
00058 /*  -- do not edit anything above this line --   </std-header>*/
00059 
00060 #ifdef __GNUG__
00061 #pragma interface
00062 #endif
00063 
00064 // forward
00065 class file_p;
00066 class record_t;
00067 
00068 #define OLDSORT_COMPATIBILITY
00069 
00070 /**\brief A namespace to contain certain types used for ss_m::sort_file. */
00071 namespace ssm_sort {
00072 
00073 /**\brief Descriptor for sort key, used with sort_stream_i
00074  *
00075  * This structure was used in the original implementation
00076  * of sort_file (now deprecated).  Unfortunately, the
00077  * API for sort_stream_i has not been updated to use the
00078  * alternative key descriptor structure, sort_keys_t.
00079  */
00080 struct key_info_t {
00081     typedef sortorder::keytype key_type_t;
00082 
00083     /**\cond skip */
00084     /// for backward compatibility only: should use keytype henceforth
00085     enum dummy_t {  t_char=sortorder::kt_u1,
00086                     t_int=sortorder::kt_i4,
00087                     t_float=sortorder::kt_f4,
00088                     t_string=sortorder::kt_b, 
00089                     t_spatial=sortorder::kt_spatial};
00090     /**\endcond skip */
00091 
00092     key_type_t  type;       // key type
00093     /// For sorting on spatial keys
00094     nbox_t      universe;   
00095     /**\brief Is a derived key. 
00096      * If true, the key must be the only item in rec
00097      *  header, and the header will not be copied to
00098      *  the result record (allows the user to store derived
00099      *  key in the header temporarily, for sorting purposes).
00100      */
00101     bool        derived;    
00102 
00103     /**\enum where_t
00104      **\brief Describes where the key is found: in a record header or body.
00105      * For file sort. 
00106      *
00107      * - t_hdr  : key is in the record header
00108      * - t_body  : key is in the record body
00109      */
00110     enum where_t { t_hdr = 0, t_body };
00111     /**\brief Where the key resides: header or body */
00112     where_t              where;      
00113     /**\brief Offset from beginning of header or body where the key starts */
00114     w_base_t::uint4_t    offset;     
00115     /**\brief Length of key */
00116     w_base_t::uint4_t    len;        
00117     /**\brief Estimated record length. A hint. */
00118     w_base_t::uint4_t    est_reclen; 
00119     
00120     key_info_t() {
00121       type = sortorder::kt_i4;
00122       where = t_body;
00123       derived = false;
00124       offset = 0;
00125       len = sizeof(int);
00126       est_reclen = 0;
00127     }
00128 };
00129 
00130 /**\brief Behavioral options for sort_stream_i.
00131  *
00132  * This structure was used in the original implementation
00133  * of sort_file (now deprecated).  Unfortunately, the
00134  * API for sort_stream_i has not been updated to use the
00135  * alternative behavioral options structure, sort_keys_t.
00136  */
00137 struct sort_parm_t {
00138     /// Number of pages for each run.
00139     uint2_t run_size;        
00140     /// Volume on which scratch files will reside.
00141     vid_t   vol;        
00142     /// True -> duplicates will be removed. 
00143     bool   unique;        
00144     /// Sort in ascending order?
00145     bool   ascending;        
00146     /// Destroy the input file when done?
00147     bool   destructive;    
00148 
00149     /// Logging level of scratch files
00150     smlevel_3::sm_store_property_t property; 
00151 
00152     sort_parm_t() : run_size(10), unique(false), ascending(true),
00153             destructive(false), 
00154             property(smlevel_3::t_regular) {}
00155 };
00156 
00157 
00158 /*
00159  * For new sort:
00160  */
00161 
00162 /**\brief Input, output argument to CSKF, MOF, UMOF callbacks.
00163  * \ingroup SSMSORT
00164  * \details
00165  * Since some of the callbacks need variously an integer or a
00166  * pointer, this class
00167  * puns an integer and a void *.
00168  *
00169  * See ss_m::sort_file
00170  */
00171 class key_cookie_t {
00172     void *   c; // datum stored as a void*, punned by
00173                 // the extraction methods
00174 public:
00175     explicit key_cookie_t () : c(NULL) { }
00176 
00177     /// Create a cookie from an integer.
00178     explicit key_cookie_t (int i) {
00179         union { int _i; void *v; } _pun = {i};
00180         c = _pun.v;
00181     }
00182     /// Create a cookie from a pointer.
00183     explicit key_cookie_t (void *v):c(v) { }
00184 
00185     /// Extract integer value.
00186     int make_int() const { return operator int(); }
00187 
00188     /// Extract smsize_t value.
00189     int make_smsize_t() const { return operator smsize_t(); }
00190 
00191     /// Extract ptr value.
00192     void *make_ptr() const { return c; }
00193 
00194     static key_cookie_t   null; // newsort.cpp
00195 
00196 private:
00197     operator int () const { 
00198          union { void *v; int _i; } _pun = {c};
00199              return _pun._i;
00200     }
00201 
00202     operator smsize_t () const { 
00203          union { void *v; int _i; } _pun = {c};
00204          smsize_t t =  _pun._i & 0xffffffff;
00205 #ifdef ARCH_LP64
00206          w_assert1((_pun._i & 0xffffffff00000000) == 0);
00207 #endif
00208          return t;
00209     }
00210 };
00211 
00212 /**\brief A memory allocator (abstract base class) used by ss_m::sort_file and its adjuncts.
00213  * \ingroup SSMSORT
00214  * \details
00215  * The ssm_sort::CSKF, ssm_sort::MOF and ssm_sort::UMOF callbacks  
00216  * may need to allocate memory, in which case they 
00217  * should use the factory passed in as an argument; this
00218  * allows the storage manager to free the memory using the same
00219  * allocator.
00220  *
00221  * These allocators are used with object_t (record handles) and 
00222  * skey_t (key handles).
00223  *
00224  * The default allocators used by ss_m::sort_file are 
00225  * - factory_t::none : does not allocate or free, used for statically-
00226  *   allocated space.
00227  * - factory_t::cpp_vector : a factory that allocates byte-strings that
00228  *   start on a double-aligned boundary and frees them. 
00229  *
00230  *  Users must write their own factories that inherit from
00231  *  factory_t.
00232  */
00233 class factory_t 
00234 {
00235    /* users are meant to write their own factories 
00236     * that inherit from this
00237     */
00238 public:
00239    factory_t();
00240    virtual    NORET ~factory_t();
00241 
00242    virtual    void* allocfunc(smsize_t)=0;
00243    virtual    void freefunc(const void *, smsize_t)=0;
00244 
00245    // none: causes no delete - used for statically allocated space
00246    static factory_t*    none;
00247 
00248    // cpp_vector - simply calls delete[] 
00249    static factory_t*    cpp_vector;
00250 
00251    void freefunc(vec_t&v) {
00252     for(int i=v.count()-1; i>=0; i--) {
00253         DBG(<<"freefuncVEC(ptr=" << (void*)v.ptr(i) << " len=" << v.len(i));
00254         freefunc((void *)v.ptr(i), v.len(i));
00255     }
00256    }
00257 };
00258 
00259 /* XXX move virtual functions to a .cpp */
00260 inline factory_t::factory_t() {}
00261 inline NORET factory_t::~factory_t() {}
00262 
00263 /**\brief Descriptor for fixed-location keys
00264  * \ingroup SSMSORT
00265  * \details
00266  * A sort_keys_t contains an instance of this for each key.
00267  */
00268 class key_location_t 
00269 {
00270 public:
00271     /// Key is in record header
00272     bool         _in_hdr;
00273     /// Offset from start of header/body of start of key
00274     smsize_t     _off;
00275     /// Length of key
00276     smsize_t     _length;
00277 
00278     key_location_t() : _in_hdr(false), _off(0), _length(0)  {}
00279 
00280     key_location_t(const key_location_t &old) : 
00281         _in_hdr(old._in_hdr), 
00282         _off(old._off), _length(old._length) {}
00283 
00284     /// Key is in record header
00285     bool is_in_hdr() const { return _in_hdr; }
00286 };
00287 
00288 class skey_t; // forward
00289 
00290 /**\brief Handle on a record in the buffer pool or in scratch memory.
00291  * \ingroup SSMSORT
00292  * \details
00293  * The ss_m::sort_file function calls a variety of user-defined 
00294  * callback functions to process records in the buffer pool
00295  * or in-memory copies of records and to produce more of the same. 
00296  *
00297  * This class provides a generic handle to reference the records or copies of 
00298  * records, since the callbacks need only to
00299  * - know the header length
00300  * - find the data at an offset in the header
00301  * - know the body length
00302  * - find the data at an offset in the body
00303  * - copy a generic handle
00304  * - copy out a portion of the header or body to a buffer
00305  * - create a new generic handle from a pair of buffer
00306  *
00307  * An object_t created from buffers must also know how the buffers were
00308  * allocated so it can free the buffers to the same heap, so it keeps
00309  * a reference to the factory_t used by the callback function
00310  * to create the buffers. 
00311  */
00312 class object_t : public smlevel_top 
00313 {
00314     friend class skey_t;
00315     static const object_t& none;
00316 protected:
00317     void        _construct(file_p&, slotid_t);
00318     void        _construct(
00319               const void *hdr, smsize_t hdrlen, factory_t *,
00320               const void *body, smsize_t bodylen, factory_t *);
00321 
00322     void      _replace(const object_t&); // copy over
00323     NORET     object_t();
00324 public:
00325     NORET     object_t(const object_t&o) 
00326                :
00327                _valid(false),
00328                _in_bp(false),
00329                _rec(0),
00330                _fp(0),
00331                _hdrfact(factory_t::none),
00332                _hdrlen(0),
00333                _hdrbuf(0),
00334                _bodyfact(factory_t::none),
00335                _bodylen(0),
00336                _bodybuf(0)
00337             { _replace(o); }
00338 
00339     NORET     object_t(
00340               const void *hdr, 
00341               smsize_t hdrlen, 
00342               factory_t& hf,
00343               const void *body, 
00344               smsize_t bodylen, 
00345               factory_t& bf) 
00346                :
00347                _valid(false),
00348                _in_bp(false),
00349                _rec(0),
00350                _fp(0),
00351                _hdrfact(&hf),
00352                _hdrlen(hdrlen),
00353                _hdrbuf(hdr),
00354                _bodyfact(&bf),
00355                _bodylen(bodylen),
00356                _bodybuf(body)
00357                { }
00358 
00359     NORET     ~object_t();
00360 
00361     bool        is_valid() const  { return _valid; }
00362     bool        is_in_buffer_pool() const { return is_valid() && _in_bp; }
00363     smsize_t    hdr_size() const { return _hdrlen; }
00364     smsize_t    body_size() const { return _bodylen; }
00365 
00366     const void *hdr(smsize_t offset) const; 
00367     const void *body(smsize_t offset) const;
00368     void        freespace();
00369     void        assert_nobuffers() const;
00370     smsize_t    contig_body_size() const; // pinned amt
00371 
00372     w_rc_t      copy_out(
00373                     bool in_hdr, 
00374                     smsize_t offset, 
00375                     smsize_t length, 
00376                     vec_t&dest) const;
00377 
00378 
00379 private:      // data
00380     bool        _valid;
00381     bool        _in_bp; // in buffer pool or in memory
00382 
00383     // for in_buffer_pool:
00384     record_t*    _rec;
00385     file_p*      _fp;
00386 
00387     // for in_memory:
00388     factory_t*    _hdrfact;
00389     smsize_t      _hdrlen;
00390     const void*   _hdrbuf;
00391 
00392     factory_t*    _bodyfact;
00393     smsize_t      _bodylen;
00394     const void*   _bodybuf;
00395 
00396 protected:
00397     int     _save_pin_count;
00398         // for callback
00399     void      _callback_prologue() const {
00400 #        if W_DEBUG_LEVEL > 2
00401             /*
00402              * leaving SM
00403              */
00404             // escape const-ness of the method
00405             int *save_pin_count = (int *)&_save_pin_count;
00406             *save_pin_count = me()->pin_count();
00407             w_assert3(_save_pin_count == 0);
00408             me()->check_pin_count(0);
00409             me()->in_sm(false);
00410 #        endif 
00411         }
00412     void      _callback_epilogue() const {
00413 #        if W_DEBUG_LEVEL > 2
00414             /*
00415              * re-entering SM
00416              */
00417             me()->check_actual_pin_count(_save_pin_count);
00418             me()->in_sm(true);
00419 #        endif 
00420         }
00421     void    _invalidate() {
00422                _valid=false;
00423                _in_bp=false;
00424                _rec = 0;
00425                _fp=0;
00426                _hdrfact=factory_t::none;
00427                _hdrlen=0;
00428                _hdrbuf=0;
00429                _bodyfact=factory_t::none;
00430                _bodylen=0;
00431                _bodybuf=0;
00432                _save_pin_count=0;
00433             }
00434 };
00435 
00436 class run_mgr; // forward
00437 
00438 /**\brief The result of a CSKF function.  
00439  * \ingroup SSMSORT
00440  * \details
00441  * This is a handle on a key.  
00442  * The key might reside in scratch memory or in the buffer pool.
00443  * This class provides a generic API for using such a key.
00444  *
00445  * The attributes are:
00446  * - pointer to start of key (method ptr())
00447  * - length of key (method size())
00448  * - length of contiguous portion of key (method contig_length())
00449  * - location (in buffer pool or scratch memory) (method is_in_obj())
00450  * - sub-location (in buffer pool: in record header or body) (method is_in_hdr())
00451  * - the heap allocator (factory_t) used to allocate buffers if this
00452  *   refers to a key in scratch memory.
00453  *
00454  *   Generally only the constructor is used by a CSKF callback function,
00455  *   and the idiom used (indeed, the only way it can 
00456  *   be populated) is placement new as follows:
00457  *   \code
00458 using namespace ssm_sort;
00459 w_rc_t 
00460 exampleCSKF(
00461     const rid_t&         rid,
00462     const object_t&      obj,
00463     key_cookie_t         ,  
00464     factory_t&           f,
00465     skey_t*              out
00466 )
00467 {
00468     {
00469     // this shows how to create an skey_t from the entire header of the
00470     // incoming object_t, whether the object is in the buffer pool or
00471     // in scratch memory:
00472     smsize_t length = obj.hdr_size();
00473     smsize_t offset = 0;
00474     bool     in_hdr = true;
00475     new(out) skey_t(obj, offset, length, in_hdr);
00476     }
00477 
00478     {
00479     // this shows how to create an skey_t from the last 3 bytes of the
00480     // body of the incoming object_t, whether the 
00481     // object is in the buffer pool or in scratch memory:
00482     smsize_t length = 3;
00483     smsize_t offset = obj.body_size() - length;
00484     bool     in_hdr = false;
00485     new(out) skey_t(obj, offset, length, in_hdr);
00486     }
00487 
00488     {
00489     // this shows how to create an skey_t derived from the record id
00490     int val = rid.pid.page;
00491     smsize_t length = sizeof(val);
00492     smsize_t offset = 0;
00493     void *buf = f.allocfunc(length);
00494     memcpy(&buf, &val, sizeof(val));
00495     new(out) skey_t(buf, offset, length, f);
00496     }
00497 
00498     return RCOK;
00499 }
00500 \endcode
00501  */
00502 class skey_t 
00503 {
00504     // To let run_mgr construct empty skey_t
00505     friend class ssm_sort::run_mgr;
00506 public:
00507     /**\brief Construct from a key in the buffer pool
00508      * @param[in]  o  structure describing the key location
00509      * @param[in]  offset  offset from record header or body where key starts
00510      * @param[in]  len  length of entire key
00511      * @param[in]  in_hdr  true if key is in record header, false if in body
00512      */
00513     NORET skey_t(
00514         const object_t& o, 
00515         smsize_t offset,
00516         smsize_t len, 
00517         bool in_hdr) 
00518 
00519         : _valid(true), _in_obj(true),
00520         _length(len),
00521         _obj(&o), _offset(offset),
00522         _in_hdr(in_hdr), 
00523         _fact(factory_t::none), _buf(0)
00524         {
00525         }
00526     /**\brief Construct from a key in scratch memory
00527      * @param[in]  buf  scratch memory buffer
00528      * @param[in]  offset  offset from buf where key starts
00529      * @param[in]  len  length of entire key
00530      * @param[in]  f  factory used to allocate the buffer
00531      */
00532     NORET skey_t(
00533         void *buf, 
00534         smsize_t offset,
00535         smsize_t len,  // KEY len, NOT NECESSARILY BUF len
00536         factory_t& f
00537         ) 
00538         : _valid(true), _in_obj(false),
00539         _length(len),
00540         _obj(&object_t::none),
00541         _offset(offset),
00542         _in_hdr(false), 
00543         _fact(&f), _buf(buf)
00544         {
00545         }
00546 
00547     NORET   ~skey_t() {}
00548 
00549     /// Length of entire key
00550     smsize_t  size() const     { return _length; }
00551 
00552     /// True if this structure is populated (points to a key)
00553     bool      is_valid() const { return _valid; }
00554 
00555     /// True if key is in the buffer pool
00556     bool      is_in_obj() const { return is_valid() && _in_obj; }
00557 
00558     /**\brief True unless this key is for an object other than \a o
00559      *
00560      * Used for assertions in sort code.
00561      */
00562     bool      consistent_with_object(const object_t&o ) const { 
00563                                  return ((_obj == &o) || !_in_obj); }
00564 
00565     /// True if key is in buffer pool and is in record header
00566     bool      is_in_hdr() const { return is_in_obj() && _in_hdr; }
00567 
00568     /// Pointer into byte at \a offset in key
00569     const void *ptr(smsize_t offset) const;  // key
00570 
00571     /// Using its factory, free space allocated for the in-scratch-memory buffer
00572     void      freespace();
00573 
00574     /// Asserts that no heap memory is held by this or its object_t.
00575     void      assert_nobuffers()const;
00576 
00577     /// Pinned amount of the key
00578     smsize_t  contig_length() const; 
00579 
00580     /// Copies key into vector (which must have pre-allocated space)
00581     w_rc_t    copy_out(vec_t&dest) const;
00582 
00583 private:
00584     bool        _valid; 
00585     bool        _in_obj; // else in mem
00586     smsize_t    _length;
00587 protected:
00588     // for in_obj case;
00589     const object_t*    _obj;     
00590     smsize_t    _offset; // into buf or object of start of key
00591 private:
00592     bool        _in_hdr;
00593     // for !in_obj but valid
00594     factory_t*    _fact;
00595     const void*    _buf;   // buf to be deallocated -- key
00596                 // might not start at offset 0
00597 protected:
00598     void      _invalidate();
00599     NORET     skey_t() : 
00600                 _valid(false), _in_obj(false), _length(0),
00601                 _obj(&object_t::none),
00602                 _offset(0), _in_hdr(false),
00603                 _fact(factory_t::none), _buf(0)
00604                 { }
00605     void    _construct(
00606                const object_t *o, smsize_t off, smsize_t l, bool h) {
00607                 _valid = true;
00608                 _in_obj = true;
00609                 _obj = o;
00610                 _offset = off;
00611                 _length = l;
00612                 _in_hdr = h;
00613                 _fact = factory_t::none;
00614                 }
00615     void      _construct(
00616             const void *buf, smsize_t off, smsize_t l, factory_t* f) {
00617                 _valid = true;
00618                 _obj = 0;
00619                 _in_obj = false;
00620                 _offset = off;
00621                 _length = l;
00622                 _fact = f;
00623                 _buf = buf;
00624                 }
00625     void    _replace(const skey_t&); // copy over
00626     void    _replace_relative_to_obj(const object_t &, const skey_t&); // copy over
00627 }; // skey_t
00628 
00629 
00630  /** 
00631  **\typedef  CSKF
00632  * \ingroup SSMSORT
00633  *\brief Create-Sort-Key Function.
00634  * @param[in] rid   Record ID of the record containing the key.
00635  * @param[in] in_obj  Handle (object_t) on the record whose key we need. 
00636  * @param[in] cookie  Describes the location and type of key in the source
00637  * record.
00638  * @param[in] f  Class for managing allocated space.
00639  * @param[out] out   Result is written to this object_t.  
00640  * \details
00641  *
00642  * This type of callback function fills in the skey_t \a out for
00643  *  a key in an object (record).  It is called only when the object 
00644  *  is first encountered in its run in a sort.
00645  *
00646  *  A set of predefined callbacks are provided, q.v.:
00647  *  - sort_keys_t::noCSKF  : vacuous - does nothing
00648  *  - sort_keys_t::generic_CSKF :  copies or lexifies a key based on its
00649  *      key_cookie_t argument.
00650  *
00651  *  See generic_CSKF_cookie for an example of a simple user-defined CSKF.
00652  *
00653  *  The skey_t \a out is pre-allocated and must be populated by 
00654  *  this callback function.
00655  *  The factory_t \a f may be used for this allocation, or a user-defined
00656  *  factory_t may be used instead. Whatever factory is used to allocate
00657  *  the buffer to hold a key, that same factory must be placed in
00658  *  the skey_t via the placement-new constructor call. Examples are
00659  *  given in the description for skey_t.
00660  *
00661  *  This callback must not free any space for \a in_obj.
00662  */  
00663 typedef w_rc_t (*CSKF)(
00664     const rid_t&        rid,
00665     const object_t&     in_obj,
00666     key_cookie_t        cookie,  // type info
00667     factory_t&          f,
00668     skey_t*             out
00669 );
00670 
00671 /**\typedef MOF
00672  * \ingroup SSMSORT
00673  *\brief  Marshal Object Function 
00674  * @param[in] rid  Record id of the source record to marshal
00675  * @param[in] obj_in Handle on the source record (object_t)
00676  * @param[in] cookie Describes location and type of key in the source record
00677  * @param[out] obj_out Result is to be written to this object_t.  
00678  *\details
00679  * The \a obj_out is already allocated; this function must populate it.
00680  * The \a obj_out has no buffers or factory associated with it. The
00681  * MOF must populate the \a obj_out from its own factory (it may use
00682  * factory_t::cpp_vector, which uses the global heap, or it may 
00683  * use the factory from the \a obj_in, but ONLY if that factory is
00684  * \b not factory_t::none.
00685  * The \a obj_out
00686  * object carries its factory along with it throughout the sort process.
00687  *
00688  * This callback must not free any space for \a obj_in.
00689  *
00690  *  This type of callback is used 
00691  *  - when a record is first encountered in the input file
00692  *  - if sort_keys_t::carry_obj() is true,
00693  *  it's also called when the object is read back
00694  *  from a temporary file.
00695  *
00696  *  A predefined callback is provided: 
00697  *  - ssm_sort::sort_keys_t::noMOF  : vacuous - does nothing
00698  *
00699  *  The sort code checks for 
00700  *  \code
00701  sort_keys_t::marshal_func() == sort_keys_t::noMOF
00702  \endcode
00703  * in which case, it does not call a marshal function at all. (This
00704  * is less work than allocating the output object_t to call a
00705  * vacuous function.)
00706  */
00707 typedef w_rc_t (*MOF)(
00708     const rid_t&       rid,  // record id
00709     const object_t&    obj_in,
00710     key_cookie_t       cookie,  // type info
00711                 // func must allocate obj_out,
00712     object_t*         obj_out     // SM allocates, MOF initializes, SM
00713                 // frees its buffers
00714     );
00715 
00716 /**\typedef UMOF
00717  * \ingroup SSMSORT
00718  * \brief Un-marshal Object Function 
00719  * @param[in] rid  Record id of the source record to marshal
00720  * @param[in] obj_in Handle on the source record (object_t)
00721  * @param[in] cookie Describes location and type of key in the destination 
00722  * record
00723  * @param[out] obj_out Result is to be written to this object_t.  
00724  *
00725  * \details
00726  *
00727  * The \a obj_out is already allocated; this function must populate it.
00728  * The \a obj_out has no buffers or factory associated with it. The
00729  * MOF must populate the \a obj_out from its own factory (it may use
00730  * factory_t::cpp_vector, which uses the global heap, or it may 
00731  * use the factory from the \a obj_in, but ONLY if that factory is
00732  * \b not factory_t::none.
00733  * The \a obj_out
00734  * object carries its factory along with it throughout the sort process.
00735  *
00736  * This callback must not free any space for \a obj_in.
00737  *
00738  *  This type of callback is used 
00739  *  - when an object is written to a temp file
00740  *  - when the final object is written to the result file (if the
00741  *  output is a copy of the input file).
00742  *
00743  *  A predefined callback is provided: 
00744  *  - ssm_sort::sort_keys_t::noUMOF  : vacuous - does nothing
00745  *
00746  *  The sort code checks for 
00747  *  \code
00748  sort_keys_t::unmarshal_func() == sort_keys_t::noUMOF
00749  \endcode
00750  * in which case, it does not call a marshal function at all. (This
00751  * is less work than allocating the output object_t to call a
00752  * vacuous function.)
00753  *
00754  */
00755 typedef w_rc_t (*UMOF)(
00756     const rid_t&       rid,  // orig record id of object in buffer
00757     const object_t&    obj_in,
00758     key_cookie_t       cookie,  // type info
00759     object_t*        obj_out // SM allocates, UMOF initializes,
00760             // SM will copy to disk, free mem
00761     );
00762 
00763 /**\typedef CF
00764  * \ingroup SSMSORT
00765  * \brief key Comparison Function
00766  * @param[in] length1  Length of first key in bytes
00767  * @param[in] key1    Pointer to start of first key
00768  * @param[in] length2 Length of second key in bytes
00769  * @param[in] key2    Pointer to start of second key
00770  * \retval int Negative if key1 < key2, 0 if equal, Positive if key1 > key2
00771  * \details
00772  * Used on every key comparison.  
00773  * \note It is up to the server to determine if the keys are properly
00774  * aligned for comparison as fundamental types. Alignment is determined by
00775  * the combined behavior of the CSKF and MOF callbacks, as well as of
00776  * the location of the keys in the original records.
00777  */
00778 typedef int (*CF) (
00779     w_base_t::uint4_t length1, 
00780     const void *      key1, 
00781     w_base_t::uint4_t length2, 
00782     const void *      key2
00783 );
00784 
00785 /**\typedef LEXFUNC
00786  * \ingroup SSMSORT
00787  * \brief Lexify key function
00788  * @param[in] source    Pointer to start of key
00789  * @param[in] len    Length of key
00790  * @param[out] sink    Pointer to output buffer (preallocated, of length len)
00791  * \details
00792  *
00793  * This type of function is called by the generic_CSKF. It may be useful
00794  * for user-defined CKSF callbacks.  Its purpose is to reformat keys 
00795  * into lexicographic form so that simple string-type (byte-by-byte)
00796  * comparisons yield the same results as typed comparisons would.
00797  * An alternative to lexifying keys and using string comparisons is to
00798  * use typed comparisons, however, that has its disadvantages, particularly
00799  * for keys that might be spread across page boundaries or are not aligned.
00800  */
00801 typedef w_rc_t (*LEXFUNC) (const void *source, smsize_t len, void *sink);
00802 
00803 /**\brief A cookie passed to generic_CSKF callback must point to one of these.
00804  * \ingroup SSMSORT
00805  * \details
00806  * This struct may be used by user-defined CSKF callback functions.
00807  * For example:
00808     \code
00809     w_rc_t 
00810     example_stringCSKF(
00811         const rid_t&        ,  // not used
00812         const object_t&     obj_in,
00813         key_cookie_t        cookie,  // generic_CSKF_cookie
00814         factory_t&          , // not used
00815         skey_t*             out
00816     )
00817     {
00818         // Cast the cookie to a pointer to a generic_CSKF_cookie.
00819         generic_CSKF_cookie&  K = *(generic_CSKF_cookie*)(cookie.make_ptr());
00820         bool is_in_header = false;
00821 
00822         // Populate the output : point to key in body of record/object 
00823         new(out) skey_t(obj_in, K.offset, K.length, is_in_header);
00824         return RCOK;
00825     }
00826     \endcode
00827  */
00828 struct generic_CSKF_cookie 
00829 {
00830     /// value == noLEXFUNC means don't lexify
00831     LEXFUNC    func; 
00832     /// True if the key is in the record header, false if in body.
00833     bool       in_hdr;
00834     /// Offset from start of record header or body.
00835     smsize_t   offset;
00836     /// Key length.
00837     smsize_t   length;
00838 
00839     void clear() { length = 0; offset = 0; in_hdr = false; func = NULL ; }
00840 };
00841 
00842 /**\brief Parameter to control behavior of sort_file. 
00843  * \ingroup SSMSORT
00844  * \details
00845  * This class determines how the sort will behave, and it holds descriptions
00846  * of the keys to be used for a sort.  
00847  * For details of the effect on a sort, see \ref SORTOUTPUT.
00848  * For details about sort keys, see \ref SORTKEYS.
00849  *
00850  * He who creates this data structure determines
00851  * what is "most significant" key, next-most significant key, etc.
00852  * Key 0 gets compared first, 1 next, and so on.
00853  *
00854  * For related types, see the ssm_sort namespace.
00855  */
00856 class sort_keys_t 
00857 {
00858 public:
00859     typedef ssm_sort::LEXFUNC LEXFUNC;
00860  
00861     /**\brief Lexify callback function that does a simple memory copy 
00862      * @param[in] source    Pointer to start of key
00863      * @param[in] len    Length of key
00864      * @param[out] sink    Pointer to output buffer
00865      * 
00866      * \details
00867      * Does no reformatting; simply copies from source to sink.
00868      */
00869     static w_rc_t noLEXFUNC (const void *source, smsize_t len, void *sink);
00870 
00871     /**\brief Vacuous callback function, does nothing.
00872      * @param[in] rid   Ignored.
00873      * @param[in] obj  Ignored.
00874      * @param[in] cookie  Ignored.
00875      * @param[in] f  Ignored.
00876      * @param[out] out   Ignored.
00877      * \details
00878      * This function should never be used.  It is a default value.
00879      * The sort_file checks for
00880      * \code sort_keys_t::lexify() == sort_keys_t::noCSKF 
00881      * \endcode
00882      * and if so, bypasses any code connected with key creation, 
00883      * using the object_t it would have passed in to this function
00884      * as if it were the output of this function.
00885      * This comparison and bypass is faster than executing the
00886      * prologue and epilogue code to acquire space and release it,
00887      * needed when a CSKF is called.
00888      */
00889     static w_rc_t noCSKF(
00890         const rid_t&       rid,
00891         const object_t&    obj,
00892         key_cookie_t       cookie,  // type info
00893         factory_t&         f,
00894         skey_t*            out
00895     );
00896 
00897     /**\brief Either copies or lexifies a key.
00898      * @param[in] rid   Record ID of the record containing the key.
00899      * @param[in] in_obj  This refers to the record containing the key.
00900      * @param[in] cookie  Must be a pointer to a generic_CSKF_cookie,
00901      *                 which tells it which \ref LEXFUNC function to call 
00902      *                 (noLEXFUNC indicates straight copy),
00903      *                 and also tells it the length and location (offset) 
00904      *                 of the key.
00905      * @param[in] f    A heap manager for allocating space.
00906      * @param[out] out   Result is written here.
00907      *
00908      * \details
00909      * One normally expects the user to provide the entire
00910      * function for this, but we have this generic version just
00911      * for simplifying the handling of basic types for backward
00912      * compatibility.
00913      */
00914     static w_rc_t generic_CSKF(
00915         const rid_t&     rid,
00916         const object_t&  in_obj,
00917         key_cookie_t     cookie,  // type info
00918         factory_t&       f,
00919         skey_t*          out
00920     );
00921 
00922     /**\brief Vacuous Marshal Object Function
00923      * \details
00924      * This function is never called; rather, the
00925      * sort code checks for 
00926      * \code sort_keys_t::marshal_func() ==sort_keys_t::noMOF 
00927      * \endcode and if so, bypasses any code specific to
00928      * marshalling. This is done because the preparatory work
00929      * for calling a marshal function includes allocating space for
00930      * the results, and it is cheaper to bypass it altogether.
00931      */
00932     static w_rc_t noMOF (
00933         const rid_t&     ,  
00934         const object_t&  ,
00935         key_cookie_t     ,  
00936         object_t*    
00937     );
00938 
00939     /**\brief Vacuous Unmarshal Object Function
00940      * \details
00941      * This function is never called; rather, the
00942      * sort code checks for 
00943      * \code sort_keys_t::unmarshal_func() == sort_keys_t::noMOF 
00944      * \endcode and if so, bypasses any code specific to
00945      * unmarshalling. This is done because the preparatory work
00946      * for calling an unmarshal function includes allocating space for
00947      * the results, and it is cheaper to bypass it altogether.
00948      */
00949     static w_rc_t noUMOF (
00950         const rid_t&     ,  
00951         const object_t&  ,
00952         key_cookie_t     ,  
00953         object_t*    
00954     );
00955 
00956     /*
00957      * Default comparision functions for in-buffer-pool
00958      * comparisons.  No byte-swapping is done; alignment
00959      * requirements must already be met before calling these.
00960      *
00961      * Template is instantiated for {uint,int}[8421]_t.
00962      */
00963     static int string_cmp(w_base_t::uint4_t , const void* , 
00964             w_base_t::uint4_t , const void*);
00965 
00966     static int uint8_cmp(w_base_t::uint4_t , const void* , 
00967             w_base_t::uint4_t , const void* );
00968     static int int8_cmp(w_base_t::uint4_t , const void* , 
00969             w_base_t::uint4_t , const void* );
00970     static int uint4_cmp(w_base_t::uint4_t , const void* , 
00971             w_base_t::uint4_t , const void* );
00972     static int int4_cmp(w_base_t::uint4_t , const void* , 
00973             w_base_t::uint4_t , const void* );
00974     static int uint2_cmp(w_base_t::uint4_t , const void* , 
00975             w_base_t::uint4_t , const void* );
00976     static int int2_cmp(w_base_t::uint4_t , const void* , 
00977             w_base_t::uint4_t , const void* );
00978     static int uint1_cmp(w_base_t::uint4_t , const void* , 
00979             w_base_t::uint4_t , const void* );
00980     static int int1_cmp(w_base_t::uint4_t , const void* , 
00981             w_base_t::uint4_t , const void* );
00982     static int f4_cmp(w_base_t::uint4_t , const void* , 
00983             w_base_t::uint4_t , const void* );
00984     static int f8_cmp(w_base_t::uint4_t , const void* , 
00985             w_base_t::uint4_t , const void* );
00986 
00987 public:
00988     //@{
00989     /** @name LEXFUNCs
00990      * LEXFUNC (q.v.) functions for fundamental types.
00991      */
00992     static w_rc_t f8_lex(const void *source, smsize_t len, void *sink);
00993     static w_rc_t f4_lex(const void *source, smsize_t len, void *sink);
00994     static w_rc_t u8_lex(const void *source, smsize_t len, void *sink);
00995     static w_rc_t i8_lex(const void *source, smsize_t len, void *sink);
00996     static w_rc_t u4_lex(const void *source, smsize_t len, void *sink);
00997     static w_rc_t i4_lex(const void *source, smsize_t len, void *sink);
00998     static w_rc_t u2_lex(const void *source, smsize_t len, void *sink);
00999     static w_rc_t i2_lex(const void *source, smsize_t len, void *sink);
01000     static w_rc_t u1_lex(const void *source, smsize_t len, void *sink);
01001     static w_rc_t i1_lex(const void *source, smsize_t len, void *sink);
01002     //@}
01003 
01004 
01005 private:
01006 
01007      /* Metadata about a key -- info about the key
01008      *     as it appears in all objects, not per-object
01009      *      key info.
01010      */
01011     class key_meta_t {
01012         public:
01013         /* 
01014          * callbacks:
01015          */ 
01016         CF              _cb_keycmp;  // comparison
01017         CSKF            _cb_keyinfo; // get location/copy/derived
01018         key_cookie_t    _cookie;
01019         w_base_t::int1_t _mask;
01020         fill1            _dummy1;
01021         fill2            _dummy2;
01022         key_meta_t() : _cb_keycmp(0), _cb_keyinfo(0), 
01023             _cookie(0),
01024             _mask(t_none) {}
01025         key_meta_t(const key_meta_t &old) : 
01026             _cb_keycmp(old._cb_keycmp), 
01027             _cb_keyinfo(old._cb_keyinfo),
01028             _cookie(old._cookie),
01029             _mask(old._mask) {}
01030     };
01031     int        _nkeys;        // constructor 
01032     int        _spaces;    // _grow
01033     key_meta_t*     _meta;
01034     key_location_t* _locs;
01035     bool    _stable;    // set_stable, is_stable
01036     bool    _for_index;    // set_for_index, is_for_index
01037     bool    _remove_dups;    // set_unique
01038     bool    _remove_dup_nulls; // set_null_unique
01039     bool    _ascending;    // ascending, set_ascending
01040     bool    _deep_copy;    // set_for_index, set_for_file
01041     bool    _keep_orig_file;// set_for_index, set_for_file, set_keep_orig
01042     bool    _carry_obj;    // set_for_file, carry_obj, set_carry_obj
01043 
01044     CSKF    _cb_lexify_index_key;    // set_for_index, index key lexify
01045     key_cookie_t _cb_lexify_index_key_cookie;    // set_for_index
01046 
01047     MOF        _cb_marshal;    // set_object_marshal
01048     UMOF    _cb_unmarshal;    // set_object_marshal
01049     key_cookie_t _cb_marshal_cookie;    // set_object_marshal
01050 
01051     void _grow(int i);
01052 
01053     void _prep(int key);
01054     void _set_loc(int key, smsize_t off, smsize_t len) {
01055         key_location_t &t = _locs[key];
01056         t._off = off;
01057         t._length = len;
01058     }
01059     void _set_mask(int key,
01060         bool fixed, 
01061         bool aligned, 
01062         bool lexico, 
01063         bool in_header,
01064         CSKF gfunc,
01065         CF   cfunc,
01066         key_cookie_t cookie
01067     ) {
01068         key_meta_t &t = _meta[key];
01069         if(aligned) t._mask |= t_aligned;
01070         else t._mask &= ~t_aligned;
01071         if(fixed) t._mask |= t_fixed;
01072         else t._mask &= ~t_fixed;
01073         if(lexico) t._mask |= t_lexico;
01074         else t._mask &= ~t_lexico;
01075         if(in_header) t._mask |= sort_keys_t::t_hdr;
01076         else t._mask &= ~sort_keys_t::t_hdr;
01077         t._cb_keycmp = cfunc;
01078         t._cb_keyinfo = gfunc;
01079         w_assert3(cfunc);
01080         if( ! fixed) {
01081             w_assert3(gfunc);
01082         }
01083         t._cookie = cookie;
01084     }
01085 
01086     void _copy(const sort_keys_t &old);
01087 
01088     /*
01089      * mask:
01090      * t_fixed: key location is fixed for all records: no need
01091      *    to call CSKF for each record 
01092      * t_aligned: key location adequately aligned (relative
01093      *    to the beginning of the record, which is 4-byte aligned) for
01094      *    in-buffer-pool comparisons with the given comparison
01095      *    function.  Copy to a contiguous buffer is unnecessary iff
01096      *    the entire key happens to be on one page (which is always
01097      *    the case with small records). 
01098      * t_lexico: Key is already in lexicographic order and
01099      *    can be spread across pages, and the comparison 
01100      *    function (usually string-compare or byte-compare) 
01101      *    can be called on successive segments of the key -- 
01102      *    copying they key to a contiguous buffer is unnecessary.
01103      *    Implies key is adequately aligned, but does not imply t_fixed.
01104      *
01105      *  t_hdr: key is at offset in hdr rather than in body 
01106      */
01107 
01108     enum mask { t_none = 0x0, 
01109         t_fixed = 0x1, 
01110         t_aligned =0x2, 
01111         t_lexico=0x4,
01112         t_hdr = 0x40  // key found in hdr
01113         };
01114 
01115 public:
01116     /// Create a structure that's ready to be populated with \a nkeys keys.
01117     NORET sort_keys_t(int nkeys);
01118 
01119     NORET ~sort_keys_t() {
01120         delete[] _meta;
01121         delete[] _locs;
01122     }
01123 
01124     /// Copy operator.
01125     NORET sort_keys_t(const sort_keys_t &old);
01126 
01127     /// Return number of keys.
01128     int     nkeys() const { return _nkeys; }
01129 
01130     /**\brief Output is to be suitable for index bulk-load, index key is sort
01131      * key.
01132      *
01133      * Call this if you want the output file to be written with objects 
01134      * of the form 
01135      * * hdr == key, body==rid
01136      * and the input file not to be destroyed.
01137      * This file is suitable for bulk-loading an index, and
01138      * the index key is the sort key.
01139      *
01140      * You \b must provide conversion functions for the sort key
01141      * to be converted to a lexicographic format string if it is not
01142      * already in such format in the original record, if the
01143      * index key (being the sort key) is to be used in a B+-Tree.
01144      *
01145      * Only one sort key is supported when sorting for index bulk-load, but
01146      * the key may be derived, and so the CSKF callback can combine
01147      * multiple keys, and lexifying them ensures that they can be
01148      * sorted as one.  This is not entirely sufficient to cover all
01149      * cases of multiple keys, but it will do for many cases, particularly
01150      * where the sub-keys are of fixed length.
01151      */
01152     int  set_for_index() {
01153             _keep_orig_file = true;
01154             _deep_copy = false;
01155             _for_index = true; 
01156             if(_nkeys > 1) {
01157                 return 1; // BAD 
01158                 // we only support single-key btrees
01159             }
01160             _cb_lexify_index_key = sort_keys_t::noCSKF;
01161             _cb_lexify_index_key_cookie = key_cookie_t(0);
01162             return 0;
01163         }
01164 
01165     /**\brief Output is to be suitable for index bulk-load, index key is
01166      * different from the sort key.
01167      * @param[in] lfunc Key creation/location function for the index key.
01168      * @param[in] ck Datum for \a lfunc
01169      * \details
01170      *
01171      * Only one sort key can be used.
01172      *
01173      * Call this if you want the output file
01174      * to be written with objects of the form 
01175      * hdr == key, body==rid
01176      * and the input file not to be destroyed,
01177      * and you wish the index key to be different from the sort key.
01178      *
01179      * The \a lfunc argument must produce an index key 
01180      * in \ref LEXICOFORMAT "lexicographic format" if the index is to
01181      * be a B+-Tree.  This function is called when the record is
01182      * first encountered (reading the input file), since the record is
01183      * already pinned to gather a sort key.
01184      *
01185      * Only one sort key is supported when bulk-loading for indexes, but
01186      * the key may be derived, and so the CSKF callback can combine
01187      * multiple keys, and lexifying them ensures that they can be
01188      * sorted as one.  This is not entirely sufficient to cover all
01189      * cases of multiple keys, but it will do for many cases, particularly
01190      * where the sub-keys are of fixed length.
01191      */
01192     int   set_for_index(CSKF lfunc, key_cookie_t ck) {
01193             set_for_index();
01194             if(!lfunc) lfunc = sort_keys_t::noCSKF;
01195             _cb_lexify_index_key = lfunc;
01196             _cb_lexify_index_key_cookie = ck;
01197             return 0;
01198         }
01199 
01200     /// Return true if set_for_index() was called.
01201     bool    is_for_index() const { return _for_index; }
01202 
01203     /// Return true if set_for_file() was called.
01204     bool    is_for_file() const { return !_for_index; }
01205 
01206     /// Return true if set_stable() 
01207     bool    is_stable() const { return _stable; }
01208 
01209     /// Ensure stable sort.  Cannot be used with set_for_index.
01210     void    set_stable(bool val) { _stable = val; }
01211 
01212     /// Return the function that creates or locates and/or lexifies the key.
01213     CSKF    lexify_index_key() const { return _cb_lexify_index_key; }
01214 
01215     /// Pointer to input datum for the associated lexify() CSKF. 
01216     key_cookie_t     lexify_index_key_cookie() const { return _cb_lexify_index_key_cookie; }
01217 
01218     /**\brief Output is to be a the input records, sorted 
01219      * @param[in] deepcopy Use true if you want a deep copy.
01220      * @param[in] keeporig Use true if you want to retain the input file.
01221      * @param[in] carry_obj Use true if you want to carry along the entire
01222      *                      objects through the scratch files and to the
01223      *                      output file. Used only for is_for_file().
01224      *
01225      * Call this if you want the output file
01226      * to contain copies of the input file records, undulterated,
01227      * but in sorted order.
01228      *
01229      * Multiple keys are supported.  Use of a CSKF is not needed if
01230      * the keys are embedded in the records, suitably aligned, and do
01231      * not cross page boundaries (string comparisons excepted, of course,
01232      * as string-comparison methods can be called repeatedly on successive
01233      * corresponding portions of string keys).
01234      */
01235     int     set_for_file(bool deepcopy, bool keeporig, bool carry_obj) {
01236             _for_index = false;
01237             (void) set_carry_obj(carry_obj);
01238             (void) set_deep_copy(deepcopy);
01239             return set_keep_orig(keeporig);
01240         }
01241 
01242     /**\brief Ensure marshaling and unmarshaling of the objects.
01243      * @param[in] marshal MOF to be used when reading records from disk.
01244      * @param[in] unmarshal UMOF to be used to write records to disk,
01245      * @param[in] c Arguemtn to \a marshal and \a unmarshal
01246      *
01247      * Call this if the objects in the file need to be byte-swapped
01248      * or otherwise marshaled before use, and if they need to be
01249      * unmarshaled before the output file is written.
01250      * This may be used with set_for_index or set_for_file.
01251      */
01252     int     set_object_marshal( MOF marshal, UMOF unmarshal, key_cookie_t c) {
01253             _cb_marshal = marshal;
01254             _cb_unmarshal = unmarshal;
01255             _cb_marshal_cookie = c;
01256             return 0;
01257         }
01258 
01259     /**\brief Return the marshal function or noMOF if none was given */
01260     MOF     marshal_func() const { return _cb_marshal; }
01261     /**\brief Return the unmarshal function or noUMOF if none was given */
01262     UMOF    unmarshal_func() const { return _cb_unmarshal; }
01263 
01264     /**\brief Pointer to datum for marshal and unmarshal functions */
01265     key_cookie_t    marshal_cookie() const { return _cb_marshal_cookie; }
01266 
01267     /**\brief True if sort will be in ascending order */
01268     bool    is_ascending() const { return _ascending; } 
01269 
01270     /**\brief Ensure that sort will be in ascending order */
01271     int     set_ascending( bool value = true) {  _ascending = value; return 0; }
01272 
01273     /**\brief True if duplicate keys and their records will be removed.
01274      *
01275      * When duplicates are encountered, they are sorted by record-id,
01276      * and the larger of the two (per umemcmp)  is removed.
01277      * */
01278     bool    is_unique() const { return _remove_dups; } 
01279 
01280     /**\brief Ensure that duplicate keys and their records will be removed 
01281      * */
01282     int     set_unique( bool value = true) {  _remove_dups = value; return 0; }
01283 
01284     /**\brief True if duplicate null keys and their records will be removed 
01285      * 
01286      * When duplicates are encountered, they are sorted by record-id,
01287      * and the larger of the two (per umemcmp) is removed.
01288      * */
01289     bool    null_unique() const { return _remove_dup_nulls; } 
01290 
01291     /**\brief Ensure that duplicate null keys and their records will be removed
01292      */
01293     int     set_null_unique( bool value = true) {  _remove_dup_nulls = value; 
01294             return 0; }
01295 
01296     /**\brief True if the sort will copy the entire objects through each
01297      * phase.
01298      * \details
01299      * Used when is_for_file only.
01300      */
01301     bool    carry_obj() const {  return _carry_obj; }
01302 
01303     /**\brief Control whether the sort will copy the entire objects through each
01304      * phase.
01305      * @param[in] value   If true, ensure keep_orig().
01306      * \details
01307      * Used when is_for_file only.
01308      * This is useful if the keys are fixed and consume most of the
01309      * original objects, in which case there is no need for the
01310      * sort code to duplicate the key as well as the object 
01311      * in the temporary output files, or to re-pin the original
01312      * records to copy them to the output file.
01313      */
01314     int     set_carry_obj(bool value = true) {  
01315         return _carry_obj = value; 
01316         return 0;
01317         }
01318 
01319     /**\brief True if the sort will copy the entire objects to the
01320      * result file.
01321      */
01322     bool    deep_copy() const {  return _deep_copy; }
01323     /**\brief Control whether the sort will copy the entire objects to the
01324      * result file.
01325      * @param[in] value   If true, ensure deep_copy().
01326      * \details
01327      * Used when is_for_file only.
01328      * When large objects appear in the input file and the input (original)
01329      * file is not to be kept, sort can copy only the metadata for the
01330      * large objects and reassign the large-object store to the result file.
01331      * This eliminates a lot of object creation and logging.
01332      */
01333     int     set_deep_copy(bool value = true ) {  
01334         _deep_copy = value; 
01335         return 0; 
01336         }
01337 
01338     /**\brief Return true if the sort will not destroy the input file.
01339      * \details
01340      * Used when is_for_file only.
01341      * This is turned on automatically when set_for_index.
01342      */
01343     bool    keep_orig() const {  return _keep_orig_file; }
01344     /**\brief Control whether the sort will not destroy the input file.
01345      * @param[in] value   If true, ensure keep_orig().
01346      * \details
01347      * Used when is_for_file only.
01348      * This is turned on automatically when set_for_index.
01349      */
01350     int     set_keep_orig(bool value = true ) {  
01351         if(value && !_deep_copy) return 1;
01352         _keep_orig_file = value;
01353         return 0;
01354         }
01355 
01356     /**\brief Set attributes of key.
01357      * @param[in] keyindex The ordinal number of the index whose 
01358      *                      attributes are to be set
01359      * @param[in] off Offset from beginning 
01360      *               of record header or body where the key is to be found
01361      * @param[in] len Length of key in recordd
01362      * @param[in] in_header True indicates that the key is 
01363      *                 to be found in the record header rather than in the body.
01364      * @param[in] aligned True indicates that the key, as found in the record,
01365      *                  is suitably aligned for key comparisons with the
01366      *                  CF (key-comparison function) to be used. False means
01367      *                  that sort has to make an aligned copy before
01368      *                  doing a key comparison.
01369      * @param[in] lexico True indicates that the key, as found in the record,
01370      *                  is already in \ref LEXICOFORMAT "lexicographic format"
01371      * @param[in] cfunc Key comparison function to use on this key.
01372      *
01373      * \retval 0 if OK, 1 if error.
01374      * \details
01375      * You must call this or set_sortkey_derived for each of the keys.
01376      *
01377      * There must be implicit agreement between what the \a cfunc
01378      * expects and the arguments \a aligned and \a lexico.
01379      */
01380     int set_sortkey_fixed(
01381         int keyindex,  // Key number origin 0
01382         smsize_t off,  // offset from beginning of hdr or body
01383         smsize_t len,  // length
01384         bool in_header,// header or body
01385         bool aligned,  // is aligned
01386         bool lexico,   // needs lexifying
01387         CF   cfunc     // comparison func
01388         );
01389 
01390     /**\brief Set attributes of key.
01391      * @param[in] keyindex The ordinal number of the index whose 
01392      *                      attributes are to be set
01393      * @param[in] gfunc Key-creation (lexify) function.
01394      * @param[in] cookie Datum for \a gfunc.
01395      * @param[in] in_header True indicates that the key is 
01396      *                 to be found in the record header rather than in the body.
01397      * @param[in] aligned True indicates that the key, as found in the 
01398      *                  result of \a gfunc,
01399      *                  is suitably aligned for key comparisons with the
01400      *                  CF (key-comparison function) to be used. False means
01401      *                  that sort has to make an aligned copy before
01402      *                  doing a key comparison.
01403      * @param[in] lexico True indicates that the key, as found in the result
01404      *                  of \a gfunc,
01405      *                  is in \ref LEXICOFORMAT "lexicographic format".
01406      * @param[in] cfunc Key comparison function to use on this key.
01407      *
01408      * \retval 0 if OK, 1 if error.
01409      * \details
01410      * You must call this or set_sortkey_fixed for each of the keys.
01411      *
01412      * There must be implicit agreement between what the \a cfunc
01413      * expects and the arguments \a aligned and \a lexico.
01414      */
01415     int set_sortkey_derived(
01416         int keyindex, 
01417         CSKF gfunc,
01418         key_cookie_t cookie,
01419         bool in_header,
01420         bool aligned, 
01421         bool lexico, 
01422         CF   cfunc
01423         );
01424 
01425     /**\brief Return the key-location information for a given fixed-location key.
01426      * @param[in] i The ordinal number of the index of interest.
01427      * \details
01428      * Only for fixed-location keys.
01429      */
01430     key_location_t&  get_location(int i) { return _locs[i]; }
01431 
01432     /**\brief Return the offset for a given fixed-location key.
01433      * @param[in] i The ordinal number of the index of interest.
01434      * \details
01435      * Only for fixed-location keys.
01436      */
01437     smsize_t offset(int i) const {
01438         return _locs[i]._off;
01439     }
01440     /**\brief Return the offset for a given fixed-location key.
01441      * @param[in] i The ordinal number of the index of interest.
01442      * \details
01443      * Only for fixed-location keys.
01444      */
01445     smsize_t length(int i) const {
01446         return _locs[i]._length;
01447     }
01448     /**\brief Return the CSKF function for a given derived key.
01449      * @param[in] i The ordinal number of the index of interest.
01450      * \details
01451      * Only for derived keys.
01452      */
01453     CSKF keycreate(int i) const {
01454         return _meta[i]._cb_keyinfo;
01455     }
01456 
01457     /**\brief Return the argument to the CSKF function for a given derived key.
01458      * @param[in] i The ordinal number of the index of interest.
01459      * \details
01460      * Only for derived keys.
01461      */
01462     key_cookie_t cookie(int i) const {
01463         return _meta[i]._cookie;
01464     }
01465 
01466     /**\brief Return the key-comparison function for a given key.
01467      * @param[in] i The ordinal number of the index of interest.
01468      * \details
01469      * For fixed-location and derived keys.
01470      */
01471     CF keycmp(int i) const {
01472         return _meta[i]._cb_keycmp;
01473     }
01474 
01475     /// Return true if key \a i is in lexicographic format in the input record
01476     bool     is_lexico(int i) const {
01477         return (_meta[i]._mask & t_lexico)?true:false;
01478     }
01479     /// Return true if key \a i is in a fixed location in all input records
01480     bool     is_fixed(int i) const {
01481         return (_meta[i]._mask & t_fixed)?true:false;
01482     }
01483     /// Return true if key \a i is in suitably aligned in the input record for the key-comparison function
01484     bool     is_aligned(int i) const {
01485         return (_meta[i]._mask & t_aligned)?true:false;
01486     }
01487     /// True if the key or the source of a derived key is to be found in the record header
01488     bool     in_hdr(int i) const {
01489         return (_meta[i]._mask & t_hdr)?true:false;
01490     }
01491 };
01492 
01493 } // end namespace ssm_sort
01494 
01495 inline void 
01496 ssm_sort::sort_keys_t::_grow(int i) 
01497 {
01498     // realloc it to accommodate i more entries
01499     {
01500     key_location_t* tmp = new key_location_t[_spaces + i];
01501     if(!tmp) W_FATAL(fcOUTOFMEMORY);
01502     if(_locs) {
01503         memcpy(tmp, _locs, nkeys() * sizeof(key_location_t));
01504         delete[] _locs;
01505     }
01506     _locs = tmp;
01507     }
01508     {
01509     key_meta_t* tmp = new key_meta_t[_spaces + i];
01510     if(!tmp) W_FATAL(fcOUTOFMEMORY);
01511     if(_meta) {
01512         memcpy(tmp, _meta, nkeys() * sizeof(key_meta_t));
01513         delete[] _meta;
01514     }
01515     _meta = tmp;
01516     }
01517     _spaces += i;
01518     // don't change nkeys
01519 }
01520 inline NORET 
01521 ssm_sort::sort_keys_t::sort_keys_t(int nkeys):
01522     _nkeys(0),
01523     _spaces(0),
01524     _meta(0),
01525     _locs(0),
01526     _stable(false), _for_index(false), 
01527     _remove_dups(false), _remove_dup_nulls(false),
01528     _ascending(true), _deep_copy(false), _keep_orig_file(false),
01529     _carry_obj(false),
01530     _cb_lexify_index_key(sort_keys_t::noCSKF),
01531     _cb_lexify_index_key_cookie(0),
01532     _cb_marshal(sort_keys_t::noMOF),
01533     _cb_unmarshal(sort_keys_t::noUMOF),
01534     _cb_marshal_cookie(0)
01535 { 
01536     _grow(nkeys);
01537 }
01538 
01539 inline void 
01540 ssm_sort::sort_keys_t::_prep(int key) 
01541 {
01542     if(key >= _spaces) {
01543     _grow(5);
01544     }
01545     if(key >= _nkeys) {
01546     // NB: hazard if not all of these filled in!
01547     _nkeys = key+1;
01548     }
01549 }
01550 
01551 inline void 
01552 ssm_sort::sort_keys_t::_copy(const sort_keys_t &old) 
01553 {
01554     _prep(old.nkeys()-1);
01555     int i;
01556     for(i=0; i< old.nkeys(); i++) {
01557     _locs[i] = old._locs[i];
01558     _meta[i] = old._meta[i];
01559     }
01560     w_assert3(nkeys() == old.nkeys());
01561     w_assert3(_spaces >= old._spaces);
01562     for(i=0; i< old.nkeys(); i++) {
01563     w_assert3(_meta[i]._mask == old._meta[i]._mask);
01564     }
01565 }
01566 
01567 inline NORET 
01568 ssm_sort::sort_keys_t::sort_keys_t(const sort_keys_t &old) : 
01569     _nkeys(0), _spaces(0), 
01570     _meta(0), _locs(0) 
01571 {
01572     _copy(old);
01573 }
01574 
01575 inline int 
01576 ssm_sort::sort_keys_t::set_sortkey_fixed(
01577     int key, 
01578     smsize_t off, 
01579     smsize_t len,
01580     bool in_header,
01581     bool aligned, 
01582     bool lexico,
01583     CF   cfunc
01584 ) 
01585 {
01586  DBG(<<"set_sortkey_fixed " << key);
01587     if(is_for_index() && key > 0) {
01588         return 1;
01589     }
01590     _prep(key);
01591     _set_mask(key,  true, aligned, lexico, 
01592                 in_header, noCSKF, cfunc, key_cookie_t::null);
01593     _set_loc(key,  off, len);
01594     return 0;
01595 }
01596 
01597 inline int 
01598 ssm_sort::sort_keys_t::set_sortkey_derived(
01599     int key, 
01600     CSKF gfunc,
01601     key_cookie_t cookie,
01602     bool in_header,
01603     bool aligned, 
01604     bool lexico, 
01605     CF   cfunc
01606 )
01607 {
01608  DBG(<<"set_sortkey_derived " << key);
01609     if(is_for_index() && key > 0) {
01610         return 1;
01611     }
01612     _prep(key);
01613     _set_mask(key, false, aligned, lexico, 
01614         in_header,
01615         gfunc, cfunc, 
01616         cookie);
01617     return 0;
01618 }
01619 
01620 
01621 /*<std-footer incl-file-exclusion='SORT_S_H'>  -- do not edit anything below this line -- */
01622 
01623 #endif          /*</std-footer>*/

Generated on Mon Jan 2 15:13:57 2012 for Shore Storage Manager by  doxygen 1.4.7