Shore Storage Manager: src/common/vec

00001 /* -*- mode:C++; c-basic-offset:4 -*-
00002      Shore-MT -- Multi-threaded port of the SHORE storage manager
00003    
00004                        Copyright (c) 2007-2009
00005       Data Intensive Applications and Systems Labaratory (DIAS)
00006                Ecole Polytechnique Federale de Lausanne
00007    
00008                          All Rights Reserved.
00009    
00010    Permission to use, copy, modify and distribute this software and
00011    its documentation is hereby granted, provided that both the
00012    copyright notice and this permission notice appear in all copies of
00013    the software, derivative works or modified versions, and any
00014    portions thereof, and that both notices appear in supporting
00015    documentation.
00016    
00017    This code is distributed in the hope that it will be useful, but
00018    WITHOUT ANY WARRANTY; without even the implied warranty of
00019    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
00020    DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
00021    RESULTING FROM THE USE OF THIS SOFTWARE.
00022 */
00023 
00024 /*<std-header orig-src='shore' incl-file-exclusion='VEC_T_H'>
00025 
00026  $Id: vec_t.h,v 1.68 2012/01/02 17:02:10 nhall Exp $
00027 
00028 SHORE -- Scalable Heterogeneous Object REpository
00029 
00030 Copyright (c) 1994-99 Computer Sciences Department, University of
00031                       Wisconsin -- Madison
00032 All Rights Reserved.
00033 
00034 Permission to use, copy, modify and distribute this software and its
00035 documentation is hereby granted, provided that both the copyright
00036 notice and this permission notice appear in all copies of the
00037 software, derivative works or modified versions, and any portions
00038 thereof, and that both notices appear in supporting documentation.
00039 
00040 THE AUTHORS AND THE COMPUTER SCIENCES DEPARTMENT OF THE UNIVERSITY
00041 OF WISCONSIN - MADISON ALLOW FREE USE OF THIS SOFTWARE IN ITS
00042 "AS IS" CONDITION, AND THEY DISCLAIM ANY LIABILITY OF ANY KIND
00043 FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
00044 
00045 This software was developed with support by the Advanced Research
00046 Project Agency, ARPA order number 018 (formerly 8230), monitored by
00047 the U.S. Army Research Laboratory under contract DAAB07-91-C-Q518.
00048 Further funding for this work was provided by DARPA through
00049 Rome Research Laboratory Contract No. F30602-97-2-0247.
00050 
00051 */
00052 
00053 #ifndef VEC_T_H
00054 #define VEC_T_H
00055 
00056 #include "w_defines.h"
00057 
00058 /*  -- do not edit anything above this line --   </std-header>*/
00059 
00060 /* NB: you must already have defined the type size_t
00061  * (which is defined in "basics.h") before you include this.
00062  */
00063 
00064 #ifdef __GNUG__
      // GNU C++ only: this pragma is emitted solely when __GNUG__ is defined.
00065 #pragma interface
00066 #endif
00067 
      // Pointer-to-read-only-bytes type; used as the pointer half of a
      // vec_pair_t and as the type of cvec_t::zero_location.
00068 typedef const unsigned char * CADDR_T;
      // Number of vec_pair_t slots embedded in VEC_t::_pair; also the value
      // of the enumerator cvec_t::max_small.
00069 #define MAX_SMALL_VEC_SIZE 8
00070 
00071 /*
00072  * Newer c++ compilers require
00073  * that copy constructors be available for classes which use anonymous
00074  * temporary variables.  However, vec_t are non-copyable, so you
00075  * must create named temporaries of these structures.
00076  */
00077 
00078 /**\brief A helper class for VEC_t: a single {pointer, length} pair.
00079  */
00080 struct vec_pair_t {
00081     CADDR_T        ptr;    // pointer half of the pair (pointee is const)
00082     size_t         len;    // length half of the pair
00083 };
00084 
00085 /**\brief A base class for vec_t.
       *
       * Plain aggregate of bookkeeping fields.  It declares no constructor;
       * the cvec_t constructors assign _base and (directly or through reset())
       * _cnt and _size.
00086  */
00087 struct VEC_t {
00088     int                _cnt;         // value returned by cvec_t::count()
00089     size_t             _size;        // value returned by cvec_t::size()
00090     vec_pair_t*        _base;        // pointer to beginning of _pair or malloced
00091                         // space
      // Embedded storage for MAX_SMALL_VEC_SIZE pairs.  While _base points
      // somewhere else, cvec_t::_max_cnt() reads _pair[0].len as the capacity.
00092     vec_pair_t         _pair[MAX_SMALL_VEC_SIZE];
00093 };
00094 
00095 /**\brief A constant vec_t  (meaning things pointed to cannot be changed).
       *
       * Holds a list of {pointer,length} pairs (vec_pair_t) together with the
       * _cnt and _size bookkeeping inherited from VEC_t.  _base points at the
       * array currently holding the pairs: the embedded _pair array for a
       * "small" vector, or some other array for a "large" one (see _is_large()).
       * For a large vector _pair[0].len is read back as the capacity (see
       * _max_cnt()), so elements must always be reached through _base and
       * never through _pair.
       *
       * The copy constructor and assignment operator are declared private
       * (disabled).
00096  */
00097 class cvec_t : protected VEC_t {
00098     friend class vec_t; // so vec_t can look at VEC_t
00099 protected:
00100     static        CADDR_T  zero_location; // see zvec_t, which is supposed
00101                                     // to be for the server-side only
00102 private:
00103     typedef w_base_t::uint8_t u64;
00104     typedef w_base_t::uint4_t u32;
00105 
          // max_small: how many pairs fit in the embedded _pair array.
00106     enum dummy_enumid { max_small = MAX_SMALL_VEC_SIZE };
          // Helpers declared here and defined out of line.
00107     void _calc_kvl(uint4_t seed, uint4_t& h) const;
00108     static u32 convert64_32 (u64 num);
00109 public:
          /// Construct an empty vector that uses the embedded _pair storage.
00110     cvec_t() {
00111         _cnt = 0;
00112         _size = 0;
00113         _base = &_pair[0];
00114     }
          /// Construct from the pairs of v1 followed by the pairs of v2.
00115     cvec_t(const cvec_t& v1, const cvec_t& v2) {
00116         _base= &_pair[0];
00117         set(v1, v2);   // set() resets _cnt/_size before appending
00118     }
          /// Construct a vector holding the single pair {p, l}.
00119     cvec_t(const void* p, size_t l) {
00120         _base = &_pair[0];
00121         set(p, l);
00122     }
          /// Construct from the pairs needed to capture limit bytes
          /// starting at v + offset.
00123     cvec_t(const cvec_t& v, size_t offset, size_t limit) {
00124         _base = &_pair[0];
00125         set(v, offset, limit);
00126     }
          // Defined out of line.
00127     ~cvec_t();
00128 
          // Defined out of line.  NOTE(review): presumably splits this vector
          // at byte l1 into v1 and v2 -- confirm against the implementation.
00129     void split(size_t l1, cvec_t& v1, cvec_t& v2) const;
00130     /// append {p,l} pairs from vector v, (first ptr is v + offset),
00131     ///  as needed to append at most nbytes
00132     cvec_t& put(const cvec_t& v, size_t offset, size_t nbytes);
00133     /// append { p, l } pair to this vector.
00134     cvec_t& put(const void* p, size_t l);
00135     /// append { p, l } pairs from v to this vector.
00136     cvec_t& put(const cvec_t& v);
00137 
00138     /// Clear this vector.
          /// Only _cnt and _size are zeroed; _base is left as it is.
00139     cvec_t& reset()  {
00140         _cnt = _size = 0;
00141         return *this;
00142     }
00143     /// reset, then copy over all {p,l} pairs from v1 and v2
00144     cvec_t& set(const cvec_t& v1, const cvec_t& v2)  {
00145         return reset().put(v1).put(v2);
00146     }
00147     /// reset, then copy over all {p,l} pairs from v
00148     cvec_t& set(const cvec_t& v) {
00149         return reset().put(v);
00150     }
00151 
00152     /// reset, then install {p,l} pair
00153     cvec_t& set(const void* p, size_t l)  {
00154         return reset().put(p, l);
00155     }
00156 
00157     /// reset, then install {p,l} pairs as needed to capture limit 
00158     /// bytes starting at v + offset
00159     cvec_t& set(const cvec_t& v, size_t offset, size_t limit)  {
00160         return reset().put(v, offset, limit);
00161     }
00162 
00163 
00164     /// returns # bytes this vector references
00165     size_t size() const        {
00166         return _size;
00167     }
00168 
00169     /// Write from vector to p, no more than \a limit bytes
00170     size_t copy_to(void* p, size_t limit = 0x7fffffff) const;
00171     
          /// Three-way comparisons, defined out of line.  The relational
          /// operators declared as friends of this class test the result
          /// against 0.
00172     int cmp(const cvec_t& v, size_t* common_size = 0) const;
00173     int cmp(const void* s, size_t len) const;
00174 
          /// Static form of cmp(): forwards to v1.cmp(v2, common_size).
00175     static int cmp(const cvec_t& v1,
00176                const cvec_t& v2, size_t* common_size = 0)  {
00177         return v1.cmp(v2, common_size);
00178     }
00179 
00180     /// return number of {p,l} pairs
00181     int count() const {return _cnt;}
00182 
          // Defined out of line.
00183     int  checksum() const;
00184     void calc_kvl(uint4_t& h) const;
00185     void calc_kvl2(uint4_t& h) const;
00186     void init()         { _cnt = _size = 0; }  // re-initialize the vector
00187     // Creator of the vec has responsibility for delete[]ing anything that
00188     // was dynamically allocated in the array.  These are convenience methods
00189     // for holders of vec_ts that dynamically allocated all parts and want
00190     // them delete[]-ed.
00191     // vecdelparts() calls delete[] on all parts.
00192     // delparts() calls delete on all parts.
00193     // Both leave the vector re-initialized (0 parts)
          // In both loops _cnt ends at -1; the trailing init() puts it back to 0.
00194     void vecdelparts()      {   while(_cnt-->0) { 
00195                                    delete[] _base[_cnt].ptr;
00196                                    _base[_cnt].ptr = NULL;
00197                                    _base[_cnt].len = 0;
00198                                 } 
00199                                 init();
00200                             }
00201     void delparts()         {   while(_cnt-->0) { 
00202                                    delete _base[_cnt].ptr;
00203                                    _base[_cnt].ptr = NULL;
00204                                    _base[_cnt].len = 0;
00205                                 } 
00206                                 init();
00207                             }
00208 
          // The infinity tests compare object identity against the static
          // sentinels below; is_null() only looks at the byte count.
00209     bool is_pos_inf() const        { return this == &pos_inf; }
00210     bool is_neg_inf() const        { return this == &neg_inf; }
00211     bool is_null() const        { return size() == 0; }
00212 
00213     friend inline bool operator<(const cvec_t& v1, const cvec_t& v2);
00214     friend inline bool operator<=(const cvec_t& v1, const cvec_t& v2);
00215     friend inline bool operator>=(const cvec_t& v1, const cvec_t& v2);
00216     friend inline bool operator>(const cvec_t& v1, const cvec_t& v2);
00217     friend inline bool operator==(const cvec_t& v1, const cvec_t& v2);
00218     friend inline bool operator!=(const cvec_t& v1, const cvec_t& v2);
00219 
00220     friend ostream& operator<<(ostream&, const cvec_t& v);
00221     friend istream& operator>>(istream&, cvec_t& v);
00222 
          // Sentinel objects tested by is_pos_inf() / is_neg_inf().
00223     static cvec_t pos_inf;
00224     static cvec_t neg_inf;
00225 
00226 private:
00227     // disabled
00228     cvec_t(const cvec_t& v);
00229     // determine if this is a large vector (one where extra space
00230     // had to be malloc'd)
00231     bool _is_large() const {return _base != &_pair[0];}
00232 
00233     // determine max number of elements in the vector
          // (for a large vector the capacity is kept in _pair[0].len)
00234     int  _max_cnt() const {
00235         return (int)(_is_large() ? _pair[0].len : (int)max_small);
00236     }
00237     // grow vector to have total_cnt elements
00238     void _grow(int total_cnt);
00239 
00240     // disabled
00241     //    cvec_t(const cvec_t& v);
00242     cvec_t& operator=(cvec_t);
00243 
          // Defined out of line.
00244     size_t recalc_size() const;
00245     bool   check_size() const;
00246 
00247 public:
          /// True when the vector has no pairs, or exactly one pair whose
          /// pointer is zero_location.
          // The first pair is read through _base, like every other element
          // access in this class: once _is_large(), _pair[0] is bookkeeping
          // (see _max_cnt()) rather than the first element.
00248     bool is_zvec() const { 
00249 #if W_DEBUG_LEVEL > 2
00250         if(count()>0) {
00251             if(_base[0].ptr == zero_location) {
00252                 w_assert3(count() == 1);
00253             }
00254         }
00255 #endif
00256         return (count()==0)
00257                 ||
00258                 (count() == 1 && _base[0].ptr == zero_location);
00259     }
00260 };
00261 
00262 /**\brief  Vector: a set of {pointer,length} pairs for memory manipulation.
00263  *
00264  * The storage manager and its API use this class as the descriptor
00265  * for copy-in and copy-out of data.
00266  */
00267 class vec_t : public cvec_t {
00268 public:
00269     /// Build a vector that holds no pairs.
00270     vec_t() {}
00271     /// Build a vector holding the pairs of v1 followed by those of v2.
00272     vec_t(const cvec_t& v1, const cvec_t& v2) : cvec_t(v1, v2) {}
00273     /// Build a vector holding the single pair {p, l}.
00274     vec_t(const void* p, size_t l) : cvec_t(p, l) {}
00275     /// Build a vector capturing limit bytes of v, starting at v + offset.
00276     vec_t(const vec_t& v, size_t offset, size_t limit)
00277         : cvec_t(v, offset, limit) {}
00278     // Shallow copy: starts empty, then takes over the {p,l} pairs of v.
00279     vec_t(const vec_t&v) : cvec_t()  {
00280         set(v);
00281     }
00282 
00283 
00284     /**\brief Overwrites the data area to which the vector points.
00285      *
00286      * Takes limit bytes starting at p and scatters them
00287      * across the locations this vector identifies.
00288      */
00289     const vec_t& copy_from(
00290         const void* p,
00291         size_t limit,
00292         size_t offset = 0) const;        // offset: position within this
00293                                 // vector at which copying begins
00294     
00295     /**\brief Overwrites the data area to which the vector points.
00296      *
00297      * The data referenced by vector v is written
00298      * to the locations this vector identifies.
00299      */
00300     vec_t& copy_from(const cvec_t& v);
00301 
00302     /**\brief Overwrites the data area to which the vector points.
00303      *
00304      * The data referenced by vector v, beginning at the given
00305      * offset from the start of v, is written
00306      * to the locations this vector identifies.
00307      */
00308     vec_t& copy_from(
00309         const cvec_t& v,
00310         size_t offset,                // where to start within v
00311         size_t limit,                // byte count
00312         size_t myoffset = 0);        // where to start within *this
00313 
00314     /// Pointer of the pair at position index; NULL when index is out of range.
00315     CADDR_T       ptr(int index) const { if (index < 0 || index >= _cnt) return (CADDR_T) NULL;
00316                                         return _base[index].ptr; }
00317     /// Length of the pair at position index; 0 when index is out of range.
00318     size_t        len(int index) const { if (index < 0 || index >= _cnt) return 0;
00319                                         return _base[index].len; }
00320 
00321     /**\cond skip */
00322     /// Re-packs this vector into "result" as maximum-sized
00323     /// chunks.
00324     void mkchunk( int maxsize, // upper bound on the size of result
00325                 int skip,                 // amount of *this to skip
00326                 vec_t        &result      // supplied by the caller
00327     ) const;
00328     /**\endcond skip */
00329 
00330     /// Constant vector standing for positive infinity (key-value pairs, scans).
00331     static vec_t& pos_inf;
00332     /// Constant vector standing for negative infinity (key-value pairs, scans).
00333     static vec_t& neg_inf;
00334 
00335  private:
00336     // assignment is disabled: declared private, not defined here
00337     vec_t& operator=(vec_t);
00338 
00339 };
00340 
00341 inline bool operator<(const cvec_t& v1, const cvec_t& v2)
00342 {   // v1 is "less" when the three-way compare comes out negative
00343     return cvec_t::cmp(v1, v2) < 0;
00344 }
00345 
00346 inline bool operator<=(const cvec_t& v1, const cvec_t& v2)
00347 {   // "less or equal": the three-way compare is negative or zero
00348     return cvec_t::cmp(v1, v2) <= 0;
00349 }
00350 
00351 inline bool operator>=(const cvec_t& v1, const cvec_t& v2)
00352 {   // "greater or equal": the three-way compare is zero or positive
00353     return cvec_t::cmp(v1, v2) >= 0;
00354 }
00355 
00356 inline bool operator>(const cvec_t& v1, const cvec_t& v2)
00357 {   // v1 is "greater" when the three-way compare comes out positive
00358     return cvec_t::cmp(v1, v2) > 0;
00359 }
00360 
00361 inline bool operator==(const cvec_t& v1, const cvec_t& v2)
00362 {   // the same object is equal to itself without any comparison being run
00363     return (&v1 != &v2) ? (cvec_t::cmp(v1, v2) == 0) : true;
00364 }
00365 
00366 inline bool operator!=(const cvec_t& v1, const cvec_t& v2)
00367 {   // exact negation of operator==
00368     return (v1 == v2) ? false : true;
00369 }
00370 
00371 
00372 /**\brief A vec_t that stands for a run of zeros.
00373  *
00374  * Meant for callers that only need a (read-only) vector of
00375  * zeroes: it is constructed once and its pair points at the
00376  * fixed location "zero_location".
00377  * No real zeroes have to exist at that location; the object is
00378  * treated as though it really were such a vector.
00379  */
00380 class zvec_t : public vec_t {
00381 public:
00382     zvec_t() : vec_t(zero_location, 0) {}
00383     zvec_t(size_t l) : vec_t(zero_location, l) {}
00384     zvec_t &put(size_t l) { set(zero_location, l); return *this; }
00385 private:
00386     // copying and assignment are not available to users of the class
00387     zvec_t(const zvec_t&) : vec_t(zero_location, 0) {}
00388     zvec_t &operator=(zvec_t);
00389     // the remaining vec_t-style constructors are hidden as well
00390     zvec_t(const cvec_t& v1, const cvec_t& v2);/* {} */
00391     zvec_t(const void* p, size_t l); // {}
00392     zvec_t(const vec_t& v, size_t offset, size_t limit); // {}
00393 };
00394 
00395 /*<std-footer incl-file-exclusion='VEC_T_H'>  -- do not edit anything below this line -- */
00396 
00397 #endif          /*</std-footer>*/