00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053 #include "w_defines.h"
00054
00055
00056
00057 #ifdef __GNUC__
00058 #pragma implementation "vec_t.h"
00059 #endif
00060
00061 #define VEC_T_C
00062 #include <cstdlib>
00063 #include <cstring>
00064 #include <w_stream.h>
00065 #include <w_base.h>
00066 #include <w_minmax.h>
00067 #include "basics.h"
00068 #include "vec_t.h"
00069 #include "umemcmp.h"
00070
00071
00072 #ifdef EXPLICIT_TEMPLATE
00073
00074
00075 template class w_auto_delete_array_t<cvec_t>;
00076 template class w_auto_delete_array_t<vec_t>;
00077 #endif
00078
00079
00080
00081 cvec_t cvec_t::pos_inf;
00082 cvec_t cvec_t::neg_inf;
00083 CADDR_T cvec_t::zero_location=(CADDR_T)&(cvec_t::neg_inf);
00084 vec_t& vec_t::pos_inf = *(vec_t*) &cvec_t::pos_inf;
00085 vec_t& vec_t::neg_inf = *(vec_t*) &cvec_t::neg_inf;
00086
00087 cvec_t::cvec_t(const cvec_t& )
00088 {
00089 cerr << "cvec_t: disabled member called" << endl;
00090 cerr << "failed at \"" << __FILE__ << ":" << __LINE__
00091 << "\"" << endl;
00092 abort();
00093 }
00094
00095 cvec_t::~cvec_t()
00096 {
00097 if (_is_large()) {
00098 free(_base);
00099 }
00100 }
00101
00102 void cvec_t::split(size_t l1, cvec_t& v1, cvec_t& v2) const
00103 {
00104 size_t min = 0;
00105 int i;
00106 for (i = 0; i < _cnt; i++) {
00107 if (l1 > 0) {
00108 min = (_base[i].len > l1) ? l1 : _base[i].len;
00109 v1.put(_base[i].ptr, min);
00110 l1 -= min;
00111 }
00112 if (l1 <= 0) break;
00113 }
00114
00115 for ( ; i < _cnt; i++) {
00116 v2.put(_base[i].ptr + min, _base[i].len - min);
00117 min = 0;
00118 }
00119 }
00120
00121 cvec_t& cvec_t::put(const cvec_t& v, size_t start, size_t num_bytes)
00122 {
00123 int i;
00124 size_t start_byte, start_elem, total;
00125
00126 if (v.size() < start+num_bytes) {
00127 w_assert3(v.size() >= start+num_bytes );
00128 }
00129
00130
00131 for (i = 0, total = 0; i < v._cnt && total <= start; i++) {
00132 total += v._base[i].len;
00133 }
00134
00135
00136 if (_cnt+v._cnt > max_small) {
00137 _grow(_cnt+v._cnt);
00138 }
00139
00140 start_elem = i-1;
00141
00142 start_byte = start - (total - v._base[start_elem].len);
00143
00144
00145 _base[_cnt].ptr = v._base[start_elem].ptr+start_byte;
00146 _base[_cnt].len = v._base[start_elem].len-start_byte;
00147 _cnt++;
00148 w_assert3(_cnt <= _max_cnt());
00149 for (i = 1, total = _base[_cnt-1].len; total < num_bytes; i++) {
00150 _base[_cnt].ptr = v._base[start_elem+i].ptr;
00151 _base[_cnt].len = v._base[start_elem+i].len;
00152 total += _base[_cnt++].len;
00153 w_assert3(_cnt <= _max_cnt());
00154 }
00155 _base[_cnt - 1].len -= total-num_bytes;
00156
00157 _size += num_bytes;
00158 w_assert3(check_size());
00159 return *this;
00160 }
00161
00162 cvec_t& cvec_t::put(const void* p, size_t l)
00163 {
00164 if (_cnt+1 > max_small) {
00165 _grow(_cnt+1);
00166 }
00167
00168
00169 _base[_cnt].ptr = (unsigned char*)p;
00170 _base[_cnt].len = l;
00171 if(l>0) {
00172 _cnt++;
00173 w_assert3(_cnt <= _max_cnt());
00174 _size += l;
00175 }
00176 return *this;
00177 }
00178
00179 bool cvec_t::check_size() const
00180 {
00181 w_assert1(_size == recalc_size());
00182 return true;
00183 }
00184
00185 size_t cvec_t::recalc_size() const
00186 {
00187
00188 size_t l;
00189 int i;
00190 for (i = 0, l = 0; i < _cnt; l += _base[i++].len) ;
00191 return l;
00192 }
00193
00194
00195
00196
00197
00198
00199
00200
00201 const vec_t& vec_t::copy_from(
00202 const void* p,
00203 size_t limit,
00204 size_t offset) const
00205 {
00206 w_assert1(! is_pos_inf() && ! is_neg_inf() && !is_null() );
00207 w_assert1( _base[0].ptr != zero_location );
00208
00209 char* s = (char*) p;
00210 for (int i = 0; (i < _cnt) && (limit>0); i++) {
00211 if ( offset < _base[i].len ) {
00212 size_t elemlen = ((_base[i].len - offset > limit)?
00213 limit : (_base[i].len - offset)) ;
00214 memcpy((char*)_base[i].ptr + offset, s, elemlen);
00215 w_assert3(limit >= elemlen);
00216 limit -= elemlen;
00217 s += elemlen;
00218 }
00219 if (_base[i].len > offset) {
00220 offset = 0;
00221 } else {
00222 offset -= _base[i].len;
00223 }
00224 }
00225 return *this;
00226 }
00227
00228
00229 size_t cvec_t::copy_to(void* p, size_t limit) const
00230 {
00231 w_assert1(! is_pos_inf() && ! is_neg_inf() );
00232 char* s = (char*) p;
00233 for (int i = 0; i < _cnt && limit > 0; i++) {
00234 size_t n = limit > _base[i].len ? _base[i].len : limit;
00235 if(_base[i].ptr == zero_location) {
00236 memset(s, '\0', n);
00237 } else {
00238 memcpy(s, _base[i].ptr, n);
00239 }
00240 w_assert3(limit >= n);
00241 limit -= n;
00242 s += n;
00243 }
00244 return s - (char*)p;
00245 }
00246
00247
00248 vec_t& vec_t::copy_from(const cvec_t& s)
00249 {
00250 bool zeroing=false;
00251 int j = 0;
00252 char* p = (char*) _base[j].ptr;
00253 size_t l = _base[j].len;
00254
00255 w_assert1(size() >= s.size());
00256 w_assert1(_base[0].ptr != zero_location);
00257
00258 for (int i = 0; i < s._cnt; i++) {
00259 const unsigned char* pp = s._base[i].ptr;
00260 if(pp == zero_location) zeroing = true;
00261 size_t left = s._base[i].len;
00262 while (l <= left && j < _cnt) {
00263 if( zeroing) {
00264 memset(p, '\0', l);
00265 } else {
00266 memcpy(p, pp, l);
00267 }
00268 pp += l;
00269 left -= l;
00270 j++;
00271 if (j >= _cnt) break;
00272 l = _base[j].len;
00273 p = (char*) _base[j].ptr;
00274 }
00275 if (left) {
00276 if( zeroing) {
00277 memset(p, '\0', left);
00278 } else {
00279 memcpy(p, pp, left);
00280 }
00281 pp += left;
00282 w_assert3(l >= left);
00283 l -= left;
00284 }
00285 }
00286 return *this;
00287 }
00288
00289 vec_t& vec_t::copy_from(const cvec_t& ss, size_t offset, size_t limit, size_t myoffset)
00290 {
00291 bool zeroing=false;
00292 vec_t s;
00293 s.put(ss, offset, limit);
00294
00295 w_assert1(size() >= s.size());
00296 w_assert1(_base[0].ptr != zero_location);
00297
00298 size_t ssz = s.size(), dsz = size();
00299 if (offset > ssz) offset = ssz;
00300 if (limit > ssz - offset) limit = ssz - offset;
00301 if (myoffset > dsz) offset = dsz;
00302
00303 int j;
00304 for (j = 0; j < _cnt; j++) {
00305 if (myoffset > _base[j].len)
00306 myoffset -= _base[j].len;
00307 else
00308 break;
00309 }
00310 char* p = ((char*)_base[j].ptr) + myoffset;
00311 size_t l = _base[j].len - myoffset;
00312
00313 w_assert1(dsz <= limit);
00314
00315 size_t done;
00316 int i;
00317 for (i = 0, done = 0; i < s._cnt && done < limit; i++) {
00318 const unsigned char* pp = s._base[i].ptr;
00319 if(pp == zero_location) zeroing = true;
00320 size_t left = s._base[i].len;
00321 if (limit - done < left) left = limit - done;
00322 while (l < left) {
00323 if(zeroing) {
00324 memset(p, '\0', l);
00325 } else {
00326 memcpy(p, pp, l);
00327 }
00328 done += l, pp += l;
00329 left -= l;
00330 j++;
00331 if (j >= _cnt) break;
00332 l = _base[j].len;
00333 p = (char*) _base[j].ptr;
00334 }
00335 if (left) {
00336 if(zeroing) {
00337 memset(p, '\0', left);
00338 } else {
00339 memcpy(p, pp, left);
00340 }
00341 pp += left;
00342 l -= left;
00343 done += left;
00344 }
00345 }
00346 return *this;
00347 }
00348
00349 cvec_t& cvec_t::put(const cvec_t& v)
00350 {
00351 w_assert1(! is_pos_inf() && ! is_neg_inf());
00352 if (_cnt+v._cnt > max_small) {
00353 _grow(_cnt+v._cnt);
00354 }
00355 for (int i = 0; i < v._cnt; i++) {
00356 _base[_cnt + i].ptr = v._base[i].ptr;
00357 _base[_cnt + i].len = v._base[i].len;
00358 }
00359 _cnt += v._cnt;
00360 w_assert3(_cnt <= _max_cnt());
00361 _size += v._size;
00362
00363 w_assert3(check_size());
00364 return *this;
00365 }
00366
00367 int cvec_t::cmp(const void* s, size_t l) const
00368 {
00369 if (is_pos_inf()) return 1;
00370 if (is_neg_inf()) return -1;
00371 if (is_null()) {
00372 if(l==0) return 0;
00373
00374 return -1;
00375 }
00376
00377 size_t acc = 0;
00378 for (int i = 0; i < _cnt && acc < l; i++) {
00379 int d = umemcmp(_base[i].ptr, ((char*)s) + acc,
00380 _base[i].len < l - acc ? _base[i].len : l - acc);
00381 if (d) return d;
00382 acc += _base[i].len;
00383 }
00384 return acc - l;
00385 }
00386
00387 int cvec_t::cmp(const cvec_t& v, size_t* common_size) const
00388 {
00389
00390
00391
00392
00393 if (&v == this) {
00394 if (common_size) *common_size = v.size();
00395 return 0;
00396 }
00397
00398
00399 if (is_null() && !v.is_null()) return -1;
00400 if (is_neg_inf() || v.is_pos_inf()) return -1;
00401 if (is_pos_inf() || v.is_neg_inf()) return 1;
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412 int result = 0;
00413
00414
00415
00416
00417 size_t l1 = size();
00418 size_t l2 = v.size();
00419 int i1 = 0;
00420 int i2 = 0;
00421 size_t j1 = 0, j2 = 0;
00422
00423 while (l1 && l2) {
00424 w_assert3(i1 < _cnt);
00425 w_assert3(i2 < v._cnt);
00426 size_t min = _base[i1].len - j1;
00427 if (v._base[i2].len - j2 < min) min = v._base[i2].len - j2;
00428
00429 w_assert3(min > 0);
00430 result = umemcmp(&_base[i1].ptr[j1], &v._base[i2].ptr[j2], min);
00431 if (result) break;
00432
00433 if ((j1 += min) >= _base[i1].len) { j1 = 0; ++i1; }
00434 if ((j2 += min) >= v._base[i2].len) { j2 = 0; ++i2; }
00435
00436 l1 -= min, l2 -= min;
00437 }
00438
00439 if (result) {
00440 if (common_size) {
00441 while (_base[i1].ptr[j1] == v._base[i2].ptr[j2]) {
00442 ++j1, ++j2;
00443 --l1;
00444 }
00445 *common_size = (l1 < size() ? size() - l1 : 0);
00446 }
00447 } else {
00448 result = l1 - l2;
00449 if (result && common_size) {
00450
00451 if (l1 == 0) {
00452 *common_size = size();
00453 } else {
00454 w_assert3(l2 == 0);
00455 *common_size = v.size();
00456 }
00457 w_assert3(*common_size == MIN(size(), v.size()));
00458 }
00459 }
00460 return result;
00461 }
00462
00463 int cvec_t::checksum() const
00464 {
00465 int sum = 0;
00466 w_assert1(! is_pos_inf() && ! is_neg_inf());
00467 for (int i = 0; i < _cnt; i++) {
00468 for(size_t j=0; j<_base[i].len; j++) sum += ((char*)_base[i].ptr)[j];
00469 }
00470 return sum;
00471 }
00472
00473
00474
00475 cvec_t::u32 cvec_t::convert64_32 (cvec_t::u64 num) {
00476 return ((uint32_t) (num >> 32)) ^ ((uint32_t) (num & 0xFFFFFFFF));
00477 }
00478
00479 void cvec_t::_calc_kvl(u32 seed, w_base_t::uint4_t& rh) const
00480 {
00481
00482
00483
00484
00485 u64 ret = 0;
00486 for (int i = 0; i < _cnt; i++) {
00487 const unsigned char* str = _base[i].ptr;
00488 int32_t len = _base[i].len;
00489 for (int16_t i = 0; i < len; ++i) {
00490 ret = seed * ret + str[i];
00491 }
00492 }
00493 rh = convert64_32(ret);
00494 }
00495 const uint32_t CALC_KVL_MULT_1 = 0xD04C3175;
00496 const uint32_t CALC_KVL_MULT_2 = 0x53DA9022;
00497
00498 void cvec_t::calc_kvl(w_base_t::uint4_t& rh) const
00499 {
00500 _calc_kvl (CALC_KVL_MULT_1, rh);
00501 }
00502 void cvec_t::calc_kvl2(w_base_t::uint4_t& rh) const
00503 {
00504 _calc_kvl (CALC_KVL_MULT_2, rh);
00505 }
00506
00507 void cvec_t::_grow(int total_cnt)
00508 {
00509 w_assert3(total_cnt > max_small);
00510 w_assert3(_cnt <= _max_cnt());
00511
00512 int prev_max = _max_cnt();
00513
00514 if (total_cnt > prev_max) {
00515
00516
00517 int grow_to = MAX(prev_max*2, total_cnt);
00518 vec_pair_t* tmp = NULL;
00519
00520 if (_is_large()) {
00521 tmp = (vec_pair_t*) realloc((char*)_base, grow_to * sizeof(*_base));
00522 if (!tmp) W_FATAL(fcOUTOFMEMORY);
00523 } else {
00524 tmp = (vec_pair_t*) malloc(grow_to * sizeof(*_base));
00525 if (!tmp) W_FATAL(fcOUTOFMEMORY);
00526 for (int i = 0; i < prev_max; i++) {
00527 tmp[i] = _base[i];
00528 }
00529 }
00530 _pair[0].len = grow_to;
00531 _base = tmp;
00532 }
00533 }
00534
00535 #include <cctype>
00536
00537 ostream& operator<<(ostream& o, const cvec_t& v)
00538 {
00539 char *p;
00540 u_char c, oldc;
00541 int repeated;
00542 int nparts = v.count();
00543 int i = 0;
00544 size_t j = 0;
00545 size_t l = 0;
00546
00547 o << "{ ";
00548 for(i=0; i< nparts; i++) {
00549
00550 l = (i < v._cnt) ? v._base[i].len : 0;
00551
00552
00553 p = (i < v._cnt) ? (char *)v._base[i].ptr : (char *)NULL;
00554
00555 o << "{" << l << " " << "\"" ;
00556
00557 repeated=0;
00558 oldc = '\0';
00559 for(j=0; j<l; j++,p++) {
00560 c = *p;
00561 if(c == oldc) {
00562 repeated++;
00563 } else {
00564 if(repeated>0) {
00565 o << "<" <<repeated << " times>";
00566 repeated = 0;
00567 }
00568 if( c == '"' ) {
00569 o << "\\\"" ;
00570 } else if( isprint(c) ) {
00571 if( isascii(c) ) {
00572 o << c ;
00573 } else {
00574
00575 o << "\\0" << oct << c << dec ;
00576 }
00577 } else if(c=='\0') {
00578 o << "\\0" ;
00579 } else {
00580 o << "\\0" << oct << (unsigned int)c << dec ;
00581 }
00582 }
00583 oldc = c;
00584 }
00585 if(repeated>0) {
00586 o << "<" <<repeated << " times>";
00587 repeated = 0;
00588 }
00589 o <<"\" }";
00590 w_assert3(j==l);
00591 w_assert3(j==v._base[i].len);
00592 }
00593 o <<"}";
00594 return o;
00595 }
00596
00597 istream& operator>>(istream& is, cvec_t& v)
00598 {
00599 char c=' ';
00600 size_t len=0;
00601 int err = 0;
00602 char *temp = 0;
00603 const char leftbracket='{';
00604 const char rightbracket='}';
00605
00606 is.clear();
00607 v.reset();
00608
00609 enum {
00610 starting=0,
00611 getting_nparts, got_nparts,
00612 getting_pair, got_len,got_string,
00613 done
00614 } state;
00615
00616 state = starting;
00617 while(state != done) {
00618 is >> ws;
00619 c = is.peek();
00620
00621
00622
00623
00624
00625
00626
00627
00628
00629
00630 if(is.eof()) {
00631 err ++;
00632 } else {
00633 switch(state) {
00634 case done:
00635 break;
00636
00637 case starting:
00638 if(c==leftbracket) {
00639 is >> c;
00640 state = getting_nparts;
00641 } else err ++;
00642 break;
00643
00644 case getting_nparts:
00645 is >> ws;
00646 if(is.bad()) { err ++; }
00647 else state = got_nparts;
00648 break;
00649
00650 case got_nparts:
00651 is >> ws;
00652 if(is.peek() == leftbracket) {
00653 state = getting_pair;
00654 } else {
00655 err ++;
00656 }
00657 break;
00658
00659 case getting_pair:
00660 is >> ws;
00661 is >> c;
00662 if( c == leftbracket ) {
00663 is >> ws;
00664 is >> len;
00665 if(is.bad()) {
00666 err ++;
00667 } else state = got_len;
00668 } else if( c== rightbracket ) {
00669 state = done;
00670 } else {
00671 err ++;
00672 }
00673 break;
00674
00675 case got_len:
00676 if(c == '"') {
00677 (void) is.get();
00678
00679 char *t;
00680
00681
00682 temp = new char[len];
00683 size_t j = 0;
00684 for(t=temp; j<len; j++, t++) {
00685 is >> *t;
00686 }
00687 state = got_string;
00688 c = is.peek();
00689 }
00690 if(c != '"') {
00691 err++;
00692 } else {
00693 (void) is.get();
00694 }
00695 break;
00696
00697 case got_string:
00698 is >> c;
00699 if(c != rightbracket ) {
00700 err ++;
00701 } else {
00702 v.put(temp, len);
00703
00704
00705 temp = 0;
00706 len = 0;
00707 state = getting_pair;
00708 }
00709 break;
00710
00711 }
00712
00713
00714
00715
00716
00717
00718
00719
00720
00721 }
00722 if(err >0) {
00723 #if defined(__SUNPRO_CC)
00724
00725 is.clear(is.rdstate() | ios::badbit);
00726 #elif defined(__GNUC__)
00727 #if W_GCC_THIS_VER >= W_GCC_VER(3,0)
00728
00729 is.setstate(ios::badbit);
00730 #else
00731 is.set(ios::badbit);
00732 #endif
00733 #else
00734 is.set(ios::badbit);
00735 #endif
00736
00737 state = done;
00738 err = is.tellg();
00739
00740 }
00741 }
00742 return is;
00743 }
00744
00745