1 #ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2 #define BMSPARSEVEC_ALGO__H__INCLUDED__
24 #ifndef BM__H__INCLUDED__
27 # error missing include (bm.h or bm64.h)
38 # pragma warning( disable: 4146 )
66 unsigned sv_plains = svect.plains();
68 BM_ASSERT(sv_plains > high_bit && high_bit > 0);
74 for (i = high_bit+1; i < sv_plains; ++i)
85 for (i = high_bit;
true; --i)
104 template<
typename SV>
107 if (low_bit == 0)
return;
110 unsigned sv_plains = svect.plains();
115 for (i = low_bit+1; i < sv_plains; ++i)
119 bv_acc1.
bit_or(*bv_plain);
126 for (i = low_bit-1;
true; --i)
131 bv_acc2.
bit_or(*bv_plain);
144 bv_acc1.bit_xor(bv_acc2);
145 bv_low_plain->bit_or(bv_acc1);
169 template<
typename SV>
172 typename SV::size_type& midx,
180 unsigned plains1 = sv1.plains();
193 typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
194 typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
195 if (bv_null1 && bv_null2)
197 bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
198 if (f && (midx < mismatch))
200 found = f; mismatch = midx;
209 bv_tmp.
resize(sv2.size());
213 bool f = bv_null1->find_first_mismatch(bv_tmp, midx, mismatch);
214 if (f && (midx < mismatch))
216 found = f; mismatch = midx;
222 bv_tmp.
resize(sv1.size());
225 bool f = bv_null2->find_first_mismatch(bv_tmp, midx, mismatch);
226 if (f && (midx < mismatch))
228 found = f; mismatch = midx;
235 for (
unsigned i = 0; mismatch & (i < plains1); ++i)
237 typename SV::bvector_type_const_ptr bv1 = sv1.get_plain(i);
238 typename SV::bvector_type_const_ptr bv2 = sv2.get_plain(i);
243 bool f = bv2->find(midx);
244 if (f && (midx < mismatch))
246 found = f; sv_idx = 2; mismatch = midx;
253 bool f = bv1->find(midx);
254 if (f && (midx < mismatch))
256 found = f; sv_idx = 1; mismatch = midx;
262 bool f = bv1->find_first_mismatch(*bv2, midx, mismatch);
263 if (f && (midx < mismatch))
265 found = f; mismatch = midx;
268 sv_idx = (bv1->test(mismatch)) ? 1 : 2;
285 found = sv1.find_rank(midx + 1, mismatch);
288 found = sv2.find_rank(midx + 1, mismatch);
300 typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
301 typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
302 found = bv_null1->find_first_mismatch(*bv_null2, mismatch);
329 template<
typename SV1,
typename SV2>
337 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
339 allocator_pool_type pool;
355 unsigned plains = sv1.plains();
356 if (plains < sv2.plains())
357 plains = sv2.plains();
359 for (
unsigned i = 0; i < plains; ++i)
361 typename SV1::bvector_type_const_ptr bv1 = sv1.get_plain(i);
362 typename SV2::bvector_type_const_ptr bv2 = sv2.get_plain(i);
384 bv_xor.
bit_xor(*bv1, *bv2, SV1::bvector_type::opt_none);
391 typename SV1::size_type sz1 = sv1.
size();
392 typename SV2::size_type sz2 = sv2.size();
402 bv.set_range(sz1, sz2-1);
408 typename SV1::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
409 typename SV2::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
416 if (bv_null1 && bv_null2)
434 if (bv_null1 && bv_null2)
451 bv_null.
resize(sv1.size());
456 bv_null.
resize(sv2.size());
480 template<
typename SV>
502 void bind(
const SV& sv,
bool sorted);
630 template<typename It>
641 bool any_zero =
false;
642 for (; start < end; ++start)
645 any_zero |= (v == 0);
662 typename SV::value_type value,
681 typename SV::value_type value,
683 typename SV::size_type search_limit = 0);
687 typename SV::value_type value,
692 const typename SV::value_type* str,
699 typename SV::value_type value);
703 const typename SV::value_type* str,
704 unsigned octet_start);
733 typedef bm::heap_matrix<
typename SV::value_type,
754 value_type remap_value_vect_[SV::max_vector_size];
756 bm::id64_t vector_plain_masks_[SV::max_vector_size];
772 template<
typename SV>
835 remap(bv_in, sv_brel, bv_out);
848 void attach_sv(
const SV* sv_brel,
bool compute_stats =
false);
857 template<
unsigned BSIZE>
892 template<
typename SV>
894 : sv_ptr_(0), gb_(0), have_stats_(false)
899 SV::throw_bad_alloc();
905 template<
typename SV>
915 template<
typename SV>
925 if (sv_brel->empty() || !compute_stats)
927 const bvector_type* bv_non_null = sv_brel->get_null_bvector();
943 template<
typename SV>
951 const bvector_type* bv_non_null = sv_brel.get_null_bvector();
954 if (!bv_non_null->test(id_from))
957 size_type idx = sv_brel.translate_address(id_from);
958 id_to = sv_brel.get(idx);
964 template<
typename SV>
976 remap(bv_in, bv_out);
981 template<
typename SV>
989 if (sv_ptr_ == 0 || sv_ptr_->empty())
1002 const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
1006 bv_product_ = bv_in;
1007 bv_product_.bit_and(*bv_non_null);
1008 enum_bv = &bv_product_;
1019 bv_product_ = bv_in;
1021 bv_product_.bit_sub(bv_zero_);
1027 bv_out.set_bit_no_check(0);
1029 enum_bv = &bv_product_;
1036 buf_cnt = nb_old = 0;
1039 for (; en.valid(); ++en)
1041 typename SV::size_type idx = *en;
1042 idx = sv_ptr_->translate_address(idx);
1049 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1050 bv_out.set(&gb_->buffer_[0], buf_cnt,
BM_SORTED);
1054 gb_->gather_idx_[buf_cnt++] = idx;
1058 gb_->gather_idx_[buf_cnt++] = idx;
1061 if (buf_cnt == sv_g_size)
1063 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1070 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1078 template<
typename SV>
1083 if (sv_brel.empty())
1088 typename SV::const_iterator it = sv_brel.begin();
1089 for (; it.valid(); ++it)
1091 typename SV::value_type t_id = *it;
1093 if (bv_in.test(idx))
1095 bv_out.set_bit_no_check(t_id);
1105 template<
typename SV>
1112 effective_str_max_ = 0;
1117 template<
typename SV>
1126 effective_str_max_ = sv.effective_vector_max();
1128 block0_elements_cache_.resize(total_nb, effective_str_max_+1);
1129 block0_elements_cache_.set_zero();
1131 block3_elements_cache_.resize(total_nb * 3, effective_str_max_+1);
1132 block3_elements_cache_.set_zero();
1138 value_type* s0 = block0_elements_cache_.row(nb);
1139 sv.get(i, s0,
size_type(block0_elements_cache_.cols()));
1143 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1145 sv.get(idx, s1,
size_type(block3_elements_cache_.cols()));
1151 for (
unsigned i = 0; i < SV::max_vector_size; ++i)
1153 vector_plain_masks_[i] = sv.get_plains_mask(i);
1160 template<
typename SV>
1164 effective_str_max_ = 0;
1169 template<
typename SV>
1178 find_nonzero(sv, bv_out);
1179 if (sv.is_compressed())
1182 bv_out.set_range(sv.effective_size(),
bm::id_max - 1,
false);
1183 decompress(sv, bv_out);
1189 correct_nulls(sv, bv_out);
1194 template<
typename SV>
1210 bv_out.set_range(sv.size(),
bm::id_max - 1,
false);
1216 template<
typename SV>
1222 bv_out.bit_and(*bv_null);
1227 template<
typename SV>
1229 typename SV::value_type value,
1231 typename SV::size_type search_limit)
1238 find_zero(sv, bv_out);
1239 return bv_out.any();
1243 bool found = prepare_and_sub_aggregator(sv, value);
1250 bool any = (search_limit == 1);
1251 found = agg_.combine_and_sub(bv_out, any);
1258 template<
typename SV>
1260 typename SV::value_type value,
1271 bool found = prepare_and_sub_aggregator(sv, value);
1274 found = agg_.find_first_and_sub(idx);
1282 template<
typename SV>
1284 const typename SV::value_type* str,
1296 unsigned common_prefix_len = 0;
1300 agg_.set_range_hint(mask_from_, mask_to_);
1301 common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
1306 str = remap_value_vect_;
1310 if (sv.is_remap() && str != remap_value_vect_)
1312 bool r = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1315 str = remap_value_vect_;
1319 bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len);
1323 found = agg_.find_first_and_sub(idx);
1330 template<
typename SV>
1332 const typename SV::value_type* str,
1333 unsigned octet_start)
1335 unsigned char bits[64];
1338 for (; str[len] != 0; ++len)
1343 for (
int octet_idx = len-1; octet_idx >= 0; --octet_idx)
1345 if (
unsigned(octet_idx) < octet_start)
1348 unsigned value = unsigned(str[octet_idx]) & 0xFF;
1352 if (&sv == bound_sv_)
1353 plains_mask = vector_plain_masks_[octet_idx];
1355 plains_mask = sv.get_plains_mask(
unsigned(octet_idx));
1357 if ((value & plains_mask) != value)
1362 unsigned short bit_count_v =
bm::bitscan(value, bits);
1363 for (
unsigned i = 0; i < bit_count_v; ++i)
1365 unsigned bit_idx = bits[i];
1367 unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
1372 unsigned vinv = unsigned(value);
1383 vinv &= unsigned(plains_mask);
1384 for (
unsigned octet_plain = (
unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1388 unsigned plain_idx = octet_plain + bit_idx;
1397 unsigned plain_idx = unsigned(len * 8) + 1;
1398 typename SV::size_type plains;
1399 if (&sv == bound_sv_)
1400 plains = effective_str_max_ * unsigned(
sizeof(
value_type)) * 8;
1402 plains = sv.plains();
1403 for (; plain_idx < plains; ++plain_idx)
1405 bvector_type_const_ptr bv = sv.get_plain(plain_idx);
1414 template<
typename SV>
1416 typename SV::value_type value)
1418 unsigned char bits[
sizeof(value) * 8];
1419 unsigned short bit_count_v =
bm::bitscan(value, bits);
1426 for (
unsigned i = bit_count_v; i > 0; --i)
1428 unsigned bit_idx = bits[i-1];
1437 unsigned sv_plains = sv.effective_plains();
1438 for (
unsigned i = 0; i < sv_plains; ++i)
1440 bvector_type_const_ptr bv = sv.get_plain(i);
1442 if (bv && !(value & mask))
1451 template<
typename SV>
1453 typename SV::value_type value,
1461 find_zero(sv, bv_out);
1465 unsigned char bits[
sizeof(value) * 8];
1466 unsigned short bit_count_v =
bm::bitscan(value, bits);
1471 const bvector_type* bv_plain = sv.get_plain(bits[--bit_count_v]);
1480 for (
unsigned i = 0; i < bit_count_v; ++i)
1484 bv_out &= *bv_plain;
1493 unsigned sv_plains = sv.effective_plains();
1494 for (
unsigned i = 0; (i < sv_plains) && value; ++i)
1497 if (bv_plain && !(value & (
value_type(1) << i)))
1498 bv_out -= *bv_plain;
1504 template<
typename SV>
1506 typename SV::size_type& pos)
1509 return this->find_eq_str(*bound_sv_, str, pos);
1514 template<
typename SV>
1516 const typename SV::value_type* str,
1517 typename SV::size_type& pos)
1524 bool remaped =
false;
1527 if (sv.is_remap() && str != remap_value_vect_)
1529 remaped = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1532 str = remap_value_vect_;
1537 found = find_first_eq(sv, str, found_pos, remaped);
1543 if (sv.is_compressed())
1544 found = sv.find_rank(found_pos + 1, pos);
1552 find_zero(sv, bv_zero);
1553 found = bv_zero.find(pos);
1560 template<
typename SV>
1562 const typename SV::value_type* str,
1563 typename SV::size_type& pos)
1571 bool remaped =
false;
1575 if (sv.is_remap() && str != remap_value_vect_)
1577 remaped = sv.remap_tosv(
1578 remap_value_vect_, SV::max_vector_size, str);
1584 reset_search_range();
1589 l = 0; r = sv.size()-1;
1596 if (dist < min_distance_cutoff)
1609 int cmp = this->compare_str(sv, mid, str);
1627 int cmp = this->compare_str(sv, mid, str);
1632 cmp = this->compare_str(sv, mid, str);
1637 cmp = this->compare_str(sv, mid, str);
1655 set_search_range(l, r);
1659 typename SV::size_type mid = dist/2+l;
1671 int cmp = this->compare_str(sv, mid, str);
1676 set_search_range(l, mid);
1686 found = find_first_eq(sv, str, found_pos, remaped);
1692 if (sv.is_compressed())
1693 found = sv.find_rank(found_pos + 1, pos);
1696 reset_search_range();
1702 find_zero(sv, bv_zero);
1703 found = bv_zero.find(pos);
1710 template<
typename SV>
1712 typename SV::size_type& pos)
1715 return bfind_eq_str(*bound_sv_, str, pos);
1720 template<
typename SV>
1722 const typename SV::value_type val,
1723 typename SV::size_type& pos)
1735 cmp = this->compare(sv, l, val);
1746 cmp = this->compare(sv, r, val);
1754 cmp = this->compare(sv, r, val);
1768 if (dist < linear_cutoff1)
1772 cmp = this->compare(sv, l, val);
1789 cmp = this->compare(sv, mid, val);
1796 cmp = this->compare(sv, i, val);
1810 if (dist < linear_cutoff2)
1812 typename SV::const_iterator it(&sv, l);
1813 for (; it.valid(); ++it, ++l)
1815 typename SV::value_type sv_value = it.value();
1816 if (sv_value == val)
1840 template<
typename SV>
1843 const typename SV::value_type* str,
1844 typename SV::size_type& pos)
1857 cmp = this->compare_str(sv, l, str);
1868 cmp = this->compare_str(sv, r, str);
1876 cmp = this->compare_str(sv, r, str);
1890 if (dist < linear_cutoff1)
1894 cmp = this->compare_str(sv, l, str);
1910 cmp = this->compare_str(sv, mid, str);
1917 cmp = this->compare_str(sv, i, str);
1931 if (dist < linear_cutoff2)
1935 dist = sv.decode(hmatr_, l, dist+1);
1936 for (
unsigned i = 0; i < dist; ++i, ++l)
1938 const typename SV::value_type* hm_str = hmatr_.row(i);
1939 cmp = ::strcmp(hm_str, str);
1951 cmp = this->compare_str(sv, l, str);
1970 template<
typename SV>
1975 if (bound_sv_ == &sv)
1981 value_type* s0 = block0_elements_cache_.row(nb);
1984 sv.get(idx, s0,
size_type(block0_elements_cache_.cols()));
1987 for (
unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
1989 char octet = str[i];
char value = s0[i];
1990 res = (value > octet) - (value < octet);
2002 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
2004 for (
unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
2006 char octet = str[i];
char value = s1[i];
2007 res = (value > octet) - (value < octet);
2016 return sv.compare(idx, str);
2021 template<
typename SV>
2027 return sv.compare(idx, val);
2032 template<
typename SV>
2034 typename SV::value_type value,
2044 find_zero(sv, bv_out);
2048 find_eq_with_nulls(sv, value, bv_out, 0);
2050 decompress(sv, bv_out);
2051 correct_nulls(sv, bv_out);
2056 template<
typename SV>
2058 typename SV::value_type value,
2059 typename SV::size_type& pos)
2064 find_eq(sv, value, bv_zero);
2065 bool found = bv_zero.find(pos);
2070 bool found = find_first_eq(sv, value, found_pos);
2076 if (sv.is_compressed())
2077 found = sv.find_rank(found_pos + 1, pos);
2085 template<
typename SV>
2090 for (
unsigned i = 0; i < sv.plains(); ++i)
2091 agg_.add(sv.get_plain(i));
2092 agg_.combine_or(bv_out);
2098 template<
typename SV>
2102 if (!sv.is_compressed())
2104 const bvector_type* bv_non_null = sv.get_null_bvector();
2108 rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
2109 bv_out.swap(bv_tmp_);
2114 template<
typename SV>
2125 template<
typename SV>