1 #ifndef BMAGGREGATOR__H__INCLUDED__
2 #define BMAGGREGATOR__H__INCLUDED__
26 #ifndef BM__H__INCLUDED__
29 # error missing include (bm.h or bm64.h)
295 unsigned src_and_size,
297 unsigned src_sub_size);
348 int* is_result_full);
361 bool init_clear =
true);
369 unsigned i,
unsigned j,
370 unsigned* arg_blk_count,
375 unsigned i,
unsigned j,
376 unsigned* arg_blk_count,
381 unsigned i,
unsigned j,
unsigned block_count);
417 unsigned k,
unsigned i,
unsigned j)
BMNOEXCEPT;
444 unsigned arg_group0_size = 0;
445 unsigned arg_group1_size = 0;
452 unsigned top_block_size_ = 0;
455 bool range_set_ =
false;
480 template<
typename Agg,
typename It>
485 int pipeline_size = 0;
486 for (It it = first; it != last; ++it, ++pipeline_size)
498 for (It it = first; it != last; ++it, ++w)
501 auto op_st = agg.get_operation_status();
502 if (op_st != Agg::op_done)
504 op_st = agg.run_step(i, j);
505 pipeline_size -= (op_st == Agg::op_done);
508 if (pipeline_size <= 0)
522 template<
typename BV>
531 template<
typename BV>
541 template<
typename BV>
544 arg_group0_size = arg_group1_size = operation_ = top_block_size_ = 0;
545 operation_status_ = op_undefined;
552 template<
typename BV>
555 range_from_ = from; range_to_ = to;
561 template<
typename BV>
575 template<
typename BV>
584 BM_ASSERT(arg_group1_size < max_aggregator_cap);
588 return arg_group1_size;
590 ar_->arg_bv1[arg_group1_size++] = bv;
591 return arg_group1_size;
595 BM_ASSERT(arg_group0_size < max_aggregator_cap);
599 return arg_group0_size;
601 ar_->arg_bv0[arg_group0_size++] = bv;
602 return arg_group0_size;
608 template<
typename BV>
611 combine_or(bv_target, ar_->arg_bv0, arg_group0_size);
616 template<
typename BV>
619 combine_and(bv_target, ar_->arg_bv0, arg_group0_size);
624 template<
typename BV>
627 return combine_and_sub(bv_target,
628 ar_->arg_bv0, arg_group0_size,
629 ar_->arg_bv1, arg_group1_size,
false);
634 template<
typename BV>
637 return combine_and_sub(bv_target,
638 ar_->arg_bv0, arg_group0_size,
639 ar_->arg_bv1, arg_group1_size, any);
644 template<
typename BV>
647 return find_first_and_sub(idx,
648 ar_->arg_bv0, arg_group0_size,
649 ar_->arg_bv1, arg_group1_size);
654 template<
typename BV>
657 combine_shift_right_and(bv_target, ar_->arg_bv0, arg_group0_size,
false);
662 template<
typename BV>
664 const bvector_type_const_ptr* bv_src,
unsigned src_size)
672 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
673 for (
unsigned i = 0; i < top_blocks; ++i)
675 unsigned set_array_max =
676 find_effective_sub_block_size(i, bv_src, src_size,
false);
677 for (
unsigned j = 0; j < set_array_max; ++j)
679 combine_or(i, j, bv_target, bv_src, src_size);
686 template<
typename BV>
688 const bvector_type_const_ptr* bv_src,
703 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
704 for (
unsigned i = 0; i < top_blocks; ++i)
707 unsigned set_array_max =
708 find_effective_sub_block_size(i, bv_src, src_size,
true);
709 for (
unsigned j = 0; j < set_array_max; ++j)
719 template<
typename BV>
721 const bvector_type_const_ptr* bv_src_and,
unsigned src_and_size,
722 const bvector_type_const_ptr* bv_src_sub,
unsigned src_sub_size,
728 bool global_found =
false;
730 if (!bv_src_and || !src_and_size)
738 unsigned top_blocks = resize_target(bv_target, bv_src_and, src_and_size);
739 unsigned top_blocks2 = resize_target(bv_target, bv_src_sub, src_sub_size,
false);
741 if (top_blocks2 > top_blocks)
742 top_blocks = top_blocks2;
744 for (
unsigned i = 0; i < top_blocks; ++i)
746 unsigned set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size,
true);
751 unsigned set_array_max2 =
752 find_effective_sub_block_size(i, bv_src_sub, src_sub_size,
false);
753 if (set_array_max2 > set_array_max)
754 set_array_max = set_array_max2;
756 for (
unsigned j = 0; j < set_array_max; ++j)
760 bv_src_and, src_and_size,
761 bv_src_sub, src_sub_size,
765 bman_target.check_alloc_top_subblock(i);
769 bman_target.validate_top_full(i);
779 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
780 opt_mode_, ar_->tb_opt);
784 global_found |= found;
793 template<
typename BV>
795 const bvector_type_const_ptr* bv_src_and,
unsigned src_and_size,
796 const bvector_type_const_ptr* bv_src_sub,
unsigned src_sub_size)
801 if (!bv_src_and || !src_and_size)
804 unsigned top_blocks = max_top_blocks(bv_src_and, src_and_size);
805 unsigned top_blocks2 = max_top_blocks(bv_src_sub, src_sub_size);
807 if (top_blocks2 > top_blocks)
808 top_blocks = top_blocks2;
819 if (nblock_from == nblock_to)
822 unsigned i = top_from;
825 bv_src_and, src_and_size,
826 bv_src_sub, src_sub_size,
831 unsigned block_bit_idx = 0;
840 if (top_to < top_blocks)
841 top_blocks = top_to+1;
848 for (
unsigned i = top_from; i < top_blocks; ++i)
865 set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size,
true);
870 unsigned set_array_max2 =
871 find_effective_sub_block_size(i, bv_src_sub, src_sub_size,
false);
872 if (set_array_max2 > set_array_max)
873 set_array_max = set_array_max2;
876 for (; j < set_array_max; ++j)
880 bv_src_and, src_and_size,
881 bv_src_sub, src_sub_size,
885 unsigned block_bit_idx = 0;
899 template<
typename BV>
903 const bvector_type_const_ptr* bv_src,
913 for (
unsigned k = 0; k < src_size; ++k)
918 bv->get_blocks_manager();
919 const bm::word_t*
const* blk_blk_arg = bman_arg.get_topblock(i);
922 if (top_null_as_zero)
948 template<
typename BV>
951 const bvector_type_const_ptr* bv_src,
956 unsigned arg_blk_count = 0;
957 unsigned arg_blk_gap_count = 0;
959 sort_input_blocks_or(bv_src, src_size, i, j,
960 &arg_blk_count, &arg_blk_gap_count);
966 bman_target.check_alloc_top_subblock(i);
967 bman_target.set_block_ptr(i, j, blk);
970 bman_target.validate_top_full(i);
976 if (arg_blk_count || arg_blk_gap_count)
979 process_bit_blocks_or(bman_target, i, j, arg_blk_count);
982 if (arg_blk_gap_count)
984 process_gap_blocks_or(arg_blk_gap_count);
987 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
988 opt_mode_, ar_->tb_opt);
996 template<
typename BV>
999 const bvector_type_const_ptr* bv_src,
1004 unsigned arg_blk_count = 0;
1005 unsigned arg_blk_gap_count = 0;
1007 sort_input_blocks_and(bv_src, src_size,
1009 &arg_blk_count, &arg_blk_gap_count);
1015 if (arg_blk_count || arg_blk_gap_count)
1017 if (!arg_blk_gap_count && (arg_blk_count == 1))
1023 bman_target.check_alloc_top_subblock(i);
1024 bman_target.set_block_ptr(i, j, blk);
1026 bman_target.validate_top_full(i);
1033 digest = process_bit_blocks_and(arg_blk_count, digest);
1039 if (arg_blk_gap_count)
1041 digest = process_gap_blocks_and(arg_blk_gap_count, digest);
1046 bman_target.opt_copy_bit_block(i, j, ar_->tb1,
1047 opt_mode_, ar_->tb_opt);
1054 template<
typename BV>
1057 const bvector_type_const_ptr* bv_src_and,
unsigned src_and_size,
1058 const bvector_type_const_ptr* bv_src_sub,
unsigned src_sub_size,
1059 int* is_result_full)
1064 unsigned arg_blk_and_count = 0;
1065 unsigned arg_blk_and_gap_count = 0;
1066 unsigned arg_blk_sub_count = 0;
1067 unsigned arg_blk_sub_gap_count = 0;
1069 *is_result_full = 0;
1070 bm::word_t* blk = sort_input_blocks_and(bv_src_and, src_and_size,
1072 &arg_blk_and_count, &arg_blk_and_gap_count);
1074 if (!blk || !(arg_blk_and_count | arg_blk_and_gap_count))
1079 blk = sort_input_blocks_or(bv_src_sub, src_sub_size,
1081 &arg_blk_sub_count, &arg_blk_sub_gap_count);
1088 if (!arg_blk_and_gap_count && (arg_blk_and_count == 1))
1092 *is_result_full = 1;
1102 digest = process_bit_blocks_and(arg_blk_and_count, digest);
1105 digest = process_bit_blocks_sub(arg_blk_sub_count, digest);
1112 process_gap_blocks_and(arg_blk_and_gap_count, digest);
1116 if (arg_blk_sub_gap_count)
1119 process_gap_blocks_sub(arg_blk_sub_gap_count, digest);
1127 template<
typename BV>
1131 for (
unsigned k = 0; k < arg_blk_gap_count; ++k)
1137 template<
typename BV>
1143 bool single_bit_found;
1144 unsigned single_bit_idx;
1145 for (
unsigned k = 0; k < arg_blk_gap_count; ++k)
1155 if (single_bit_found)
1157 for (++k; k < arg_blk_gap_count; ++k)
1171 template<
typename BV>
1177 bool single_bit_found;
1178 unsigned single_bit_idx;
1179 for (
unsigned k = 0; k < arg_blk_gap_count; ++k)
1190 if (single_bit_found)
1192 for (++k; k < arg_blk_gap_count; ++k)
1206 template<
typename BV>
1211 for (
unsigned i = 0; i < arg_blk_gap_count && b; ++i)
1220 template<
typename BV>
1225 for (
unsigned i = 0; i < arg_blk_gap_count; ++i)
1237 template<
typename BV>
1239 unsigned i,
unsigned j,
1240 unsigned arg_blk_count)
1254 unsigned unroll_factor, len, len_unr;
1257 len = arg_blk_count - k;
1258 len_unr = len - (len % unroll_factor);
1259 for( ;k < len_unr; k+=unroll_factor)
1262 ar_->v_arg_or_blk[k], ar_->v_arg_or_blk[k+1],
1263 ar_->v_arg_or_blk[k+2], ar_->v_arg_or_blk[k+3]);
1274 len = arg_blk_count - k;
1275 len_unr = len - (len % unroll_factor);
1276 for( ;k < len_unr; k+=unroll_factor)
1288 for (; k < arg_blk_count; ++k)
1305 template<
typename BV>
1315 if (range_set_ && (nb_from == nb_to))
1325 switch (arg_blk_count)
1335 ar_->v_arg_and_blk[k],
1336 ar_->v_arg_and_blk[k+1],
1343 unsigned unroll_factor, len, len_unr;
1344 unsigned single_bit_idx;
1347 len = arg_blk_count - k;
1348 len_unr = len - (len % unroll_factor);
1349 for (; k < len_unr; k += unroll_factor)
1353 ar_->v_arg_and_blk[k], ar_->v_arg_and_blk[k + 1],
1354 ar_->v_arg_and_blk[k + 2], ar_->v_arg_and_blk[k + 3],
1363 for (++k; k < arg_blk_count; ++k)
1365 const bm::word_t* arg_blk = ar_->v_arg_and_blk[k];
1366 if (!(mask & arg_blk[nword]))
1376 for (; k < arg_blk_count; ++k)
1389 template<
typename BV>
1395 unsigned single_bit_idx;
1396 const word_t** args = &ar_->v_arg_or_blk[0];
1397 for (
unsigned k = 0; k < arg_blk_count; ++k)
1413 for (++k; k < arg_blk_count; ++k)
1415 if (mask & args[k][nword])
1429 template<
typename BV>
1431 const bvector_type_const_ptr* bv_src,
unsigned src_size,
1437 if (bman_target.is_init())
1438 bman_target.deinit_tree();
1441 unsigned top_blocks = bman_target.top_block_size();
1443 bool need_realloc =
false;
1446 for (
unsigned i = 0; i < src_size; ++i)
1451 bv->get_blocks_manager();
1452 unsigned arg_top_blocks = bman_arg.top_block_size();
1453 if (arg_top_blocks > top_blocks)
1455 need_realloc =
true;
1456 top_blocks = arg_top_blocks;
1459 if (arg_size > size)
1465 bman_target.reserve_top_blocks(top_blocks);
1467 if (!bman_target.is_init())
1468 bman_target.init_tree();
1469 if (size > bv_target.size())
1470 bv_target.resize(size);
1477 template<
typename BV>
1482 unsigned top_blocks = 1;
1485 for (
unsigned i = 0; i < src_size; ++i)
1490 unsigned arg_top_blocks = bman_arg.top_block_size();
1491 if (arg_top_blocks > top_blocks)
1492 top_blocks = arg_top_blocks;
1499 template<
typename BV>
1501 const bvector_type_const_ptr* bv_src,
1503 unsigned i,
unsigned j,
1504 unsigned* arg_blk_count,
1508 for (
unsigned k = 0; k < src_size; ++k)
1513 const bm::word_t* arg_blk = bman_arg.get_block_ptr(i, j);
1518 ar_->v_arg_or_blk_gap[*arg_blk_gap_count] =
BMGAP_PTR(arg_blk);
1519 (*arg_blk_gap_count)++;
1526 *arg_blk_gap_count = *arg_blk_count = 0;
1529 ar_->v_arg_or_blk[*arg_blk_count] = arg_blk;
1538 template<
typename BV>
1540 const bvector_type_const_ptr* bv_src,
1542 unsigned i,
unsigned j,
1543 unsigned* arg_blk_count,
1546 unsigned full_blk_cnt = 0;
1548 for (
unsigned k = 0; k < src_size; ++k)
1553 const bm::word_t* arg_blk = bman_arg.get_block_ptr(i, j);
1557 *arg_blk_gap_count = *arg_blk_count = 0;
1562 ar_->v_arg_and_blk_gap[*arg_blk_gap_count] =
BMGAP_PTR(arg_blk);
1563 (*arg_blk_gap_count)++;
1579 ar_->v_arg_and_blk[*arg_blk_count] = arg_blk;
1593 template<
typename BV>
1595 const bvector_type_const_ptr* bv_src,
unsigned src_size)
1607 for (
unsigned i = 1; i < src_size; ++i)
1611 bv_target.bit_or(*bv);
1617 template<
typename BV>
1619 const bvector_type_const_ptr* bv_src,
unsigned src_size)
1631 for (
unsigned i = 1; i < src_size; ++i)
1635 bv_target.bit_and(*bv);
1641 template<
typename BV>
1643 const bvector_type_const_ptr* bv_src_and,
1644 unsigned src_and_size,
1645 const bvector_type_const_ptr* bv_src_sub,
1646 unsigned src_sub_size)
1650 combine_and_horizontal(bv_target, bv_src_and, src_and_size);
1652 for (
unsigned i = 0; i < src_sub_size; ++i)
1662 template<
typename BV>
1664 const bvector_type_const_ptr* bv_src,
1667 top_block_size_ = resize_target(bv_target, bv_src, src_size);
1670 for (
unsigned i = 0; i < src_size; ++i)
1671 ar_->carry_overs_[i] = 0;
1676 template<
typename BV>
1679 const bvector_type_const_ptr* bv_src_and,
unsigned src_and_size,
1688 prepare_shift_right_and(bv_target, bv_src_and, src_and_size);
1692 if (i > top_block_size_)
1694 if (!any_carry_overs(&ar_->carry_overs_[0], src_and_size))
1702 combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
1709 return bv_target.any();
1714 template<
typename BV>
1717 const bvector_type_const_ptr* bv_src,
1720 bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
1721 unsigned char* carry_overs = &(ar_->carry_overs_[0]);
1725 bool blk_zero =
false;
1730 const bm::word_t* arg_blk = bman_arg.get_block(i, j);
1751 for (
unsigned k = 1; k < src_size; ++k)
1753 unsigned carry_over = carry_overs[k];
1754 if (!digest && !carry_over)
1759 blk_zero = !blk_zero;
1761 const bm::word_t* arg_blk = get_arg_block(bv_src, k, i, j);
1762 carry_overs[k] = (
unsigned char)
1763 process_shift_right_and(blk, arg_blk, digest, carry_over);
1764 BM_ASSERT(carry_overs[k] == 0 || carry_overs[k] == 1);
1776 bman_target.opt_copy_bit_block(i, j, blk, opt_mode_, ar_->tb_opt);
1784 template<
typename BV>
1791 BM_ASSERT(carry_over == 1 || carry_over == 0);
1803 blk[0] = carry_over;
1824 blk[0] = carry_over & arg_blk[0];
1845 template<
typename BV>
1847 const bvector_type_const_ptr* bv_src,
1848 unsigned k,
unsigned i,
unsigned j)
BMNOEXCEPT
1850 return bv_src[k]->get_blocks_manager().get_block(i, j);
1855 template<
typename BV>
1860 unsigned acc = carry_overs[0];
1861 for (
unsigned i = 1; i < co_size; ++i)
1862 acc |= carry_overs[i];
1871 template<
typename BV>
1877 temp_blk_ = temp_block;
1881 case BM_NOT_DEFINED:
1883 case BM_SHIFT_R_AND:
1884 prepare_shift_right_and(*bv_target, ar_->arg_bv0, arg_group0_size);
1885 operation_status_ = op_prepared;
1894 template<
typename BV>
1898 BM_ASSERT(operation_status_ == op_prepared || operation_status_ == op_in_progress);
1903 case BM_NOT_DEFINED:
1906 case BM_SHIFT_R_AND:
1908 if (i > top_block_size_)
1910 if (!this->any_carry_overs(&ar_->carry_overs_[0], arg_group0_size))
1912 operation_status_ = op_done;
1913 return operation_status_;
1917 this->combine_shift_right_and(i, j, *bv_target_,
1918 ar_->arg_bv0, arg_group0_size);
1919 operation_status_ = op_in_progress;
1927 return operation_status_;