Go to the documentation of this file.
83 <<
"BitMagic SNP Search Sample Utility (c) 2018" << std::endl
84 <<
"-isnp file-name -- input set file (SNP FASTA) to parse" << std::endl
85 <<
"-svout spase vector output -- sparse vector name to save" << std::endl
86 <<
"-rscout rs-compressed spase vector output -- name to save" << std::endl
87 <<
"-svin sparse vector input -- sparse vector file name to load " << std::endl
88 <<
"-rscin rs-compressed sparse vector input -- file name to load " << std::endl
89 <<
"-diag -- run diagnostics" << std::endl
90 <<
"-timing -- collect timings" << std::endl
111 for (
int i = 1; i < argc; ++i)
113 std::string arg = argv[i];
114 if ((arg ==
"-h") || (arg ==
"--help"))
120 if (arg ==
"-svout" || arg ==
"--svout")
128 std::cerr <<
"Error: -svout requires file name" << std::endl;
133 if (arg ==
"-rscout" || arg ==
"--rscout")
141 std::cerr <<
"Error: -rscout requires file name" << std::endl;
147 if (arg ==
"-svin" || arg ==
"--svin")
155 std::cerr <<
"Error: -svin requires file name" << std::endl;
161 if (arg ==
"-rscin" || arg ==
"--rscin")
169 std::cerr <<
"Error: -rscin requires file name" << std::endl;
175 if (arg ==
"-isnp" || arg ==
"--isnp" || arg ==
"-snp" || arg ==
"--snp")
183 std::cerr <<
"Error: -isnp requires file name" << std::endl;
189 if (arg ==
"-diag" || arg ==
"--diag" || arg ==
"-d" || arg ==
"--d")
191 if (arg ==
"-timing" || arg ==
"--timing" || arg ==
"-t" || arg ==
"--t")
193 if (arg ==
"-bench" || arg ==
"--bench" || arg ==
"-b" || arg ==
"--b")
221 std::ifstream fin(fname.c_str(), std::ios::in);
225 unsigned rs_id, rs_pos;
229 std::string delim =
" \t";
231 std::regex reg(
"\\s+");
232 std::sregex_token_iterator it_end;
238 for (
unsigned i = 0; std::getline(fin, line); ++i)
241 !isdigit(line.front())
246 std::sregex_token_iterator it(line.begin(), line.end(), reg, -1);
247 std::vector<std::string> line_vec(it, it_end);
248 if (line_vec.empty())
254 rs_id = unsigned(std::stoul(line_vec.at(0), &idx));
256 if (bv_rs.
test(rs_id))
260 rs_pos = unsigned(std::stoul(line_vec.at(11), &idx));
263 sv.
set(rs_pos, rs_id);
267 catch (std::exception& )
279 if (rs_cnt % (4 * 1024) == 0)
280 std::cout <<
"\r" << rs_cnt <<
" / " << i;
283 std::cout << std::endl;
284 std::cout <<
"SNP count=" << rs_cnt << std::endl;
297 rand_sampler.
sample(bv_sample, *bv_null, count);
300 for (; en.
valid(); ++en)
303 unsigned v = sv[idx];
316 for (; it != it_end; ++it)
320 std::pair<unsigned, unsigned> pos2rs = std::make_pair(it.pos(), it.value());
321 vp.push_back(pos2rs);
331 for (
unsigned i = 0; i < vp.size(); ++i)
333 if (vp[i].second == rs_id)
350 const unsigned rs_sample_count = 2000;
352 std::vector<unsigned> rs_vect;
356 std::cerr <<
"Benchmark subset empty!" << std::endl;
383 for (
unsigned i = 0; i < rs_vect.size(); ++i)
385 unsigned rs_id = rs_vect[i];
387 bool found = scanner.
find_eq(sv, rs_id, rs_pos);
395 std::cout <<
"Error: rs_id = " << rs_id <<
" not found!" << std::endl;
406 for (
unsigned i = 0; i < rs_vect.size(); ++i)
408 unsigned rs_id = rs_vect[i];
410 bool found = scanner.
find_eq(csv, rs_id, rs_pos);
418 std::cout <<
"rs_id = " << rs_id <<
" not found!" << std::endl;
427 for (
unsigned i = 0; i < rs_vect.size(); ++i)
429 unsigned rs_id = rs_vect[i];
439 std::cout <<
"rs_id = " << rs_id <<
" not found!" << std::endl;
445 int res = bv_found1.
compare(bv_found2);
448 std::cerr <<
"Error: search discrepancy (sparse search) detected!" << std::endl;
450 res = bv_found1.
compare(bv_found3);
453 std::cerr <<
"Error: search discrepancy (std::vector<>) detected!" << std::endl;
459 int main(
int argc,
char *argv[])
512 std::cerr <<
"Error: rs-compressed vector check failed!" << std::endl;
537 std::cout << std::endl
538 <<
"sparse vector statistics:"
540 bm::print_svector_stat(sv,
true);
544 std::cout << std::endl
545 <<
"RS compressed sparse vector statistics:"
547 bm::print_svector_stat(csv,
true);
558 std::cout << std::endl <<
"Performance:" << std::endl;
562 catch (std::exception& ex)
564 std::cerr <<
"Error:" << ex.what() << std::endl;
static void build_vector_pairs(const sparse_vector_u32 &sv, vector_pairs &vp)
bool empty() const
return true if vector is empty
Algorithms for bm::sparse_vector.
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector planes
Generation of random subset.
void load_to(sparse_vector_type &sv) const
Export compressed vector to a sparse vector (with NULLs)
std::vector< std::pair< unsigned, unsigned > > vector_pairs
sparse vector with runtime compression using bit transposition method
Utility class to collect performance measurements and statistics.
void set_bit_no_check(size_type n)
Set bit without checking preconditions (size, etc)
Sparse container sparse_vector<> for integer types using bit-transposition transform.
Constant iterator designed to enumerate "ON" bits.
algorithms for sparse_vector scan/search
int compare(const bvector< Alloc > &bvect) const BMNOEXCEPT
Lexicographical comparison with a bitvector.
static int load_snp_report(const std::string &fname, sparse_vector_u32 &sv)
Serialization for sparse_vector<>
Algorithms for bvector<> (main include)
bool equal(const sparse_vector< Val, BV > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
Compressed sparse container rsc_sparse_vector<> for integer types.
support "non-assigned" or "NULL" logic
void sample(BV &bv_out, const BV &bv_in, size_type sample_count)
Get random subset of input vector.
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
bool valid() const BMNOEXCEPT
Checks if iterator is still valid. Analog of != 0 comparison for pointers.
bm::chrono_taker::duration_map_type timing_map
void init()
Explicit post-construction initialization.
static int parse_args(int argc, char *argv[])
Timing utilities for benchmarking (internal)
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
static bool search_vector_pairs(const vector_pairs &vp, unsigned rs_id, unsigned &pos)
void load_from(const sparse_vector_type &sv_src)
Load compressed vector from a sparse vector (with NULLs)
bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_u32
void push_back(value_type v)
push value back into vector
bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > rsc_sparse_vector_u32
bool test(size_type n) const BMNOEXCEPT
Returns true if bit n is set and false if bit n is 0.
std::map< std::string, statistics > duration_map_type
test name to duration map
Rank-Select compressed sparse vector.
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, statistics *stat=0)
run memory optimization for all vector planes
static void print_duration_map(const duration_map_type &dmap, format fmt=ct_time)
bool empty() const BMNOEXCEPT
return true if vector is empty
enumerator first() const
Returns enumerator pointing on the first non-zero bit.
int main(int argc, char *argv[])
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
static void generate_random_subset(const sparse_vector_u32 &sv, std::vector< unsigned > &vect, unsigned count)
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
static void run_benchmark(const sparse_vector_u32 &sv, const rsc_sparse_vector_u32 &csv)
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values or NULL (if not constructed that way)
void find_eq(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to search value