54 <<
"BitMagic DNA Index Build Sample (c) 2018" << std::endl
55 <<
"-fa file-name -- input FASTA file" << std::endl
56 <<
"-j number -- number of parallel jobs to run" << std::endl
57 <<
"-timing -- collect timings" << std::endl
73 for (
int i = 1; i < argc; ++i)
75 std::string arg = argv[i];
76 if ((arg ==
"-h") || (arg ==
"--help"))
81 if (arg ==
"-fa" || arg ==
"--fa")
89 std::cerr <<
"Error: -fa requires file name" << std::endl;
94 if (arg ==
"-j" || arg ==
"--j")
102 std::cerr <<
"Error: -j requires number of jobs" << std::endl;
108 if (arg ==
"-timing" || arg ==
"--timing" || arg ==
"-t" || arg ==
"--t")
123 int load_FASTA(
const std::string& fname, std::vector<char>& seq_vect)
128 std::ifstream fin(fname.c_str(), std::ios::in);
133 for (
unsigned i = 0; std::getline(fin, line); ++i)
139 for (std::string::iterator it = line.begin(); it != line.end(); ++it)
140 seq_vect.push_back(*it);
154 enum { eA = 0, eC, eG, eT, eN, eEnd };
160 void Build(
const vector<char>& sequence)
168 for (
size_t i = 0; i < sequence.size(); ++i)
170 unsigned pos = unsigned(i);
204 for (
size_t i = 0; i < sequence.size(); ++i)
206 unsigned pos = unsigned(i);
239 const std::vector<char>* src_sequence;
242 : target_idx(idx), src_sequence(&src) {}
244 void operator() (
size_t from,
size_t to)
246 const vector<char>& sequence = *src_sequence;
255 for (
size_t i = from; i < sequence.size() && (i < to); ++i)
257 unsigned pos = unsigned(i);
305 std::vector<std::future<void> > futures;
307 unsigned range = unsigned(sequence.size() / threads);
309 for (
unsigned k = 0; k < sequence.size(); k += range)
311 futures.emplace_back(std::async(std::launch::async,
312 Func(
this, sequence), k, k + range));
316 for (
auto& e : futures)
326 static std::mutex mtx_A;
327 static std::mutex mtx_T;
328 static std::mutex mtx_G;
329 static std::mutex mtx_C;
330 static std::mutex mtx_N;
336 std::lock_guard<std::mutex> guard(mtx_A);
337 m_FPrintBV[eA].merge(bv);
342 std::lock_guard<std::mutex> guard(mtx_C);
343 m_FPrintBV[eC].merge(bv);
348 std::lock_guard<std::mutex> guard(mtx_G);
349 m_FPrintBV[eG].merge(bv);
354 std::lock_guard<std::mutex> guard(mtx_T);
355 m_FPrintBV[eT].merge(bv);
360 std::lock_guard<std::mutex> guard(mtx_N);
361 m_FPrintBV[eN].merge(bv);
376 return m_FPrintBV[eA];
378 return m_FPrintBV[eC];
380 return m_FPrintBV[eG];
382 return m_FPrintBV[eT];
384 return m_FPrintBV[eN];
388 throw runtime_error(
"Error. Invalid letter!");
401 std::vector<char> letters {
'A',
'T',
'G',
'C'};
402 for (
char base : letters)
410 throw runtime_error(
string(
"Fingerprint mismatch for:") +
string(1, base));
417 int main(
int argc,
char *argv[])
425 std::vector<char> seq_vect;
441 std::cout <<
"FASTA sequence size=" << seq_vect.size() << std::endl;
445 idx1.
Build(seq_vect);
462 std::cout << std::endl <<
"Performance:" << std::endl;
466 catch (std::exception& ex)
468 std::cerr <<
"Error:" << ex.what() << std::endl;