Needle
An application for fast and efficient searches of NGS data.
Loading...
Searching...
No Matches
ibf.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
5#pragma once
6
7#include <filesystem>
8#include <iostream>
9#include <math.h>
10#include <numeric>
11#include <string>
12
13#include <seqan3/alphabet/container/concatenated_sequences.hpp>
14#include <seqan3/alphabet/nucleotide/dna4.hpp>
15
16#include "shared.hpp"
17
19{
20 std::filesystem::path
21 include_file; // Needs to be defined when only minimisers appearing in this file should be stored
22 std::filesystem::path
23 exclude_file; // Needs to be defined when minimisers appearing in this file should NOT be stored
24 std::vector<int> samples{}; // Can be used to indicate that sequence files belong to the same experiment
25 bool paired = false; // If true, experiments are treated as paired-end experiments
26 bool experiment_names = false; // Flag indicating whether the names of the experiments should be stored in a txt file
27 bool ram_friendly = false;
28};
29
32{
33 int maxi;
34 RandomGenerator(int max) : maxi(max)
35 {}
36
38 {
39 return rand() % maxi;
40 }
41};
42
// Get the concrete expression values (= median of all counts of one transcript) for given experiments.
// Declaration only; the definition lives in ibf.cpp.
// `args` is passed by const reference; the file list, both paths and `paired` are passed by value.
51void count(min_arguments const & args,
52 std::vector<std::filesystem::path> sequence_files,
53 std::filesystem::path include_file,
54 std::filesystem::path genome_file,
55 bool paired);
56
// Creates a set of minimizers to ignore, which should be used as an input to count.
// Declaration only; the definition lives in ibf.cpp.
62void count_genome(min_arguments const & args, std::filesystem::path include_file, std::filesystem::path exclude_file);
63
// Reads a binary file that needle minimiser creates.
// `hash_table` is a non-const reference to a uint64_t -> uint16_t map;
// presumably it receives the file's contents -- confirm against the definition in ibf.cpp.
69void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map<uint64_t, uint16_t> & hash_table);
70
78 std::filesystem::path filename,
79 uint64_t & num_of_minimisers,
80 uint8_t & cutoff);
81
// Creates IBFs.
// This overload takes sequence files together with minimiser arguments and cutoffs.
// `expression_by_genome_file` defaults to an empty path and `num_hash` defaults to 1.
// Returns a std::vector<uint16_t>; its meaning is not visible in this header -- see ibf.cpp.
94std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_files,
95 estimate_ibf_arguments & ibf_args,
96 minimiser_arguments & minimiser_args,
97 std::vector<double> & fpr,
98 std::vector<uint8_t> & cutoffs,
99 std::filesystem::path const expression_by_genome_file = "",
100 size_t num_hash = 1);
101
// Overload of ibf() that takes minimiser files instead of sequence files; it has no
// minimiser_arguments or cutoffs parameters.
// `expression_by_genome_file` defaults to an empty path and `num_hash` defaults to 1.
// Returns a std::vector<uint16_t>; its meaning is not visible in this header -- see ibf.cpp.
112std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & minimiser_files,
113 estimate_ibf_arguments & ibf_args,
114 std::vector<double> & fpr,
115 std::filesystem::path const expression_by_genome_file = "",
116 size_t num_hash = 1);
117
// Create minimiser and header files.
// Declaration only; the definition lives in ibf.cpp.
// `minimiser_args` and `cutoffs` are non-const references, so the callee may modify them.
124void minimiser(std::vector<std::filesystem::path> const & sequence_files,
125 min_arguments const & args,
126 minimiser_arguments & minimiser_args,
127 std::vector<uint8_t> & cutoffs);
128
// Insert into IBFs.
// This overload takes sequence files together with minimiser arguments and cutoffs.
// `path_in` presumably locates the existing IBFs and `samplewise` selects a per-sample mode
// -- both are assumptions from the names; confirm against ibf.cpp.
141std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & sequence_files,
142 estimate_ibf_arguments & ibf_args,
143 minimiser_arguments & minimiser_args,
144 std::vector<uint8_t> & cutoffs,
145 std::filesystem::path const expression_by_genome_file,
146 std::filesystem::path path_in,
147 bool samplewise);
148
// Overload of insert() that takes minimiser files instead of sequence files; it has no
// minimiser_arguments or cutoffs parameters.
// Returns a std::vector<uint16_t>; its meaning is not visible in this header -- see ibf.cpp.
159std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & minimiser_files,
160 estimate_ibf_arguments & ibf_args,
161 std::filesystem::path const expression_by_genome_file,
162 std::filesystem::path path_in,
163 bool samplewise);
164
// Delete bins from ibfs.
// Declaration only; the definition lives in ibf.cpp.
// NOTE(review): `delete_files` holds uint64_t values, presumably the indices of the bins to
// delete -- confirm against the definition.
172void delete_bin(std::vector<uint64_t> const & delete_files,
173 estimate_ibf_arguments & ibf_args,
174 std::filesystem::path path_in,
175 bool samplewise);
void count_genome(min_arguments const &args, std::filesystem::path include_file, std::filesystem::path exclude_file)
Creates a set of minimizers to ignore, which should be used as an input to count.
Definition ibf.cpp:385
void minimiser(std::vector< std::filesystem::path > const &sequence_files, min_arguments const &args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs)
Create minimiser and header files.
Definition ibf.cpp:1625
void count(min_arguments const &args, std::vector< std::filesystem::path > sequence_files, std::filesystem::path include_file, std::filesystem::path genome_file, bool paired)
Get the concrete expression values (= median of all counts of one transcript) for given experiments....
Definition ibf.cpp:427
void delete_bin(std::vector< uint64_t > const &delete_files, estimate_ibf_arguments &ibf_args, std::filesystem::path path_in, bool samplewise)
Delete bins from ibfs.
Definition ibf.cpp:1489
std::vector< uint16_t > insert(std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise)
Insert into IBFs.
Definition ibf.cpp:1435
void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map< uint64_t, uint16_t > &hash_table)
Reads a binary file that needle minimiser creates.
Definition ibf.cpp:491
std::vector< uint16_t > ibf(std::vector< std::filesystem::path > const &sequence_files, estimate_ibf_arguments &ibf_args, minimiser_arguments &minimiser_args, std::vector< double > &fpr, std::vector< uint8_t > &cutoffs, std::filesystem::path const expression_by_genome_file="", size_t num_hash=1)
Creates IBFs.
Definition ibf.cpp:1025
void read_binary_start(min_arguments &args, std::filesystem::path filename, uint64_t &num_of_minimisers, uint8_t &cutoff)
Reads the beginning of a binary file that needle minimiser creates.
Definition ibf.cpp:522
Generates a random integer strictly less than a given maximum (computed as rand() % maxi, so for a positive maximum the result lies in [0, maximum - 1]).
Definition ibf.hpp:32
int maxi
Definition ibf.hpp:33
RandomGenerator(int max)
Definition ibf.hpp:34
int operator()()
Definition ibf.hpp:37
arguments used for estimate, ibf, ibfmin
Definition shared.hpp:39
arguments used for estimate, ibf, minimiser
Definition shared.hpp:30
Definition ibf.hpp:19
bool paired
Definition ibf.hpp:25
std::filesystem::path include_file
Definition ibf.hpp:21
bool ram_friendly
Definition ibf.hpp:27
std::vector< int > samples
Definition ibf.hpp:24
std::filesystem::path exclude_file
Definition ibf.hpp:23
bool experiment_names
Definition ibf.hpp:26