Raptor 3.0.1
A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences
 
Loading...
Searching...
No Matches
threshold.hpp
Go to the documentation of this file.
1// --------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
6// --------------------------------------------------------------------------------------------------
7
13#pragma once
14
17
18namespace raptor::threshold
19{
20
22{
23public:
24 threshold() = default;
25 threshold(threshold const &) = default;
26 threshold & operator=(threshold const &) = default;
27 threshold(threshold &&) = default;
28 threshold & operator=(threshold &&) = default;
29 ~threshold() = default;
30
31 threshold(threshold_parameters const & arguments)
32 {
33 uint8_t const kmer_size{arguments.shape.size()};
34 size_t const kmers_per_window = arguments.window_size - kmer_size + 1;
35
36 if (!std::isnan(arguments.percentage))
37 {
38 threshold_kind = threshold_kinds::percentage;
39 threshold_percentage = arguments.percentage;
40 }
41 else if (kmers_per_window == 1u)
42 {
43 threshold_kind = threshold_kinds::lemma;
44 size_t const kmer_lemma_minuend = arguments.query_length + 1u;
45 size_t const kmer_lemma_subtrahend = (arguments.errors + 1u) * kmer_size;
46 kmer_lemma = kmer_lemma_minuend > kmer_lemma_subtrahend ? kmer_lemma_minuend - kmer_lemma_subtrahend : 0;
47 }
48 else
49 {
50 threshold_kind = threshold_kinds::probabilistic;
51 size_t const kmers_per_pattern = arguments.query_length - kmer_size + 1;
52 minimal_number_of_minimizers = kmers_per_pattern / kmers_per_window;
53 maximal_number_of_minimizers = arguments.query_length - arguments.window_size + 1;
54 precomp_correction = precompute_correction(arguments);
55 precomp_thresholds = precompute_threshold(arguments);
56 }
57 }
58
59 size_t get(size_t const minimiser_count) const noexcept
60 {
61 switch (threshold_kind)
62 {
63 case threshold_kinds::lemma:
64 return kmer_lemma;
65 case threshold_kinds::percentage:
66 return static_cast<size_t>(minimiser_count * threshold_percentage);
67 default:
68 {
69 assert(threshold_kind == threshold_kinds::probabilistic);
70 size_t const index = std::clamp(minimiser_count, minimal_number_of_minimizers, maximal_number_of_minimizers)
71 - minimal_number_of_minimizers;
72 return precomp_thresholds[index] + precomp_correction[index];
73 }
74 }
75 }
76
77private:
//!\brief The strategy used to determine the threshold.
enum class threshold_kinds
{
    probabilistic, //!< Look up precomputed thresholds and corrections; used when no other kind applies.
    lemma,         //!< Use the single stored `kmer_lemma` value; chosen when a window contains exactly one k-mer.
    percentage     //!< Scale the minimiser count by a fixed factor; chosen when a percentage is set (not NaN).
};
84
85 threshold_kinds threshold_kind{threshold_kinds::probabilistic};
86 std::vector<size_t> precomp_correction{};
87 std::vector<size_t> precomp_thresholds{};
88 size_t kmer_lemma{};
89 size_t minimal_number_of_minimizers{};
90 size_t maximal_number_of_minimizers{};
91 double threshold_percentage{};
92};
93
94} // namespace raptor::threshold
T clamp(T... args)
Definition threshold.hpp:22
T isnan(T... args)
Provides raptor::threshold::precompute_correction.
Provides raptor::threshold::precompute_threshold.
Definition threshold_parameters.hpp:23