85 reference_type
const & reference,
86 uint32_t
const zero_based_reference_start_position,
87 sequence_type
const & query)
89 if (cigar_vector.
empty())
90 throw std::logic_error{
"An empty CIGAR is not a valid alignment representation."};
95 uint32_t reference_length{0};
96 uint32_t query_length{0};
98 for (
auto [cigar_count, cigar_operation] : cigar_vector)
100 if ((
'M'_cigar_operation == cigar_operation) || (
'='_cigar_operation == cigar_operation)
101 || (
'X'_cigar_operation == cigar_operation))
103 reference_length += cigar_count;
104 query_length += cigar_count;
106 else if (
'D'_cigar_operation == cigar_operation)
108 reference_length += cigar_count;
110 else if (
'I'_cigar_operation == cigar_operation)
112 query_length += cigar_count;
116 if (
static_cast<size_t>(zero_based_reference_start_position + reference_length) > std::ranges::size(reference))
117 throw std::logic_error{
"The CIGAR string indicates a reference length of at least "
118 +
std::to_string(zero_based_reference_start_position + reference_length)
119 +
", but the supplied reference sequence is only of size"
124 uint32_t soft_clipping_start{0};
125 uint32_t soft_clipping_end{0};
128 auto soft_clipping_at = [&cigar_vector](
size_t const index)
130 return cigar_vector[index] ==
'S'_cigar_operation;
133 auto hard_clipping_at = [&](
size_t const index)
135 return cigar_vector[index] ==
'H'_cigar_operation;
138 auto vector_size_at_least = [&](
size_t const min_size)
140 return cigar_vector.
size() >= min_size;
143 auto cigar_count_at = [&](
size_t const index)
145 return get<0>(cigar_vector[index]);
150 if (soft_clipping_at(0))
151 soft_clipping_start = cigar_count_at(0);
152 else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
153 soft_clipping_start = cigar_count_at(1);
157 auto const last_index = cigar_vector.
size() - 1;
158 auto const second_last_index = last_index - 1;
160 if (vector_size_at_least(2) && soft_clipping_at(last_index))
161 soft_clipping_end = cigar_count_at(last_index);
162 else if (vector_size_at_least(3) && hard_clipping_at(last_index) && soft_clipping_at(second_last_index))
163 soft_clipping_end = cigar_count_at(second_last_index);
165 if (soft_clipping_start + query_length + soft_clipping_end != std::ranges::size(query))
166 throw std::logic_error{
"The CIGAR string indicates a query/read sequence length of "
167 +
std::to_string(soft_clipping_start + query_length + soft_clipping_end)
168 +
", but the supplied query/read sequence is of size"
182 zero_based_reference_start_position + reference_length));
184 assign_unaligned(get<1>(
alignment), query |
views::slice(soft_clipping_start, soft_clipping_start + query_length));
189 auto current_ref_pos = std::ranges::begin(get<0>(
alignment));
190 auto current_read_pos = std::ranges::begin(get<1>(
alignment));
192 for (
auto [cigar_count, cigar_operation] : cigar_vector)
194 if ((
'M'_cigar_operation == cigar_operation) || (
'='_cigar_operation == cigar_operation)
195 || (
'X'_cigar_operation == cigar_operation))
197 std::ranges::advance(current_ref_pos, cigar_count);
198 std::ranges::advance(current_read_pos, cigar_count);
200 else if ((
'D'_cigar_operation == cigar_operation) || (
'N'_cigar_operation == cigar_operation))
203 current_read_pos = get<1>(
alignment).insert_gap(current_read_pos, cigar_count);
205 std::ranges::advance(current_ref_pos, cigar_count);
207 else if ((
'I'_cigar_operation == cigar_operation))
209 current_ref_pos = get<0>(
alignment).insert_gap(current_ref_pos, cigar_count);
211 std::ranges::advance(current_read_pos, cigar_count);
213 else if ((
'P'_cigar_operation == cigar_operation))
215 current_ref_pos = get<0>(
alignment).insert_gap(current_ref_pos, cigar_count);
218 current_read_pos = get<1>(
alignment).insert_gap(current_read_pos, cigar_count);
auto alignment_from_cigar(std::vector< cigar > const &cigar_vector, reference_type const &reference, uint32_t const zero_based_reference_start_position, sequence_type const &query)
Construct an alignment from a CIGAR string and the corresponding sequences.
Definition alignment_from_cigar.hpp:84