libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
aastringcodec.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/amino_acid/aastringcodec.cpp
3 * \date 09/05/2023
4 * \author Olivier Langella
5 * \brief code and decode from amino acid string to integer
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2023 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "aastringcodec.h"
29#include <QDebug>
30
31using namespace pappso;
32
33AaStringCodec::AaStringCodec(const AaCode &aaCode) : m_aaCode(aaCode)
34{
35
36 m_base = m_aaCode.getSize() + 1;
37 m_units.resize(10);
38 uint32_t unit = 1;
39 for(auto &this_unit : m_units)
40 {
41 this_unit = unit;
42 unit *= m_base;
43 }
44}
45
47 : m_aaCode(other.m_aaCode)
48{
49 m_base = other.m_base;
50 m_units = other.m_units;
51}
52
56
57
58uint32_t
59pappso::AaStringCodec::code(const QString &aa_str) const
60{
61
62 std::size_t pos = 0;
63 uint32_t code = 0;
64 for(auto &aa_char : aa_str)
65 {
66 code += m_aaCode.getAaCode(aa_char.toLatin1()) * m_units[pos];
67 pos++;
68 }
69 return code;
70}
71
72uint32_t
73pappso::AaStringCodec::codeLlc(const QString &aa_str) const
74{
75 std::vector<uint8_t> llc_vec;
76
77 for(auto &aa_char : aa_str)
78 {
79 llc_vec.push_back(m_aaCode.getAaCode(aa_char.toLatin1()));
80 }
81 std::sort(llc_vec.begin(), llc_vec.end(), std::greater<uint8_t>());
82
83
84 std::size_t pos = 0;
85 uint32_t code = 0;
86 for(auto &aa_code : llc_vec)
87 {
88 code += (uint32_t)aa_code * m_units[pos];
89 pos++;
90 }
91 return code;
92}
93
94uint32_t
95pappso::AaStringCodec::codeLlc(std::vector<uint8_t>::const_iterator it_begin,
96 std::size_t size) const
97{
98 std::vector<uint8_t> llc_vec;
99
100 for(std::size_t i = 0; i < size; i++)
101 {
102 llc_vec.push_back(*it_begin);
103 it_begin++;
104 }
105 std::sort(llc_vec.begin(), llc_vec.end(), std::greater<uint8_t>());
106
107
108 std::size_t pos = 0;
109 uint32_t code = 0;
110 for(auto &aa_code : llc_vec)
111 {
112 code += (uint32_t)aa_code * m_units[pos];
113 pos++;
114 }
115 return code;
116}
117
118
119QString
121{
122 QString aa_suite;
123
124 while(code > 0)
125 {
126 aa_suite.append(m_aaCode.getAa((uint8_t)(code % m_base)).getLetter());
127 code /= m_base;
128 }
129
130 // qDebug() << aa_suite;
131
132 return aa_suite;
133}
134
135QStringList
136pappso::AaStringCodec::decode(const std::vector<uint32_t> &code_list) const
137{
138 QStringList aa_string_list;
139 for(auto code : code_list)
140 {
141 aa_string_list << decode(code);
142 }
143 return aa_string_list;
144}
145
146
147double
149{
150 double mass = 0;
151
152 while(code > 0)
153 {
154 mass += m_aaCode.getMass((uint8_t)(code % m_base));
155 code /= m_base;
156 }
157
158 return mass;
159}
160
161
162std::vector<CodeToMass>
164 std::size_t size) const
165{
166 std::vector<CodeToMass> llc_list;
167 if(size == 0)
168 return llc_list;
169 std::vector<uint8_t> model;
170 for(uint8_t p = 1; p <= size; p++)
171 {
172 model.resize(p, 0);
173
174 for(uint8_t i = 1; i < m_base; i++)
175 {
176 model[0] = i;
177 if(p == 1)
178 {
179 llc_list.push_back(generateCodeMassFromModel(model));
180 }
181 else
182 {
183 recGenerateModel(llc_list, model, 1);
184 }
185 }
186 }
187 return llc_list;
188}
189
190
191std::vector<CodeToMass>
193 std::size_t size) const
194{
195 std::vector<CodeToMass> llc_list;
196 if(size == 0)
197 return llc_list;
198 std::vector<uint8_t> model;
199 model.resize(size, 0);
200
201 for(uint8_t i = 1; i < m_base; i++)
202 {
203 model[0] = i;
204 recGenerateModel(llc_list, model, 1);
205 }
206 return llc_list;
207}
208
209void
210pappso::AaStringCodec::recGenerateModel(std::vector<CodeToMass> &glist,
211 std::vector<uint8_t> &model,
212 std::size_t position) const
213{
214 if(position == model.size())
215 return;
216
217 if(position == model.size() - 1)
218 {
219 uint8_t max = model[position - 1];
220 for(uint8_t i = 1; i <= max; i++)
221 {
222 model[position] = i;
223 glist.push_back(generateCodeMassFromModel(model));
224 }
225 }
226 else
227 {
228 uint8_t max = model[position - 1];
229 for(uint8_t i = 1; i <= max; i++)
230 {
231 model[position] = i;
232 recGenerateModel(glist, model, position + 1);
233 }
234 }
235}
236
239 const std::vector<uint8_t> &model) const
240{
241 CodeToMass code_mass;
242 std::size_t pos = 0;
243 for(auto aacode : model)
244 {
245 code_mass.mass += m_aaCode.getMass(aacode);
246
247 code_mass.code += (uint32_t)aacode * m_units[pos];
248 pos++;
249 }
250
251 // qDebug() << code_mass.code << " " << code_mass.mass;
252 return code_mass;
253}
254
255
256std::size_t
258{
259
260 std::size_t code = 0;
261 for(std::size_t pos = 0; pos < size; pos++)
262 {
263 code += (std::size_t)(m_base - 1) * (std::size_t)m_units[pos];
264 }
265 return code;
266}
267
268bool
270 const std::vector<uint8_t> &aa_ok) const
271{
272
273 while(code > 0)
274 {
275 if(std::find(aa_ok.begin(), aa_ok.end(), (uint8_t)(code % m_base)) ==
276 aa_ok.end())
277 return false;
278
279 code /= m_base;
280 }
281 return true;
282}
283
284bool
286 uint8_t aa_ok,
287 int times) const
288{
289
290 int number = 0;
291 while(code > 0)
292 {
293 if(aa_ok == (uint8_t)(code % m_base))
294 {
295 number++;
296 if(number == times)
297 return true;
298 }
299
300 code /= m_base;
301 }
302 return false;
303}
304
305
306const pappso::AaCode &
308{
309 return m_aaCode;
310}
code and decode from amino acid string to integer
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by their mass (...
Definition aacode.h:43
std::size_t getSize() const
Definition aacode.cpp:74
code and decode amino acid string sequence to unique integer
std::size_t getLimitMax(std::size_t size) const
get the maximum code number for a given peptide size
double getMass(uint32_t code) const
const AaCode & getAaCode() const
QString decode(uint32_t code) const
uint32_t codeLlc(const QString &aa_str) const
get the lowest common denominator integer from amino acid suite string
void recGenerateModel(std::vector< CodeToMass > &glist, std::vector< uint8_t > &model, std::size_t position) const
recursive method to generate models
std::vector< uint32_t > m_units
bool uniqueCodeContainsAminoAcid(uint32_t code, uint8_t aa_ok, int times) const
tell if a unique code only contains one amino acid 1 or n times
uint32_t code(const QString &aa_str) const
get integer from amino acid suite string
const AaCode & m_aaCode
CodeToMass generateCodeMassFromModel(const std::vector< uint8_t > &model) const
AaStringCodec(const AaCode &aaCode)
std::vector< CodeToMass > generateLlcCodeListByMaxPeptideSize(std::size_t size) const
generates all possible combination of llc code mass llc : the lowest common code denominator for a gi...
bool codeOnlyContains(uint32_t code, const std::vector< uint8_t > &aa_ok) const
tell if a code only contains a list of amino acid
std::vector< CodeToMass > generateLlcCodeListUpToMaxPeptideSize(std::size_t size) const
generates all possible combination of llc code mass llc : the lowest common code denominator for a gi...
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
@ max
maximum of intensities