IsoSpec 2.2.1
isoSpec++.cpp
1/*
2 * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3 *
4 * This file is part of IsoSpec.
5 *
6 * IsoSpec is free software: you can redistribute it and/or modify
7 * it under the terms of the Simplified ("2-clause") BSD licence.
8 *
9 * IsoSpec is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * You should have received a copy of the Simplified BSD Licence
14 * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15 */
16
17
18#include "isoSpec++.h"
19#include <cmath>
20#include <algorithm>
21#include <vector>
22#include <cstdlib>
23#include <cstring>
24#include <unordered_map>
25#include <queue>
26#include <utility>
27#include <iostream>
28#include <iomanip>
29#include <stdexcept>
30#include <string>
31#include <limits>
32#include <memory>
33#include <cassert>
34#include <cctype>
35#include "platform.h"
36#include "conf.h"
37#include "dirtyAllocator.h"
38#include "operators.h"
39#include "summator.h"
40#include "marginalTrek++.h"
41#include "misc.h"
42#include "element_tables.h"
43#include "fasta.h"
44
45
46
47namespace IsoSpec
48{
49
50Iso::Iso() :
51disowned(false),
52dimNumber(0),
53isotopeNumbers(new int[0]),
54atomCounts(new int[0]),
55confSize(0),
56allDim(0),
57marginals(new Marginal*[0])
58{}
59
60
61Iso::Iso(
62 int _dimNumber,
63 const int* _isotopeNumbers,
64 const int* _atomCounts,
65 const double* const * _isotopeMasses,
66 const double* const * _isotopeProbabilities
67) :
68disowned(false),
69dimNumber(_dimNumber),
70isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
71atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
72confSize(_dimNumber * sizeof(int)),
73allDim(0),
74marginals(nullptr)
75{
76 for(int ii = 0; ii < dimNumber; ++ii)
77 allDim += isotopeNumbers[ii];
78
79 std::unique_ptr<double[]> masses(new double[allDim]);
80 std::unique_ptr<double[]> probs(new double[allDim]);
81 size_t idx = 0;
82
83 for(int ii = 0; ii < dimNumber; ++ii)
84 for(int jj = 0; jj < isotopeNumbers[ii]; ++jj)
85 {
86 masses[idx] = _isotopeMasses[ii][jj];
87 probs[idx] = _isotopeProbabilities[ii][jj];
88 ++idx;
89 }
90
91 allDim = 0; // setupMarginals will recalculate it, assuming it's set to 0
92
93 try{
94 setupMarginals(masses.get(), probs.get());
95 }
96 catch(...)
97 {
98 delete[] isotopeNumbers;
99 delete[] atomCounts;
100 // Since we're throwing in a constructor, the destructor won't run, and we don't need to NULL these.
101 // However, this is not the fast code path and we can afford two unneeded instructions to keep
102 // some static analysis tools happy.
103 isotopeNumbers = nullptr;
104 atomCounts = nullptr;
105 throw;
106 }
107}
108
109Iso::Iso(
110 int _dimNumber,
111 const int* _isotopeNumbers,
112 const int* _atomCounts,
113 const double* _isotopeMasses,
114 const double* _isotopeProbabilities
115) :
116disowned(false),
117dimNumber(_dimNumber),
118isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
119atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
120confSize(_dimNumber * sizeof(int)),
121allDim(0),
122marginals(nullptr)
123{
124 try{
125 setupMarginals(_isotopeMasses, _isotopeProbabilities);
126 }
127 catch(...)
128 {
129 delete[] isotopeNumbers;
130 delete[] atomCounts;
131 // Since we're throwing in a constructor, the destructor won't run, and we don't need to NULL these.
132 // However, this is not the fast code path and we can afford two unneeded instructions to keep
133 // some static analysis tools happy.
134 isotopeNumbers = nullptr;
135 atomCounts = nullptr;
136 throw;
137 }
138}
139
140Iso::Iso(Iso&& other) :
141disowned(other.disowned),
142dimNumber(other.dimNumber),
143isotopeNumbers(other.isotopeNumbers),
144atomCounts(other.atomCounts),
145confSize(other.confSize),
146allDim(other.allDim),
147marginals(other.marginals)
148{
149 other.disowned = true;
150}
151
152
153Iso::Iso(const Iso& other, bool fullcopy) :
154disowned(!fullcopy),
155dimNumber(other.dimNumber),
156isotopeNumbers(fullcopy ? array_copy<int>(other.isotopeNumbers, dimNumber) : other.isotopeNumbers),
157atomCounts(fullcopy ? array_copy<int>(other.atomCounts, dimNumber) : other.atomCounts),
158confSize(other.confSize),
159allDim(other.allDim),
160marginals(fullcopy ? new Marginal*[dimNumber] : other.marginals)
161{
162 if(fullcopy)
163 {
164 for(int ii = 0; ii < dimNumber; ii++)
165 marginals[ii] = new Marginal(*other.marginals[ii]);
166 }
167}
168
169Iso Iso::FromFASTA(const char* fasta, bool use_nominal_masses, bool add_water)
170{
171 int atomCounts[6];
172
173 parse_fasta(fasta, atomCounts);
174
175 if(add_water)
176 {
177 atomCounts[1] += 2;
178 atomCounts[3] += 1;
179 }
180
181 const int dimNr = atomCounts[5] > 0 ? 6 : 5;
182
183 return Iso(dimNr, aa_isotope_numbers, atomCounts, use_nominal_masses ? aa_elem_nominal_masses : aa_elem_masses, aa_elem_probabilities);
184}
185
186inline void Iso::setupMarginals(const double* _isotopeMasses, const double* _isotopeProbabilities)
187{
188 if (marginals == nullptr)
189 {
190 int ii = 0;
192 try
193 {
194 while(ii < dimNumber)
195 {
196 marginals[ii] = new Marginal(
197 &_isotopeMasses[allDim],
198 &_isotopeProbabilities[allDim],
199 isotopeNumbers[ii],
200 atomCounts[ii]
201 );
202 allDim += isotopeNumbers[ii];
203 ii++;
204 }
205 }
206 catch(...)
207 {
208 ii--;
209 while(ii >= 0)
210 {
211 delete marginals[ii];
212 ii--;
213 }
214 delete[] marginals;
215 marginals = nullptr;
216 throw;
217 }
218 }
219}
220
222{
223 if(!disowned)
224 {
225 if (marginals != nullptr)
226 dealloc_table(marginals, dimNumber);
227 delete[] isotopeNumbers;
228 delete[] atomCounts;
229 }
230}
231
232bool Iso::doMarginalsNeedSorting() const
233{
234 int nontrivial_marginals = 0;
235 for(int ii = 0; ii < dimNumber; ii++)
236 {
237 if(marginals[ii]->get_isotopeNo() > 1)
238 nontrivial_marginals++;
239 if(nontrivial_marginals > 1)
240 return true;
241 }
242 return false;
243}
244
246{
247 double mass = 0.0;
248 for (int ii = 0; ii < dimNumber; ii++)
249 mass += marginals[ii]->getLightestConfMass();
250 return mass;
251}
252
254{
255 double mass = 0.0;
256 for (int ii = 0; ii < dimNumber; ii++)
257 mass += marginals[ii]->getHeaviestConfMass();
258 return mass;
259}
260
262{
263 double mass = 0.0;
264 for (int ii = 0; ii < dimNumber; ii++)
265 mass += marginals[ii]->getMonoisotopicConfMass();
266 return mass;
267}
268
270{
271 double lprob = 0.0;
272 for (int ii = 0; ii < dimNumber; ii++)
273 lprob += marginals[ii]->getSmallestLProb();
274 return lprob;
275}
276
277double Iso::getModeMass() const
278{
279 double mass = 0.0;
280 for (int ii = 0; ii < dimNumber; ii++)
281 mass += marginals[ii]->getModeMass();
282 return mass;
283}
284
285double Iso::getModeLProb() const
286{
287 double ret = 0.0;
288 for (int ii = 0; ii < dimNumber; ii++)
289 ret += marginals[ii]->getModeLProb();
290 return ret;
291}
292
294{
295 double mass = 0.0;
296 for (int ii = 0; ii < dimNumber; ii++)
298 return mass;
299}
300
301double Iso::variance() const
302{
303 double ret = 0.0;
304 for(int ii = 0; ii < dimNumber; ii++)
305 ret += marginals[ii]->variance();
306 return ret;
307}
308
309
310Iso::Iso(const char* formula, bool use_nominal_masses) :
311disowned(false),
312allDim(0),
313marginals(nullptr)
314{
315 std::vector<double> isotope_masses;
316 std::vector<double> isotope_probabilities;
317
318 dimNumber = parse_formula(formula, isotope_masses, isotope_probabilities, &isotopeNumbers, &atomCounts, &confSize, use_nominal_masses);
319
320 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
321}
322
323
324void Iso::addElement(int atomCount, int noIsotopes, const double* isotopeMasses, const double* isotopeProbabilities)
325{
326 Marginal* m = new Marginal(isotopeMasses, isotopeProbabilities, noIsotopes, atomCount);
327 realloc_append<int>(&isotopeNumbers, noIsotopes, dimNumber);
328 realloc_append<int>(&atomCounts, atomCount, dimNumber);
329 realloc_append<Marginal*>(&marginals, m, dimNumber);
330 dimNumber++;
331 confSize += sizeof(int);
332 allDim += noIsotopes;
333}
334
335void Iso::saveMarginalLogSizeEstimates(double* priorities, double target_total_prob) const
336{
337 /*
338 * We shall now use Gaussian approximations of the marginal multinomial distributions to estimate
339 * how many configurations we shall need to visit from each marginal. This should be approximately
340 * proportional to the volume of the optimal P-ellipsoid of the marginal, which, in turn is defined
341 * by the quantile function of the chi-square distribution plus some modifications.
342 *
343 * We're dropping the constant factor and the (monotonic) exp() transform - these will be used as keys
344 * for sorting, so only the ordering is important.
345 */
346
347 double K = allDim - dimNumber;
348
349 double log_R2 = log(InverseChiSquareCDF2(K, target_total_prob));
350
351 for(int ii = 0; ii < dimNumber; ii++)
352 priorities[ii] = marginals[ii]->getLogSizeEstimate(log_R2);
353}
354
355unsigned int parse_formula(const char* formula, std::vector<double>& isotope_masses, std::vector<double>& isotope_probabilities, int** isotopeNumbers, int** atomCounts, unsigned int* confSize, bool use_nominal_masses)
356{
357 // This function is NOT guaranteed to be secure against malicious input. It should be used only for debugging.
358 size_t slen = strlen(formula);
359 // Yes, it would be more elegant to use std::string here, but it's the only promiment place where it would be used in IsoSpec, and avoiding it here
360 // means we can run the whole thing through Clang's memory sanitizer without the need for instrumented libc++/libstdc++. That's worth messing with char pointers a
361 // little bit.
362 std::vector<std::pair<const char*, size_t> > elements;
363 std::vector<int> numbers;
364
365 if(slen == 0)
366 throw std::invalid_argument("Invalid formula: can't be empty");
367
368 if(!isdigit(formula[slen-1]))
369 throw std::invalid_argument("Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
370
371 for(size_t ii = 0; ii < slen; ii++)
372 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
373 throw std::invalid_argument("Invalid formula: contains invalid (non-digit, non-alpha) character");
374
375 size_t position = 0;
376
377 while(position < slen)
378 {
379 size_t elem_end = position;
380 while(isalpha(formula[elem_end]))
381 elem_end++;
382 size_t digit_end = elem_end;
383 while(isdigit(formula[digit_end]))
384 digit_end++;
385 elements.emplace_back(&formula[position], elem_end-position);
386 numbers.push_back(std::stoi(&formula[elem_end]));
387 position = digit_end;
388 }
389
390 std::vector<int> element_indexes;
391
392 for (unsigned int i = 0; i < elements.size(); i++)
393 {
394 int idx = -1;
395 for(int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
396 {
397 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
398 {
399 idx = j;
400 break;
401 }
402 }
403 if(idx < 0)
404 throw std::invalid_argument("Invalid formula");
405 element_indexes.push_back(idx);
406 }
407
408 std::vector<int> _isotope_numbers;
409 const double* masses = use_nominal_masses ? elem_table_massNo : elem_table_mass;
410
411 for(std::vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
412 {
413 int num = 0;
414 int at_idx = *it;
415 int elem_ID = elem_table_ID[at_idx];
416 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_ID[at_idx] == elem_ID)
417 {
418 isotope_masses.push_back(masses[at_idx]);
419 isotope_probabilities.push_back(elem_table_probability[at_idx]);
420 at_idx++;
421 num++;
422 }
423 _isotope_numbers.push_back(num);
424 }
425
426 const unsigned int dimNumber = elements.size();
427
428 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
429 *atomCounts = array_copy<int>(numbers.data(), dimNumber);
430 *confSize = dimNumber * sizeof(int);
431
432 return dimNumber;
433}
434
435
436/*
437 * ----------------------------------------------------------------------------------------------------------
438 */
439
440
441
442IsoGenerator::IsoGenerator(Iso&& iso, bool alloc_partials) :
443 Iso(std::move(iso)),
444 mode_lprob(getModeLProb()),
445 partialLProbs(alloc_partials ? new double[dimNumber+1] : nullptr),
446 partialMasses(alloc_partials ? new double[dimNumber+1] : nullptr),
447 partialProbs(alloc_partials ? new double[dimNumber+1] : nullptr)
448{
449 for(int ii = 0; ii < dimNumber; ++ii)
450 marginals[ii]->ensureModeConf();
451 if(alloc_partials)
452 {
455 partialProbs[dimNumber] = 1.0;
456 }
457}
458
459
461{
462 if(partialLProbs != nullptr)
463 delete[] partialLProbs;
464 if(partialMasses != nullptr)
465 delete[] partialMasses;
466 if(partialProbs != nullptr)
467 delete[] partialProbs;
468}
469
470
471
472/*
473 * ----------------------------------------------------------------------------------------------------------
474 */
475
476static const double minsqrt = -1.3407796239501852e+154; // == constexpr(-sqrt(std::numeric_limits<double>::max()));
477
478IsoThresholdGenerator::IsoThresholdGenerator(Iso&& iso, double _threshold, bool _absolute, int tabSize, int hashSize, bool reorder_marginals)
479: IsoGenerator(std::move(iso)),
480Lcutoff(_threshold <= 0.0 ? minsqrt : (_absolute ? log(_threshold) : log(_threshold) + mode_lprob))
481{
482 counter = new int[dimNumber];
483 maxConfsLPSum = new double[dimNumber-1];
484 marginalResultsUnsorted = new PrecalculatedMarginal*[dimNumber];
485
486 empty = false;
487
488 const bool marginalsNeedSorting = doMarginalsNeedSorting();
489
490 for(int ii = 0; ii < dimNumber; ii++)
491 {
492 counter[ii] = 0;
493 marginalResultsUnsorted[ii] = new PrecalculatedMarginal(std::move(*(marginals[ii])),
494 Lcutoff - mode_lprob + marginals[ii]->fastGetModeLProb(),
495 marginalsNeedSorting,
496 tabSize,
497 hashSize);
498
499 if(!marginalResultsUnsorted[ii]->inRange(0))
500 empty = true;
501 }
502
503 if(reorder_marginals && dimNumber > 1)
504 {
505 OrderMarginalsBySizeDecresing<PrecalculatedMarginal> comparator(marginalResultsUnsorted);
506 int* tmpMarginalOrder = new int[dimNumber];
507
508 for(int ii = 0; ii < dimNumber; ii++)
509 tmpMarginalOrder[ii] = ii;
510
511 std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, comparator);
512 marginalResults = new PrecalculatedMarginal*[dimNumber];
513
514 for(int ii = 0; ii < dimNumber; ii++)
515 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
516
517 marginalOrder = new int[dimNumber];
518 for(int ii = 0; ii < dimNumber; ii++)
519 marginalOrder[tmpMarginalOrder[ii]] = ii;
520
521 delete[] tmpMarginalOrder;
522 }
523 else
524 {
525 marginalResults = marginalResultsUnsorted;
526 marginalOrder = nullptr;
527 }
528
529 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
530
531 if(dimNumber > 1)
532 maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
533
534 for(int ii = 1; ii < dimNumber-1; ii++)
535 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
536
537 lProbs_ptr = lProbs_ptr_start;
538
539 partialLProbs_second = partialLProbs;
540 partialLProbs_second++;
541
542 if(!empty)
543 {
544 recalc(dimNumber-1);
545 counter[0]--;
546 lProbs_ptr--;
547 }
548 else
549 {
551 lcfmsv = std::numeric_limits<double>::infinity();
552 }
553}
554
556{
557 for(int ii = 0; ii < dimNumber; ii++)
558 {
559 counter[ii] = marginalResults[ii]->get_no_confs()-1;
560 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
561 }
562 partialLProbs[dimNumber] = -std::numeric_limits<double>::infinity();
563 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
564}
565
567{
568 if(empty)
569 return 0;
570
571 if(dimNumber == 1)
572 return marginalResults[0]->get_no_confs();
573
574 const double* lProbs_ptr_l = marginalResults[0]->get_lProbs_ptr() + marginalResults[0]->get_no_confs();
575
576 std::unique_ptr<const double* []> lProbs_restarts(new const double*[dimNumber]);
577
578 for(int ii = 0; ii < dimNumber; ii++)
579 lProbs_restarts[ii] = lProbs_ptr_l;
580
581 size_t count = 0;
582
583 while(*lProbs_ptr_l < lcfmsv)
584 lProbs_ptr_l--;
585
586 while(true)
587 {
588 count += lProbs_ptr_l - lProbs_ptr_start + 1;
589
590 int idx = 0;
591 int * cntr_ptr = counter;
592
593 while(idx < dimNumber - 1)
594 {
595 *cntr_ptr = 0;
596 idx++;
597 cntr_ptr++;
598 (*cntr_ptr)++;
599
600 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
601 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
602 {
603 short_recalc(idx-1);
604 lProbs_ptr_l = lProbs_restarts[idx];
605 while(*lProbs_ptr_l < lcfmsv)
606 lProbs_ptr_l--;
607 for(idx--; idx > 0; idx--)
608 lProbs_restarts[idx] = lProbs_ptr_l;
609 break;
610 }
611 }
612 if(idx == dimNumber - 1)
613 {
614 reset();
615 return count;
616 }
617 }
618}
619
621{
622 if(empty)
623 {
625 return;
626 }
627
629
630 memset(counter, 0, sizeof(int)*dimNumber);
631 recalc(dimNumber-1);
632 counter[0]--;
633
634 lProbs_ptr = lProbs_ptr_start - 1;
635}
636
637IsoThresholdGenerator::~IsoThresholdGenerator()
638{
639 delete[] counter;
640 delete[] maxConfsLPSum;
641 if (marginalResultsUnsorted != marginalResults)
642 delete[] marginalResultsUnsorted;
643 dealloc_table(marginalResults, dimNumber);
644 if(marginalOrder != nullptr)
645 delete[] marginalOrder;
646}
647
648
649/*
650 * ------------------------------------------------------------------------------------------------------------------------
651 */
652
653
654IsoLayeredGenerator::IsoLayeredGenerator(Iso&& iso, int tabSize, int hashSize, bool reorder_marginals, double t_prob_hint)
655: IsoGenerator(std::move(iso))
656{
657 counter = new int[dimNumber];
658 maxConfsLPSum = new double[dimNumber-1];
659 currentLThreshold = nextafter(mode_lprob, -std::numeric_limits<double>::infinity());
660 lastLThreshold = (std::numeric_limits<double>::min)();
661 marginalResultsUnsorted = new LayeredMarginal*[dimNumber];
662 resetPositions = new const double*[dimNumber];
663 marginalsNeedSorting = doMarginalsNeedSorting();
664
665 memset(counter, 0, sizeof(int)*dimNumber);
666
667 for(int ii = 0; ii < dimNumber; ii++)
668 marginalResultsUnsorted[ii] = new LayeredMarginal(std::move(*(marginals[ii])), tabSize, hashSize);
669
670 if(reorder_marginals && dimNumber > 1)
671 {
672 double* marginal_priorities = new double[dimNumber];
673
674 saveMarginalLogSizeEstimates(marginal_priorities, t_prob_hint);
675
676 int* tmpMarginalOrder = new int[dimNumber];
677
678 for(int ii = 0; ii < dimNumber; ii++)
679 tmpMarginalOrder[ii] = ii;
680
681 TableOrder<double> TO(marginal_priorities);
682
683 std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, TO);
684 marginalResults = new LayeredMarginal*[dimNumber];
685
686 for(int ii = 0; ii < dimNumber; ii++)
687 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
688
689 marginalOrder = new int[dimNumber];
690 for(int ii = 0; ii < dimNumber; ii++)
691 marginalOrder[tmpMarginalOrder[ii]] = ii;
692
693 delete[] tmpMarginalOrder;
694 delete[] marginal_priorities;
695 }
696 else
697 {
698 marginalResults = marginalResultsUnsorted;
699 marginalOrder = nullptr;
700 }
701
702 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
703
704 if(dimNumber > 1)
705 maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
706
707 for(int ii = 1; ii < dimNumber-1; ii++)
708 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
709
710 lProbs_ptr = lProbs_ptr_start;
711
712 partialLProbs_second = partialLProbs;
713 partialLProbs_second++;
714
715 counter[0]--;
716 lProbs_ptr--;
717 lastLThreshold = 10.0;
718 IsoLayeredGenerator::nextLayer(-0.00001);
719}
720
721bool IsoLayeredGenerator::nextLayer(double offset)
722{
723 size_t first_mrg_size = marginalResults[0]->get_no_confs();
724
725 if(lastLThreshold < getUnlikeliestPeakLProb())
726 return false;
727
728 lastLThreshold = currentLThreshold;
729 currentLThreshold += offset;
730
731 for(int ii = 0; ii < dimNumber; ii++)
732 {
733 marginalResults[ii]->extend(currentLThreshold - mode_lprob + marginalResults[ii]->fastGetModeLProb(), marginalsNeedSorting);
734 counter[ii] = 0;
735 }
736
737 lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr(); // vector relocation might have happened
738
739 lProbs_ptr = lProbs_ptr_start + first_mrg_size - 1;
740
741 for(int ii = 0; ii < dimNumber; ii++)
742 resetPositions[ii] = lProbs_ptr;
743
744 recalc(dimNumber-1);
745
746 return true;
747}
748
749bool IsoLayeredGenerator::carry()
750{
751 // If we reached this point, a carry is needed
752
753 int idx = 0;
754
755 int * cntr_ptr = counter;
756
757 while(idx < dimNumber-1)
758 {
759 *cntr_ptr = 0;
760 idx++;
761 cntr_ptr++;
762 (*cntr_ptr)++;
763 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
764 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= currentLThreshold)
765 {
766 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
767 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
768 recalc(idx-1);
769 lProbs_ptr = resetPositions[idx];
770
771 while(*lProbs_ptr <= last_lcfmsv)
772 lProbs_ptr--;
773
774 for(int ii = 0; ii < idx; ii++)
775 resetPositions[ii] = lProbs_ptr;
776
777 return true;
778 }
779 }
780
781 return false;
782}
783
784
786{
787 for(int ii = 0; ii < dimNumber; ii++)
788 {
789 counter[ii] = marginalResults[ii]->get_no_confs()-1;
790 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
791 }
792 partialLProbs[dimNumber] = -std::numeric_limits<double>::infinity();
793 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
794}
795
796IsoLayeredGenerator::~IsoLayeredGenerator()
797{
798 delete[] counter;
799 delete[] maxConfsLPSum;
800 delete[] resetPositions;
801 if (marginalResultsUnsorted != marginalResults)
802 delete[] marginalResultsUnsorted;
803 dealloc_table(marginalResults, dimNumber);
804 if(marginalOrder != nullptr)
805 delete[] marginalOrder;
806}
807
808
809/*
810 * ------------------------------------------------------------------------------------------------------------------------
811 */
812
813
814IsoOrderedGenerator::IsoOrderedGenerator(Iso&& iso, int _tabSize, int _hashSize) :
815IsoGenerator(std::move(iso), false), allocator(dimNumber, _tabSize)
816{
817 partialLProbs = &currentLProb;
818 partialMasses = &currentMass;
819 partialProbs = &currentProb;
820
821 marginalResults = new MarginalTrek*[dimNumber];
822
823 for(int i = 0; i < dimNumber; i++)
824 marginalResults[i] = new MarginalTrek(std::move(*(marginals[i])), _tabSize, _hashSize);
825
826 logProbs = new const pod_vector<double>*[dimNumber];
827 masses = new const pod_vector<double>*[dimNumber];
828 marginalConfs = new const pod_vector<int*>*[dimNumber];
829
830 for(int i = 0; i < dimNumber; i++)
831 {
832 masses[i] = &marginalResults[i]->conf_masses();
833 logProbs[i] = &marginalResults[i]->conf_lprobs();
834 marginalConfs[i] = &marginalResults[i]->confs();
835 }
836
837 topConf = allocator.newConf();
838 memset(
839 reinterpret_cast<char*>(topConf) + sizeof(double),
840 0,
841 sizeof(int)*dimNumber
842 );
843
844 *(reinterpret_cast<double*>(topConf)) =
845 combinedSum(
846 getConf(topConf),
847 logProbs,
849 );
850
851 pq.push(topConf);
852}
853
854
856{
857 dealloc_table<MarginalTrek*>(marginalResults, dimNumber);
858 delete[] logProbs;
859 delete[] masses;
860 delete[] marginalConfs;
861 partialLProbs = nullptr;
862 partialMasses = nullptr;
863 partialProbs = nullptr;
864}
865
866
868{
869 if(pq.size() < 1)
870 return false;
871
872
873 topConf = pq.top();
874 pq.pop();
875
876 int* topConfIsoCounts = getConf(topConf);
877
878 currentLProb = *(reinterpret_cast<double*>(topConf));
879 currentMass = combinedSum( topConfIsoCounts, masses, dimNumber );
880 currentProb = exp(currentLProb);
881
882 ccount = -1;
883 for(int j = 0; j < dimNumber; ++j)
884 {
885 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
886 {
887 if(ccount == -1)
888 {
889 topConfIsoCounts[j]++;
890 *(reinterpret_cast<double*>(topConf)) = combinedSum(topConfIsoCounts, logProbs, dimNumber);
891 pq.push(topConf);
892 topConfIsoCounts[j]--;
893 ccount = j;
894 }
895 else
896 {
897 void* acceptedCandidate = allocator.newConf();
898 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
899 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts, confSize);
900
901 acceptedCandidateIsoCounts[j]++;
902
903 *(reinterpret_cast<double*>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs, dimNumber);
904
905 pq.push(acceptedCandidate);
906 }
907 }
908 if(topConfIsoCounts[j] > 0)
909 break;
910 }
911 if(ccount >=0)
912 topConfIsoCounts[ccount]++;
913
914 return true;
915}
916
917
918/*
919 * ---------------------------------------------------------------------------------------------------
920 */
921
922
923IsoStochasticGenerator::IsoStochasticGenerator(Iso&& iso, size_t no_molecules, double _precision, double _beta_bias) :
924IsoGenerator(std::move(iso)),
925ILG(std::move(*this)),
926to_sample_left(no_molecules),
927precision(_precision),
928beta_bias(_beta_bias),
929confs_prob(0.0),
930chasing_prob(0.0)
931{}
932
933/*
934 * ---------------------------------------------------------------------------------------------------
935 */
936
937
938
939
940
941} // namespace IsoSpec
942
The generator of isotopologues.
Definition: isoSpec++.h:184
virtual ~IsoGenerator()
Destructor.
Definition: isoSpec++.cpp:460
double * partialLProbs
Definition: isoSpec++.h:189
IsoGenerator(Iso &&iso, bool alloc_partials=true)
Move constructor.
Definition: isoSpec++.cpp:442
double * partialMasses
Definition: isoSpec++.h:190
The Iso class for the calculation of the isotopic distribution.
Definition: isoSpec++.h:49
void saveMarginalLogSizeEstimates(double *priorities, double target_total_prob) const
Save estimates of logarithms of target sizes of marginals using Gaussian approximation into argument ...
Definition: isoSpec++.cpp:335
double getHeaviestPeakMass() const
Get the mass of the heaviest peak in the isotopic distribution.
Definition: isoSpec++.cpp:253
double getTheoreticalAverageMass() const
Get the theoretical average mass of the molecule.
Definition: isoSpec++.cpp:293
double getUnlikeliestPeakLProb() const
Get the logprobability of the least probable subisotopologue.
Definition: isoSpec++.cpp:269
void addElement(int atomCount, int noIsotopes, const double *isotopeMasses, const double *isotopeProbabilities)
Add an element to the molecule. Note: this method can only be used BEFORE Iso is used to construct an...
Definition: isoSpec++.cpp:324
double getModeMass() const
Get the mass of the mode-configuration (if there are many modes, it is undefined which one will be se...
Definition: isoSpec++.cpp:277
int * isotopeNumbers
Definition: isoSpec++.h:64
static Iso FromFASTA(const char *fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C string.
Definition: isoSpec++.cpp:169
double getLightestPeakMass() const
Get the mass of the lightest peak in the isotopic distribution.
Definition: isoSpec++.cpp:245
double variance() const
Get the theoretical variance of the distribution.
Definition: isoSpec++.cpp:301
double getMonoisotopicPeakMass() const
Definition: isoSpec++.cpp:261
unsigned int confSize
Definition: isoSpec++.h:66
virtual ~Iso()
Destructor.
Definition: isoSpec++.cpp:221
int dimNumber
Definition: isoSpec++.h:63
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value).
Definition: isoSpec++.cpp:285
int * atomCounts
Definition: isoSpec++.h:65
Marginal ** marginals
Definition: isoSpec++.h:68
void terminate_search()
Block the subsequent search of isotopologues.
Definition: isoSpec++.cpp:785
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
Definition: isoSpec++.h:520
virtual ~IsoOrderedGenerator()
Destructor.
Definition: isoSpec++.cpp:855
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
Definition: isoSpec++.cpp:867
void terminate_search()
Block the subsequent search of isotopologues.
Definition: isoSpec++.cpp:555
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues, see details in PrecalculatedMarginal::get_no_confs.
const double * get_lProbs_ptr() const
get the pointer to lProbs array. Accessing index -1 is legal and returns a guardian of -inf....
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
The marginal distribution class (a subisotopologue).
double fastGetModeLProb()
Get the log-probability of the mode subisotopologue. Results undefined if ensureModeConf() wasn't cal...
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues.
const double * get_lProbs_ptr() const
Get the table of the log-probabilities of subisotopologues.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.