libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pwizmsrunreader.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3 * \date 29/05/2018
4 * \author Olivier Langella
5 * \brief MSrun file reader base on proteowizard library
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31
32#include <QDebug>
33
34#include "pwizmsrunreader.h"
35
36#include <pwiz/data/msdata/DefaultReaderList.hpp>
37
38
39#include "../../utils.h"
40#include "../../pappsoexception.h"
41#include "../../exception/exceptionnotfound.h"
42#include "../../exception/exceptionnotpossible.h"
43#include "config.h"
44
45 // int pwizMsRunReaderMetaTypeId =
46 // qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47
48
49 namespace pappso
50{
51
53
55 : MsRunReader(msrun_id_csp)
56 {
57 // The initialization needs to be done immediately so that we get the pwiz
58 // MsDataPtr corresponding to the right ms_run_id in the parameter. That
59 // pointer will be set to msp_msData.
60
61 initialize();
62 }
63
64
66 {
67 std::string file_name_std =
69
70
71 // Now actually search the useful MSDataPtr to the member variable.
72
73 pwiz::msdata::DefaultReaderList defaultReaderList;
74
75 std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
76
77 try
78 {
79 defaultReaderList.read(file_name_std, msDataPtrVector);
80 }
81 catch(std::exception &error)
82 {
83 qDebug() << QString("Failed to read the data from file %1")
84 .arg(QString::fromStdString(file_name_std));
85
86 throw(PappsoException(
87 QString("Error reading file %1 in PwizMsRunReader, for msrun %2:\n%3")
88 .arg(mcsp_msRunId->getFileName())
89 .arg(mcsp_msRunId.get()->toString())
90 .arg(error.what())));
91 }
92
93 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
94 //<< "The number of runs is:" << msDataPtrVector.size()
95 //<< "The number of spectra in first run is:"
96 //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
97
98 // Single-run file handling here.
99
100 // Specific case of the MGF data format: we do not have a run id for that
101 // kind of data. In this case there must be a single run!
102
103 if(mcsp_msRunId->getRunId().isEmpty())
104 {
105 if(msDataPtrVector.size() != 1)
107 "For the kind of file at hand there can only be "
108 "one run in the file."));
109
110 // At this point we know the single msDataPtr is the one we are looking
111 // for.
112
113 msp_msData = msDataPtrVector.front();
114 }
115 else
116 {
117 // Multi-run file handling here.
118 for(const pwiz::msdata::MSDataPtr &msDataPtr : msDataPtrVector)
119 {
120 qDebug() << "msDataPtr->run.id=" << msDataPtr->run.id.c_str();
121 qDebug() << "mcsp_msRunId->getRunId()=" << mcsp_msRunId->getRunId();
122 qDebug() << "mcsp_msRunId->getXmlId()=" << mcsp_msRunId->getXmlId();
123 qDebug() << "mcsp_msRunId->getSampleName()="
124 << mcsp_msRunId->getSampleName();
125 if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
126 {
127 msp_msData = msDataPtr;
128
129 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
130 // "()"
131 //<< "Found the right MSDataPtr for run id.";
132
133 break;
134 }
135 }
136 }
137
138 if(msp_msData == nullptr)
139 {
140 throw(ExceptionNotPossible(QString("Could not find a pwiz MSDataPtr "
141 "matching the requested run id : %1")
142 .arg(mcsp_msRunId.get()->toString())));
143 }
144
145
146 // check if this MS run can be used with scan numbers
147 // MS:1000490 Agilent instrument model
148 pwiz::cv::CVID native_id_format =
149 pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
150
151 // msp_msData.get()->getDefaultNativeIDFormat();
152
153 if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
154 {
155 m_hasScanNumbers = true;
156 }
157 else
158 {
159 m_hasScanNumbers = false;
160 }
161
162 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::mzXML)
163 {
164 m_hasScanNumbers = true;
165 }
166 }
167
171
172
174 {
175
176 OboPsiModTerm term;
177
178 term.m_accession = "MS:1000824";
179 term.m_name = "no nativeID format";
180 term.m_definition =
181 "No nativeID format indicates that the file tagged with this term does "
182 "not "
183 "contain spectra that can have a nativeID format.";
184
185
186 pwiz::cv::CVID cvid =
187 pwiz::msdata::id::getDefaultNativeIDFormat(*(msp_msData.get()));
188
189 switch(cvid)
190 {
191 case pwiz::cv::MS_Thermo_nativeID_format:
192 term.m_accession = "MS:1000768";
193 term.m_name = "Thermo nativeID format";
194 term.m_definition =
195 "Native format defined by controllerType=xsd:nonNegativeInteger "
196 "controllerNumber=xsd:positiveInteger scan=xsd:positiveInteger.";
197 break;
198 default:
199 break;
200 }
201 return term;
202 }
203
204 pwiz::msdata::SpectrumPtr PwizMsRunReader::getPwizSpectrumPtr(
205 pwiz::msdata::SpectrumList * p_spectrum_list,
206 std::size_t spectrum_index,
207 bool want_binary_data) const
208 {
209 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
210
211 try
212 {
213 native_pwiz_spectrum_sp =
214 p_spectrum_list->spectrum(spectrum_index, want_binary_data);
215 }
216 catch(std::runtime_error &error)
217 {
218 qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
219 << typeid(error).name();
220
221 throw ExceptionNotFound(
222 QObject::tr("Pwiz spectrum index %1 not found in "
223 "MS file std::runtime_error :\n%2")
224 .arg(spectrum_index)
225 .arg(error.what()));
226 }
227 catch(std::exception &error)
228 {
229 qDebug() << "getPwizSpectrumPtr error " << error.what()
230 << typeid(error).name();
231
232 throw ExceptionNotFound(
233 QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
234 .arg(spectrum_index)
235 .arg(error.what()));
236 }
237
238 if(native_pwiz_spectrum_sp.get() == nullptr)
239 {
240 throw ExceptionNotFound(
241 QObject::tr(
242 "Pwiz spectrum index %1 not found in MS file : null pointer")
243 .arg(spectrum_index));
244 }
245
246 return native_pwiz_spectrum_sp;
247 }
248
249
251 pwiz::msdata::Spectrum * spectrum_p,
252 QualifiedMassSpectrum & qualified_mass_spectrum) const
253 {
254
255 // We now have to set the retention time at which this mass spectrum
256 // was acquired. This is the scan start time.
257
258 if(!spectrum_p->scanList.scans[0].hasCVParam(
259 pwiz::msdata::MS_scan_start_time))
260 {
261 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
262 { // MGF could not have scan start time
263 qualified_mass_spectrum.setRtInSeconds(-1);
264 }
265 else
266 {
268 "The spectrum has no scan start time value set."));
269 }
270 }
271 else
272 {
273 pwiz::data::CVParam retention_time_cv_param =
274 spectrum_p->scanList.scans[0].cvParam(
275 pwiz::msdata::MS_scan_start_time);
276
277 // Try to get the units of the retention time value.
278
279 std::string unit_name = retention_time_cv_param.unitsName();
280 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
281 //<< "Unit name for the retention time:"
282 //<< QString::fromStdString(unit_name);
283
284 if(unit_name == "second")
285 {
286 qualified_mass_spectrum.setRtInSeconds(
287 retention_time_cv_param.valueAs<double>());
288 }
289 else if(unit_name == "minute")
290 {
291 qualified_mass_spectrum.setRtInSeconds(
292 retention_time_cv_param.valueAs<double>() * 60);
293 }
294 else
295 throw(
296 ExceptionNotPossible("Could not determine the unit for the "
297 "scan start time value."));
298 }
299
300 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
301 //<< "Retention time for spectrum is:"
302 //<< qualified_mass_spectrum.getRtInSeconds();
303
304 // Old version not checking unit (by default unit is minutes for RT,
305 // not seconds)
306 //
307 // pappso_double retentionTime =
308 // QString(spectrum_p->scanList.scans[0]
309 //.cvParam(pwiz::msdata::MS_scan_start_time)
310 //.value.c_str())
311 //.toDouble();
312 // qualified_mass_spectrum.setRtInSeconds(retentionTime);
313
314 return true;
315 }
316
317
318 bool PwizMsRunReader::processDriftTime(pwiz::msdata::Spectrum * spectrum_p,
320 qualified_mass_spectrum) const
321 {
322 // Not all the acquisitions have ion mobility data. We need to test
323 // that:
324
325 if(spectrum_p->scanList.scans[0].hasCVParam(
326 pwiz::msdata::MS_ion_mobility_drift_time))
327 {
328
329 // qDebug() << "as strings:"
330 //<< QString::fromStdString(
331 // spectrum_p->scanList.scans[0]
332 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
333 //.valueAs<std::string>());
334
335 pappso_double driftTime =
336 spectrum_p->scanList.scans[0]
337 .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
338 .valueAs<double>();
339
340 // qDebug() << "driftTime:" << driftTime;
341
342 // Old version requiring use of QString.
343 // pappso_double driftTime =
344 // QString(spectrum_p->scanList.scans[0]
345 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
346 //.value.c_str())
347 //.toDouble();
348
349 // Now make positively sure that the obtained value is correct.
350 // Note that I suffered a lot with Waters Synapt data that
351 // contained apparently correct drift time XML element that in
352 // fact contained either NaN or inf. When such mass spectra were
353 // encountered, the mz,i data were bogus and crashed the data
354 // loading functions. We just want to skip this kind of bogus mass
355 // spectrum by letting the caller know that the drift time was
356 // bogus ("I" is Filippo Rusconi).
357
358 if(std::isnan(driftTime) || std::isinf(driftTime))
359 {
360 // qDebug() << "detected as nan or inf.";
361
362 return false;
363 }
364 else
365 {
366 // The mzML standard stipulates that drift times are in
367 // milliseconds.
368 qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
369 }
370 }
371 // End of
372 // if(spectrum_p->scanList.scans[0].hasCVParam(
373 // pwiz::msdata::MS_ion_mobility_drift_time))
374 else
375 {
376 // Not a bogus mass spectrum but also not a drift spectrum, set -1
377 // as the drift time value.
378 qualified_mass_spectrum.setDtInMilliSeconds(-1);
379 }
380
381 return true;
382 }
383
384
387 const MassSpectrumId &massSpectrumId,
388 pwiz::msdata::Spectrum *spectrum_p,
389 bool want_binary_data,
390 bool &ok) const
391 {
392 // qDebug();
393
394 if(spectrum_p == nullptr)
395 qFatal("Spectrum pointer cannot be nullptr.");
396
397 std::string backup_env = setGlobalLocaleToEnglish();
398
399 QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
400
401 try
402 {
403
404 // We want to store the ms level for this spectrum
405
406 int msLevel =
407 (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
408
409 qualified_mass_spectrum.setMsLevel(msLevel);
410
411 if(!spectrum_p->scanList.scans[0].hasCVParam(
412 pwiz::msdata::MS_peak_list_scans))
413 {
414
415 // qDebug() << spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
416 // .valueAs<double>();
417 qualified_mass_spectrum.setParameterValue(
419 spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
420 .valueAs<double>());
421 }
422 // We want to know if this spectrum is a fragmentation spectrum obtained
423 // from a selected precursor ion.
424
425 std::size_t precursor_list_size = spectrum_p->precursors.size();
426
427 // qDebug() << "For spectrum at index:" <<
428 // massSpectrumId.getSpectrumIndex()
429 //<< "msLevel:" << msLevel
430 //<< "with number of precursors:" << precursor_list_size;
431
432 if(precursor_list_size > 0)
433 {
434
435 // Sanity check
436 if(msLevel < 2)
437 {
438 qDebug()
439 << "Going to throw: msLevel cannot be less than two for "
440 "a spectrum that has items in its Precursor list.";
441
443 "msLevel cannot be less than two for "
444 "a spectrum that has items in its Precursor list."));
445 }
446
447 // See what is the first precursor in the list.
448
449 for(auto &precursor : spectrum_p->precursors)
450 {
451
452 // Set this variable ready as we need that default value in
453 // certain circumstances.
454
455 std::size_t precursor_spectrum_index =
456 std::numeric_limits<std::size_t>::max();
457
458 // The spectrum ID of the precursor might be empty.
459
460 if(precursor.spectrumID.empty())
461 {
462 // qDebug() << "The precursor's spectrum ID is empty.";
463
464 if(mcsp_msRunId.get()->getMsDataFormat() ==
466 {
467 // qDebug()
468 //<< "Format is MGF, precursor's spectrum ID can be
469 // empty.";
470 }
471 else
472 {
473 // When performing Lumos Fusion fragmentation
474 // experiments in Tune mode and with recording, the
475 // first spectrum of the list is a fragmentation
476 // spectrum (ms level 2) that has no identity for the
477 // precursor spectrum because there is no full scan
478 // accquisition.
479 }
480 }
481 // End of
482 // if(precursor.spectrumID.empty())
483 else
484 {
485 // We could get a native precursor spectrum id, so convert
486 // that native id to a spectrum index.
487
488 qualified_mass_spectrum.setPrecursorNativeId(
489 QString::fromStdString(precursor.spectrumID));
490
491 if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
492 {
493 // qDebug() << "The native id of the precursor spectrum
494 // is empty.";
495 }
496
497 // Get the spectrum index of the spectrum that contained
498 // the precursor ion.
499
500 precursor_spectrum_index =
501 msp_msData->run.spectrumListPtr->find(
502 precursor.spectrumID);
503
504 // Note that the Mascot MGF format has a peculiar handling
505 // of the precursor ion stuff so we cannot throw.
506 if(precursor_spectrum_index ==
507 msp_msData->run.spectrumListPtr->size())
508 {
509 if(mcsp_msRunId.get()->getMsDataFormat() !=
511 {
513 "Failed to find the index of the "
514 "precursor ion's spectrum."));
515 }
516 }
517
518 qualified_mass_spectrum.setPrecursorSpectrumIndex(
519 precursor_spectrum_index);
520
521 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__
522 // <<
523 // "()"
524 //<< "Set the precursor spectrum index to:"
525 //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
526 //<< "for qualified mass spectrum:"
527 //<< &qualified_mass_spectrum;
528 }
529
530 if(!precursor.selectedIons.size())
531 {
532 qDebug()
533 << "Going to throw The spectrum has msLevel > 1 but the "
534 "precursor ions's selected ions list is empty..";
535
537 "The spectrum has msLevel > 1 but the "
538 "precursor ions's selected ions "
539 "list is empty."));
540 }
541
542 pwiz::msdata::SelectedIon &ion =
543 *(precursor.selectedIons.begin());
544
545 // selected ion m/z
546
547 pappso_double selected_ion_mz =
548 QString(
549 ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
550 .toDouble();
551
552 // selected ion peak intensity
553 //<cvParam cvRef="MS" accession="MS:1000042"
554 // value="910663.949707031" name="peak intensity"
555 // unitAccession="MS:1000131" unitName="number of detector
556 // counts" unitCvRef="MS" />
557
558 pappso_double selected_ion_peak_intensity =
559 QString(
560 ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
561 .toDouble();
562
563 // charge state
564
565 unsigned int selected_ion_charge_state =
566 QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
567 .toUInt();
568
569 // At this point we can craft a new PrecursorIonData instance
570 // and push it back to the vector.
571
572 PrecursorIonData precursor_ion_data(
573 selected_ion_mz,
574 selected_ion_charge_state,
575 selected_ion_peak_intensity);
576
577 qualified_mass_spectrum.appendPrecursorIonData(
578 precursor_ion_data);
579
580 // General sum-up
581
582 // qDebug()
583 //<< "Appended new PrecursorIonData:"
584 //<< "mz:"
585 //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
586 //<< "charge:"
587 //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
588 //<< "intensity:"
589 //<< qualified_mass_spectrum.getPrecursorIonData()
590 //.back()
591 //.intensity;
592 }
593 // End of
594 // for(auto &precursor : spectrum_p->precursors)
595 }
596 // End of
597 // if(precursor_list_size > 0)
598 else
599 {
600 // Sanity check
601
602 // Unfortunately, logic here is defeated by some vendors that have
603 // files with MS2 spectra without <precursorList>. Thus we have
604 // spectrum_p->precursors.size() == 0 and msLevel > 1.
605
606 // if(msLevel != 1)
607 //{
608 // throw(
609 // ExceptionNotPossible("msLevel cannot be different than 1 if "
610 //"there is not a single precursor ion."));
611 //}
612 }
613
614 // Sanity check.
615
616 if(precursor_list_size !=
617 qualified_mass_spectrum.getPrecursorIonData().size())
618 {
619 qDebug()
620 << "Going to throw The number of precursors in the file is "
621 "different from the number of precursors in memory.";
622
624 QObject::tr("The number of precursors in the file is different "
625 "from the number of precursors in memory."));
626 }
627
628 // if(precursor_list_size == 1)
629 //{
630 // qDebug() << "Trying to get the mz value of the unique precursor ion:"
631 //<< qualified_mass_spectrum.getPrecursorMz();
632 //}
633
634 processRetentionTime(spectrum_p, qualified_mass_spectrum);
635
636 processDriftTime(spectrum_p, qualified_mass_spectrum);
637
638 // for(pwiz::data::CVParam cv_param : ion.cvParams)
639 //{
640 // pwiz::msdata::CVID param_id = cv_param.cvid;
641 // qDebug() << param_id;
642 // qDebug() << cv_param.cvid.c_str();
643 // qDebug() << cv_param.name().c_str();
644 // qDebug() << cv_param.value.c_str();
645 //}
646
647 if(want_binary_data)
648 {
649
650 // Fill-in MZIntensityPair vector for convenient access to binary
651 // data
652
653 std::vector<pwiz::msdata::MZIntensityPair> pairs;
654 spectrum_p->getMZIntensityPairs(pairs);
655
656 MassSpectrum spectrum;
657 double tic = 0;
658 // std::size_t iterCount = 0;
659
660 // Iterate through the m/z-intensity pairs
661 for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
662 it = pairs.begin(),
663 end = pairs.end();
664 it != end;
665 ++it)
666 {
667 //++iterCount;
668
669 // qDebug() << "it->mz " << it->mz << " it->intensity" <<
670 // it->intensity;
671 if(it->intensity)
672 {
673 spectrum.push_back(DataPoint(it->mz, it->intensity));
674 tic += it->intensity;
675 }
676 }
677
678 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
679 {
680 // Sort peaks by mz
681 spectrum.sortMz();
682 }
683
684 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
685 // "<< spectrum.size();
686 MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
687 qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
688
689 // double sumY =
690 // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
691 // <<
692 // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
693 //<< "iterCount:" << iterCount << "Spectrum size "
694 //<< spectrum.size() << "with tic:" << tic
695 //<< "and sumY:" << sumY;
696 }
697 else
698 qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
699 }
700 catch(PappsoException &errorp)
701 {
702 qDebug() << "Going to throw";
703
705 QObject::tr("Error reading data using the proteowizard library: %1")
706 .arg(errorp.qwhat()));
707 }
708 catch(std::exception &error)
709 {
710 qDebug() << "Going to throw";
711
713 QObject::tr("Error reading data using the proteowizard library: %1")
714 .arg(error.what()));
715 }
716
717 ok = true;
718
719 // qDebug() << "QualifiedMassSpectrum: " <<
720 // qualified_mass_spectrum.toString();
721 return qualified_mass_spectrum;
722 }
723
724
726 std::size_t spectrum_index, bool want_binary_data, bool &ok) const
727 {
728
729 std::string backup_env = setGlobalLocaleToEnglish();
730
731 MassSpectrumId massSpectrumId(mcsp_msRunId);
732
733 if(msp_msData == nullptr)
734 {
735 setGlobalLocaleToCurrentOs(backup_env);
736 return (QualifiedMassSpectrum(massSpectrumId));
737 }
738
739 // const bool want_binary_data = true;
740
741 pwiz::msdata::SpectrumListPtr spectrum_list_p =
742 msp_msData->run.spectrumListPtr;
743
744 if(spectrum_index == spectrum_list_p.get()->size())
745 {
746 setGlobalLocaleToCurrentOs(backup_env);
747 throw ExceptionNotFound(
748 QObject::tr("The spectrum index cannot be equal to the size of the "
749 "spectrum list."));
750 }
751
752 // At this point we know the spectrum index might be sane, so store it in
753 // the mass spec id object.
754 massSpectrumId.setSpectrumIndex(spectrum_index);
755
756 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp = getPwizSpectrumPtr(
757 spectrum_list_p.get(), spectrum_index, want_binary_data);
758
759 setGlobalLocaleToCurrentOs(backup_env);
760
761 massSpectrumId.setNativeId(
762 QString::fromStdString(native_pwiz_spectrum_sp->id));
763
765 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
766 }
767
768
769 bool PwizMsRunReader::accept(const QString &file_name) const
770 {
771 // We want to know if we can handle the file_name.
772 pwiz::msdata::ReaderList reader_list;
773
774 std::string reader_type = reader_list.identify(file_name.toStdString());
775
776 if(!reader_type.empty())
777 return true;
778
779 return false;
780 }
781
782
784 std::size_t spectrum_index)
785 {
786 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
787 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
788 }
789
791 std::size_t spectrum_index)
792 {
793 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
794 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
795 }
796
798 std::size_t spectrum_index, bool want_binary_data) const
799 {
800
801 QualifiedMassSpectrum spectrum;
802 bool ok = false;
803
804 spectrum =
805 qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
806
807 if(mcsp_msRunId->getMsDataFormat() == pappso::MsDataFormat::MGF)
808 {
809 if(spectrum.getRtInSeconds() == 0)
810 {
811 // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
812 }
813 }
814
815 // if(!ok)
816 // qDebug() << "Encountered a mass spectrum for which the status is bad.";
817
818 return spectrum;
819 }
820
821
824 {
825 try
826 {
828 }
829 catch(std::exception &error)
830 {
831 qDebug() << "Going to throw";
832
834 QObject::tr("Error reading data (spectrum collection) using the "
835 "proteowizard library: %1")
836 .arg(error.what()));
837 }
838 }
839
841 [[maybe_unused]] const MsRunReadConfig &config,
843 {
844 qDebug();
845 try
846 {
848 }
849 catch(std::exception &error)
850 {
851 qDebug() << "Going to throw";
852
854 QObject::tr("Error reading data (spectrum collection2) using the "
855 "proteowizard library: %1")
856 .arg(error.what()));
857 }
858 }
859
861 SpectrumCollectionHandlerInterface & handler, unsigned int ms_level)
862 {
863
865 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
866
867 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
868 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
869 // spectrum has been fully qualified (that is, the member data have been
870 // set), it is transferred to the handler passed as parameter to this
871 // function for the consumer to do what it wants with it.
872
873 // Does the handler consuming the mass spectra read from file want these
874 // mass spectra to hold the binary data arrays (mz/i vectors)?
875
876 const bool want_binary_data = handler.needPeakList();
877
878 std::string backup_env = setGlobalLocaleToEnglish();
879
880 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
881 // run member of msp_msData.
882
883 pwiz::msdata::SpectrumListPtr spectrum_list_p =
884 msp_msData->run.spectrumListPtr;
885
886 // We'll need it to perform the looping in the spectrum list.
887 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
888
889 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
890
891 // Inform the handler of the spectrum list so that it can handle feedback to
892 // the user.
893 handler.spectrumListHasSize(spectrum_list_size);
894
895 // Iterate in the full list of spectra.
896
897 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
898 {
899
900 // If the user of this reader instance wants to stop reading the
901 // spectra, then break this loop.
902 if(handler.shouldStop())
903 {
904 qDebug() << "The operation was cancelled. Breaking the loop.";
905 break;
906 }
907
908 // Get the native pwiz-spectrum from the spectrum list.
909 // Note that this pointer is a shared pointer from pwiz.
910
911 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
912 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
913 /*
914 * we want to load metadata of the spectrum even if it does not contain
915 peaks
916
917 * if(!native_pwiz_spectrum_sp->hasBinaryData())
918 {
919 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
920 "
921 ()"
922 //<< "native pwiz spectrum is empty, continuing.";
923 continue;
924 }
925 */
926
927 // Instantiate the mass spectrum id that will hold critical information
928 // like the the native id string and the spectrum index.
929
930 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
931
932 // Get the spectrum native id as a QString to store it in the mass
933 // spectrum id class. This is will allow later to refer to the same
934 // spectrum starting back from the file.
935
936 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
937 massSpectrumId.setNativeId(native_id);
938
939 // Finally, instantiate the qualified mass spectrum with its id. This
940 // function will continue performing pappso-spectrum detailed
941 // qualification.
942
943 bool ok = false;
944
945 QualifiedMassSpectrum qualified_mass_spectrum =
947 massSpectrumId,
948 native_pwiz_spectrum_sp.get(),
949 want_binary_data,
950 ok);
951
952 if(!ok)
953 {
954 // qDebug() << "Encountered a mass spectrum for which the returned "
955 //"status is bad.";
956 continue;
957 }
958
959 // Before handing the mass spectrum out to the handler, see if the
960 // native mass spectrum was empty or not.
961
962 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
963 // qDebug() << "The mass spectrum has not defaultArrayLength";
964
965 qualified_mass_spectrum.setEmptyMassSpectrum(
966 !native_pwiz_spectrum_sp->defaultArrayLength);
967
968 // The handler will receive the index of the mass spectrum in the
969 // current run via the mass spectrum id member datum.
970 if(ms_level == 0)
971 {
972 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
973 }
974 else
975 {
976 if(qualified_mass_spectrum.getMsLevel() == ms_level)
977 {
978 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
979 }
980 }
981 }
982
983 setGlobalLocaleToCurrentOs(backup_env);
984 // End of
985 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
986
987 // Now let the loading handler know that the loading of the data has ended.
988 // The handler might need this "signal" to perform additional tasks or to
989 // cleanup cruft.
990
991 // qDebug() << "Loading ended";
992 handler.loadingEnded();
993 }
994
997 {
999 // qDebug();
1000
1001 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
1002 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
1003 // spectrum has been fully qualified (that is, the member data have been
1004 // set), it is transferred to the handler passed as parameter to this
1005 // function for the consumer to do what it wants with it.
1006
1007 // Does the handler consuming the mass spectra read from file want these
1008 // mass spectra to hold the binary data arrays (mz/i vectors)?
1009
1010 const bool want_binary_data = config.needPeakList();
1011
1012
1013 std::string backup_env = setGlobalLocaleToEnglish();
1014
1015 // qDebug();
1016 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
1017 // run member of msp_msData.
1018
1019 pwiz::msdata::SpectrumListPtr spectrum_list_p =
1020 msp_msData->run.spectrumListPtr;
1021
1022 // We'll need it to perform the looping in the spectrum list.
1023 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
1024
1025 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
1026
1027 // Inform the handler of the spectrum list so that it can handle feedback to
1028 // the user.
1029 handler.spectrumListHasSize(spectrum_list_size);
1030
1031 // Iterate in the full list of spectra.
1032
1033 qDebug();
1034 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1035 {
1036
1037
1038 // If the user of this reader instance wants to stop reading the
1039 // spectra, then break this loop.
1040 if(handler.shouldStop())
1041 {
1042 qDebug() << "The operation was cancelled. Breaking the loop.";
1043 break;
1044 }
1045
1046 // qDebug();
1047 // Get the native pwiz-spectrum from the spectrum list.
1048 // Note that this pointer is a shared pointer from pwiz.
1049
1050 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
1051 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
1052 // qDebug();
1053 /*
1054 * we want to load metadata of the spectrum even if it does not contain
1055 peaks
1056
1057 * if(!native_pwiz_spectrum_sp->hasBinaryData())
1058 {
1059 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
1060 "
1061 ()"
1062 //<< "native pwiz spectrum is empty, continuing.";
1063 continue;
1064 }
1065 */
1066
1067 // Instantiate the mass spectrum id that will hold critical information
1068 // like the the native id string and the spectrum index.
1069
1070 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
1071
1072 // qDebug();
1073 // Get the spectrum native id as a QString to store it in the mass
1074 // spectrum id class. This is will allow later to refer to the same
1075 // spectrum starting back from the file.
1076
1077 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
1078 massSpectrumId.setNativeId(native_id);
1079
1080 // Finally, instantiate the qualified mass spectrum with its id. This
1081 // function will continue performing pappso-spectrum detailed
1082 // qualification.
1083
1084 bool ok = false;
1085
1086 QualifiedMassSpectrum qualified_mass_spectrum =
1088 massSpectrumId,
1089 native_pwiz_spectrum_sp.get(),
1090 want_binary_data,
1091 ok);
1092
1093 if(!ok)
1094 {
1095 // qDebug() << "Encountered a mass spectrum for which the returned "
1096 //"status is bad.";
1097 continue;
1098 }
1099
1100 // qDebug();
1101 // Before handing the mass spectrum out to the handler, see if the
1102 // native mass spectrum was empty or not.
1103
1104 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
1105 // qDebug() << "The mass spectrum has not defaultArrayLength";
1106
1107 qualified_mass_spectrum.setEmptyMassSpectrum(
1108 !native_pwiz_spectrum_sp->defaultArrayLength);
1109
1110 // The handler will receive the index of the mass spectrum in the
1111 // current run via the mass spectrum id member datum.
1112
1113 // qDebug();
1114 if(config.acceptMsLevel(qualified_mass_spectrum.getMsLevel()))
1115 {
1117 qualified_mass_spectrum.getRtInSeconds()))
1118 {
1119 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
1120 }
1121 }
1122 }
1123
1124
1125 qDebug();
1126 setGlobalLocaleToCurrentOs(backup_env);
1127 // End of
1128 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1129
1130 // Now let the loading handler know that the loading of the data has ended.
1131 // The handler might need this "signal" to perform additional tasks or to
1132 // cleanup cruft.
1133
1134 qDebug() << "Loading ended";
1135 handler.loadingEnded();
1136 }
1137
1139 {
1140 return msp_msData->run.spectrumListPtr.get()->size();
1141 }
1142
1144 {
1145 return m_hasScanNumbers;
1146 }
1147
1149 {
1150 msp_msData = nullptr;
1151 return true;
1152 }
1153
1155 {
1156 if(msp_msData == nullptr)
1157 {
1158 initialize();
1159 }
1160 return true;
1161 }
1162
1163
1165 std::size_t spectrum_index, pappso::PrecisionPtr precision) const
1166 {
1167
1168 QualifiedMassSpectrum mass_spectrum =
1169 qualifiedMassSpectrum(spectrum_index, false);
1170
1171 return newXicCoordSPtrFromQualifiedMassSpectrum(mass_spectrum, precision);
1172 }
1173
1175 const pappso::QualifiedMassSpectrum &mass_spectrum,
1176 pappso::PrecisionPtr precision) const
1177 {
1178 XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
1179
1180 xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
1181
1182 xic_coord.get()->mzRange =
1183 MzRange(mass_spectrum.getPrecursorMz(), precision);
1184
1185 return xic_coord;
1186 }
// Switch the process-wide locale to "C" and return a std::string that is
// meant to let the caller put the previous locale back afterwards.
//
// NOTE(review): as written, the returned string is always empty. See the
// remarks next to the two env_backup declarations below.
1188 {
// This outer variable is the one that gets returned; nothing in this
// function ever assigns to it.
1189 std::string env_backup;
1190 try
1191 {
1192#ifdef MXE
1193 // std::locale::global(std::locale("C")); // set locale to default
1194 // locale
1195
// NOTE(review): this declares a second env_backup that shadows the outer
// one. The locale name queried here is lost when the try block ends; to
// hand it back to the caller this should presumably be a plain assignment
// to the outer variable.
1196 std::string env_backup = std::setlocale(LC_ALL, nullptr);
1197 std::setlocale(LC_ALL, "C");
1198#else
// No backup is taken on this path: the outer env_backup stays empty.
1199 std::locale::global(std::locale("C")); // set locale to default locale
1200#endif
1201 }
1202 catch(std::exception &error)
1203 {
// The statement that opens this call (listing line 1204) is not visible in
// this listing; only its message argument is.
1205 QObject::tr("Error trying to set local to C : %1").arg(error.what()));
1206 }
1207 return env_backup;
1208 }
1209
1211 const std::string &environment_locale)
1212 {
1213 try
1214 {
1215#ifdef MXE
1216 // std::locale::global(std::locale("C")); // set locale to default
1217 // locale
1218 std::setlocale(LC_ALL, environment_locale.c_str());
1219#else
1220 std::locale::global(
1221 std::locale("")); // sets locale according to OS environment
1222#endif
1223 }
1224 catch(std::exception &error)
1225 {
1227 QObject::tr(
1228 "Error trying to set local to original system one %1 : %2")
1229 .arg(environment_locale.c_str())
1230 .arg(error.what()));
1231 }
1232 }
1233
1234} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
void sortMz()
Sort the DataPoint instances of this spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
bool acceptMsLevel(std::size_t ms_level) const
bool acceptRetentionTimeInSeconds(double retention_time_in_seconds) const
base class to read an MS run; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition msrunreader.h:63
MsRunIdCstSPtr mcsp_msRunId
virtual const QString & qwhat() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
pwiz::msdata::MSDataPtr msp_msData
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
static std::string setGlobalLocaleToEnglish()
virtual void readSpectrumCollectionWithMsrunReadConfig(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler)
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
static void setGlobalLocaleToCurrentOs(const std::string &environment_locale)
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler) override
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its scan number
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers; by default, it returns false. Only overridden func...
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual bool acquireDevice() override
acquire data back end device
virtual void initialize() override
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file; must be implemented by private MS run reader,...
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
virtual bool releaseDevice() override
release data back end device; if the data back end is released, the developer has to use acquireDev...
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
const OboPsiModTerm getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
const std::vector< PrecursorIonData > & getPrecursorIonData() const
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
const QString & getPrecursorNativeId() const
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
get precursor mz
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
void setParameterValue(QualifiedMassSpectrumParameter parameter, const QVariant &value)
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectra from the MsRunReader class
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
static std::string toUtf8StandardString(const QString &text)
Definition utils.cpp:165
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
@ MGF
Mascot format.
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
double pappso_double
A type definition for doubles.
Definition types.h:50
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
std::shared_ptr< XicCoord > XicCoordSPtr
Definition xiccoord.h:43
MSrun file reader based on the ProteoWizard library.