libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::spectree::SpecTree Class Reference

#include <spectree.h>

Classes

struct  MapSimilarityCount
 
struct  MapSimilarityCountElement
 
struct  SpecTreeNode
 

Public Member Functions

 SpecTree (const BucketClustering &bucket_clustering)
 
virtual ~SpecTree ()
 
QString toString () const
 
const std::vector< std::size_t > & getSpectrumFirstNodeIndex () const
 get the adress map of sepctrum index and their first node index
 
void xtract (UiMonitorInterface &monitor, SpecXtractInterface &spec_xtract, std::size_t minimum_count, std::size_t cart_id_range_max, std::size_t cart_id_range_min, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
 
std::size_t extractSpectrumPairSimilarityCount (std::size_t spectrum_a_index, std::size_t spectrum_b_index) const
 get the number of common component for a pair of spectrum
 

Private Member Functions

void addNewNode (const SpecTreeNode &node)
 
void manageSideAccess (std::vector< std::size_t > &spectrumLastNodeIndex)
 
void walkBackInBranchFromNode (const SpecTree::SpecTreeNode &start_node, MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
 
std::size_t walkBackInBranchFromNodeToTarget (const SpecTree::SpecTreeNode &start_node, std::size_t spectrum_index_target) const
 
void extractSpectrumSimilarityCount (MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t spectrum_index, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
 get a map of similarities for a given spectrum index
 

Private Attributes

std::vector< SpecTreeNodem_nodeList
 
std::vector< std::size_t > m_spectrumFirstNodeIndex
 

Static Private Attributes

static constexpr std::size_t index_not_defined
 

Detailed Description

Todo:
write docs

Definition at line 52 of file spectree.h.

Constructor & Destructor Documentation

◆ SpecTree()

pappso::spectree::SpecTree::SpecTree ( const BucketClustering bucket_clustering)

Build a SpecTree with BucketClustering

Definition at line 48 of file spectree.cpp.

49{
50
51
52 std::vector<Bucket> bins = bucket_clustering.asSortedList();
53 SpecTreeNode node;
54 node.value = bins[0].getCartList().at(0);
55 node.count = 1;
56 qDebug() << node.value;
57 // Specific processing for the first bucket : the first node and all its
58 // children are added in SpecTrees and the transversal accessor is updated
59 // accordingly
60
61
62 std::vector<std::size_t> spectrumLastNodeIndex;
63 spectrumLastNodeIndex.resize(bucket_clustering.getItemCartCount(),
65 m_spectrumFirstNodeIndex.resize(bucket_clustering.getItemCartCount(),
67
68 m_spectrumFirstNodeIndex[node.value] = m_nodeList.size() - 1;
69
70
71 m_nodeList.push_back(node);
72 manageSideAccess(spectrumLastNodeIndex);
73 for(std::size_t cpt = 1; cpt < bins[0].size(); ++cpt)
74 {
75 node.parentIndex = m_nodeList.size() - 1;
76 node.nextIndex = index_not_defined;
77 node.value = bins[0].getCartList().at(cpt);
78 node.count = 1;
79 m_nodeList.push_back(node);
80 manageSideAccess(spectrumLastNodeIndex);
81 }
82
83
84 qDebug();
85 // For the rest of the buckets, we increment the node counter in prefix
86 // common parts without creating new nodes. Once out of the prefix common
87 // part, new nodes are created similarily as they were for the first bucket.
88 for(std::size_t bins_index = 1; bins_index < bins.size(); ++bins_index)
89 {
90 qDebug() << "bins_index=" << bins_index;
91 std::size_t pos = 0;
92 bool common = false;
93 std::size_t parentNode = index_not_defined;
94
95 std::size_t previous_bin_size = bins[bins_index - 1].size();
96
97 // detection of a common first element
98 if(pos < previous_bin_size)
99 {
100 if(bins[bins_index].getCartList().at(pos) ==
101 bins[bins_index - 1].getCartList().at(pos))
102 {
103 common = true;
104 }
105 }
106 // insertion procedure for a common prefix part insertion : we go
107 // through existing nodes and increment their counter value by one
108 while(common)
109 {
110 qDebug() << "common pos=" << pos;
111 std::size_t spectrum_index = bins[bins_index].getCartList().at(pos);
112 qDebug() << "common spectrum_index=" << spectrum_index;
113 std::size_t tempNodeIndex = spectrumLastNodeIndex[spectrum_index];
114 qDebug() << "common tempNodeIndex=" << tempNodeIndex;
115 m_nodeList[tempNodeIndex].count++;
116 // checking on the prefix common part persistence conditions ; if
117 // not valid, we are not in a common prefix part anymore for the
118 // currently inserted bucket
119 if((previous_bin_size - 1 == pos) ||
120 (bins[bins_index].size() - 1 == pos))
121 {
122 qDebug() << "last one";
123 common = false;
124 }
125 else
126 {
127 qDebug() << "bins[bins_index - 1].getSpectrumIndex(pos + 1)="
128 << bins[bins_index - 1].getCartList().at(pos + 1);
129 qDebug() << "bins[bins_index].getSpectrumIndex(pos + 1)="
130 << bins[bins_index].getCartList().at(pos + 1);
131 if(bins[bins_index - 1].getCartList().at(pos + 1) !=
132 bins[bins_index].getCartList().at(pos + 1))
133 {
134 common = false;
135 }
136 }
137 parentNode = tempNodeIndex;
138 ++pos;
139
140 qDebug() << "common=" << common;
141 }
142 qDebug();
143 // insertion procedure for all the remaining nodes not in the prefix
144 // common part : we create new nodes with a counter value of 1 and
145 // update the transversal accessor and last inserted node list
146 while(pos < bins[bins_index].size())
147 {
148 std::size_t spectrum_index = bins[bins_index].getCartList().at(pos);
149
150 node.parentIndex = parentNode;
151 node.nextIndex = index_not_defined;
152 node.value = spectrum_index;
153 node.count = 1;
154 m_nodeList.push_back(node);
155 manageSideAccess(spectrumLastNodeIndex);
156 parentNode = m_nodeList.size() - 1;
157
158 ++pos;
159 }
160 }
161}
std::vector< SpecTreeNode > m_nodeList
Definition spectree.h:172
static constexpr std::size_t index_not_defined
Definition spectree.h:118
std::vector< std::size_t > m_spectrumFirstNodeIndex
Definition spectree.h:173
void manageSideAccess(std::vector< std::size_t > &spectrumLastNodeIndex)
Definition spectree.cpp:174

References pappso::spectree::BucketClustering::asSortedList(), pappso::spectree::SpecTree::SpecTreeNode::count, pappso::spectree::BucketClustering::getItemCartCount(), index_not_defined, m_nodeList, m_spectrumFirstNodeIndex, manageSideAccess(), pappso::spectree::SpecTree::SpecTreeNode::nextIndex, pappso::spectree::SpecTree::SpecTreeNode::parentIndex, and pappso::spectree::SpecTree::SpecTreeNode::value.

◆ ~SpecTree()

pappso::spectree::SpecTree::~SpecTree ( )
virtual

Destructor

Definition at line 163 of file spectree.cpp.

164{
165}

Member Function Documentation

◆ addNewNode()

void pappso::spectree::SpecTree::addNewNode ( const SpecTreeNode node)
private

Definition at line 168 of file spectree.cpp.

169{
170 m_nodeList.push_back(node);
171}

References m_nodeList.

◆ extractSpectrumPairSimilarityCount()

std::size_t pappso::spectree::SpecTree::extractSpectrumPairSimilarityCount ( std::size_t  spectrum_a_index,
std::size_t  spectrum_b_index 
) const

get the number of common component for a pair of spectrum

Parameters
spectrum_a_indexthe first spectrum index
spectrum_b_indexthe second spectrum index
Returns
integer the number of common component between spectrum

Definition at line 435 of file spectree.cpp.

437{
438 if(spectrum_a_index > spectrum_b_index)
439 std::swap(spectrum_a_index, spectrum_b_index);
440
441 std::size_t count_target = 0;
442 if(spectrum_b_index < m_spectrumFirstNodeIndex.size())
443 {
444 std::size_t node_index = m_spectrumFirstNodeIndex[spectrum_b_index];
445 while(node_index != index_not_defined)
446 {
447 qDebug() << "node_index=" << node_index;
448 const SpecTreeNode &current_node = m_nodeList[node_index];
449 // go back in the tree branch from the current_node
450 count_target +=
451 walkBackInBranchFromNodeToTarget(current_node, spectrum_a_index);
452 node_index = current_node.nextIndex;
453 }
454 }
455 else
456 {
458 QObject::tr(
459 "spectrum_index %1 out of range (spectrum_index max size=%2)")
460 .arg(spectrum_b_index)
461 .arg(m_spectrumFirstNodeIndex.size()));
462 }
463 return count_target;
464}
std::size_t walkBackInBranchFromNodeToTarget(const SpecTree::SpecTreeNode &start_node, std::size_t spectrum_index_target) const
Definition spectree.cpp:374

References index_not_defined, m_nodeList, m_spectrumFirstNodeIndex, pappso::spectree::SpecTree::SpecTreeNode::nextIndex, and walkBackInBranchFromNodeToTarget().

◆ extractSpectrumSimilarityCount()

void pappso::spectree::SpecTree::extractSpectrumSimilarityCount ( MapSimilarityCount map_count,
std::size_t  minimum_count,
std::size_t  spectrum_index,
std::size_t  target_cart_id_max,
std::size_t  target_cart_id_min 
) const
private

get a map of similarities for a given spectrum index

this function can only retrieve spectrum index map lower than the spectrum index given in the parameters (check original publication for details)

Parameters
spectrum_indexthe spectrum index to retrieve similarities
spectrum_index_lower_limitlower spectrum index limit to save CPU (do not use it if you want a full similarity map)
Returns
map of spectrum_index keys containing the corresponding count value (similarity) for the targeted spectrum index

Extraction of the similarities above a given threshold between a given spectrum and all other spectra higher located in SpecTrees.

Parameters
tree_indicesSpecTrees the data has to be extracted from
spectrum_indexThe spectrum with ated in SpecTrees.
tree_indicesSpecTrees the data has to be extracted from
spectrum_indexThe spectrum with which we want to extract similarities
minimum_countThe threshold to use for the extraction
Since
0.1

Definition at line 407 of file spectree.cpp.

412{
413
414 qDebug() << " spectrum_index=" << spectrum_index;
415 // sOne;
416 // we position the current node at the first occurence of the spectrum in
417 // the transversal accessor and initialise variablethresholds accordingly
418 std::size_t node_index = m_spectrumFirstNodeIndex[spectrum_index];
419 while(node_index != index_not_defined)
420 {
421 qDebug() << "node_index=" << node_index;
422 const SpecTreeNode &current_node = m_nodeList[node_index];
423 // go back in the tree branch from the current_node
424 walkBackInBranchFromNode(current_node,
425 map_count,
426 minimum_count,
427 target_cart_id_max,
428 target_cart_id_min);
429 node_index = current_node.nextIndex;
430 }
431 qDebug();
432}
void walkBackInBranchFromNode(const SpecTree::SpecTreeNode &start_node, MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
Definition spectree.cpp:318

References index_not_defined, m_nodeList, m_spectrumFirstNodeIndex, pappso::spectree::SpecTree::SpecTreeNode::nextIndex, and walkBackInBranchFromNode().

Referenced by xtract().

◆ getSpectrumFirstNodeIndex()

const std::vector< std::size_t > & pappso::spectree::SpecTree::getSpectrumFirstNodeIndex ( ) const

get the adress map of sepctrum index and their first node index

convenience function intended for testing

Returns
vector of the size of max spectrum index containing list of first node index

Definition at line 224 of file spectree.cpp.

225{
227}

References m_spectrumFirstNodeIndex.

◆ manageSideAccess()

void pappso::spectree::SpecTree::manageSideAccess ( std::vector< std::size_t > &  spectrumLastNodeIndex)
private

Definition at line 174 of file spectree.cpp.

175{
176 auto spectrum_index = m_nodeList.back().value;
177 auto node_position = m_nodeList.size() - 1;
178 if(m_spectrumFirstNodeIndex[spectrum_index] == index_not_defined)
179 {
180 // first occurence of this spectrum
181 m_spectrumFirstNodeIndex[spectrum_index] = node_position;
182 spectrumLastNodeIndex[spectrum_index] = node_position;
183 }
184 else
185 {
186 if(spectrumLastNodeIndex[spectrum_index] == node_position)
187 {
188 }
189 else
190 {
191 m_nodeList[spectrumLastNodeIndex[spectrum_index]].nextIndex =
192 node_position;
193 spectrumLastNodeIndex[spectrum_index] = node_position;
194 }
195 }
196}

References index_not_defined, m_nodeList, and m_spectrumFirstNodeIndex.

Referenced by SpecTree().

◆ toString()

QString pappso::spectree::SpecTree::toString ( ) const

Definition at line 199 of file spectree.cpp.

200{
201 QString node_str("node|spectrum|parent|next|count|\n");
202 std::size_t i = 0;
203 for(auto node : m_nodeList)
204 {
205 int parent = -1;
206 if(node.parentIndex < index_not_defined)
207 parent = node.parentIndex;
208 int next = -1;
209 if(node.nextIndex < index_not_defined)
210 next = node.nextIndex;
211 node_str.append(QString("node_%1|%2|%3|%4|%5end\n")
212 .arg(i)
213 .arg(node.value)
214 .arg(parent)
215 .arg(next)
216 .arg(node.count));
217
218 i++;
219 }
220 return node_str;
221}

References index_not_defined, and m_nodeList.

◆ walkBackInBranchFromNode()

void pappso::spectree::SpecTree::walkBackInBranchFromNode ( const SpecTree::SpecTreeNode start_node,
MapSimilarityCount map_count,
std::size_t  minimum_count,
std::size_t  target_cart_id_max,
std::size_t  target_cart_id_min 
) const
private

Definition at line 318 of file spectree.cpp.

323{
324
325 std::size_t parent_id = start_node.parentIndex;
326
327 qDebug() << " start node spectrum=" << start_node.value
328 << " parent_id=" << parent_id << " init_count=" << start_node.count
329 << " minimum_count=" << minimum_count
330 << " target_cart_id_min=" << target_cart_id_min;
331
332 while(parent_id != index_not_defined)
333 {
334 // next_node :
335 const SpecTree::SpecTreeNode &current_node = m_nodeList[parent_id];
336
337 if(current_node.value < target_cart_id_min)
338 break;
339 if(current_node.value <= target_cart_id_max)
340 {
341 auto &map_id_count = map_count.map_id_count[current_node.value];
342 if(map_id_count.lastWitness == start_node.value)
343 {
344 map_id_count.count += start_node.count;
345 // if at one time, a similarity exceeds the threshold value for
346 // the first time, it is written into the proper stack
347 if((map_id_count.count >= minimum_count) &&
348 (map_id_count.aboveThreshold == false))
349 {
350 map_id_count.aboveThreshold = true;
351 map_count.aboveThreshold.push_back(current_node.value);
352 }
353 }
354 else
355 {
356 map_id_count.lastWitness = start_node.value;
357 map_id_count.count = start_node.count;
358 map_id_count.aboveThreshold = false;
359 // if at one time, a similarity exceeds the threshold value for
360 // the first time, it is written into the proper stack
361 if(map_id_count.count >= minimum_count)
362 {
363 map_id_count.aboveThreshold = true;
364 map_count.aboveThreshold.push_back(current_node.value);
365 }
366 map_count.keys.push_back(current_node.value);
367 }
368 }
369 parent_id = current_node.parentIndex;
370 }
371}

References pappso::spectree::SpecTree::MapSimilarityCount::aboveThreshold, pappso::spectree::SpecTree::SpecTreeNode::count, index_not_defined, pappso::spectree::SpecTree::MapSimilarityCount::keys, m_nodeList, pappso::spectree::SpecTree::MapSimilarityCount::map_id_count, pappso::spectree::SpecTree::SpecTreeNode::parentIndex, and pappso::spectree::SpecTree::SpecTreeNode::value.

Referenced by extractSpectrumSimilarityCount().

◆ walkBackInBranchFromNodeToTarget()

std::size_t pappso::spectree::SpecTree::walkBackInBranchFromNodeToTarget ( const SpecTree::SpecTreeNode start_node,
std::size_t  spectrum_index_target 
) const
private

Definition at line 374 of file spectree.cpp.

377{
378 std::size_t parent_id = start_node.parentIndex;
379
380 while(parent_id != index_not_defined)
381 {
382 // next_node :
383 const SpecTree::SpecTreeNode &current_node = m_nodeList[parent_id];
384
385 if(current_node.value < spectrum_index_target)
386 return 0;
387 if(current_node.value == spectrum_index_target)
388 return start_node.count;
389
390 parent_id = current_node.parentIndex;
391 }
392 return 0;
393}

References pappso::spectree::SpecTree::SpecTreeNode::count, index_not_defined, m_nodeList, pappso::spectree::SpecTree::SpecTreeNode::parentIndex, and pappso::spectree::SpecTree::SpecTreeNode::value.

Referenced by extractSpectrumPairSimilarityCount().

◆ xtract()

void pappso::spectree::SpecTree::xtract ( UiMonitorInterface monitor,
SpecXtractInterface spec_xtract,
std::size_t  minimum_count,
std::size_t  cart_id_range_max,
std::size_t  cart_id_range_min,
std::size_t  target_cart_id_max,
std::size_t  target_cart_id_min 
) const

Extract all similarities above a threshold value between all spectra from the deepest one up to the given limit using the predefined extraction algorithm. The results are stored in the mentionned reporter.

Performs the extraction of the similarities above a given threshold for all spectra between the deepest one and a given index in the transversal accessor. The extracted pairs are feed to a shifter to try to improve the spectrum identification. Afterwards, the retained results are written to the given reporter. This extraction step represents most of SpecOMS execution time. Note: This function heavily evolved through the development process to match new needs, it became clumsy and heavy. A clean refactor might be appropriate.

Parameters
monitorprogress monitor, indicates progression in spectree
spec_xtractreport results of similarities to the user (write in file or consolidate)
cart_id_range_maxThe position in the transversal accessor from which the similarities must be reported
cart_id_range_minThe position in the transversal accessor up to which the similarities must be reported
item_cart_index_lower_limitlower spectrum index limit to save CPU (do not use it if you want a full similarity map)

Definition at line 230 of file spectree.cpp.

237{
238
239 if(cart_id_range_max < cart_id_range_min)
240 {
241 std::swap(cart_id_range_max, cart_id_range_min);
242 }
243
244 if(cart_id_range_max < m_spectrumFirstNodeIndex.size())
245 {
246
247 monitor.setStatus(QObject::tr("starting SpecXtract operation"));
248
249 MapSimilarityCount map_count;
250 map_count.map_id_count.resize(cart_id_range_max);
251
252 // initialisation of the structure to store the compared pairs
253 // int[][] results = new int[m_count][3];
254 // int count = 0;
255
256 // iteration on the transversal accessor starting from the deepest spetrum
257 // and climbing up to limit
258 std::size_t spectra1 = cart_id_range_max;
259 monitor.setTotalSteps(cart_id_range_max - cart_id_range_min);
260 while(true)
261 {
262
263 if(monitor.shouldIstop())
264 {
266 QObject::tr("SpecXtract job stopped"));
267 }
268 qDebug() << "spectra1=" << spectra1;
269 spec_xtract.beginItemCartExtraction(spectra1);
270 monitor.count();
271 // System.out.println(start);
272
273 map_count.keys.clear();
274 map_count.aboveThreshold.clear();
275
277 minimum_count,
278 spectra1,
279 target_cart_id_max,
280 target_cart_id_min);
281
282 qDebug() << "spectra1=" << spectra1
283 << " map_count.aboveThreshold.size()="
284 << map_count.aboveThreshold.size()
285 << " map_count.keys.size()=" << map_count.keys.size();
286
287 for(std::size_t spectra2 : map_count.aboveThreshold)
288 {
289
290 // std::size_t spectra2 = *it;
291 qDebug() << "spectra2=" << spectra2;
292
293 // std::size_t total_extracted_count =
294 // map_count.map_id_count.at(spectra2);
295 spec_xtract.reportSimilarity(
296 spectra1, spectra2, map_count.map_id_count.at(spectra2).count);
297 }
298
299 spec_xtract.endItemCartExtraction(spectra1);
300 // monitor.count();
301 if(spectra1 == cart_id_range_min)
302 break;
303 spectra1--;
304 }
305 }
306 else
307 {
309 QObject::tr(
310 "item cart index %1 out of range (item cart id max size=%2)")
311 .arg(cart_id_range_max)
312 .arg(m_spectrumFirstNodeIndex.size()));
313 }
314 qDebug();
315}
void extractSpectrumSimilarityCount(MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t spectrum_index, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
get a map of similarities for a given spectrum index
Definition spectree.cpp:407

References pappso::spectree::SpecTree::MapSimilarityCount::aboveThreshold, pappso::spectree::SpecXtractInterface::beginItemCartExtraction(), pappso::UiMonitorInterface::count(), pappso::spectree::SpecXtractInterface::endItemCartExtraction(), extractSpectrumSimilarityCount(), pappso::spectree::SpecTree::MapSimilarityCount::keys, m_spectrumFirstNodeIndex, pappso::spectree::SpecTree::MapSimilarityCount::map_id_count, pappso::spectree::SpecXtractInterface::reportSimilarity(), pappso::UiMonitorInterface::setStatus(), pappso::UiMonitorInterface::setTotalSteps(), and pappso::UiMonitorInterface::shouldIstop().

Member Data Documentation

◆ index_not_defined

constexpr std::size_t pappso::spectree::SpecTree::index_not_defined
staticconstexprprivate
Initial value:
{
std::numeric_limits<std::size_t>::max()}

Definition at line 118 of file spectree.h.

118 {
119 std::numeric_limits<std::size_t>::max()};

Referenced by SpecTree(), extractSpectrumPairSimilarityCount(), extractSpectrumSimilarityCount(), manageSideAccess(), toString(), walkBackInBranchFromNode(), and walkBackInBranchFromNodeToTarget().

◆ m_nodeList

◆ m_spectrumFirstNodeIndex

std::vector<std::size_t> pappso::spectree::SpecTree::m_spectrumFirstNodeIndex
private

The documentation for this class was generated from the following files: