RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Greg Landrum
3// Adapted from pseudo-code from Roger Sayle
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11
12#include <RDGeneral/export.h>
13#include <RDGeneral/hanoiSort.h>
14#include <GraphMol/ROMol.h>
15#include <GraphMol/RingInfo.h>
17#include <cstdint>
18#include <boost/dynamic_bitset.hpp>
20#include <cstring>
21#include <iostream>
22#include <cassert>
23#include <cstring>
24#include <vector>
25
26// #define VERBOSE_CANON 1
27
28namespace RDKit {
29namespace Canon {
30class canon_atom;
31
33 Bond::BondType bondType{Bond::BondType::UNSPECIFIED};
34 unsigned int bondStereo{
35 static_cast<unsigned int>(Bond::BondStereo::STEREONONE)};
36 unsigned int nbrSymClass{0};
37 unsigned int nbrIdx{0};
38 Bond::BondStereo stype{Bond::BondStereo::STEREONONE};
39 const canon_atom *controllingAtoms[4]{nullptr, nullptr, nullptr, nullptr};
40 const std::string *p_symbol{
41 nullptr}; // if provided, this is used to order bonds
42 unsigned int bondIdx{0};
43
46 unsigned int nsc, unsigned int bidx)
47 : bondType(bt),
48 bondStereo(static_cast<unsigned int>(bs)),
49 nbrSymClass(nsc),
50 nbrIdx(ni),
51 bondIdx(bidx) {}
52 bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
53 unsigned int nsc, unsigned int bidx)
54 : bondType(bt),
55 bondStereo(bs),
56 nbrSymClass(nsc),
57 nbrIdx(ni),
58 bondIdx(bidx) {}
59
60 int compareStereo(const bondholder &o) const;
61
62 bool operator<(const bondholder &o) const { return compare(*this, o) < 0; }
63 static bool greater(const bondholder &lhs, const bondholder &rhs) {
64 return compare(lhs, rhs) > 0;
65 }
66
67 static int compare(const bondholder &x, const bondholder &y,
68 unsigned int div = 1) {
69 if (x.p_symbol && y.p_symbol) {
70 if ((*x.p_symbol) < (*y.p_symbol)) {
71 return -1;
72 } else if ((*x.p_symbol) > (*y.p_symbol)) {
73 return 1;
74 }
75 }
76 if (x.bondType < y.bondType) {
77 return -1;
78 } else if (x.bondType > y.bondType) {
79 return 1;
80 }
81 if (x.bondStereo < y.bondStereo) {
82 return -1;
83 } else if (x.bondStereo > y.bondStereo) {
84 return 1;
85 }
86 auto scdiv = x.nbrSymClass / div - y.nbrSymClass / div;
87 if (scdiv) {
88 return scdiv;
89 }
90 if (x.bondStereo && y.bondStereo) {
91 auto cs = x.compareStereo(y);
92 if (cs) {
93 return cs;
94 }
95 }
96 return 0;
97 }
98};
100 public:
101 const Atom *atom{nullptr};
102 int index{-1};
103 unsigned int degree{0};
104 unsigned int totalNumHs{0};
105 bool hasRingNbr{false};
106 bool isRingStereoAtom{false};
107 int *nbrIds{nullptr};
108 const std::string *p_symbol{
109 nullptr}; // if provided, this is used to order atoms
110 std::vector<int> neighborNum;
111 std::vector<int> revistedNeighbors;
112 std::vector<bondholder> bonds;
113
115
116 ~canon_atom() { free(nbrIds); }
117};
118
120 canon_atom *atoms, std::vector<bondholder> &nbrs);
121
123 canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
124 std::vector<std::pair<unsigned int, unsigned int>> &result);
125
126/*
127 * Different types of atom compare functions:
128 *
129 * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
130 *dependent chirality
131 * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
132 *highly symmetrical graphs/molecules
133 * - AtomCompareFunctor: Basic atom compare function which also allows to
134 *include neighbors within the ranking
135 */
136
138 public:
139 Canon::canon_atom *dp_atoms{nullptr};
140 const ROMol *dp_mol{nullptr};
141 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
142 *dp_bondsInPlay{nullptr};
143
146 Canon::canon_atom *atoms, const ROMol &m,
147 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
148 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
149 : dp_atoms(atoms),
150 dp_mol(&m),
151 dp_atomsInPlay(atomsInPlay),
152 dp_bondsInPlay(bondsInPlay) {}
153 int operator()(int i, int j) const {
154 PRECONDITION(dp_atoms, "no atoms");
155 PRECONDITION(dp_mol, "no molecule");
156 PRECONDITION(i != j, "bad call");
157 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
158 return 0;
159 }
160
161 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
162 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
163 }
164 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
165 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
166 }
167 for (unsigned int ii = 0;
168 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
169 int cmp =
170 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
171 if (cmp) {
172 return cmp;
173 }
174 }
175
176 std::vector<std::pair<unsigned int, unsigned int>> swapsi;
177 std::vector<std::pair<unsigned int, unsigned int>> swapsj;
178 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
179 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
180 }
181 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
182 updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
183 }
184 for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
185 int cmp = swapsi[ii].second - swapsj[ii].second;
186 if (cmp) {
187 return cmp;
188 }
189 }
190 return 0;
191 }
192};
193
195 public:
196 Canon::canon_atom *dp_atoms{nullptr};
197 const ROMol *dp_mol{nullptr};
198 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
199 *dp_bondsInPlay{nullptr};
200
203 Canon::canon_atom *atoms, const ROMol &m,
204 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
205 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
206 : dp_atoms(atoms),
207 dp_mol(&m),
208 dp_atomsInPlay(atomsInPlay),
209 dp_bondsInPlay(bondsInPlay) {}
210 int operator()(int i, int j) const {
211 PRECONDITION(dp_atoms, "no atoms");
212 PRECONDITION(dp_mol, "no molecule");
213 PRECONDITION(i != j, "bad call");
214 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
215 return 0;
216 }
217
218 if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
219 return -1;
220 } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
221 return 1;
222 }
223
224 if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
225 return -1;
226 } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
227 return 1;
228 }
229
230 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
231 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
232 }
233 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
234 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
235 }
236 for (unsigned int ii = 0;
237 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
238 int cmp =
239 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
240 if (cmp) {
241 return cmp;
242 }
243 }
244
245 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
246 return -1;
247 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
248 return 1;
249 }
250 return 0;
251 }
252};
253
254namespace {
255unsigned int getChiralRank(const ROMol *dp_mol, canon_atom *dp_atoms,
256 unsigned int i) {
257 unsigned int res = 0;
258 std::vector<unsigned int> perm;
259 perm.reserve(dp_atoms[i].atom->getDegree());
260 for (const auto nbr : dp_mol->atomNeighbors(dp_atoms[i].atom)) {
261 auto rnk = dp_atoms[nbr->getIdx()].index;
262 // make sure we don't have duplicate ranks
263 if (std::find(perm.begin(), perm.end(), rnk) != perm.end()) {
264 break;
265 } else {
266 perm.push_back(rnk);
267 }
268 }
269 if (perm.size() == dp_atoms[i].atom->getDegree()) {
270 auto ctag = dp_atoms[i].atom->getChiralTag();
271 if (ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ||
272 ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CCW) {
273 auto sortedPerm = perm;
274 std::sort(sortedPerm.begin(), sortedPerm.end());
275 auto nswaps = countSwapsToInterconvert(perm, sortedPerm);
276 res = ctag == Atom::ChiralType::CHI_TETRAHEDRAL_CW ? 2 : 1;
277 if (nswaps % 2) {
278 res = res == 2 ? 1 : 2;
279 }
280 }
281 }
282 return res;
283}
284} // namespace
286 unsigned int getAtomRingNbrCode(unsigned int i) const {
287 if (!dp_atoms[i].hasRingNbr) {
288 return 0;
289 }
290
291 int *nbrs = dp_atoms[i].nbrIds;
292 unsigned int code = 0;
293 for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
294 if (dp_atoms[nbrs[j]].isRingStereoAtom) {
295 code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
296 }
297 }
298 return code;
299 }
300
301 int basecomp(int i, int j) const {
302 unsigned int ivi, ivj;
303
304 // always start with the current class:
305 ivi = dp_atoms[i].index;
306 ivj = dp_atoms[j].index;
307 if (ivi < ivj) {
308 return -1;
309 } else if (ivi > ivj) {
310 return 1;
311 }
312 // use the atom-mapping numbers if they were assigned
313 /* boost::any_cast FILED here:
314 std::string molAtomMapNumber_i="";
315 std::string molAtomMapNumber_j="";
316 */
317 int molAtomMapNumber_i = 0;
318 int molAtomMapNumber_j = 0;
319 dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
320 molAtomMapNumber_i);
321 dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
322 molAtomMapNumber_j);
323 if (molAtomMapNumber_i < molAtomMapNumber_j) {
324 return -1;
325 } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
326 return 1;
327 }
328 // start by comparing degree
329 ivi = dp_atoms[i].degree;
330 ivj = dp_atoms[j].degree;
331 if (ivi < ivj) {
332 return -1;
333 } else if (ivi > ivj) {
334 return 1;
335 }
336 if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
337 if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
338 return -1;
339 } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
340 return 1;
341 } else {
342 return 0;
343 }
344 }
345
346 // move onto atomic number
347 ivi = dp_atoms[i].atom->getAtomicNum();
348 ivj = dp_atoms[j].atom->getAtomicNum();
349 if (ivi < ivj) {
350 return -1;
351 } else if (ivi > ivj) {
352 return 1;
353 }
354 // isotopes if we're using them
355 if (df_useIsotopes) {
356 ivi = dp_atoms[i].atom->getIsotope();
357 ivj = dp_atoms[j].atom->getIsotope();
358 if (ivi < ivj) {
359 return -1;
360 } else if (ivi > ivj) {
361 return 1;
362 }
363 }
364
365 // nHs
366 ivi = dp_atoms[i].totalNumHs;
367 ivj = dp_atoms[j].totalNumHs;
368 if (ivi < ivj) {
369 return -1;
370 } else if (ivi > ivj) {
371 return 1;
372 }
373 // charge
374 ivi = dp_atoms[i].atom->getFormalCharge();
375 ivj = dp_atoms[j].atom->getFormalCharge();
376 if (ivi < ivj) {
377 return -1;
378 } else if (ivi > ivj) {
379 return 1;
380 }
381 // chirality if we're using it
382 if (df_useChirality) {
383 // first atom stereochem:
384 ivi = 0;
385 ivj = 0;
386 // can't actually use values here, because they are arbitrary
387 ivi = dp_atoms[i].atom->getChiralTag() != 0;
388 ivj = dp_atoms[j].atom->getChiralTag() != 0;
389 if (ivi < ivj) {
390 return -1;
391 } else if (ivi > ivj) {
392 return 1;
393 }
394 // stereo set
395 if (ivi && ivj) {
396 if (ivi) {
397 ivi = getChiralRank(dp_mol, dp_atoms, i);
398 }
399 if (ivj) {
400 ivj = getChiralRank(dp_mol, dp_atoms, j);
401 }
402 if (ivi < ivj) {
403 return -1;
404 } else if (ivi > ivj) {
405 return 1;
406 }
407 }
408 }
409
410 if (df_useChiralityRings) {
411 // ring stereochemistry
412 ivi = getAtomRingNbrCode(i);
413 ivj = getAtomRingNbrCode(j);
414 if (ivi < ivj) {
415 return -1;
416 } else if (ivi > ivj) {
417 return 1;
418 } // bond stereo is taken care of in the neighborhood comparison
419 }
420 return 0;
421 }
422
423 public:
424 Canon::canon_atom *dp_atoms{nullptr};
425 const ROMol *dp_mol{nullptr};
426 const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
427 *dp_bondsInPlay{nullptr};
428 bool df_useNbrs{false};
429 bool df_useIsotopes{true};
430 bool df_useChirality{true};
431 bool df_useChiralityRings{true};
432
435 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
436 const boost::dynamic_bitset<> *bondsInPlay = nullptr)
437 : dp_atoms(atoms),
438 dp_mol(&m),
439 dp_atomsInPlay(atomsInPlay),
440 dp_bondsInPlay(bondsInPlay),
441 df_useNbrs(false),
442 df_useIsotopes(true),
443 df_useChirality(true),
444 df_useChiralityRings(true) {}
445 int operator()(int i, int j) const {
446 if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
447 return 0;
448 }
449 int v = basecomp(i, j);
450 if (v) {
451 return v;
452 }
453
454 if (df_useNbrs) {
455 if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
456 updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
457 }
458 if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
459 updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
460 }
461
462 for (unsigned int ii = 0;
463 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
464 ++ii) {
465 int cmp =
466 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
467 if (cmp) {
468 return cmp;
469 }
470 }
471
472 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
473 return -1;
474 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
475 return 1;
476 }
477 }
478 return 0;
479 }
480};
481
482/*
483 * A compare function to discriminate chiral atoms, similar to the CIP rules.
484 * This functionality is currently not used.
485 */
486
487const unsigned int ATNUM_CLASS_OFFSET = 10000;
489 void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
490 for (unsigned j = 0; j < nbrs.size(); ++j) {
491 unsigned int nbrIdx = nbrs[j].nbrIdx;
492 if (nbrIdx == ATNUM_CLASS_OFFSET) {
493 // Ignore the Hs
494 continue;
495 }
496 const Atom *nbr = dp_atoms[nbrIdx].atom;
497 nbrs[j].nbrSymClass =
498 nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
499 }
500 std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
501 // FIX: don't want to be doing this long-term
502 }
503
504 int basecomp(int i, int j) const {
505 PRECONDITION(dp_atoms, "no atoms");
506 unsigned int ivi, ivj;
507
508 // always start with the current class:
509 ivi = dp_atoms[i].index;
510 ivj = dp_atoms[j].index;
511 if (ivi < ivj) {
512 return -1;
513 } else if (ivi > ivj) {
514 return 1;
515 }
516
517 // move onto atomic number
518 ivi = dp_atoms[i].atom->getAtomicNum();
519 ivj = dp_atoms[j].atom->getAtomicNum();
520 if (ivi < ivj) {
521 return -1;
522 } else if (ivi > ivj) {
523 return 1;
524 }
525
526 // isotopes:
527 ivi = dp_atoms[i].atom->getIsotope();
528 ivj = dp_atoms[j].atom->getIsotope();
529 if (ivi < ivj) {
530 return -1;
531 } else if (ivi > ivj) {
532 return 1;
533 }
534
535 // atom stereochem:
536 ivi = 0;
537 ivj = 0;
538 std::string cipCode;
539 if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
540 cipCode)) {
541 ivi = cipCode == "R" ? 2 : 1;
542 }
543 if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
544 cipCode)) {
545 ivj = cipCode == "R" ? 2 : 1;
546 }
547 if (ivi < ivj) {
548 return -1;
549 } else if (ivi > ivj) {
550 return 1;
551 }
552
553 // bond stereo is taken care of in the neighborhood comparison
554 return 0;
555 }
556
557 public:
558 Canon::canon_atom *dp_atoms{nullptr};
559 const ROMol *dp_mol{nullptr};
560 bool df_useNbrs{false};
563 : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
564 int operator()(int i, int j) const {
565 PRECONDITION(dp_atoms, "no atoms");
566 PRECONDITION(dp_mol, "no molecule");
567 PRECONDITION(i != j, "bad call");
568 int v = basecomp(i, j);
569 if (v) {
570 return v;
571 }
572
573 if (df_useNbrs) {
574 getAtomNeighborhood(dp_atoms[i].bonds);
575 getAtomNeighborhood(dp_atoms[j].bonds);
576
577 // we do two passes through the neighbor lists. The first just uses the
578 // atomic numbers (by passing the optional 10000 to bondholder::compare),
579 // the second takes the already-computed index into account
580 for (unsigned int ii = 0;
581 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
582 ++ii) {
583 int cmp = bondholder::compare(
584 dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
585 if (cmp) {
586 return cmp;
587 }
588 }
589 for (unsigned int ii = 0;
590 ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
591 ++ii) {
592 int cmp =
593 bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
594 if (cmp) {
595 return cmp;
596 }
597 }
598 if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
599 return -1;
600 } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
601 return 1;
602 }
603 }
604 return 0;
605 }
606};
607
608/*
609 * Basic canonicalization function to organize the partitions which will be
610 * sorted next.
611 * */
612
613template <typename CompareFunc>
614void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
615 int mode, int *order, int *count, int &activeset,
616 int *next, int *changed, char *touchedPartitions) {
617 unsigned int nAtoms = mol.getNumAtoms();
618 int partition;
619 int symclass = 0;
620 int *start;
621 int offset;
622 int index;
623 int len;
624 int i;
625 // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
626
627 // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
628 while (activeset != -1) {
629 // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
630 // std::cerr<<" next: ";
631 // for(unsigned int ii=0;ii<nAtoms;++ii){
632 // std::cerr<<ii<<":"<<next[ii]<<" ";
633 // }
634 // std::cerr<<std::endl;
635 // for(unsigned int ii=0;ii<nAtoms;++ii){
636 // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
637 // "<<atoms[order[ii]].index<<std::endl;
638 // }
639
640 partition = activeset;
641 activeset = next[partition];
642 next[partition] = -2;
643
644 len = count[partition];
645 offset = atoms[partition].index;
646 start = order + offset;
647 // std::cerr<<"\n\n**************************************************************"<<std::endl;
648 // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
649 // for(unsigned int ii=0;ii<len;++ii){
650 // std::cerr<<" "<<order[offset+ii]+1;
651 // }
652 // std::cerr<<std::endl;
653 // for(unsigned int ii=0;ii<nAtoms;++ii){
654 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
655 // "<<atoms[order[ii]].index<<std::endl;
656 // }
657 hanoisort(start, len, count, changed, compar);
658 // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
659 // std::cerr<<" result:";
660 // for(unsigned int ii=0;ii<nAtoms;++ii){
661 // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
662 // "<<atoms[order[ii]].index<<std::endl;
663 // }
664 for (int k = 0; k < len; ++k) {
665 changed[start[k]] = 0;
666 }
667
668 index = start[0];
669 // std::cerr<<" len:"<<len<<" index:"<<index<<"
670 // count:"<<count[index]<<std::endl;
671 for (i = count[index]; i < len; i++) {
672 index = start[i];
673 if (count[index]) {
674 symclass = offset + i;
675 }
676 atoms[index].index = symclass;
677 // std::cerr<<" "<<index+1<<"("<<symclass<<")";
678 // if(mode && (activeset<0 || count[index]>count[activeset]) ){
679 // activeset=index;
680 //}
681 for (unsigned j = 0; j < atoms[index].degree; ++j) {
682 changed[atoms[index].nbrIds[j]] = 1;
683 }
684 }
685 // std::cerr<<std::endl;
686
687 if (mode) {
688 index = start[0];
689 for (i = count[index]; i < len; i++) {
690 index = start[i];
691 for (unsigned j = 0; j < atoms[index].degree; ++j) {
692 unsigned int nbor = atoms[index].nbrIds[j];
693 touchedPartitions[atoms[nbor].index] = 1;
694 }
695 }
696 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
697 if (touchedPartitions[ii]) {
698 partition = order[ii];
699 if ((count[partition] > 1) && (next[partition] == -2)) {
700 next[partition] = activeset;
701 activeset = partition;
702 }
703 touchedPartitions[ii] = 0;
704 }
705 }
706 }
707 }
708} // end of RefinePartitions()
709
710template <typename CompareFunc>
711void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
712 int mode, int *order, int *count, int &activeset, int *next,
713 int *changed, char *touchedPartitions) {
714 unsigned int nAtoms = mol.getNumAtoms();
715 int partition;
716 int offset;
717 int index;
718 int len;
719 int oldPart = 0;
720
721 for (unsigned int i = 0; i < nAtoms; i++) {
722 partition = order[i];
723 oldPart = atoms[partition].index;
724 while (count[partition] > 1) {
725 len = count[partition];
726 offset = atoms[partition].index + len - 1;
727 index = order[offset];
728 atoms[index].index = offset;
729 count[partition] = len - 1;
730 count[index] = 1;
731
732 // test for ions, water molecules with no
733 if (atoms[index].degree < 1) {
734 continue;
735 }
736 for (unsigned j = 0; j < atoms[index].degree; ++j) {
737 unsigned int nbor = atoms[index].nbrIds[j];
738 touchedPartitions[atoms[nbor].index] = 1;
739 changed[nbor] = 1;
740 }
741
742 for (unsigned int ii = 0; ii < nAtoms; ++ii) {
743 if (touchedPartitions[ii]) {
744 int npart = order[ii];
745 if ((count[npart] > 1) && (next[npart] == -2)) {
746 next[npart] = activeset;
747 activeset = npart;
748 }
749 touchedPartitions[ii] = 0;
750 }
751 }
752 RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
753 changed, touchedPartitions);
754 }
755 // not sure if this works each time
756 if (atoms[partition].index != oldPart) {
757 i -= 1;
758 }
759 }
760} // end of BreakTies()
761
763 int *order, int *count,
764 canon_atom *atoms);
765
766RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
767 int *count, int &activeset,
768 int *next, int *changed);
769
771 std::vector<unsigned int> &res,
772 bool breakTies = true,
773 bool includeChirality = true,
774 bool includeIsotopes = true);
775
777 const ROMol &mol, std::vector<unsigned int> &res,
778 const boost::dynamic_bitset<> &atomsInPlay,
779 const boost::dynamic_bitset<> &bondsInPlay,
780 const std::vector<std::string> *atomSymbols,
781 const std::vector<std::string> *bondSymbols, bool breakTies,
782 bool includeChirality, bool includeIsotope);
783
785 const ROMol &mol, std::vector<unsigned int> &res,
786 const boost::dynamic_bitset<> &atomsInPlay,
787 const boost::dynamic_bitset<> &bondsInPlay,
788 const std::vector<std::string> *atomSymbols = nullptr,
789 bool breakTies = true, bool includeChirality = true,
790 bool includeIsotopes = true) {
791 rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
792 breakTies, includeChirality, includeIsotopes);
793};
794
796 std::vector<unsigned int> &res);
797
799 std::vector<Canon::canon_atom> &atoms,
800 bool includeChirality = true);
801
802namespace detail {
804 std::vector<Canon::canon_atom> &atoms,
805 bool includeChirality,
806 const std::vector<std::string> *atomSymbols,
807 const std::vector<std::string> *bondSymbols,
808 const boost::dynamic_bitset<> &atomsInPlay,
809 const boost::dynamic_bitset<> &bondsInPlay,
810 bool needsInit);
811void freeCanonAtoms(std::vector<Canon::canon_atom> &atoms);
812template <typename T>
813void rankWithFunctor(T &ftor, bool breakTies, int *order,
814 bool useSpecial = false, bool useChirality = false,
815 const boost::dynamic_bitset<> *atomsInPlay = nullptr,
816 const boost::dynamic_bitset<> *bondsInPlay = nullptr);
817
818} // namespace detail
819
820} // namespace Canon
821} // namespace RDKit
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
The class for representing atoms.
Definition: Atom.h:68
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:126
BondType
the type of Bond
Definition: Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition: Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:434
int operator()(int i, int j) const
Definition: new_canon.h:445
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:562
int operator()(int i, int j) const
Definition: new_canon.h:564
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:145
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:202
std::vector< bondholder > bonds
Definition: new_canon.h:112
std::vector< int > revistedNeighbors
Definition: new_canon.h:111
std::vector< int > neighborNum
Definition: new_canon.h:110
unsigned int getNumAtoms() const
returns our number of atoms
Definition: ROMol.h:413
CXXAtomIterator< const MolGraph, Atom *const, MolGraph::adjacency_iterator > atomNeighbors(Atom const *at) const
Definition: ROMol.h:282
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:225
void rankWithFunctor(T &ftor, bool breakTies, int *order, bool useSpecial=false, bool useChirality=false, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
void initFragmentCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, bool needsInit)
void freeCanonAtoms(std::vector< Canon::canon_atom > &atoms)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:487
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int > > &result)
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:711
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:614
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
Std stuff.
Definition: Abbreviations.h:19
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:151
unsigned int countSwapsToInterconvert(const T &ref, T probe)
Definition: utils.h:54
const std::string * p_symbol
Definition: new_canon.h:40
Bond::BondType bondType
Definition: new_canon.h:33
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:63
bool operator<(const bondholder &o) const
Definition: new_canon.h:62
int compareStereo(const bondholder &o) const
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition: new_canon.h:52
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc, unsigned int bidx)
Definition: new_canon.h:45
unsigned int bondStereo
Definition: new_canon.h:34
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:67
unsigned int nbrSymClass
Definition: new_canon.h:36