RDKit
Open-source cheminformatics and machine learning.
SubstructMatch.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SUBSTRUCTMATCH_H
12#define RD_SUBSTRUCTMATCH_H
13
14// std bits
15#include <vector>
16#include <functional>
17#include <unordered_map>
18#include <cstdint>
20#include <string>
21
22namespace RDKit {
23class ROMol;
24class Atom;
25class Bond;
26class ResonanceMolSupplier;
27class MolBundle;
28
//! \brief used to return matches from substructure searching.
//! The format is (queryAtomIdx, molAtomIdx)
using MatchVectType = std::vector<std::pair<int, int>>;
32
34 bool useChirality = false; //!< Use chirality in determining whether or not
35 //!< atoms/bonds match
36 bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
37 //!< determining whether atoms/bonds match
38 bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
39 //!< match each other
40 bool useQueryQueryMatches = false; //!< Consider query-query matches, not
41 //!< just simple matches
42 bool useGenericMatchers = false; //!< Looks for generic atoms in the query
43 //!< and uses them as part of the matching
44 bool recursionPossible = true; //!< Allow recursive queries
45 bool uniquify = true; //!< uniquify (by atom index) match results
46 unsigned int maxMatches = 1000; //!< maximum number of matches to return
47 int numThreads = 1; //!< number of threads to use when multi-threading
48 //!< is possible. 0 selects the number of
49 //!< concurrent threads supported by the hardware
50 //!< negative values are added to the number of
51 //!< concurrent threads supported by the hardware
52 std::function<bool(const ROMol &mol,
53 const std::vector<unsigned int> &match)>
54 extraFinalCheck; //!< a function to be called at the end to validate a
55 //!< match
56
58};
59
61 SubstructMatchParameters &params, const std::string &json);
62
63//! Find a substructure match for a query in a molecule
64/*!
65 \param mol The ROMol to be searched
66 \param query The query ROMol
67 \param matchParams Parameters controlling the matching
68
69 \return The matches, if any
70
71*/
73 const ROMol &mol, const ROMol &query,
75
76//! Find all substructure matches for a query in a ResonanceMolSupplier object
77/*!
78 \param resMolSuppl The ResonanceMolSupplier object to be searched
79 \param query The query ROMol
80 \param matchParams Parameters controlling the matching
81
82 \return The matches, if any
83
84*/
86 ResonanceMolSupplier &resMolSuppl, const ROMol &query,
88
90 const MolBundle &bundle, const ROMol &query,
93 const ROMol &mol, const MolBundle &query,
96 const MolBundle &bundle, const MolBundle &query,
98
99//! Find a substructure match for a query
100/*!
101 \param mol The object to be searched
102 \param query The query
103 \param matchVect Used to return the match
104 (pre-existing contents will be deleted)
105 \param recursionPossible flags whether or not recursive matches are allowed
106 \param useChirality use atomic CIP codes as part of the comparison
107 \param useQueryQueryMatches if set, the contents of atom and bond queries
108 will be used as part of the matching
109
110 \return whether or not a match was found
111
112*/
113template <typename T1, typename T2>
114bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
115 bool recursionPossible = true, bool useChirality = false,
116 bool useQueryQueryMatches = false) {
118 params.recursionPossible = recursionPossible;
119 params.useChirality = useChirality;
120 params.useQueryQueryMatches = useQueryQueryMatches;
121 params.maxMatches = 1;
122 std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
123 if (matchVects.size()) {
124 matchVect = matchVects.front();
125 } else {
126 matchVect.clear();
127 }
128 return matchVect.size() != 0;
129};
130
131//! Find all substructure matches for a query
132/*!
133 \param mol The object to be searched
134 \param query The query
135 \param matchVect Used to return the matches
136 (pre-existing contents will be deleted)
137 \param uniquify Toggles uniquification (by atom index) of the results
138 \param recursionPossible flags whether or not recursive matches are allowed
139 \param useChirality use atomic CIP codes as part of the comparison
140 \param useQueryQueryMatches if set, the contents of atom and bond queries
141 will be used as part of the matching
142 \param maxMatches The maximum number of matches that will be returned.
143 In high-symmetry cases with medium-sized molecules, it is
144 very
145 easy to end up with a combinatorial explosion in the
146 number of
147 possible matches. This argument prevents that from having
148 unintended consequences
149
150 \return the number of matches found
151
152*/
153template <typename T1, typename T2>
154unsigned int SubstructMatch(T1 &mol, const T2 &query,
155 std::vector<MatchVectType> &matchVect,
156 bool uniquify = true, bool recursionPossible = true,
157 bool useChirality = false,
158 bool useQueryQueryMatches = false,
159 unsigned int maxMatches = 1000,
160 int numThreads = 1) {
162 params.uniquify = uniquify;
163 params.recursionPossible = recursionPossible;
164 params.useChirality = useChirality;
165 params.useQueryQueryMatches = useQueryQueryMatches;
166 params.maxMatches = maxMatches;
167 params.numThreads = numThreads;
168 matchVect = SubstructMatch(mol, query, params);
169 return static_cast<unsigned int>(matchVect.size());
170};
171
172// ----------------------------------------------
173//
174// find one match in ResonanceMolSupplier object
175//
176template <>
177inline bool SubstructMatch(ResonanceMolSupplier &resMolSupplier,
178 const ROMol &query, MatchVectType &matchVect,
179 bool recursionPossible, bool useChirality,
180 bool useQueryQueryMatches) {
182 params.recursionPossible = recursionPossible;
183 params.useChirality = useChirality;
184 params.useQueryQueryMatches = useQueryQueryMatches;
185 params.maxMatches = 1;
186 std::vector<MatchVectType> matchVects =
187 SubstructMatch(resMolSupplier, query, params);
188 if (matchVects.size()) {
189 matchVect = matchVects.front();
190 } else {
191 matchVect.clear();
192 }
193 return matchVect.size() != 0;
194}
195
196template <>
197inline unsigned int SubstructMatch(ResonanceMolSupplier &resMolSupplier,
198 const ROMol &query,
199 std::vector<MatchVectType> &matchVect,
200 bool uniquify, bool recursionPossible,
201 bool useChirality, bool useQueryQueryMatches,
202 unsigned int maxMatches, int numThreads) {
204 params.uniquify = uniquify;
205 params.recursionPossible = recursionPossible;
206 params.useChirality = useChirality;
207 params.useQueryQueryMatches = useQueryQueryMatches;
208 params.maxMatches = maxMatches;
209 params.numThreads = numThreads;
210 matchVect = SubstructMatch(resMolSupplier, query, params);
211 return static_cast<unsigned int>(matchVect.size());
212};
213
214//! Class used as a final step to confirm whether or not a given atom->atom
215//! mapping is a valid substructure match.
217 public:
218 MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
219 const SubstructMatchParameters &ps);
220
221 bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]) const;
222
223 private:
224 const ROMol &d_query;
225 const ROMol &d_mol;
226 const SubstructMatchParameters &d_params;
227 std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
228};
229
230} // namespace RDKit
231
232#endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
MolBundle contains a collection of related ROMols.
Definition: MolBundle.h:39
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]) const
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition: export.h:489
Std stuff.
Definition: Abbreviations.h:19
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
bool recursionPossible
Allow recursive queries.
std::function< bool(const ROMol &mol, const std::vector< unsigned int > &match)> extraFinalCheck