![]() |
RDKit
Open-source cheminformatics and machine learning.
|
Substructure Search a library of molecules. More...
#include <SubstructLibrary.h>
Public Member Functions | |
SubstructLibrary () | |
SubstructLibrary (boost::shared_ptr< MolHolderBase > molecules) | |
SubstructLibrary (boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints) | |
SubstructLibrary (boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< KeyHolderBase > keys) | |
SubstructLibrary (boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints, boost::shared_ptr< KeyHolderBase > keys) | |
SubstructLibrary (const std::string &pickle) | |
boost::shared_ptr< MolHolderBase > & | getMolHolder () |
Get the underlying molecule holder implementation. More... | |
const boost::shared_ptr< MolHolderBase > & | getMolHolder () const |
boost::shared_ptr< FPHolderBase > & | getFpHolder () |
Get the underlying fingerprint holder implementation. More... | |
const boost::shared_ptr< FPHolderBase > & | getFpHolder () const |
Get the underlying fingerprint holder implementation. More... | |
boost::shared_ptr< KeyHolderBase > & | getKeyHolder () |
Get the underlying key holder implementation. More... | |
const boost::shared_ptr< KeyHolderBase > & | getKeyHolder () const |
Get the underlying key holder implementation. More... | |
const MolHolderBase & | getMolecules () const |
FPHolderBase & | getFingerprints () |
Get the underlying fingerprint implementation. More... | |
const FPHolderBase & | getFingerprints () const |
KeyHolderBase & | getKeys () |
Get the underlying key holder implementation. More... | |
const KeyHolderBase & | getKeys () const |
Get the underlying key holder implementation. More... | |
unsigned int | addMol (const ROMol &mol) |
Add a molecule to the library. More... | |
template<class Query > | |
std::vector< unsigned int > | getMatches (const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const |
Get the matching indices for the query. More... | |
template<class Query > | |
std::vector< unsigned int > | getMatches (const Query &query, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload More... | |
template<class Query > | |
std::vector< unsigned int > | getMatches (const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const |
Get the matching indices for the query between the given indices. More... | |
std::vector< unsigned int > | getMatches (const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload More... | |
std::vector< unsigned int > | getMatches (const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload More... | |
std::vector< unsigned int > | getMatches (const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const |
overload More... | |
template<class Query > | |
unsigned int | countMatches (const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const |
Return the number of matches for the query. More... | |
template<class Query > | |
unsigned int | countMatches (const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const |
overload More... | |
template<class Query > | |
unsigned int | countMatches (const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const |
Return the number of matches for the query. More... | |
unsigned int | countMatches (const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload More... | |
unsigned int | countMatches (const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload More... | |
unsigned int | countMatches (const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload More... | |
template<class Query > | |
bool | hasMatch (const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const |
Returns true if any match exists for the query. More... | |
template<class Query > | |
bool | hasMatch (const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const |
overload More... | |
template<class Query > | |
bool | hasMatch (const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const |
bool | hasMatch (const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload More... | |
bool | hasMatch (const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload More... | |
bool | hasMatch (const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const |
overload More... | |
boost::shared_ptr< ROMol > | getMol (unsigned int idx) const |
Returns the molecule at the given index. More... | |
boost::shared_ptr< ROMol > | operator[] (unsigned int idx) |
Returns the molecule at the given index. More... | |
unsigned int | size () const |
return the number of molecules in the library More... | |
void | setSearchOrder (const std::vector< unsigned int > &order) |
does error checking More... | |
const std::vector< unsigned int > & | getSearchOrder () const |
std::vector< unsigned int > & | getSearchOrder () |
void | resetHolders () |
access required for serialization More... | |
void | toStream (std::ostream &ss) const |
serializes (pickles) to a stream More... | |
std::string | Serialize () const |
returns a string with a serialized (pickled) representation More... | |
void | initFromStream (std::istream &ss) |
initializes from a stream pickle More... | |
void | initFromString (const std::string &text) |
initializes from a string pickle More... | |
Substructure Search a library of molecules.
This class allows for multithreaded substructure searches of large datasets.
The implementations can use fingerprints to speed up searches and have molecules cached as binary forms to reduce memory usage.
basic usage:
Using different mol holders and pattern fingerprints.
Cached molecule holders create molecules on demand. There are currently three styles of cached molecules.
CachedMolHolder: stores molecules in the RDKit binary format. CachedSmilesMolHolder: stores molecules in SMILES format. CachedTrustedSmilesMolHolder: stores molecules in SMILES format, on the assumption that the SMILES come from a trusted source (see below).
The CachedTrustedSmilesMolHolder is made to add molecules from a trusted source. This makes the basic assumption that RDKit was used to sanitize and canonicalize the smiles string. In practice this is considerably faster than using arbitrary smiles strings since certain assumptions can be made. Molecules generated from trusted smiles do not have ring information (although this is created in the molecule being searched if necessary).
When loading from external data, as opposed to using the "addMol" API, care must be taken to ensure that the pattern fingerprints and smiles are synchronized.
Each pattern holder has an API point for making its fingerprint. This is useful to ensure that the pattern stored in the database will be compatible with the patterns made when analyzing queries.
Finally, using the KeyFromPropHolder will store user ids or keys. By default, it uses RDKit's default _Name prop, but can be changed to any property.
Definition at line 516 of file SubstructLibrary.h.
|
inline |
Definition at line 527 of file SubstructLibrary.h.
|
inline |
Definition at line 533 of file SubstructLibrary.h.
|
inline |
Definition at line 540 of file SubstructLibrary.h.
|
inline |
Definition at line 553 of file SubstructLibrary.h.
|
inline |
Definition at line 566 of file SubstructLibrary.h.
|
inline |
Definition at line 580 of file SubstructLibrary.h.
References RDKit::EnumerationStrategyPickler::pickle().
unsigned int RDKit::SubstructLibrary::addMol | ( | const ROMol & | mol | ) |
Add a molecule to the library.
mol | Molecule to add |
returns index for the molecule in the library
unsigned int RDKit::SubstructLibrary::countMatches | ( | const MolBundle & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
|
inline |
Return the number of matches for the query.
query | Molecule or Tautomer Query to match against molecules |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
Definition at line 760 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
|
inline |
overload
Definition at line 772 of file SubstructLibrary.h.
|
inline |
Return the number of matches for the query.
Return the number of matches for the query between the given indices
query | Query to match against molecules |
startIdx | Start index of the search |
endIdx | Ending idx (non-inclusive) of the search. |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
Definition at line 795 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
unsigned int RDKit::SubstructLibrary::countMatches | ( | const ROMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
unsigned int RDKit::SubstructLibrary::countMatches | ( | const TautomerQuery & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
|
inline |
Get the underlying fingerprint implementation.
Throws a value error if no fingerprints have been set
Definition at line 622 of file SubstructLibrary.h.
|
inline |
Definition at line 629 of file SubstructLibrary.h.
|
inline |
Get the underlying fingerprint holder implementation.
Definition at line 600 of file SubstructLibrary.h.
|
inline |
Get the underlying fingerprint holder implementation.
Definition at line 603 of file SubstructLibrary.h.
|
inline |
Get the underlying key holder implementation.
Definition at line 608 of file SubstructLibrary.h.
|
inline |
Get the underlying key holder implementation.
Definition at line 611 of file SubstructLibrary.h.
|
inline |
Get the underlying key holder implementation.
Throws a value error if no key holder has been set
Definition at line 638 of file SubstructLibrary.h.
|
inline |
Get the underlying key holder implementation.
Throws a value error if no key holder has been set
Definition at line 647 of file SubstructLibrary.h.
std::vector< unsigned int > RDKit::SubstructLibrary::getMatches | ( | const MolBundle & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 , |
||
int | maxResults = -1 |
||
) | const |
overload
|
inline |
Get the matching indices for the query.
query | Query or Tautomer Query to match against molecules |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
maxResults | Maximum results to return, -1 means return all [default -1] |
Definition at line 677 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
|
inline |
overload
Definition at line 691 of file SubstructLibrary.h.
|
inline |
Get the matching indices for the query between the given indices.
query | Query to match against molecules |
startIdx | Start index of the search |
endIdx | Ending idx (non-inclusive) of the search. |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
maxResults | Maximum results to return, -1 means return all [default -1] |
Definition at line 714 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
std::vector< unsigned int > RDKit::SubstructLibrary::getMatches | ( | const ROMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 , |
||
int | maxResults = -1 |
||
) | const |
overload
std::vector< unsigned int > RDKit::SubstructLibrary::getMatches | ( | const TautomerQuery & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 , |
||
int | maxResults = -1 |
||
) | const |
overload
|
inline |
Returns the molecule at the given index.
idx | Index of the molecule in the library (n.b. could contain null) |
Definition at line 891 of file SubstructLibrary.h.
References RDKit::MolHolderBase::getMol(), and PRECONDITION.
|
inline |
Definition at line 615 of file SubstructLibrary.h.
References PRECONDITION.
|
inline |
Get the underlying molecule holder implementation.
Definition at line 593 of file SubstructLibrary.h.
|
inline |
Definition at line 595 of file SubstructLibrary.h.
|
inline |
Definition at line 928 of file SubstructLibrary.h.
|
inline |
Definition at line 924 of file SubstructLibrary.h.
bool RDKit::SubstructLibrary::hasMatch | ( | const MolBundle & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
|
inline |
Returns true if any match exists for the query.
query | Molecule or Tautomer Query to match against molecules |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
Definition at line 836 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
|
inline |
overload
Definition at line 847 of file SubstructLibrary.h.
|
inline |
Returns true if any match exists for the query between the specified indices
query | Query to match against molecules |
startIdx | Start index of the search |
endIdx | Ending idx (non-inclusive) of the search. |
recursionPossible | flags whether or not recursive matches are allowed [default true] |
useChirality | use atomic CIP codes as part of the comparison [default true] |
useQueryQueryMatches | if set, the contents of atom and bond queries will be used as part of the matching [default false] |
numThreads | If -1 use all available processors [default -1] |
Definition at line 865 of file SubstructLibrary.h.
References RDKit::SubstructMatchParameters::recursionPossible, RDKit::SubstructMatchParameters::useChirality, and RDKit::SubstructMatchParameters::useQueryQueryMatches.
bool RDKit::SubstructLibrary::hasMatch | ( | const ROMol & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
bool RDKit::SubstructLibrary::hasMatch | ( | const TautomerQuery & | query, |
unsigned int | startIdx, | ||
unsigned int | endIdx, | ||
const SubstructMatchParameters & | params, | ||
int | numThreads = -1 |
||
) | const |
overload
void RDKit::SubstructLibrary::initFromStream | ( | std::istream & | ss | ) |
initializes from a stream pickle
void RDKit::SubstructLibrary::initFromString | ( | const std::string & | text | ) |
initializes from a string pickle
|
inline |
Returns the molecule at the given index.
idx | Index of the molecule in the library (n.b. could contain null) |
Definition at line 902 of file SubstructLibrary.h.
References RDKit::MolHolderBase::getMol(), and PRECONDITION.
|
inline |
access required for serialization
Definition at line 930 of file SubstructLibrary.h.
std::string RDKit::SubstructLibrary::Serialize | ( | ) | const |
returns a string with a serialized (pickled) representation
|
inline |
does error checking
Definition at line 915 of file SubstructLibrary.h.
References RDKit::MolHolderBase::size().
|
inline |
return the number of molecules in the library
Definition at line 909 of file SubstructLibrary.h.
References PRECONDITION.
void RDKit::SubstructLibrary::toStream | ( | std::ostream & | ss | ) | const |
serializes (pickles) to a stream