Package rdkit :: Package Chem :: Package Pharm2D :: Module LazyGenerator
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.Pharm2D.LazyGenerator

  1  # 
  2  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ lazy generator of 2D pharmacophore signature data 
 11   
 12  """ 
 13  from __future__ import print_function 
 14   
 15  from rdkit.Chem.Pharm2D import SigFactory, Matcher 
 16   
 17  raise NotImplementedError('not finished yet') 
 18   
 19   
class Generator(object):
    """Lazily evaluate individual bits of a 2D pharmacophore signature.

    Rather than computing every bit up front, a bit is only matched against
    the molecule when it is requested through GetBit() (or indexing).

    Important attributes:

     - mol: the molecule whose signature is being worked with

     - sigFactory: the SigFactory object with signature parameters
       NOTE: no preprocessing is carried out for _sigFactory_.
             It *must* be pre-initialized.

     - sig: the object returned by sigFactory.GetSignature(); its GetSize()
       defines the valid bit range

     - dMat: the distance matrix used for matching

     - bits: dict mapping already-queried bit indices to 0/1, or None when
       caching is disabled

     - pattMatches: one list per (non-skipped) feature family, in family
       order, holding the atom-id tuples of each feature found in mol

    """

    def __init__(self, sigFactory, mol, dMat=None, bitCache=True):
        """ constructor

        **Arguments**

          - sigFactory: a signature factory, see class docs

          - mol: a molecule, see class docs

          - dMat: (optional) a distance matrix for the molecule.  If this
            is not provided, one will be calculated

          - bitCache: (optional) if nonzero, a local cache of which bits
            have been queried will be maintained.  Otherwise things must
            be recalculated each time a bit is queried.

        **Raises**

          - ValueError: if sigFactory is not a SigFactory.SigFactory

        """
        if not isinstance(sigFactory, SigFactory.SigFactory):
            raise ValueError('bad factory')

        self.sigFactory = sigFactory
        self.mol = mol
        # GetBit() and __len__() read self.sig, which was never assigned
        # before; obtain it the same way the benchmark at the bottom of this
        # module does.
        self.sig = sigFactory.GetSignature()

        if dMat is None:
            # Chem is not imported at module level, so bring it into scope
            # here instead of relying on the __main__ block's import.
            from rdkit import Chem
            useBO = sigFactory.includeBondOrder
            dMat = Chem.GetDistanceMatrix(mol, useBO)
        self.dMat = dMat

        self.bits = {} if bitCache else None

        featFactory = sigFactory.featFactory
        skipFeats = sigFactory.skipFeats
        featFamilies = [fam for fam in featFactory.GetFeatureFamilies()
                        if fam not in skipFeats]

        # Collect the atom ids of every feature, grouped by family.
        featMatches = {fam: [] for fam in featFamilies}
        for feat in featFactory.GetFeaturesForMol(mol):
            fam = feat.GetFamily()
            if fam not in skipFeats:
                featMatches[fam].append(feat.GetAtomIds())

        # Flatten to a list ordered like featFamilies.  The previous code
        # discarded the collected matches and then assigned an undefined
        # name (pattMatches), so construction always failed.
        self.pattMatches = [featMatches[fam] for fam in featFamilies]

    def GetBit(self, idx):
        """ returns 1 if the bit is set for the molecule, 0 otherwise

        **Raises**

          - IndexError: if idx is negative or not smaller than the
            signature size

        """
        if idx < 0 or idx >= self.sig.GetSize():
            raise IndexError('Index %d invalid' % (idx))
        if self.bits is not None and idx in self.bits:
            return self.bits[idx]

        # NOTE(review): the first argument expected by
        # Matcher.GetAtomsMatchingBit is not visible from this module --
        # confirm that it accepts self.sig.
        tmp = Matcher.GetAtomsMatchingBit(self.sig, idx, self.mol, dMat=self.dMat, justOne=1,
                                          matchingAtoms=self.pattMatches)
        # an empty (or None) result means nothing in the molecule matches
        res = 1 if tmp else 0

        if self.bits is not None:
            self.bits[idx] = res
        return res

    def __len__(self):
        """ allows class to support len()

        Returns the size of the underlying signature.

        """
        return self.sig.GetSize()

    def __getitem__(self, itm):
        """ allows class to support random access.
        Calls self.GetBit()

        """
        return self.GetBit(itm)
if __name__ == '__main__':
    # Small benchmark: query the same random bits through the lazy Generator
    # and through fully generated fingerprints, timing each approach.
    import random
    import time
    from itertools import islice

    from rdkit import RDConfig, Chem
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate

    factory = Gobbi_Pharm2D.factory
    nToDo = 100
    # Read only the first nToDo lines and make sure the file gets closed.
    with open(RDConfig.RDDataDir + "/NCI/first_5K.smi", 'r') as inF:
        inD = list(islice(inF, nToDo))

    # The SMILES is the first tab-separated field; the stripped value must be
    # the one that is used (the old code threw the result of strip() away).
    mols = [Chem.MolFromSmiles(line.split('\t')[0].strip()) for line in inD]

    sig = factory.GetSignature()

    nBits = 300
    random.seed(23)  # fixed seed so both runs query the same bits
    bits = [random.randint(0, sig.GetSize() - 1) for x in range(nBits)]

    print('Using the Lazy Generator')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        gen = Generator(factory, mol)
        for bit in bits:
            v = gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        fp = Generate.Gen2DFingerprint(mol, factory)
        for bit in bits:
            v = fp[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))