1
2
3
4
5
6
7
8
9
10 """ lazy generator of 2D pharmacophore signature data
11
12 """
13 from __future__ import print_function
14
15 from rdkit.Chem.Pharm2D import SigFactory, Matcher
16
# Import-time guard: this module is unfinished, so executing it (importing it
# or running it as a script) fails right here and nothing below this line runs.
raise NotImplementedError('not finished yet')
18
19
class Generator(object):
    """ lazily evaluates individual bits of a 2D pharmacophore signature

    Bits are calculated on demand (and optionally cached) when they are
    queried, either with GetBit() or with the indexing syntax gen[idx].

    Important attributes:

     - mol: the molecule whose signature is being worked with

     - sigFactory: the SigFactory object with signature parameters
         NOTE: no preprocessing is carried out for _sigFactory_.
               It *must* be pre-initialized.

     - sig: the object returned by sigFactory.GetSignature(); its
         GetSize() bounds the valid bit indices

     - dMat: the distance matrix used when matching bits

     - bits: dictionary mapping bit index -> 0/1 for bits that have
         already been queried, or None if caching is disabled

     - pattMatches: one list per non-skipped feature family (in the
         order the feature factory reports the families), holding the
         atom ids of every feature of that family found in mol

    **Notes**

      - construction queries the feature factory once for all features
        of the molecule; the per-bit matching is deferred to GetBit()

    """

    def __init__(self, sigFactory, mol, dMat=None, bitCache=True):
        """ constructor

        **Arguments**

          - sigFactory: a signature factory, see class docs

          - mol: a molecule, see class docs

          - dMat: (optional) a distance matrix for the molecule.  If this
            is not provided, one will be calculated

          - bitCache: (optional) if nonzero, a local cache of which bits
            have been queried will be maintained.  Otherwise things must
            be recalculated each time a bit is queried.

        **Raises**

          - ValueError: if sigFactory is not a SigFactory.SigFactory

        """
        if not isinstance(sigFactory, SigFactory.SigFactory):
            raise ValueError('bad factory')

        self.sigFactory = sigFactory
        self.mol = mol

        # GetBit() and __len__() read self.sig, but nothing ever assigned it.
        # GetSignature() is the factory call the benchmark at the bottom of
        # this file uses to obtain an object that supports GetSize().
        # NOTE(review): confirm Matcher.GetAtomsMatchingBit() accepts this
        # object as its first argument.
        self.sig = sigFactory.GetSignature()

        if dMat is None:
            # imported here because the module header only pulls in
            # SigFactory and Matcher
            from rdkit import Chem
            dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder)
        self.dMat = dMat

        self.bits = {} if bitCache else None

        self.pattMatches = self._collectFeatMatches(sigFactory.featFactory,
                                                    sigFactory.skipFeats, mol)

    @staticmethod
    def _collectFeatMatches(featFactory, skipFeats, mol):
        """ groups the features found in mol by feature family

        **Arguments**

          - featFactory: object providing GetFeatureFamilies() and
            GetFeaturesForMol(mol)

          - skipFeats: container of family names to be ignored

          - mol: the molecule to be searched

        **Returns**

          a list with one entry per non-skipped family, in the order
          GetFeatureFamilies() reports them; each entry is the list of
          GetAtomIds() results for the features of that family

        """
        featFamilies = [fam for fam in featFactory.GetFeatureFamilies()
                        if fam not in skipFeats]
        featMatches = dict((fam, []) for fam in featFamilies)
        for feat in featFactory.GetFeaturesForMol(mol):
            fam = feat.GetFamily()
            # skipped families have no slot in featMatches
            if fam in featMatches:
                featMatches[fam].append(feat.GetAtomIds())
        return [featMatches[fam] for fam in featFamilies]

    def GetBit(self, idx):
        """ returns 1 if the bit is set and 0 if it is not

        **Arguments**

          - idx: index of the bit to be queried

        **Raises**

          - IndexError: if idx is negative or not smaller than the
            signature size

        """
        if idx < 0 or idx >= self.sig.GetSize():
            raise IndexError('Index %d invalid' % (idx))
        if self.bits is not None and idx in self.bits:
            return self.bits[idx]

        # justOne=1: only the existence of a match matters here
        matches = Matcher.GetAtomsMatchingBit(self.sig, idx, self.mol, dMat=self.dMat,
                                              justOne=1, matchingAtoms=self.pattMatches)
        res = 1 if matches else 0

        if self.bits is not None:
            self.bits[idx] = res
        return res

    def __len__(self):
        """ allows class to support len()

        """
        return self.sig.GetSize()

    def __getitem__(self, itm):
        """ allows class to support random access.
          Calls self.GetBit()

        """
        return self.GetBit(itm)
117
118
if __name__ == '__main__':
    # Ad-hoc benchmark: query the same random set of bits for a handful of
    # molecules, once through the lazy Generator and once through fully
    # generated fingerprints, and report the wall-clock time of each.
    import os
    import random
    import time
    from itertools import islice

    from rdkit import RDConfig, Chem
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate

    factory = Gobbi_Pharm2D.factory
    nToDo = 100
    smiPath = os.path.join(RDConfig.RDDataDir, 'NCI', 'first_5K.smi')
    # read only the lines that are needed and make sure the file gets closed
    with open(smiPath, 'r') as inF:
        inD = list(islice(inF, nToDo))

    mols = []
    for line in inD:
        # the SMILES is the first tab-separated field; strip() has to be
        # assigned back (str is immutable), otherwise it is a no-op
        smi = line.split('\t')[0].strip()
        mol = Chem.MolFromSmiles(smi)
        # MolFromSmiles() returns None for input it cannot parse
        if mol is not None:
            mols.append(mol)

    sigSize = factory.GetSignature().GetSize()

    nBits = 300
    random.seed(23)  # fixed seed: both timing runs query identical bits
    bits = [random.randint(0, sigSize - 1) for _ in range(nBits)]

    print('Using the Lazy Generator')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        gen = Generator(factory, mol)
        for bit in bits:
            _ = gen[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))

    print('Generating and checking signatures')
    t1 = time.time()
    for i, mol in enumerate(mols):
        if not i % 10:
            print('done mol %d of %d' % (i, len(mols)))
        fp = Generate.Gen2DFingerprint(mol, factory)
        for bit in bits:
            _ = fp[bit]
    t2 = time.time()
    print('\tthat took %4.2f seconds' % (t2 - t1))
161