1
2
3
4
5
6
7
8
9
10 """ contains factory class for producing signatures
11
12
13 """
14 from __future__ import print_function, division
15
16 import copy
17
18 import numpy
19
20 from rdkit.Chem.Pharm2D import Utils
21 from rdkit.DataStructs import SparseBitVect, IntSparseIntVect, LongSparseIntVect
22
23 _verbose = False
24
25
27 """
28
29 SigFactory's are used by creating one, setting the relevant
30 parameters, then calling the GetSignature() method each time a
31 signature is required.
32
33 """
34
def __init__(self, featFactory, useCounts=False, minPointCount=2, maxPointCount=3,
             shortestPathsOnly=True, includeBondOrder=False, skipFeats=None,
             trianglePruneBins=True):
    """ stores the signature parameters on the new factory

    **Arguments**

      - featFactory: the feature factory that is queried for feature
        families; stored unchanged

      - useCounts, minPointCount, maxPointCount, shortestPathsOnly,
        includeBondOrder, trianglePruneBins: stored unchanged under
        attributes of the same name

      - skipFeats: (optional) feature families to leave out; None is
        replaced with a new empty list, anything else is stored as-is
        (not copied)

    **Notes**

      - no distance bins are defined here: _bins and sigKlass both start
        out as None

    """
    # state that only becomes meaningful once bins have been provided
    self._bins = None
    self.sigKlass = None

    self.skipFeats = [] if skipFeats is None else skipFeats

    self.featFactory = featFactory
    self.useCounts = useCounts
    self.minPointCount = minPointCount
    self.maxPointCount = maxPointCount
    self.shortestPathsOnly = shortestPathsOnly
    self.includeBondOrder = includeBondOrder
    self.trianglePruneBins = trianglePruneBins
51
53 """ bins should be a list of 2-tuples """
54 self._bins = copy.copy(bins)
55 self.Init()
56
59
61 return len(self._bins)
62
def GetSignature(self):
    """ returns the result of calling the factory's signature class
    (sigKlass) with the signature size (_sigSize)

    """
    signatureType = self.sigKlass
    return signatureType(self._sigSize)
65
def _GetBitSummaryData(self, bitIdx):
    """ collects the pieces needed to describe a bit

    **Arguments**

      - bitIdx: an integer bit index

    **Returns**

      a 5-tuple:

        1) the number of points in the pharmacophore

        2) the feature-index combination, as returned by GetBitInfo()

        3) the scaffold, as returned by GetBitInfo()

        4) the feature families corresponding to the combination

        5) a symmetric nPts x nPts integer array holding the scaffold's
           entry for each pair of points (zeros on the diagonal)

    """
    nPts, combo, scaffold = self.GetBitInfo(bitIdx)
    fams = self.GetFeatFamilies()
    labels = [fams[x] for x in combo]
    # numpy.int was a plain alias of the builtin int and no longer exists
    # in current NumPy releases, so the builtin is used directly
    dMat = numpy.zeros((nPts, nPts), int)
    # nPointDistDict lists the (i, j) point pair each scaffold entry refers to
    for idx, (i, j) in enumerate(Utils.nPointDistDict[nPts]):
        dMat[i, j] = scaffold[idx]
        dMat[j, i] = scaffold[idx]

    return nPts, combo, scaffold, labels, dMat
78
def GetBitDescriptionAsText(self, bitIdx, includeBins=0, fullPage=1):
    """ placeholder for a formatted (HTML) description of a bit

    **Arguments**

      - bitIdx: an integer bit index

      - includeBins: (optional) meant to request that information about
        the bins be included as well

      - fullPage: (optional) meant to request html headers and footers
        (so as to make the output a complete page)

    **Raises**

      NotImplementedError, unconditionally: there is no implementation
      yet and none of the arguments is examined

    """
    raise NotImplementedError('Missing implementation')
98
100 """ returns a text description of the bit
101
102 **Arguments**
103
104 - bitIdx: an integer bit index
105
106 **Returns**
107
108 a string
109
110 """
111 nPts, combo, scaffold, labels, dMat = self._GetBitSummaryData(bitIdx)
112 res = " ".join(labels) + " "
113 for row in dMat:
114 res += "|" + " ".join([str(x) for x in row])
115 res += "|"
116 return res
117
def _findBinIdx(self, dists, bins, scaffolds):
    """ Internal use only: pure-python lookup of a scaffold index.
    This definition is replaced at import time when the compiled cUtils
    module is available.

    **Arguments**

      - dists: a sequence of distances (not binned)

      - bins: a sorted sequence of distance bins (2-tuples); a distance d
        belongs to bin (lo, hi) when lo <= d < hi

      - scaffolds: a list of possible scaffolds (tuples of bin indices)

    **Returns**

      the position in scaffolds of the tuple of bin indices found for
      dists, or None as soon as one distance lies outside every bin

    **Raises**

      ValueError if every distance has a bin but that combination of bins
      is not present in scaffolds

    **Note**

      the result is an offset among the scaffolds of one
      proto-pharmacophore, not an index into the overall signature

    """
    whichBins = []
    for dist in dists:
        # bisect the sorted bins for the one containing this distance
        lo, hi = 0, len(bins)
        hit = None
        while hit is None and lo < hi:
            mid = (lo + hi) // 2
            lowEdge, highEdge = bins[mid]
            if dist < lowEdge:
                hi = mid
            elif dist >= highEdge:
                lo = mid + 1
            else:
                hit = mid
        if hit is None:
            return None
        whichBins.append(hit)

    res = scaffolds.index(tuple(whichBins))
    if _verbose:
        print('----- _fBI -----------')
        print(' scaffolds:', scaffolds)
        print(' bins:', whichBins)
        print(' res:', res)
    return res
177
def GetFeatFamilies(self):
    """ returns a new sorted list of the feature families reported by the
    feature factory, leaving out every family found in skipFeats

    """
    return sorted(fam for fam in self.featFactory.GetFeatureFamilies()
                  if fam not in self.skipFeats)
182
184 featFamilies = self.GetFeatFamilies()
185 featMatches = {}
186 for fam in featFamilies:
187 featMatches[fam] = []
188 feats = self.featFactory.GetFeaturesForMol(mol, includeOnly=fam)
189 for feat in feats:
190 featMatches[fam].append(feat.GetAtomIds())
191 return [featMatches[x] for x in featFamilies]
192
def GetBitIdx(self, featIndices, dists, sortIndices=True):
    """ returns the index for a pharmacophore described using a set of
    feature indices and distances

    **Arguments**

      - featIndices: a sequence of feature indices

      - dists: a sequence of distance between the features, only the
        unique distances should be included, and they should be in the
        order defined in Utils.

      - sortIndices : (optional) if true, a sorted copy of featIndices is
        used in place of the sequence that was passed in

    **Returns**

      the integer bit index

    **Raises**

      - NotImplementedError: more than 3 points were supplied

      - IndexError: the number of points is outside
        [minPointCount, maxPointCount], a feature index is negative or
        not smaller than the number of features, or the distances do not
        correspond to any bin/scaffold

    """
    nPoints = len(featIndices)
    if nPoints > 3:
        raise NotImplementedError('>3 points not supported')
    if nPoints < self.minPointCount or nPoints > self.maxPointCount:
        raise IndexError('bad number of points')

    # first bit belonging to pharmacophores with this many points
    startIdx = self._starts[nPoints]

    if sortIndices:
        featIndices = sorted(featIndices)

    # min()/max() instead of looking at the first element only: when
    # sortIndices is false nothing guarantees the sequence is ordered
    if min(featIndices) < 0 or max(featIndices) >= self._nFeats:
        raise IndexError('bad feature index')

    if nPoints == 3:
        featIndices, dists = Utils.OrderTriangle(featIndices, dists)

    offset = Utils.CountUpTo(self._nFeats, nPoints, featIndices)
    if _verbose:
        print('offset for feature %s: %d' % (str(featIndices), offset))
    # scaffolds are stored by number of distances
    scaffolds = self._scaffolds[len(dists)]
    offset *= len(scaffolds)

    try:
        if _verbose:
            print('>>>>>>>>>>>>>>>>>>>>>>>')
            print('\tScaffolds:', repr(scaffolds), type(scaffolds))
            print('\tDists:', repr(dists), type(dists))
            print('\tbins:', repr(self._bins), type(self._bins))
        bin_ = self._findBinIdx(dists, self._bins, scaffolds)
        if bin_ is None:
            # the pure-python _findBinIdx reports a distance outside every
            # bin by returning None; route that through the same error as
            # a missing scaffold instead of failing on "int + None" below
            raise ValueError('distance outside of all bins')
    except ValueError:
        fams = self.GetFeatFamilies()
        fams = [fams[x] for x in featIndices]
        raise IndexError('distance bin not found: feats: %s; dists=%s; bins=%s; scaffolds: %s' %
                         (fams, dists, self._bins, self._scaffolds))

    return startIdx + offset + bin_
258
def GetBitInfo(self, idx):
    """ returns information about the given bit

    **Arguments**

      - idx: the bit index to be considered

    **Returns**

      a 3-tuple:

        1) the number of points in the pharmacophore

        2) the proto-pharmacophore (tuple of pattern indices)

        3) the scaffold (tuple of distance indices)

    **Raises**

      IndexError if idx is negative or not smaller than the signature size

    """
    # negative values must be rejected explicitly: they would otherwise be
    # decoded through python's negative indexing into an unrelated bit
    if idx < 0 or idx >= self._sigSize:
        raise IndexError('bad index (%d) queried. %d is the max' % (idx, self._sigSize))

    # find the point count whose block of bits contains idx
    nPts = self.minPointCount
    while nPts < self.maxPointCount and self._starts[nPts + 1] <= idx:
        nPts += 1

    # position of the bit inside that block
    offsetFromStart = idx - self._starts[nPts]
    if _verbose:
        print('\t %d Points, %d offset' % (nPts, offsetFromStart))

    # scaffolds are stored by number of distances
    nDists = len(Utils.nPointDistDict[nPts])
    scaffolds = self._scaffolds[nDists]

    # each feature combination owns len(scaffolds) consecutive bits
    protoIdx, scaffoldIdx = divmod(offsetFromStart, len(scaffolds))

    indexCombos = Utils.GetIndexCombinations(self._nFeats, nPts)
    combo = tuple(indexCombos[protoIdx])
    if _verbose:
        print('\t combo: %s' % (str(combo)))

    scaffold = scaffolds[scaffoldIdx]
    if _verbose:
        print('\t scaffold: %s' % (str(scaffold)))
    return nPts, combo, scaffold
308
def Init(self):
    """ Recomputes everything that is derived from the signature
    parameters. This **must** be called after making any changes to
    those parameters.

    Sets:

      - _scaffolds: list holding, at position <number of distances>, the
        possible scaffolds for pharmacophores with that many distances

      - _starts: dictionary mapping a point count to the index of the
        first bit used for pharmacophores with that many points

      - _nFeats: the number of feature families that are not skipped

      - _sigSize: the total number of bits

      - sigKlass: SparseBitVect when useCounts is false, otherwise
        IntSparseIntVect for sizes below 2**31 and LongSparseIntVect
        for anything larger

    """
    # sized from the distance count of one point more than the maximum,
    # which leaves room for every distance count used in the loop below
    self._scaffolds = [0] * (len(Utils.nPointDistDict[self.maxPointCount + 1]))
    self._starts = {}

    families = self.featFactory.GetFeatureFamilies()
    if self.skipFeats:
        self._nFeats = sum(1 for fam in families if fam not in self.skipFeats)
    else:
        self._nFeats = len(families)

    total = 0
    for nPts in range(self.minPointCount, self.maxPointCount + 1):
        self._starts[nPts] = total
        nDistsHere = len(Utils.nPointDistDict[nPts])
        scaffoldsHere = Utils.GetPossibleScaffolds(nPts, self._bins,
                                                   useTriangleInequality=self.trianglePruneBins)
        self._scaffolds[nDistsHere] = scaffoldsHere
        # every combination of features gets one bit per scaffold
        total += Utils.NumCombinations(self._nFeats, nPts) * len(scaffoldsHere)
    self._sigSize = total

    if self.useCounts:
        self.sigKlass = IntSparseIntVect if self._sigSize < 2**31 else LongSparseIntVect
    else:
        self.sigKlass = SparseBitVect
340
343
344
345 try:
346 from rdkit.Chem.Pharmacophores import cUtils
347 except ImportError:
348 pass
349 else:
350 SigFactory._findBinIdx = cUtils.FindBinIdx
351