1
2
3
4
5
6
7
8
9
10
11 from __future__ import print_function
12 import copy
13 import struct
14 from rdkit.six import iterkeys
15 from rdkit import six
16 from rdkit import DataStructs
17
18
class VectCollection(object):
    """A keyed collection of bit vectors that can be queried like a single vector.

    The collection lazily caches the bitwise OR of all of its child vectors;
    ``len()``, indexing, ``GetBit()`` and ``GetOnBits()`` are answered from
    that combined vector.

    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((1,3,5))
    >>> vc.AddVect(1,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((6,8))
    >>> vc.AddVect(2,bv1)
    >>> len(vc)
    10
    >>> vc.GetNumBits()
    10
    >>> vc[0]
    0
    >>> vc[1]
    1
    >>> vc[9]
    0
    >>> vc[6]
    1
    >>> vc.GetBit(6)
    1
    >>> list(vc.GetOnBits())
    [1, 3, 5, 6, 8]

    keys must be unique, so adding a duplicate replaces the
    previous values:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> len(vc)
    10
    >>> vc[1]
    0
    >>> vc[9]
    1
    >>> vc[6]
    1

    we can also query the children:
    >>> vc.NumChildren()
    2
    >>> cs = vc.GetChildren()
    >>> id,fp = cs[0]
    >>> id
    1
    >>> list(fp.GetOnBits())
    [7, 9]
    >>> id,fp = cs[1]
    >>> id
    2
    >>> list(fp.GetOnBits())
    [6, 8]

    attach/detach operations:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> vc.NumChildren()
    2
    >>> list(vc.GetOnBits())
    [5, 6, 8]


    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsMatchingBit(6)
    >>> vc.NumChildren()
    1
    >>> list(vc.GetOnBits())
    [7, 9]


    to copy VectCollections, use the copy module:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> list(vc.GetOnBits())
    [5, 6, 7, 9]
    >>> vc2 = copy.copy(vc)
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> list(vc.GetOnBits())
    [5, 6]
    >>> list(vc2.GetOnBits())
    [5, 6, 7, 9]

    The Uniquify() method can be used to remove duplicate vectors:
    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.AddVect(2,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((2,3,5))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> vc.Uniquify()
    >>> vc.NumChildren()
    2


    """

    def __init__(self):
        """Create an empty collection with an unset (stale) OR cache."""
        self.__vects = {}  # key -> child bit vector
        self.__orVect = None  # cached OR of every child; None while empty
        self.__numBits = -1  # bit count of the cached OR; -1 until first computed
        self.__needReset = True  # True whenever the cache must be rebuilt

    def GetOrVect(self):
        """Return the OR of all child vectors, rebuilding the cache if stale.

        Returns None when the collection has no children.
        """
        if self.__needReset:
            self.Reset()
        return self.__orVect

    orVect = property(GetOrVect)

    def AddVect(self, idx, vect):
        """Store *vect* under key *idx*, replacing any vector already there."""
        self.__vects[idx] = vect
        self.__needReset = True

    def Reset(self):
        """Rebuild the cached OR vector if it has been invalidated.

        With no children the cached vector is cleared, the stored bit count
        is left untouched, and the cache stays marked as stale.
        """
        if not self.__needReset:
            return
        self.__orVect = None
        if not self.__vects:
            return
        children = iter(self.__vects.values())
        # Work on a copy of the first child so the in-place OR below never
        # modifies a vector that is stored in the collection.
        self.__orVect = copy.copy(next(children))
        self.__numBits = self.__orVect.GetNumBits()
        for child in children:
            self.__orVect |= child
        self.__needReset = False

    def NumChildren(self):
        """Return the number of child vectors in the collection."""
        return len(self.__vects)

    def GetChildren(self):
        """Return a tuple of ``(key, vector)`` pairs, one per child."""
        return tuple(self.__vects.items())

    def __getitem__(self, idx):
        """Return bit *idx* of the combined (OR) vector."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetBit(idx)

    GetBit = __getitem__

    def __len__(self):
        """Return the bit count recorded when the OR vector was last built.

        The value is -1 if no OR vector has ever been built.
        """
        if self.__needReset:
            self.Reset()
        return self.__numBits

    GetNumBits = __len__

    def GetOnBits(self):
        """Return the on-bits of the combined (OR) vector."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetOnBits()

    def DetachVectsNotMatchingBit(self, bit):
        """Remove every child vector that does not have *bit* set."""
        # Snapshot the items: the dict is modified while we walk them.
        for key, vect in list(self.__vects.items()):
            if not vect.GetBit(bit):
                del self.__vects[key]
                self.__needReset = True

    def DetachVectsMatchingBit(self, bit):
        """Remove every child vector that has *bit* set."""
        # Snapshot the items: the dict is modified while we walk them.
        for key, vect in list(self.__vects.items()):
            if vect.GetBit(bit):
                del self.__vects[key]
                self.__needReset = True

    def Uniquify(self, verbose=False):
        """Remove children whose on-bits duplicate those of an earlier child.

        For each distinct set of on-bits, the first child encountered (in
        key iteration order) is kept. If *verbose* is true, the child counts
        before and after are printed.
        """
        seen = set()
        unique = {}
        for key, vect in self.__vects.items():
            onBits = tuple(vect.GetOnBits())
            if onBits not in seen:
                seen.add(onBits)
                unique[key] = vect
        if verbose:
            print('uniquify:', len(self.__vects), '->', len(unique))
        self.__vects = unique
        self.__needReset = True

    def __getstate__(self):
        """Serialize the collection to a byte string.

        Layout (little-endian): the child count as an unsigned 32-bit int,
        then for each child its key (unsigned 32-bit), the length of its
        ``ToBinary()`` output (unsigned 32-bit) and that output itself.
        """
        chunks = [struct.pack('<I', len(self.__vects))]
        for key, vect in self.__vects.items():
            blob = vect.ToBinary()
            chunks.append(struct.pack('<II%ds' % len(blob), key, len(blob), blob))
        return b''.join(chunks)

    def __setstate__(self, pkl):
        """Restore the collection from a byte string made by __getstate__."""
        if six.PY3 and isinstance(pkl, str):
            # Latin-1 maps code points 0-255 one-to-one onto byte values, so
            # this recovers the raw bytes from a text-typed state.
            pkl = bytes(pkl, encoding='Latin1')

        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        self.__needReset = True
        # Use the standard ('<') size so it always matches the unpack format.
        uintSize = struct.calcsize('<I')
        offset = 0
        nToRead = struct.unpack_from('<I', pkl, offset)[0]
        offset += uintSize
        for _ in range(nToRead):
            key, size = struct.unpack_from('<II', pkl, offset)
            offset += 2 * uintSize
            blob = struct.unpack_from('<%ds' % size, pkl, offset)[0]
            offset += size
            self.AddVect(key, DataStructs.ExplicitBitVect(blob))
266
267
268
269
270
271
def _runDoctests(verbose=None):
    """Run this module's doctests, then exit with the failure count as status.

    *verbose* is passed straight through to ``doctest.testmod``.
    """
    import doctest
    import sys

    results = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    # The first element of the result is the number of failed examples.
    sys.exit(results[0])
277
278
# Executing the file directly runs the doctest suite embedded in the docstrings.
if __name__ == '__main__':
    _runDoctests()
281