1
2
3
4
5
6 from rdkit import Chem
7 from rdkit.VLib.Filter import FilterNode
8
9
11 """ Canonical-SMILES-based duplicate filter
12
13 Assumptions:
14
15 - inputs are molecules
16
17
18 Sample Usage:
19 >>> import os
20 >>> from rdkit import RDConfig
21 >>> from rdkit.VLib.NodeLib.SDSupply import SDSupplyNode
22 >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',\
23 'test_data','NCI_aids.10.sdf')
24 >>> suppl = SDSupplyNode(fileN)
25 >>> filt = DupeFilter()
26 >>> filt.AddParent(suppl)
27 >>> ms = [x for x in filt]
28 >>> len(ms)
29 10
30 >>> ms[0].GetProp("_Name")
31 '48'
32 >>> ms[1].GetProp("_Name")
33 '78'
34 >>> filt.reset()
35 >>> filt.next().GetProp("_Name")
36 '48'
37
38
39 """
40
44
48
# NOTE(review): the enclosing `def` line is not visible in this listing;
# `self` and `cmpd` are presumably its parameters -- confirm against the
# full file.
# Convert the compound to its SMILES string, which is used as the
# duplicate-detection key.
50 smi = Chem.MolToSmiles(cmpd)
# First time this SMILES is seen: record it and return 1.
51 if smi not in self._smisSeen:
52 self._smisSeen.add(smi)
53 return 1
# SMILES was already recorded: return 0.
54 else:
55 return 0
56
57
58
59
60
61
# NOTE(review): the `def` line for this body is not visible in this listing;
# `verbose` is presumably its parameter, and the call under the
# `__main__` guard suggests the function is `_runDoctests` -- confirm.
63 import sys
64 import doctest
# Run this module's doctests with ELLIPSIS matching enabled; testmod
# returns a (failed, attempted) pair and only the failure count is kept.
65 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
# Use the failure count as the process exit status (0 when nothing failed).
66 sys.exit(failed)
67
68
# Script entry point: run the doctests only when this file is executed
# directly, not when it is imported as a module.
69 if __name__ == '__main__':
70 _runDoctests()
71