Package rdkit :: Package VLib :: Package NodeLib :: Module SmilesDupeFilter
[hide private]
[frames | no frames]

Source Code for Module rdkit.VLib.NodeLib.SmilesDupeFilter

 1  #  $Id$ 
 2  # 
 3  #  Copyright (C) 2003 Rational Discovery LLC 
 4  #     All Rights Reserved 
 5  # 
 6  from rdkit import Chem 
 7  from rdkit.VLib.Filter import FilterNode 
 8   
 9   
class DupeFilter(FilterNode):
    """ SMILES-based duplicate filter

    Each incoming molecule is converted to a SMILES string with
    Chem.MolToSmiles(); the filter function reports 1 the first time a
    given SMILES is encountered and 0 for every later occurrence.

    Assumptions:

      - inputs are molecules


    Sample Usage:
      >>> import os
      >>> from rdkit import RDConfig
      >>> from rdkit.VLib.NodeLib.SDSupply import SDSupplyNode
      >>> fileN = os.path.join(RDConfig.RDCodeDir,'VLib','NodeLib',\
                               'test_data','NCI_aids.10.sdf')
      >>> suppl = SDSupplyNode(fileN)
      >>> filt = DupeFilter()
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      10
      >>> ms[0].GetProp("_Name")
      '48'
      >>> ms[1].GetProp("_Name")
      '78'
      >>> filt.reset()
      >>> filt.next().GetProp("_Name")
      '48'


    """

    def __init__(self, **kwargs):
        """ Set up the node with self.filter as its filter function.

        All keyword arguments are forwarded unchanged to
        FilterNode.__init__(). The set of already-seen SMILES starts
        out empty.
        """
        FilterNode.__init__(self, func=self.filter, **kwargs)
        self._smisSeen = set()

    def reset(self):
        """ Reset the underlying FilterNode and forget every SMILES seen so far. """
        FilterNode.reset(self)
        # rebind to a fresh set rather than clearing the old one in place
        self._smisSeen = set()

    def filter(self, cmpd):
        """ Return 1 if the SMILES of cmpd has not been seen before, else 0.

        A SMILES that has not been seen before is recorded, so that any
        later molecule producing the same string is reported as a
        duplicate.
        """
        smiles = Chem.MolToSmiles(cmpd)
        seen = self._smisSeen
        if smiles in seen:
            return 0
        seen.add(smiles)
        return 1
56 57 58 # ------------------------------------ 59 # 60 # doctest boilerplate 61 #
def _runDoctests(verbose=None):  # pragma: nocover
    """ Run this module's doctests and exit with the failure count.

    verbose is handed straight to doctest.testmod(); ELLIPSIS matching
    is enabled for the expected output. The process exit status is the
    number of failed examples, so 0 means every doctest passed.
    """
    import doctest
    import sys

    results = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    # testmod() returns (failed, attempted); only the failure count is used
    sys.exit(results[0])


if __name__ == '__main__':  # pragma: nocover
    _runDoctests()