Package rdkit :: Package Chem :: Package Suppliers :: Module DbMolSupplier
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Suppliers.DbMolSupplier

  1  # 
  2  # Copyright (C) 2003-2006 greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ 
 11  Supplies a class for working with molecules from databases 
 12  """ 
 13  import sys 
 14   
 15  from rdkit import Chem 
 16  from rdkit.Chem.Suppliers.MolSupplier import MolSupplier 
 17   
 18   
19 -def warning(msg, dest=sys.stderr):
20 dest.write(msg)
21 22
23 -class DbMolSupplier(MolSupplier):
24 """ 25 new molecules come back with all additional fields from the 26 database set in a "_fieldsFromDb" data member 27 28 """ 29
30 - def __init__(self, dbResults, molColumnFormats={'SMILES': 'SMI', 31 'SMI': 'SMI', 32 'MOLPKL': 'PKL'}, nameCol='', transformFunc=None, 33 **kwargs):
34 """ 35 36 DbResults should be a subclass of Dbase.DbResultSet.DbResultBase 37 38 """ 39 self._data = dbResults 40 self._colNames = [x.upper() for x in self._data.GetColumnNames()] 41 nameCol = nameCol.upper() 42 self.molCol = -1 43 self.transformFunc = transformFunc 44 try: 45 self.nameCol = self._colNames.index(nameCol) 46 except ValueError: 47 self.nameCol = -1 48 for name in molColumnFormats: 49 name = name.upper() 50 try: 51 idx = self._colNames.index(name) 52 except ValueError: 53 pass 54 else: 55 self.molCol = idx 56 self.molFmt = molColumnFormats[name] 57 break 58 if self.molCol < 0: 59 raise ValueError('DbResultSet has no recognizable molecule column') 60 del self._colNames[self.molCol] 61 self._colNames = tuple(self._colNames) 62 self._numProcessed = 0
63
64 - def GetColumnNames(self):
65 return self._colNames
66
67 - def _BuildMol(self, data):
68 data = list(data) 69 molD = data[self.molCol] 70 del data[self.molCol] 71 self._numProcessed += 1 72 try: 73 if self.molFmt == 'SMI': 74 newM = Chem.MolFromSmiles(str(molD)) 75 if not newM: 76 warning('Problems processing mol %d, smiles: %s\n' % (self._numProcessed, molD)) 77 elif self.molFmt == 'PKL': 78 newM = Chem.Mol(str(molD)) 79 except Exception: 80 import traceback 81 traceback.print_exc() 82 newM = None 83 else: 84 if newM and self.transformFunc: 85 try: 86 newM = self.transformFunc(newM, data) 87 except Exception: 88 import traceback 89 traceback.print_exc() 90 newM = None 91 if newM: 92 newM._fieldsFromDb = data 93 nFields = len(data) 94 for i in range(nFields): 95 newM.SetProp(self._colNames[i], str(data[i])) 96 if self.nameCol >= 0: 97 newM.SetProp('_Name', str(data[self.nameCol])) 98 return newM
99 100
101 -class ForwardDbMolSupplier(DbMolSupplier):
102 """ DbMol supplier supporting only forward iteration 103 104 105 new molecules come back with all additional fields from the 106 database set in a "_fieldsFromDb" data member 107 108 """ 109
110 - def __init__(self, dbResults, **kwargs):
111 """ 112 113 DbResults should be an iterator for Dbase.DbResultSet.DbResultBase 114 115 """ 116 DbMolSupplier.__init__(self, dbResults, **kwargs) 117 self.Reset()
118
119 - def Reset(self):
120 self._dataIter = iter(self._data)
121
122 - def NextMol(self):
123 """ 124 125 NOTE: this has side effects 126 127 """ 128 try: 129 d = self._dataIter.next() 130 except StopIteration: 131 d = None 132 if d is not None: 133 newM = self._BuildMol(d) 134 else: 135 newM = None 136 137 return newM
138 139
140 -class RandomAccessDbMolSupplier(DbMolSupplier):
141
142 - def __init__(self, dbResults, **kwargs):
143 """ 144 145 DbResults should be a Dbase.DbResultSet.RandomAccessDbResultSet 146 147 """ 148 DbMolSupplier.__init__(self, dbResults, **kwargs) 149 self._pos = -1
150
151 - def __len__(self):
152 return len(self._data)
153
154 - def __getitem__(self, idx):
155 newD = self._data[idx] 156 return self._BuildMol(newD)
157
158 - def Reset(self):
159 self._pos = -1
160
161 - def NextMol(self):
162 self._pos += 1 163 res = None 164 if self._pos < len(self): 165 res = self[self._pos] 166 return res
167