1
2
3
4
5
6
7
8
9
10 import sqlalchemy
11
12 from rdkit import Chem
13 from rdkit.Chem import AllChem
14 from rdkit.Chem import Lipinski, Descriptors, Crippen
15 from rdkit.Dbase.DbConnection import DbConnect
16 from rdkit.Dbase import DbModule
17 import os
18
19 from sqlalchemy.ext.declarative import declarative_base
20 from sqlalchemy import Table, Column, MetaData
21 from sqlalchemy import Integer, Text, String, ForeignKey, Binary, DateTime, Float
22 from sqlalchemy.orm import relation, mapper, sessionmaker, backref
23 from sqlalchemy import create_engine
24
25 decBase = declarative_base()
26
27
32
33
35 engine = create_engine(dbUrl, echo=echo)
36 decBase.metadata.create_all(engine)
37 maker = sessionmaker(bind=engine)
38 return maker
39
40
41 ConnectToSchema = RegisterSchema
42
43
45 engine = create_engine(dbUrl, echo=echo)
46 meta
47 decBase.metadata.create_all(engine)
48 maker = sessionmaker(bind=engine)
49 return maker
50
51
52
53 import rdkit.RDLogger as logging
54 logger = logging.logger()
55 logger.setLevel(logging.INFO)
56
57
58 -def ProcessMol(session, mol, globalProps, nDone, nameProp='_Name', nameCol='compound_id',
59 redraw=False, keepHs=False, skipProps=False, addComputedProps=False,
60 skipSmiles=False):
98
99
def _commitOneByOne(session, cmpds):
    """Re-add and commit each compound separately after a failed batch commit.

    A compound whose individual commit raises an ``Exception`` is rolled back
    and dropped so the remaining compounds can still be stored.  Any other
    ``BaseException`` (e.g. ``KeyboardInterrupt``) is rolled back and re-raised.
    """
    for cmpd in cmpds:
        try:
            session.add(cmpd)
            session.commit()
        except Exception:
            session.rollback()
        except BaseException:
            # Roll back even on KeyboardInterrupt/SystemExit before propagating.
            session.rollback()
            raise


def LoadDb(suppl, dbName, nameProp='_Name', nameCol='compound_id', silent=False, redraw=False,
           errorsTo=None, keepHs=False, defaultVal='N/A', skipProps=False, regName='molecules',
           skipSmiles=False, maxRowsCached=-1, uniqNames=False, addComputedProps=False,
           lazySupplier=False, numForPropScan=10, startAnew=True):
    """Load the molecules from a supplier into an SQLite database file.

    The function first extends the ``Compound`` class with the columns it
    needs (name, optional SMILES, one column per scanned property, optional
    computed descriptors), then opens a session on ``sqlite:///<dbName>`` and
    passes every molecule to ``ProcessMol``.

    Arguments:
      - suppl: the molecule source.  It is iterated once for the property
        scan and again for loading; ``len(suppl)`` is called unless
        ``lazySupplier`` is set.  Entries that evaluate false are treated as
        unreadable molecules.
      - dbName: path of the SQLite database file.
      - nameProp, redraw, keepHs: forwarded unchanged to ``ProcessMol``.
      - nameCol: name of the name column; it is lower-cased before use.
      - silent: when true, no progress messages are logged.
      - errorsTo: optional object with a ``write()`` method.  For each
        unreadable entry, ``suppl.GetItemText(index)`` is written to it when
        the supplier offers that method; otherwise a warning is logged.
      - defaultVal: default value of the name and property columns.
      - skipProps: when true, the property scan is skipped (also forwarded
        to ``ProcessMol``).
      - regName, maxRowsCached: accepted for interface compatibility; not
        used by this function.
      - skipSmiles: when false, a unique ``smiles`` text column is added
        (also forwarded to ``ProcessMol``).
      - uniqNames: whether the name column carries a unique constraint.
      - addComputedProps: when true, adds the DonorCount, AcceptorCount,
        RotatableBondCount, AMW and MolLogP columns (also forwarded to
        ``ProcessMol``).
      - lazySupplier: when true, ``len(suppl)`` is not called.
      - numForPropScan: number of readable molecules whose property names
        are scanned to decide which property columns to create.
      - startAnew: when true, an existing file at ``dbName`` is deleted
        first.

    Raises:
      IOError: if ``startAnew`` is set and the old database file still
        exists after the deletion attempts.

    Side effects: adds ``Column`` attributes to the module-level ``Compound``
    class.
    """
    nMols = -1 if lazySupplier else len(suppl)
    if not silent:
        logger.info("Generating molecular database in file %s" % dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules" % nMols)

    globalProps = {}
    if startAnew:
        if os.path.exists(dbName):
            import time
            # The file may be briefly locked by another process: try up to
            # five times, pausing two seconds after each failure.
            for _ in range(5):
                try:
                    os.unlink(dbName)
                    break
                except OSError:
                    # Only filesystem errors are retried; KeyboardInterrupt
                    # and SystemExit now propagate instead of being swallowed.
                    time.sleep(2)
        if os.path.exists(dbName):
            raise IOError('could not delete old database %s' % dbName)

    # Extend the Compound schema before the tables are created.
    sIter = iter(suppl)
    lowerNameCol = nameCol.lower()
    setattr(Compound, lowerNameCol,
            Column(lowerNameCol, String, default=defaultVal, unique=uniqNames))
    if not skipSmiles:
        Compound.smiles = Column(Text, unique=True)
    if not skipProps:
        # Collect property names from the first numForPropScan readable
        # molecules; unreadable entries do not count against the budget.
        while numForPropScan > 0:
            try:
                m = next(sIter)
            except StopIteration:
                numForPropScan = 0
                break
            if not m:
                continue
            for pn in m.GetPropNames():
                if pn.lower() == lowerNameCol:
                    continue
                if pn not in globalProps:
                    globalProps[pn] = 1
                    setattr(Compound, pn.lower(), Column(pn.lower(), String, default=defaultVal))
            numForPropScan -= 1
    if addComputedProps:
        Compound.DonorCount = Column(Integer)
        Compound.AcceptorCount = Column(Integer)
        Compound.RotatableBondCount = Column(Integer)
        Compound.AMW = Column(Float)
        Compound.MolLogP = Column(Float)
    session = RegisterSchema('sqlite:///%s' % (dbName))()

    nDone = 0
    cache = []
    for m in suppl:
        nDone += 1
        if not m:
            # Unreadable entry: optionally record its raw text, then move on.
            if errorsTo:
                if hasattr(suppl, 'GetItemText'):
                    d = suppl.GetItemText(nDone - 1)
                    errorsTo.write(d)
                else:
                    logger.warning('full error file support not complete')
            continue

        cmpd = ProcessMol(session, m, globalProps, nDone, nameProp=nameProp, nameCol=nameCol,
                          redraw=redraw, keepHs=keepHs, skipProps=skipProps,
                          addComputedProps=addComputedProps, skipSmiles=skipSmiles)
        if cmpd is not None:
            cache.append(cmpd)

        # NOTE(review): the batch commit below only runs when progress logging
        # is enabled (silent=False); with silent=True everything is committed
        # once at the end.  Confirm that this coupling is intended.
        if not silent and not nDone % 100:
            logger.info(' done %d' % nDone)
            try:
                session.commit()
            except Exception:
                # The batch failed as a whole: retry its compounds one at a
                # time so that a single bad row does not lose the others.
                session.rollback()
                _commitOneByOne(session, cache)
            cache = []

    try:
        session.commit()
    except BaseException as exc:
        import traceback
        traceback.print_exc()
        session.rollback()
        _commitOneByOne(session, cache)
        if not isinstance(exc, Exception):
            # Re-raise KeyboardInterrupt, SystemExit, etc. after the cleanup.
            raise exc
209
210
if __name__ == '__main__':
    # Command-line use: <script> <input.sdf> <output.db>
    import sys
    if len(sys.argv) < 3:
        # Report a usage message instead of failing with a bare IndexError.
        sys.exit('Usage: %s <input.sdf> <output.db>' % sys.argv[0])
    sdf = Chem.SDMolSupplier(sys.argv[1])
    db = sys.argv[2]
    LoadDb(sdf, db, addComputedProps=False)
    # Open a fresh session on the new database and report how many compounds
    # were stored; the count is computed by the database instead of loading
    # every row into memory.
    session = RegisterSchema('sqlite:///%s' % (db))()
    print('>>>>', session.query(Compound).count())
218