1
2
3
4
5
6 from __future__ import print_function
7 from rdkit import RDLogger as logging
8 logger = logging.logger()
9 logger.setLevel(logging.INFO)
10 from rdkit import Chem
11 from rdkit.Chem import Crippen
12 from rdkit.Chem import AllChem
13 from rdkit.Chem.ChemUtils.AlignDepict import AlignDepict
14
15 import sys
16 _version = "0.8.0"
17 _greet = "This is TemplateExpand version %s" % _version
18
19 _usage = """
20 Usage: TemplateExpand [options] template <sidechains>
21
22 Unless otherwise indicated, the template and sidechains are assumed to be
23 Smiles
24
25 Each sidechain entry should be:
26 [-r] SMARTS filename
27 The SMARTS pattern is used to recognize the attachment point,
28 if the -r argument is not provided, then atoms matching the pattern
29 will be removed from the sidechains.
30 or
31 -n filename
32 where the attachment atom is the first atom in each molecule
33 The filename provides the list of potential sidechains.
34
35 options:
36 -o filename.sdf: provides the name of the output file, otherwise
37 stdout is used
38
39 --sdf : expect the sidechains to be in SD files
40
41 --moltemplate: the template(s) are in a mol/SD file, new depiction(s)
42 will not be generated unless the --redraw argument is also
43 provided
44
45 --smilesFileTemplate: extract the template(s) from a SMILES file instead of
46 expecting SMILES on the command line.
47
48 --redraw: generate a new depiction for the molecular template(s)
49
50 --useall:
51 or
52 --useallmatches: generate a product for each possible match of the attachment
53 pattern to each sidechain. If this is not provided, the first
54 match (not canonically defined) will be used.
55
56 --force: by default, the program prompts the user if the library is
57 going to contain more than 1000 compounds. This argument
58 disables the prompt.
59
60 --templateSmarts="smarts": provides a space-delimited list containing the SMARTS
61 patterns to be used to recognize attachment points in
62 the template
63
64 --autoNames: when set this toggle causes the resulting compounds to be named
65 based on there sequence id in the file, e.g.
66 "TemplateEnum: Mol_1", "TemplateEnum: Mol_2", etc.
67 otherwise the names of the template and building blocks (from
68 the input files) will be combined to form a name for each
69 product molecule.
70
71 --3D : Generate 3d coordinates for the product molecules instead of 2d coordinates,
72 requires the --moltemplate option
73
74 --tether : refine the 3d conformations using a tethered minimization
75
76
77 """
78
79
81 print(_usage, file=sys.stderr)
82 sys.exit(-1)
83
84 nDumped = 0
85
86
87 -def _exploder(mol, depth, sidechains, core, chainIndices, autoNames=True, templateName='',
88 resetCounter=True, do3D=False, useTethers=False):
89 global nDumped
90 if resetCounter:
91 nDumped = 0
92 ourChains = sidechains[depth]
93 patt = '[%d*]' % (depth + 1)
94 patt = Chem.MolFromSmiles(patt)
95 for i, (chainIdx, chain) in enumerate(ourChains):
96 tchain = chainIndices[:]
97 tchain.append((i, chainIdx))
98 rs = Chem.ReplaceSubstructs(mol, patt, chain, replaceAll=True)
99 if rs:
100 r = rs[0]
101 if depth < len(sidechains) - 1:
102 for entry in _exploder(r, depth + 1, sidechains, core, tchain, autoNames=autoNames,
103 templateName=templateName, resetCounter=0, do3D=do3D,
104 useTethers=useTethers):
105 yield entry
106 else:
107 try:
108 Chem.SanitizeMol(r)
109 except ValueError:
110 import traceback
111 traceback.print_exc()
112 continue
113 if not do3D:
114 if r.HasSubstructMatch(core):
115 try:
116 AlignDepict(r, core)
117 except Exception:
118 import traceback
119 traceback.print_exc()
120 print(Chem.MolToSmiles(r), file=sys.stderr)
121 else:
122 print('>>> no match', file=sys.stderr)
123 AllChem.Compute2DCoords(r)
124 else:
125 r = Chem.AddHs(r)
126 AllChem.ConstrainedEmbed(r, core, useTethers)
127 Chem.Kekulize(r)
128 if autoNames:
129 tName = "TemplateEnum: Mol_%d" % (nDumped + 1)
130 else:
131 tName = templateName
132 for bbI, bb in enumerate(tchain):
133 bbMol = sidechains[bbI][bb[0]][1]
134 if bbMol.HasProp('_Name'):
135 bbNm = bbMol.GetProp('_Name')
136 else:
137 bbNm = str(bb[1])
138 tName += '_' + bbNm
139
140 r.SetProp("_Name", tName)
141 r.SetProp('seq_num', str(nDumped + 1))
142 r.SetProp('reagent_indices', '_'.join([str(x[1]) for x in tchain]))
143 for bbI, bb in enumerate(tchain):
144 bbMol = sidechains[bbI][bb[0]][1]
145 if bbMol.HasProp('_Name'):
146 bbNm = bbMol.GetProp('_Name')
147 else:
148 bbNm = str(bb[1])
149 r.SetProp('building_block_%d' % (bbI + 1), bbNm)
150 r.SetIntProp('_idx_building_block_%d' % (bbI + 1), bb[1])
151 for propN in bbMol.GetPropNames():
152 r.SetProp('building_block_%d_%s' % (bbI + 1, propN), bbMol.GetProp(propN))
153 nDumped += 1
154 if not nDumped % 100:
155 logger.info('Done %d molecules' % nDumped)
156 yield r
157
158
159 -def Explode(template, sidechains, outF, autoNames=True, do3D=False, useTethers=False):
160 chainIndices = []
161 core = Chem.DeleteSubstructs(template, Chem.MolFromSmiles('[*]'))
162 try:
163 templateName = template.GetProp('_Name')
164 except KeyError:
165 templateName = "template"
166 for mol in _exploder(template, 0, sidechains, core, chainIndices, autoNames=autoNames,
167 templateName=templateName, do3D=do3D, useTethers=useTethers):
168 outF.write(Chem.MolToMolBlock(mol))
169 for pN in mol.GetPropNames():
170 print('> <%s>\n%s\n' % (pN, mol.GetProp(pN)), file=outF)
171 print('$$$$', file=outF)
172
173
175 dummyPatt = Chem.MolFromSmiles('[*]')
176 matches = mol.GetSubstructMatches(dummyPatt)
177 res = []
178 for match in matches:
179 matchIdx = match[0]
180 smi = Chem.MolToSmiles(mol, True, rootedAtAtom=matchIdx)
181 entry = Chem.MolFromSmiles(smi)
182
183
184 entry = Chem.DeleteSubstructs(entry, dummyPatt)
185 for propN in mol.GetPropNames():
186 entry.SetProp(propN, mol.GetProp(propN))
187
188
189
190 res.append(entry)
191 if not useAll:
192 break
193 return res
194
195
197 if sma:
198 patt = Chem.MolFromSmarts(sma)
199 if patt is None:
200 logger.error('could not construct pattern from smarts: %s' % sma, exc_info=True)
201 return None
202 else:
203 patt = None
204
205 if replace:
206 replacement = Chem.MolFromSmiles('[*]')
207
208 res = []
209 for idx, mol in enumerate(suppl):
210 if not mol:
211 continue
212 if patt:
213 if not mol.HasSubstructMatch(patt):
214 logger.warning(
215 'The substructure pattern did not match sidechain %d. This may result in errors.' %
216 (idx + 1))
217 if replace:
218 tmp = list(Chem.ReplaceSubstructs(mol, patt, replacement))
219 if not useAll:
220 tmp = [tmp[0]]
221 for i, entry in enumerate(tmp):
222 entry = MoveDummyNeighborsToBeginning(entry)
223 if not entry:
224 continue
225 entry = entry[0]
226
227 for propN in mol.GetPropNames():
228 entry.SetProp(propN, mol.GetProp(propN))
229
230
231 tmp[i] = (idx + 1, entry)
232 else:
233
234
235 matches = mol.GetSubstructMatches(patt)
236 if matches:
237 tmp = []
238 for match in matches:
239 smi = Chem.MolToSmiles(mol, True, rootedAtAtom=match[0])
240 entry = Chem.MolFromSmiles(smi)
241 for propN in mol.GetPropNames():
242 entry.SetProp(propN, mol.GetProp(propN))
243
244
245
246 tmp.append((idx + 1, entry))
247 else:
248 tmp = None
249 else:
250 tmp = [(idx + 1, mol)]
251 if tmp:
252 res.extend(tmp)
253 return res
254
255
256 if __name__ == '__main__':
257 import getopt
258 print(_greet, file=sys.stderr)
259
260 try:
261 args, extras = getopt.getopt(sys.argv[1:], 'o:h', [
262 'sdf',
263 'moltemplate',
264 'molTemplate',
265 'smilesFileTemplate',
266 'templateSmarts=',
267 'redraw',
268 'force',
269 'useall',
270 'useallmatches',
271 'autoNames',
272 '3D',
273 '3d',
274 'tethers',
275 'tether',
276 ])
277 except Exception:
278 import traceback
279 traceback.print_exc()
280 Usage()
281
282 if len(extras) < 3:
283 Usage()
284
285 tooLong = 1000
286 sdLigands = False
287 molTemplate = False
288 redrawTemplate = False
289 outF = None
290 forceIt = False
291 useAll = False
292 templateSmarts = []
293 smilesFileTemplate = False
294 autoNames = False
295 do3D = False
296 useTethers = False
297 for arg, val in args:
298 if arg == '-o':
299 outF = val
300 elif arg == '--sdf':
301 sdLigands = True
302 elif arg in ('--moltemplate', '--molTemplate'):
303 molTemplate = True
304 elif arg == '--smilesFileTemplate':
305 smilesFileTemplate = True
306 elif arg == '--templateSmarts':
307 templateSmarts = val
308 elif arg == '--redraw':
309 redrawTemplate = True
310 elif arg == '--force':
311 forceIt = True
312 elif arg == '--autoNames':
313 autoNames = True
314 elif arg in ('--useall', '--useallmatches'):
315 useAll = True
316 elif arg in ('--3D', '--3d'):
317 do3D = True
318 elif arg in ('--tethers', '--tether'):
319 useTethers = True
320 elif arg == '-h':
321 Usage()
322 sys.exit(0)
323
324 if do3D:
325 if not molTemplate:
326 raise ValueError('the --3D option is only useable in combination with --moltemplate')
327 if redrawTemplate:
328 logger.warning(
329 '--redrawTemplate does not make sense in combination with --molTemplate. removing it')
330 redrawTemplate = False
331
332 if templateSmarts:
333 splitL = templateSmarts.split(' ')
334 templateSmarts = []
335 for i, sma in enumerate(splitL):
336 patt = Chem.MolFromSmarts(sma)
337 if not patt:
338 raise ValueError('could not convert smarts "%s" to a query' % sma)
339 if i >= 4:
340 i += 1
341 replace = Chem.MolFromSmiles('[%d*]' % (i + 1))
342 templateSmarts.append((patt, replace))
343
344 if molTemplate:
345 removeHs = not do3D
346 try:
347 s = Chem.SDMolSupplier(extras[0], removeHs=removeHs)
348 templates = [x for x in s]
349 except Exception:
350 logger.error('Could not construct templates from input file: %s' % extras[0], exc_info=True)
351 sys.exit(1)
352 if redrawTemplate:
353 for template in templates:
354 AllChem.Compute2DCoords(template)
355 else:
356 if not smilesFileTemplate:
357 try:
358 templates = [Chem.MolFromSmiles(extras[0])]
359 except Exception:
360 logger.error('Could not construct template from smiles: %s' % extras[0], exc_info=True)
361 sys.exit(1)
362 else:
363 try:
364 s = Chem.SmilesMolSupplier(extras[0], titleLine=False)
365 templates = [x for x in s]
366 except Exception:
367 logger.error('Could not construct templates from input file: %s' % extras[0], exc_info=True)
368 sys.exit(1)
369 for template in templates:
370 AllChem.Compute2DCoords(template)
371 if templateSmarts:
372 finalTs = []
373 for i, template in enumerate(templates):
374 for j, (patt, replace) in enumerate(templateSmarts):
375 if not template.HasSubstructMatch(patt):
376 logger.error('template %d did not match sidechain pattern %d, skipping it' %
377 (i + 1, j + 1))
378 template = None
379 break
380 template = Chem.ReplaceSubstructs(template, patt, replace)[0]
381 if template:
382 Chem.SanitizeMol(template)
383 finalTs.append(template)
384 templates = finalTs
385
386 sidechains = []
387 pos = 1
388 while pos < len(extras):
389 if extras[pos] == '-r':
390 replaceIt = False
391 pos += 1
392 else:
393 replaceIt = True
394 if extras[pos] == '-n':
395 sma = None
396 else:
397 sma = extras[pos]
398 pos += 1
399 try:
400 dat = extras[pos]
401 except IndexError:
402 logger.error('missing a sidechain filename')
403 sys.exit(-1)
404 pos += 1
405 if sdLigands:
406 try:
407 suppl = Chem.SDMolSupplier(dat)
408 except Exception:
409 logger.error('could not construct supplier from SD file: %s' % dat, exc_info=True)
410 suppl = []
411 else:
412 tmpF = file(dat, 'r')
413 inL = tmpF.readline()
414 if len(inL.split(' ')) < 2:
415 nmCol = -1
416 else:
417 nmCol = 1
418 try:
419 suppl = Chem.SmilesMolSupplier(dat, nameColumn=nmCol)
420 except Exception:
421 logger.error('could not construct supplier from smiles file: %s' % dat, exc_info=True)
422 suppl = []
423 suppl = [x for x in suppl]
424 chains = ConstructSidechains(suppl, sma=sma, replace=replaceIt, useAll=useAll)
425 if chains:
426 sidechains.append(chains)
427 count = 1
428 for chain in sidechains:
429 count *= len(chain)
430 count *= len(templates)
431 if not sidechains or not count:
432 print("No molecules to be generated.", file=sys.stderr)
433 sys.exit(0)
434
435 if not forceIt and count > tooLong:
436 print("This will generate %d molecules." % count, file=sys.stderr)
437 print("Continue anyway? [no] ", file=sys.stderr, end='')
438 sys.stderr.flush()
439 ans = sys.stdin.readline().strip()
440 if ans not in ('y', 'yes', 'Y', 'YES'):
441 sys.exit(0)
442
443 if outF and outF != "-":
444 try:
445 outF = file(outF, 'w+')
446 except IOError:
447 logger.error('could not open file %s for writing' % (outF), exc_info=True)
448 else:
449 outF = sys.stdout
450
451 for template in templates:
452 Explode(template, sidechains, outF, autoNames=autoNames, do3D=do3D, useTethers=useTethers)
453