1
2
3
4
5
6
7
8
9
10
11
12
13 """ EState fingerprinting
14
15 """
16 from __future__ import print_function
17 import numpy
18 from rdkit.Chem.EState import EStateIndices
19 from rdkit.Chem.EState import AtomTypes
20
21
def FingerprintMol(mol):
    """ generates the EState fingerprints for the molecule

    Concept from the paper: Hall and Kier JCICS _35_ 1039-1045 (1995)

    Arguments:
      - mol: the molecule to fingerprint; it is handed to EStateIndices()
        and queried through its GetSubstructMatches() method

    two numeric arrays are returned:
      The first (of ints) contains the number of times each possible atom
      type is hit
      The second (of floats) contains the sum of the EState indices for
      atoms of each type.

    Both arrays have one entry per pattern in AtomTypes.esPatterns, in the
    same order as that sequence.

    """
    # the atom-type patterns are only built the first time they are needed
    if AtomTypes.esPatterns is None:
        AtomTypes.BuildPatts()
    esIndices = EStateIndices(mol)

    nPatts = len(AtomTypes.esPatterns)
    # numpy.int / numpy.float were plain aliases of the builtins; they were
    # deprecated in NumPy 1.20 and removed in 1.24.  Using the builtins gives
    # the same dtypes and works with every NumPy release.
    counts = numpy.zeros(nPatts, dtype=int)
    sums = numpy.zeros(nPatts, dtype=float)

    for i, (_, pattern) in enumerate(AtomTypes.esPatterns):
        matches = mol.GetSubstructMatches(pattern, uniquify=1)
        counts[i] = len(matches)
        for match in matches:
            # the first atom of each match is the one whose EState index
            # contributes to the sum for this atom type
            sums[i] += esIndices[match[0]]
    return counts, sums
47
48
def _exampleCode():
    """ Demonstrates E-state fingerprinting on a few sample molecules

    For every SMILES in a small built-in list this prints:
      - the input SMILES and the SMILES regenerated from the parsed molecule
      - the atom types assigned to each atom (atoms numbered from 1)
      - the count and EState sum for every atom type that was hit
      - the EState index of each atom (atoms numbered from 1)
    followed by a separator line.

    """
    from rdkit import Chem
    for smi in ('CC', 'CCC', 'c1[nH]cnc1CC(N)C(O)=O', 'NCCc1ccc(O)c(O)c1'):
        m = Chem.MolFromSmiles(smi)
        print(smi, Chem.MolToSmiles(m))

        # per-atom type assignments
        types = AtomTypes.TypeAtoms(m)
        for idx in range(m.GetNumAtoms()):
            symbol = m.GetAtomWithIdx(idx).GetSymbol()
            print('%d %4s: %s' % (idx + 1, symbol, str(types[idx])))

        es = EStateIndices(m)
        counts, sums = FingerprintMol(m)

        # only report the atom types that actually occur in the molecule
        for (name, _), count, total in zip(AtomTypes.esPatterns, counts, sums):
            if count:
                print('%6s, % 2d, % 5.4f' % (name, count, total))

        # per-atom EState indices
        for pos, value in enumerate(es, 1):
            print('% 2d, % 5.4f' % (pos, value))
        print('--------')
68
69
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    _exampleCode()
72