1
2
3
4
5
6
7
8
9
10
11 """ Hybrid EState-VSA descriptors (like the MOE VSA descriptors)
12
13 """
14
15 import bisect
16
17 import numpy
18
19 from rdkit.Chem.EState.EState import EStateIndices as EStateIndices_
20 from rdkit.Chem.MolSurf import _LabuteHelper as VSAContribs_
# Default boundaries used to bin per-atom VSA contributions.
#
# These default VSA bins were chosen using the PP3K solubility data
# set. An arbitrary number of bins was selected and the
# boundaries were selected to give an approximately equal number of
# atoms per bin.
#
# The 9 ascending boundaries below delimit 10 bins: one below the first
# boundary, one at or above the last, and one between each adjacent pair.
vsaBins = [4.78, 5.00, 5.410, 5.740, 6.00, 6.07, 6.45, 7.00, 11.0]
31
32
# NOTE(review): the `def` line was missing from the reviewed text. The signature
# is rebuilt from the body and from the call `VSA_EState_(mol, force=False)`;
# confirm the default value of `force`.
def VSA_EState_(mol, bins=None, force=1):
    """ *Internal Use Only*

    Sums the per-atom EState indices of a molecule into bins that are selected
    by each atom's VSA contribution.

    Arguments:
      - mol: the molecule to process; the result is cached on it as
        ``mol._vsaEState``
      - bins: ascending boundaries applied to the VSA contributions;
        ``None`` selects the module-level ``vsaBins``
      - force: when false and ``mol`` already has a ``_vsaEState`` attribute,
        that cached array is returned unchanged (whatever bins produced it);
        the value is also forwarded to ``EStateIndices_``

    Returns a numpy float array with ``len(bins) + 1`` entries. Entry ``k``
    holds the summed EState indices of the atoms whose VSA contribution ``x``
    satisfies ``bins[k - 1] <= x < bins[k]`` (open-ended at both extremes).
    """
    if not force and hasattr(mol, '_vsaEState'):
        return mol._vsaEState

    if bins is None:
        # The values being binned are VSA contributions, so the default has to
        # be the VSA boundaries. This block previously fell back to
        # estateBins, which disagreed with the intervals advertised in the
        # generated VSA_EState docstrings. Results therefore differ from those
        # produced with the old default.
        bins = vsaBins
    propContribs = EStateIndices_(mol, force=force)
    volContribs = VSAContribs_(mol)

    # Plain `float` instead of the `numpy.float` alias, which NumPy removed.
    ans = numpy.zeros(len(bins) + 1, float)
    for i, prop in enumerate(propContribs):
        if prop is not None:
            # Atom i's contribution is read from slot i + 1 of volContribs
            # (presumably slot 0 holds a non-atom term; confirm against
            # _LabuteHelper).
            nbin = bisect.bisect_right(bins, volContribs[i + 1])
            ans[nbin] += prop
    mol._vsaEState = ans
    return ans
51
52
# Default boundaries used to bin per-atom EState indices.
#
# These default EState bins were chosen using the PP3K solubility data
# set. An arbitrary number of boundaries (10) was selected and the
# boundaries were selected to give an approximately equal number of
# atoms per bin.
#
# The 10 ascending boundaries below delimit 11 bins: one below the first
# boundary, one at or above the last, and one between each adjacent pair.
estateBins = [-0.390, 0.290, 0.717, 1.165, 1.540, 1.807, 2.05, 4.69, 9.17, 15.0]
62
63
# NOTE(review): the `def` line was missing from the reviewed text. The signature
# is rebuilt from the body and from the call `EState_VSA_(mol, force=False)`;
# confirm the default value of `force`.
def EState_VSA_(mol, bins=None, force=1):
    """ *Internal Use Only*

    Sums the per-atom VSA contributions of a molecule into bins that are
    selected by each atom's EState index.

    Arguments:
      - mol: the molecule to process; the result is cached on it as
        ``mol._eStateVSA``
      - bins: ascending boundaries applied to the EState indices;
        ``None`` selects the module-level ``estateBins``
      - force: when false and ``mol`` already has an ``_eStateVSA`` attribute,
        that cached array is returned unchanged (whatever bins produced it);
        the value is also forwarded to ``EStateIndices_``

    Returns a numpy float array with ``len(bins) + 1`` entries. Entry ``k``
    holds the summed VSA contributions of the atoms whose EState index ``x``
    satisfies ``bins[k - 1] <= x < bins[k]`` (open-ended at both extremes).
    """
    if not force and hasattr(mol, '_eStateVSA'):
        return mol._eStateVSA

    if bins is None:
        bins = estateBins
    propContribs = EStateIndices_(mol, force=force)
    volContribs = VSAContribs_(mol)

    # Plain `float` instead of the `numpy.float` alias, which NumPy removed.
    ans = numpy.zeros(len(bins) + 1, float)
    for i, prop in enumerate(propContribs):
        if prop is not None:
            nbin = bisect.bisect_right(bins, prop)
            # Atom i's contribution is read from slot i + 1 of volContribs
            # (presumably slot 0 holds a non-atom term; confirm against
            # _LabuteHelper).
            ans[nbin] += volContribs[i + 1]
    mol._eStateVSA = ans
    return ans
82
83
85 """ Create a docstring for the descriptor name """
86 if nbin == 0:
87 interval = "-inf < x < {0:.2f}".format(bins[nbin])
88 elif nbin < len(bins):
89 interval = " {0:.2f} <= x < {1:.2f}".format(bins[nbin - 1], bins[nbin])
90 else:
91 interval = " {0:.2f} <= x < inf".format(bins[nbin - 1])
92 return '{0} Descriptor {1} ({2})'.format(name, nbin + 1, interval)
93
94
96
# NOTE(review): the enclosing `def` line was missing from the reviewed text.
# The parameter follows from the free variable `nbin`; the function name is
# reconstructed and must be confirmed against the caller in
# _InstallDescriptors.
def _descriptor_VSA_EState(nbin):
    """ Build the descriptor function for a single VSA_EState bin

    Arguments:
      - nbin: zero-based index of the bin the descriptor reports

    Returns a ``(name, fn)`` tuple. ``name`` is ``'VSA_EState<nbin + 1>'`` and
    ``fn(mol)`` returns entry ``nbin`` of ``VSA_EState_(mol, force=False)``.
    ``fn`` carries the name, a generated docstring and a ``version`` attribute.
    """

    def VSA_EState_bin(mol):
        # force=False lets every per-bin descriptor share the array cached on
        # the molecule instead of recomputing it for each bin.
        return VSA_EState_(mol, force=False)[nbin]

    name = "VSA_EState{0}".format(nbin + 1)
    fn = VSA_EState_bin
    # Give the function its public name, as the EState_VSA builder does, so
    # that introspection does not report every bin as 'VSA_EState_bin'.
    fn.__name__ = name
    if hasattr(fn, '__qualname__'):
        fn.__qualname__ = name
    fn.__doc__ = _descriptorDocstring('VSA EState', nbin, vsaBins)
    fn.version = '1.0.0'
    return name, fn
105
106
108
# NOTE(review): the enclosing `def` line was missing from the reviewed text.
# The parameter follows from the free variable `nbin`; the function name is
# reconstructed and must be confirmed against the caller in
# _InstallDescriptors.
def _descriptor_EState_VSA(nbin):
    """ Build the descriptor function for a single EState_VSA bin

    Arguments:
      - nbin: zero-based index of the bin the descriptor reports

    Returns a ``(name, fn)`` tuple. ``name`` is ``'EState_VSA<nbin + 1>'`` and
    ``fn(mol)`` returns entry ``nbin`` of ``EState_VSA_(mol, force=False)``.
    ``fn`` carries the name, a generated docstring and a ``version`` attribute.
    """

    def EState_VSA_bin(mol):
        # force=False lets every per-bin descriptor share the array cached on
        # the molecule instead of recomputing it for each bin.
        return EState_VSA_(mol, force=False)[nbin]

    name = "EState_VSA{0}".format(nbin + 1)
    fn = EState_VSA_bin
    fn.__name__ = name
    # __qualname__ does not exist on every interpreter this may run on.
    if hasattr(fn, '__qualname__'):
        fn.__qualname__ = name
    fn.__doc__ = _descriptorDocstring('EState VSA', nbin, estateBins)
    fn.version = '1.0.1'
    return name, fn
120
121
130
131
132
# Runs once when the module is imported. The definition of _InstallDescriptors
# is not part of the reviewed text.
# NOTE(review): presumably it publishes the per-bin descriptor functions
# (VSA_EState1.., EState_VSA1..) as module attributes; confirm.
_InstallDescriptors()
134