1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """
32
33 QED stands for quantitative estimation of drug-likeness and the concept was for the first time
34 introduced by Richard Bickerton and coworkers [1]. The empirical rationale of the QED measure
35 reflects the underlying distribution of molecular properties including molecular weight, logP,
36 topological polar surface area, number of hydrogen bond donors and acceptors, the number of
37 aromatic rings and rotatable bonds, and the presence of unwanted chemical functionalities.
38
39 The QED results as generated by the RDKit-based implementation of Biscu-it(tm) are not completely
40 identical to those from the original publication [1]. These differences are a consequence of
41 differences within the underlying calculated property calculators used in both methods. For
42 example, discrepancies can be noted in the results from the logP calculations, nevertheless
43 despite the fact that both approaches (Pipeline Pilot in the original publication and RDKit
44 in our Biscu-it(tm) implementation) mention to use the Wildmann and Crippen methodology for the
45 calculation of their logP-values [2]. However, the differences in the resulting QED-values
46 are very small and are not compromising the usefulness of using Qed in your daily research.
47
48 [1] Bickerton, G.R.; Paolini, G.V.; Besnard, J.; Muresan, S.; Hopkins, A.L. (2012)
49 'Quantifying the chemical beauty of drugs',
50 Nature Chemistry, 4, 90-98 [http://dx.doi.org/10.1038/nchem.1243]
51
52 History:
53 2012-04 Adapted to internal RDkit implementation
54 2013-05 moved to rdkit.Chem.QED
55 """
56 from collections import namedtuple
57 import math
58
59 from rdkit import Chem
60 from rdkit.Chem import MolSurf, Crippen
61 from rdkit.Chem import rdMolDescriptors as rdmd
62 from rdkit.Chem.ChemUtils.DescriptorUtilities import setDescriptorVersion
63
64
65 QEDproperties = namedtuple('QEDproperties', 'MW,ALOGP,HBA,HBD,PSA,ROTB,AROM,ALERTS')
66 ADSparameter = namedtuple('ADSparameter', 'A,B,C,D,E,F,DMAX')
67
68 WEIGHT_MAX = QEDproperties(0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00)
69 WEIGHT_MEAN = QEDproperties(0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95)
70 WEIGHT_NONE = QEDproperties(1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00)
71
72 AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]')
73
74
75 AcceptorSmarts = [
76 '[oH0;X2]',
77 '[OH1;X2;v2]',
78 '[OH0;X2;v2]',
79 '[OH0;X1;v2]',
80 '[O-;X1]',
81 '[SH0;X2;v2]',
82 '[SH0;X1;v2]',
83 '[S-;X1]',
84 '[nH0;X2]',
85 '[NH0;X1;v3]',
86 '[$([N;+0;X3;v3]);!$(N[C,S]=O)]'
87 ]
88 Acceptors = [Chem.MolFromSmarts(hba) for hba in AcceptorSmarts]
89
90
91 StructuralAlertSmarts = [
92 '*1[O,S,N]*1',
93 '[S,C](=[O,S])[F,Br,Cl,I]',
94 '[CX4][Cl,Br,I]',
95 '[#6]S(=O)(=O)O[#6]',
96 '[$([CH]),$(CC)]#CC(=O)[#6]',
97 '[$([CH]),$(CC)]#CC(=O)O[#6]',
98 'n[OH]',
99 '[$([CH]),$(CC)]#CS(=O)(=O)[#6]',
100 'C=C(C=O)C=O',
101 'n1c([F,Cl,Br,I])cccc1',
102 '[CH1](=O)',
103 '[#8][#8]',
104 '[C;!R]=[N;!R]',
105 '[N!R]=[N!R]',
106 '[#6](=O)[#6](=O)',
107 '[#16][#16]',
108 '[#7][NH2]',
109 'C(=O)N[NH2]',
110 '[#6]=S',
111 '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]',
112 'C1(=[O,N])C=CC(=[O,N])C=C1',
113 'C1(=[O,N])C(=[O,N])C=CC=C1',
114 'a21aa3a(aa1aaaa2)aaaa3',
115 'a31a(a2a(aa1)aaaa2)aaaa3',
116 'a1aa2a3a(a1)A=AA=A3=AA=A2',
117 'c1cc([NH2])ccc1',
118 '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,' +
119 'Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]',
120 'I',
121 'OS(=O)(=O)[O-]',
122 '[N+](=O)[O-]',
123 'C(=O)N[OH]',
124 'C1NC(=O)NC(=O)1',
125 '[SH]',
126 '[S-]',
127 'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]',
128 'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]',
129 '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1',
130 '[CR1]1[CR1][CR1]cc[CR1][CR1]1',
131 '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1',
132 '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1',
133 '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1',
134 '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1',
135 'C#C',
136 '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]',
137 '[$([N+R]),$([n+R]),$([N+]=C)][O-]',
138 '[#6]=N[OH]',
139 '[#6]=NOC=O',
140 '[#6](=O)[CX4,CR0X3,O][#6](=O)',
141 'c1ccc2c(c1)ccc(=O)o2',
142 '[O+,o+,S+,s+]',
143 'N=C=O',
144 '[NX3,NX4][F,Cl,Br,I]',
145 'c1ccccc1OC(=O)[#6]',
146 '[CR0]=[CR0][CR0]=[CR0]',
147 '[C+,c+,C-,c-]',
148 'N=[N+]=[N-]',
149 'C12C(NC(N1)=O)CSC2',
150 'c1c([OH])c([OH,NH2,NH])ccc1',
151 'P',
152 '[N,O,S]C#N',
153 'C=C=O',
154 '[Si][F,Cl,Br,I]',
155 '[SX2]O',
156 '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)',
157 'O1CCCCC1OC2CCC3CCCCC3C2',
158 'N=[CR0][N,n,O,S]',
159 '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2',
160 'C=[C!r]C#N',
161 '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1',
162 '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1',
163 '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])',
164 '[OH]c1ccc([OH,NH2,NH])cc1',
165 'c1ccccc1OC(=O)O',
166 '[SX2H0][N]',
167 'c12ccccc1(SC(S)=N2)',
168 'c12ccccc1(SC(=S)N2)',
169 'c1nnnn1C=O',
170 's1c(S)nnc1NC=O',
171 'S1C=CSC1=S',
172 'C(=O)Onnn',
173 'OS(=O)(=O)C(F)(F)F',
174 'N#CC[OH]',
175 'N#CC(=O)',
176 'S(=O)(=O)C#N',
177 'N[CH2]C#N',
178 'C1(=O)NCC1',
179 'S(=O)(=O)[O-,OH]',
180 'NC[F,Cl,Br,I]',
181 'C=[C!r]O',
182 '[NX2+0]=[O+0]',
183 '[OR0,NR0][OR0,NR0]',
184 'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]',
185 '[CX2R0][NX3R0]',
186 'c1ccccc1[C;!R]=[C;!R]c2ccccc2',
187 '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]',
188 '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]',
189 '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]',
190 '[*]=[N+]=[*]',
191 '[SX3](=O)[O-,OH]',
192 'N#N',
193 'F.F.F.F',
194 '[R0;D2][R0;D2][R0;D2][R0;D2]',
195 '[cR,CR]~C(=O)NC(=O)~[cR,CR]',
196 'C=!@CC=[O,S]',
197 '[#6,#8,#16][#6](=O)O[#6]',
198 'c[C;R0](=[O,S])[#6]',
199 'c[SX2][C;!R]',
200 'C=C=C',
201 'c1nc([F,Cl,Br,I,S])ncc1',
202 'c1ncnc([F,Cl,Br,I,S])c1',
203 'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])',
204 '[#6]S(=O)(=O)c1ccc(cc1)F',
205 '[15N]',
206 '[13C]',
207 '[18O]',
208 '[34S]'
209 ]
210
211 StructuralAlerts = [Chem.MolFromSmarts(smarts) for smarts in StructuralAlertSmarts]
212
213 adsParameters = {
214 'MW': ADSparameter(A=2.817065973, B=392.5754953, C=290.7489764, D=2.419764353, E=49.22325677,
215 F=65.37051707, DMAX=104.9805561),
216 'ALOGP': ADSparameter(A=3.172690585, B=137.8624751, C=2.534937431, D=4.581497897, E=0.822739154,
217 F=0.576295591, DMAX=131.3186604),
218 'HBA': ADSparameter(A=2.948620388, B=160.4605972, C=3.615294657, D=4.435986202, E=0.290141953,
219 F=1.300669958, DMAX=148.7763046),
220 'HBD': ADSparameter(A=1.618662227, B=1010.051101, C=0.985094388, D=0.000000001, E=0.713820843,
221 F=0.920922555, DMAX=258.1632616),
222 'PSA': ADSparameter(A=1.876861559, B=125.2232657, C=62.90773554, D=87.83366614, E=12.01999824,
223 F=28.51324732, DMAX=104.5686167),
224 'ROTB': ADSparameter(A=0.010000000, B=272.4121427, C=2.558379970, D=1.565547684, E=1.271567166,
225 F=2.758063707, DMAX=105.4420403),
226 'AROM': ADSparameter(A=3.217788970, B=957.7374108, C=2.274627939, D=0.000000001, E=1.317690384,
227 F=0.375760881, DMAX=312.3372610),
228 'ALERTS': ADSparameter(A=0.010000000, B=1199.094025, C=-0.09002883, D=0.000000001, E=0.185904477,
229 F=0.875193782, DMAX=417.7253140),
230 }
231
232
233 -def ads(x, adsParameter):
234 """ ADS function """
235 p = adsParameter
236 exp1 = 1 + math.exp(-1 * (x - p.C + p.D / 2) / p.E)
237 exp2 = 1 + math.exp(-1 * (x - p.C - p.D / 2) / p.F)
238 dx = p.A + p.B / exp1 * (1 - 1 / exp2)
239 return dx / p.DMAX
240
243 """
244 Calculates the properties that are required to calculate the QED descriptor.
245 """
246 if mol is None:
247 raise ValueError('You need to provide a mol argument.')
248 mol = Chem.RemoveHs(mol)
249 qedProperties = QEDproperties(
250 MW=rdmd._CalcMolWt(mol),
251 ALOGP=Crippen.MolLogP(mol),
252 HBA=sum(len(mol.GetSubstructMatches(pattern)) for pattern in Acceptors
253 if mol.HasSubstructMatch(pattern)),
254 HBD=rdmd.CalcNumHBD(mol),
255 PSA=MolSurf.TPSA(mol),
256 ROTB=rdmd.CalcNumRotatableBonds(mol, rdmd.NumRotatableBondsOptions.Strict),
257 AROM=Chem.GetSSSR(Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)),
258 ALERTS=sum(1 for alert in StructuralAlerts if mol.HasSubstructMatch(alert)),
259 )
260
261
262
263
264
265
266 return qedProperties
267
268
269 @setDescriptorVersion(version='1.1.0')
270 -def qed(mol, w=WEIGHT_MEAN, qedProperties=None):
271 """ Calculate the weighted sum of ADS mapped properties
272
273 some examples from the QED paper, reference values from Peter G's original implementation
274 >>> m = Chem.MolFromSmiles('N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O')
275 >>> qed(m)
276 0.253...
277 >>> m = Chem.MolFromSmiles('CNC(=NCCSCc1nc[nH]c1C)NC#N')
278 >>> qed(m)
279 0.234...
280 >>> m = Chem.MolFromSmiles('CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12')
281 >>> qed(m)
282 0.234...
283 """
284 if qedProperties is None:
285 qedProperties = properties(mol)
286 d = [ads(pi, adsParameters[name]) for name, pi in qedProperties._asdict().items()]
287 t = sum(wi * math.log(di) for wi, di in zip(w, d))
288 return math.exp(t / sum(w))
289
292 """
293 Calculates the QED descriptor using maximal descriptor weights.
294 """
295 return qed(mol, w=WEIGHT_MAX)
296
299 """
300 Calculates the QED descriptor using average descriptor weights.
301 """
302 return qed(mol, w=WEIGHT_MEAN)
303
306 """
307 Calculates the QED descriptor using unit weights.
308 """
309 return qed(mol, w=WEIGHT_NONE)
310
313 """
314 Calculates the QED descriptor using average descriptor weights.
315 """
316 return weights_mean(mol)
317
324 import sys
325 import doctest
326 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
327 sys.exit(failed)
328
329
330 if __name__ == '__main__':
331 _runDoctests()
332