1 """
2 $Id$
3
4 Scoring - Calculate rank statistics
5
6 Created by Sereina Riniker, October 2012
7 after a file from Peter Gedeck, Greg Landrum
8
9 \param scores: ordered list with descending similarity containing
10 active/inactive information
11 \param col: column index in scores where active/inactive information is stored
12 \param fractions: list of fractions at which the value shall be calculated
13 \param alpha: exponential weight
14 """
15
16 import math
17
18
def CalcROC(scores, col):
    """ Determines a ROC curve

    Arguments:
      - scores: ranked sequence of rows (best-scored first); each row is
        indexable and row[col] is truthy for an active, falsy for an inactive
      - col: index inside each row where the active/inactive flag is stored

    Returns:
      a two-element list [TNR, TPR] with one value per row of scores:
        - TPR[i]: fraction of all actives found in rows 0..i
        - TNR[i]: fraction of all inactives found in rows 0..i
          (the historical name is kept; the value is the share of inactives
          already retrieved at that rank)
      If there are no actives (or no inactives) the corresponding list is
      left as all-zero counts instead of being normalised.

    Raises:
      ValueError: if scores is empty
    """
    numMol = len(scores)
    if numMol == 0:
        raise ValueError('score list is empty')

    TPR = []
    TNR = []
    numActives = 0
    numInactives = 0

    # walk down the ranked list and record the running counts at every rank
    for row in scores:
        if row[col]:
            numActives += 1
        else:
            numInactives += 1
        TPR.append(numActives)
        TNR.append(numInactives)

    # turn counts into rates; skipped for an absent class to avoid 0/0
    if numActives > 0:
        TPR = [1.0 * count / numActives for count in TPR]
    if numInactives > 0:
        TNR = [1.0 * count / numInactives for count in TNR]

    return [TNR, TPR]
45
46
def CalcAUC(scores, col):
    """ Determines the area under the ROC curve

    Arguments:
      - scores: ranked sequence of rows (best-scored first); row[col] is
        truthy for an active, falsy for an inactive
      - col: index inside each row where the active/inactive flag is stored

    Returns:
      the area under the curve produced by CalcROC, integrated with the
      trapezoidal rule over consecutive curve points

    Raises:
      ValueError: if scores is empty (raised by CalcROC)
    """
    TNR, TPR = CalcROC(scores, col)

    # trapezoidal rule: width along the first list times the summed heights
    # of the two neighbouring points; the common factor 0.5 is applied once
    # at the end
    AUC = 0
    for i in range(len(scores) - 1):
        AUC += (TNR[i + 1] - TNR[i]) * (TPR[i + 1] + TPR[i])

    return 0.5 * AUC
62
63
def _RIEHelper(scores, col, alpha):
    """ Computes the robust initial enhancement (RIE) and the number of actives

    Arguments:
      - scores: ranked sequence of rows (best-scored first); row[col] is
        truthy for an active, falsy for an inactive
      - col: index inside each row where the active/inactive flag is stored
      - alpha: exponential weight; converted with float() and must be > 0

    Returns:
      a tuple (RIE, numActives); RIE is 0.0 when there are no actives

    Raises:
      ValueError: if scores is empty or alpha is not greater than zero
    """
    numMol = len(scores)
    alpha = float(alpha)
    if numMol == 0:
        raise ValueError('score list is empty')
    if alpha <= 0.0:
        raise ValueError('alpha must be greater than zero')

    # Same quantity as (1 - exp(-alpha)) / (exp(alpha / numMol) - 1), but
    # written with expm1: for very small alpha the plain "exp(x) - 1" rounds
    # to 0.0 and the division below would fail although alpha is valid.
    denom = 1.0 / numMol * (-math.expm1(-alpha) / math.expm1(alpha / numMol))
    numActives = 0
    sum_exp = 0

    # every active contributes exp(-alpha * rank / numMol), rank starting at 1
    for rank, row in enumerate(scores, 1):
        if row[col]:
            numActives += 1
            sum_exp += math.exp(-(alpha * rank) / numMol)

    if numActives > 0:
        RIE = sum_exp / (numActives * denom)
    else:
        RIE = 0.0

    return RIE, numActives
89
90
def CalcRIE(scores, col, alpha):
    """ RIE originally defined here:
    Sheridan, R.P., Singh, S.B., Fluder, E.M. & Kearsley, S.K.
    Protocols for Bridging the Peptide to Nonpeptide Gap in Topological Similarity Searches.
    J. Chem. Inf. Comp. Sci. 41, 1395-1406 (2001).

    Arguments:
      - scores: ranked sequence of rows (best-scored first); row[col] is
        truthy for an active, falsy for an inactive
      - col: index inside each row where the active/inactive flag is stored
      - alpha: exponential weight, must be greater than zero

    Returns:
      the RIE value computed by _RIEHelper (0.0 when there are no actives)

    Raises:
      ValueError: if scores is empty or alpha is not greater than zero
        (raised by _RIEHelper)
    """
    # the helper also returns the number of actives, which is not needed here
    return _RIEHelper(scores, col, alpha)[0]
99
100
def CalcBEDROC(scores, col, alpha):
    """ BEDROC originally defined here:
    Truchon, J. & Bayly, C.I.
    Evaluating Virtual Screening Methods: Good and Bad Metric for the "Early Recognition"
    Problem. J. Chem. Inf. Model. 47, 488-508 (2007).

    Arguments:
      - scores: ranked sequence of rows (best-scored first); row[col] is
        truthy for an active, falsy for an inactive
      - col: index inside each row where the active/inactive flag is stored
      - alpha: exponential weight, must be greater than zero

    Returns:
      (RIE - RIEmin) / (RIEmax - RIEmin); 1.0 when RIEmax equals RIEmin
      (e.g. every row is an active) and 0.0 when there are no actives

    Raises:
      ValueError: if scores is empty or alpha is not greater than zero
        (raised by _RIEHelper)
    """
    RIE, numActives = _RIEHelper(scores, col, alpha)
    if numActives == 0:
        return 0.0

    # the helper accepts anything float() understands; do the same here
    alpha = float(alpha)
    ratio = 1.0 * numActives / len(scores)
    RIEmax = (1 - math.exp(-alpha * ratio)) / (ratio * (1 - math.exp(-alpha)))
    # Algebraically identical to
    #   (1 - exp(alpha * ratio)) / (ratio * (1 - exp(alpha)))
    # (multiply numerator and denominator by exp(-alpha)), but it only
    # evaluates exp() at non-positive arguments, so a large alpha can no
    # longer trigger an OverflowError in math.exp.
    RIEmin = RIEmax * math.exp(-alpha * (1 - ratio))

    if RIEmax != RIEmin:
        return (RIE - RIEmin) / (RIEmax - RIEmin)
    return 1.0
124
125
def CalcEnrichment(scores, col, fractions):
    """ Determines the enrichment factor for a set of fractions

    Arguments:
      - scores: ranked sequence of rows (best-scored first); row[col] is
        truthy for an active, falsy for an inactive
      - col: index inside each row where the active/inactive flag is stored
      - fractions: non-empty sequence of fractions in [0, 1]; any order,
        duplicates allowed

    Returns:
      a list with exactly one enrichment factor per entry of fractions, in
      the same order. For a fraction f the top k = ceil(len(scores) * f)
      rows are inspected (at least one row, at most all rows) and the result
      is (actives in top k / k) / (all actives / len(scores)).
      If scores holds no actives every entry is 0.0.

    Raises:
      ValueError: if scores or fractions is empty, or a fraction lies
        outside [0, 1]
    """
    numMol = len(scores)
    if numMol == 0:
        raise ValueError('score list is empty')
    if len(fractions) == 0:
        raise ValueError('fraction list is empty')
    for frac in fractions:
        if frac > 1 or frac < 0:
            raise ValueError('fractions must be between [0,1]')

    # cumActives[k] = number of actives among the k top-ranked rows
    cumActives = [0]
    for row in scores:
        cumActives.append(cumActives[-1] + (1 if row[col] else 0))
    numActives = cumActives[-1]

    if numActives == 0:
        return [0.0] * len(fractions)

    enrich = []
    for frac in fractions:
        # Each fraction is evaluated on its own cutoff, so the result does not
        # depend on ordering or duplicates, and a cutoff equal to the whole
        # list (e.g. fraction 1.0) still yields a value.
        cutoff = min(numMol, max(1, int(math.ceil(numMol * frac))))
        enrich.append(1.0 * cumActives[cutoff] * numMol / cutoff / numActives)
    return enrich
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186