1
2
3
""" functionality for generating an image showing the results of a composite model
voting on a data set

  Uses *numpy* and *PIL*

"""
10 from __future__ import print_function
11
12 from PIL import Image, ImageDraw
13 import numpy
14
15
# NOTE(review): the `def` line of this function was missing from the source text;
# the signature is reconstructed from the call
# `CollectVotes(composite, data, badOnly)` made by VoteAndBuildImage.
# `badOnly=0` is used so that both 2- and 3-argument callers keep working.
def CollectVotes(composite, data, badOnly=0):
    """ collects the votes from _composite_ for the examples in _data_

    **Arguments**

      - composite: a composite model. It must provide
        _ClassifyExample(example)_, returning a 2-tuple whose first element
        is the prediction, and _GetVoteDetails()_, which is called right
        after each classification (presumably it returns the individual
        votes for that example -- confirm against the composite class)

      - data: a list of examples to run through _composite_. The last
        element of each example is used as its true result

      - badOnly: if set only bad (misclassified) examples will be kept

    **Returns**

      a 4-tuple containing:

        1) the expanded list of vote details (see below)

        2) the list of predicted results

        3) the list of true results

        4) the number of misclassified examples

    **Notes**

      - the expanded list of vote details consists of:

          '[ vote1, vote2, ... voteN, 0, res, trueRes]'

        where _res_ is the predicted result and _trueRes_ is the actual
        result. The extra zero is included to allow a line to be drawn
        between the votes and the results.

      - the three lists returned are parallel: entry i of each one refers
        to the same (kept) example.

    """
    res = []
    values = []
    trueValues = []
    misCount = 0
    for pt in data:
        val, _ = composite.ClassifyExample(pt)
        trueVal = pt[-1]
        isBad = val != trueVal
        if isBad:
            misCount += 1
        elif badOnly:
            # correctly classified examples are dropped when only the bad ones are wanted
            continue
        values.append(val)
        trueValues.append(trueVal)
        # list() copies the votes and also copes with tuple/array vote details,
        # for which `+` with a list would either fail or add element-wise
        res.append(list(composite.GetVoteDetails()) + [0, val, trueVal])
    return res, values, trueValues, misCount
65
66
def BuildVoteImage(nModels, data, values, trueValues=None, sortTrueVals=0, xScale=10, yScale=2,
                   addLine=1):
    """ constructs the actual image

    **Arguments**

      - nModels: the width, in pixels, of the unscaled image. Every row of
        _data_ has to contain exactly this many entries

      - data: the results of voting, one row per example

      - values: predicted values for each example

      - trueValues: true values for each example (optional; None and an
        empty sequence are treated the same way)

      - sortTrueVals: if nonzero (and _trueValues_ were provided) the rows
        will be sorted so that the _trueValues_ are in order, otherwise the
        sort is by _values_

      - xScale: number of pixels per vote in the x direction

      - yScale: number of pixels per example in the y direction

      - addLine: if nonzero, the image is converted to RGB and a purple
        vertical line is drawn at column _nModels_ - 3 (when _trueValues_
        were provided) or _nModels_ - 2 (otherwise)

    **Returns**

      a PIL image of size (_nModels_ * _xScale_, len(_data_) * _yScale_)

    **Raises**

      ValueError if _data_ is empty

    **Notes**

      - the data are rescaled so that the largest entry maps to 255; data
        whose maximum is not positive are left unscaled.

    """
    nData = len(data)
    if nData == 0:
        raise ValueError('no vote data provided; cannot build an image')
    # len() rather than a comparison with [] so that array inputs also work
    haveTrueVals = trueValues is not None and len(trueValues) > 0

    votes = numpy.array(data, dtype=int)
    if sortTrueVals and haveTrueVals:
        order = numpy.argsort(trueValues)
    else:
        order = numpy.argsort(values)
    # index with the order array so the result stays an ndarray: multiplying
    # a plain list of rows by 255 would repeat the list, not scale the values
    votes = votes[order]

    maxVal = votes.max()
    if maxVal > 0:
        votes = votes * 255 // maxVal
    pixels = votes.astype('B')

    # look the legacy names up lazily: evaluating them eagerly as getattr()
    # defaults raises AttributeError wherever they are missing
    makeImage = getattr(Image, 'frombytes', None) or Image.fromstring
    toBytes = getattr(pixels, 'tobytes', None) or pixels.tostring
    img = makeImage('L', (nModels, nData), toBytes())

    if addLine:
        img = img.convert('RGB')
        canvas = ImageDraw.Draw(img)
        lineX = nModels - 3 if haveTrueVals else nModels - 2
        canvas.line([(lineX, 0), (lineX, nData)], fill=(128, 0, 128))
    return img.resize((nModels * xScale, nData * yScale))
119
120
def VoteAndBuildImage(composite, data, badOnly=0, sortTrueVals=0, xScale=10, yScale=2, addLine=1):
    """ collects votes on the examples and constructs an image

    The number of models and the number of misclassified examples are
    printed to standard output along the way.

    **Arguments**

      - composite: a composite model; _len(composite)_ is used as the
        number of models

      - data: the examples to be voted upon

      - badOnly: if nonzero only the incorrect votes will be shown

      - sortTrueVals: if nonzero the votes will be sorted so
        that the _trueValues_ are in order, otherwise the sort
        is by _values_

      - xScale: number of pixels per vote in the x direction

      - yScale: number of pixels per example in the y direction

      - addLine: if nonzero, a purple line is drawn separating
        the votes from the results

    **Returns**

      a PIL image

    """
    nVoters = len(composite)
    print('nModels:', nVoters)

    res, values, trueValues, misCount = CollectVotes(composite, data, badOnly)
    print('%d examples were misclassified' % misCount)
    # every row produced by CollectVotes carries three columns after the
    # votes (separator zero, predicted result, true result)
    rowWidth = nVoters + 3
    return BuildVoteImage(rowWidth, res, values, trueValues, sortTrueVals, xScale, yScale, addLine)
155
156
# NOTE(review): the `def` line of this function was missing from the source text;
# it is reconstructed from the argument-less `Usage()` calls in the script section.
def Usage():
    """ provides a list of arguments for when this is used from the command line

    The message is printed to standard output, after which the process is
    terminated with _sys.exit(-1)_; this function never returns.

    """
    import sys

    lines = (
        'Usage: VoteImg.py [optional arguments] <modelfile.pkl> <datafile.qdat>',
        'Optional Arguments:',
        '\t-o outfilename: the name of the output image file.',
        '\t The extension determines the type of image saved.',
        '\t-b: only include bad (misclassified) examples',
        '\t-t: sort the results by the true (input) classification',
        '\t-x scale: scale the image along the x axis (default: 10)',
        '\t-y scale: scale the image along the y axis (default: 2)',
        '\t-d databasename: instead of using a qdat file, pull the data from',
        '\t a database. In this case the filename argument',
        '\t is used to indicate the name of the table in the database.',
        # -h is accepted by the option parser, so it is listed here as well
        '\t-h: print this message and exit',
    )
    for line in lines:
        print(line)

    sys.exit(-1)
176
177
if __name__ == '__main__':
    # command-line driver: load a pickled composite model and a data set,
    # build the vote image and save it to disk
    import sys
    import getopt
    from rdkit.ML.Data import DataUtils
    try:
        from rdkit.six.moves import cPickle
    except ImportError:
        # fall back to the standard library if rdkit.six cannot be imported
        import pickle as cPickle

    try:
        args, extra = getopt.getopt(sys.argv[1:], 'o:bthx:y:d:')
    except getopt.GetoptError as err:
        # unknown option or missing option argument: explain, then show the usage
        print(err, file=sys.stderr)
        Usage()
    if len(extra) < 2:
        Usage()

    badOnly = 0
    sortTrueVals = 0
    xScale = 10
    yScale = 2
    dbName = ''
    outFileName = 'foo.png'
    for arg, val in args:
        if arg == '-b':
            badOnly = 1
        elif arg == '-t':
            sortTrueVals = 1
        elif arg == '-o':
            outFileName = val
        elif arg in ('-x', '-y'):
            try:
                scale = int(val)
            except ValueError:
                print('%s requires an integer argument, got %r' % (arg, val), file=sys.stderr)
                Usage()  # exits, so scale is always bound below
            if arg == '-x':
                xScale = scale
            else:
                yScale = scale
        elif arg == '-d':
            dbName = val
        else:
            # '-h' (getopt has already rejected anything it does not know)
            Usage()

    # NOTE: unpickling can execute arbitrary code; only load model files
    # that come from a trusted source
    with open(extra[0], 'rb') as modelFile:
        model = cPickle.load(modelFile)

    fName = extra[1]
    if dbName == '':
        data = DataUtils.BuildQuantDataSet(fName)
    else:
        # with -d the second positional argument names the table to read
        data = DataUtils.DBToQuantData(dbName, fName)

    dataSet = data.GetNamedData()

    img = VoteAndBuildImage(model, dataSet, badOnly=badOnly, sortTrueVals=sortTrueVals,
                            xScale=xScale, yScale=yScale)
    img.save(outFileName)
224