44 #include "EST_Wagon.h"
45 #include "EST_cutils.h"
46 #include "EST_multistats.h"
47 #include "EST_Token.h"
48 #include "EST_cmd_line.h"
// Forward declarations for the file-local helpers defined further down.
50 static int wagon_test_main(
int argc,
char **argv);
51 static LISP find_feature_value(
const char *feature,
52 LISP vector, LISP description);
53 static LISP wagon_vector_predict(LISP tree, LISP vector, LISP description);
// NOTE(review): the three fragments below are the tails of prototypes whose
// first lines are not present in this listing. The (tree, description,
// all_info) tail presumably belongs to simple_predict, which is called with
// five arguments later in the file; the other two cannot be attributed from
// what is visible here.
56 LISP tree, LISP description,
int all_info);
58 LISP tree, LISP description);
60 LISP tree, LISP description);
// Program entry point: passes argc and argv straight to wagon_test_main().
// NOTE(review): wagon_test_main() is declared to return int, but the visible
// call discards that value. The remaining lines of this body are not present
// in this listing, so how the process exit status is chosen cannot be
// confirmed from here.
63 int main(
int argc,
char **argv)
66 wagon_test_main(argc,argv);
// Command-line driver: reads a field description, a CART tree and a data
// file, then runs one of the prediction/test routines over the data.
//
// Options read through al on the visible lines: -heap, -desc, -tree, -data,
// -track, -o, -predictee and -predict_val.
//
// NOTE(review): several lines of this function (braces, the command-line
// parsing call, some conditions and exits) are not present in this listing;
// comments below describe only what the visible lines establish.
72 static int wagon_test_main(
int argc,
char **argv)
// NOTE(review): stray second semicolon at the end of this declaration.
77 LISP description,tree=NIL;;
// Usage text, built by concatenating string pieces with +.
// NOTE(review): the first line of the -track entry is missing from this
// listing; only its continuation line is visible.
84 "Summary: program to test CART models on data\n"+
85 "-desc <ifile> Field description file\n"+
86 "-data <ifile> Datafile, one vector per line\n"+
87 "-tree <ifile> File containing CART tree\n"+
89 " track for vertex indices\n"+
90 "-predict Predict for each vector returning full vector\n"+
91 "-predict_val Predict for each vector returning just value\n"+
92 "-predictee <string>\n"+
93 " name of field to predict (default is first field)\n"+
94 "-heap <int> {210000}\n"+
95 " Set size of Lisp heap, should not normally need\n"+
96 " to be changed from its default\n"+
97 "-o <ofile> File to save output in\n",
// Initialise the Lisp interpreter with the heap size given by -heap.
100 siod_init(al.
ival(
"-heap"));
// description is registered with gc_protect before being assigned, then set
// to the first element of whatever vload returns for the -desc file.
104 gc_protect(&description);
105 description = car(vload(al.
val(
"-desc"),1));
109 cerr << argv[0] <<
": no description file specified" << endl;
// The tree is loaded the same way, from the -tree file.
116 tree = car(vload(al.
val(
"-tree"),1));
119 cerr << argv[0] <<
": no tree found in \"" << al.
val(
"-tree")
126 cerr << argv[0] <<
": no tree file specified" << endl;
// Open the data stream; a non-zero result from open is the tested condition
// that precedes the open-failure message.
132 if (data.
open(al.
val(
"-data")) != 0)
134 cerr << argv[0] <<
": can't open data file \"" <<
135 al.
val(
"-data") <<
"\" for input." << endl;
141 cerr << argv[0] <<
": no data file specified" << endl;
// Load the track named by -track into wgn_VertexTrack. The guard around this
// call is not visible in this listing.
147 wgn_VertexTrack.
load(al.
val(
"-track"));
// Open the -o file for writing; a NULL result is reported as an error.
152 if ((wgn_output = fopen(al.
val(
"-o"),
"w")) == NULL)
154 cerr << argv[0] <<
": can't open output file \"" <<
155 al.
val(
"-o") <<
"\"" << endl;
// Look for the field named by -predictee among the description entries,
// counting positions in i. The statement run on a match is not visible here.
165 wgn_predictee_name = al.
val(
"-predictee");
166 for (l=description,i=0; l != NIL; l=cdr(l),i++)
167 if (streq(wgn_predictee_name,get_c_string(car(car(l)))))
// NOTE(review): this message streams wgn_predictee, which is used elsewhere
// in this file as the index argument of siod_nth, rather than the name held
// in wgn_predictee_name. Printing the name looks like the intent; confirm
// before changing.
174 cerr << argv[0] <<
": predictee \"" << wgn_predictee <<
175 "\" not in description\n";
// predict_type is the second element of the description entry selected by
// wgn_predictee; it drives the dispatch below.
178 const char *predict_type =
179 get_c_string(car(cdr(siod_nth(wgn_predictee,description))));
// Dispatch. NOTE(review): the condition guarding the first call is missing
// from this listing; given the usage text it is presumably a test for
// -predict, but that cannot be confirmed from the visible lines.
182 simple_predict(data,wgn_output,tree,description,FALSE);
183 else if (al.
present(
"-predict_val"))
184 simple_predict(data,wgn_output,tree,description,TRUE);
185 else if (streq(predict_type,
"float") ||
186 streq(predict_type,
"int"))
187 test_tree_float(data,wgn_output,tree,description);
189 else if (streq(predict_type,
"vector"))
190 test_tree_vector(data,wgn_output,tree,description);
193 test_tree_class(data,wgn_output,tree,description);
// The statement controlled by this test is not visible in this listing.
195 if (wgn_output != stdout)
// Body fragment that builds one Lisp data vector from the token stream,
// driven by the field description.
// NOTE(review): the signature and the token-reading lines are not present in
// this listing. This is presumably get_data_vector, which the routines below
// call as get_data_vector(data, description).
//
// One description entry is visited per iteration. Each value is pushed onto
// the front of v with cons, so v accumulates in reverse field order unless it
// is reversed on a line that is not visible here.
209 for (d=description; d != NIL; d=cdr(d))
// A newline in the whitespace before any token other than the first one is
// reported as a vector with the wrong number of features.
213 if ((d != description) && (t.whitespace().
contains(
"\n")))
215 cerr <<
"wagon_test: unexpected newline within vector " <<
216 t.
row() <<
" wrong number of features" << endl;
// The second element of the description entry selects the conversion:
// float and int fields are converted with atof and stored as Lisp floats.
// NOTE(review): atof gives no indication when the token is not numeric.
219 if (streq(get_c_string(car(cdr(car(d)))),
"float") ||
220 streq(get_c_string(car(cdr(car(d)))),
"int"))
221 v = cons(flocons(atof(t.string())),v);
// When that second element is _other_ and the token is not found among the
// remaining elements of the entry, the symbol _other_ is stored instead of
// the token.
222 else if ((streq(get_c_string(car(cdr(car(d)))),
"_other_")) &&
223 (siod_member_str(t.string(),cdr(car(d))) == NIL))
224 v = cons(strintern(
"_other_"),v);
// Otherwise the token text itself is interned and stored.
226 v = cons(strintern(t.string()),v);
// Predicts for every data vector and prints one line per vector to output.
// NOTE(review): the first line of the signature is not present in this
// listing; the name simple_predict is taken from the five-argument calls in
// wagon_test_main and should be confirmed.
233 LISP tree, LISP description,
int all_info)
// Keep reading vectors until get_data_vector returns NIL.
238 for (vector=get_data_vector(data,description);
239 vector != NIL; vector=get_data_vector(data,description))
241 predict = wagon_vector_predict(tree,vector,description);
// val is the printed form of either the last element of predict or of
// predict as a whole. NOTE(review): the lines choosing between the two
// assignments are missing here; presumably all_info selects, confirm which
// way round.
243 val = siod_sprint(car(reverse(predict)));
245 val = siod_sprint(predict);
246 fprintf(output,
"%s\n",(
const char *)val);
// Compares the predicted and actual numeric value for every data vector and
// prints an RMSE / correlation / mean-error summary line to output.
// NOTE(review): the first line of the signature is not present in this
// listing; this is presumably test_tree_float, the routine wagon_test_main
// selects for float and int predictees.
251 LISP tree, LISP description)
254 float predict_val,real_val;
259 for (vector=get_data_vector(data,description);
260 vector != NIL; vector=get_data_vector(data,description))
262 predict = wagon_vector_predict(tree,vector,description);
// Predicted value is the last element of the prediction; the actual value is
// the element of the data vector selected by wgn_predictee.
263 predict_val = get_c_float(car(reverse(predict)));
264 real_val = get_c_float(siod_nth(wgn_predictee,vector));
267 error = predict_val-real_val;
// Running sums of squares and of the cross product.
270 xx += predict_val*predict_val;
271 yy += real_val*real_val;
272 xy += predict_val*real_val;
// NOTE(review): the arguments of this fprintf are missing from this listing.
279 fprintf(output,
";; RMSE %1.4f Correlation is %1.4f Mean (abs) Error %1.4f (%1.4f)\n",
// Compares the predicted and actual class for every data vector, records each
// (actual, predicted) pair, prints a confusion matrix and then an
// entropy / perplexity line.
// NOTE(review): the first line of the signature is not present in this
// listing, so this may be either test_tree_vector or test_tree_class. The
// visible statements are identical to those of the routine that follows.
287 LISP tree, LISP description)
293 LISP vector,w,predict;
297 for (vector=get_data_vector(data,description);
298 vector != NIL; vector=get_data_vector(data,description))
300 predict = wagon_vector_predict(tree,vector,description);
// Predicted class is the last element of the prediction; the actual class is
// the element of the data vector selected by wgn_predictee.
301 predict_class = get_c_string(car(reverse(predict)));
302 real_class = get_c_string(siod_nth(wgn_predictee,vector));
// NOTE(review): the rest of this expression is missing from this listing.
303 prob = get_c_float(car(cdr(siod_assoc_str(real_class,
310 pairs.
add_item(real_class,predict_class,1);
// Collect the elements after the first one of the predictee description
// entry into lex, which is passed to print_confusion below.
312 for (w=cdr(siod_nth(wgn_predictee,description)); w != NIL; w = cdr(w))
313 lex.
append(get_c_string(car(w)));
316 print_confusion(m,pairs,lex);
// NOTE(review): this summary goes to stdout directly, although callers pass
// wgn_output as the second argument and the RMSE summary elsewhere in this
// file is written through output. Confirm whether stdout is intended.
317 fprintf(stdout,
";; entropy %g perplexity %g\n",
318 (-1*(H/Q)),pow(2.0,(-1*(H/Q))));
// Compares the predicted and actual class for every data vector, records each
// (actual, predicted) pair, prints a confusion matrix and then an
// entropy / perplexity line.
// NOTE(review): the first line of the signature is not present in this
// listing, so this may be either test_tree_class or test_tree_vector. The
// visible statements are identical to those of the routine that precedes it.
322 LISP tree, LISP description)
330 LISP vector,w,predict;
334 for (vector=get_data_vector(data,description);
335 vector != NIL; vector=get_data_vector(data,description))
337 predict = wagon_vector_predict(tree,vector,description);
// Predicted class is the last element of the prediction; the actual class is
// the element of the data vector selected by wgn_predictee.
338 predict_class = get_c_string(car(reverse(predict)));
339 real_class = get_c_string(siod_nth(wgn_predictee,vector));
// NOTE(review): the rest of this expression is missing from this listing.
340 prob = get_c_float(car(cdr(siod_assoc_str(real_class,
347 pairs.
add_item(real_class,predict_class,1);
// Collect the elements after the first one of the predictee description
// entry into lex, which is passed to print_confusion below.
349 for (w=cdr(siod_nth(wgn_predictee,description)); w != NIL; w = cdr(w))
350 lex.
append(get_c_string(car(w)));
353 print_confusion(m,pairs,lex);
// NOTE(review): this summary goes to stdout directly, although callers pass
// wgn_output as the second argument and the RMSE summary elsewhere in this
// file is written through output. Confirm whether stdout is intended.
354 fprintf(stdout,
";; entropy %g perplexity %g\n",
355 (-1*(H/Q)),pow(2.0,(-1*(H/Q))));
// Recursively walks the tree for one data vector and returns the result
// reached at the end of the walk.
//   tree        - current (sub)tree
//   vector      - data vector being classified
//   description - field description used to locate feature values
// NOTE(review): the statement executed for a leaf and the line between the
// two recursive returns (presumably an else) are missing from this listing.
358 static LISP wagon_vector_predict(LISP tree, LISP vector, LISP description)
// A node with nothing after its first element is treated as the end of the
// walk.
362 if (cdr(tree) == NIL)
// Otherwise the first element is the question: fetch the value of the feature
// it asks about from the data vector.
365 LISP value = find_feature_value(wgn_ques_feature(car(tree)),
366 vector, description);
// Descend into the second element when the question holds for this value,
// and into the third element otherwise.
368 if (wagon_ask_question(car(tree),value))
370 return wagon_vector_predict(car(cdr(tree)),vector,description);
373 return wagon_vector_predict(car(cdr(cdr(tree))),vector,description);
// Looks up the value of the named feature in a data vector.
//   feature     - feature name to search for
//   vector      - data vector holding one value per field
//   description - field description; the first element of each entry is
//                 compared against feature
// NOTE(review): the statement executed on a match and whatever follows the
// error message are missing from this listing.
376 static LISP find_feature_value(
const char *feature,
377 LISP vector, LISP description)
// vector and description are advanced in lockstep.
// NOTE(review): only v is tested against NIL, so a description shorter than
// the vector would be walked past its end; confirm callers guarantee equal
// lengths.
381 for (v=vector,d=description; v != NIL; v=cdr(v),d=cdr(d))
382 if (streq(feature,get_c_string(car(car(d)))))
// Reached when no description entry carried the requested name.
385 cerr <<
"wagon_test: can't find feature \"" << feature <<
386 "\" in description" << endl;
int row(void) const
Line number in original EST_TokenStream.
EST_TokenStream & get(EST_Token &t)
get next token in stream
double stddev(void) const
standard deviation of currently accumulated values
int ival(const EST_String &rkey, int m=1) const
double mean(void) const
mean of currently accumulated values
void close(void)
Close stream.
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)
const int present(const K &rkey) const
Returns true if key is present.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
void append(const T &item)
add item onto end of list
int contains(const char *s, int pos=-1) const
Does it contain this substring?