// CSvmMaxLikelihoodMS.cpp
// Maximum-likelihood model selection (MLMS) for the C-SVM -- the Shark tutorial example.
#include <shark/Data/Dataset.h>
// (the further headers this example needs; paths assume the Shark 3.x layout)
#include <shark/Data/DataDistribution.h>
#include <shark/Data/CVDatasetTools.h>
#include <shark/Data/Statistics.h>
#include <shark/Models/Kernels/ArdKernel.h>
#include <shark/Models/Normalizer.h>
#include <shark/Algorithms/Trainers/NormalizeComponentsUnitVariance.h>
#include <shark/Algorithms/Trainers/CSvmTrainer.h>
#include <shark/Algorithms/GradientDescent/Rprop.h>
#include <shark/ObjectiveFunctions/SvmLogisticInterpretation.h>
#include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h>

using namespace std;
using namespace shark;


// define the basic dimensionality of the problem
unsigned int useful_dim = 5;
unsigned int noise_dim = 5;
unsigned int total_dim = useful_dim + noise_dim;

RealVector run_one_trial( bool verbose ) {

    // set up the classification problem from a DataDistribution
    PamiToy problem( useful_dim, noise_dim );
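    // (In the PamiToy distribution only the first useful_dim input dimensions
    //  carry class information; the remaining noise_dim dimensions are pure
    //  noise. A well-tuned ARD kernel should therefore end up with clearly
    //  larger weights on the first five dimensions than on the last five.)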

    // construct training and test sets from the problem distribution
    unsigned int train_size = 500;
    unsigned int test_size = 5000;
    ClassificationDataset train = problem.generateDataset( train_size );
    ClassificationDataset test = problem.generateDataset( test_size );

    // normalize the data as usual; the 'false' constructor flag requests
    // unit-variance scaling only, without mean-centering
    Normalizer<> normalizer;
    NormalizeComponentsUnitVariance<> normalizationTrainer(false);
    normalizationTrainer.train( normalizer, train.inputs() );
    train = transformInputs( train, normalizer );
    test = transformInputs( test, normalizer );

    // set up the ArdKernel
    DenseARDKernel kernel( total_dim, 0.1 ); //for now with arbitrary value for gamma (gets properly initialized later)
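    // (The ARD kernel uses one bandwidth per input dimension,
    //  k(x,z) = exp( -sum_i gamma_i*(x_i - z_i)^2 ),
    //  and its external parameters p_i encode the bandwidths as gamma_i = p_i^2,
    //  which is why the gammas are recovered below by squaring the entries of
    //  kernel.parameterVector().)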

    // set up partitions for cross-validation
    unsigned int num_folds = 5;
    CVFolds<ClassificationDataset> cv_folds = createCVIID( train, num_folds );
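    // (createCVIID assigns every training point to one of the num_folds folds
    //  uniformly at random, i.e., it treats the data as i.i.d.; these folds
    //  supply the cross-validated predictions on which the model-selection
    //  criterion below is computed.)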

    // set up the learning machine
    bool log_enc_c = true; //use log encoding for the regularization parameter C
    QpStoppingCondition stop(1e-12); //use a very conservative stopping criterion for the individual SVM runs
    SvmLogisticInterpretation<> mlms( cv_folds, &kernel, log_enc_c, &stop ); //the main class for this tutorial
    //SvmLogisticInterpretation<> mlms( cv_folds, &kernel, log_enc_c ); //also possible without stopping criterion
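    // (SvmLogisticInterpretation implements the maximum-likelihood model
    //  selection criterion: a logistic model is fitted to the cross-validated
    //  SVM outputs, and the criterion value is the negative cross-validated
    //  log-likelihood -- the "NCLL" printed during the optimization below.
    //  In contrast to the plain cross-validation error, this criterion is
    //  differentiable with respect to C and all kernel parameters, which is
    //  what makes gradient-based tuning of the 11 hyperparameters feasible.)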

    // set up a starting point for the optimization process
    RealVector start( total_dim+1 );
    if ( log_enc_c ) start( total_dim ) = 0.0; else start( total_dim ) = 1.0; //start at C = 1.0
    for ( unsigned int k=0; k<total_dim; k++ )
        start(k) = 0.5 / total_dim;
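    // (Layout of the search point: entries 0..total_dim-1 are the kernel
    //  parameters, so the gammas initially equal (0.5/total_dim)^2; the last
    //  entry is the regularization parameter in its chosen encoding, and
    //  under log-encoding the value 0.0 corresponds to C = exp(0) = 1.)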

    // for illustration purposes, we also evaluate the model selection criterion a single time at the starting point
    double start_value = mlms.eval( start );

    if ( verbose ) {
        std::cout << "Value of model selection criterion at starting point: " << start_value << std::endl << std::endl;
        std::cout << " -------------------------------------------------------------------------------- " << std::endl;
        std::cout << " ----------- Beginning gradient-based optimization of MLMS criterion ------------ " << std::endl;
        std::cout << " -------------------------------------------------------------------------------- " << std::endl << std::endl;
    }

    // set up the optimizer
    IRpropPlus rprop;
    double stepsize = 0.1;
    double stop_delta = 1e-3;
    rprop.init( mlms, start, stepsize );
    unsigned int its = 50;
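    // (IRpropPlus is the "improved Rprop with backtracking" variant: it keeps
    //  one adaptive step size per coordinate, growing it while the sign of the
    //  partial derivative stays the same and shrinking it on a sign change.
    //  Since only gradient signs are used, the method is insensitive to the
    //  overall scale of the criterion; stepsize sets the initial step sizes.)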

    // start the optimization loop
    for (unsigned int i=0; i<its; i++) {
        rprop.step( mlms );
        if ( verbose )
            std::cout << "iteration " << i << ": current NCLL = " << rprop.solution().value << " at parameter: " << rprop.solution().point << std::endl;
        if ( rprop.maxDelta() < stop_delta ) {
            if ( verbose ) std::cout << " Rprop quit because of small progress " << std::endl;
            break;
        }
    }
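    // (rprop.maxDelta() returns the largest of the current per-coordinate step
    //  sizes. Once even the largest step has shrunk below stop_delta, the
    //  iterates can barely move anymore, so we treat the run as converged.)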

    if ( verbose ) {
        std::cout << std::endl;
        std::cout << " -------------------------------------------------------------------------------- " << std::endl;
        std::cout << " ----------- Done with gradient-based optimization of MLMS criterion ------------ " << std::endl;
        std::cout << " -------------------------------------------------------------------------------- " << std::endl << std::endl;
    }
    if ( verbose ) std::cout << std::endl << std::endl << " EVALUATION of hyperparameters found:" << std::endl << std::endl << std::endl;

    double C_reg; //will hold regularization parameter
    double test_error_v1, train_error_v1; //will hold errors determined via method 1
    double test_error_v2, train_error_v2; //will hold errors determined via method 2

    // BEGIN POSSIBILITY ONE OF HYPERPARAMETER COPY
    if ( verbose ) std::cout << std::endl << " Possibility 1: copy kernel parameters via eval() and C by hand..." << std::endl << std::endl;

    // copy final parameters, variant one
    double end_value = mlms.eval( rprop.solution().point ); //this call also copies the most recent parameters from rprop into the kernel
    C_reg = ( log_enc_c ? exp( rprop.solution().point(total_dim) ) : rprop.solution().point(total_dim) ); //ATTENTION: mind the encoding
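    // (With log-encoding the optimizer searches over log C, which keeps C
    //  positive without any box constraints -- hence the exp() when decoding.
    //  Reading the raw entry without decoding would hand the trainer log C
    //  instead of C.)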

    if ( verbose ) {
        std::cout << " Value of model selection criterion at final point: " << end_value << std::endl;
        std::cout << " Done optimizing the SVM hyperparameters. The final parameters (true/unencoded) are:" << std::endl << std::endl;
        std::cout << " C = " << C_reg << std::endl;
        for ( unsigned int i=0; i<total_dim; i++ )
            std::cout << " gamma(" << i << ") = " << kernel.parameterVector()(i)*kernel.parameterVector()(i) << std::endl;
        std::cout << std::endl << " (as also given by kernel.gammaVector() : " << kernel.gammaVector() << " ) " << std::endl;
    }

    // construct and train the final learner
    KernelClassifier<RealVector> svm_v1; //the final classifier model (declared here so that the train() and evaluation calls below compile)
    CSvmTrainer<RealVector> trainer_v1( &kernel, C_reg, true, log_enc_c ); //encoding does not really matter in this case b/c it does not affect the ctor
    if ( verbose ) {
        std::cout << std::endl << std::endl << " Used mlms.eval(...) to copy kernel.parameterVector() " << kernel.parameterVector() << std::endl;
        std::cout << " into trainer_v1.parameterVector() " << trainer_v1.parameterVector() << std::endl;
        std::cout << " , where C (the last parameter) was set manually to " << trainer_v1.C() << std::endl << std::endl << std::endl;
    }
    trainer_v1.train( svm_v1, train ); //the kernel has the right parameters, and we copied C, so we are good to go

    // evaluate the final trained classifier on training and test set
    ZeroOneLoss<unsigned int> loss_v1; //0-1 loss for the classification error
    Data<unsigned int> output_v1; //predicted class labels
    output_v1 = svm_v1( train.inputs() );
    train_error_v1 = loss_v1.eval( train.labels(), output_v1 );
    output_v1 = svm_v1( test.inputs() );
    test_error_v1 = loss_v1.eval( test.labels(), output_v1 );
    if ( verbose ) {
        std::cout << " training error via possibility 1: " << train_error_v1 << std::endl;
        std::cout << " test error via possibility 1: " << test_error_v1 << std::endl << std::endl << std::endl;
    }
    // END POSSIBILITY ONE OF HYPERPARAMETER COPY

    // BEGIN POSSIBILITY TWO OF HYPERPARAMETER COPY
    if ( verbose ) std::cout << std::endl << " Possibility 2: copy best parameters via solution().point..." << std::endl << std::endl;

    KernelClassifier<RealVector> svm_v2; //the second classifier model (again declared here so that train() below compiles)
    CSvmTrainer<RealVector> trainer_v2( &kernel, 0.1, true, log_enc_c ); //ATTENTION: must be constructed with the same log-encoding preference
    trainer_v2.setParameterVector( rprop.solution().point ); //copy best hyperparameters to the SVM trainer
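    // (This works because the trainer's parameterVector is the concatenation
    //  of the kernel parameters and the encoded C -- total_dim+1 entries here,
    //  exactly the layout of the optimizer's search point -- so a single
    //  setParameterVector call transfers all hyperparameters at once. The C
    //  value of 0.1 passed to the constructor above is immediately overwritten.)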

    if ( verbose ) {
        std::cout << " Copied rprop.solution().point = " << rprop.solution().point << std::endl;
        std::cout << " into trainer_v2.parameterVector(), now = " << trainer_v2.parameterVector() << std::endl << std::endl << std::endl;
    }

    trainer_v2.train( svm_v2, train );

    // evaluate the final trained classifier on training and test set
    ZeroOneLoss<unsigned int> loss_v2; //0-1 loss for the classification error
    Data<unsigned int> output_v2; //predicted class labels
    output_v2 = svm_v2( train.inputs() );
    train_error_v2 = loss_v2.eval( train.labels(), output_v2 );
    output_v2 = svm_v2( test.inputs() );
    test_error_v2 = loss_v2.eval( test.labels(), output_v2 );
    if ( verbose ) {
        std::cout << " training error via possibility 2: " << train_error_v2 << std::endl;
        std::cout << " test error via possibility 2: " << test_error_v2 << std::endl << std::endl << std::endl;
        std::cout << std::endl << "That's all folks - we are done!" << std::endl;
    }
    // END POSSIBILITY TWO OF HYPERPARAMETER COPY

    // copy the best parameters, as well as performance values, into the averaging vector:
    // entries 0..total_dim-1 hold the gammas, entry total_dim holds C,
    // and the last two entries hold the training and test errors
    RealVector final_params(total_dim+3);
    final_params(total_dim) = C_reg;
    for ( unsigned int i=0; i<total_dim; i++ )
        final_params(i) = rprop.solution().point(i)*rprop.solution().point(i); //gamma_i is the square of the i-th kernel parameter
    final_params(total_dim+1) = train_error_v1;
    final_params(total_dim+2) = test_error_v1;
    return final_params;

}


int main() {

    // run one trial with output
    run_one_trial( true );
    std::cout << "\nNOW REPEAT WITH 100 TRIALS: we now do the exact same thing multiple times in a row and note the average kernel weights. Please wait." << std::endl << std::endl;

    // run several trials without output, and average the results
    unsigned int num_trials = 100;
    Data<RealVector> many_results(num_trials, RealVector(total_dim+3)); //each element holds one trial's resulting hyperparameters and errors
    for ( unsigned int i=0; i<num_trials; i++ ) {
        many_results.element(i) = run_one_trial(false);
        std::cout << "." << std::flush;
    }
    std::cout << "\n" << std::endl;

    RealVector overall_mean, overall_variance;
    meanvar( many_results, overall_mean, overall_variance ); //component-wise mean and variance over all trials
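    // (Note that overall_variance holds variances, not standard deviations,
    //  so the values printed after "+-" below are on a squared scale. If the
    //  ARD weighting works as intended, the first useful_dim averaged gammas
    //  should come out markedly larger than the noise_dim gammas after them.)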
    for ( unsigned int i=0; i<total_dim+1; i++ ) {
        std::cout << "avg-param(" << i << ") = " << overall_mean(i) << " +- " << overall_variance(i) << std::endl;
    }
    std::cout << std::endl << "avg-error-train = " << overall_mean(total_dim+1) << " +- " << overall_variance(total_dim+1) << std::endl;
    std::cout << "avg-error-test = " << overall_mean(total_dim+2) << " +- " << overall_variance(total_dim+2) << std::endl;

}