// DeepNetworkTraining.cpp — Shark tutorial: unsupervised pre-training of a deep
// feed-forward network with denoising autoencoders, then supervised fine-tuning.
//noisy AutoencoderModel model and deep network
#include <shark/Models/FFNet.h>// neural network for supervised training
#include <shark/Models/Autoencoder.h>// the autoencoder to train unsupervised
#include <shark/Models/ImpulseNoiseModel.h>// model adding noise to the inputs
#include <shark/Models/ConcatenatedModel.h>// to concatenate Autoencoder with noise adding model

//training the model
#include <shark/ObjectiveFunctions/ErrorFunction.h>//the error function performing the regularisation of the hidden neurons
#include <shark/ObjectiveFunctions/Loss/SquaredLoss.h> // squared loss used for unsupervised pre-training
#include <shark/ObjectiveFunctions/Loss/CrossEntropy.h> // loss used for supervised training
#include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h> // loss used for evaluation of performance
#include <shark/ObjectiveFunctions/Regularizer.h> //L1 and L2 regularisation
#include <shark/Algorithms/GradientDescent/SteepestDescent.h> //optimizer: simple gradient descent.
#include <shark/Algorithms/GradientDescent/Rprop.h> //optimizer for autoencoders

//standard library (previously pulled in transitively; made explicit here)
#include <cmath>    // std::sqrt
#include <cstddef>  // std::size_t
#include <iostream> // std::cout
#include <vector>   // std::vector

16 using namespace std;
17 using namespace shark;
18 
19 //our artificial problem
21  std::vector<RealVector> data(320,RealVector(16));
22  std::vector<unsigned int> label(320);
23  RealVector line(4);
24  for(std::size_t k = 0; k != 10; ++k){
25  for(size_t x=0; x != 16; x++) {
26  for(size_t j=0; j != 4; j++) {
27  bool val = (x & (1<<j)) > 0;
28  line(j) = val;
29  if(Rng::coinToss(0.3))
30  line(j) = !val;
31  }
32 
33  for(int i=0; i != 4; i++) {
34  subrange(data[x+k*16],i*4 ,i*4 + 4) = line;
35  }
36  for(int i=0; i != 4; i++) {
37  for(int l=0; l<4; l++) {
38  data[x+k*16+160](l*4 + i) = line(l);
39  }
40  }
41  label[x+k*16] = 1;
42  label[x+k*16+160] = 0;
43  }
44  }
45  return createLabeledDataFromRange(data,label);
46 }
47 
48 //training of an auto encoder with one hidden layer
49 template<class AutoencoderModel>
51  UnlabeledData<RealVector> const& data,//the data to train with
52  std::size_t numHidden,//number of features in the AutoencoderModel
53  double regularisation,//strength of the regularisation
54  double noiseStrength, // strength of the added noise
55  std::size_t iterations //number of iterations to optimize
56 ){
57  //create the model
58  std::size_t inputs = dataDimension(data);
59  AutoencoderModel baseModel;
60  baseModel.setStructure(inputs, numHidden);
61  initRandomUniform(baseModel,-0.1*std::sqrt(1.0/inputs),0.1*std::sqrt(1.0/inputs));
62  ImpulseNoiseModel noise(inputs,noiseStrength,0.0);//set an input pixel with probability p to 0
63  ConcatenatedModel<RealVector,RealVector> model = noise>> baseModel;
64  //create the objective function
65  LabeledData<RealVector,RealVector> trainSet(data,data);//labels identical to inputs
67  ErrorFunction error(trainSet, &model, &loss);
68  TwoNormRegularizer regularizer(error.numberOfVariables());
69  error.setRegularizer(regularisation,&regularizer);
70  //set up optimizer
71  IRpropPlusFull optimizer;
72  optimizer.init(error);
73  std::cout<<"Optimizing model: "+model.name()<<std::endl;
74  for(std::size_t i = 0; i != iterations; ++i){
75  optimizer.step(error);
76  std::cout<<i<<" "<<optimizer.solution().value<<std::endl;
77  }
78  model.setParameterVector(optimizer.solution().point);
79  return baseModel;
80 }
81 
83 typedef FFNet<RectifierNeuron,LinearNeuron> Network;//final supervised trained structure
84 
85 //unsupervised pre training of a network with two hidden layers
87  UnlabeledData<RealVector> const& data,
88  std::size_t numHidden1,std::size_t numHidden2, std::size_t numOutputs,
89  double regularisation, double noiseStrength, std::size_t iterations
90 ){
91  //train the first hidden layer
92  std::cout<<"training first layer"<<std::endl;
93  AutoencoderModel layer = trainAutoencoderModel<AutoencoderModel>(
94  data,numHidden1,
95  regularisation, noiseStrength,
96  iterations
97  );
98  //compute the mapping onto the features of the first hidden layer
99  UnlabeledData<RealVector> intermediateData = layer.evalLayer(0,data);
100 
101  //train the next layer
102  std::cout<<"training second layer"<<std::endl;
103  AutoencoderModel layer2 = trainAutoencoderModel<AutoencoderModel>(
104  intermediateData,numHidden2,
105  regularisation, noiseStrength,
106  iterations
107  );
108  //create the final network
109  Network network;
110  network.setStructure(dataDimension(data),numHidden1,numHidden2, numOutputs);
111  initRandomNormal(network,0.1);
112  network.setLayer(0,layer.encoderMatrix(),layer.hiddenBias());
113  network.setLayer(1,layer2.encoderMatrix(),layer2.hiddenBias());
114 
115  return network;
116 }
117 
118 int main()
119 {
120  //model parameters
121  std::size_t numHidden1 = 8;
122  std::size_t numHidden2 = 8;
123  //unsupervised hyper parameters
124  double unsupRegularisation = 0.001;
125  double noiseStrength = 0.3;
126  std::size_t unsupIterations = 100;
127  //supervised hyper parameters
128  double regularisation = 0.0001;
129  std::size_t iterations = 200;
130 
131  //load data and split into training and test
133  data.shuffle();
134  LabeledData<RealVector,unsigned int> test = splitAtElement(data,static_cast<std::size_t>(0.5*data.numberOfElements()));
135 
136  //unsupervised pre training
138  data.inputs(),numHidden1, numHidden2,numberOfClasses(data),
139  unsupRegularisation, noiseStrength, unsupIterations
140  );
141 
142  //create the supervised problem. Cross Entropy loss with one norm regularisation
143  CrossEntropy loss;
144  ErrorFunction error(data, &network, &loss);
145  OneNormRegularizer regularizer(error.numberOfVariables());
146  error.setRegularizer(regularisation,&regularizer);
147 
148  //optimize the model
149  std::cout<<"training supervised model"<<std::endl;
150  IRpropPlusFull optimizer;
151  optimizer.init(error);
152  for(std::size_t i = 0; i != iterations; ++i){
153  optimizer.step(error);
154  std::cout<<i<<" "<<optimizer.solution().value<<std::endl;
155  }
156  network.setParameterVector(optimizer.solution().point);
157 
158  //evaluation
160  Data<RealVector> predictionTrain = network(data.inputs());
161  cout << "classification error,train: " << loss01.eval(data.labels(), predictionTrain) << endl;
162 
163  Data<RealVector> prediction = network(test.inputs());
164  cout << "classification error,test: " << loss01.eval(test.labels(), prediction) << endl;
165 
166 }