Autoencoder.h
Go to the documentation of this file.
1 /*!
2  * \brief Implements the autoencoder
3  *
4  * \author O. Krause
5  * \date 2010-2014
6  *
7  *
8  * \par Copyright 1995-2015 Shark Development Team
9  *
10  * <BR><HR>
11  * This file is part of Shark.
12  * <http://image.diku.dk/shark/>
13  *
14  * Shark is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation, either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * Shark is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 #ifndef SHARK_MODELS_AUTOENCODER_H
29 #define SHARK_MODELS_AUTOENCODER_H
30 
32 #include <shark/Models/Neurons.h>
33 #include <boost/serialization/vector.hpp>
34 
35 namespace shark{
36 
37 /// \brief implements the autoencoder
38 ///
39 /// The formula is
40 /// \f[ f(x) = \sigma_2(W_2\sigma_1(W_1x+b_1)+b_2)\f]
41 /// Where \f$ W_1, W_2, b_1 \f$ and \f$b_2 \f$ are the weights and
42 /// \f$\sigma_1\f$ and \f$ \sigma_2\f$ are the activation functions for hidden and output units.
43 ///
44 /// see TiedAutoencoder for the tied weights version where \f$ W_2=W_1^T \f$.
45 template<class HiddenNeuron,class OutputNeuron>
46 class Autoencoder :public AbstractModel<RealVector,RealVector>
47 {
48  struct InternalState: public State{
49  RealMatrix hiddenResponses;
50  RealMatrix outputResponses;
51  };
52 
53 
54 public:
58  }
59 
60  //! \brief From INameable: return the class name.
61  std::string name() const{
62  return "Autoencoder";
63  }
64 
65  //! \brief Number of input neurons.
66  std::size_t inputSize()const{
67  return outputSize();
68  }
69  //! \brief Number of output neurons.
70  std::size_t outputSize()const{
71  return outputBias().size();
72  }
73 
74  //! \brief Total number of hidden neurons.
75  std::size_t numberOfHiddenNeurons()const{
76  return encoderMatrix().size1();
77  }
78 
79  /// \brief Returns the hidden bias weight vector.
80  RealVector const& hiddenBias()const{
81  return m_hiddenBias;
82  }
83 
84  /// \brief Returns the hidden bias weight vector.
85  RealVector& hiddenBias(){
86  return m_hiddenBias;
87  }
88 
89  /// \brief Returns the output bias weight vector.
90  RealVector const& outputBias()const{
91  return m_outputBias;
92  }
93  /// \brief Returns the output bias weight vector.
94  RealVector& outputBias(){
95  return m_outputBias;
96  }
97 
98  /// \brief Weight matrix for the direction input->hidden.
99  RealMatrix const& encoderMatrix()const{
100  return m_encoderMatrix;
101  }
102  /// \brief Weight matrix for the direction input->hidden.
103  RealMatrix& encoderMatrix(){
104  return m_encoderMatrix;
105  }
106  /// \brief Weight matrix for the direction hidden->output
107  ///
108  RealMatrix const& decoderMatrix()const{
109  return m_decoderMatrix;
110  }
111  /// \brief Weight matrix for the direction hidden->output
112  RealMatrix& decoderMatrix(){
113  return m_decoderMatrix;
114  }
115 
116  //! \brief Returns the total number of parameters of the network.
117  std::size_t numberOfParameters()const{
119  }
120 
121  //! returns the vector of used parameters inside the weight matrix
122  RealVector parameterVector() const{
123  RealVector parameters(numberOfParameters());
124  init(parameters) << toVector(m_encoderMatrix),toVector(m_decoderMatrix),m_hiddenBias,m_outputBias;
125  return parameters;
126  }
127  //! uses the values inside the parametervector to set the used values inside the weight matrix
128  void setParameterVector(RealVector const& newParameters){
129  SIZE_CHECK(newParameters.size() == numberOfParameters());
130  init(newParameters) >> toVector(m_encoderMatrix),toVector(m_decoderMatrix),m_hiddenBias,m_outputBias;
131  }
132 
133  //! \brief Returns the output of all neurons after the last call of eval
134  //!
135  //! \param state last result of eval
136  //! \return Output value of the neurons.
137  RealMatrix const& hiddenResponses(State const& state)const{
138  InternalState const& s = state.toState<InternalState>();
139  return s.hiddenResponses;
140  }
141 
142  /// \brief Returns the activation function of the hidden units.
143  HiddenNeuron const& hiddenActivationFunction()const{
144  return m_hiddenNeuron;
145  }
146  /// \brief Returns the activation function of the output units.
147  OutputNeuron const& outputActivationFunction()const{
148  return m_outputNeuron;
149  }
150 
151  /// \brief Returns the activation function of the hidden units.
152  HiddenNeuron& hiddenActivationFunction(){
153  return m_hiddenNeuron;
154  }
155  /// \brief Returns the activation function of the output units.
156  OutputNeuron& outputActivationFunction(){
157  return m_outputNeuron;
158  }
159 
160  boost::shared_ptr<State> createState()const{
161  return boost::shared_ptr<State>(new InternalState());
162  }
163 
164  void evalLayer(std::size_t layer,RealMatrix const& patterns,RealMatrix& outputs)const{
165  SIZE_CHECK(layer < 2);
166  std::size_t numPatterns = patterns.size1();
167 
168  if(layer == 0){//input->hidden
169  SIZE_CHECK(patterns.size2() == encoderMatrix().size2());
170  std::size_t numOutputs = encoderMatrix().size1();
171  outputs.resize(numPatterns,numOutputs);
172  outputs.clear();
173  noalias(outputs) = prod(patterns,trans(encoderMatrix())) + repeat(hiddenBias(),numPatterns);
174  noalias(outputs) = m_hiddenNeuron(outputs);
175  }
176  else{//hidden->output
177  SIZE_CHECK(patterns.size2() == decoderMatrix().size2());
178  std::size_t numOutputs = decoderMatrix().size1();
179  outputs.resize(numPatterns,numOutputs);
180  outputs.clear();
181  noalias(outputs) = prod(patterns,trans(decoderMatrix())) + repeat(outputBias(),numPatterns);
182  noalias(outputs) = m_outputNeuron(outputs);
183  }
184  }
185 
186  ///\brief Returns the response of the i-th layer given the input of that layer.
187  ///
188  /// this is usefull if only a portion of the network needs to be evaluated
189  /// be aware that this only works without shortcuts in the network
190  Data<RealVector> evalLayer(std::size_t layer, Data<RealVector> const& patterns)const{
191  SIZE_CHECK(layer < 2);
192  int batches = (int) patterns.numberOfBatches();
193  Data<RealVector> result(batches);
194  SHARK_PARALLEL_FOR(int i = 0; i < batches; ++i){
195  evalLayer(layer,patterns.batch(i),result.batch(i));
196  }
197  return result;
198  }
199 
200  Data<RealVector> encode(Data<RealVector> const& patterns)const{
201  return evalLayer(0,patterns);
202  }
203 
204  Data<RealVector> decode(Data<RealVector> const& patterns)const{
205  return evalLayer(1,patterns);
206  }
207 
208  template<class Label>
211  )const{
212  return LabeledData<RealVector,Label>(encode(data.inputs()),data.labels());
213  }
214 
215  template<class Label>
218  )const{
219  return LabeledData<RealVector,Label>(decode(data.inputs()),data.labels());
220  }
221 
222 
223  void eval(RealMatrix const& patterns,RealMatrix& output, State& state)const{
224  InternalState& s = state.toState<InternalState>();
225  evalLayer(0,patterns,s.hiddenResponses);//propagate input->hidden
226  evalLayer(1,s.hiddenResponses,s.outputResponses);//propagate hidden->output
227  output = s.outputResponses;
228  }
230 
232  BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, RealVector& gradient
233  )const{
234  SIZE_CHECK(coefficients.size2() == outputSize());
235  SIZE_CHECK(coefficients.size1() == patterns.size1());
236 
237  RealMatrix outputDelta = coefficients;
238  RealMatrix hiddenDelta;
239  computeDelta(state,outputDelta,hiddenDelta);
240  computeParameterDerivative(patterns,outputDelta,hiddenDelta,state,gradient);
241  }
242 
244  BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, BatchInputType& inputDerivative
245  )const{
246  SIZE_CHECK(coefficients.size2() == outputSize());
247  SIZE_CHECK(coefficients.size1() == patterns.size1());
248 
249  RealMatrix outputDelta = coefficients;
250  RealMatrix hiddenDelta;
251  computeDelta(state,outputDelta,hiddenDelta,inputDerivative);
252  }
253 
254  virtual void weightedDerivatives(
255  BatchInputType const & patterns,
256  BatchOutputType const & coefficients,
257  State const& state,
258  RealVector& parameterDerivative,
259  BatchInputType& inputDerivative
260  )const{
261  SIZE_CHECK(coefficients.size2() == outputSize());
262  SIZE_CHECK(coefficients.size1() == patterns.size1());
263 
264  RealMatrix outputDelta = coefficients;
265  RealMatrix hiddenDelta;
266  computeDelta(state,outputDelta,hiddenDelta,inputDerivative);
267  computeParameterDerivative(patterns,outputDelta,hiddenDelta,state,parameterDerivative);
268  }
269 
271  std::size_t in,std::size_t hidden
272  ){
273  m_encoderMatrix.resize(hidden,in);
274  m_decoderMatrix.resize(in,hidden);
275  m_hiddenBias.resize(hidden);
276  m_outputBias.resize(in);
277  }
278 
279  //! From ISerializable, reads a model from an archive
280  void read( InArchive & archive ){
281  archive>>m_encoderMatrix;
282  archive>>m_decoderMatrix;
283  archive>>m_hiddenBias;
284  archive>>m_outputBias;
285  }
286 
287  //! From ISerializable, writes a model to an archive
288  void write( OutArchive & archive ) const{
289  archive<<m_encoderMatrix;
290  archive<<m_decoderMatrix;
291  archive<<m_hiddenBias;
292  archive<<m_outputBias;
293  }
294 
295 
296 private:
297 
298  void computeDelta(
299  State const& state, RealMatrix& outputDelta, RealMatrix& hiddenDelta
300  )const{
301  InternalState const& s = state.toState<InternalState>();
302 
303  noalias(outputDelta) *= m_outputNeuron.derivative(s.outputResponses);
304  hiddenDelta.resize(outputDelta.size1(),numberOfHiddenNeurons());
305  noalias(hiddenDelta) = prod(outputDelta,decoderMatrix());
306  noalias(hiddenDelta) *= m_hiddenNeuron.derivative(s.hiddenResponses);
307  }
308 
309  void computeDelta(
310  State const& state, RealMatrix& outputDelta, RealMatrix& hiddenDelta, RealMatrix& inputDelta
311  )const{
312  computeDelta(state,outputDelta,hiddenDelta);
313  inputDelta.resize(outputDelta.size1(),inputSize());
314  noalias(inputDelta) = prod(hiddenDelta,encoderMatrix());
315  }
316 
317  void computeParameterDerivative(
318  RealMatrix const& patterns, RealMatrix const& outputDelta, RealMatrix const& hiddenDelta,
319  State const& state, RealVector& gradient
320  )const{
321  InternalState const& s = state.toState<InternalState>();
322  std::size_t hiddenParams = inputSize()*numberOfHiddenNeurons();
323  std::size_t numHidden = numberOfHiddenNeurons();
324  gradient.resize(numberOfParameters());
325  gradient.clear();
326  axpy_prod(
327  trans(outputDelta),
328  s.hiddenResponses,
329  to_matrix(subrange(gradient,hiddenParams,2*hiddenParams),outputSize(),numHidden),false
330  );
331  axpy_prod(
332  trans(hiddenDelta),
333  patterns,
334  to_matrix(subrange(gradient,0,hiddenParams),numHidden,inputSize()),false
335  );
336 
337  std::size_t hiddenBiasPos = 2*hiddenParams;
338  std::size_t outputBiasPos = hiddenBiasPos+numHidden;
339  subrange(gradient,hiddenBiasPos,outputBiasPos) = sum_rows(hiddenDelta);
340  subrange(gradient,outputBiasPos,outputBiasPos+inputSize()) = sum_rows(outputDelta);
341  }
342 
343  //! weight matrix between input and hidden layer.
344  RealMatrix m_encoderMatrix;
345  //! weight matrix between hidden and output layer.
346  RealMatrix m_decoderMatrix;
347  //! bias weights of the hidden neurons
348  RealVector m_hiddenBias;
349  //! bias weights of the visible neurons
350  RealVector m_outputBias;
351 
352  //!Type of hidden neuron. See Models/Neurons.h for a few choices
353  HiddenNeuron m_hiddenNeuron;
354  //! Type of output neuron. See Models/Neurons.h for a few choices
355  OutputNeuron m_outputNeuron;
356 };
357 
358 
359 }
360 #endif