TiedAutoencoder.h
Go to the documentation of this file.
1 /*!
2  * \brief Implements the autoencoder with tied weights
3  *
4  * \author O. Krause
5  * \date 2010-2014
6  *
7  *
8  * \par Copyright 1995-2015 Shark Development Team
9  *
10  * <BR><HR>
11  * This file is part of Shark.
12  * <http://image.diku.dk/shark/>
13  *
14  * Shark is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation, either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * Shark is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 #ifndef SHARK_MODELS_TIEDAUTOENCODER_H
29 #define SHARK_MODELS_TIEDAUTOENCODER_H
30 
32 #include <shark/Models/Neurons.h>
33 #include <shark/Models/FFNet.h>
34 #include <boost/serialization/vector.hpp>
35 
36 namespace shark{
37 
38 /// \brief implements the autoencoder with tied weights
39 ///
40 /// The formula is
41 /// \f[ f(x) = \sigma_2(W^T\sigma_1(Wx+b_1)+b_2)\f]
42 /// Where \f$ W \f$, \f$b_1 \f$ and \f$b_2 \f$ are the weights and
43 /// \f$\sigma_1\f$ and \f$ \sigma_2\f$ are the activation functions for hidden and output units.
44 template<class HiddenNeuron,class OutputNeuron>
45 class TiedAutoencoder :public AbstractModel<RealVector,RealVector>
46 {
47  struct InternalState: public State{
48  RealMatrix hiddenResponses;
49  RealMatrix outputResponses;
50  };
51 
52 
53 public:
57  }
58 
59  //! \brief From INameable: return the class name.
60  std::string name() const{
61  return "TiedAutoencoder";
62  }
63 
64  //! \brief Number of input neurons.
65  std::size_t inputSize()const{
66  return outputSize();
67  }
68  //! \brief Number of output neurons.
69  std::size_t outputSize()const{
70  return outputBias().size();
71  }
72 
73  //! \brief Total number of hidden neurons.
74  std::size_t numberOfHiddenNeurons()const{
75  return encoderMatrix().size1();
76  }
77 
78  /// \brief Returns the hidden bias weight vector.
79  RealVector const& hiddenBias()const{
80  return m_hiddenBias;
81  }
82 
83  /// \brief Returns the hidden bias weight vector.
84  RealVector& hiddenBias(){
85  return m_hiddenBias;
86  }
87 
88  /// \brief Returns the output bias weight vector.
89  RealVector const& outputBias()const{
90  return m_outputBias;
91  }
92  /// \brief Returns the output bias weight vector.
93  RealVector& outputBias(){
94  return m_outputBias;
95  }
96 
97  /// \brief Weight matrix for the direction input->hidden.
98  RealMatrix const& encoderMatrix()const{
99  return m_weightMatrix;
100  }
101  /// \brief Weight matrix for the direction input->hidden.
102  RealMatrix& encoderMatrix(){
103  return m_weightMatrix;
104  }
105  /// \brief Weight matrix for the direction hidden->output
106  ///
107  ///For tied autoencoders, this is the transpose of the encoder matrix
109  return trans(m_weightMatrix);
110  }
111  /// \brief Weight matrix for the direction hidden->output
112  ///
113  ///For tied autoencoders, this is the transpose of the encoder matrix
115  return trans(m_weightMatrix);
116  }
117 
118  //! \brief Returns the total number of parameters of the network.
119  std::size_t numberOfParameters()const{
121  }
122 
123  //! returns the vector of used parameters inside the weight matrix
124  RealVector parameterVector() const{
125  RealVector parameters(numberOfParameters());
126  init(parameters) << toVector(m_weightMatrix),m_hiddenBias,m_outputBias;
127  return parameters;
128  }
129  //! uses the values inside the parametervector to set the used values inside the weight matrix
130  void setParameterVector(RealVector const& newParameters){
131  SIZE_CHECK(newParameters.size() == numberOfParameters());
132  init(newParameters) >> toVector(m_weightMatrix),m_hiddenBias,m_outputBias;
133  }
134 
135  /// \brief Returns the activation function of the hidden units.
136  HiddenNeuron const& hiddenActivationFunction()const{
137  return m_hiddenNeuron;
138  }
139  /// \brief Returns the activation function of the output units.
140  OutputNeuron const& outputActivationFunction()const{
141  return m_outputNeuron;
142  }
143 
144  /// \brief Returns the activation function of the hidden units.
145  HiddenNeuron& hiddenActivationFunction(){
146  return m_hiddenNeuron;
147  }
148  /// \brief Returns the activation function of the output units.
149  OutputNeuron& outputActivationFunction(){
150  return m_outputNeuron;
151  }
152 
153  //! \brief Returns the output of all neurons after the last call of eval
154  //!
155  //! \param state last result of eval
156  //! \return Output value of the neurons.
157  RealMatrix const& hiddenResponses(State const& state)const{
158  InternalState const& s = state.toState<InternalState>();
159  return s.hiddenResponses;
160  }
161 
162  boost::shared_ptr<State> createState()const{
163  return boost::shared_ptr<State>(new InternalState());
164  }
165 
166  void evalLayer(std::size_t layer,RealMatrix const& patterns,RealMatrix& outputs)const{
167  SIZE_CHECK(layer < 2);
168  std::size_t numPatterns = patterns.size1();
169 
170  if(layer == 0){//input->hidden
171  SIZE_CHECK(patterns.size2() == encoderMatrix().size2());
172  std::size_t numOutputs = encoderMatrix().size1();
173  outputs.resize(numPatterns,numOutputs);
174  noalias(outputs) = prod(patterns,trans(encoderMatrix())) + repeat(hiddenBias(),numPatterns);
175  noalias(outputs) = m_hiddenNeuron(outputs);
176  }
177  else{//hidden->output
178  SIZE_CHECK(patterns.size2() == decoderMatrix().size2());
179  std::size_t numOutputs = decoderMatrix().size1();
180  outputs.resize(numPatterns,numOutputs);
181  noalias(outputs) = prod(patterns,trans(decoderMatrix())) + repeat(outputBias(),numPatterns);
182  noalias(outputs) = m_outputNeuron(outputs);
183  }
184  }
185 
186  ///\brief Returns the response of the i-th layer given the input of that layer.
187  ///
188  /// this is usefull if only a portion of the network needs to be evaluated
189  /// be aware that this only works without shortcuts in the network
190  Data<RealVector> evalLayer(std::size_t layer, Data<RealVector> const& patterns)const{
191  SIZE_CHECK(layer < 2);
192  int batches = (int) patterns.numberOfBatches();
193  Data<RealVector> result(batches);
194  SHARK_PARALLEL_FOR(int i = 0; i < batches; ++i){
195  evalLayer(layer,patterns.batch(i),result.batch(i));
196  }
197  return result;
198  }
199 
200  Data<RealVector> encode(Data<RealVector> const& patterns)const{
201  return evalLayer(0,patterns);
202  }
203 
204  Data<RealVector> decode(Data<RealVector> const& patterns)const{
205  return evalLayer(1,patterns);
206  }
207 
208  template<class Label>
211  )const{
212  return LabeledData<RealVector,Label>(encode(data.inputs()),data.labels());
213  }
214 
215  template<class Label>
218  )const{
219  return LabeledData<RealVector,Label>(decode(data.inputs()),data.labels());
220  }
221 
222  void eval(RealMatrix const& patterns,RealMatrix& output, State& state)const{
223  InternalState& s = state.toState<InternalState>();
224  evalLayer(0,patterns,s.hiddenResponses);//propagate input->hidden
225  evalLayer(1,s.hiddenResponses,s.outputResponses);//propagate hidden->output
226  output = s.outputResponses;
227  }
229 
231  BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, RealVector& gradient
232  )const{
233  SIZE_CHECK(coefficients.size2() == outputSize());
234  SIZE_CHECK(coefficients.size1() == patterns.size1());
235 
236  RealMatrix outputDelta = coefficients;
237  RealMatrix hiddenDelta;
238  computeDelta(state,outputDelta,hiddenDelta);
239  computeParameterDerivative(patterns,outputDelta,hiddenDelta,state,gradient);
240  }
241 
243  BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, BatchInputType& inputDerivative
244  )const{
245  SIZE_CHECK(coefficients.size2() == outputSize());
246  SIZE_CHECK(coefficients.size1() == patterns.size1());
247 
248  RealMatrix outputDelta = coefficients;
249  RealMatrix hiddenDelta;
250  computeDelta(state,outputDelta,hiddenDelta,inputDerivative);
251  }
252 
253  virtual void weightedDerivatives(
254  BatchInputType const & patterns,
255  BatchOutputType const & coefficients,
256  State const& state,
257  RealVector& parameterDerivative,
258  BatchInputType& inputDerivative
259  )const{
260  SIZE_CHECK(coefficients.size2() == outputSize());
261  SIZE_CHECK(coefficients.size1() == patterns.size1());
262 
263  RealMatrix outputDelta = coefficients;
264  RealMatrix hiddenDelta;
265  computeDelta(state,outputDelta,hiddenDelta,inputDerivative);
266  computeParameterDerivative(patterns,outputDelta,hiddenDelta,state,parameterDerivative);
267  }
268 
270  std::size_t in,std::size_t hidden
271  ){
272  m_weightMatrix.resize(hidden,in);
273  m_hiddenBias.resize(hidden);
274  m_outputBias.resize(in);
275  }
276 
277  //! From ISerializable, reads a model from an archive
278  void read( InArchive & archive ){
279  archive>>m_weightMatrix;
280  archive>>m_hiddenBias;
281  archive>>m_outputBias;
282  }
283 
284  //! From ISerializable, writes a model to an archive
285  void write( OutArchive & archive ) const{
286  archive<<m_weightMatrix;
287  archive<<m_hiddenBias;
288  archive<<m_outputBias;
289  }
290 
291 
292 private:
293 
294  void computeDelta(
295  State const& state, RealMatrix& outputDelta, RealMatrix& hiddenDelta
296  )const{
297  InternalState const& s = state.toState<InternalState>();
298 
299  noalias(outputDelta) *= m_outputNeuron.derivative(s.outputResponses);
300  hiddenDelta.resize(outputDelta.size1(),numberOfHiddenNeurons());
301  noalias(hiddenDelta) = prod(outputDelta,decoderMatrix());
302  noalias(hiddenDelta) *= m_hiddenNeuron.derivative(s.hiddenResponses);
303  }
304 
305  void computeDelta(
306  State const& state, RealMatrix& outputDelta, RealMatrix& hiddenDelta, RealMatrix& inputDelta
307  )const{
308  computeDelta(state,outputDelta,hiddenDelta);
309  inputDelta.resize(outputDelta.size1(),inputSize());
310  noalias(inputDelta) = prod(hiddenDelta,encoderMatrix());
311  }
312 
313  void computeParameterDerivative(
314  RealMatrix const& patterns, RealMatrix const& outputDelta, RealMatrix const& hiddenDelta,
315  State const& state, RealVector& gradient
316  )const{
317  InternalState const& s = state.toState<InternalState>();
318  std::size_t hiddenParams = inputSize()*numberOfHiddenNeurons();
319  std::size_t numHidden = numberOfHiddenNeurons();
320  gradient.resize(numberOfParameters());
321  gradient.clear();
322  axpy_prod(
323  trans(s.hiddenResponses),
324  outputDelta,
325  to_matrix(subrange(gradient,0,hiddenParams),numHidden,inputSize()),false
326  );
327  axpy_prod(
328  trans(hiddenDelta),
329  patterns,
330  to_matrix(subrange(gradient,0,hiddenParams),numHidden,inputSize()),false
331  );
332 
333  std::size_t hiddenBiasPos = hiddenParams;
334  std::size_t outputBiasPos = hiddenBiasPos+numHidden;
335  subrange(gradient,hiddenBiasPos,outputBiasPos) = sum_rows(hiddenDelta);
336  subrange(gradient,outputBiasPos,outputBiasPos+inputSize()) = sum_rows(outputDelta);
337  }
338 
339  //! weight matrix between input and hidden layer. the transpose of this is used to connect hidden->output.
340  RealMatrix m_weightMatrix;
341  //! bias weights of the hidden neurons
342  RealVector m_hiddenBias;
343  //! bias weights of the visible neurons
344  RealVector m_outputBias;
345 
346  //!Type of hidden neuron. See Models/Neurons.h for a few choices
347  HiddenNeuron m_hiddenNeuron;
348  //! Type of output neuron. See Models/Neurons.h for a few choices
349  OutputNeuron m_outputNeuron;
350 };
351 
352 
353 }
354 #endif