RBM.h
/*!
 *
 *
 * \brief Implementation of the Restricted Boltzmann Machine (RBM) model.
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_RBM_H
#define SHARK_UNSUPERVISED_RBM_RBM_H

#include <shark/Models/AbstractModel.h>
#include <shark/Unsupervised/RBM/Energy.h>
#include <shark/Unsupervised/RBM/Impl/AverageEnergyGradient.h>

#include <sstream>
#include <boost/serialization/string.hpp>
namespace shark{

///\brief Stub for the RBM class; at the moment it is just a holder of the parameter set and the energy.
template<class VisibleLayerT,class HiddenLayerT, class RngT>
class RBM : public AbstractModel<RealVector, RealVector>{
private:
    typedef AbstractModel<RealVector, RealVector> base_type;
public:
    typedef HiddenLayerT HiddenType; ///< type of the hidden layer
    typedef VisibleLayerT VisibleType; ///< type of the visible layer
    typedef RngT RngType;
    typedef Energy<RBM<VisibleType,HiddenType,RngT> > EnergyType; ///< type of the energy function
    typedef detail::AverageEnergyGradient<RBM> GradientType; ///< type of the gradient calculator

    typedef typename base_type::BatchInputType BatchInputType;
    typedef typename base_type::BatchOutputType BatchOutputType;

private:
    /// \brief The weight matrix connecting hidden and visible layer.
    RealMatrix m_weightMatrix;

    ///The layer of hidden neurons
    HiddenType m_hiddenNeurons;

    ///The layer of visible neurons
    VisibleType m_visibleNeurons;

    RngType* mpe_rng;
    bool m_forward;
    bool m_evalMean;
    ///\brief Evaluates the input by propagating the visible input to the hidden neurons.
    ///
    ///@param state batch of states of the visible units
    ///@param output batch of (expected) states of the hidden units
    void evalForward(BatchInputType const& state,BatchOutputType& output)const{
        std::size_t batchSize=state.size1();
        typename HiddenType::StatisticsBatch statisticsBatch(batchSize,numberOfHN());
        RealMatrix inputBatch(batchSize,numberOfHN());
        output.resize(batchSize,numberOfHN());

        energy().inputHidden(inputBatch,state);
        hiddenNeurons().sufficientStatistics(inputBatch,statisticsBatch,blas::repeat(1.0,batchSize));

        if(m_evalMean){
            noalias(output) = hiddenNeurons().mean(statisticsBatch);
        }
        else{
            hiddenNeurons().sample(statisticsBatch,output,0.0,*mpe_rng);
        }
    }

    ///\brief Evaluates the input by propagating the hidden input to the visible neurons.
    ///
    ///@param state batch of states of the hidden units
    ///@param output batch of (expected) states of the visible units
    void evalBackward(BatchInputType const& state,BatchOutputType& output)const{
        std::size_t batchSize = state.size1();
        typename VisibleType::StatisticsBatch statisticsBatch(batchSize,numberOfVN());
        RealMatrix inputBatch(batchSize,numberOfVN());
        output.resize(batchSize,numberOfVN());

        energy().inputVisible(inputBatch,state);
        visibleNeurons().sufficientStatistics(inputBatch,statisticsBatch,blas::repeat(1.0,batchSize));

        if(m_evalMean){
            noalias(output) = visibleNeurons().mean(statisticsBatch);
        }
        else{
            visibleNeurons().sample(statisticsBatch,output,0.0,*mpe_rng);
        }
    }
public:
    RBM(RngType& rng):mpe_rng(&rng),m_forward(true),m_evalMean(true)
    { }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "RBM"; }
    ///\brief Returns the total number of parameters of the model.
    std::size_t numberOfParameters()const {
        std::size_t parameters = numberOfVN()*numberOfHN();
        parameters += m_hiddenNeurons.numberOfParameters();
        parameters += m_visibleNeurons.numberOfParameters();
        return parameters;
    }

    ///\brief Returns the parameters of the model as a parameter vector.
    RealVector parameterVector () const {
        RealVector ret(numberOfParameters());
        init(ret) << toVector(m_weightMatrix),blas::parameters(m_hiddenNeurons),blas::parameters(m_visibleNeurons);
        return ret;
    }

    ///\brief Sets the parameters of the model.
    ///
    /// @param newParameters vector of parameters
    void setParameterVector(const RealVector& newParameters) {
        init(newParameters) >> toVector(m_weightMatrix),blas::parameters(m_hiddenNeurons),blas::parameters(m_visibleNeurons);
    }
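
    // A minimal sketch of round-tripping the parameters (the `rbm` variable
    // and the scaling step are illustrative assumptions, not part of this file):
    //
    //   RealVector theta = rbm.parameterVector(); // flattened weights, then layer parameters
    //   theta *= 0.5;                             // e.g. shrink all parameters
    //   rbm.setParameterVector(theta);            // write them back
    //
    // The vector is laid out as the weight matrix first, then the hidden
    // layer's parameters, then the visible layer's, matching the init()
    // expressions above.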

    ///\brief Creates the structure of the RBM.
    ///
    ///@param visibleNeurons number of visible neurons.
    ///@param hiddenNeurons number of hidden neurons.
    void setStructure(std::size_t visibleNeurons,std::size_t hiddenNeurons){
        m_weightMatrix.resize(hiddenNeurons,visibleNeurons);
        m_weightMatrix.clear();

        m_hiddenNeurons.resize(hiddenNeurons);
        m_visibleNeurons.resize(visibleNeurons);
    }
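
    // Usage sketch: constructing an RBM and giving it a structure. The layer
    // type and rng names are assumptions taken from typical Shark usage, not
    // definitions made in this file:
    //
    //   RBM<BinaryLayer, BinaryLayer, Rng::rng_type> rbm(Rng::globalRng);
    //   rbm.setStructure(784, 256); // 784 visible units, 256 hidden units
    //
    // After this call the weight matrix is 256x784 and cleared to zero;
    // training code is expected to randomize it before use.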

    ///\brief Returns the layer of hidden neurons.
    HiddenType const& hiddenNeurons()const{
        return m_hiddenNeurons;
    }
    ///\brief Returns the layer of hidden neurons.
    HiddenType& hiddenNeurons(){
        return m_hiddenNeurons;
    }
    ///\brief Returns the layer of visible neurons.
    VisibleType& visibleNeurons(){
        return m_visibleNeurons;
    }
    ///\brief Returns the layer of visible neurons.
    VisibleType const& visibleNeurons()const{
        return m_visibleNeurons;
    }

    ///\brief Returns the weight matrix connecting the layers.
    RealMatrix& weightMatrix(){
        return m_weightMatrix;
    }
    ///\brief Returns the weight matrix connecting the layers.
    RealMatrix const& weightMatrix()const{
        return m_weightMatrix;
    }

    ///\brief Returns the energy function of the RBM.
    EnergyType energy()const{
        return EnergyType(*this);
    }

    ///\brief Returns the random number generator associated with this RBM.
    RngType& rng(){
        return *mpe_rng;
    }

    ///\brief Sets the type of evaluation that eval() will perform.
    ///
    ///eval() performs its operation based on the mode set by this function.
    ///There are two ways to pass data through an RBM: either forward, fixing the states of the
    ///visible neurons and sampling the hidden states, or backward, fixing the states of the hidden
    ///neurons and sampling the visible ones.
    ///Instead of a sampled state of the hidden/visible neurons, one often wants the mean of the state \f$ E_{p(h|v)}\left(h\right)\f$.
    ///By default, the RBM uses the forward evaluation and returns the mean of the state.
    ///
    ///@param forward whether the forward view should be used; false=backward
    ///@param evalMean whether the mean state should be returned; false=a sample is returned
    void evaluationType(bool forward,bool evalMean){
        m_forward = forward;
        m_evalMean = evalMean;
    }
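
    // Sketch of switching evaluation modes (the `rbm`, batch and output
    // variables are illustrative assumptions):
    //
    //   rbm.evaluationType(true, true);   // forward pass, return E[h|v]
    //   rbm.eval(visibleBatch, hidden);
    //
    //   rbm.evaluationType(false, false); // backward pass, return a sample of v
    //   rbm.eval(hiddenBatch, visible);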

    boost::shared_ptr<State> createState()const{
        return boost::shared_ptr<State>(new EmptyState());
    }

    ///\brief Passes information through/samples from an RBM in a forward or backward way.
    ///
    ///eval() performs its operation based on the current evaluation type.
    ///There are two ways to pass data through an RBM: either forward, fixing the states of the
    ///visible neurons and sampling the hidden states, or backward, fixing the states of the hidden
    ///neurons and sampling the visible ones.
    ///Instead of a sampled state of the hidden/visible neurons, one often wants the mean of the state \f$ E_{p(h|v)}\left(h\right)\f$.
    ///By default, the RBM uses the forward evaluation and returns the mean of the state,
    ///but other evaluation modes can be set by evaluationType().
    ///
    ///@param patterns the batch of (visible or hidden) inputs
    ///@param outputs the batch of (visible or hidden) outputs
    void eval(BatchInputType const& patterns,BatchOutputType& outputs)const{
        if(m_forward){
            evalForward(patterns,outputs);
        }
        else{
            evalBackward(patterns,outputs);
        }
    }

    void eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
        eval(patterns,outputs);
    }

    ///\brief Calculates the input of the hidden neurons given the state of the visible neurons in a batch-wise fashion.
    ///
    ///@param inputs the batch of vectors the input of the hidden neurons is stored in
    ///@param visibleStates the batch of states of the visible neurons
    void inputHidden(RealMatrix& inputs, RealMatrix const& visibleStates)const{
        SIZE_CHECK(visibleStates.size1() == inputs.size1());
        SIZE_CHECK(inputs.size2() == m_hiddenNeurons.size());
        SIZE_CHECK(visibleStates.size2() == m_visibleNeurons.size());

        noalias(inputs) = prod(m_visibleNeurons.phi(visibleStates),trans(m_weightMatrix));
    }

    ///\brief Calculates the input of the visible neurons given the state of the hidden neurons.
    ///
    ///@param inputs the batch of vectors the input of the visible neurons is stored in
    ///@param hiddenStates the batch of states of the hidden neurons
    void inputVisible(RealMatrix& inputs, RealMatrix const& hiddenStates)const{
        SIZE_CHECK(hiddenStates.size1() == inputs.size1());
        SIZE_CHECK(inputs.size2() == m_visibleNeurons.size());

        noalias(inputs) = prod(m_hiddenNeurons.phi(hiddenStates),m_weightMatrix);
    }
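
    // In matrix form, with weight matrix W (rows=hidden, columns=visible) and
    // the layers' interaction terms phi(.), the two functions above compute,
    // one row per batch element,
    //
    //   inputHidden:  I_h = phi(V) * W^T
    //   inputVisible: I_v = phi(H) * W
    //
    // which matches the prod()/trans() calls; for binary layers phi is
    // typically just the state itself.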

    using base_type::eval;

    ///\brief Returns the number of hidden neurons.
    std::size_t numberOfHN()const{
        return m_hiddenNeurons.size();
    }
    ///\brief Returns the number of visible neurons.
    std::size_t numberOfVN()const{
        return m_visibleNeurons.size();
    }

    /// \brief Reads the network from an archive.
    void read(InArchive& archive){
        archive >> m_weightMatrix;
        archive >> m_hiddenNeurons;
        archive >> m_visibleNeurons;

        //serialization of the rng is a bit... complex
        //let's hope that this hack can be removed some day. But we really can't ignore the state of the rng.
        std::string str;
        archive >> str;
        std::stringstream stream(str);
        stream >> *mpe_rng;
    }

    /// \brief Writes the network to an archive.
    void write(OutArchive& archive) const{
        archive << m_weightMatrix;
        archive << m_hiddenNeurons;
        archive << m_visibleNeurons;

        std::stringstream stream;
        stream << *mpe_rng;
        std::string str = stream.str();
        archive << str;
    }
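
    // Serialization sketch, assuming Shark's InArchive/OutArchive are boost
    // polymorphic text archives (an assumption about the surrounding library,
    // not something this file defines); `rbm` and the file name are illustrative:
    //
    //   std::ofstream ofs("rbm_model.txt");
    //   boost::archive::polymorphic_text_oarchive oa(ofs);
    //   rbm.write(oa);
    //
    // The rng state is round-tripped through a string because the underlying
    // boost generator only provides stream (de)serialization.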

};

}
#endif