ExactGradient.h
/*!
 *
 *
 * \brief Exact gradient of the negative log-likelihood for training an RBM.
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_EXACTGRADIENT_H
#define SHARK_UNSUPERVISED_RBM_GRADIENTAPPROXIMATIONS_EXACTGRADIENT_H

#include <shark/ObjectiveFunctions/AbstractObjectiveFunction.h>
#include <shark/Unsupervised/RBM/Sampling/GibbsOperator.h>
#include <shark/Unsupervised/RBM/analytics.h>
#include <boost/type_traits/is_same.hpp>
#include <boost/foreach.hpp>
namespace shark{

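/// \brief Computes the exact gradient of the negative log-likelihood of an RBM.
///
/// The model expectation of the energy gradient and the partition function are
/// computed by exhaustively enumerating all states of the smaller of the two
/// layers, so the evaluation cost grows exponentially with that layer's size;
/// the objective is only tractable for small models.
///
/// A minimal usage sketch (the BinaryRBM instance and the data set are assumed
/// and purely illustrative):
/// \code
/// ExactGradient<BinaryRBM> gradient(&rbm);
/// gradient.setData(data); //data: UnlabeledData<RealVector>
/// RealVector derivative;
/// double nll = gradient.evalDerivative(rbm.parameterVector(), derivative);
/// \endcode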
template<class RBMType>
class ExactGradient: public SingleObjectiveFunction{
private:
    typedef GibbsOperator<RBMType> Gibbs;
public:
    typedef RBMType RBM;

    ExactGradient(RBM* rbm): mpe_rbm(rbm), m_regularizer(0){
        SHARK_ASSERT(rbm != NULL);

        //this objective function provides a value and a first derivative
        m_features |= HAS_VALUE;
        m_features |= HAS_FIRST_DERIVATIVE;
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "ExactGradient"; }

    void setData(UnlabeledData<RealVector> const& data){
        m_data = data;
    }

    SearchPointType proposeStartingPoint() const{
        return mpe_rbm->parameterVector();
    }

    std::size_t numberOfVariables() const{
        return mpe_rbm->numberOfParameters();
    }

    void setRegularizer(double factor, SingleObjectiveFunction* regularizer){
        m_regularizer = regularizer;
        m_regularizationStrength = factor;
    }

    /// \brief Evaluates the exact mean negative log-likelihood of the data,
    /// plus the regularizer if one is set.
    double eval( SearchPointType const & parameter ) const {
        mpe_rbm->setParameterVector(parameter);

        double negLogLikelihood = negativeLogLikelihood(*mpe_rbm, m_data)/m_data.numberOfElements();
        if(m_regularizer){
            negLogLikelihood += m_regularizationStrength * m_regularizer->eval(parameter);
        }
        return negLogLikelihood;
    }

    double evalDerivative( SearchPointType const & parameter, FirstOrderDerivative & derivative ) const {
        mpe_rbm->setParameterVector(parameter);

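        //The exact gradient of the mean negative log-likelihood splits into
        //two energy-gradient expectations,
        //  d(-log L)/d(theta) = <dE/d(theta)>_model - <dE/d(theta)>_data,
        //where <.>_data averages over the training set with the hidden units
        //integrated out, and <.>_model averages over the full model
        //distribution, here computed by exhaustive enumeration.
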
        //accumulators for the two expectations of the energy gradient
        typename RBM::GradientType empiricalExpectation(mpe_rbm);
        typename RBM::GradientType modelExpectation(mpe_rbm);

        Gibbs gibbsSampler(mpe_rbm);

        //calculate the expectation of the energy gradient with respect to the data
        double negLogLikelihood = 0;
        BOOST_FOREACH(RealMatrix const& batch, m_data.batches()) {
            std::size_t currentBatchSize = batch.size1();
            typename Gibbs::HiddenSampleBatch hiddenSamples(currentBatchSize, mpe_rbm->numberOfHN());
            typename Gibbs::VisibleSampleBatch visibleSamples(currentBatchSize, mpe_rbm->numberOfVN());

            gibbsSampler.createSample(hiddenSamples, visibleSamples, batch);
            empiricalExpectation.addVH(hiddenSamples, visibleSamples);
            negLogLikelihood -= sum(mpe_rbm->energy().logUnnormalizedProbabilityVisible(
                batch, hiddenSamples.input, blas::repeat(1, currentBatchSize)
            ));
        }

        //calculate the expectation of the energy gradient with respect to the
        //model distribution; enumerate the states of the smaller layer, since
        //the number of states grows exponentially with the layer size
        if(mpe_rbm->numberOfVN() < mpe_rbm->numberOfHN()){
            integrateOverVisible(modelExpectation);
        }
        else{
            integrateOverHidden(modelExpectation);
        }

        derivative.resize(mpe_rbm->numberOfParameters());
        noalias(derivative) = modelExpectation.result() - empiricalExpectation.result();

        //negLogLikelihood so far holds -sum_v log p~(v); averaging over the
        //data and adding log Z yields the mean negative log-likelihood
        m_logPartition = modelExpectation.logWeightSum();
        negLogLikelihood /= m_data.numberOfElements();
        negLogLikelihood += m_logPartition;

        if(m_regularizer){
            FirstOrderDerivative regularizerDerivative;
            negLogLikelihood += m_regularizationStrength * m_regularizer->evalDerivative(parameter, regularizerDerivative);
            noalias(derivative) += m_regularizationStrength * regularizerDerivative;
        }

        return negLogLikelihood;
    }

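    /// \brief Returns the logarithm of the partition function of the model
    /// distribution, as computed by the last call to evalDerivative.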
    long double getLogPartition() const{
        return m_logPartition;
    }

private:
    RBM* mpe_rbm;

    SingleObjectiveFunction* m_regularizer;
    double m_regularizationStrength;

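    //The two integrate* helpers below enumerate all states of one layer in
    //batches of at most 256 states. Each batch contributes to the model
    //expectation of the energy gradient; the unnormalized log-probabilities
    //passed along as weights also yield the log partition function, queried
    //above via logWeightSum().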
    //batchwise loop over all visible states to calculate the model expectation
    //of the gradient as well as the partition function
    template<class GradientApproximator> //mostly a dummy right now
    void integrateOverVisible(GradientApproximator& modelExpectation) const{

        Gibbs sampler(mpe_rbm);

        typedef typename RBM::VisibleType::StateSpace VisibleStateSpace;
        std::size_t values = VisibleStateSpace::numberOfStates(mpe_rbm->numberOfVN());
        std::size_t batchSize = std::min(values, std::size_t(256));

        for (std::size_t x = 0; x < values; x += batchSize) {
            //create batch of states
            std::size_t currentBatchSize = std::min(batchSize, values - x);
            typename Batch<RealVector>::type stateBatch(currentBatchSize, mpe_rbm->numberOfVN());
            for(std::size_t elem = 0; elem != currentBatchSize; ++elem){
                //generate the (x+elem)-th state vector
                VisibleStateSpace::state(row(stateBatch, elem), x + elem);
            }

            //create sample from state batch
            typename Gibbs::HiddenSampleBatch hiddenBatch(currentBatchSize, mpe_rbm->numberOfHN());
            typename Gibbs::VisibleSampleBatch visibleBatch(currentBatchSize, mpe_rbm->numberOfVN());
            sampler.createSample(hiddenBatch, visibleBatch, stateBatch);

            //calculate unnormalized log-probabilities and update the accumulator
            RealVector logP = mpe_rbm->energy().logUnnormalizedProbabilityVisible(
                stateBatch, hiddenBatch.input, blas::repeat(1, currentBatchSize)
            );
            modelExpectation.addVH(hiddenBatch, visibleBatch, logP);
        }
    }

    //batchwise loop over all hidden states to calculate the model expectation
    //of the gradient as well as the partition function
    template<class GradientApproximator> //mostly a dummy right now
    void integrateOverHidden(GradientApproximator& modelExpectation) const{

        Gibbs sampler(mpe_rbm);

        typedef typename RBM::HiddenType::StateSpace HiddenStateSpace;
        std::size_t values = HiddenStateSpace::numberOfStates(mpe_rbm->numberOfHN());
        std::size_t batchSize = std::min(values, std::size_t(256));

        for (std::size_t x = 0; x < values; x += batchSize) {
            //create batch of states
            std::size_t currentBatchSize = std::min(batchSize, values - x);
            typename Batch<RealVector>::type stateBatch(currentBatchSize, mpe_rbm->numberOfHN());
            for(std::size_t elem = 0; elem != currentBatchSize; ++elem){
                //generate the (x+elem)-th state vector
                HiddenStateSpace::state(row(stateBatch, elem), x + elem);
            }

            //create sample from state batch
            typename Gibbs::HiddenSampleBatch hiddenBatch(currentBatchSize, mpe_rbm->numberOfHN());
            typename Gibbs::VisibleSampleBatch visibleBatch(currentBatchSize, mpe_rbm->numberOfVN());
            hiddenBatch.state = stateBatch;
            sampler.precomputeVisible(hiddenBatch, visibleBatch, blas::repeat(1, currentBatchSize));

            //calculate unnormalized log-probabilities and update the accumulator
            RealVector logP = mpe_rbm->energy().logUnnormalizedProbabilityHidden(
                stateBatch, visibleBatch.input, blas::repeat(1, currentBatchSize)
            );
            modelExpectation.addHV(hiddenBatch, visibleBatch, logP);
        }
    }

    UnlabeledData<RealVector> m_data;

    mutable double m_logPartition; //the log partition function of the model distribution
};

}

#endif