TruncatedExponentialLayer.h
/*!
 *
 *
 * \brief Implements a layer of truncated exponential neurons for use in RBMs.
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_NEURONLAYERS_TRUNCATEDEXPONENTIALLAYER_H
#define SHARK_UNSUPERVISED_RBM_NEURONLAYERS_TRUNCATEDEXPONENTIALLAYER_H

#include <shark/Rng/TruncatedExponential.h>
#include <shark/Unsupervised/RBM/StateSpaces/RealSpace.h>
#include <shark/LinAlg/Base.h>
#include <shark/Data/BatchInterfaceAdaptStruct.h>
#include <shark/Core/ISerializable.h>
#include <shark/Core/IParameterizable.h>
#include <shark/Core/OpenMP.h>
namespace shark{
namespace detail{
template<class VectorType>
struct TruncatedExponentialSufficientStatistics{
    VectorType lambda;
    VectorType expMinusLambda;

    TruncatedExponentialSufficientStatistics(std::size_t numberOfNeurons)
    :lambda(numberOfNeurons), expMinusLambda(numberOfNeurons){}
    TruncatedExponentialSufficientStatistics(){}
};
}

/// \cond

//auto generate the batch interface for the TruncatedExponentialSufficientStatistics
template<class VectorType>
struct Batch< detail::TruncatedExponentialSufficientStatistics<VectorType> >{
    SHARK_CREATE_BATCH_INTERFACE(
        detail::TruncatedExponentialSufficientStatistics<VectorType>,
        (VectorType, lambda)(VectorType, expMinusLambda)
    )
};

/// \endcond

///\brief A layer of truncated exponential neurons.
///
/// Truncated exponential distributions arise when the state space of binary neurons is extended to the
/// real numbers in [0,1]. The conditional distribution of the states of these neurons given the states of the
/// connected layer is an exponential distribution restricted to [0,1].
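///
/// More precisely, if a neuron receives the total input x and has bias c, its conditional
/// density at inverse temperature \f$ \beta \f$ is
///\f[ p(h) = \frac{\lambda e^{-\lambda h}}{1 - e^{-\lambda}}, \quad h \in [0,1], \quad \lambda = -\beta(x+c), \f]
/// which is the quantity computed by sufficientStatistics() and drawn from by sample().
///
/// A minimal usage sketch (hypothetical driver code, not part of this header; it assumes the
/// generated StatisticsBatch has the usual (batchSize, numberOfNeurons) constructor and that
/// Rng::rng_type is Shark's default random number generator):
/// \code
/// TruncatedExponentialLayer layer;
/// layer.resize(3);                                       // 3 neurons
/// RealMatrix inputs(2,3);                                // batch of 2 input vectors
/// // ... fill inputs with the weighted states of the connected layer ...
/// RealVector beta(2,1.0);                                // inverse temperature 1 for both samples
/// TruncatedExponentialLayer::StatisticsBatch stats(2,3);
/// layer.sufficientStatistics(inputs,stats,beta);         // lambda and exp(-lambda)
/// RealMatrix states(2,3);
/// Rng::rng_type rng;
/// layer.sample(stats,states,0.0,rng);                    // Gibbs sampling (alpha = 0)
/// RealMatrix means = layer.mean(stats);                  // conditional expectations
/// \endcode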
class TruncatedExponentialLayer : public ISerializable, public IParameterizable{
private:
    RealVector m_bias;
public:
    ///the state space of this neuron is real valued, so it can't be enumerated
    typedef RealSpace StateSpace;

    ///\brief Stores lambda, the defining parameter of the statistics, and also exp(-lambda), since it is used regularly.
    typedef detail::TruncatedExponentialSufficientStatistics<RealVector> SufficientStatistics;
    ///\brief Sufficient statistics of a batch of data.
    typedef Batch<SufficientStatistics>::type StatisticsBatch;

    /// \brief Returns the bias values of the units.
    const RealVector& bias()const{
        return m_bias;
    }
    /// \brief Returns the bias values of the units.
    RealVector& bias(){
        return m_bias;
    }

    ///\brief Resizes this neuron layer.
    ///
    ///@param newSize number of neurons in the layer
    void resize(std::size_t newSize){
        m_bias.resize(newSize);
    }

    ///\brief Returns the number of neurons of this layer.
    std::size_t size()const{
        return m_bias.size();
    }

    /// \brief Takes the input of the neurons and calculates the statistics required to sample from the conditional distribution.
    ///
    /// @param input the batch of inputs of the neurons
    /// @param statistics sufficient statistics containing the parameters lambda of the truncated exponentials and the precomputed values exp(-lambda)
    /// @param beta the inverse temperatures of the RBM (typically 1), one entry per element of the batch
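    ///
    /// For batch row i this computes \f$ \lambda_i = -\beta_i(x_i + c) \f$, where \f$x_i\f$ is the
    /// input row and \f$c\f$ the bias vector, together with \f$e^{-\lambda}\f$ for the whole batch.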
    template<class Input, class BetaVector>
    void sufficientStatistics(Input const& input, StatisticsBatch& statistics, BetaVector const& beta)const{ // \todo: rethink the names here as well
        SIZE_CHECK(input.size2() == size());
        SIZE_CHECK(statistics.lambda.size2() == size());
        SIZE_CHECK(input.size1() == statistics.lambda.size1());

        for(std::size_t i = 0; i != input.size1(); ++i){
            noalias(row(statistics.lambda,i)) = -(row(input,i)+m_bias)*beta(i);
        }
        noalias(statistics.expMinusLambda) = exp(-statistics.lambda);
    }

    /// \brief Samples from the truncated exponential distribution using either Gibbs- or flip-the-state sampling given the sufficient statistics
    /// (i.e. the parameter lambda and the value of exp(-lambda)).
    ///
    /// The truncated exponential distribution is defined as
    ///\f[ p(x) = \lambda \frac{e^{-\lambda x}}{1 - e^{-\lambda}}\f]
    /// with x being in the range [0,1].
    ///
    /// For alpha = 0, Gibbs sampling is performed. That is, the next state of neuron i is drawn directly from the conditional distribution of the i-th neuron.
    /// In the case of alpha = 1, flip-the-state sampling is performed, which takes the last state into account and tries to deterministically jump
    /// into states with higher probability. THIS IS NOT IMPLEMENTED YET and alpha is ignored!
    ///
    /// @param statistics sufficient statistics for the batch to be computed
    /// @param state the state matrix that will hold the sampled states
    /// @param alpha factor interpolating between Gibbs and flip-the-state sampling, 0 <= alpha <= 1
    /// @param rng the random number generator used for sampling
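    ///
    /// A note on the mechanics (an assumption about the TruncatedExponential helper, which is
    /// handed the normalization integral \f$1-e^{-\lambda}\f$ below): such a sample can be drawn by
    /// inverting the CDF \f$F(x) = \frac{1-e^{-\lambda x}}{1-e^{-\lambda}}\f$, i.e. drawing
    /// \f$u \sim U[0,1]\f$ and returning \f$x = -\frac{1}{\lambda}\log\bigl(1-u\,(1-e^{-\lambda})\bigr)\f$.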
    template<class Matrix, class Rng>
    void sample(StatisticsBatch const& statistics, Matrix& state, double alpha, Rng& rng) const{
        SIZE_CHECK(statistics.lambda.size2() == size());
        SIZE_CHECK(statistics.lambda.size1() == state.size1());
        SIZE_CHECK(statistics.lambda.size2() == state.size2());

        SHARK_CRITICAL_REGION{//the rng is not thread safe
            for(std::size_t i = 0; i != state.size1();++i){
                for(std::size_t j = 0; j != state.size2();++j){
                    double integral = 1.0 - statistics.expMinusLambda(i,j);
                    TruncatedExponential<Rng> truncExp(integral,rng,statistics.lambda(i,j));
                    state(i,j) = truncExp();
                }
            }
        }
        (void)alpha;//TODO: USE ALPHA
    }

    /// \brief Transforms the current state of the neurons for the multiplication with the weight matrix of the RBM,
    /// i.e. calculates the value of the phi-function used in the interaction term.
    ///
    /// @param state the state matrix of the neuron layer
    /// @return the value of the phi-function
    template<class Matrix>
    Matrix const& phi(Matrix const& state)const{
        SIZE_CHECK(state.size2() == size());
        return state;
    }


    /// \brief Returns the conditional expectation of the phi-function given the state of the connected layer.
    ///
    /// @param statistics the sufficient statistics of the layer
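    /// @return the batch of means; elementwise this is the mean of a truncated exponential,
    ///\f[ \langle x \rangle = \frac{1}{\lambda} - \frac{e^{-\lambda}}{1-e^{-\lambda}}, \f]
    /// which is what the loop below evaluates.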
    RealMatrix expectedPhiValue(StatisticsBatch const& statistics)const{
        SIZE_CHECK(statistics.lambda.size2() == size());
        SIZE_CHECK(statistics.expMinusLambda.size2() == size());
        std::size_t batchSize = statistics.lambda.size1();
        RealMatrix mean(batchSize,size());

        for(std::size_t i = 0; i != batchSize;++i){
            for(std::size_t j = 0; j != size();++j){
                double expML = statistics.expMinusLambda(i,j);
                mean(i,j) = 1.0/statistics.lambda(i,j) - expML/(1.0 - expML);
            }
        }
        return mean;
    }
    /// \brief Returns the mean of the conditional distribution.
    /// @param statistics the sufficient statistics defining the conditional distribution
    RealMatrix mean(StatisticsBatch const& statistics)const{
        return expectedPhiValue(statistics);
    }

    /// \brief Returns the energy term this neuron layer adds to the energy function.
    ///
    /// @param state the state of the neuron layer
    /// @param beta the vector of inverse temperatures, one per state in the batch
    /// @return the energy term of the neuron layer
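    ///
    /// For the i-th sample this evaluates \f$\beta_i\, \vec h_i^T \vec c\f$, the batched version of
    /// the inner_prod(m_bias,state) mentioned in the code comment below.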
    template<class Matrix, class BetaVector>
    RealVector energyTerm(Matrix const& state, BetaVector const& beta)const{
        SIZE_CHECK(state.size2() == size());
        SIZE_CHECK(state.size1() == beta.size());
        //the following code does for batches the equivalent thing to:
        //return inner_prod(m_bias,state)

        RealVector energies = beta * prod(state,m_bias);
        return energies;
    }

    ///\brief Integrates over the terms of the energy function which depend on the state of this layer and returns the logarithm of the result.
    ///
    /// This function is called by Energy when the unnormalized marginal probability of the connected layer is to be computed.
    /// It calculates the part which depends on the neurons which are to be marginalized out.
    /// (In the case of the exponential hidden neuron, this is the term \f$ \log \int_h e^{\vec h^T W \vec v+ \vec h^T \vec c} \f$.)
    ///
    /// @param inputs the inputs of the neurons as received from the other layer
    /// @param beta the inverse temperature of the RBM
    /// @return the logarithm of the integrated term, i.e. the contribution to the unnormalized log-marginal of the connected layer
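    ///
    /// Per neuron the integral has the closed form
    ///\f[ \int_0^1 e^{a h}\,dh = \frac{e^{a}-1}{a}, \qquad a = \beta\,(x_i + c_i), \f]
    /// and the loop below accumulates \f$\log\frac{e^{a}-1}{a} = \log|e^{a}-1| - \log|a|\f$ in a numerically stable way.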
    template<class Input>
    double logMarginalize(const Input& inputs, double beta) const{
        SIZE_CHECK(inputs.size() == size());
        double lnResult = 0;
        for(std::size_t i = 0; i != size(); ++i){
            double a = (inputs(i)+m_bias(i))*beta;
            //calculates log( (exp(a)-1)/a ). the argument of the log is always positive since for a > 0 we have exp(a) > 1 and for a < 0 we have exp(a) < 1.
            //for a = 0 the result is 1 and log(1) = 0, so nothing needs to be added.
            //otherwise we calculate log( (exp(a)-1)/a ) = log|exp(a)-1| - log|a|.
            //for a > 0 we use log|exp(a)-1| = log(exp(a)-1) = a+log(1-exp(-a)), which is numerically more stable if a is big;
            //for a < 0, log|exp(a)-1| = log(1-exp(a)) is fine.
            if( a > 1.e-50){
                lnResult += a+std::log(1.0 - std::exp(-a));
                lnResult -= std::log(a);
            }
            else if( a < -1.e-50){//the a < 0 branch; note the sign of the threshold
                lnResult += std::log(1.0 - std::exp(a));
                lnResult -= std::log(-a);
            }
            //for |a| <= 1.e-50 the term is log(1) = 0 and is skipped
        }
        return lnResult;
    }


    ///\brief Calculates the expectation of the derivative of the energy term of this neuron layer with respect to its parameters, the bias weights.
    /// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
    ///
    /// This function takes a batch of samples and extracts the required information from it.
    ///@param derivative the derivative with respect to the parameters; the result is added onto it in order to accumulate derivatives
    ///@param samples the samples from which the information can be extracted
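    ///
    /// Since the energy term is linear in the states, the conditional expectation of the derivative
    /// with respect to the bias is just the conditional mean of the states, so the column sums of
    /// mean(statistics) are accumulated.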
    template<class Vector, class SampleBatch>
    void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples )const{
        SIZE_CHECK(derivative.size() == size());
        //the statistics of this layer store lambda and exp(-lambda), not a probability,
        //so the conditional expectation of the state is obtained via mean()
        sum_rows(mean(samples.statistics),derivative);
    }


    ///\brief Calculates the derivative of the energy term of this neuron layer with respect to its parameters, the bias weights.
    ///
    /// This function takes a batch of samples and extracts the required information from it.
    ///@param derivative the derivative with respect to the parameters; the result is added onto it in order to accumulate derivatives
    ///@param samples the batch of samples from which the information can be extracted
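    ///
    /// The energy term is \f$\vec h^T \vec c\f$, so its derivative with respect to the bias \f$\vec c\f$
    /// is simply the state \f$\vec h\f$; summed over the batch this gives the column sums of the states.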
    template<class Vector, class SampleBatch>
    void parameterDerivative(Vector& derivative, SampleBatch const& samples)const{
        SIZE_CHECK(derivative.size() == size());
        sum_rows(samples.state,derivative);
    }

    /// \brief Returns the vector with the parameters associated with the neurons in the layer.
    RealVector parameterVector()const{
        return m_bias;
    }

    /// \brief Sets the parameters associated with the neurons in the layer.
    void setParameterVector(RealVector const& newParameters){
        m_bias = newParameters;
    }

    /// \brief Returns the number of parameters associated with the neurons in the layer.
    std::size_t numberOfParameters()const{
        return size();
    }

    /// \brief Reads the bias parameters from an archive.
    void read( InArchive & archive ){
        archive >> m_bias;
    }
    /// \brief Writes the bias parameters to an archive.
    void write( OutArchive & archive ) const{
        archive << m_bias;
    }
};

}
#endif