BinaryLayer.h
/*!
 *
 *
 * \brief -
 *
 * \author -
 * \date -
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BINARYLAYER_H
#define SHARK_UNSUPERVISED_RBM_NEURONLAYERS_BINARYLAYER_H

#include <shark/Core/ISerializable.h>
#include <shark/Core/IParameterizable.h>
#include <shark/LinAlg/Base.h>
#include <shark/Data/BatchInterfaceAdaptStruct.h>
#include <shark/Rng/Bernoulli.h>
#include <shark/Unsupervised/RBM/StateSpaces/TwoStateSpace.h>
#include <shark/Core/OpenMP.h>
namespace shark{

///\brief Layer of binary units taking values in {0,1}.

///A neuron in a BinaryLayer takes values in {0,1}, and the conditional probability to be 1,
///given the states of the neurons in the connected layer, is determined by the sigmoid function
///and the input it receives from the connected layer.
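///
///More precisely, for a unit with bias \f$ c_i \f$ receiving input \f$ x_i \f$ from the
///connected layer, \f$ P(h_i = 1 \mid \vec x) = \sigma(x_i + c_i) \f$ with
///\f$ \sigma(t) = 1/(1+e^{-t}) \f$ (a restatement of what sufficientStatistics computes below for beta = 1).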
class BinaryLayer : public ISerializable, public IParameterizable{
private:
    ///\brief The bias terms associated with the neurons.
    RealVector m_bias;
    ///\brief The base rate parameters, i.e. the biases of the tempered distribution for beta = 0.
    RealVector m_baseRate;
public:
    ///\brief The state space of this neuron is binary.
    typedef TwoStateSpace<0,1> StateSpace;

    ///\brief The sufficient statistics for the BinaryLayer store the probability of a neuron to be on.
    typedef RealVector SufficientStatistics;
    ///\brief Sufficient statistics of a batch of data.
    typedef Batch<SufficientStatistics>::type StatisticsBatch;
    /// \brief Returns the bias values of the units.
    const RealVector& bias()const{
        return m_bias;
    }

    /// \brief Returns the bias values of the units.
    RealVector& bias(){
        return m_bias;
    }

    /// \brief Returns the base rate of the units.
    ///
    ///The base rate is the tempered distribution for beta = 0;
    ///beta then fades between the RBM and the base rate.
    RealVector const& baseRate()const{
        return m_baseRate;
    }

    /// \brief Returns the base rate of the units.
    ///
    ///The base rate is the tempered distribution for beta = 0;
    ///beta then fades between the RBM and the base rate.
    RealVector& baseRate(){
        return m_baseRate;
    }

    ///\brief Resizes this neuron layer.
    ///
    ///@param newSize number of neurons in the layer
    void resize(std::size_t newSize){
        m_bias.resize(newSize);
        m_baseRate.resize(newSize);
        m_baseRate.clear();
    }

    ///\brief Returns the number of neurons of this layer.
    std::size_t size()const{
        return m_bias.size();
    }

    /// \brief Takes the input of the neuron and estimates the expectation of the response of the neuron.
    /// For binary neurons the expectation is identical to the conditional probability of the neuron to be on given the state of the connected layer.
    ///
    /// @param input the batch of inputs of the neuron
    /// @param statistics sufficient statistics containing the probabilities of the neurons to be one
    /// @param beta the inverse temperature of the RBM (typically 1) for the whole batch
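    /// \par Example (a minimal sketch; the variable names are hypothetical, the calls are those of this header):
    /// \code
    /// BinaryLayer layer;
    /// layer.resize(3);
    /// layer.bias().clear();    // zero bias, so the example is deterministic
    /// RealMatrix input(1,3);   // a batch holding one input vector for three units
    /// input.clear();           // zero input from the connected layer
    /// RealMatrix probs(1,3);
    /// RealVector beta(1);
    /// beta(0) = 1.0;           // beta = 1: the standard, untempered RBM
    /// layer.sufficientStatistics(input, probs, beta);
    /// // with zero bias and zero input every entry of probs is sigmoid(0) = 0.5
    /// \endcode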
    template<class Input, class BetaVector>
    void sufficientStatistics(Input const& input, StatisticsBatch& statistics, BetaVector const& beta)const{ // \todo: reconsider the names here as well
        SIZE_CHECK(input.size2() == size());
        SIZE_CHECK(statistics.size2() == size());
        SIZE_CHECK(input.size1() == statistics.size1());

        for(std::size_t i = 0; i != input.size1(); ++i){
            noalias(row(statistics,i)) = sigmoid((row(input,i)+m_bias)*beta(i)+(1.0-beta(i))*m_baseRate);
        }
    }

    /// \brief Samples from the distribution using either Gibbs or flip-the-state sampling.
    ///
    /// For alpha = 0 Gibbs sampling is performed. That is, the next state for neuron i is drawn directly from the conditional distribution of the i-th neuron.
    /// In the case of alpha = 1, flip-the-state sampling is performed, which takes the last state into account and deterministically tries to jump
    /// into states with higher probability. This is counterbalanced by a higher chance to jump back into a lower-probability state in later steps.
    /// For alpha between 0 and 1 a mixture of both is performed.
    ///
    /// @param statistics sufficient statistics containing the probabilities of the neurons to be one
    /// @param state the state vector that shall hold the sampled states
    /// @param alpha factor interpolating from Gibbs to flip-the-state sampling, 0 <= alpha <= 1
    /// @param rng the random number generator used for sampling
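    /// \par Example (a minimal sketch continuing the one above; Rng::globalRng is assumed to be available as the library's global generator):
    /// \code
    /// RealMatrix state(1,3);
    /// state.clear();
    /// layer.sample(probs, state, 0.0, Rng::globalRng); // alpha = 0: a plain Gibbs step
    /// \endcode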
    template<class Matrix, class Rng>
    void sample(StatisticsBatch const& statistics, Matrix& state, double alpha, Rng& rng) const{
        SIZE_CHECK(statistics.size2() == size());
        SIZE_CHECK(statistics.size1() == state.size1());
        SIZE_CHECK(statistics.size2() == state.size2());

        SHARK_CRITICAL_REGION{//the random number generator is not thread-safe
            Bernoulli<Rng> coinToss(rng,0.5);
            if(alpha == 0.0){//special case: normal Gibbs sampling
                for(std::size_t s = 0; s != state.size1();++s){
                    for(std::size_t i = 0; i != state.size2();++i){
                        state(s,i) = coinToss(statistics(s,i));
                    }
                }
            }
            else{//flip-the-state sampling
                for(std::size_t s = 0; s != state.size1(); ++s){
                    for(std::size_t i = 0; i != state.size2(); ++i){
                        double prob = statistics(s,i);//probability of the unit being one
                        if(state(s,i) == 0){
                            if(prob <= 0.5){
                                prob = (1. - alpha) * prob + alpha * prob / (1. - prob);
                            }else{
                                prob = (1. - alpha) * prob + alpha;
                            }
                        }else{
                            if(prob >= 0.5){
                                prob = (1. - alpha) * prob + alpha * (1. - (1. - prob) / prob);
                            }else{
                                prob = (1. - alpha) * prob;
                            }
                        }
                        state(s,i) = coinToss(prob);
                    }
                }
            }
        }
    }

    /// \brief Computes the log of the probability of the given states under the conditional distribution.
    ///
    /// Currently it is only possible to compute the case with alpha = 0.
    ///
    /// @param statistics the statistics of the conditional distribution
    /// @param state the state to check
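    /// \par Note (a restatement of the code below): this is the Bernoulli log-likelihood
    /// \f$ \sum_i h_i \log p_i + (1-h_i)\log(1-p_i) \f$, with \f$ p_i \f$ taken from statistics.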
    template<class Matrix>
    RealVector logProbability(StatisticsBatch const& statistics, Matrix const& state) const{
        SIZE_CHECK(statistics.size2() == size());
        SIZE_CHECK(statistics.size1() == state.size1());
        SIZE_CHECK(statistics.size2() == state.size2());

        RealVector logProbabilities(state.size1(),0.0);//log-probabilities accumulate from 0, not 1
        for(std::size_t s = 0; s != state.size1();++s){
            for(std::size_t i = 0; i != state.size2();++i){
                logProbabilities(s) += (state(s,i) > 0.0)? std::log(statistics(s,i)) : std::log(1-statistics(s,i));
            }
        }
        return logProbabilities;
    }

    /// \brief Transforms the current state of the neurons for the multiplication with the weight matrix of the RBM,
    /// i.e. calculates the value of the phi-function used in the interaction term.
    /// In the case of binary neurons the phi-function is just the identity.
    ///
    /// @param state the state matrix of the neuron layer
    /// @return the value of the phi-function
    template<class Matrix>
    Matrix const& phi(Matrix const& state)const{
        SIZE_CHECK(state.size2() == size());
        return state;
    }


    /// \brief Returns the conditional expectation of the phi-function given the state of the connected layer,
    /// i.e. in this case the probabilities of the neurons having state one.
    ///
    /// @param statistics the sufficient statistics of the layer
    RealMatrix const& expectedPhiValue(StatisticsBatch const& statistics)const{
        return statistics;
    }

    /// \brief Returns the mean given the state of the connected layer, i.e. in this case the probabilities of the neurons having state one.
    ///
    /// @param statistics the sufficient statistics of the layer for a whole batch
    RealMatrix const& mean(StatisticsBatch const& statistics)const{
        SIZE_CHECK(statistics.size2() == size());
        return statistics;
    }

    /// \brief Returns the energy term this neuron adds to the energy function.
    ///
    /// @param state the state of the neuron layer
    /// @param beta the inverse temperatures; beta(i) is applied to the i-th state of the batch
    /// @return the energy term of the neuron layer
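    /// \par Note (a restatement of the code below): for a state \f$ \vec h \f$ with bias \f$ \vec c \f$,
    /// base rate \f$ \vec b \f$ and inverse temperature \f$ \beta \f$, the term is
    /// \f$ \beta\, \vec h^T \vec c + (1-\beta)\, \vec h^T \vec b \f$.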
    template<class Matrix, class BetaVector>
    RealVector energyTerm(Matrix const& state, BetaVector const& beta)const{
        SIZE_CHECK(state.size2() == size());
        SIZE_CHECK(state.size1() == beta.size());
        //for a batch, the following computes the equivalent of
        //inner_prod(state_i, m_bias) for every state i of the batch
        RealVector energies = prod(state,m_bias);
        RealVector baseRateEnergies = prod(state,m_baseRate);
        noalias(energies) = beta*energies +(1-beta)*baseRateEnergies;

        return energies;
    }


    ///\brief Sums over all possible values of the terms of the energy function which depend on this layer and returns the logarithmic result.
    ///
    ///This function is called by Energy when the unnormalized marginal probability of the connected layer is to be computed.
    ///It calculates the part which depends on the neurons which are to be marginalized out.
    ///(In the case of the binary hidden neuron, this is the term \f$ \sum_{\vec h} e^{\vec h^T W \vec v+ \vec h^T \vec c} \f$.)
    ///The rest is calculated by the energy function.
    ///In the general case of a hidden layer, this function calculates \f$ \int_{\vec h} e^{\phi_h(\vec h)^T W \phi_v(\vec v)+f_h(\vec h)} \f$,
    ///where \f$ f_h \f$ is the energy term of this layer.
    ///
    /// @param inputs the inputs the neurons get from the other layer
    /// @param beta the inverse temperature of the RBM
    /// @return the logarithm of the marginalization sum for this layer
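    /// \par Note (a restatement of the code below): for binary units the sum factorizes over the neurons,
    /// \f$ \log \sum_{\vec h} \prod_i e^{h_i (x_i + c_i)} = \sum_i \log\left(1 + e^{x_i + c_i}\right) \f$,
    /// i.e. a sum of softplus terms, here evaluated at the tempered argument used in the loop.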
    template<class Input>
    double logMarginalize(Input const& inputs, double beta) const{
        SIZE_CHECK(inputs.size() == size());
        long double logFactorization = 0;
        for(std::size_t i = 0; i != inputs.size(); ++i){
            double arg = (inputs(i)+m_bias(i))*beta+(1-beta)*m_baseRate(i);
            logFactorization += softPlus(arg);//softPlus(x) = log(1+e^x)
        }
        return logFactorization;
    }


    ///\brief Calculates the expectation of the derivatives of the energy term of this neuron layer with respect to its parameters, the bias weights.
    /// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
    ///
    ///This function takes a batch of samples and extracts the required information from it.
    ///@param derivative the derivative with respect to the parameters; the result is added on top of it to accumulate derivatives
    ///@param samples the samples from which the information can be extracted
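    /// \par Note: since the energy term is linear in the bias (see energyTerm), the derivative with
    /// respect to \f$ c_i \f$ is simply the value of unit i, and its conditional expectation is the
    /// probability of the unit being one; summing the statistics over the batch therefore accumulates it.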
    template<class Vector, class SampleBatch>
    void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples )const{
        SIZE_CHECK(derivative.size() == size());
        sum_rows(samples.statistics,derivative);
    }

    ///\brief Calculates the expectation of the derivatives of the energy term of this neuron layer with respect to its parameters, the bias weights.
    /// The expectation is taken with respect to the conditional probability distribution of the layer given the state of the connected layer.
    ///
    ///This function takes a batch of samples and weights the results.
    ///@param derivative the derivative with respect to the parameters; the result is added on top of it to accumulate derivatives
    ///@param samples the samples from which the information can be extracted
    ///@param weights the weights for all samples
    template<class Vector, class SampleBatch, class WeightVector>
    void expectedParameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights )const{
        SIZE_CHECK(derivative.size() == size());
        noalias(derivative) += prod(weights,samples.statistics);
    }


    ///\brief Calculates the derivatives of the energy term of this neuron layer with respect to its parameters, the bias weights.
    ///
    ///This function takes a batch of samples and extracts the required information from it.
    ///@param derivative the derivative with respect to the parameters; the result is added on top of it to accumulate derivatives
    ///@param samples the samples from which the information can be extracted
    template<class Vector, class SampleBatch>
    void parameterDerivative(Vector& derivative, SampleBatch const& samples)const{
        SIZE_CHECK(derivative.size() == size());
        sum_rows(samples.state,derivative);
    }

    ///\brief Calculates the derivatives of the energy term of this neuron layer with respect to its parameters, the bias weights.
    ///
    ///This function takes a batch of samples and calculates a weighted derivative.
    ///@param derivative the derivative with respect to the parameters; the result is added on top of it to accumulate derivatives
    ///@param samples the samples from which the information can be extracted
    ///@param weights the weights for the single-sample derivatives
    template<class Vector, class SampleBatch, class WeightVector>
    void parameterDerivative(Vector& derivative, SampleBatch const& samples, WeightVector const& weights)const{
        SIZE_CHECK(derivative.size() == size());
        noalias(derivative) += prod(weights,samples.state);
    }

    /// \brief Returns the vector with the parameters associated with the neurons in the layer, i.e. the bias vector.
    RealVector parameterVector()const{
        return m_bias;
    }

    /// \brief Sets the parameters associated with the neurons in the layer, i.e. the bias vector.
    void setParameterVector(RealVector const& newParameters){
        m_bias = newParameters;
    }

    /// \brief Returns the number of the parameters associated with the neurons in the layer.
    std::size_t numberOfParameters()const{
        return size();
    }

    /// \brief Reads the bias vector from an archive.
    ///
    /// @param archive the archive
    void read( InArchive & archive ){
        archive >> m_bias;
        m_baseRate = RealVector(m_bias.size(),0);//the base rate is not serialized; reset it to zero
    }

    /// \brief Writes the bias vector to an archive.
    ///
    /// @param archive the archive
    void write( OutArchive & archive ) const{
        archive << m_bias;
    }
};
}
#endif