AbstractModel.h
//===========================================================================
/*!
 *
 *
 * \brief      Base class for all models, as well as a specialized differentiable model
 *
 *
 *
 * \author     T. Glasmachers, O. Krause
 * \date       2010
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_MODELS_ABSTRACTMODEL_H
#define SHARK_MODELS_ABSTRACTMODEL_H

#include <shark/Core/Flags.h>
#include <shark/Core/IParameterizable.h>
#include <shark/Core/INameable.h>
#include <shark/Core/State.h>
#include <shark/Rng/Normal.h>
#include <shark/Rng/Uniform.h>
#include <shark/Data/Dataset.h>

namespace shark {

///\brief Base class for all models.
///
/// \par
/// A model is one of the three pillars of supervised learning: the model, the error
/// measure, and the optimization algorithm.
/// Conceptually, it is a function which performs a mapping \f$ x \rightarrow f_w(x) \f$.
/// In contrast to an error function, it has two sets of parameters:
/// the first is the current point to map, \f$ x \f$; the second is the set of internal
/// model parameters \f$ w \f$ which define the mapping.
/// A model is typically used to find an optimal mapping for a problem, for example a
/// function which best fits the points of a given dataset. Therefore, AbstractModel
/// offers not only the mapping itself, but also a set of derivatives with respect to
/// \f$ x \f$ and \f$ w \f$.
/// Most of the time only the derivative with respect to \f$ w \f$ is needed, but in some
/// special problems, like finding optimal stimuli or stacking models, the input
/// derivative is needed as well.
///
/// \par
/// Models are optimized for batch processing: instead of evaluating a single data point
/// at a time, a model can evaluate a large set of inputs at once, using optimized
/// routines for this task.
///
/// \par
/// The derivatives are weighted: the derivatives of all outputs are summed up, weighted
/// by a set of coefficients \f$ c \f$, i.e. the model computes
/// \f$ \sum_i \sum_j c_{ij} \frac{\partial [f_w(x_i)]_j}{\partial w} \f$
/// (see #weightedParameterDerivative). This implements the inner step of the chain rule
/// and is very efficient to calculate in most cases.
///
/// \par
/// A model is allowed to store intermediate values during #eval and use them to speed up
/// the calculation of the derivatives. Hence it must be guaranteed that eval() is called
/// before the derivatives are computed. This is no real restriction, since typical error
/// measures need the mapping itself and not only the derivative.
///
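/// \par
/// A minimal usage sketch. The concrete model type LinearModel<> and the dataset
/// `inputs` are assumptions for illustration; they are not part of this header:
/// \code
/// LinearModel<> model(3, 1);                     // a model mapping R^3 to R
/// RealVector x(3);                               // a single input pattern
/// RealVector y = model(x);                       // single-pattern evaluation
/// Data<RealVector> inputs;                       // ...filled elsewhere
/// Data<RealVector> predictions = model(inputs);  // batch-optimized evaluation
/// \endcode
///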
/// \par
/// Models have names and can be serialised.
template<class InputTypeT, class OutputTypeT>
class AbstractModel : public IParameterizable, public INameable, public ISerializable
{
public:
	/// \brief Defines the input type of the model.
	typedef InputTypeT InputType;
	/// \brief Defines the output type of the model.
	typedef OutputTypeT OutputType;
	typedef OutputType result_type;

	/// \brief Defines the batch type of the input type.
	///
	/// This could for example be std::vector<InputType>, but for RealVector it could be RealMatrix.
	typedef typename Batch<InputType>::type BatchInputType;
	/// \brief Defines the batch type of the output type.
	typedef typename Batch<OutputType>::type BatchOutputType;

	virtual ~AbstractModel() { }

	enum Feature {
		HAS_FIRST_PARAMETER_DERIVATIVE  = 1,
		HAS_SECOND_PARAMETER_DERIVATIVE = 2,
		HAS_FIRST_INPUT_DERIVATIVE     = 4,
		HAS_SECOND_INPUT_DERIVATIVE    = 8,
		IS_SEQUENTIAL = 16
	};
	SHARK_FEATURE_INTERFACE;

	/// \brief Returns true when the first parameter derivative is implemented.
	bool hasFirstParameterDerivative()const{
		return m_features & HAS_FIRST_PARAMETER_DERIVATIVE;
	}
	/// \brief Returns true when the second parameter derivative is implemented.
	bool hasSecondParameterDerivative()const{
		return m_features & HAS_SECOND_PARAMETER_DERIVATIVE;
	}
	/// \brief Returns true when the first input derivative is implemented.
	bool hasFirstInputDerivative()const{
		return m_features & HAS_FIRST_INPUT_DERIVATIVE;
	}
	/// \brief Returns true when the second input derivative is implemented.
	bool hasSecondInputDerivative()const{
		return m_features & HAS_SECOND_INPUT_DERIVATIVE;
	}
	/// \brief Returns true when the model processes sequences element by element.
	bool isSequential()const{
		return m_features & IS_SEQUENTIAL;
	}

	///\brief Creates an internal state object for the model.
	///
	/// The state is needed when the derivatives are to be
	/// calculated: eval can store a state which is then reused to speed up
	/// the calculation of the derivatives. This also allows eval to be
	/// executed in parallel!
	virtual boost::shared_ptr<State> createState() const
	{
		if (hasFirstParameterDerivative()
			|| hasFirstInputDerivative()
			|| hasSecondParameterDerivative()
			|| hasSecondInputDerivative())
		{
			throw SHARKEXCEPTION("[AbstractModel::createState] createState must be overridden by models with derivatives");
		}
		return boost::shared_ptr<State>(new EmptyState());
	}
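
	// A sketch of the intended state life cycle (`model`, `patterns`, and
	// `coefficients` are placeholders; the model must implement the first
	// parameter derivative):
	//
	//   boost::shared_ptr<State> state = model.createState();
	//   BatchOutputType outputs;
	//   model.eval(patterns, outputs, *state);            // eval fills the state,
	//   RealVector derivative;
	//   model.weightedParameterDerivative(
	//       patterns, coefficients, *state, derivative);  // the derivative reuses it.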

	/// \brief From ISerializable, reads a model from an archive.
	virtual void read( InArchive & archive ){
		m_features.read(archive);
		RealVector p;
		archive & p;
		setParameterVector(p);
	}

	/// \brief From ISerializable, writes a model to an archive.
	///
	/// The default implementation just saves the parameters, not the structure!
	virtual void write( OutArchive & archive ) const{
		m_features.write(archive);
		RealVector p = parameterVector();
		archive & p;
	}
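
	// A sketch of model (de)serialization, assuming Shark's text archive
	// typedefs TextOutArchive/TextInArchive and a model object `model`:
	//
	//   std::ofstream ofs("model.txt");
	//   TextOutArchive oa(ofs);
	//   model.write(oa);   // saves the feature flags and the parameter vector
	//
	//   std::ifstream ifs("model.txt");
	//   TextInArchive ia(ifs);
	//   model.read(ia);    // restores the parameters saved above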

	/// \brief Standard interface for evaluating the response of the model to a batch of patterns.
	///
	/// \param patterns the inputs of the model
	/// \param outputs the predictions or response of the model to every pattern
	virtual void eval(BatchInputType const & patterns, BatchOutputType& outputs) const{
		boost::shared_ptr<State> state = createState();
		eval(patterns,outputs,*state);
	}

	/// \brief Standard interface for evaluating the response of the model to a batch of patterns.
	///
	/// \param patterns the inputs of the model
	/// \param outputs the predictions or response of the model to every pattern
	/// \param state intermediate results stored by eval which can be reused for derivative computation
	virtual void eval(BatchInputType const & patterns, BatchOutputType& outputs, State& state) const = 0;

	/// \brief Standard interface for evaluating the response of the model to a single pattern.
	///
	/// \param pattern the input of the model
	/// \param output the prediction or response of the model to the pattern
	virtual void eval(InputType const & pattern, OutputType& output)const{
		BatchInputType patternBatch = Batch<InputType>::createBatch(pattern);
		get(patternBatch,0) = pattern;
		BatchOutputType outputBatch;
		eval(patternBatch,outputBatch);
		output = get(outputBatch,0);
	}

	/// \brief Model evaluation as an operator for a whole dataset. This is a convenience function.
	///
	/// \param patterns the inputs of the model
	/// \returns the responses of the model
	Data<OutputType> operator()(Data<InputType> const & patterns)const{
		int batches = (int) patterns.numberOfBatches();
		Data<OutputType> result(batches);
		SHARK_PARALLEL_FOR(int i = 0; i < batches; ++i)
			result.batch(i) = (*this)(patterns.batch(i));
		return result;
		//return transform(patterns,*this);//todo: this leads to compiler errors.
	}

	/// \brief Model evaluation as an operator for a single pattern. This is a convenience function.
	///
	/// \param pattern the input of the model
	/// \returns the response of the model
	OutputType operator()(InputType const & pattern)const{
		OutputType output;
		eval(pattern,output);
		return output;
	}

	/// \brief Model evaluation as an operator for a batch of patterns. This is a convenience function.
	///
	/// \param patterns the inputs of the model
	/// \returns the responses of the model
	BatchOutputType operator()(BatchInputType const & patterns)const{
		BatchOutputType output;
		eval(patterns,output);
		return output;
	}

	/// \brief Calculates the weighted sum of derivatives w.r.t. the parameters.
	///
	/// \param pattern the patterns to evaluate
	/// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
	/// \param state intermediate results stored by eval to speed up the calculation of the derivatives
	/// \param derivative the calculated derivative, summed over the derivatives of all patterns
	virtual void weightedParameterDerivative(
		BatchInputType const & pattern,
		BatchOutputType const & coefficients,
		State const& state,
		RealVector& derivative
	)const{
		SHARK_FEATURE_EXCEPTION(HAS_FIRST_PARAMETER_DERIVATIVE);
	}
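
	// The coefficients implement one chain-rule step: for an error
	// E = sum_i L(f_w(x_i), t_i), the caller passes c_i = dL/df as `coefficients`
	// and receives dE/dw summed over the whole batch. A sketch (`labels` and the
	// squared-loss coefficients are illustrative assumptions):
	//
	//   boost::shared_ptr<State> state = model.createState();
	//   BatchOutputType outputs;
	//   model.eval(patterns, outputs, *state);
	//   BatchOutputType coefficients = outputs - labels;  // dL/df for squared loss
	//   RealVector dEdw;
	//   model.weightedParameterDerivative(patterns, coefficients, *state, dEdw);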

	/// \brief Calculates the weighted sum of first and second derivatives w.r.t. the parameters.
	///
	/// \param pattern the patterns to evaluate
	/// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
	/// \param errorHessian the second derivative of the error function for every pattern
	/// \param state intermediate results stored by eval to speed up the calculation of the derivatives
	/// \param derivative the calculated derivative, summed over the derivatives of all patterns
	/// \param hessian the calculated Hessian, summed over the Hessians of all patterns
	virtual void weightedParameterDerivative(
		BatchInputType const & pattern,
		BatchOutputType const & coefficients,
		Batch<RealMatrix>::type const & errorHessian,//maybe a batch of matrices is bad?
		State const& state,
		RealVector& derivative,
		RealMatrix& hessian
	)const{
		SHARK_FEATURE_EXCEPTION(HAS_SECOND_PARAMETER_DERIVATIVE);
	}

	///\brief Calculates the weighted sum of derivatives w.r.t. the inputs.
	///
	/// \param pattern the patterns to evaluate
	/// \param coefficients the coefficients which are used to calculate the weighted sum for every pattern
	/// \param state intermediate results stored by eval to speed up the calculation of the derivatives
	/// \param derivative the calculated derivative for every pattern
	virtual void weightedInputDerivative(
		BatchInputType const & pattern,
		BatchOutputType const & coefficients,
		State const& state,
		BatchInputType& derivative
	)const{
		SHARK_FEATURE_EXCEPTION(HAS_FIRST_INPUT_DERIVATIVE);
	}

	///\brief Calculates the weighted sum of first and second derivatives w.r.t. the inputs.
	///
	/// \param pattern the patterns to evaluate
	/// \param coefficients the coefficients which are used to calculate the weighted sum
	/// \param errorHessian the second derivative of the error function for every pattern
	/// \param state intermediate results stored by eval to speed up the calculation of the derivatives
	/// \param derivative the calculated derivative for every pattern
	/// \param hessian the calculated Hessian for every pattern
	virtual void weightedInputDerivative(
		BatchInputType const & pattern,
		BatchOutputType const & coefficients,
		typename Batch<RealMatrix>::type const & errorHessian,
		State const& state,
		RealMatrix& derivative,
		Batch<RealMatrix>::type& hessian
	)const{
		SHARK_FEATURE_EXCEPTION(HAS_SECOND_INPUT_DERIVATIVE);
	}
	///\brief Calculates the weighted input and parameter derivatives at the same time.
	///
	/// Sometimes both derivatives are needed at the same time, and when the weighted
	/// parameter derivative is calculated, the input derivative can sometimes be obtained
	/// for free. This is for example true for feed-forward neural networks. The obvious
	/// default implementation simply calculates the derivatives one after another.
	/// \param patterns the patterns to evaluate
	/// \param coefficients the coefficients which are used to calculate the weighted sum
	/// \param state intermediate results stored by eval to speed up the calculation of the derivatives
	/// \param parameterDerivative the calculated parameter derivative, summed over the derivatives of all patterns
	/// \param inputDerivative the calculated input derivative for every pattern
	virtual void weightedDerivatives(
		BatchInputType const & patterns,
		BatchOutputType const & coefficients,
		State const& state,
		RealVector& parameterDerivative,
		BatchInputType& inputDerivative
	)const{
		weightedParameterDerivative(patterns,coefficients,state,parameterDerivative);
		weightedInputDerivative(patterns,coefficients,state,inputDerivative);
	}
};


/**
 * \ingroup shark_globals
 *
 * @{
 */

/// \brief Initializes the model parameters normally distributed.
///
/// \param model: model to be initialized
/// \param s: variance of the mean-free normal distribution
template <class InputType, class OutputType>
void initRandomNormal(AbstractModel<InputType, OutputType>& model, double s)
{
	Normal<> gauss(Rng::globalRng, 0, s);
	RealVector weights(model.numberOfParameters());
	std::generate(weights.begin(), weights.end(), gauss);
	model.setParameterVector(weights);
}


/// \brief Initializes the model parameters uniformly at random.
///
/// \param model: model to be initialized
/// \param l: lower bound of the initialization interval
/// \param h: upper bound of the initialization interval
template <class InputType, class OutputType>
void initRandomUniform(AbstractModel<InputType, OutputType>& model, double l, double h)
{
	Uniform<> uni(Rng::globalRng, l, h);
	RealVector weights(model.numberOfParameters());
	std::generate(weights.begin(), weights.end(), uni);
	model.setParameterVector(weights);
}
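
// Usage sketch for the two initializers above (the concrete model type
// LinearModel<> is an assumption for illustration):
//
//   LinearModel<> model(10, 2);
//   initRandomNormal(model, 0.01);        // weights drawn from N(0, 0.01)
//   initRandomUniform(model, -0.1, 0.1);  // weights drawn from U(-0.1, 0.1)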

/** @}*/

}


#endif // SHARK_MODELS_ABSTRACTMODEL_H