LinearModel.h
/*!
 *
 * \brief Implements a Model using a linear function.
 *
 * \author T. Glasmachers, O. Krause
 * \date 2010-2011
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_LINEARMODEL_H
#define SHARK_MODELS_LINEARMODEL_H

#include <shark/Models/AbstractModel.h>

namespace shark {

///
/// \brief Linear Prediction
///
/// \par
/// A linear model makes predictions according to
/// \f$ y = f(x) = A x + b \f$. There are two important special cases:
/// the output may be a single number, and the offset term b may be
/// dropped.
///
/// The class allows for dense and sparse input vector types. However, it assumes that
/// the weight matrix and the outputs are dense. There are some cases where this is not
/// desirable. See for example Normalizer for a class which is designed for sparse
/// inputs and outputs.
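///
/// \par Example
/// A minimal usage sketch for the default dense case (the values are chosen only for illustration):
/// \code
/// RealMatrix A(2, 3, 0.0);          // weight matrix: 2 outputs, 3 inputs
/// A(0,0) = 1.0; A(1,2) = -1.0;
/// RealVector b(2, 0.5);             // offset added to every output
/// LinearModel<> model(A, b);        // y = A x + b
///
/// RealMatrix inputs(1, 3, 1.0);     // batch holding one input pattern per row
/// RealMatrix outputs;
/// model.eval(inputs, outputs);      // outputs is 1x2: (1.5, -0.5)
/// \endcode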
template <class InputType = RealVector>
class LinearModel : public AbstractModel<InputType,RealVector>
{
private:
	typedef AbstractModel<InputType,RealVector> base_type;
	typedef LinearModel<InputType> self_type;
	/// Wrapper for the type erasure
	RealMatrix m_matrix;
	RealVector m_offset;
public:
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::BatchOutputType BatchOutputType;

	/// Default constructor; use setStructure later
	LinearModel(){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}
	/// Constructor creating a model with given dimensionalities and optional offset term.
	LinearModel(std::size_t inputs, std::size_t outputs = 1, bool offset = false)
	: m_matrix(outputs,inputs,0.0), m_offset(offset?outputs:0,0.0){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}
	/// copy constructor
	LinearModel(LinearModel const& model)
	: m_matrix(model.m_matrix), m_offset(model.m_offset){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "LinearModel"; }

	/// swap
	friend void swap(LinearModel& model1, LinearModel& model2){
		swap(model1.m_matrix,model2.m_matrix);
		swap(model1.m_offset,model2.m_offset);
	}

	/// operator =
	LinearModel& operator=(LinearModel const& model){
		self_type tempModel(model);
		swap(*this,tempModel);
		return *this;
	}

	/// Construction from matrix (and vector)
	LinearModel(RealMatrix const& matrix, RealVector const& offset = RealVector())
	: m_matrix(matrix), m_offset(offset){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}

	/// check for the presence of an offset term
	bool hasOffset() const{
		return m_offset.size() != 0;
	}

	/// obtain the input dimension
	size_t inputSize() const{
		return m_matrix.size2();
	}

	/// obtain the output dimension
	size_t outputSize() const{
		return m_matrix.size1();
	}

	/// obtain the parameter vector
	RealVector parameterVector() const{
		RealVector ret(numberOfParameters());
		init(ret) << toVector(m_matrix),m_offset;

		return ret;
	}

	/// overwrite the parameter vector
	void setParameterVector(RealVector const& newParameters)
	{
		init(newParameters) >> toVector(m_matrix),m_offset;
	}

	/// return the number of parameters
	size_t numberOfParameters() const{
		return m_matrix.size1()*m_matrix.size2()+m_offset.size();
	}

	/// overwrite structure and parameters
	void setStructure(std::size_t inputs, std::size_t outputs = 1, bool offset = false){
		LinearModel<InputType> model(inputs,outputs,offset);
		swap(*this,model);
	}

	/// overwrite structure and parameters
	void setStructure(RealMatrix const& matrix, RealVector const& offset = RealVector()){
		m_matrix = matrix;
		m_offset = offset;
	}

	/// return the weight matrix
	RealMatrix const& matrix() const{
		return m_matrix;
	}

	RealMatrix& matrix(){
		return m_matrix;
	}

	/// return the offset
	RealVector const& offset() const{
		return m_offset;
	}
	RealVector& offset(){
		return m_offset;
	}

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new EmptyState());
	}

	using base_type::eval;

	/// Evaluate the model: output = matrix * input + offset
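	/// The input batch holds one pattern per row; the output batch contains the
	/// corresponding prediction in each row.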
	void eval(BatchInputType const& inputs, BatchOutputType& outputs)const{
		outputs.resize(inputs.size1(),m_matrix.size1());
		//we multiply with a set of row vectors from the left
		noalias(outputs) = prod(inputs,trans(m_matrix));
		if (hasOffset()){
			noalias(outputs) += repeat(m_offset,inputs.size1());
		}
	}

	/// Evaluate the model: output = matrix * input + offset
	void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
		eval(inputs,outputs);
	}

	/// \brief Calculates the first derivative w.r.t. the parameters, summed over all patterns of the last computed batch.
	void weightedParameterDerivative(
		BatchInputType const& patterns, RealMatrix const& coefficients, State const& state, RealVector& gradient
	)const{
		SIZE_CHECK(coefficients.size2()==outputSize());
		SIZE_CHECK(coefficients.size1()==patterns.size1());

		gradient.resize(numberOfParameters());
		std::size_t inputs = inputSize();
		std::size_t outputs = outputSize();
		gradient.clear();

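		// the gradient has the same layout as parameterVector(): the flattened
		// weight matrix first, followed by the offset components (if present)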
		blas::dense_matrix_adaptor<double> weightGradient = blas::adapt_matrix(outputs,inputs,gradient.storage());
		// weightGradient(o,j) = sum_i coefficients(i,o) * patterns(i,j)
		noalias(weightGradient) = prod(trans(coefficients),patterns);

		if (hasOffset()){
			std::size_t start = inputs*outputs;
			noalias(subrange(gradient, start, start + outputs)) = sum_rows(coefficients);
		}
	}

	/// \brief Calculates the first derivative w.r.t. the inputs, summed over all patterns of the last computed batch.
	void weightedInputDerivative(
		BatchInputType const& patterns,
		BatchOutputType const& coefficients,
		State const& state,
		BatchInputType& derivative
	)const{
		SIZE_CHECK(coefficients.size2() == outputSize());
		SIZE_CHECK(coefficients.size1() == patterns.size1());

		derivative.resize(patterns.size1(),inputSize());
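		// each row i of the derivative is coefficients_i * matrix, i.e. the
		// input-space gradient for pattern i written as a row vector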
		noalias(derivative) = prod(coefficients,m_matrix);
	}

	/// From ISerializable
	void read(InArchive& archive){
		archive >> m_matrix;
		archive >> m_offset;
	}

	/// From ISerializable
	void write(OutArchive& archive) const{
		archive << m_matrix;
		archive << m_offset;
	}
};


}
#endif