AbstractKernelFunction.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief abstract super class of all kernel functions
6  *
7  *
8  *
9  * \author T.Glasmachers, O. Krause, M. Tuma
10  * \date 2010-2012
11  *
12  *
13  * \par Copyright 1995-2015 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://image.diku.dk/shark/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
35 #ifndef SHARK_MODELS_KERNELS_ABSTRACTKERNELFUNCTION_H
36 #define SHARK_MODELS_KERNELS_ABSTRACTKERNELFUNCTION_H
37 
#include <shark/Models/Kernels/AbstractMetric.h>
#include <shark/LinAlg/Base.h>
#include <shark/Core/Flags.h>
#include <shark/Core/State.h>
42 namespace shark {
43 
/// Instrumentation hook: when the library is built with
/// SHARK_COUNT_KERNEL_LOOKUPS defined, kernel implementations can count
/// individual kernel evaluations via this macro; otherwise it expands to
/// an empty block and has zero runtime cost.
#ifdef SHARK_COUNT_KERNEL_LOOKUPS
	#define INCREMENT_KERNEL_COUNTER( counter ) { counter++; }
#else
	#define INCREMENT_KERNEL_COUNTER( counter ) { }
#endif
49 
50 /// \brief Base class of all Kernel functions.
51 ///
52 /// \par
53 /// A (Mercer) kernel is a symmetric positive definite
54 /// function of two parameters. It is (currently) used
55 /// in two contexts in Shark, namely for kernel methods
56 /// such as support vector machines (SVMs), and for
57 /// radial basis function networks.
58 ///
59 /// \par
60 /// In Shark a kernel function class represents a parametric
61 /// family of such kernel functions: The AbstractKernelFunction
62 /// interface inherits the IParameterizable interface.
63 ///
64 template<class InputTypeT>
65 class AbstractKernelFunction : public AbstractMetric<InputTypeT>
66 {
67 private:
69  typedef Batch<InputTypeT> Traits;
70 public:
71  /// \brief Input type of the Kernel.
72  typedef typename base_type::InputType InputType;
73  /// \brief batch input type of the kernel
75  /// \brief Const references to InputType
77  /// \brief Const references to BatchInputType
79 
81 
82  /// enumerations of kerneland metric features (flags)
83  enum Feature {
84  HAS_FIRST_PARAMETER_DERIVATIVE = 1, ///< is the kernel differentiable w.r.t. its parameters?
85  HAS_FIRST_INPUT_DERIVATIVE = 2, ///< is the kernel differentiable w.r.t. its inputs?
86  IS_NORMALIZED = 4 , ///< does k(x, x) = 1 hold for all inputs x?
87  SUPPORTS_VARIABLE_INPUT_SIZE = 8 ///< Input arguments must have same size, but not the same size in different calls to eval
88  };
89 
90  /// This statement declares the member m_features. See Core/Flags.h for details.
92 
95  }
98  }
99  bool isNormalized() const{
100  return m_features & IS_NORMALIZED;
101  }
104  }
105 
106  ///\brief Creates an internal state of the kernel.
107  ///
108  ///The state is needed when the derivatives are to be
109  ///calculated. Eval can store a state which is then reused to speed up
110  ///the calculations of the derivatives. This also allows eval to be
111  ///evaluated in parallel!
112  virtual boost::shared_ptr<State> createState()const
113  {
115  {
116  throw SHARKEXCEPTION("[AbstractKernelFunction::createState] createState must be overridden by kernels with derivatives");
117  }
118  return boost::shared_ptr<State>(new EmptyState());
119  }
120 
121  ///////////////////////////////////////////SINGLE ELEMENT INTERFACE///////////////////////////////////////////
122  // By default, this is mapped to the batch case.
123 
124  /// \brief Evaluates the kernel function.
125  virtual double eval(ConstInputReference x1, ConstInputReference x2) const{
126  RealMatrix res;
127  BatchInputType b1 = Traits::createBatch(x1,1);
128  BatchInputType b2 = Traits::createBatch(x2,1);
129  get(b1,0) = x1;
130  get(b2,0) = x2;
131  eval(b1, b2, res);
132  return res(0, 0);
133  }
134 
135  /// \brief Convenience operator which evaluates the kernel function.
136  inline double operator () (ConstInputReference x1, ConstInputReference x2) const {
137  return eval(x1, x2);
138  }
139 
140  //////////////////////////////////////BATCH INTERFACE///////////////////////////////////////////
141 
142  /// \brief Evaluates the subset of the KernelGram matrix which is defined by X1(rows) and X2 (columns).
143  ///
144  /// The result matrix is filled in with the values result(i,j) = kernel(x1[i], x2[j]);
145  /// The State object is filled in with data used in subsequent derivative computations.
146  virtual void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result, State& state) const = 0;
147 
148  /// \brief Evaluates the subset of the KernelGram matrix which is defined by X1(rows) and X2 (columns).
149  ///
150  /// The result matrix is filled in with the values result(i,j) = kernel(x1[i], x2[j]);
151  virtual void eval(ConstBatchInputReference batchX1, ConstBatchInputReference batchX2, RealMatrix& result) const {
152  boost::shared_ptr<State> state = createState();
153  eval(batchX1, batchX2, result, *state);
154  }
155 
156  /// \brief Evaluates the subset of the KernelGram matrix which is defined by X1(rows) and X2 (columns).
157  ///
158  /// Convenience operator.
159  /// The result matrix is filled in with the values result(i,j) = kernel(x1[i], x2[j]);
160  inline RealMatrix operator () (ConstBatchInputReference batchX1, ConstBatchInputReference batchX2) const{
161  RealMatrix result;
162  eval(batchX1, batchX2, result);
163  return result;
164  }
165 
166  /// \brief Computes the gradient of the parameters as a weighted sum over the gradient of all elements of the batch.
167  ///
168  /// The default implementation throws a "not implemented" exception.
170  ConstBatchInputReference batchX1,
171  ConstBatchInputReference batchX2,
172  RealMatrix const& coefficients,
173  State const& state,
174  RealVector& gradient
175  ) const {
176  throw SHARKEXCEPTION("[AbstractKernelFunction::weightedParameterDerivative] weightedParameterDerivative(...) not implemented.");
177  }
178 
179  /// \brief Calculates the derivative of the inputs X1 (only x1!).
180  ///
181  /// The i-th row of the resulting matrix is a weighted sum of the form:
182  /// c[i,0] * k'(x1[i], x2[0]) + c[i,1] * k'(x1[i], x2[1]) + ... + c[i,n] * k'(x1[i], x2[n]).
183  ///
184  /// The default implementation throws a "not implemented" exception.
185  virtual void weightedInputDerivative(
186  ConstBatchInputReference batchX1,
187  ConstBatchInputReference batchX2,
188  RealMatrix const& coefficientsX2,
189  State const& state,
190  BatchInputType& gradient
191  ) const {
192  throw SHARKEXCEPTION("[AbstractKernelFunction::weightedInputDerivative] weightedInputDerivative(...) not implemented");
193  }
194 
195 
196  //////////////////////////////////NORMS AND DISTANCES/////////////////////////////////
197 
198  /// Computes the squared distance in the kernel induced feature space.
199  virtual double featureDistanceSqr(ConstInputReference x1, ConstInputReference x2) const{
200  if (isNormalized()){
201  double k12 = eval(x1, x2);
202  return (2.0 - 2.0 * k12);
203  } else {
204  double k11 = eval(x1, x1);
205  double k12 = eval(x1, x2);
206  double k22 = eval(x2, x2);
207  return (k11 - 2.0 * k12 + k22);
208  }
209  }
210 
211  virtual RealMatrix featureDistanceSqr(ConstBatchInputReference batchX1,ConstBatchInputReference batchX2) const{
212  std::size_t sizeX1=shark::size(batchX1);
213  std::size_t sizeX2=shark::size(batchX2);
214  RealMatrix result=(*this)(batchX1,batchX2);
215  result*=-2;
216  if (isNormalized()){
217  noalias(result)+=RealScalarMatrix(sizeX1,sizeX2,2.0);
218  } else {
219  //compute self-product
220  RealVector kx2(sizeX2);
221  for(std::size_t i = 0; i != sizeX2;++i){
222  kx2(i)=eval(get(batchX2,i),get(batchX2,i));
223  }
224  for(std::size_t j = 0; j != sizeX1;++j){
225  double kx1=eval(get(batchX1,j),get(batchX1,j));
226  noalias(row(result,j)) += kx1 + kx2;
227  }
228  }
229  return result;
230  }
231 };
232 
233 
234 }
235 #endif