EpsilonSvmTrainer.h
//===========================================================================
/*!
 *
 * \brief Trainer for the Epsilon-Support Vector Machine for Regression
 *
 * \author T. Glasmachers
 * \date 2007-2012
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#ifndef SHARK_ALGORITHMS_EPSILONSVMTRAINER_H
#define SHARK_ALGORITHMS_EPSILONSVMTRAINER_H

#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Algorithms/QP/QpSolver.h>
#include <shark/Algorithms/QP/SvmProblems.h>
#include <shark/LinAlg/CachedMatrix.h>
#include <shark/LinAlg/KernelMatrix.h>
#include <shark/LinAlg/PrecomputedMatrix.h>
namespace shark {

///
/// \brief Training of Epsilon-SVMs for regression.
///
/// The Epsilon-SVM is a support vector machine variant
/// for regression problems. Given are data tuples
/// \f$ (x_i, y_i) \f$ with x-component denoting input and
/// y-component denoting a real-valued label (see the tutorial on
/// label conventions; the implementation uses RealVector),
/// a kernel function k(x, x'), a regularization constant C > 0,
/// and a loss insensitivity parameter \f$ \varepsilon \f$.
/// Let H denote the reproducing kernel Hilbert space induced
/// by k, and let \f$ \phi \f$ denote the corresponding
/// feature map. Then the SVM regression function is of the form
/// \f[
///     f(x) = \langle w, \phi(x) \rangle + b
/// \f]
/// with coefficients w and b given by the (primal)
/// optimization problem
/// \f[
///     \min \frac{1}{2} \|w\|^2 + C \sum_i L(y_i, f(x_i)),
/// \f]
/// where
/// \f[
///     L(y, f(x)) = \max\{0, |y - f(x)| - \varepsilon\}
/// \f]
/// is the \f$ \varepsilon \f$-insensitive absolute loss.
///
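/// A minimal usage sketch follows. The kernel choice, the parameter
/// values, and the variable trainingData (a LabeledData<RealVector,
/// RealVector> regression dataset assumed to be loaded elsewhere) are
/// illustrative, not prescribed by this header:
/// \code
/// GaussianRbfKernel<RealVector> kernel(0.1);                   // k(x, x') = exp(-0.1 ||x - x'||^2)
/// EpsilonSvmTrainer<RealVector> trainer(&kernel, 10.0, 0.03);  // C = 10, epsilon = 0.03
/// KernelExpansion<RealVector> svm;
/// trainer.train(svm, trainingData);                            // svm now computes f(x)
/// \endcode
///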
template <class InputType, class CacheType = float>
class EpsilonSvmTrainer : public AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> >
{
public:

	typedef CacheType QpFloatType;

	typedef AbstractKernelFunction<InputType> KernelType;
	typedef AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> > base_type;

	typedef KernelMatrix<InputType, QpFloatType> KernelMatrixType;
	typedef BlockMatrix2x2<KernelMatrixType> BlockMatrixType;
	typedef CachedMatrix<BlockMatrixType> CachedBlockMatrixType;
	typedef PrecomputedMatrix<BlockMatrixType> PrecomputedBlockMatrixType;

	/// Constructor
	/// \param kernel kernel function to use for training and prediction
	/// \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
	/// \param epsilon loss insensitivity parameter
	/// \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
	EpsilonSvmTrainer(KernelType* kernel, double C, double epsilon, bool unconstrained = false)
	: base_type(kernel, C, true, unconstrained)
	, m_epsilon(epsilon)
	{ }

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "EpsilonSvmTrainer"; }

	/// return the loss insensitivity parameter epsilon
	double epsilon() const
	{ return m_epsilon; }
	/// set the loss insensitivity parameter epsilon
	void setEpsilon(double epsilon)
	{ m_epsilon = epsilon; }

	/// get the hyper-parameter vector: the base class parameters, followed by epsilon
	RealVector parameterVector() const
	{
		size_t sp = base_type::numberOfParameters();
		RealVector ret(sp + 1);
		// epsilon is appended last, log-encoded if unconstrained optimization is enabled
		blas::init(ret) << base_type::parameterVector(), (base_type::m_unconstrained ? std::log(m_epsilon) : m_epsilon);
		return ret;
	}

	/// set the vector of hyper-parameters
	void setParameterVector(RealVector const& newParameters)
	{
		size_t sp = base_type::numberOfParameters();
		SHARK_ASSERT(newParameters.size() == sp + 1);
		base_type::setParameterVector(subrange(newParameters, 0, sp));
		setEpsilon(base_type::m_unconstrained ? std::exp(newParameters(sp)) : newParameters(sp));
	}

	/// return the number of hyper-parameters
	size_t numberOfParameters() const
	{ return (base_type::numberOfParameters() + 1); }

	void train(KernelExpansion<InputType>& svm, LabeledData<InputType, RealVector> const& dataset){
		svm.setStructure(base_type::m_kernel,dataset.inputs(),true,1);

		SHARK_CHECK(labelDimension(dataset) == 1, "[EpsilonSvmTrainer::train] can only train 1D labels");

		// choose between a fully precomputed kernel matrix (fast, but memory hungry)
		// and a cached one, as configured in the base class
		if (base_type::precomputeKernel())
			trainSVM<PrecomputedBlockMatrixType>(svm,dataset);
		else
			trainSVM<CachedBlockMatrixType>(svm,dataset);

		if (base_type::sparsify()) svm.sparsify();
	}

private:
	template<class MatrixType>
	void trainSVM(KernelExpansion<InputType>& svm, LabeledData<InputType, RealVector> const& dataset){
		typedef GeneralQuadraticProblem<MatrixType> SVMProblemType;
		typedef SvmShrinkingProblem<SVMProblemType> ProblemType;

		// set up the dual quadratic problem
		KernelMatrixType km(*base_type::m_kernel, dataset.inputs());
		std::size_t ic = km.size();
		BlockMatrixType blockkm(&km);   // 2x2 block structure doubling the kernel matrix
		MatrixType matrix(&blockkm);
		SVMProblemType svmProblem(matrix);
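		// The epsilon-SVR dual has 2n variables for n training points:
		// entries [0, n) hold the coefficients alpha^+ of the lower tube
		// constraints (box [0, C], linear term y_i - epsilon), while entries
		// [n, 2n) hold -alpha^- for the upper tube constraints (box [-C, 0],
		// linear term y_i + epsilon). The expansion coefficient of point i
		// is later recovered as alpha^+_i - alpha^-_i.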
		for(std::size_t i = 0; i != ic; ++i){
			svmProblem.linear(i) = dataset.element(i).label(0) - m_epsilon;
			svmProblem.linear(i+ic) = dataset.element(i).label(0) + m_epsilon;
			svmProblem.boxMin(i) = 0;
			svmProblem.boxMax(i) = this->C();
			svmProblem.boxMin(i+ic) = -this->C();
			svmProblem.boxMax(i+ic) = 0;
		}
		ProblemType problem(svmProblem,base_type::m_shrinking);

		// solve it
		QpSolver<ProblemType> solver(problem);
		solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
		RealVector alpha = problem.getUnpermutedAlpha();
		column(svm.alpha(),0) = subrange(alpha,0,ic) + subrange(alpha,ic,2*ic);

		// compute the offset from the KKT conditions
		double lowerBound = -1e100;
		double upperBound = 1e100;
		double sum = 0.0;

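		// KKT conditions: a dual variable strictly inside its box ("free")
		// must have zero gradient once the offset b is accounted for, so each
		// free variable yields b exactly; averaging over all of them merely
		// stabilizes the value numerically. A variable at a bound only
		// brackets b from one side.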
		std::size_t freeVars = 0;
		for (std::size_t i=0; i<ic; i++)
		{
			if (problem.alpha(i) > 0.0)
			{
				double value = problem.gradient(i);
				if (problem.alpha(i) < this->C())
				{
					sum += value;
					freeVars++;
				}
				else
				{
					lowerBound = std::max(value,lowerBound);
				}
			}
			if (problem.alpha(i + ic) < 0.0)
			{
				double value = problem.gradient(i + ic);
				if (problem.alpha(i + ic) > -this->C())
				{
					sum += value;
					freeVars++;
				}
				else
				{
					upperBound = std::min(value,upperBound);
				}
			}
		}
		if (freeVars > 0)
			svm.offset(0) = sum / freeVars; // stabilized (averaged) exact value
		else
			svm.offset(0) = 0.5 * (lowerBound + upperBound); // best estimate

		base_type::m_accessCount = km.getAccessCount();
	}

	double m_epsilon;
};

}
#endif