AbstractSvmTrainer.h
//===========================================================================
/*!
 *
 * \brief Abstract Support Vector Machine Trainer, general and linear case
 *
 * \par
 * This file provides: 1) the QpConfig class, which can configure and
 * provide information about an SVM training procedure; 2) a super-class
 * for general SVM trainers, namely the AbstractSvmTrainer; and 3) a
 * streamlined variant thereof for purely linear SVMs, namely the
 * AbstractLinearSvmTrainer. In general, the SvmTrainers hold as parameters
 * all hyperparameters of the underlying SVM, which includes the kernel
 * parameters for non-linear SVMs.
 *
 * \author T. Glasmachers
 * \date -
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_ABSTRACTSVMTRAINER_H
#define SHARK_ALGORITHMS_ABSTRACTSVMTRAINER_H


#include <shark/LinAlg/Base.h>
#include <shark/Core/IParameterizable.h>
#include <shark/Models/Kernels/AbstractKernelFunction.h>
#include <shark/Models/Kernels/KernelExpansion.h>
#include <shark/Models/LinearClassifier.h>
#include <shark/Algorithms/Trainers/AbstractTrainer.h>
#include <shark/Algorithms/QP/QuadraticProgram.h>


namespace shark {

///
/// \brief Super class of all support vector machine trainers.
///
/// \par
/// The QpConfig class holds two structures describing
/// the stopping condition and the solution obtained by the underlying
/// quadratic programming solvers. It provides a uniform interface for
/// setting, e.g., the target solution accuracy, and for obtaining the
/// accuracy of the actual solution.
///
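/// \par
/// A minimal usage sketch through some trainer deriving from QpConfig
/// (hypothetical trainer name; the QpSolutionProperties field used below
/// is an assumption based on its role in this file):
/// \code
/// MySvmTrainer trainer(/*...*/);       // any trainer deriving from QpConfig
/// trainer.setMinAccuracy(1e-4);        // stop at dual accuracy 1e-4 ...
/// trainer.setMaxIterations(1000000);   // ... or after 10^6 iterations, whichever comes first
/// trainer.sparsify() = true;           // drop non-support-vectors after training
/// trainer.train(model, data);
/// double reached = trainer.solutionProperties().accuracy;  // accuracy actually achieved
/// \endcode
///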
class QpConfig
{
public:
    /// Constructor
    QpConfig(bool precomputedFlag = false, bool sparsifyFlag = true)
    : m_precomputedKernelMatrix(precomputedFlag)
    , m_sparsify(sparsifyFlag)
    , m_shrinking(true)
    , m_s2do(true)
    , m_verbosity(0)
    , m_accessCount(0)
    { }

    /// Read/write access to the stopping condition
    QpStoppingCondition& stoppingCondition()
    { return m_stoppingcondition; }

    /// Read access to the stopping condition
    QpStoppingCondition const& stoppingCondition() const
    { return m_stoppingcondition; }

    /// Access to the solution properties
    QpSolutionProperties& solutionProperties()
    { return m_solutionproperties; }

    /// Flag for using a precomputed kernel matrix
    bool& precomputeKernel()
    { return m_precomputedKernelMatrix; }

    /// Flag for using a precomputed kernel matrix
    bool const& precomputeKernel() const
    { return m_precomputedKernelMatrix; }

    /// Flag for sparsifying the model after training
    bool& sparsify()
    { return m_sparsify; }

    /// Flag for sparsifying the model after training
    bool const& sparsify() const
    { return m_sparsify; }

    /// Flag for shrinking in the decomposition solver
    bool& shrinking()
    { return m_shrinking; }

    /// Flag for shrinking in the decomposition solver
    bool const& shrinking() const
    { return m_shrinking; }

    /// Flag for S2DO (instead of SMO)
    bool& s2do()
    { return m_s2do; }

    /// Flag for S2DO (instead of SMO)
    bool const& s2do() const
    { return m_s2do; }

    /// Verbosity level of the solver
    unsigned int& verbosity()
    { return m_verbosity; }

    /// Verbosity level of the solver
    unsigned int const& verbosity() const
    { return m_verbosity; }

    /// Number of kernel accesses
    unsigned long long const& accessCount() const
    { return m_accessCount; }

    // Set threshold for minimum dual accuracy stopping condition
    void setMinAccuracy(double a) { m_stoppingcondition.minAccuracy = a; }
    // Set number of iterations for maximum number of iterations stopping condition
    void setMaxIterations(unsigned long long i) { m_stoppingcondition.maxIterations = i; }
    // Set value for target value stopping condition
    void setTargetValue(double v) { m_stoppingcondition.targetValue = v; }
    // Set maximum training time in seconds for the maximum seconds stopping condition
    void setMaxSeconds(double s) { m_stoppingcondition.maxSeconds = s; }

protected:
    /// conditions for when to stop the QP solver
    QpStoppingCondition m_stoppingcondition;
    /// properties of the approximate solution found by the solver
    QpSolutionProperties m_solutionproperties;
    /// should the solver use a precomputed kernel matrix?
    bool m_precomputedKernelMatrix;
    /// should the trainer sparsify the model after training?
    bool m_sparsify;
    /// should shrinking be used?
    bool m_shrinking;
    /// should S2DO be used instead of SMO?
    bool m_s2do;
    /// verbosity level (currently unused)
    unsigned int m_verbosity;
    /// kernel access count
    unsigned long long m_accessCount;
};


///
/// \brief Super class of all kernelized (non-linear) SVM trainers.
///
/// \par
/// This class holds general information shared by most, if not
/// all, SVM trainers. First of all, this includes the kernel and
/// the regularization parameter. The class also manages
/// meta-information of the training process, such as the maximal
/// size of the kernel cache, the stopping criterion, and
/// information on the actual solution.
///
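/// \par
/// A usage sketch; GaussianRbfKernel and the CSvmTrainer subclass are
/// assumed here for illustration, but any kernel/trainer combination
/// follows the same pattern:
/// \code
/// GaussianRbfKernel<> kernel(0.5);                        // one kernel hyperparameter (gamma)
/// CSvmTrainer<RealVector> trainer(&kernel, 10.0, true);   // C = 10, train with offset
/// trainer.setCacheSize(0x1000000);                        // 2^24 cache entries = 128 MB for double
/// KernelClassifier<RealVector> svm;
/// trainer.train(svm, data);   // data: LabeledData<RealVector, unsigned int>
///
/// // the hyper-parameter vector concatenates kernel parameters and regularizers:
/// RealVector p = trainer.parameterVector();   // p = (gamma, C) = (0.5, 10)
/// \endcode
///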
template <
    class InputType, class LabelType,
    class Model = KernelClassifier<InputType>,
    class Trainer = AbstractTrainer<Model, LabelType>
>
class AbstractSvmTrainer
: public Trainer, public QpConfig, public IParameterizable
{
public:
    typedef AbstractKernelFunction<InputType> KernelType;

    //! Constructor
    //! \param kernel kernel function to use for training and prediction
    //! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
    //! \param offset train the SVM with an offset term - this is not supported by all SVM solvers.
    //! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
    AbstractSvmTrainer(KernelType* kernel, double C, bool offset, bool unconstrained = false)
    : m_kernel(kernel)
    , m_regularizers(1, C)
    , m_trainOffset(offset)
    , m_unconstrained(unconstrained)
    , m_cacheSize(0x4000000)
    { RANGE_CHECK( C > 0 ); }

    //! Constructor featuring two regularization parameters
    //! \param kernel kernel function to use for training and prediction
    //! \param negativeC regularization parameter of the negative class (label 0)
    //! \param positiveC regularization parameter of the positive class (label 1)
    //! \param offset train the SVM with an offset term - this is not supported by all SVM solvers.
    //! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
    AbstractSvmTrainer(KernelType* kernel, double negativeC, double positiveC, bool offset, bool unconstrained = false)
    : m_kernel(kernel)
    , m_regularizers(2)
    , m_trainOffset(offset)
    , m_unconstrained(unconstrained)
    , m_cacheSize(0x4000000)
    {
        RANGE_CHECK( positiveC > 0 );
        RANGE_CHECK( negativeC > 0 );
        m_regularizers[0] = negativeC;
        m_regularizers[1] = positiveC;
    }

    /// \brief Return the value of the regularization parameter C.
    double C() const
    {
        SIZE_CHECK(m_regularizers.size() == 1);
        return m_regularizers[0];
    }

    /// \brief Read access to the vector of regularization parameters.
    RealVector const& regularizationParameters() const
    {
        return m_regularizers;
    }

    /// \brief Read/write access to the vector of regularization parameters.
    RealVector& regularizationParameters()
    {
        return m_regularizers;
    }

    /// \brief Access to the kernel object.
    KernelType* kernel()
    { return m_kernel; }
    const KernelType* kernel() const
    { return m_kernel; }
    void setKernel(KernelType* kernel)
    { m_kernel = kernel; }

    /// \brief Is log(C) stored as a parameter instead of C?
    bool isUnconstrained() const
    { return m_unconstrained; }

    /// \brief Is the SVM trained with an offset term?
    bool trainOffset() const
    { return m_trainOffset; }

    /// \brief Number of entries in the kernel cache.
    double CacheSize() const
    { return m_cacheSize; }
    void setCacheSize( std::size_t size )
    { m_cacheSize = size; }

    /// get the hyper-parameter vector
    RealVector parameterVector() const
    {
        size_t kp = m_kernel->numberOfParameters();
        RealVector ret(kp + m_regularizers.size());
        if(m_unconstrained)
            init(ret) << parameters(m_kernel), log(m_regularizers);
        else
            init(ret) << parameters(m_kernel), m_regularizers;
        return ret;
    }

    /// set the vector of hyper-parameters
    void setParameterVector(RealVector const& newParameters)
    {
        size_t kp = m_kernel->numberOfParameters();
        SHARK_ASSERT(newParameters.size() == kp + m_regularizers.size());
        init(newParameters) >> parameters(m_kernel), m_regularizers;
        if(m_unconstrained)
            m_regularizers = exp(m_regularizers);
    }

    /// return the number of hyper-parameters
    size_t numberOfParameters() const{
        return m_kernel->numberOfParameters() + m_regularizers.size();
    }

protected:
    KernelType* m_kernel;   ///< Kernel object.
    /// \brief Vector of regularization parameters.
    ///
    /// If the size of the vector is 1, there is only one regularization parameter for all classes;
    /// otherwise there must be one for every class in the dataset.
    /// The exact meaning depends on the sub-class, but the value is always positive,
    /// and higher values imply a less regular solution.
    RealVector m_regularizers;
    bool m_trainOffset;      ///< Train the SVM with an offset (bias) term?
    bool m_unconstrained;    ///< Is log(C) stored internally as a parameter instead of C? If yes, then we get rid of the constraint C > 0 on the level of the parameter interface.
    std::size_t m_cacheSize; ///< Number of values in the kernel cache. The size of the cache in bytes is the size of one entry (4 for float, 8 for double) times this number; the default of 0x4000000 entries corresponds to 512 MB of double-precision values.
};
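

// What a concrete subclass must contribute is essentially the train()
// method inherited as a pure virtual from AbstractTrainer. A minimal
// sketch with a hypothetical name (real subclasses such as CSvmTrainer
// additionally set up a dedicated QP solver):
//
//   template<class InputType>
//   class MySvmTrainer : public AbstractSvmTrainer<InputType, unsigned int>
//   {
//   public:
//       void train(KernelClassifier<InputType>& model,
//                  LabeledData<InputType, unsigned int> const& dataset)
//       {
//           // build the kernel matrix (cache) of size m_cacheSize from kernel(),
//           // solve the dual QP with regularizationParameters() as box
//           // constraints, honoring m_stoppingcondition and recording
//           // m_solutionproperties, then sparsify the model if sparsify() is set
//       }
//   };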


///
/// \brief Super class of all linear SVM trainers.
///
/// \par
/// This class is analogous to the AbstractSvmTrainer class,
/// but for the training of linear SVMs. It holds the
/// regularization parameter of the SVM. The class also manages
/// meta-information of the training process, such as the stopping
/// criterion and information on the actual solution.
///
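/// \par
/// The parameter interface consists of a single value, C or log(C). A
/// sketch with a hypothetical concrete subclass:
/// \code
/// MyLinearSvmTrainer<RealVector> trainer(10.0, true);   // C = 10, exposed as log(C)
/// RealVector p = trainer.parameterVector();             // p(0) == std::log(10.0)
/// p(0) = 0.0;
/// trainer.setParameterVector(p);                        // sets C = exp(0) = 1
/// \endcode
///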
template <class InputType>
class AbstractLinearSvmTrainer
: public AbstractTrainer<LinearClassifier<InputType>, unsigned int>
, public QpConfig
, public IParameterizable
{
public:
    typedef LinearClassifier<InputType> ModelType;

    //! Constructor
    //! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
    //! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
    AbstractLinearSvmTrainer(double C, bool unconstrained = false)
    : m_C(C)
    , m_unconstrained(unconstrained)
    { RANGE_CHECK( C > 0 ); }

    /// \brief Return the value of the regularization parameter C.
    double C() const
    { return m_C; }

    /// \brief Set the value of the regularization parameter C.
    void setC(double C) {
        RANGE_CHECK( C > 0 );
        m_C = C;
    }

    /// \brief Is the regularization parameter provided in logarithmic (unconstrained) form as a parameter?
    bool isUnconstrained() const
    { return m_unconstrained; }

    /// \brief Get the hyper-parameter vector.
    RealVector parameterVector() const
    {
        RealVector ret(1);
        ret(0) = (m_unconstrained ? std::log(m_C) : m_C);
        return ret;
    }

    /// \brief Set the vector of hyper-parameters.
    void setParameterVector(RealVector const& newParameters)
    {
        SHARK_ASSERT(newParameters.size() == 1);
        setC(m_unconstrained ? std::exp(newParameters(0)) : newParameters(0));
    }

    /// \brief Return the number of hyper-parameters.
    size_t numberOfParameters() const
    { return 1; }

    using QpConfig::m_stoppingcondition;
    using QpConfig::m_solutionproperties;
    using QpConfig::m_verbosity;

protected:
    double m_C;           ///< Regularization parameter. The exact meaning depends on the sub-class, but the value is always positive, and higher values imply a less regular solution.
    bool m_unconstrained; ///< Is log(C) stored internally as a parameter instead of C? If yes, then we get rid of the constraint C > 0 on the level of the parameter interface.
};


} // namespace shark
#endif