McSvmMMRTrainer.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Trainer for the Maximum Margin Regression Multi-class Support Vector Machine
6  *
7  *
8  *
9  *
10  * \author T. Glasmachers
11  * \date -
12  *
13  *
14  * \par Copyright 1995-2015 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://image.diku.dk/shark/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 
37 #ifndef SHARK_ALGORITHMS_MCSVMMMRTRAINER_H
38 #define SHARK_ALGORITHMS_MCSVMMMRTRAINER_H
39 
40 
47 
48 
49 namespace shark {
50 
51 
52 ///
53 /// \brief Training of the maximum margin regression (MMR) multi-category SVM.
54 ///
55 /// This is a special support vector machine variant for
56 /// classification of more than two classes. Given are data
57 /// tuples \f$ (x_i, y_i) \f$ with x-component denoting input
58 /// and y-component denoting the label 1, ..., d (see the tutorial on
59 /// label conventions; the implementation uses values 0 to d-1),
60 /// a kernel function k(x, x') and a regularization
61 /// constant C > 0. Let H denote the kernel induced
62 /// reproducing kernel Hilbert space of k, and let \f$ \phi \f$
63 /// denote the corresponding feature map.
64 /// Then the SVM classifier is the function
65 /// \f[
66 /// h(x) = \arg \max (f_c(x))
67 /// \f]
68 /// \f[
69 /// f_c(x) = \langle w_c, \phi(x) \rangle + b_c
70 /// \f]
71 /// \f[
72 /// f = (f_1, \dots, f_d)
73 /// \f]
74 /// with class-wise coefficients w_c and b_c given by the
75 /// (primal) optimization problem
76 /// \f[
77 /// \min \frac{1}{2} \sum_c \|w_c\|^2 + C \sum_i L(y_i, f(x_i)),
78 /// \f]
79 /// The special property of the so-called MMR-machine is its
80 /// loss function, which measures the self-component of the
81 /// absolute margin violation.
82 /// Let \f$ h(m) = \max\{0, 1-m\} \f$ denote the hinge loss
83 /// as a function of the margin m, then the MMR loss is given
84 /// by
85 /// \f[
86 /// L(y, f(x)) = h(f_y(x))
87 /// \f]
88 ///
89 /// For more details see the report:<br/>
90 /// <p>Learning via linear operators: Maximum margin regression.
91 /// S. Szedmak, J. Shawe-Taylor, and E. Parrado-Hernandez,
92 /// PASCAL, 2006.</p>
93 ///
94 template <class InputType, class CacheType = float>
95 class McSvmMMRTrainer : public AbstractSvmTrainer<InputType, unsigned int>
96 {
97 public:
98  typedef CacheType QpFloatType;
99 
// NOTE(review): listing lines 100-102 are missing from this dump; they
// presumably declare member typedefs such as KernelType and base_type,
// which are used below — confirm against the original header.
103 
104  //! Constructor
105  //! \param kernel kernel function to use for training and prediction
106  //! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
107  //! \param offset whether to train with offset/bias parameter or not
108  //! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
109  McSvmMMRTrainer(KernelType* kernel, double C, bool offset, bool unconstrained = false)
110  : base_type(kernel, C, offset, unconstrained)
111  { }
112 
113  /// \brief From INameable: return the class name.
114  std::string name() const
115  { return "McSvmMMRTrainer"; }
116 
// NOTE(review): listing line 117 (the train() signature) is missing from
// this dump; presumably it is
// "void train(KernelClassifier<InputType>& svm, const LabeledData<InputType, unsigned int>& dataset)"
// given the uses of 'svm' and 'dataset' below — confirm against the original.
118  {
// One dual variable per training example; per-class offsets.
119  std::size_t ic = dataset.numberOfElements();
120  std::size_t classes = numberOfClasses(dataset);
121 
122  // prepare the problem description
// alpha: dual variables, initialized to zero (one per example).
123  RealVector alpha(ic,0.0);
// bias: one offset value per class, filled in by the solver when
// m_trainOffset is set (see the solver calls below).
124  RealVector bias(classes,0.0);
// gamma/rho parameterize the QP for the solver; semantics are defined by
// QpMcDecomp (not visible here) — gamma is a classes-by-1 matrix of ones,
// rho a single zero entry.
125  RealMatrix gamma(classes, 1,1.0);
126  UIntVector rho(1,0);
127 
// nu is a sparse classes-by-classes coefficient table with nu(y, y) = 1
// and zero elsewhere: in the MMR machine, only the true-class component
// of the margin enters the loss (and only the true-class alpha is written
// back into the model below).
128  QpSparseArray<QpFloatType> nu(classes, classes, classes);
129  for (unsigned int y=0; y<classes; y++)
130  nu.add(y, y, 1.0);
131 
// M is a sparse (classes*classes)-by-1 table indexed by label pairs
// (yv, yw) flattened into row r. Rows with yv == yw hold 1 - 1/classes,
// all other rows default to -1/classes — i.e. the dense value is
// delta(yv, yw) - 1/classes, assuming setDefaultValue/add have the usual
// QpSparseArray semantics (default for unset entries, explicit override
// on add) — TODO confirm against QpSparseArray.
132  QpSparseArray<QpFloatType> M(classes * classes, 1, classes);
133  QpFloatType mood = (QpFloatType)(-1.0 / (double)classes);
134  QpFloatType val = (QpFloatType)1.0 + mood;
135  for (unsigned int r=0, yv=0; yv<classes; yv++)
136  {
137  for (unsigned int yw=0; yw<classes; yw++, r++)
138  {
139  M.setDefaultValue(r, mood);
140  if (yv == yw) M.add(r, 0, val);
141  }
142  }
143 
144  typedef KernelMatrix<InputType, QpFloatType> KernelMatrixType;
145  typedef CachedMatrix< KernelMatrixType > CachedMatrixType;
146  typedef PrecomputedMatrix< KernelMatrixType > PrecomputedMatrixType;
147 
148  KernelMatrixType km(*base_type::m_kernel, dataset.inputs());
149 
150  // solve the problem
// NOTE(review): listing line 151 (the condition opening this branch) is
// missing from this dump; presumably it tests whether the full kernel
// matrix should be precomputed (e.g. base_type's precompute flag) —
// confirm against the original header.
152  {
// Precomputed branch: materialize the entire kernel matrix up front.
153  PrecomputedMatrixType matrix(&km);
154  QpMcDecomp< PrecomputedMatrixType > solver(matrix, gamma, rho, nu, M, true);
// NOTE(review): listing lines 155-156 are missing; presumably they
// declare the solution-properties object 'prop' passed to the solver
// (and possibly apply shrinking/stopping configuration) — confirm.
// m_s2do selects between the two solver entry points: solve() versus
// solveSMO(); bias is only passed (and thus trained) when
// m_trainOffset is set.
157  if (base_type::m_s2do)
158  solver.solve(dataset.labels(), this->C(), alpha, base_type::m_stoppingcondition, &prop, (this->m_trainOffset ? &bias : NULL));
159  else
160  solver.solveSMO(dataset.labels(), this->C(), alpha, base_type::m_stoppingcondition, &prop, (this->m_trainOffset ? &bias : NULL));
161  }
162  else
163  {
// Cached branch: compute kernel entries on demand, bounded by
// m_cacheSize.
164  CachedMatrixType matrix(&km, base_type::m_cacheSize);
165  QpMcDecomp< CachedMatrixType > solver(matrix, gamma, rho, nu, M, true);
// NOTE(review): listing lines 166-167 are missing; presumably the
// same 'prop' declaration/configuration as in the branch above.
168  if (base_type::m_s2do)
169  solver.solve(dataset.labels(), this->C(), alpha, base_type::m_stoppingcondition, &prop, (this->m_trainOffset ? &bias : NULL));
170  else
171  solver.solveSMO(dataset.labels(), this->C(), alpha, base_type::m_stoppingcondition, &prop, (this->m_trainOffset ? &bias : NULL));
172  }
173 
// Set up the kernel expansion over all training inputs, with one output
// per class; the offset part is only allocated when it was trained.
174  svm.decisionFunction().setStructure(this->m_kernel,dataset.inputs(),this->m_trainOffset,classes);
175 
176  // write the solution into the model
// Since nu(y, c) is 1 only for c == y (see its construction above), each
// example contributes its dual variable solely to its own class's
// coefficient column; all other columns stay zero.
177  for (std::size_t i=0; i<ic; i++)
178  {
179  unsigned int y = dataset.element(i).label;
180  for (unsigned int c=0; c<classes; c++)
181  {
182  svm.decisionFunction().alpha(i,c) = nu(y, c) * alpha(i);
183  }
184  }
185  if (this->m_trainOffset)
186  svm.decisionFunction().offset() = bias;
187 
// Record kernel-evaluation statistics and optionally drop the
// non-support-vector rows from the expansion.
188  base_type::m_accessCount = km.getAccessCount();
189  if (this->sparsify())
190  svm.decisionFunction().sparsify();
191  }
192 };
193 
194 
195 template <class InputType>
// NOTE(review): listing line 196 (the class declaration) is missing from
// this dump; presumably it reads
// "class LinearMcSvmMMRTrainer : public AbstractLinearSvmTrainer<InputType>"
// given the base-class constructor call below — confirm against the
// original header. This is the linear (primal, non-kernelized) variant of
// the MMR trainer above.
197 {
198 public:
// NOTE(review): listing line 199 is missing; presumably member typedefs.
200 
// Constructor: forwards the regularization parameter C and the
// unconstrained-parameterization flag to the base trainer.
201  LinearMcSvmMMRTrainer(double C, bool unconstrained = false)
202  : AbstractLinearSvmTrainer<InputType>(C, unconstrained){ }
203 
204  /// \brief From INameable: return the class name.
205  std::string name() const
206  { return "LinearMcSvmMMRTrainer"; }
207 
// NOTE(review): listing line 208 (the train() signature) is missing;
// presumably it is
// "void train(LinearClassifier<InputType>& model, const LabeledData<InputType, unsigned int>& dataset)"
// given the uses of 'model' and 'dataset' below — confirm.
209  {
210  std::size_t dim = inputDimension(dataset);
211  std::size_t classes = numberOfClasses(dataset);
212 
// Delegate to the dedicated linear MMR QP solver; the returned weight
// matrix w (one row/column layout per setStructure's contract) defines
// the linear decision function directly — no kernel expansion.
213  QpMcLinearMMR<InputType> solver(dataset, dim, classes);
214  RealMatrix w = solver.solve(this->C(), this->stoppingCondition(), &this->solutionProperties(), this->verbosity() > 0);
215  model.decisionFunction().setStructure(w);
216  }
217 };
218 
219 
220 }
221 #endif