//===========================================================================
/*!
 *
 *
 * \brief Trainer for the Multi-class Support Vector Machine by Crammer and Singer
 *
 *
 *
 *
 * \author      T. Glasmachers
 * \date        -
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
35 
36 
#ifndef SHARK_ALGORITHMS_MCSVMCSTRAINER_H
#define SHARK_ALGORITHMS_MCSVMCSTRAINER_H


// NOTE(review): the original #include block was lost in extraction;
// restored from the upstream Shark 3.0 header — verify paths against
// the original file.
#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Algorithms/QP/QpMcSimplexDecomp.h>
#include <shark/Algorithms/QP/QpMcLinear.h>

namespace shark {


///
/// \brief Training of the multi-category SVM by Crammer and Singer (CS).
///
/// This is a special support vector machine variant for
/// classification of more than two classes. Given are data
/// tuples \f$ (x_i, y_i) \f$ with x-component denoting input
/// and y-component denoting the label 1, ..., d (see the tutorial on
/// label conventions; the implementation uses values 0 to d-1),
/// a kernel function k(x, x') and a regularization
/// constant C > 0. Let H denote the kernel induced
/// reproducing kernel Hilbert space of k, and let \f$ \phi \f$
/// denote the corresponding feature map.
/// Then the SVM classifier is the function
/// \f[
/// h(x) = \arg \max (f_c(x))
/// \f]
/// \f[
/// f_c(x) = \langle w_c, \phi(x) \rangle + b_c
/// \f]
/// \f[
/// f = (f_1, \dots, f_d)
/// \f]
/// with class-wise coefficients w_c and b_c given by the
/// (primal) optimization problem
/// \f[
/// \min \frac{1}{2} \sum_c \|w_c\|^2 + C \sum_i L(y_i, f(x_i)),
/// \f]
/// The special property of the so-called CS-machine is its
/// loss function, which measures the maximal relative margin
/// violation.
/// Let \f$ h(m) = \max\{0, 1-m\} \f$ denote the hinge loss
/// as a function of the margin m, then the CS loss is given
/// by
/// \f[
/// L(y, f(x)) = \max_{c} h(f_y(x) - f_c(x))
/// \f]
///
/// For details refer to the paper:<br/>
/// <p>On the algorithmic implementation of multiclass kernel-based vector machines. K. Crammer and Y. Singer, Journal of Machine Learning Research, 2002.</p>
///
91 template <class InputType, class CacheType = float>
92 class McSvmCSTrainer : public AbstractSvmTrainer<InputType, unsigned int>
93 {
94 public:
95  typedef CacheType QpFloatType;
96 
100 
101  //! Constructor
102  //! \param kernel kernel function to use for training and prediction
103  //! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
104  //! \param offset whether to train offset/bias parameter
105  //! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
106  McSvmCSTrainer(KernelType* kernel, double C, bool offset, bool unconstrained = false)
107  : base_type(kernel, C, offset, unconstrained)
108  { }
109 
110  /// \brief From INameable: return the class name.
111  std::string name() const
112  { return "McSvmCSTrainer"; }
113 
115  {
116  std::size_t ic = dataset.numberOfElements();
117  std::size_t classes = numberOfClasses(dataset);
118 
119  // prepare the problem description
120  RealMatrix linear(ic, classes-1,1.0);
121 
122  QpSparseArray<QpFloatType> nu(classes * (classes-1), classes, 2*classes*(classes-1));
123  for (unsigned int r=0, y=0; y<classes; y++)
124  {
125  for (unsigned int p=0, pp=0; p<classes-1; p++, pp++, r++)
126  {
127  if (pp == y) pp++;
128  if (y < pp)
129  {
130  nu.add(r, y, 0.5);
131  nu.add(r, pp, -0.5);
132  }
133  else
134  {
135  nu.add(r, pp, -0.5);
136  nu.add(r, y, 0.5);
137  }
138  }
139  }
140 
141  QpSparseArray<QpFloatType> M(classes * (classes-1) * classes, classes-1, 2 * classes * (classes-1) * (classes-1));
142  for (unsigned int r=0, yv=0; yv<classes; yv++)
143  {
144  for (unsigned int pv=0, ppv=0; pv<classes-1; pv++, ppv++)
145  {
146  if (ppv == yv) ppv++;
147  for (unsigned int yw=0; yw<classes; yw++, r++)
148  {
149  QpFloatType baseM = (yv == yw ? (QpFloatType)0.25 : (QpFloatType)0.0) - (ppv == yw ? (QpFloatType)0.25 : (QpFloatType)0.0); //4 casts are for compiler warnings
150  M.setDefaultValue(r, baseM);
151  if (yv == yw)
152  {
153  M.add(r, ppv - (ppv >= yw ? 1 : 0), baseM + (QpFloatType)0.25);
154  }
155  else if (ppv == yw)
156  {
157  M.add(r, yv - (yv >= yw ? 1 : 0), baseM - (QpFloatType)0.25);
158  }
159  else
160  {
161  unsigned int pw = ppv - (ppv >= yw ? 1 : 0);
162  unsigned int pw2 = yv - (yv >= yw ? 1 : 0);
163  if (pw < pw2)
164  {
165  M.add(r, pw, baseM + (QpFloatType)0.25);
166  M.add(r, pw2, baseM - (QpFloatType)0.25);
167  }
168  else
169  {
170  M.add(r, pw2, baseM - (QpFloatType)0.25);
171  M.add(r, pw, baseM + (QpFloatType)0.25);
172  }
173  }
174  }
175  }
176  }
177 
178  typedef KernelMatrix<InputType, QpFloatType> KernelMatrixType;
179  typedef CachedMatrix< KernelMatrixType > CachedMatrixType;
180  typedef PrecomputedMatrix< KernelMatrixType > PrecomputedMatrixType;
181 
182  KernelMatrixType km(*base_type::m_kernel, dataset.inputs());
183 
184  RealMatrix alpha(ic,classes-1,0.0);
185  RealVector bias(classes,0.0);
186  // solve the problem
188  {
189  PrecomputedMatrixType matrix(&km);
190  QpMcSimplexDecomp< PrecomputedMatrixType> problem(matrix, M, dataset.labels(), linear, this->C());
192  problem.setShrinking(base_type::m_shrinking);
193  //problem.setShrinking(false);
194  if(this->m_trainOffset){
195  BiasSolverSimplex<PrecomputedMatrixType> biasSolver(&problem);
196  biasSolver.solve(bias,base_type::m_stoppingcondition,nu);
197  }
198  else{
200  solver.solve( base_type::m_stoppingcondition, &prop);
201  }
202  alpha = problem.solution();
203  }
204  else
205  {
206  CachedMatrixType matrix(&km, base_type::m_cacheSize);
207  QpMcSimplexDecomp< CachedMatrixType> problem(matrix, M, dataset.labels(), linear, this->C());
209  problem.setShrinking(base_type::m_shrinking);
210  //problem.setShrinking(false);
211  if(this->m_trainOffset){
212  BiasSolverSimplex<CachedMatrixType> biasSolver(&problem);
213  biasSolver.solve(bias,base_type::m_stoppingcondition,nu);
214  }
215  else{
217  solver.solve( base_type::m_stoppingcondition, &prop);
218  }
219  alpha = problem.solution();
220  }
221 
222  svm.decisionFunction().setStructure(this->m_kernel,dataset.inputs(),this->m_trainOffset,classes);
223 
224  for (std::size_t i=0; i<ic; i++)
225  {
226  unsigned int y = dataset.element(i).label;
227  for (std::size_t c=0; c<classes; c++)
228  {
229  double sum = 0.0;
230  std::size_t r = (classes-1) * y;
231  for (std::size_t p=0; p<classes-1; p++, r++)
232  sum += nu(r, c) * alpha(i, p);
233  svm.decisionFunction().alpha(i,c) = sum;
234  }
235  }
236 
237  if (this->m_trainOffset)
238  svm.decisionFunction().offset() = bias;
239 
240  base_type::m_accessCount = km.getAccessCount();
241  if (this->sparsify())
242  svm.decisionFunction().sparsify();
243  }
244 };


247 template <class InputType>
249 {
250 public:
252 
253  LinearMcSvmCSTrainer(double C, bool unconstrained = false)
254  : AbstractLinearSvmTrainer<InputType>(C, unconstrained){ }
255 
256  /// \brief From INameable: return the class name.
257  std::string name() const
258  { return "LinearMcSvmCSTrainer"; }
259 
261  {
262  std::size_t dim = inputDimension(dataset);
263  std::size_t classes = numberOfClasses(dataset);
264 
265  QpMcLinearCS<InputType> solver(dataset, dim, classes);
266  RealMatrix w = solver.solve(this->C(), this->stoppingCondition(), &this->solutionProperties(), this->verbosity() > 0);
267  model.decisionFunction().setStructure(w);
268  }
269 };


// shorthands for unified naming scheme; we resort to #define
// statements since old c++ does not support templated typedefs
#define McSvmRDMTrainer McSvmCSTrainer
#define LinearMcSvmRDMTrainer LinearMcSvmCSTrainer


}
#endif