McSvmWWTrainer.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Trainer for the Multi-class Support Vector Machine by Weston and Watkins
6  *
7  *
8  *
9  *
10  * \author T. Glasmachers
11  * \date -
12  *
13  *
14  * \par Copyright 1995-2015 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://image.diku.dk/shark/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 
37 #ifndef SHARK_ALGORITHMS_MCSVMWWTRAINER_H
38 #define SHARK_ALGORITHMS_MCSVMWWTRAINER_H
39 
40 
45 
49 
50 namespace shark {
51 
52 
53 ///
54 /// \brief Training of the multi-category SVM by Weston and Watkins (WW).
55 ///
56 /// This is a special support vector machine variant for
57 /// classification of more than two classes. Given are data
58 /// tuples \f$ (x_i, y_i) \f$ with x-component denoting input
59 /// and y-component denoting the label 1, ..., d (see the tutorial on
60 /// label conventions; the implementation uses values 0 to d-1),
61 /// a kernel function k(x, x') and a regularization
62 /// constant C > 0. Let H denote the kernel induced
63 /// reproducing kernel Hilbert space of k, and let \f$ \phi \f$
64 /// denote the corresponding feature map.
65 /// Then the SVM classifier is the function
66 /// \f[
67 /// h(x) = \arg \max (f_c(x))
68 /// \f]
69 /// \f[
70 /// f_c(x) = \langle w_c, \phi(x) \rangle + b_c
71 /// \f]
72 /// \f[
73 /// f = (f_1, \dots, f_d)
74 /// \f]
75 /// with class-wise coefficients w_c and b_c given by the
76 /// (primal) optimization problem
77 /// \f[
78 /// \min \frac{1}{2} \sum_c \|w_c\|^2 + C \sum_i L(y_i, f(x_i)),
79 /// \f]
80 /// The special property of the so-called WW-machine is its
81 /// loss function, which measures the sum of relative margin
82 /// violations.
83 /// Let \f$ h(m) = \max\{0, 1-m\} \f$ denote the hinge loss
84 /// as a function of the margin m, then the WW loss is given
85 /// by
86 /// \f[
87 /// L(y, f(x)) = \sum_{c} h(f_y(x) - f_c(x))
88 /// \f]
89 ///
90 /// Essentially the same method was independently invented by Vapnik
91 /// and by Bredensteiner and Bennett. For details see the literature:<br/>
92 /// <p>Support vector machines for multi-class pattern recognition. J. Weston and C. Watkins, Proceedings of the Seventh European Symposium On Artificial Neural Networks (ESANN), 1999.</p>
93 /// <p>Statistical Learning Theory. V. Vapnik, John Wiley and Sons, 1998.</p>
94 /// <p>Multicategory classification by support vector machines. E. J. Bredensteiner and K. P. Bennett, Computational Optimization and Applications, 12(1), 1999.</p>
95 ///
96 template <class InputType, class CacheType = float>
97 class McSvmWWTrainer : public AbstractSvmTrainer<InputType, unsigned int>
98 {
99 public:
100 
 // Scalar type for entries of the quadratic-program kernel matrix;
 // float by default to halve the memory footprint of the kernel cache.
101  typedef CacheType QpFloatType;
105 
106  //! Constructor
107  //! \param kernel kernel function to use for training and prediction
108  //! \param C regularization parameter - always the 'true' value of C, even when unconstrained is set
109  //! \param offset whether to train with offset/bias parameter or not
110  //! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before using it in the solver?
111  McSvmWWTrainer(KernelType* kernel, double C, bool offset, bool unconstrained = false)
112  : base_type(kernel, C, offset, unconstrained)
113  { }
114 
115  /// \brief From INameable: return the class name.
116  std::string name() const
117  { return "McSvmWWTrainer"; }
118 
 // NOTE(review): the train() signature (Doxygen source line 119) was lost
 // in extraction; the body below trains the classifier 'svm' on 'dataset'
 // -- restore the signature from the original header.
120  {
 // Number of training examples and number of classes.
121  std::size_t ic = dataset.numberOfElements();
122  std::size_t classes = numberOfClasses(dataset);
123 
124  // prepare the problem description
 // Linear part of the QP: one row per example with classes-1 dual
 // variables, all initialized to 1.0 (the margin target).
125  RealMatrix linear(ic, classes-1,1.0);
 // NOTE(review): 'rho' is not referenced by any visible line below; it
 // may be consumed by a line elided from this extraction -- verify.
126  UIntVector rho(classes-1,0);
127 
 // Sparse matrix 'nu': for each of the classes*(classes-1) rows
 // (label y, competing class pp) it stores the +0.5 / -0.5
 // contributions on the two classes involved.  Entries must be added
 // in ascending column order, hence the y < pp case distinction.
128  QpSparseArray<QpFloatType> nu(classes * (classes-1), classes, 2*classes*(classes-1));
129  for (unsigned int r=0, y=0; y<classes; y++)
130  {
131  for (unsigned int p=0, pp=0; p<classes-1; p++, pp++, r++)
132  {
 // pp enumerates the classes different from the label class y.
133  if (pp == y) pp++;
134  if (y < pp)
135  {
136  nu.add(r, y, 0.5);
137  nu.add(r, pp, -0.5);
138  }
139  else
140  {
141  nu.add(r, pp, -0.5);
142  nu.add(r, y, 0.5);
143  }
144  }
145  }
146 
 // Sparse matrix 'M': inner products between margin-constraint
 // directions for every (label yv, variable pv, label yw) triple.
 // A per-row default value carries the dense +/-0.25 base term; the
 // explicit entries correct the columns hit by the sparse deltas.
147  QpSparseArray<QpFloatType> M(classes * (classes-1) * classes, classes-1, 2 * classes * (classes-1) * (classes-1));
148  for (unsigned int r=0, yv=0; yv<classes; yv++)
149  {
150  for (unsigned int pv=0, ppv=0; pv<classes-1; pv++, ppv++)
151  {
152  if (ppv == yv) ppv++;
153  for (unsigned int yw=0; yw<classes; yw++, r++)
154  {
155  QpFloatType baseM = (yv == yw ? (QpFloatType)0.25 : (QpFloatType)0.0) - (ppv == yw ? (QpFloatType)0.25 : (QpFloatType)0.0);
156  M.setDefaultValue(r, baseM);
157  if (yv == yw)
158  {
159  M.add(r, ppv - (ppv >= yw ? 1 : 0), baseM + (QpFloatType)0.25);
160  }
161  else if (ppv == yw)
162  {
163  M.add(r, yv - (yv >= yw ? 1 : 0), baseM - (QpFloatType)0.25);
164  }
165  else
166  {
 // Two corrections for this row; the sparse array requires
 // ascending column order, hence the pw/pw2 comparison.
167  unsigned int pw = ppv - (ppv >= yw ? 1 : 0);
168  unsigned int pw2 = yv - (yv >= yw ? 1 : 0);
169  if (pw < pw2)
170  {
171  M.add(r, pw, baseM + (QpFloatType)0.25);
172  M.add(r, pw2, baseM - (QpFloatType)0.25);
173  }
174  else
175  {
176  M.add(r, pw2, baseM - (QpFloatType)0.25);
177  M.add(r, pw, baseM + (QpFloatType)0.25);
178  }
179  }
180  }
181  }
182  }
183 
184  typedef KernelMatrix<InputType, QpFloatType> KernelMatrixType;
185  typedef CachedMatrix< KernelMatrixType > CachedMatrixType;
186  typedef PrecomputedMatrix< KernelMatrixType > PrecomputedMatrixType;
187 
188  // solve the problem
189  RealMatrix alpha(ic,classes-1);
190  RealVector bias(classes,0);
191  KernelMatrixType km(*base_type::m_kernel, dataset.inputs());
 // NOTE(review): the condition selecting the precomputed-kernel branch
 // (Doxygen source line 192) and the declarations of 'prop' and 'solver'
 // (lines 196, 203, 219) are missing from this extraction -- restore
 // them from the original header before compiling.
193  {
 // Branch 1: fully precomputed kernel matrix (fast, memory-hungry).
194  PrecomputedMatrixType matrix(&km);
195  QpMcBoxDecomp< PrecomputedMatrixType > problem(matrix, M, dataset.labels(), linear, this->C());
197  problem.setShrinking(base_type::m_shrinking);
198  if(this->m_trainOffset){
 // With offset: a dedicated solver alternates QP steps and bias updates.
199  BiasSolver< PrecomputedMatrixType > biasSolver(&problem);
200  biasSolver.solve(bias,base_type::m_stoppingcondition,nu);
201  }
202  else{
204  solver.solve( base_type::m_stoppingcondition, &prop);
205  }
206  alpha = problem.solution();
207  }
208  else
209  {
 // Branch 2: kernel values computed on demand and kept in an LRU cache
 // of at most m_cacheSize entries.
210  CachedMatrixType matrix(&km, base_type::m_cacheSize);
211  QpMcBoxDecomp< CachedMatrixType> problem(matrix, M, dataset.labels(), linear, this->C());
213  problem.setShrinking(base_type::m_shrinking);
214  if(this->m_trainOffset){
215  BiasSolver<CachedMatrixType> biasSolver(&problem);
216  biasSolver.solve(bias,base_type::m_stoppingcondition,nu);
217  }
218  else{
220  solver.solve( base_type::m_stoppingcondition, &prop);
221  }
222  alpha = problem.solution();
223  }
224 
225  svm.decisionFunction().setStructure(this->m_kernel,dataset.inputs(),this->m_trainOffset,classes);
226 
227  // write the solution into the model
 // Expand each example's (classes-1) dual variables into per-class
 // coefficients through the corresponding block of rows of 'nu'.
228  for (std::size_t i=0; i<ic; i++)
229  {
230  unsigned int y = dataset.element(i).label;
231  for (std::size_t c=0; c<classes; c++)
232  {
233  double sum = 0.0;
234  std::size_t r = (classes-1) * y;
235  for (std::size_t p=0; p<classes-1; p++, r++)
236  sum += nu(r, c) * alpha(i,p);
237  svm.decisionFunction().alpha(i,c) = sum;
238  }
239  }
240  if (this->m_trainOffset)
241  svm.decisionFunction().offset() = bias;
242 
 // Record kernel-evaluation count for profiling; optionally drop
 // examples whose coefficients are all zero (non-support-vectors).
243  base_type::m_accessCount = km.getAccessCount();
244  if (this->sparsify())
245  svm.decisionFunction().sparsify();
246  }
247 };
248 
249 
 /// \brief Trainer for the linear (no-kernel) variant of the
 /// Weston-Watkins multi-class SVM.
 // NOTE(review): the class-declaration line (Doxygen source line 251,
 // presumably 'class LinearMcSvmWWTrainer : public
 // AbstractLinearSvmTrainer<InputType>') and a typedef line (254) were
 // lost in extraction -- restore them from the original header.
250 template <class InputType>
252 {
253 public:
255 
 //! Constructor
 //! \param C regularization parameter - always the 'true' value of C
 //! \param unconstrained when a C-value is given via setParameter, should it be piped through the exp-function before use? (assumed to mirror the kernel trainer's parameter -- confirm)
256  LinearMcSvmWWTrainer(double C, bool unconstrained = false)
257  : AbstractLinearSvmTrainer<InputType>(C, unconstrained)
258  { }
259 
260  /// \brief From INameable: return the class name.
261  std::string name() const
262  { return "LinearMcSvmWWTrainer"; }
263 
 // NOTE(review): the train() signature (Doxygen source line 264) is
 // missing from this extraction; the body below trains 'model' on
 // 'dataset' -- restore the signature from the original header.
265  {
266  std::size_t dim = inputDimension(dataset);
267  std::size_t classes = numberOfClasses(dataset);
268 
 // Delegate to the dedicated linear WW solver; 'w' is the weight
 // matrix of the linear decision function (one row per class).
269  QpMcLinearWW<InputType> solver(dataset, dim, classes);
270  RealMatrix w = solver.solve(this->C(), this->stoppingCondition(), &this->solutionProperties(), this->verbosity() > 0);
271  model.decisionFunction().setStructure(w);
272  }
273 };
274 
275 
276 // shorthands for unified naming scheme; we resort to #define
277 // statements since old c++ does not support templated typedefs
278 #define McSvmRDSTrainer McSvmWWTrainer
279 #define LinearMcSvmRDSTrainer LinearMcSvmWWTrainer
280 
281 
282 }
283 #endif