QpMcBoxDecomp.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Quadratic programming m_problem for multi-class SVMs
6  *
7  *
8  *
9  *
10  * \author T. Glasmachers
11  * \date 2007-2012
12  *
13  *
14  * \par Copyright 1995-2015 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://image.diku.dk/shark/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 
37 #ifndef SHARK_ALGORITHMS_QP_QPMCBOXDECOMP_H
38 #define SHARK_ALGORITHMS_QP_QPMCBOXDECOMP_H
39 
42 #include <shark/Algorithms/QP/Impl/AnalyticProblems.h>
43 #include <shark/Core/Timer.h>
44 #include <shark/Data/Dataset.h>
45 
46 
47 namespace shark {
48 
49 template <class Matrix>
51 {
52 public:
53  typedef typename Matrix::QpFloatType QpFloatType;
54  /// \brief Working set selection eturning th S2DO working set
55  ///
56  /// This selection operator picks the first variable by maximum gradient,
57  /// the second by maximum unconstrained gain.
59  template<class Problem>
60  double operator()(Problem& problem, std::size_t& i, std::size_t& j){
61  //todo move implementation here
62  return problem.selectWorkingSet(i,j);
63  }
64 
65  void reset(){}
66  };
67 
68  ///Constructor
69  ///\param kernel kernel matrix - cache or pre-computed matrix
70  ///\param M kernel modifiers in the format \f$ M_(y_i, p, y_j, q) = _M(classes*(y_i*|P|+p_i)+y_j, q) \f$
71  ///\param target the target labels for the variables
72  ///\param linearMat the linear part of the problem
73  ///\param C upper bound for all box variables, lower bound is 0.
75  Matrix& kernel,
77  Data<unsigned int> const& target,
78  RealMatrix const& linearMat,
79  double C
80  )
81  : m_kernelMatrix(kernel)
82  , m_M(M)
83  , m_C(C)
84  , m_classes(numberOfClasses(target))
85  , m_cardP(linearMat.size2())
86  , m_numExamples(kernel.size())
89  , m_alpha(m_numVariables,0.0)
95  , m_useShrinking(true)
96  {
97 
99  target.numberOfElements() == m_numExamples
100  && linearMat.size1() == kernel.size(),
101  "[QpMcDecomp::QpMcDecomp] dimension conflict"
102  );
103 
104  // prepare m_problem internal variables
107  for (std::size_t v=0, i=0; i<m_numExamples; i++)
108  {
109  unsigned int y = target.element(i);
110  m_examples[i].index = i;
111  m_examples[i].y = y;
112  m_examples[i].active = m_cardP;
113  m_examples[i].var = &m_storage1[m_cardP * i];
114  m_examples[i].avar = &m_storage2[m_cardP * i];
115  double k = m_kernelMatrix.entry(i, i);
116  for (std::size_t p=0; p<m_cardP; p++, v++)
117  {
118  m_variables[v].i = i;
119  m_variables[v].p = p;
120  m_variables[v].index = p;
121  double Q = m_M(m_classes * (y * m_cardP + p) + y, p) * k;
122  m_variables[v].diagonal = Q;
123  m_storage1[v] = v;
124  m_storage2[v] = v;
125 
126  m_linear(v) = m_gradient(v) = linearMat(i,p);
127  }
128  }
129  }
130 
131  ///enable/disable shrinking
132  void setShrinking(bool shrinking = true)
133  {
134  m_useShrinking = shrinking;
135  }
136 
137  /// \brief Return the solution found.
138  RealMatrix solution() const{
139  RealMatrix solutionMatrix(m_numVariables,m_cardP,0);
140  for (std::size_t v=0; v<m_numVariables; v++)
141  {
142  solutionMatrix(originalIndex(v),m_variables[v].p) = m_alpha(v);
143  }
144  return solutionMatrix;
145  }
146  /// \brief Return the gradient of the solution.
147  RealMatrix solutionGradient() const{
148  RealMatrix solutionGradientMatrix(m_numVariables,m_cardP,0);
149  for (std::size_t v=0; v<m_numVariables; v++)
150  {
151  solutionGradientMatrix(originalIndex(v),m_variables[v].p) = m_gradient(v);
152  }
153  return solutionGradientMatrix;
154  }
155 
156  /// \brief Compute the objective value of the current solution.
157  double functionValue()const{
159  }
160 
161  unsigned int label(std::size_t i){
162  return m_examples[i].y;
163  }
164 
165  std::size_t dimensions()const{
166  return m_numVariables;
167  }
168  std::size_t cardP()const{
169  return m_cardP;
170  }
171 
172  std::size_t getNumExamples()const{
173  return m_numExamples;
174  }
175 
176  ///return the largest KKT violation
177  double checkKKT()const
178  {
179  double maxViolation = 0.0;
180  for (std::size_t v=0; v<m_activeVar; v++)
181  {
182  double a = m_alpha(v);
183  double g = m_gradient(v);
184  if (a < m_C)
185  {
186  maxViolation = std::max(maxViolation,g);
187  }
188  if (a > 0.0)
189  {
190  maxViolation = std::max(maxViolation,-g);
191  }
192  }
193  return maxViolation;
194  }
195 
196  /// \brief change the linear part of the problem by some delta
197  void addDeltaLinear(RealMatrix const& deltaLinear){
198  SIZE_CHECK(deltaLinear.size1() == m_numExamples);
199  SIZE_CHECK(deltaLinear.size2() == m_cardP);
200  for (std::size_t v=0; v<m_numVariables; v++)
201  {
202 
203  std::size_t p = m_variables[v].p;
204  m_gradient(v) += deltaLinear(originalIndex(v),p);
205  m_linear(v) += deltaLinear(originalIndex(v),p);
206  }
207  }
208 
209  void updateSMO(std::size_t v, std::size_t w){
210  SIZE_CHECK(v < m_activeVar);
211  SIZE_CHECK(w < m_activeVar);
212  // update
213  if (v == w)
214  {
215  // Limit case of a single variable;
216  // this means that there is only one
217  // non-optimal variables left.
218  std::size_t i = m_variables[v].i;
220  unsigned int p = m_variables[v].p;
221  unsigned int y = m_examples[i].y;
222  unsigned int r = m_cardP * y + p;
223  QpFloatType* q = m_kernelMatrix.row(i, 0, m_activeEx);
224  double Qvv = m_variables[v].diagonal;
225  double mu = -m_alpha(v);
226  detail::solveQuadraticEdge(m_alpha(v),m_gradient(v),Qvv,0,m_C);
227  mu+=m_alpha(v);
228  gradientUpdate(r, mu, q);
229  }
230  else
231  {
232  // S2DO
233  std::size_t iv = m_variables[v].i;
234  SHARK_ASSERT(iv < m_activeEx);
235  unsigned int pv = m_variables[v].p;
236  unsigned int yv = m_examples[iv].y;
237 
238  std::size_t iw = m_variables[w].i;
239  SHARK_ASSERT(iw < m_activeEx);
240  unsigned int pw = m_variables[w].p;
241  unsigned int yw = m_examples[iw].y;
242 
243  // get the matrix rows corresponding to the working set
244  QpFloatType* qv = m_kernelMatrix.row(iv, 0, m_activeEx);
245  QpFloatType* qw = m_kernelMatrix.row(iw, 0, m_activeEx);
246  unsigned int rv = m_cardP*yv+pv;
247  unsigned int rw = m_cardP*yw+pw;
248 
249  // get the Q-matrix restricted to the working set
250  double Qvv = m_variables[v].diagonal;
251  double Qww = m_variables[w].diagonal;
252  double Qvw = m_M(m_classes * rv + yw, pw) * qv[iw];
253 
254  // solve the sub-problem and update the gradient using the step sizes mu
255  double mu_v = -m_alpha(v);
256  double mu_w = -m_alpha(w);
257  detail::solveQuadratic2DBox(m_alpha(v), m_alpha(w),
258  m_gradient(v), m_gradient(w),
259  Qvv, Qvw, Qww,
260  0, m_C, 0, m_C
261  );
262  mu_v += m_alpha(v);
263  mu_w += m_alpha(w);
264 
265  gradientUpdate(rv, mu_v, qv);
266  gradientUpdate(rw, mu_w, qw);
267  }
268  }
269 
270  ///Shrink the problem
271  bool shrink(double epsilon)
272  {
273  if(! m_useShrinking)
274  return false;
275  if (! bUnshrinked)
276  {
277  double largest = 0.0;
278  for (std::size_t a = 0; a < m_activeVar; a++)
279  {
280  if (m_alpha(a) < m_C)
281  {
282  largest = std::max(largest,m_gradient(a));
283  }
284  if (m_alpha(a) > 0.0)
285  {
286  largest = std::max(largest,-m_gradient(a));
287  }
288  }
289  if (largest < 10.0 * epsilon)
290  {
291  // unshrink the problem at this accuracy level
292  unshrink();
293  bUnshrinked = true;
294  }
295  }
296 
297  // shrink variables
298  bool se = false;
299  for (int a= m_activeVar-1; a >= 0; a--)
300  {
301  double v = m_alpha(a);
302  double g = m_gradient(a);
303 
304  if ((v == 0.0 && g <= 0.0) || (v == m_C && g >= 0.0))
305  {
306  // In this moment no feasible step including this variables
307  // can improve the objective. Thus deactivate the variables.
308  std::size_t e = m_variables[a].i;
310  if (m_examples[e].active == 0)
311  {
312  se = true;
313  }
314  }
315  }
316 
317  if (se)
318  {
319  // exchange examples such that shrinked examples
320  // are moved to the ends of the lists
321  for (int a = m_activeEx - 1; a >= 0; a--)
322  {
323  if (m_examples[a].active == 0)
325  }
326  }
327  return true;
328  }
329 
330  ///Activate all m_numVariables
331  void unshrink()
332  {
333  if (m_activeVar == m_numVariables) return;
334 
335  // compute the inactive m_gradient components (quadratic time complexity)
337  for (std::size_t v = 0; v != m_numVariables; v++)
338  {
339  double mu = m_alpha(v);
340  if (mu == 0.0) continue;
341 
342  std::size_t iv = m_variables[v].i;
343  unsigned int pv = m_variables[v].p;
344  unsigned int yv = m_examples[iv].y;
345  unsigned int r = m_cardP * yv + pv;
346  std::vector<QpFloatType> q(m_numExamples);
347  m_kernelMatrix.row(iv, 0, m_numExamples, &q[0]);
348 
349  for (std::size_t a = 0; a != m_numExamples; a++)
350  {
351  double k = q[a];
352  Example& ex = m_examples[a];
353  typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
354  QpFloatType def = row.defaultvalue;
355  for (std::size_t b=0; b<row.size; b++)
356  {
357  std::size_t f = ex.var[row.entry[b].index];
358  if (f >= m_activeVar)
359  m_gradient(f) -= mu * (row.entry[b].value - def) * k;
360  }
361  if (def != 0.0)
362  {
363  double upd = mu * def * k;
364  for (std::size_t b=ex.active; b<m_cardP; b++)
365  {
366  std::size_t f = ex.avar[b];
368  m_gradient(f) -= upd;
369  }
370  }
371  }
372  }
373 
374  for (std::size_t i=0; i<m_numExamples; i++)
375  m_examples[i].active = m_cardP;
378  }
379 
380  //!
381  ///\brief select the working set
382  //!
383  ///Select one or two numVariables for the sub-problem
384  ///and return the maximal KKT violation. The method
385  ///MAY select the same index for i and j. In that
386  ///case the working set consists of a single variables.
387  ///The working set may be invalid if the method reports
388  ///a KKT violation of zero, indicating optimality.
389  double selectWorkingSet(std::size_t& i, std::size_t& j)
390  {
391  // box case
392  double maxViolation = 0.0;
393 
394  // first order selection
395  for (std::size_t a=0; a<m_activeVar; a++)
396  {
397  double aa = m_alpha(a);
398  double ga = m_gradient(a);
399  if (ga >maxViolation && aa < m_C)
400  {
401  maxViolation = ga;
402  i = a;
403  }
404  else if (-ga > maxViolation && aa > 0.0)
405  {
406  maxViolation = -ga;
407  i = a;
408  }
409  }
410  if (maxViolation == 0.0) return maxViolation;
411 
412  // second order selection
413  Variable& vari = m_variables[i];
414  std::size_t ii = vari.i;
415  SHARK_ASSERT(ii < m_activeEx);
416  unsigned int pi = vari.p;
417  unsigned int yi = m_examples[ii].y;
418  double di = vari.diagonal;
419  double gi = m_gradient(i);
420  QpFloatType* k = m_kernelMatrix.row(ii, 0, m_activeEx);
421 
422  j = i;
423  double bestgain = gi * gi / di;
424 
425  for (std::size_t a=0; a<m_activeEx; a++)
426  {
427  Example const& exa = m_examples[a];
428  unsigned int ya = exa.y;
429  typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * (yi * m_cardP + pi) + ya);
430  QpFloatType def = row.defaultvalue;
431 
432  for (std::size_t pf=0, b=0; pf < m_cardP; pf++)
433  {
434  std::size_t f = exa.var[pf];
435  double qif = def * k[a];
436  //check whether we are at an existing element of the sparse row
437  if( b != row.size && pf == row.entry[b].index){
438  qif = row.entry[b].value * k[a];
439  ++b;//move to next element
440  }
441  if(f >= m_activeVar || f == i)
442  continue;
443 
444  double af = m_alpha(f);
445  double gf = m_gradient(f);
446  double df = m_variables[f].diagonal;
447 
448  //check whether a step is possible at all.
449  if (!(af > 0.0 && gf < 0.0) && !(af < m_C && gf > 0.0))
450  continue;
451 
452  double gain = detail::maximumGainQuadratic2D(di,df,qif,di,gi,gf);
453  if( gain > bestgain){
454  j = f;
455  bestgain = gain;
456  }
457  }
458  }
459 
460  return maxViolation;
461  }
462 
463 protected:
464 
465  void gradientUpdate(std::size_t r, double mu, float* q)
466  {
467  for ( std::size_t a= 0; a< m_activeEx; a++)
468  {
469  double k = q[a];
470  Example& ex = m_examples[a];
471  typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
472  QpFloatType def = row.defaultvalue;
473  for (std::size_t b=0; b<row.size; b++){
474  std::size_t p = row.entry[b].index;
475  m_gradient(ex.var[p]) -= mu * (row.entry[b].value - def) * k;
476  }
477  if (def != 0.0){
478  double upd = mu* def * k;
479  for (std::size_t b=0; b<ex.active; b++)
480  m_gradient(ex.avar[b]) -= upd;
481  }
482  }
483  }
484 
485  ///true if the problem has already been unshrinked
487 
488  ///shrink a variable
489  void deactivateVariable(std::size_t v)
490  {
491  std::size_t ev = m_variables[v].i;
492  unsigned int iv = m_variables[v].index;
493  unsigned int pv = m_variables[v].p;
494  Example* exv = &m_examples[ev];
495 
496  std::size_t ih = exv->active - 1;
497  std::size_t h = exv->avar[ih];
498  m_variables[v].index = ih;
499  m_variables[h].index = iv;
500  std::swap(exv->avar[iv], exv->avar[ih]);
501  iv = ih;
502  exv->active--;
503 
504  std::size_t j = m_activeVar - 1;
505  std::size_t ej = m_variables[j].i;
506  unsigned int ij = m_variables[j].index;
507  unsigned int pj = m_variables[j].p;
508  Example* exj = &m_examples[ej];
509 
510  // exchange entries in the lists
511  std::swap(m_alpha(v), m_alpha(j));
513  std::swap(m_linear(v), m_linear(j));
515 
516  m_variables[exv->avar[iv]].index = ij;
517  m_variables[exj->avar[ij]].index = iv;
518  exv->avar[iv] = j;
519  exv->var[pv] = j;
520  exj->avar[ij] = v;
521  exj->var[pj] = v;
522 
523  m_activeVar--;
524  }
525 
526  ///shrink an m_examples
527  void deactivateExample(std::size_t e)
528  {
530  std::size_t j = m_activeEx - 1;
531  m_activeEx--;
532  if(e == j) return;
533 
535 
536  std::size_t* pe = m_examples[e].var;
537  std::size_t* pj = m_examples[j].var;
538  for (std::size_t v = 0; v < m_cardP; v++)
539  {
540  SHARK_ASSERT(pj[v] >= m_activeVar);
541  m_variables[pe[v]].i = e;
542  m_variables[pj[v]].i = j;
543  }
544 
545  m_kernelMatrix.flipColumnsAndRows(e, j);
546  }
547 
548  /// \brief Returns the original index of the example of a variable in the dataset before optimization.
549  ///
550  /// Shrinking is an internal detail so the communication with the outside world uses the original indizes.
551  std::size_t originalIndex(std::size_t v)const{
552  std::size_t i = m_variables[v].i;
553  return m_examples[i].index;//i before shrinking
554  }
555 
556  /// data structure describing one m_variables of the problem
557  struct Variable
558  {
559  ///index into the example list
560  std::size_t i;
561  /// constraint corresponding to this m_variables
562  unsigned int p;
563  /// index into example->m_numVariables
564  unsigned int index;
565  /// diagonal entry of the big Q-matrix
566  double diagonal;
567  };
568 
569  /// data structure describing one training example
570  struct Example
571  {
572  /// example index in the dataset, not the example vector!
573  std::size_t index;
574  /// label of this example
575  unsigned int y;
576  /// number of active m_numVariables
577  unsigned int active;
578  /// list of all m_cardP m_numVariables, in order of the p-index
579  std::size_t* var;
580  /// list of active m_numVariables
581  std::size_t* avar;
582  };
583 
584  ///kernel matrix (precomputed matrix or matrix cache)
585  Matrix& m_kernelMatrix;
586 
587  ///kernel modifiers
588  QpSparseArray<QpFloatType> const& m_M; // M(|P|*y_i+p, y_j, q)
589 
590  ///complexity constant; upper bound on all variabless
591  double m_C;
592 
593  ///number of m_classes in the problem
594  unsigned int m_classes;
595 
596  ///number of dual m_numVariables per example
597  unsigned int m_cardP;
598 
599  ///number of m_examples in the problem (size of the kernel matrix)
600  std::size_t m_numExamples;
601 
602  ///number of m_numVariables in the problem = m_examples times m_cardP
603  std::size_t m_numVariables;
604 
605  ///m_linear part of the objective function
606  RealVector m_linear;
607 
608  ///solution candidate
609  RealVector m_alpha;
610 
611  ///m_gradient of the objective function
612  ///The m_gradient array is of fixed size and not subject to shrinking.
613  RealVector m_gradient;
614 
615  ///information about each training example
616  std::vector<Example> m_examples;
617 
618  ///information about each m_variables of the problem
619  std::vector<Variable> m_variables;
620 
621  ///space for the example[i].var pointers
622  std::vector<std::size_t> m_storage1;
623 
624  ///space for the example[i].avar pointers
625  std::vector<std::size_t> m_storage2;
626 
627  ///number of currently active m_examples
628  std::size_t m_activeEx;
629 
630  ///number of currently active variabless
631  std::size_t m_activeVar;
632 
633  ///should the m_problem use the shrinking heuristics?
635 };
636 
637 
638 template<class Matrix>
640 public:
641  typedef typename Matrix::QpFloatType QpFloatType;
642  BiasSolver(QpMcBoxDecomp<Matrix>* problem) : m_problem(problem){}
643 
644  void solve(
645  RealVector& bias,
646  QpStoppingCondition& stop,
647  QpSparseArray<QpFloatType> const& nu,
648  bool sumToZero = false
649  ){
650  std::size_t classes = bias.size();
651  std::size_t numExamples = m_problem->getNumExamples();
652  std::size_t cardP = m_problem->cardP();
653  RealVector stepsize(classes, 0.01);
654  RealVector prev(classes,0);
655  RealVector step(classes);
656 
657  do{
658  QpSolver<QpMcBoxDecomp<Matrix> > solver(*m_problem);
659  solver.solve(stop);
660 
661  // Rprop loop to update the bias
662  while (true)
663  {
664  RealMatrix dualGradient = m_problem->solutionGradient();
665  // compute the primal m_gradient w.r.t. bias
666  RealVector grad(classes,0);
667 
668  for (std::size_t i=0; i<numExamples; i++){
669  for (std::size_t p=0; p<cardP; p++){
670  double g = dualGradient(i,p);
671  if (g > 0.0)
672  {
673  unsigned int y = m_problem->label(i);
674  typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP + p);
675  for (std::size_t b=0; b<row.size; b++)
676  grad(row.entry[b].index) -= row.entry[b].value;
677  }
678  }
679  }
680 
681  if (sumToZero)
682  {
683  // project the m_gradient
684  grad -= sum(grad) / classes;
685  }
686 
687  // Rprop
688  for (std::size_t c=0; c<classes; c++)
689  {
690  double g = grad(c);
691  if (g > 0.0)
692  step(c) = -stepsize(c);
693  else if (g < 0.0)
694  step(c) = stepsize(c);
695 
696  double gg = prev(c) * grad(c);
697  if (gg > 0.0)
698  stepsize(c) *= 1.2;
699  else
700  stepsize(c) *= 0.5;
701  }
702  prev = grad;
703 
704  if (sumToZero)
705  {
706  // project the step
707  step -= sum(step) / classes;
708  }
709 
710  // update the solution and the dual m_gradient
711  bias += step;
712  performBiasUpdate(step,nu);
713  //~ std::cout<<grad<<" "<<m_problem->checkKKT()<<" "<<stepsize<<" "<<bias<<std::endl;
714 
715  if (max(stepsize) < 0.01 * stop.minAccuracy) break;
716  }
717  }while(m_problem->checkKKT()> stop.minAccuracy);
718  }
719 private:
720  void performBiasUpdate(
721  RealVector const& step, QpSparseArray<QpFloatType> const& nu
722  ){
723  std::size_t numExamples = m_problem->getNumExamples();
724  std::size_t cardP = m_problem->cardP();
725  RealMatrix deltaLinear(numExamples,cardP,0.0);
726  for (std::size_t i=0; i<numExamples; i++){
727  for (std::size_t p=0; p<cardP; p++){
728  unsigned int y = m_problem->label(i);
729  // delta = \sum_m \nu_{m,p,y_i} \Delta b(m)
730  typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP +p);
731  for (std::size_t b=0; b<row.size; b++)
732  {
733  deltaLinear(i,p) -= row.entry[b].value * step(row.entry[b].index);
734  }
735  }
736  }
737  m_problem->addDeltaLinear(deltaLinear);
738 
739  }
740  QpMcBoxDecomp<Matrix>* m_problem;
741 };
742 
743 
744 }
745 #endif