QpMcSimplexDecomp.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Quadratic programming problem for multi-class SVMs
6  *
7  *
8  *
9  *
10  * \author T. Glasmachers
11  * \date 2007-2012
12  *
13  *
14  * \par Copyright 1995-2015 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://image.diku.dk/shark/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 
37 #ifndef SHARK_ALGORITHMS_QP_QPMCSIMPLEXDECOMP_H
38 #define SHARK_ALGORITHMS_QP_QPMCSIMPLEXDECOMP_H
39 
42 #include <shark/Algorithms/QP/Impl/AnalyticProblems.h>
43 #include <shark/Core/Timer.h>
44 #include <shark/Data/Dataset.h>
45 
46 
47 namespace shark {
48 
49 template <class Matrix>
51 {
52 public:
53  typedef typename Matrix::QpFloatType QpFloatType;
54  /// \brief Working set selection returning the S2DO working set
55  ///
56  /// This selection operator picks the first variable by maximum gradient,
57  /// the second by maximum unconstrained gain.
59  template<class Problem>
60  double operator()(Problem& problem, std::size_t& i, std::size_t& j){
61  //todo move implementation here
62  return problem.selectWorkingSet(i,j);
63  }
64 
65  void reset(){}
66  };
67 
68  /// Constructor
69  /// \param kernel kernel matrix - cache or pre-computed matrix
70  /// \param M kernel modifiers in the format \f$ M_(y_i, p, y_j, q) = _M(classes*(y_i*|P|+p_i)+y_j, q) \f$
71  /// \param target the target labels for the variables
72  /// \param linearMat the linear part of the problem
73  /// \param C upper bound for all box variables, lower bound is 0.
75  Matrix& kernel,
77  Data<unsigned int> const& target,
78  RealMatrix const& linearMat,
79  double C
80  )
81  : m_kernelMatrix(kernel)
82  , m_M(M)
83  , m_C(C)
84  , m_classes(numberOfClasses(target))
85  , m_cardP(linearMat.size2())
86  , m_numExamples(kernel.size())
89  , m_alpha(m_numVariables,0.0)
95  , m_useShrinking(true)
96  {
97 
99  target.numberOfElements() == m_numExamples
100  && linearMat.size1() == kernel.size(),
101  "[QpMcDecomp::QpMcDecomp] dimension conflict"
102  );
103 
104  // prepare problem internal variables
107  for (std::size_t v=0, i=0; i<m_numExamples; i++)
108  {
109  unsigned int y = target.element(i);
110  m_examples[i].index = i;
111  m_examples[i].y = y;
112  m_examples[i].active = m_cardP;
113  m_examples[i].var = &m_storage1[m_cardP * i];
114  m_examples[i].avar = &m_storage2[m_cardP * i];
115  m_examples[i].varsum = 0;
116  double k = m_kernelMatrix.entry(i, i);
117  m_examples[i].diagonal = k;
118  for (std::size_t p=0; p<m_cardP; p++, v++)
119  {
120  m_variables[v].example = i;
121  m_variables[v].p = p;
122  m_variables[v].index = p;
123  double Q = m_M(m_classes * (y * m_cardP + p) + y, p) * k;
124  m_variables[v].diagonal = Q;
125  m_storage1[v] = v;
126  m_storage2[v] = v;
127 
128  m_linear(v) = m_gradient(v) = linearMat(i,p);
129  }
130  }
131  // initialize unshrinking to make valgrind happy.
132  bUnshrinked = false;
133  }
134 
135  /// enable/disable shrinking
136  void setShrinking(bool shrinking = true)
137  {
138  m_useShrinking = shrinking;
139  }
140 
141  /// \brief Returns the solution found.
142  RealMatrix solution() const{
143  RealMatrix solutionMatrix(m_numVariables,m_cardP,0);
144  for (std::size_t v=0; v<m_numVariables; v++)
145  {
146  solutionMatrix(originalIndex(v),m_variables[v].p) = m_alpha(v);
147  }
148  return solutionMatrix;
149  }
150  /// \brief Returns the gradient of the solution.
151  RealMatrix solutionGradient() const{
152  RealMatrix solutionGradientMatrix(m_numVariables,m_cardP,0);
153  for (std::size_t v=0; v<m_numVariables; v++)
154  {
155  solutionGradientMatrix(originalIndex(v),m_variables[v].p) = m_gradient(v);
156  }
157  return solutionGradientMatrix;
158  }
159 
160  /// \brief Compute the objective value of the current solution.
161  double functionValue()const{
163  }
164 
	/// \brief Returns the class label of the i-th example.
	///
	/// \param i index into the (possibly reordered) internal example list
	unsigned int label(std::size_t i){
		return m_examples[i].y;
	}
168 
	/// \brief Total number of variables of the problem (= examples * cardP).
	std::size_t dimensions()const{
		return m_numVariables;
	}
	/// \brief Number of dual variables per example.
	std::size_t cardP()const{
		return m_cardP;
	}
175 
	/// \brief Number of training examples (size of the kernel matrix).
	std::size_t getNumExamples()const{
		return m_numExamples;
	}
179 
180  /// \brief change the linear part of the problem by some delta
181  void addDeltaLinear(RealMatrix const& deltaLinear){
182  SIZE_CHECK(deltaLinear.size1() == m_numExamples);
183  SIZE_CHECK(deltaLinear.size2() == m_cardP);
184  for (std::size_t v=0; v<m_numVariables; v++)
185  {
186  std::size_t p = m_variables[v].p;
187  m_gradient(v) += deltaLinear(originalIndex(v),p);
188  m_linear(v) += deltaLinear(originalIndex(v),p);
189  }
190  }
191 
	/// \brief Perform one SMO/S2DO optimization step on the working set (v,w).
	///
	/// For v == w a one-dimensional step on variable v is carried out; its
	/// upper bound accounts for the simplex constraint of v's example
	/// (sum of the example's alphas bounded by m_C). For v != w a 2D
	/// sub-problem is solved: triangle-constrained when both variables
	/// belong to the same example, box-constrained otherwise. Afterwards
	/// the cached variable sums and the gradient are updated.
	void updateSMO(std::size_t v, std::size_t w){
		SIZE_CHECK(v < m_activeVar);
		SIZE_CHECK(w < m_activeVar);
		// update
		if (v == w)
		{
			// Limit case of a single variable;
			std::size_t i = m_variables[v].example;
			unsigned int p = m_variables[v].p;
			unsigned int y = m_examples[i].y;
			unsigned int r = m_cardP * y + p;
			double& varsum = m_examples[i].varsum;
			//the upper bound depends on the values of the variables of the other classes.
			double upperBound = m_C-varsum+m_alpha(v);

			QpFloatType* q = m_kernelMatrix.row(i, 0, m_activeEx);
			double Qvv = m_variables[v].diagonal;
			// mu records the actually performed step: new alpha minus old alpha
			double mu = -m_alpha(v);
			detail::solveQuadraticEdge(m_alpha(v),m_gradient(v),Qvv,0,upperBound);
			mu += m_alpha(v);
			updateVarsum(i,mu);
			gradientUpdate(r, mu, q);
		}
		else
		{
			// S2DO
			std::size_t iv = m_variables[v].example;
			SHARK_ASSERT(iv < m_activeEx);
			unsigned int pv = m_variables[v].p;
			unsigned int yv = m_examples[iv].y;
			double& varsumv = m_examples[iv].varsum;

			std::size_t iw = m_variables[w].example;
			SHARK_ASSERT(iw < m_activeEx);
			unsigned int pw = m_variables[w].p;
			unsigned int yw = m_examples[iw].y;
			double& varsumw = m_examples[iw].varsum;

			// get the matrix rows corresponding to the working set
			QpFloatType* qv = m_kernelMatrix.row(iv, 0, m_activeEx);
			QpFloatType* qw = m_kernelMatrix.row(iw, 0, m_activeEx);
			unsigned int rv = m_cardP*yv+pv;
			unsigned int rw = m_cardP*yw+pw;

			// get the Q-matrix restricted to the working set
			double Qvv = m_variables[v].diagonal;
			double Qww = m_variables[w].diagonal;
			double Qvw = m_M(m_classes * rv + yw, pw) * qv[iw];

			//same sample - simplex case
			// mu_v/mu_w record the actually performed steps
			double mu_v = -m_alpha(v);
			double mu_w = -m_alpha(w);
			if(iv == iw){
				// both variables share one simplex constraint of the same example
				double upperBound = m_C-varsumv+m_alpha(v)+m_alpha(w);
				// solve the sub-problem and update the gradient using the step sizes mu
				detail::solveQuadratic2DTriangle(m_alpha(v), m_alpha(w),
					m_gradient(v), m_gradient(w),
					Qvv, Qvw, Qww,
					upperBound
				);
				mu_v += m_alpha(v);
				mu_w += m_alpha(w);
				updateVarsum(iv,mu_v+mu_w);
			}
			else{
				// independent simplex constraints reduce to box constraints
				double Uv = m_C-varsumv+m_alpha(v);
				double Uw = m_C-varsumw+m_alpha(w);
				// solve the sub-problem and update the gradient using the step sizes mu
				detail::solveQuadratic2DBox(m_alpha(v), m_alpha(w),
					m_gradient(v), m_gradient(w),
					Qvv, Qvw, Qww,
					0, Uv, 0, Uw
				);
				mu_v += m_alpha(v);
				mu_w += m_alpha(w);
				updateVarsum(iv,mu_v);
				updateVarsum(iw,mu_w);
			}

			// NOTE(review): varsumvo and varsumwo are computed but never read
			// afterwards — this looks like leftover debug/verification code.
			double varsumvo = 0;
			for(std::size_t p = 0; p != m_cardP; ++p){
				std::size_t varIndex = m_examples[iv].var[p];
				varsumvo += m_alpha[varIndex];
			}
			double varsumwo = 0;
			for(std::size_t p = 0; p != m_cardP; ++p){
				std::size_t varIndex = m_examples[iw].var[p];
				varsumwo += m_alpha[varIndex];
			}
			gradientUpdate(rv, mu_v, qv);
			gradientUpdate(rw, mu_w, qw);
		}
	}
287 
288  /// Shrink the problem
289  bool shrink(double epsilon)
290  {
291  if(! m_useShrinking)
292  return false;
293  if (! bUnshrinked)
294  {
295  if (checkKKT() < 10.0 * epsilon)
296  {
297  // unshrink the problem at this accuracy level
298  unshrink();
299  bUnshrinked = true;
300  }
301  }
302 
303  //iterate through all simplices.
304  for (std::size_t i = m_activeEx; i > 0; i--){
305  Example const& ex = m_examples[i-1];
306  std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
307  double up = pair.first.first;
308  double down = pair.second.first;
309 
310  //check the simplex for possible search directions
311  //case 1: simplex is bounded and stays at the bound, in this case proceed as in MVP
312  if(down > 0 && ex.varsum == m_C && up-down > 0){
313  int pc = ex.active;
314  for(int p = pc-1; p >= 0; --p){
315  double a = m_alpha(ex.avar[p]);
316  double g = m_gradient(ex.avar[p]);
317  //if we can't do a step along the simplex, we can shrink the variable.
318  if(a == 0 && g-down < 0){
319  deactivateVariable(ex.avar[p]);
320  }
321  else if (a == m_C && up-g < 0){
322  //shrinking this variable means, that the whole simplex can't move,
323  //so shrink every variable, even the ones that previously couldn't
324  //be shrinked
325  for(int q = ex.active; q >= 0; --q){
326  deactivateVariable(ex.avar[q]);
327  }
328  p = 0;
329  }
330  }
331  }
332  //case 2: all variables are zero and pushed against the boundary
333  // -> shrink the example
334  else if(ex.varsum == 0 && up < 0){
335  int pc = ex.active;
336  for(int p = pc-1; p >= 0; --p){
337  deactivateVariable(ex.avar[p]);
338  }
339  }
340  //case 3: the simplex is not bounded and there are free variables.
341  //in this case we currently do not shrink
342  //as a variable might get bounded at some point which means that all variables
343  //can be important again.
344  //else{
345  //}
346 
347  }
348 // std::cout<<"shrunk. remaining: "<<m_activeEx<<","<<m_activeVar<<std::endl;
349  return true;
350  }
351 
352  /// Activate all m_numVariables
	/// Activate all m_numVariables
	///
	/// Recomputes the gradient components of all currently inactive
	/// variables from scratch (quadratic time complexity) and re-activates
	/// all variables of every example.
	void unshrink()
	{
		if (m_activeVar == m_numVariables) return;

		// compute the inactive m_gradient components (quadratic time complexity)
		for (std::size_t v = 0; v != m_numVariables; v++)
		{
			double mu = m_alpha(v);
			if (mu == 0.0) continue; // only non-zero alphas contribute

			std::size_t iv = m_variables[v].example;
			unsigned int pv = m_variables[v].p;
			unsigned int yv = m_examples[iv].y;
			unsigned int r = m_cardP * yv + pv;
			std::vector<QpFloatType> q(m_numExamples);
			m_kernelMatrix.row(iv, 0, m_numExamples, &q[0]);

			for (std::size_t a = 0; a != m_numExamples; a++)
			{
				double k = q[a];
				Example& ex = m_examples[a];
				typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
				QpFloatType def = row.defaultvalue;
				// explicit (non-default) entries of the sparse modifier row;
				// only inactive gradient components need recomputation
				for (std::size_t b=0; b<row.size; b++)
				{
					std::size_t f = ex.var[row.entry[b].index];
					if (f >= m_activeVar)
						m_gradient(f) -= mu * (row.entry[b].value - def) * k;
				}
				if (def != 0.0)
				{
					// the default value applies to the remaining (inactive) variables
					double upd = mu * def * k;
					for (std::size_t b=ex.active; b<m_cardP; b++)
					{
						std::size_t f = ex.avar[b];
						m_gradient(f) -= upd;
					}
				}
			}
		}

		// NOTE(review): m_activeEx and m_activeVar are not reset to
		// m_numExamples/m_numVariables here although the function is meant
		// to re-activate everything — verify against upstream.
		for (std::size_t i=0; i<m_numExamples; i++)
			m_examples[i].active = m_cardP;
	}
401 
402  ///
403  /// \brief select the working set
404  ///
405  /// Select one or two numVariables for the sub-problem
406  /// and return the maximal KKT violation. The method
407  /// MAY select the same index for i and j. In that
408  /// case the working set consists of a single variables.
409  /// The working set may be invalid if the method reports
410  /// a KKT violation of zero, indicating optimality.
	///
	/// \brief select the working set
	///
	/// Select one or two numVariables for the sub-problem
	/// and return the maximal KKT violation. The method
	/// MAY select the same index for i and j. In that
	/// case the working set consists of a single variables.
	/// The working set may be invalid if the method reports
	/// a KKT violation of zero, indicating optimality.
	double selectWorkingSet(std::size_t& i, std::size_t& j)
	{

		//first order selection
		//we select the best variable along the box constraint (for a step between samples)
		//and the best gradient and example index for a step along the simplex (step inside a sample)
		double maxGradient = 0;//max gradient for variables between samples (box constraints)
		double maxSimplexGradient = 0;//max gradient along the simplex constraints
		std::size_t maxSimplexExample = 0;//example with the maximum simplex constraint
		i = j = 0;
		// first order selection
		for (std::size_t e=0; e< m_activeEx; e++)
		{
			Example& ex = m_examples[e];
			bool canGrow = ex.varsum < m_C;

			//calculate the maximum violating pair for the example.
			std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
			double up = pair.first.first;
			double down = pair.second.first;

			// bounded example: only a pairwise step along the simplex plane is possible
			if(!canGrow && up - down > maxSimplexGradient){
				maxSimplexGradient = up-down;
				maxSimplexExample = e;
			}
			// free example: the ascent direction is feasible
			if (canGrow && up > maxGradient) {
				maxGradient = up;
				i = pair.first.second;
			}
			// the descent direction is always feasible (alpha > 0)
			if (-down > maxGradient) {
				maxGradient = -down;
				i = pair.second.second;
			}
		}

		//find the best possible partner of the variable
		//by searching every other sample
		std::pair<std::pair<std::size_t,std::size_t> ,double > boxPair = maxGainBox(i);
		double bestGain = boxPair.second;
		std::pair<std::size_t, std::size_t> best = boxPair.first;

		//always search the simplex of the variable
		std::pair<std::pair<std::size_t,std::size_t> ,double > simplexPairi = maxGainSimplex(m_variables[i].example);
		if(simplexPairi.second > bestGain){
			best = simplexPairi.first;
			bestGain = simplexPairi.second;
		}
		//finally search also in the best simplex
		if(maxSimplexGradient > 0){
			std::pair<std::pair<std::size_t,std::size_t> ,double > simplexPairBest = maxGainSimplex(maxSimplexExample);
			if(simplexPairBest.second > bestGain){
				best = simplexPairBest.first;
				bestGain = simplexPairBest.second;
			}
		}
		i = best.first;
		j = best.second;
		//return the mvp gradient
		return std::max(maxGradient,maxSimplexGradient);
	}
471 
472  /// return the largest KKT violation
473  double checkKKT()const
474  {
475  double ret = 0.0;
476  for (std::size_t i=0; i<m_activeEx; i++){
477  Example const& ex = m_examples[i];
478  std::pair<std::pair<double,std::size_t>,std::pair<double,std::size_t> > pair = getSimplexMVP(ex);
479  double up = pair.first.first;
480  double down = pair.second.first;
481 
482  //check all search directions
483  //case 1: decreasing the value of a variable
484  ret = std::max(-down, ret);
485  //case 2: if we are not at the boundary increasing the variable
486  if(ex.varsum < m_C)
487  ret = std::max(up, ret);
488  //case 3: along the plane \sum_i alpha_i = m_C
489  if(ex.varsum == m_C)
490  ret = std::max(up-down, ret);
491  }
492  return ret;
493  }
494 
495 protected:
496 
497  /// data structure describing one variable of the problem
	/// data structure describing one variable of the problem
	struct Variable
	{
		///index into the example list
		std::size_t example;
		/// constraint corresponding to this variable
		std::size_t p;
		/// position of this variable inside its example's active-variable
		/// (avar) list; kept in sync by deactivateVariable
		std::size_t index;
		/// diagonal entry of the big Q-matrix
		double diagonal;
	};
509 
	/// data structure describing one training example
	struct Example
	{
		/// example index in the dataset, not the example vector!
		std::size_t index;
		/// label of this example
		unsigned int y;
		/// number of active variables; the active ones are avar[0..active-1]
		std::size_t active;
		/// list of all m_cardP variables, in order of the p-index
		std::size_t* var;
		/// list of active variables
		std::size_t* avar;
		/// total sum of all variables of this example
		/// (cached left-hand side of the simplex constraint, bounded by m_C)
		double varsum;
		/// diagonal entry of the kernel matrix k(x,x);
		double diagonal;
	};
528 
529  /// \brief Finds the second variable of a working set using maximum gain and returns the pair and gain
530  ///
531  /// The variable is searched in-between samples. And not inside the simplex of i.
532  /// It returns the best pair (i,j) as well as the gain. If the first variable
533  /// can't make a step, gain 0 is returned with pair(i,i).
534  std::pair<std::pair<std::size_t,std::size_t>,double> maxGainBox(std::size_t i)const{
535  std::size_t e = m_variables[i].example;
537  unsigned int pi = m_variables[i].p;
538  unsigned int yi = m_examples[e].y;
539  double Qii = m_variables[i].diagonal;
540  double gi = m_gradient(i);
541  if(m_examples[e].varsum == m_C && gi > 0)
542  return std::make_pair(std::make_pair(i,i),0.0);
543 
544 
545  QpFloatType* k = m_kernelMatrix.row(e, 0, m_activeEx);
546 
547  std::size_t bestj = i;
548  double bestGain = gi * gi / Qii;
549 
550  for (std::size_t a=0; a<m_activeEx; a++)
551  {
552  //don't search the simplex of the first variable
553  if(a == e) continue;
554 
555  Example const& exa = m_examples[a];
556  unsigned int ya = exa.y;
557  bool canGrow = exa.varsum != m_C;
558 
559  typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * (yi * m_cardP + pi) + ya);
560  QpFloatType def = row.defaultvalue;
561 
562  for (std::size_t p=0, b=0; p < m_cardP; p++)
563  {
564  std::size_t j = exa.var[p];
565 
566  std::size_t Qjj = m_variables[j].diagonal;
567  double gj = m_gradient(j);
568  double Qij = def * k[a];
569  //check whether we are at an existing element of the sparse row
570  if( b != row.size && p == row.entry[b].index){
571  Qij = row.entry[b].value * k[a];
572  ++b;//move to next element
573  }
574 
575  //don't check variables which are shrinked or bounded
576  if(j >= m_activeVar || (m_alpha(j) == 0 && gj <= 0)|| (!canGrow && gj >= 0))
577  continue;
578 
579  double gain = detail::maximumGainQuadratic2D(Qii, Qjj, Qij, gi,gj);
580  if( bestGain < gain){
581  bestj = j;
582  bestGain = gain;
583  }
584  }
585  }
586  return std::make_pair(std::make_pair(i,bestj),bestGain);
587  }
588 
589  ///\brief Returns the best variable pair (i,j) and gain for a given example.
590  ///
591  /// For a given example all possible pairs of variables are checkd and the one giving
592  /// the maximum gain is returned. This method has a special handling for the
593  /// simplex case.
	///\brief Returns the best variable pair (i,j) and gain for a given example.
	///
	/// For a given example all possible pairs of variables are checked and the one giving
	/// the maximum gain is returned. This method has a special handling for the
	/// simplex case.
	std::pair<std::pair<std::size_t,std::size_t>,double> maxGainSimplex(std::size_t e)const{
		Example const& ex = m_examples[e];
		std::size_t pc = ex.active;//number of active variables for this example
		unsigned int y = ex.y;//label of the example
		bool canGrow = ex.varsum < m_C; //are we inside the simplex?
		double Qee = m_examples[e].diagonal; //kernel entry of the example

		double bestGain = -1e100;
		std::size_t besti = 0;
		std::size_t bestj = 0;

		//search through all possible variable pairs
		//for every pair we will build the quadratic subproblem
		//and than decide whether we can do
		// 1. a valid step in the inside of the simplex
		//    that is canGrow==true or the gradients of both variables point inside
		// 2. a valid step along the simplex constraint,
		//    that is cangrow == true and both variables point outside)
		// 3. a 1-D step
		//    that is canGrow == true or alpha(i) > 0 & gradient(i) < 0

		//iterate over the active ones as the first variable
		for(std::size_t p1 = 0; p1 != pc; ++p1){
			std::size_t i = ex.avar[p1];
			double gi = m_gradient(i);
			double ai = m_alpha(i);
			double Qii = m_variables[i].diagonal;

			//check whether a 1-D gain is possible
			if((gi < 0 && m_alpha(i) > 0.0) || (gi > 0 && canGrow)){
				double gain = gi * gi / Qii;
				if(gain > bestGain){
					bestGain= gain;
					besti = i;
					bestj = i;
				}
			}

			//now create the 2D problem for all possible variable pairs
			//and than check for possible gain steps
			//find first the row of coefficients for M(y,y,i,j) for all j
			//question: is p1 == m_variables[ex.avar[p1]].p?
			//otherwise: is p1 == m_variables[ex.var[p1]].p for *all* variables?
			typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * (y * m_cardP + m_variables[i].p) + y);
			QpFloatType def = row.defaultvalue;

			//we need to iterate over all vars instead of only the active variables to be in sync with the matrix row
			//we will still overstep all inactive variables
			for(std::size_t p2 = 0, b=0; p2 != m_cardP; ++p2){
				std::size_t j = ex.var[p2];
				double gj = m_gradient(j);
				double aj = m_alpha(j);
				double Qjj = m_variables[j].diagonal;

				//create the offdiagonal element of the 2D problem
				double Qij = def * Qee;
				//check whether we are at an existing element of the sparse row
				if( b != row.size && p2 == row.entry[b].index){
					Qij = row.entry[b].value * Qee;
					++b;//move to next element
				}

				//ignore inactive variables or variables we already checked
				//(after the sparse-row bookkeeping above so b stays in sync)
				if(j >= m_activeVar || j <= i ){
					continue;
				}

				double gain = 0;
				//check whether we can move along the simplex constraint
				if(!canGrow && gi > 0 && gj > 0){
					double gainUp = 0;
					double gainDown = 0;
					//we need to check both search directions for ai
					if(aj > 0 && gi-gj > 0){
						gainUp = detail::maximumGainQuadratic2DOnLine(Qii, Qjj, Qij, gi,gj);
					}
					//also check whether a line search in the other direction works
					if (ai > 0 &&gj-gi > 0){
						gainDown = detail::maximumGainQuadratic2DOnLine(Qjj, Qii, Qij, gj,gi);
					}
					gain = std::max(gainUp,gainDown);
				}
				//else we are inside the simplex
				//in this case only check that both variables can shrink if needed
				else if(!(gi <= 0 && ai == 0) && !(gj<= 0 && aj == 0)){
					gain = detail::maximumGainQuadratic2D(Qii, Qjj, Qij, gi,gj);
				}

				//accept only maximum gain
				if(gain > bestGain){
					bestGain= gain;
					besti = i;
					bestj = j;
				}

			}
		}
		//return best pair and possible gain
		return std::make_pair(std::make_pair(besti,bestj),bestGain);
	}
694 
695  /// \brief For a given simplex returns the MVP indicies (max_up,min_down)
	/// \brief For a given simplex returns the MVP indicies (max_up,min_down)
	///
	/// Scans the active variables of the example and returns
	/// ((up, argmax_up), (down, argmin_down)), where "up" is the largest
	/// gradient (best ascent direction) and "down" the smallest gradient
	/// among variables with positive alpha (best descent direction).
	std::pair<
		std::pair<double,std::size_t>,
		std::pair<double,std::size_t>
	> getSimplexMVP(Example const& ex)const{
		unsigned int pc = ex.active;
		double up = -1e100;
		double down = 1e100;
		std::size_t maxUp = ex.avar[0];
		std::size_t minDown = ex.avar[0];
		for (std::size_t p = 0; p != pc; p++)
		{
			std::size_t v = ex.avar[p];
			double a = m_alpha(v);
			double g = m_gradient(v);
			if (g > up) {
				maxUp = v;
				up = g;
			}
			// only variables with positive alpha can be decreased
			if (a > 0.0 && g < down){
				minDown = v;
				down = g;
			}
		}
		return std::make_pair(std::make_pair(up,maxUp),std::make_pair(down,minDown));
	}
722 
723  void updateVarsum(std::size_t exampleId, double mu){
724  double& varsum = m_examples[exampleId].varsum;
725  varsum += mu;
726  if(varsum > 1.e-12 && m_C-varsum > 1.e-12*m_C)
727  return;
728  //recompute for numerical accuracy
729 
730  varsum = 0;
731  for(std::size_t p = 0; p != m_cardP; ++p){
732  std::size_t varIndex = m_examples[exampleId].var[p];
733  varsum += m_alpha[varIndex];
734  }
735 
736  if(varsum < 1.e-14)
737  varsum = 0;
738  if(m_C-varsum < 1.e-14*m_C)
739  varsum = m_C;
740  }
741 
742  void gradientUpdate(std::size_t r, double mu, float* q)
743  {
744  for ( std::size_t a= 0; a< m_activeEx; a++)
745  {
746  double k = q[a];
747  Example& ex = m_examples[a];
748  typename QpSparseArray<QpFloatType>::Row const& row = m_M.row(m_classes * r + ex.y);
749  QpFloatType def = row.defaultvalue;
750  for (std::size_t b=0; b<row.size; b++){
751  std::size_t p = row.entry[b].index;
752  m_gradient(ex.var[p]) -= mu * (row.entry[b].value - def) * k;
753  }
754  if (def != 0.0){
755  double upd = mu* def * k;
756  for (std::size_t b=0; b<ex.active; b++)
757  m_gradient(ex.avar[b]) -= upd;
758  }
759  }
760  }
761 
762  /// shrink a variable
763  void deactivateVariable(std::size_t v)
764  {
765  std::size_t ev = m_variables[v].example;
766  unsigned int iv = m_variables[v].index;
767  unsigned int pv = m_variables[v].p;
768  Example* exv = &m_examples[ev];
769 
770  std::size_t ih = exv->active - 1;
771  std::size_t h = exv->avar[ih];
772  m_variables[v].index = ih;
773  m_variables[h].index = iv;
774  std::swap(exv->avar[iv], exv->avar[ih]);
775  iv = ih;
776  exv->active--;
777 
778  std::size_t j = m_activeVar - 1;
779  std::size_t ej = m_variables[j].example;
780  unsigned int ij = m_variables[j].index;
781  unsigned int pj = m_variables[j].p;
782  Example* exj = &m_examples[ej];
783 
784  // exchange entries in the lists
785  std::swap(m_alpha(v), m_alpha(j));
787  std::swap(m_linear(v), m_linear(j));
789 
790  m_variables[exv->avar[iv]].index = ij;
791  m_variables[exj->avar[ij]].index = iv;
792  exv->avar[iv] = j;
793  exv->var[pv] = j;
794  exj->avar[ij] = v;
795  exj->var[pj] = v;
796 
797  m_activeVar--;
798 
799  //finally check if the example is needed anymore
800  if(exv->active == 0)
801  deactivateExample(ev);
802  }
803 
804  /// shrink an example
805  void deactivateExample(std::size_t e)
806  {
808  std::size_t j = m_activeEx - 1;
809  m_activeEx--;
810  if(e == j) return;
811 
813 
814  std::size_t* pe = m_examples[e].var;
815  std::size_t* pj = m_examples[j].var;
816  for (std::size_t v = 0; v < m_cardP; v++)
817  {
818  SHARK_ASSERT(pj[v] >= m_activeVar);
819  m_variables[pe[v]].example = e;
820  m_variables[pj[v]].example = j;
821  }
822  m_kernelMatrix.flipColumnsAndRows(e, j);
823  }
824 
825  /// \brief Returns the original index of the example of a variable in the dataset before optimization.
826  ///
827  /// Shrinking is an internal detail so the communication with the outside world uses the original indizes.
	/// \brief Returns the original index of the example of a variable in the dataset before optimization.
	///
	/// Shrinking is an internal detail so the communication with the outside world uses the original indizes.
	std::size_t originalIndex(std::size_t v)const{
		std::size_t i = m_variables[v].example;
		return m_examples[i].index;//i before shrinking
	}
832 
833  /// kernel matrix (precomputed matrix or matrix cache)
834  Matrix& m_kernelMatrix;
835 
836  /// kernel modifiers
837  QpSparseArray<QpFloatType> const& m_M; // M(|P|*y_i+p, y_j, q)
838 
839  /// complexity constant; upper bound on all variabless
840  double m_C;
841 
842  /// number of classes in the problem
843  std::size_t m_classes;
844 
845  /// number of dual variables per example
846  std::size_t m_cardP;
847 
848  /// number of examples in the problem (size of the kernel matrix)
849  std::size_t m_numExamples;
850 
851  /// number of variables in the problem = m_numExamples * m_cardP
852  std::size_t m_numVariables;
853 
854  /// linear part of the objective function
855  RealVector m_linear;
856 
857  /// solution candidate
858  RealVector m_alpha;
859 
860  /// gradient of the objective function
861  /// The m_gradient array is of fixed size and not subject to shrinking.
862  RealVector m_gradient;
863 
864  /// information about each training example
865  std::vector<Example> m_examples;
866 
867  /// information about each variable of the problem
868  std::vector<Variable> m_variables;
869 
870  /// space for the example[i].var pointers
871  std::vector<std::size_t> m_storage1;
872 
873  /// space for the example[i].avar pointers
874  std::vector<std::size_t> m_storage2;
875 
876  /// number of currently active examples
877  std::size_t m_activeEx;
878 
879  /// number of currently active variables
880  std::size_t m_activeVar;
881 
882  /// should the m_problem use the shrinking heuristics?
884 
885  /// true if the problem has already been unshrinked
887 };
888 
889 
890 template<class Matrix>
892 public:
893  typedef typename Matrix::QpFloatType QpFloatType;
894  BiasSolverSimplex(QpMcSimplexDecomp<Matrix>* problem) : m_problem(problem){}
895 
896  void solve(
897  RealVector& bias,
898  QpStoppingCondition& stop,
899  QpSparseArray<QpFloatType> const& nu,
900  bool sumToZero = false
901  ){
902  std::size_t classes = bias.size();
903  std::size_t numExamples = m_problem->getNumExamples();
904  std::size_t cardP = m_problem->cardP();
905  RealVector stepsize(classes, 0.01);
906  RealVector prev(classes,0);
907  RealVector step(classes);
908 
909  do{
910  QpSolver<QpMcSimplexDecomp<Matrix> > solver(*m_problem);
911  solver.solve(stop);
912 
913  // Rprop loop to update the bias
914  while (true)
915  {
916  RealMatrix dualGradient = m_problem->solutionGradient();
917  // compute the primal m_gradient w.r.t. bias
918  RealVector grad(classes,0);
919  for (std::size_t i=0; i<numExamples; i++)
920  {
921  std::size_t largestP = cardP;
922  double largest_value = 0.0;
923  for (std::size_t p=0; p<cardP; p++)
924  {
925  if (dualGradient(i,p) > largest_value)
926  {
927  largest_value = dualGradient(i,p);
928  largestP = p;
929  }
930  }
931  if (largestP < cardP)
932  {
933  unsigned int y = m_problem->label(i);
934  typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP + largestP);
935  for (std::size_t b=0; b != row.size; b++)
936  grad(row.entry[b].index) -= row.entry[b].value;
937  }
938  }
939 
940  if (sumToZero)
941  {
942  // project the m_gradient
943  grad -= sum(grad) / classes;
944  }
945 
946  // Rprop
947  for (std::size_t c=0; c<classes; c++)
948  {
949  double g = grad(c);
950  if (g > 0.0)
951  step(c) = -stepsize(c);
952  else if (g < 0.0)
953  step(c) = stepsize(c);
954 
955  double gg = prev(c) * grad(c);
956  if (gg > 0.0)
957  stepsize(c) *= 1.2;
958  else
959  stepsize(c) *= 0.5;
960  }
961  prev = grad;
962 
963  if (sumToZero)
964  {
965  // project the step
966  step -= sum(step) / classes;
967  }
968 
969  // update the solution and the dual m_gradient
970  bias += step;
971  performBiasUpdate(step,nu);
972 
973  if (max(stepsize) < 0.01 * stop.minAccuracy) break;
974  }
975  }while(m_problem->checkKKT()> stop.minAccuracy);
976  }
977 private:
978  void performBiasUpdate(
979  RealVector const& step, QpSparseArray<QpFloatType> const& nu
980  ){
981  std::size_t numExamples = m_problem->getNumExamples();
982  std::size_t cardP = m_problem->cardP();
983  RealMatrix deltaLinear(numExamples,cardP,0.0);
984  for (std::size_t i=0; i<numExamples; i++){
985  for (std::size_t p=0; p<cardP; p++){
986  unsigned int y = m_problem->label(i);
987  // delta = \sum_m \nu_{m,p,y_i} \Delta b(m)
988  typename QpSparseArray<QpFloatType>::Row const& row = nu.row(y * cardP +p);
989  for (std::size_t b=0; b<row.size; b++)
990  {
991  deltaLinear(i,p) -= row.entry[b].value * step(row.entry[b].index);
992  }
993  }
994  }
995  m_problem->addDeltaLinear(deltaLinear);
996 
997  }
	/// the wrapped dual problem; referenced, not owned
	QpMcSimplexDecomp<Matrix>* m_problem;
};
1000 
1001 
1002 }
1003 #endif