BoxConstrainedProblems.h — source listing (extracted from a Doxygen documentation page; embedded line numbers are extraction artifacts).
1 /*!
2  *
3  *
4  * \brief Quadratic program definitions.
5  *
6  *
7  *
8  * \author T. Glasmachers, O. Krause
9  * \date 2013
10  *
11  *
12  * \par Copyright 1995-2015 Shark Development Team
13  *
14  * <BR><HR>
15  * This file is part of Shark.
16  * <http://image.diku.dk/shark/>
17  *
18  * Shark is free software: you can redistribute it and/or modify
19  * it under the terms of the GNU Lesser General Public License as published
20  * by the Free Software Foundation, either version 3 of the License, or
21  * (at your option) any later version.
22  *
23  * Shark is distributed in the hope that it will be useful,
24  * but WITHOUT ANY WARRANTY; without even the implied warranty of
25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26  * GNU Lesser General Public License for more details.
27  *
28  * You should have received a copy of the GNU Lesser General Public License
29  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
30  *
31  */
32 #ifndef SHARK_ALGORITHMS_QP_BOXCONSTRAINEDPROBLEMS_H
33 #define SHARK_ALGORITHMS_QP_BOXCONSTRAINEDPROBLEMS_H
34 
36 #include <shark/Algorithms/QP/Impl/AnalyticProblems.h>
37 
38 namespace shark {
39 
40 /// \brief Working set selection by maximization of the projected gradient.
41 ///
42 /// This selection operator picks the largest and second largest variable index if possible.
// NOTE(review): the struct declaration line was dropped by the documentation
// extraction (the identifier was a hyperlink); restored from upstream Shark.
struct WS2MaximumGradientCriterion{
	/// \brief Select the two variables with the largest projected gradient components.
	///
	/// Scans all active variables and returns in i the index with the largest
	/// absolute projected gradient and in j the runner-up. The projection
	/// respects the box constraints: a positive gradient only counts if the
	/// variable is not at its upper bound, a negative one only if it is not at
	/// its lower bound.
	///
	/// \param problem the quadratic problem; must offer active(), gradient(),
	///        isUpperBound() and isLowerBound()
	/// \param i [out] index of the most violating variable
	/// \param j [out] index of the second most violating variable (set to i
	///        when only one improving direction exists)
	/// \return the largest projected gradient magnitude; 0 means the current
	///         point is optimal with respect to this criterion
	template<class Problem>
	double operator()(Problem& problem, std::size_t& i, std::size_t& j){
		i = 0;
		j = 0;
		double largestGradient = 0;
		double secondLargestGradient = 0;

		for (std::size_t a = 0; a < problem.active(); a++){
			double g = problem.gradient(a);
			// candidate for an "up" step: positive gradient and room to grow
			if (!problem.isUpperBound(a) && g > secondLargestGradient){
				secondLargestGradient = g;
				j = a;
			}
			// candidate for a "down" step: negative gradient and room to shrink
			if (!problem.isLowerBound(a) && -g > secondLargestGradient){
				secondLargestGradient = -g;
				j = a;
			}
			// maintain the invariant largestGradient >= secondLargestGradient,
			// with i and j tracking the corresponding variable indices
			if(secondLargestGradient > largestGradient){
				std::swap(secondLargestGradient,largestGradient);
				std::swap(i,j);
			}
		}
		// only one improving direction found: fall back to a 1D working set
		if(secondLargestGradient == 0)
			j = i;
		return largestGradient;
	}

	/// \brief This criterion is stateless; nothing to reset.
	void reset(){}
};
73 
74 /// \brief Working set selection by maximization of the projected gradient.
75 ///
76 /// This selection operator picks a single variable index.
78  template<class Problem>
79  double operator()(Problem& problem, std::size_t& i, std::size_t& j){
81  double value = criterium(problem, i,j);
82  j = i; //we just use one variable here
83  return value;
84  }
85 
86  void reset(){}
87 };
88 
89 /// \brief Working set selection by maximization of the dual objective gain.
91  template<class Problem>
92  double operator()(Problem& problem, std::size_t& i, std::size_t& j){
93  //choose first variable by first order criterion
94  MaximumGradientCriterion firstOrder;
95  double maxGrad = firstOrder(problem,i,j);
96  if (maxGrad == 0.0)
97  return maxGrad;
98 
99  double gi = problem.gradient(i);
100  typename Problem::QpFloatType* q = problem.quadratic().row(i, 0, problem.active());
101  double Qii = problem.diagonal(i);
102 
103  // select second variable j with second order method
104  double maxGain = 0.0;
105  for (std::size_t a=0; a<problem.active(); a++)
106  {
107  if (a == i) continue;
108  double ga = problem.gradient(a);
109  if (
110  (!problem.isLowerBound(a) && ga < 0.0)
111  || (!problem.isUpperBound(a) && ga > 0.0)
112  ){
113  double Qia = q[a];
114  double Qaa = problem.diagonal(a);
115  double gain = detail::maximumGainQuadratic2D(Qii,Qaa,Qia,gi,ga);
116  if (gain > maxGain)
117  {
118  maxGain = gain;
119  j = a;
120  }
121  }
122  }
123 
124  return maxGrad; // solution is not optimal
125  }
126 
127  void reset(){}
128 };
129 
130 /// \brief Quadratic program with box constraints.
131 ///
132 /// \par
133 /// An instance of this class represents a quadratic program of the type
134 /// TODO: write documentation!
135 ///
136 template<class SVMProblem>
138 public:
139  typedef typename SVMProblem::QpFloatType QpFloatType;
142  //~ typedef MaximumGradientCriterion PreferedSelectionStrategy;
143 
144  BoxConstrainedProblem(SVMProblem& problem)
145  : m_problem(problem)
146  , m_gradient(problem.linear)
147  , m_active (problem.dimensions())
148  , m_alphaStatus(problem.dimensions(),AlphaFree){
149  //compute the gradient if alpha != 0
150  for (std::size_t i=0; i != dimensions(); i++){
151  double v = alpha(i);
152  if (v != 0.0){
153  QpFloatType* q = quadratic().row(i, 0, dimensions());
154  for (std::size_t a=0; a < dimensions(); a++)
155  m_gradient(a) -= q[a] * v;
156  }
157  updateAlphaStatus(i);
158  }
159  }
160  std::size_t dimensions()const{
161  return m_problem.dimensions();
162  }
163 
164  std::size_t active()const{
165  return m_active;
166  }
167 
168  double boxMin(std::size_t i)const{
169  return m_alphaStatus[i]==AlphaDeactivated? alpha(i): m_problem.boxMin(i);
170  }
171  double boxMax(std::size_t i)const{
172  return m_alphaStatus[i]==AlphaDeactivated? alpha(i): m_problem.boxMax(i);
173  }
174  bool isLowerBound(std::size_t i)const{
175  return m_alphaStatus[i] & AlphaLowerBound;
176  }
177  bool isUpperBound(std::size_t i)const{
178  return m_alphaStatus[i] & AlphaUpperBound;
179  }
180  bool isDeactivated(std::size_t i)const{
181  return isUpperBound(i) && isLowerBound(i);
182  }
183 
184  /// representation of the quadratic part of the objective function
185  MatrixType& quadratic(){
186  return m_problem.quadratic;
187  }
188 
189  double linear(std::size_t i)const{
190  return m_problem.linear(i);
191  }
192 
193  double alpha(std::size_t i)const{
194  return m_problem.alpha(i);
195  }
196 
197  double diagonal(std::size_t i)const{
198  return m_problem.diagonal(i);
199  }
200 
201  double gradient(std::size_t i)const{
202  return m_gradient(i);
203  }
204 
205  std::size_t permutation(std::size_t i)const{
206  return m_problem.permutation[i];
207  }
208 
209  RealVector getUnpermutedAlpha()const{
210  RealVector alpha(dimensions());
211  for (std::size_t i=0; i<dimensions(); i++)
212  alpha(m_problem.permutation[i]) = m_problem.alpha(i);
213  return alpha;
214  }
215 
	///\brief Does an update of SMO given a working set with indices i and j.
	///
	/// If i == j the one-dimensional sub-problem in variable i is solved
	/// analytically; otherwise the two-dimensional sub-problem in (i,j) is
	/// solved inside the box. Afterwards the gradient of all active variables
	/// and the bound status of the changed variables are updated.
	virtual void updateSMO(std::size_t i, std::size_t j){
		SIZE_CHECK(i < active());
		SIZE_CHECK(j < active());
		if(i == j){//both variables are identical, thus solve the 1-d problem.
			// get the matrix row corresponding to the working set
			QpFloatType* q = quadratic().row(i, 0, active());

			// update alpha, that is, solve the sub-problem defined by i
			// and compute the stepsize mu of the step
			double mu = -alpha(i);
			detail::solveQuadraticEdge(m_problem.alpha(i),gradient(i),diagonal(i),boxMin(i),boxMax(i));
			mu+=alpha(i); // mu = newAlpha - oldAlpha

			// rank-one gradient update: g(a) -= mu * Q(i,a) for all active a
			for (std::size_t a = 0; a < active(); a++)
				m_gradient(a) -= mu * q[a];

			updateAlphaStatus(i);
			return;
		}

		double Li = boxMin(i);
		double Ui = boxMax(i);
		double Lj = boxMin(j);
		double Uj = boxMax(j);

		// get the matrix rows corresponding to the working set
		QpFloatType* qi = quadratic().row(i, 0, active());
		QpFloatType* qj = quadratic().row(j, 0, active());

		// solve the 2D sub-problem imposed by the two chosen variables
		// and compute the stepsizes mu
		double mui = -alpha(i);
		double muj = -alpha(j);
		detail::solveQuadratic2DBox(m_problem.alpha(i), m_problem.alpha(j),
			m_gradient(i), m_gradient(j),
			diagonal(i), qi[j], diagonal(j),
			Li, Ui, Lj, Uj
		);
		mui += alpha(i);
		muj += alpha(j);

		// rank-two gradient update reflecting both alpha changes
		for (std::size_t a = 0; a < active(); a++)
			m_gradient(a) -= mui * qi[a] + muj * qj[a];

		updateAlphaStatus(i);
		updateAlphaStatus(j);
	}
266 
267  ///\brief Returns the current function value of the problem.
268  double functionValue()const{
269  return 0.5*inner_prod(m_gradient+m_problem.linear,m_problem.alpha);
270  }
271 
272  bool shrink(double){return false;}
273  void reshrink(){}
274  void unshrink(){}
275 
276  ///\brief Remove the i-th example from the problem.
277  virtual void deactivateVariable(std::size_t i){
278  SIZE_CHECK(i < dimensions());
279  double alphai = alpha(i);
280  m_problem.alpha(i) = 0;
281  //update the internal state
282  QpFloatType* qi = quadratic().row(i, 0, active());
283  for (std::size_t a = 0; a < active(); a++)
284  m_gradient(a) += alphai * qi[a];
285  m_alphaStatus[i] = AlphaDeactivated;
286  }
287  ///\brief Reactivate an previously deactivated variable.
288  void activateVariable(std::size_t i){
289  SIZE_CHECK(i < dimensions());
290  updateAlphaStatus(i);
291  }
292 
293  /// exchange two variables via the permutation
294  void flipCoordinates(std::size_t i, std::size_t j)
295  {
296  SIZE_CHECK(i < dimensions());
297  SIZE_CHECK(j < dimensions());
298  if (i == j) return;
299 
300  m_problem.flipCoordinates(i, j);
301  std::swap( m_gradient[i], m_gradient[j]);
302  std::swap( m_alphaStatus[i], m_alphaStatus[j]);
303  }
304 
305  /// \brief adapts the linear part of the problem and updates the internal data structures accordingly.
306  virtual void setLinear(std::size_t i, double newValue){
307  m_gradient(i) -= linear(i);
308  m_gradient(i) += newValue;
309  m_problem.linear(i) = newValue;
310  }
311 
312  double checkKKT()const{
313  double maxViolation = 0.0;
314  for(std::size_t i = 0; i != dimensions(); ++i){
315  if(isDeactivated(i)) continue;
316  if(!isUpperBound(i)){
317  maxViolation = std::max(maxViolation, gradient(i));
318  }
319  if(!isLowerBound(i)){
320  maxViolation = std::max(maxViolation, -gradient(i));
321  }
322  }
323  return maxViolation;
324  }
325 
protected:
	/// The wrapped problem supplying matrix, bounds, alpha and permutation.
	SVMProblem& m_problem;

	/// gradient of the objective function at the current alpha
	RealVector m_gradient;

	/// Number of variables in the active set (prefix of the internal ordering).
	std::size_t m_active;

	/// Per-variable status flags (AlphaFree / AlphaLowerBound / AlphaUpperBound / AlphaDeactivated).
	std::vector<char> m_alphaStatus;

	/// \brief Recompute the bound-status flags of variable i from its current value.
	///
	/// NOTE(review): exact floating point comparison with the bounds appears
	/// intentional — presumably the analytic solvers clip alpha exactly onto
	/// the bounds; verify against Impl/AnalyticProblems.h.
	void updateAlphaStatus(std::size_t i){
		SIZE_CHECK(i < dimensions());
		m_alphaStatus[i] = AlphaFree;
		if(m_problem.alpha(i) == boxMax(i))
			m_alphaStatus[i] |= AlphaUpperBound;
		if(m_problem.alpha(i) == boxMin(i))
			m_alphaStatus[i] |= AlphaLowerBound;
	}
};
345 
346 
347 
348 template<class Problem>
350 private:
352 public:
356 
357  BoxConstrainedShrinkingProblem(Problem& problem, bool shrink=true)
358  : base_type(problem)
359  , m_isUnshrinked(false)
360  , m_shrink(shrink)
361  , m_gradientEdge(problem.linear){}
362 
363  using base_type::alpha;
364  using base_type::gradient;
365  using base_type::linear;
366  using base_type::active;
367  using base_type::dimensions;
368  using base_type::quadratic;
369  using base_type::isLowerBound;
370  using base_type::isUpperBound;
371  using base_type::boxMin;
372  using base_type::boxMax;
373 
374  virtual void updateSMO(std::size_t i, std::size_t j){
375  double aiOld = alpha(i);
376  double ajOld = alpha(j);
377  //call base class to do the step
378  base_type::updateSMO(i,j);
379  double ai = alpha(i);
380  double aj = alpha(j);
381 
382  // update the gradient edge data structure to keep up with changes
383  updateGradientEdge(i,aiOld,ai);
384  updateGradientEdge(j,ajOld,aj);
385  }
386 
	/// \brief Shrink the problem: deactivate variables that cannot improve the objective.
	///
	/// When the maximum KKT violation over the active set drops below
	/// 10*epsilon, the problem is first unshrinked once so that wrongly
	/// shrinked variables get a chance to re-enter. Variables pinned to a
	/// bound with a gradient pointing out of the box are then moved to the
	/// inactive suffix of the internal variable ordering.
	bool shrink(double epsilon){
		if(!m_shrink) return false;

		double largestUp;
		double smallestDown;
		getMaxKKTViolations(largestUp,smallestDown,active());

		// check whether unshrinking is necessary at this accuracy level
		if (!m_isUnshrinked && (largestUp - smallestDown < 10.0 * epsilon))
		{
			unshrink();
			//recalculate maximum KKT violation for immediate re-shrinking
			getMaxKKTViolations(largestUp,smallestDown,dimensions());
		}
		//shrink; clamp at 0 so a variable is only removed when no improving step exists
		smallestDown = std::min(smallestDown,0.0);
		largestUp = std::max(largestUp,0.0);
		// iterate backwards so the swap with the last active element is safe
		for (std::size_t a = this->active(); a > 0; --a){
			if(testShrinkVariable(a-1,largestUp,smallestDown)){
				flipCoordinates(a-1,active()-1);
				--this->m_active;
			}
		}
		return true;
	}
412 
	///\brief Unshrink the problem: bring all variables back into the active set.
	void unshrink(){
		if (active() == dimensions()) return;
		m_isUnshrinked = true;

		// recompute the gradient of the whole problem.
		// we assume here that all shrinked variables are on the border of the problem.
		// the gradient of the active components is already correct and
		// we store the gradient of the subset of variables which are on the
		// borders of the box for the whole set.
		// Thus we only have to recompute the part of the gradient which is
		// based on variables in the active set which are not on the border.
		for (std::size_t a = active(); a < dimensions(); a++)
			this->m_gradient(a) = m_gradientEdge(a);

		for (std::size_t i = 0; i < active(); i++)
		{
			//check whether alpha value is already stored in gradientEdge
			if (isUpperBound(i) || isLowerBound(i)) continue;

			// free variable: its contribution is missing from the edge
			// gradient, so subtract it for the formerly inactive components
			QpFloatType* q = quadratic().row(i, 0, dimensions());
			for (std::size_t a = active(); a < dimensions(); a++)
				this->m_gradient(a) -= alpha(i) * q[a] ;
		}

		this->m_active = dimensions();
	}
440 
	/// \brief Enable or disable shrinking; disabling forces a full unshrink.
	void setShrinking(bool shrinking){
		m_shrink = shrinking;
		if(!shrinking)
			unshrink();
	}

	/// \brief Scales all box constraints by a constant factor and adapts the solution by scaling it by the same factor.
	///
	/// NOTE(review): base_type::scaleBoxConstraints is not visible in this
	/// excerpt. When the two factors differ, the edge gradient is reset to the
	/// linear term — presumably because no variable can remain exactly on a
	/// bound after a non-uniform rescaling; verify against the base class.
	void scaleBoxConstraints(double factor, double variableScalingFactor){
		base_type::scaleBoxConstraints(factor,variableScalingFactor);
		if(factor != variableScalingFactor){
			// rebuild the edge gradient from scratch: only the linear part remains
			for(std::size_t i = 0; i != dimensions(); ++i){
				m_gradientEdge(i) = linear(i);
			}
		}
		else{
			// uniform scaling: the quadratic contribution (edge gradient minus
			// linear term) scales with the same factor, the linear part stays fixed
			for(std::size_t i = 0; i != dimensions(); ++i){
				m_gradientEdge(i) -= linear(i);
				m_gradientEdge(i) *= factor;
				m_gradientEdge(i) += linear(i);
			}
		}
	}
463 
464  virtual void deactivateVariable(std::size_t i){
465  SIZE_CHECK(i < dimensions());
466  double alphai = alpha(i);
467  base_type::deactivateVariable(i);
468  updateGradientEdge(i,alphai,0.0);
469  }
470 
471  /// \brief adapts the linear part of the problem and updates the internal data structures accordingly.
472  virtual void setLinear(std::size_t i, double newValue){
473  m_gradientEdge(i) -= linear(i);
474  m_gradientEdge(i) += newValue;
475  base_type::setLinear(i,newValue);
476  }
477 
478  ///\brief swap indizes (i,j)
479  void flipCoordinates(std::size_t i,std::size_t j){
480  base_type::flipCoordinates(i,j);
481  std::swap( m_gradientEdge[i], m_gradientEdge[j]);
482  }
protected:
	///\brief Updates the edge-part of the gradient when an alpha value was changed.
	///
	/// This function is called whenever an alpha value changes. The edge
	/// gradient tracks, for the full variable set, the gradient contribution
	/// of all variables that sit on a bound of the box; variables strictly
	/// inside the box are not part of it.
	///
	/// \param i index of the changed variable (must be active)
	/// \param oldAlpha value before the change
	/// \param newAlpha value after the change
	void updateGradientEdge(std::size_t i, double oldAlpha, double newAlpha){
		SIZE_CHECK(i < active());
		// nothing to track without shrinking, or if nothing changed
		if(!m_shrink || oldAlpha==newAlpha) return;
		bool isInsideOld = oldAlpha > boxMin(i) && oldAlpha < boxMax(i);
		bool isInsideNew = newAlpha > boxMin(i) && newAlpha < boxMax(i);
		//check if variable is relevant at all, that means that old and new alpha value are inside
		//or old alpha is 0 and new alpha inside
		if( (oldAlpha == 0 || isInsideOld) && isInsideNew )
			return;

		//compute change to the gradient
		double diff = 0;
		if(!isInsideOld)//the value was on a border, so remove its old influence from the gradient
			diff -=oldAlpha;
		if(!isInsideNew){//variable entered a boundary or changed from one boundary to another
			diff += newAlpha;
		}

		// apply the rank-one correction over the FULL variable set
		QpFloatType* q = quadratic().row(i, 0, dimensions());
		for(std::size_t a = 0; a != dimensions(); ++a){
			m_gradientEdge(a) -= diff*q[a];
		}
	}
511 private:
512 
513  bool testShrinkVariable(std::size_t a, double largestUp, double smallestDown)const{
514  if (
515  ( isLowerBound(a) && gradient(a) < smallestDown)
516  || ( isUpperBound(a) && gradient(a) >largestUp)
517  ){
518  // In this moment no feasible step including this variable
519  // can improve the objective. Thus deactivate the variable.
520  return true;
521  }
522  return false;
523  }
524 
525  void getMaxKKTViolations(double& largestUp, double& smallestDown, std::size_t maxIndex){
526  largestUp = -1e100;
527  smallestDown = 1e100;
528  for (std::size_t a = 0; a < maxIndex; a++){
529  if (!isLowerBound(a))
530  smallestDown = std::min(smallestDown,gradient(a));
531  if (!isUpperBound(a))
532  largestUp = std::max(largestUp,gradient(a));
533  }
534  }
535 
	/// True once shrink() has triggered a full unshrink; prevents repeated
	/// unshrinking at the same accuracy level.
	bool m_isUnshrinked;

	///\brief true if shrinking is to be used.
	bool m_shrink;

	///\brief Stores the gradient of the alpha dimensions which are either 0 or C.
	RealVector m_gradientEdge;
};
544 
545 }
546 #endif