MergeBudgetMaintenanceStrategy.h
//===========================================================================
/*!
 *
 *
 * \brief Merge budget maintenance strategy
 *
 * \par
 * This is a budget strategy that adds a new vector by merging
 * a pair of budget vectors. The pair to merge is found by first
 * searching for the budget vector with the smallest alpha-coefficients
 * (measured in 2-norm), and then finding the second one by
 * computing a certain degradation measure. This is therefore linear
 * in the size of the budget.
 *
 * \par
 * The method is an implementation of the merge strategy
 * given in Wang, Crammer, Vucetic: "Breaking the Curse of Kernelization:
 * Budgeted Stochastic Gradient Descent for Large-Scale SVM Training"
 * and owes very much to the implementation in BudgetedSVM.
 *
 *
 *
 * \author Aydin Demircioglu
 * \date 2014
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_MODELS_MERGEBUDGETMAINTENANCESTRATEGY_H
#define SHARK_MODELS_MERGEBUDGETMAINTENANCESTRATEGY_H

#include <shark/Algorithms/Trainers/Budgeted/AbstractBudgetMaintenanceStrategy.h>

#include <shark/Data/Dataset.h>
#include <shark/Data/DataView.h>
#include <shark/Models/Converter.h>
#include <shark/Models/Kernels/KernelExpansion.h>
#include <shark/Algorithms/GradientDescent/LineSearch.h> // provides detail::dlinmin

#include <cmath>
#include <limits>


namespace shark
{

///
/// \brief Budget maintenance strategy that merges two vectors
///
/// \par This is a budget strategy that simply merges two budget vectors
/// in order to make space for a new one. This is done by first searching
/// for the budget vector with the smallest \f$ \|\alpha\|_2 \f$ coefficient;
/// only then is a second one searched for, by inspecting the expected
/// weight degradation after merging. The vector with the smallest weight
/// degradation is the one to merge with the first. With this heuristic,
/// the merging strategy has complexity \f$ \mathcal{O}(B) \f$.
/// Compared with the projection strategy, merging should be faster and still
/// obtain similar accuracy. Unfortunately, the reference paper of Wang,
/// Crammer and Vucetic does not report any timing numbers.
///
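/// \par
/// A typical use is to hand the strategy to a budgeted trainer. The sketch
/// below is illustrative only: it assumes the KernelBudgetedSGDTrainer
/// interface, and the exact constructor arguments may differ between Shark
/// versions.
/// \code
/// ClassificationDataset trainingData; // assume this has been loaded
/// GaussianRbfKernel<RealVector> kernel(0.5);
/// HingeLoss loss;
/// MergeBudgetMaintenanceStrategy<RealVector> strategy;
/// // hypothetical wiring: C = 1.0, with bias, budget size 500
/// KernelBudgetedSGDTrainer<RealVector> trainer(&kernel, &loss, 1.0, true, false, 500, &strategy);
/// KernelClassifier<RealVector> classifier;
/// trainer.train(classifier, trainingData);
/// \endcode
///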
/// \par Note that in general it is unclear how two data objects should be merged,
/// e.g. strings must be merged differently than vectors. Therefore it is necessary
/// to create a specialization of this strategy for a given input type.
///
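/// \par
/// A specialization skeleton for a hypothetical input type MyType (shown only
/// to illustrate the required interface; the alpha parameter type mirrors the
/// RealVector specialization below) would look roughly like this:
/// \code
/// template<>
/// class MergeBudgetMaintenanceStrategy<MyType>
///     : public AbstractBudgetMaintenanceStrategy<MyType>
/// {
/// public:
///     void addToModel(KernelExpansion<MyType>& model, RealVector const& alpha,
///         LabeledData<MyType, unsigned int>::element_type const& supportVector)
///     {
///         // merge two budget vectors in a way that is meaningful for MyType
///     }
/// };
/// \endcode
///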
template<class InputType>
class MergeBudgetMaintenanceStrategy : public AbstractBudgetMaintenanceStrategy<InputType>
{
    typedef KernelExpansion<InputType> ModelType;
    typedef LabeledData<InputType, unsigned int> DataType;
    typedef typename DataType::element_type ElementType;

public:

    /// constructor.
    MergeBudgetMaintenanceStrategy()
    {
    }


    /// add to model.
    /// This is only a placeholder here, as it is unclear in general how to merge
    /// two objects; one needs to specialize this template.
    ///
    /// @param[in,out] model the model the strategy will work with
    /// @param[in] alpha alphas for the new budget vector
    /// @param[in] supportVector the vector to add to the model by applying the maintenance strategy
    ///
    virtual void addToModel(ModelType& model, InputType &alpha, ElementType &supportVector)
    {
        // this has to be implemented for every specialization,
        // so here we throw an error
        throw(SHARKEXCEPTION("MergeBudgetMaintenanceStrategy: There is no default merging strategy for the InputType you provided! Please specialize this class."));
    }


    /// class name
    std::string name() const
    { return "MergeBudgetMaintenanceStrategy"; }
};



///
/// \brief Budget maintenance strategy merging vectors.
///
/// \par This is a specialization of the merge budget maintenance strategy
/// that handles simple real-valued vectors. It is a nearly 1:1 adaptation of
/// the strategy presented in Wang, Crammer and Vucetic.
///
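/// \par
/// As a concrete instance of formula (6.7): for h = 0.6 the merged vector is
/// \f$ z = 0.6 x_m + 0.4 x_n \f$, and for a Gaussian kernel its per-class
/// coefficient becomes \f$ \alpha_z = \alpha_m k^{0.16} + \alpha_n k^{0.36} \f$
/// with \f$ k = k(x_m, x_n) \f$, since \f$ (1-h)^2 = 0.16 \f$ and \f$ h^2 = 0.36 \f$.
///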
template<>
class MergeBudgetMaintenanceStrategy<RealVector> : public AbstractBudgetMaintenanceStrategy<RealVector>
{
    typedef KernelExpansion<RealVector> ModelType;
    typedef LabeledData<RealVector, unsigned int> DataType;
    typedef DataType::element_type ElementType;
    typedef RealVector InputType;

public:

    /// constructor.
    MergeBudgetMaintenanceStrategy()
    {
    }


    /// This is the objective function we need to optimize during merging.
    /// The merging strategy needs a line search to find the parameter h
    /// which maximizes \f$ a \cdot k(x_m, z) + b \cdot k(x_n, z) \f$ for the
    /// merged vector \f$ z = h x_m + (1 - h) x_n \f$ (all in the notation of
    /// Wang, Crammer and Vucetic). The coefficients a and b are given by the
    /// alpha coefficients of the corresponding support vectors \f$ x_m \f$
    /// and \f$ x_n \f$; more precisely, \f$ a = \sum_i \alpha_m^{(i)}/d_i \f$
    /// and \f$ b = \sum_i \alpha_n^{(i)}/d_i \f$ with
    /// \f$ d_i = \alpha_m^{(i)} + \alpha_n^{(i)} \f$.
    ///
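    /// For a Gaussian kernel only the single value \f$ k = k(x_m, x_n) \f$ is
    /// needed: with \f$ z = h x_m + (1 - h) x_n \f$ one has
    /// \f$ \|x_m - z\| = (1 - h)\|x_m - x_n\| \f$, hence
    /// \f$ k(x_m, z) = k^{(1-h)^2} \f$ and \f$ k(x_n, z) = k^{h^2} \f$, which
    /// is exactly the form that eval() below implements.
    ///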
    struct MergingProblemFunction : public SingleObjectiveFunction
    {

        /// class name
        std::string name() const
        { return "MergingProblemFunction"; }


        /// parameters of the function.
        double m_a, m_b;
        double m_k; ///< contains (in our problem) k(x_m, x_n)


        /// constructor.
        /// \param[in] a a coefficient of the formula
        /// \param[in] b b coefficient of the formula
        /// \param[in] k k coefficient of the formula
        ///
        MergingProblemFunction(double a, double b, double k)
        {
            m_features |= HAS_FIRST_DERIVATIVE; // we provide evalDerivative below
            m_a = a;
            m_b = b;
            m_k = k;
        }


        /// number of variables; we have a one-dimensional problem here.
        std::size_t numberOfVariables() const
        {
            return 1;
        }


        /// evaluation.
        /// \param[in] pattern vector to evaluate the function at; as we have a 1d problem,
        /// we ignore everything beyond the first component.
        /// \return function value at the point
        ///
        virtual double eval(RealVector const& pattern) const
        {
            double h = pattern(0);
            // we want to maximize, thus we minimize the negated function
            return (- (m_a * pow(m_k, (1.0 - h) * (1.0 - h)) + m_b * pow(m_k, h * h)));
        }


        /// Derivative of the function.
        /// The derivative-based line search routine below makes use of it.
        /// \param[in] input Point to evaluate the function at
        /// \param[out] derivative Derivative at the given point
        /// \return Function value at the point
        ///
        virtual double evalDerivative(const SearchPointType & input, FirstOrderDerivative & derivative) const
        {
            double h = input(0);
            // we want to maximize, thus we minimize the negated function
            derivative(0) = 2 * log(m_k) * (-m_a * (h - 1.0) * pow(m_k, (h - 1.0) * (h - 1.0))
                - m_b * h * pow(m_k, (h * h)));
            return eval(input);
        }
    };



    /// Reduce the budget.
    /// This is a helper routine. After addToModel has added the new support vector
    /// at the end of the budget (which was created one entry larger than the capacity),
    /// this routine does the actual merging. Given an index, it searches for a second
    /// index such that merging the two is 'optimal', and then performs the merge. Afterwards
    /// the last budget vector is freed again (by setting its alpha-coefficients to zero).
    /// \param[in,out] model Model to work on
    /// \param[in] firstIndex The index of the first element of the pair to merge.
    ///
    virtual void reduceBudget(ModelType& model, size_t firstIndex)
    {
        size_t maxIndex = model.basis().numberOfElements();

        // compute the kernel row between the given first element and all others;
        // this should take O(B) time, as it is a row of size B
        blas::vector<float> kernelRow(maxIndex, 0.0);
        for(size_t j = 0; j < maxIndex; j++)
            kernelRow(j) = static_cast<float>(model.kernel()->eval(model.basis().element(firstIndex), model.basis().element(j)));

        // initialize the search
        double fret(0.);
        RealVector h(1); // initial search starting point
        RealVector xi(1); // direction of search

        // save the parameters at the minimum
        double minDegradation = std::numeric_limits<double>::infinity();
        double minH = 0.0;
        double minAlphaMergedFirst = 0.0;
        double minAlphaMergedSecond = 0.0;
        size_t secondIndex = 0;


        // we need to check every other vector
        RealMatrix &alpha = model.alpha();
        for(size_t currentIndex = 0; currentIndex < maxIndex; currentIndex++)
        {
            // we do not want the vector already chosen
            if(firstIndex == currentIndex)
                continue;

            // compute the alphas for the model; this is the formula
            // between (6.7) and (6.8) in Wang, Crammer, Vucetic
            double a = 0.0;
            double b = 0.0;
            for(size_t c = 0; c < alpha.size2(); c++)
            {
                // floor the denominator away from zero to avoid division by zero
                double d = std::max(0.00001, alpha(currentIndex, c) + alpha(firstIndex, c));
                a += alpha(firstIndex, c) / d;
                b += alpha(currentIndex, c) / d;
            }


            // Initialize search starting point and direction:
            h(0) = 0.0;
            xi(0) = 0.5;

            double k = kernelRow(currentIndex);
            MergingProblemFunction mergingProblemFunction(a, b, k);

            // minimize the objective by a line search between 0 and 1
            detail::dlinmin(h, xi, fret, mergingProblemFunction, 0.0, 1.0);

            // The optimal parameter is now given by h. The vector that
            // corresponds to it is $z = h x_m + (1-h) x_n$ by formula (6.7);
            // z itself is only needed once the best pair has been found,
            // so it is not computed here.

            // The optimal alpha coefficient of z solves another minimization
            // problem, with solution $\alpha_z^{(i)} = \alpha_m^{(i)} k(x_m, z) + \alpha_n^{(i)} k(x_n, z)$,
            // i.e. both old vectors are approximated by the merged one.
            // (Maybe KernelBasisDistance could be used instead, but it is
            // unclear whether it applies here and whether it would be faster.)

            long double alphaMergedFirst = pow(k, (1.0 - h(0)) * (1.0 - h(0)));
            long double alphaMergedCurrent = pow(k, h(0) * h(0));

            // The degradation is computed for each class by formula (6.8) and summed up;
            // a kernel with $k(x,x) = 1$ is assumed. Expanding
            // $\|\alpha_m \Phi(x_m) + \alpha_n \Phi(x_n) - \alpha_z \Phi(z)\|^2$
            // with the optimal $\alpha_z$ from above yields exactly
            // $\alpha_m^2 + \alpha_n^2 + 2 k \alpha_m \alpha_n - \alpha_z^2$.
            double currentDegradation = 0.0;
            for(size_t c = 0; c < alpha.size2(); c++)
            {
                double zAlpha = alphaMergedFirst * alpha(firstIndex, c) + alphaMergedCurrent * alpha(currentIndex, c);
                currentDegradation += pow(alpha(firstIndex, c), 2) + pow(alpha(currentIndex, c), 2) +
                    2.0 * k * alpha(firstIndex, c) * alpha(currentIndex, c) - zAlpha * zAlpha;
            }


            // keep track of the best (smallest) degradation
            if(currentDegradation < minDegradation)
            {
                minDegradation = currentDegradation;
                minH = h(0);
                minAlphaMergedFirst = alphaMergedFirst;
                minAlphaMergedSecond = alphaMergedCurrent;
                secondIndex = currentIndex;
            }
        }

        // compute the merged vector
        RealVector firstVector = model.basis().element(firstIndex);
        RealVector secondVector = model.basis().element(secondIndex);
        RealVector mergedVector = minH * firstVector + (1.0 - minH) * secondVector;

        // replace the second vector with the merged one
        model.basis().element(secondIndex) = mergedVector;

        // and update the alphas
        for(size_t c = 0; c < alpha.size2(); c++)
        {
            alpha(secondIndex, c) = minAlphaMergedFirst * alpha(firstIndex, c) + minAlphaMergedSecond * alpha(secondIndex, c);
        }

        // the first index is now obsolete, so we copy the
        // last vector, which serves as a buffer, to this position
        row(alpha, firstIndex) = row(alpha, maxIndex - 1);
        model.basis().element(firstIndex) = model.basis().element(maxIndex - 1);

        // free the buffer again by clearing its alphas
        row(model.alpha(), maxIndex - 1).clear();
    }



    /// add a vector to the model.
    /// This will add the given vector to the model and merge the budget so that
    /// afterwards the budget size stays the same. If the budget has a free entry
    /// anyway, no merging is performed; instead the given vector is simply added
    /// to the budget.
    ///
    /// @param[in,out] model the model the strategy will work with
    /// @param[in] alpha alphas for the new budget vector
    /// @param[in] supportVector the vector to add to the model by applying the maintenance strategy
    ///
    virtual void addToModel(ModelType& model, InputType const& alpha, ElementType const& supportVector)
    {
        // find the two indices we want to merge

        // Note that we have to trick ourselves here: the model has a fixed
        // size, but we actually want to work with the model plus the new
        // support vector. Therefore the budget is one entry larger than the
        // user specified, and this last entry of the model serves as a
        // buffer. It is freed again when merging is finished.

        // put the new vector into place
        size_t maxIndex = model.basis().numberOfElements();
        model.basis().element(maxIndex - 1) = supportVector.input;
        row(model.alpha(), maxIndex - 1) = alpha;


        // the first vector to merge is the one with the smallest alpha coefficient
        // (it cannot be our new vector, because in each iteration the
        // old weights get downscaled and the new one gets the biggest)
        size_t firstIndex = 0;
        double firstAlpha = 0;
        findSmallestVector(model, firstIndex, firstAlpha);

        // if the smallest vector has zero alpha,
        // the budget is not yet filled, so we can skip merging.
        if(firstAlpha == 0.0)
        {
            // as we need the last vector to be zero, we move the new
            // vector to the free position, undoing the placement above
            model.basis().element(firstIndex) = supportVector.input;
            row(model.alpha(), firstIndex) = alpha;

            // it is enough to zero out the alphas of the buffer
            row(model.alpha(), maxIndex - 1).clear();

            // ready.
            return;
        }

        // The second vector of the pair is found by searching for the best
        // match, taking O(B) time; reduceBudget then performs the merge on
        // the model, which already contains the new support vector in its
        // buffer entry.
        reduceBudget(model, firstIndex);
    }


    /// class name
    std::string name() const
    { return "MergeBudgetMaintenanceStrategy"; }


protected:
};

}
#endif