//===========================================================================
/*!
 *
 * \brief Trainer for binary SVMs natively supporting missing features.
 *
 * \author B. Li
 * \date 2012
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================

#ifndef SHARK_ALGORITHMS_TRAINERS_MISSING_FEATURE_SVM_H
#define SHARK_ALGORITHMS_TRAINERS_MISSING_FEATURE_SVM_H

#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Algorithms/QP/BoxConstrainedProblems.h>
#include <shark/Algorithms/QP/QpSolver.h>
#include <shark/Algorithms/QP/SvmProblems.h>
#include <shark/LinAlg/CachedMatrix.h>
#include <shark/LinAlg/ExampleModifiedKernelMatrix.h>
#include <shark/Models/Kernels/EvalSkipMissingFeatures.h>
#include <shark/Models/Kernels/MissingFeaturesKernelExpansion.h>

#include <boost/foreach.hpp>

namespace shark {

/// \brief Trainer for binary SVMs natively supporting missing features.
///
/// This is a specialized variant of the standard binary C-SVM which can be used
/// to learn from data with missing features, without requiring prior imputation
/// of the missing values. The key idea is that each example is considered to have
/// an instance-specific margin, which is computed in the lower-dimensional subspace
/// spanned by the features that are present in that example.
///
/// The resulting optimization problem has the drawback that it is no longer convex.
/// However, a local optimum can easily be reached by an iterative wrapper algorithm
/// around a standard C-SVM solver. In detail, example-wise weights \f$ s_i \f$ are
/// incorporated into the quadratic term of the standard SVM optimization problem.
/// They start at the value 1 and are then iteratively updated according to the
/// instance-specific margin values; after each update, the SVM solver is invoked
/// again. In practice, 2 to 5 iterations are usually sufficient to reach an
/// acceptable level of convergence.
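///
/// Concretely, writing \f$ \|w\| \f$ for the norm of the full classifier and
/// \f$ \|w_i\| \f$ for its norm restricted to the feature subspace present in
/// example \f$ i \f$, each iteration applies the update
/// \f$ s_i \leftarrow \|w_i\| / \|w\| \f$; this is what the reweighting loops
/// below compute via computeNorm().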
///
/// For details, see the paper:<br/>
/// <p>Max-margin Classification of %Data with Absent Features.
/// Gal Chechik, Geremy Heitz, Gal Elidan, Pieter Abbeel and Daphne Koller. JMLR 2008.</p>
///
/// \note Much of the code in this class is borrowed from the class CSvmTrainer.
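///
/// A minimal usage sketch; the kernel, regularization constant, and data below
/// are illustrative placeholders, not prescribed by this class:
/// \code
/// GaussianRbfKernel<RealVector> kernel(0.5);         // any Shark kernel
/// MissingFeatureSvmTrainer<RealVector> trainer(&kernel, 1.0, true);
/// trainer.setMaxIterations(4);                       // reweighting rounds
/// MissingFeaturesKernelExpansion<RealVector> svm;
/// trainer.train(svm, trainingData);                  // LabeledData<RealVector, unsigned int>
/// \endcode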
template <class InputType, class CacheType = float>
class MissingFeatureSvmTrainer : public AbstractSvmTrainer<InputType, unsigned int, MissingFeaturesKernelExpansion<InputType> >
{
protected:

    typedef CacheType QpFloatType;
    typedef AbstractKernelFunction<InputType> KernelType;
    typedef AbstractSvmTrainer<InputType, unsigned int, MissingFeaturesKernelExpansion<InputType> > base_type;

public:

    /// Constructor
    MissingFeatureSvmTrainer(KernelType* kernel, double C, bool offset, bool unconstrained = false)
    : base_type(kernel, C, offset, unconstrained)
    , m_maxIterations(4u)
    { }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "MissingFeatureSvmTrainer"; }

    /// \brief Train the SVM; dispatches on whether an offset (bias) term is used.
    void train(MissingFeaturesKernelExpansion<InputType>& svm, LabeledData<InputType, unsigned int> const& dataset)
    {
        // Check prerequisites
        SHARK_CHECK(numberOfClasses(dataset) == 2, "[MissingFeatureSvmTrainer::train] Not a binary problem");

        svm.setStructure(base_type::m_kernel, dataset.inputs(), this->m_trainOffset);

        if (svm.hasOffset())
            trainWithOffset(svm, dataset);
        else
            trainWithoutOffset(svm, dataset);
    }

    /// \brief Set the number of iterations of the reweighting loop
    /// (the referenced paper reports 2 to 5 to be sufficient in practice).
    void setMaxIterations(std::size_t newIterations)
    { m_maxIterations = newIterations; }

private:

    /// Number of iterations to run
    std::size_t m_maxIterations;

    void trainWithoutOffset(MissingFeaturesKernelExpansion<InputType>& svm, LabeledData<InputType, unsigned int> const& dataset)
    {
        // Initialize scaling coefficients to 1.0
        RealVector scalingCoefficients(dataset.numberOfElements(), 1.0);

        // What the body of this loop does:
        //   *) solve the QP with a normal solver, treating the s_i as constants
        //   *) calculate the norms ||w_i|| and ||w||
        //   *) update each s_i to ||w_i|| / ||w||
        for (std::size_t iteration = 0; iteration != m_maxIterations; ++iteration){
            // Set up the problem
            typedef ExampleModifiedKernelMatrix<InputType, QpFloatType> MatrixType;
            typedef CachedMatrix<MatrixType> CachedMatrixType;
            typedef CSVMProblem<CachedMatrixType> SVMProblemType;
            typedef SvmShrinkingProblem<SVMProblemType> ProblemType;
            MatrixType kernelMatrix(*base_type::m_kernel, dataset.inputs());
            kernelMatrix.setScalingCoefficients(scalingCoefficients);
            CachedMatrixType matrix(&kernelMatrix);
            SVMProblemType svmProblem(matrix, dataset.labels(), this->C());
            ProblemType problem(svmProblem, base_type::m_shrinking);

            // Solve it
            QpSolver<ProblemType> solver(problem);
            solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
            RealVector alpha = problem.getUnpermutedAlpha();

            // Update the s_i: the norm of the classifier restricted to the feature
            // subspace of example i, divided by the norm of the full classifier
            const double classifierNorm = svm.computeNorm(alpha, scalingCoefficients);
            SHARK_ASSERT(classifierNorm > 0.0);
            for (std::size_t i = 0; i < scalingCoefficients.size(); ++i)
            {
                scalingCoefficients(i) = svm.computeNorm(
                    alpha,
                    scalingCoefficients,
                    dataset.element(i).input)
                    / classifierNorm;
            }

            // In the last iteration, store alpha inside the svm
            if (iteration == m_maxIterations - 1)
                column(svm.alpha(), 0) = alpha;

            // Keep track of the number of kernel evaluations
            base_type::m_accessCount += kernelMatrix.getAccessCount();
        }
        svm.setScalingCoefficients(scalingCoefficients);
    }

    void trainWithOffset(MissingFeaturesKernelExpansion<InputType>& svm, LabeledData<InputType, unsigned int> const& dataset)
    {
        // Initialize scaling coefficients to 1.0
        std::size_t datasetSize = dataset.numberOfElements();
        RealVector scalingCoefficients(datasetSize, 1.0);

        // What the body of this loop does:
        //   *) solve the QP with a normal solver, treating the s_i as constants
        //   *) calculate the norms ||w_i|| and ||w||
        //   *) update each s_i to ||w_i|| / ||w||
        for (std::size_t iteration = 0; iteration != m_maxIterations; ++iteration){
            // Set up the problem
            typedef ExampleModifiedKernelMatrix<InputType, QpFloatType> MatrixType;
            typedef CachedMatrix<MatrixType> CachedMatrixType;
            typedef CSVMProblem<CachedMatrixType> SVMProblemType;
            typedef SvmShrinkingProblem<SVMProblemType> ProblemType;
            MatrixType kernelMatrix(*base_type::m_kernel, dataset.inputs());
            kernelMatrix.setScalingCoefficients(scalingCoefficients);
            CachedMatrixType matrix(&kernelMatrix);
            SVMProblemType svmProblem(matrix, dataset.labels(), this->C());
            ProblemType problem(svmProblem, base_type::m_shrinking);

            // Solve it
            QpSolver<ProblemType> solver(problem);
            solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
            RealVector unpermutedAlpha = problem.getUnpermutedAlpha();

            // Update the s_i: the norm of the classifier restricted to the feature
            // subspace of example i, divided by the norm of the full classifier
            const double classifierNorm = svm.computeNorm(unpermutedAlpha, scalingCoefficients);
            SHARK_ASSERT(classifierNorm > 0.0);
            for (std::size_t i = 0; i < scalingCoefficients.size(); ++i)
            {
                scalingCoefficients(i) = svm.computeNorm(
                    unpermutedAlpha,
                    scalingCoefficients,
                    dataset.element(i).input)
                    / classifierNorm;
            }

            if (iteration == m_maxIterations - 1){
                // In the last iteration, compute the offset (i.e., the bias b)
                // and store it together with alpha inside the SVM
                column(svm.alpha(), 0) = unpermutedAlpha;
                double lowerBound = -1e100;
                double upperBound = 1e100;
                double sum = 0.0;
                std::size_t freeVars = 0;

                for (std::size_t i = 0; i < datasetSize; i++)
                {
                    // In case there are no free support vectors, we look for the largest
                    // gradient over all alphas at the lower bound and the smallest
                    // gradient over all alphas at the upper bound
                    const double value = problem.gradient(i);
                    if (problem.alpha(i) == problem.boxMin(i)){
                        lowerBound = std::max(value, lowerBound);
                    }
                    else if (problem.alpha(i) == problem.boxMax(i)){
                        upperBound = std::min(value, upperBound);
                    }
                    else{
                        sum += value;
                        freeVars++;
                    }
                }

                if (freeVars > 0) {
                    // Stabilized (averaged) exact value over the free support vectors
                    svm.offset(0) = sum / freeVars;
                } else {
                    // TODO: work out how to compute the derivative with missing features

                    // Return the best estimate: the midpoint of the feasible interval
                    svm.offset(0) = 0.5 * (lowerBound + upperBound);
                }
            }

            // Keep track of the number of kernel evaluations
            base_type::m_accessCount += kernelMatrix.getAccessCount();
        }
        svm.setScalingCoefficients(scalingCoefficients);
    }

};

} // namespace shark

#endif // SHARK_ALGORITHMS_TRAINERS_MISSING_FEATURE_SVM_H