OneClassSvmTrainer.h
//===========================================================================
/*!
 *
 * \brief Trainer for One-Class Support Vector Machines
 *
 * \author T. Glasmachers
 * \date 2007-2012
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_ONECLASSSVMTRAINER_H
#define SHARK_ALGORITHMS_ONECLASSSVMTRAINER_H


#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>
#include <shark/Algorithms/QP/BoxConstrainedProblems.h>
#include <shark/Algorithms/QP/SvmProblems.h>
#include <shark/Models/Kernels/KernelExpansion.h>

namespace shark {

///
/// \brief Training of one-class SVMs.
///
/// The one-class support vector machine is an unsupervised
/// method for learning a high probability region of a
/// distribution. Given data points \f$ x_i, i \in \{1, \dots, m\} \f$,
/// a kernel function k(x, x'), and a parameter \f$ \nu \in (0, 1) \f$,
/// let H denote the reproducing kernel Hilbert space induced by k,
/// and let \f$ \phi \f$ denote the corresponding feature map.
/// Then an estimate of a high probability region of the
/// distribution generating the sample points is described
/// by the set where the following function takes positive
/// values:
/// \f[
///     f(x) = \langle w, \phi(x) \rangle + b
/// \f]
/// with coefficients w and \f$ b = -\rho \f$ given by the (primal)
/// optimization problem
/// \f[
///     \min_{w, \xi, \rho} \frac{1}{2} \|w\|^2 + \frac{1}{\nu m} \sum_{i=1}^m \xi_i - \rho
/// \f]
/// \f[
///     \text{s.t. } \langle w, \phi(x_i) \rangle \geq \rho - \xi_i \text{ and } \xi_i \geq 0 .
/// \f]
/// The parameter \f$ \nu \f$ controls the smoothness of the solution
/// and the amount of probability mass covered: it is an upper bound on
/// the fraction of outliers and a lower bound on the fraction of
/// support vectors, while the threshold \f$ \rho \f$ is optimized
/// together with w.
///
/// For more details refer to the paper:<br/>
/// B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. Smola, and
/// R. C. Williamson. Estimating the support of a high-dimensional
/// distribution, 1999.
///
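/// \par Example
/// A minimal usage sketch; the kernel type, its bandwidth, the value of
/// \f$ \nu \f$, and the data-loading helper are illustrative assumptions,
/// not prescribed by this class:
/// \code
/// std::vector<RealVector> points = loadPoints();        // hypothetical helper
/// UnlabeledData<RealVector> data = createDataFromRange(points);
/// GaussianRbfKernel<RealVector> kernel(0.5);            // assumed kernel choice
/// OneClassSvmTrainer<RealVector> trainer(&kernel, 0.1); // nu = 0.1
/// KernelExpansion<RealVector> svm;
/// trainer.train(svm, data);
/// // a point x lies in the estimated region iff svm(x)(0) > 0
/// \endcode
///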
template <class InputType, class CacheType = float>
class OneClassSvmTrainer : public AbstractUnsupervisedTrainer<KernelExpansion<InputType> >, public QpConfig, public IParameterizable
{
public:

    typedef CacheType QpFloatType;
    typedef AbstractKernelFunction<InputType> KernelType;
    typedef QpConfig base_type;

    typedef KernelMatrix<InputType, QpFloatType> KernelMatrixType;
    typedef CachedMatrix<KernelMatrixType> CachedMatrixType;
    typedef PrecomputedMatrix<KernelMatrixType> PrecomputedMatrixType;

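    /// \brief Constructor.
    ///
    /// \param kernel  kernel function to use for training and prediction
    /// \param nu      parameter of the method; must satisfy 0 < nu < 1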
    OneClassSvmTrainer(KernelType* kernel, double nu)
    : m_kernel(kernel)
    , m_nu(nu)
    , m_cacheSize(0x4000000)
    { }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "OneClassSvmTrainer"; }

    double nu() const
    { return m_nu; }
    void setNu(double nu)
    { m_nu = nu; }

    KernelType* kernel()
    { return m_kernel; }
    const KernelType* kernel() const
    { return m_kernel; }
    void setKernel(KernelType* kernel)
    { m_kernel = kernel; }

    std::size_t cacheSize() const
    { return m_cacheSize; }
    void setCacheSize(std::size_t size)
    { m_cacheSize = size; }

    /// get the hyper-parameter vector
    RealVector parameterVector() const
    {
        std::size_t kp = m_kernel->numberOfParameters();
        RealVector ret(kp + 1);
        RealVectorRange(ret, Range(0, kp)) = m_kernel->parameterVector();
        ret(kp) = m_nu;
        return ret;
    }

    /// set the vector of hyper-parameters
    void setParameterVector(RealVector const& newParameters)
    {
        std::size_t kp = m_kernel->numberOfParameters();
        SHARK_ASSERT(newParameters.size() == kp + 1);
        m_kernel->setParameterVector(ConstRealVectorRange(newParameters, Range(0, kp)));
        setNu(newParameters(kp));
    }

    /// return the number of hyper-parameters
    std::size_t numberOfParameters() const
    { return m_kernel->numberOfParameters() + 1; }

    /// \brief Trains the one-class SVM on unlabeled data.
    void train(KernelExpansion<InputType>& svm, UnlabeledData<InputType> const& inputset)
    {
        SHARK_CHECK(m_nu > 0.0 && m_nu < 1.0, "[OneClassSvmTrainer::train] invalid setting of the parameter nu (must be 0 < nu < 1)");
        svm.setStructure(m_kernel, inputset, true);

        // solve the quadratic program, either with the full kernel matrix
        // precomputed (fast, but O(m^2) memory) or with a kernel cache
        if (base_type::precomputeKernel())
            trainSVM<PrecomputedMatrixType>(svm, inputset);
        else
            trainSVM<CachedMatrixType>(svm, inputset);

        if (base_type::sparsify())
            svm.sparsify();  // remove basis points with zero coefficient
    }

protected:
    KernelType* m_kernel;
    double m_nu;
    std::size_t m_cacheSize;

    template<class MatrixType>
    void trainSVM(KernelExpansion<InputType>& svm, UnlabeledData<InputType> const& inputset)
    {
        typedef BoxedSVMProblem<MatrixType> SVMProblemType;
        typedef SvmShrinkingProblem<SVMProblemType> ProblemType;

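        // The QP set up below is the dual of the primal problem stated in
        // the class documentation (Schölkopf et al.): minimize
        //     (1/2) sum_{i,j} alpha_i alpha_j k(x_i, x_j)
        // subject to the box constraints 0 <= alpha_i <= 1/(nu m) and the
        // equality constraint sum_i alpha_i = 1. The linear term of the
        // objective is zero, and the offset b = -rho is recovered from the
        // KKT conditions once the solver has finished.
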
        // setup the problem
        KernelMatrixType km(*m_kernel, inputset);
        MatrixType matrix(&km);
        std::size_t ic = matrix.size();
        double upper = 1.0 / (m_nu * ic);
        SVMProblemType svmProblem(matrix, blas::repeat(0.0, ic), 0.0, upper);
        ProblemType problem(svmProblem, base_type::m_shrinking);

        // solve it
        QpSolver<ProblemType> solver(problem);
        solver.solve(base_type::stoppingCondition(), &base_type::solutionProperties());
        column(svm.alpha(), 0) = problem.getUnpermutedAlpha();

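        // Offset recovery: since the linear term of the dual is zero,
        // problem.gradient(i) equals -<w, phi(x_i)>. A free variable
        // (0 < alpha_i < upper) corresponds to a point exactly on the
        // decision boundary, f(x_i) = <w, phi(x_i)> + b = 0, so its
        // gradient value equals b; averaging over all free variables
        // stabilizes the estimate. Variables at the bounds only bracket
        // b from below (alpha_i == 0) and above (alpha_i == upper).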
        // compute the offset from the KKT conditions
        double lowerBound = -1e100;
        double upperBound = 1e100;
        double sum = 0.0;
        std::size_t freeVars = 0;
        for (std::size_t i = 0; i != problem.dimensions(); i++)
        {
            double value = problem.gradient(i);
            if (problem.alpha(i) == 0.0)
                lowerBound = std::max(value, lowerBound);
            else if (problem.alpha(i) == upper)
                upperBound = std::min(value, upperBound);
            else
            {
                sum += value;
                freeVars++;
            }
        }
        if (freeVars > 0)
            svm.offset(0) = sum / freeVars;                   // stabilized (averaged) exact value
        else
            svm.offset(0) = 0.5 * (lowerBound + upperBound);  // best estimate
    }
};


}
#endif