RegularizationNetworkTrainer.h
//===========================================================================
/*!
 *
 * \brief Trainer for a Regularization Network or a Gaussian Process
 *
 * \author T. Glasmachers
 * \date 2007-2012
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ALGORITHMS_REGULARIZATIONNETWORKTRAINER_H
#define SHARK_ALGORITHMS_REGULARIZATIONNETWORKTRAINER_H

// Headers for the base trainer class and the kernel matrix helpers used below.
#include <shark/Models/Kernels/KernelHelpers.h>
#include <shark/Algorithms/Trainers/AbstractSvmTrainer.h>

namespace shark {

///
/// \brief Training of a regularization network.
///
/// A regularization network is a kernel-based model for
/// regression problems. The training data consists of tuples
/// \f$ (x_i, y_i) \f$ with the x-component denoting the input and
/// the y-component a real-valued label (see the tutorial on
/// label conventions; the implementation uses RealVector),
/// together with a kernel function k(x, x') and a regularization
/// constant \f$ C > 0 \f$. Let H denote the reproducing kernel
/// Hilbert space induced by k, and let \f$ \phi \f$
/// denote the corresponding feature map.
/// Then the SVM regression function is of the form
/// \f[
/// f(x) = \langle w, \phi(x) \rangle + b
/// \f]
/// with coefficients w and b given by the (primal)
/// optimization problem
/// \f[
/// \min \frac{1}{2} \|w\|^2 + C \sum_i L(y_i, f(x_i)),
/// \f]
/// where the simple quadratic loss is employed:
/// \f[
/// L(y, f(x)) = (y - f(x))^2
/// \f]
/// Regularization networks can be interpreted as a special
/// type of support vector machine (for regression, with
/// squared loss, and thus with non-sparse weights).
///
/// Training a regularization network is identical to training a
/// Gaussian process for regression. The parameter \f$ C \f$ then
/// corresponds to the precision of the noise (denoted by \f$ \beta \f$ in
/// Bishop's textbook). The precision is the inverse of the variance
/// of the noise. The variance of the noise is denoted by \f$
/// \sigma_n^2 \f$ in the textbook by Rasmussen and
/// Williams. Accordingly, \f$ C = 1/\sigma_n^2 \f$.
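///
/// Training reduces to a single linear solve: with the kernel Gram
/// matrix \f$ K \f$ (entries \f$ k(x_i, x_j) \f$) and noise variance
/// \f$ \sigma_n^2 = 1/C \f$, the coefficients \f$ \alpha \f$ of the
/// resulting kernel expansion are the solution of
/// \f[
/// (K + \sigma_n^2 I) \alpha = y,
/// \f]
/// which is exactly the system set up and solved in train() below.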

template <class InputType>
class RegularizationNetworkTrainer : public AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> >
{
public:
	// Types used below; the base class stores the kernel pointer and the regularization parameter C.
	typedef AbstractKernelFunction<InputType> KernelType;
	typedef AbstractSvmTrainer<InputType, RealVector, KernelExpansion<InputType> > base_type;

	/// \param kernel Kernel
	/// \param betaInv Inverse precision, i.e., the assumed noise variance; equals the inverse of the regularization parameter C
	/// \param unconstrained Indicates exponential encoding of the regularization parameter
	RegularizationNetworkTrainer(KernelType* kernel, double betaInv, bool unconstrained = false)
	: base_type(kernel, 1.0 / betaInv, false, unconstrained)
	{ }

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "RegularizationNetworkTrainer"; }

	/// \brief Returns the assumed noise variance (i.e., 1/C)
	double noiseVariance() const
	{ return 1.0 / this->C(); }

	/// \brief Sets the assumed noise variance (i.e., 1/C)
	void setNoiseVariance(double betaInv)
	{ this->C() = 1.0 / betaInv; }

	/// \brief Returns the precision (i.e., C), the inverse of the assumed noise variance
	double precision() const
	{ return this->C(); }

	/// \brief Sets the precision (i.e., C), the inverse of the assumed noise variance
	void setPrecision(double beta)
	{ this->C() = beta; }

	void train(KernelExpansion<InputType>& svm, LabeledData<InputType, RealVector> const& dataset)
	{
		svm.setStructure(base_type::m_kernel, dataset.inputs(), false);

		// Set up the regularized kernel matrix M = K + sigma_n^2 * I, where K is
		// the kernel Gram matrix of the training inputs and sigma_n^2 = 1/C is
		// the assumed noise variance.
		RealMatrix M = calculateRegularizedKernelMatrix(*(this->m_kernel), dataset.inputs(), noiseVariance());
		RealVector v = column(createBatch<RealVector>(dataset.labels().elements()), 0);
		//~ blas::approxsolveSymmPosDefSystemInPlace(M,v); // try this later instead of the call below
		// Solve M * alpha = y for the coefficient vector of the kernel expansion.
		blas::solveSymmPosDefSystemInPlace<blas::SolveAXB>(M, v);
		column(svm.alpha(), 0) = v;
	}
};

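
// Minimal usage sketch (illustrative only; the kernel bandwidth, the noise
// variance, and the dataset `data` are assumptions, not part of this file):
//
//   GaussianRbfKernel<RealVector> kernel(0.5);
//   RegularizationNetworkTrainer<RealVector> trainer(&kernel, 0.01);
//   KernelExpansion<RealVector> model;
//   trainer.train(model, data);   // data: LabeledData<RealVector, RealVector>
//   Data<RealVector> predictions = model(data.inputs());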
// A regularization network can be interpreted as a Gaussian
// process, with the same trainer:
#define GaussianProcessTrainer RegularizationNetworkTrainer
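
// Under the alias, the sketch above would construct the trainer as, e.g.:
//   GaussianProcessTrainer<RealVector> gpTrainer(&kernel, 0.01);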

} // namespace shark
#endif