MissingFeaturesKernelExpansion.h
//===========================================================================
/*!
 *
 * \brief A kernel expansion with support of missing features
 *
 * \author B. Li
 * \date 2012
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
#include <shark/Data/Dataset.h>
#include <shark/Data/DataView.h>
#include <shark/Models/Kernels/EvalSkipMissingFeatures.h>
#include <shark/Models/Kernels/KernelExpansion.h>

#include <boost/foreach.hpp>
#include <boost/optional/optional.hpp>

namespace shark {

/// Kernel expansion with missing features support
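///
/// Example usage (a minimal sketch; the kernel choice, data, and constants
/// below are illustrative placeholders, not part of this class):
/// \code
/// GaussianRbfKernel<> kernel(0.1);                 // any kernel usable by the expansion
/// Data<RealVector> basis = trainingInputs;         // basis (support) vectors
/// MissingFeaturesKernelExpansion<RealVector> ke(&kernel, basis, true);
/// ke.setScalingCoefficients(scalingCoefficients);  // one positive entry per basis element
/// ke.setClassifierNorm(classifierNorm);            // ||w||, e.g. via computeNorm()
/// RealMatrix outputs;
/// ke.eval(patterns, outputs);                      // patterns may have missing features
/// \endcode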
template<class InputType>
class MissingFeaturesKernelExpansion : public KernelExpansion<InputType>
{
private:
	typedef KernelExpansion<InputType> Base;
public:
	typedef typename Base::KernelType KernelType;
	typedef typename Base::BatchInputType BatchInputType;
	typedef typename Base::BatchOutputType BatchOutputType;
	/// Constructors from the base class
	///@{
	MissingFeaturesKernelExpansion()
	{}

	MissingFeaturesKernelExpansion(KernelType* kernel)
	: Base(kernel)
	{}

	MissingFeaturesKernelExpansion(KernelType* kernel, Data<InputType> const& basis, bool offset)
	: Base(kernel, basis, offset, 1u)
	{}
	///@}

	/// \brief From INameable: return the class name.
	std::string name() const
	{ return "MissingFeaturesKernelExpansion"; }

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new EmptyState());
	}

	/// Override eval(...) in the base class
	virtual void eval(BatchInputType const& patterns, BatchOutputType& outputs)const{
		SIZE_CHECK(Base::m_alpha.size1() > 0u);

		// TODO: iterated loops would suffice here; a DataView is used only to
		// get O(1) random-access lookup into the basis.
		DataView<Data<InputType> const > indexedBasis(Base::m_basis);

		ensure_size(outputs,size(patterns),Base::outputSize());
		if (Base::hasOffset())
			noalias(outputs) = repeat(Base::m_b,size(patterns));
		else
			outputs.clear();

		for(std::size_t p = 0; p != size(patterns); ++p){
			// Calculate the scaling coefficient for the current pattern
			const double patternNorm = computeNorm(column(Base::m_alpha, 0), m_scalingCoefficients, get(patterns,p));
			const double patternSc = patternNorm / m_classifierNorm;
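			// patternSc compares the classifier norm restricted to this pattern's
			// observed features against the full classifier norm; the kernel values
			// below are divided by it to compensate for the missing features.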

			// Do normal classification, except that the kernel evaluation skips
			// missing features.
			// TODO: evaluate k for all i and replace the += with a matrix-vector
			// operation; better, do this for all p and i and use a
			// matrix-matrix multiplication.
			for (std::size_t i = 0; i != indexedBasis.size(); ++i){
				const double k = evalSkipMissingFeatures(
					*Base::mep_kernel,
					indexedBasis[i],
					get(patterns,p)) / m_scalingCoefficients[i] / patternSc;
				noalias(row(outputs,p)) += k * row(Base::m_alpha, i);
			}
		}
	}
	void eval(BatchInputType const& patterns, BatchOutputType& outputs, State & state)const{
		eval(patterns, outputs);
	}

	/// Calculate the norm of the classifier, i.e., \f$ \|w\| \f$
	///
	/// Formula:
	/// \f$ \|w\| = \sqrt{\sum_{i,j=1}^{n}\alpha_i\frac{y_i}{s_i}K\left(x_i,x_j\right)\frac{y_j}{s_j}\alpha_j} \f$
	/// where \f$ s_i \f$ is the scaling coefficient and \f$ K \f$ is the kernel function;
	/// \f$ K\left(x_i,x_j\right) \f$ is evaluated only over the features that are valid
	/// for both \f$ x_i \f$ and \f$ x_j \f$.
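	///
	/// \note The \c missingness argument supplies one additional pattern whose
	/// missing features are also skipped, so the returned norm refers to the
	/// feature subspace actually observed for that pattern.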
	template<class InputTypeT>
	double computeNorm(
		const RealVector& alpha,
		const RealVector& scalingCoefficient,
		InputTypeT const& missingness
	) const{
		SIZE_CHECK(alpha.size() == scalingCoefficient.size());
		SIZE_CHECK(Base::m_basis.numberOfElements() == alpha.size());

		// Calculate ||w||^2
		double norm_sqr = 0.0;

		// TODO: iterated loops would suffice here; a DataView is used only to
		// get O(1) random-access lookup into the basis.
		DataView<Data<InputType> const > indexedBasis(Base::m_basis);

		for (std::size_t i = 0; i < alpha.size(); ++i){
			for (std::size_t j = 0; j < alpha.size(); ++j){
				const double evalResult = evalSkipMissingFeatures(
					*Base::mep_kernel,
					indexedBasis[i],
					indexedBasis[j],
					missingness);
				// Note: in the Shark solver the labels are absorbed into alpha,
				// i.e., alpha_i already stands for y_i * alpha_i.
				norm_sqr += evalResult * alpha(i) * alpha(j) / scalingCoefficient(i) / scalingCoefficient(j);
			}
		}

		// Return ||w||
		return std::sqrt(norm_sqr);
	}

	double computeNorm(
		const RealVector& alpha,
		const RealVector& scalingCoefficient
	) const{
		SIZE_CHECK(alpha.size() == scalingCoefficient.size());
		SIZE_CHECK(Base::m_basis.numberOfElements() == alpha.size());

		// TODO: iterated loops would suffice here; a DataView is used only to
		// get O(1) random-access lookup into the basis.
		DataView<Data<InputType> const > indexedBasis(Base::m_basis);

		// Calculate ||w||^2
		double norm_sqr = 0.0;

		for (std::size_t i = 0; i < alpha.size(); ++i){
			for (std::size_t j = 0; j < alpha.size(); ++j){
				const double evalResult = evalSkipMissingFeatures(
					*Base::mep_kernel,
					indexedBasis[i],
					indexedBasis[j]);
				// Note: in the Shark solver the labels are absorbed into alpha,
				// i.e., alpha_i already stands for y_i * alpha_i.
				norm_sqr += evalResult * alpha(i) * alpha(j) / scalingCoefficient(i) / scalingCoefficient(j);
			}
		}

		// Return ||w||
		return std::sqrt(norm_sqr);
	}

	/// Set the scaling coefficients \f$ s_i \f$, one per basis element; all must be positive.
	void setScalingCoefficients(const RealVector& scalingCoefficients)
	{
#if DEBUG
		BOOST_FOREACH(double v, scalingCoefficients)
		{
			SHARK_ASSERT(v > 0.0);
		}
#endif
		m_scalingCoefficients = scalingCoefficients;
	}

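	/// Set the norm of the classifier, \f$ \|w\| \f$ (see computeNorm()); eval()
	/// divides each pattern's norm by this value, so it must be positive.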
	void setClassifierNorm(double classifierNorm)
	{
		SHARK_ASSERT(classifierNorm > 0.0);
		m_classifierNorm = classifierNorm;
	}

protected:
	/// The scaling coefficients
	RealVector m_scalingCoefficients;

	/// The norm of the classifier (w)
	double m_classifierNorm;
};

} // namespace shark