MultiTaskKernel.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Special kernel classes for multi-task and transfer learning.
6  *
7  *
8  *
9  * \author T. Glasmachers, O.Krause
10  * \date 2012
11  *
12  *
13  * \par Copyright 1995-2015 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://image.diku.dk/shark/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
35 #ifndef SHARK_MODELS_KERNELS_MULTITASKKERNEL_H
36 #define SHARK_MODELS_KERNELS_MULTITASKKERNEL_H
37 
40 #include <shark/Data/Dataset.h>
41 #include "Impl/MklKernelBase.h"
42 
43 namespace shark {
44 
45 ///
46 /// \brief Aggregation of input data and task index.
47 ///
48 /// \par
49 /// Generic data structure for augmenting arbitrary data
50 /// with an integer. This integer is typically used as a
51 /// task identifier in multi-task and transfer learning.
52 ///
53 template <class InputTypeT>
55 {
56  typedef InputTypeT InputType;
57  /// \brief Default constructor.
59  { }
60 
61  /// \brief Construction from an input and a task index
62  MultiTaskSample(InputType const& i, std::size_t t)
63  : input(i), task(t)
64  { }
65 
66  void read(InArchive& ar){
67  ar >> input;
68  ar >> task;
69  }
70 
71  void write(OutArchive& ar) const{
72  ar << input;
73  ar << task;
74  }
75 
76  InputType input; ///< input data
77  std::size_t task; ///< task index
78 
79 };
80 }
81 
82 
83 #ifndef DOXYGEN_SHOULD_SKIP_THIS
84 
85  BOOST_FUSION_ADAPT_TPL_STRUCT(
86  (InputType),
88  (InputType, input)(std::size_t, task)
89  )
90 
91 
92 namespace shark{
93 
94 
95 template<class InputType>
96 struct Batch< MultiTaskSample<InputType> >{
99  (InputType, input)(std::size_t, task)
100  )
101 };
102 
103 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
104 
105 
106 ///
107 /// \brief Special "Gaussian-like" kernel function on tasks.
108 ///
109 /// \par
110 /// See<br/>
111 /// Learning Marginal Predictors: Transfer to an Unlabeled Task.
112 /// G. Blanchard, G. Lee, C. Scott.
113 ///
114 /// \par
115 /// This class computes a Gaussian kernel based on the distance
116 /// of empirical distributions in feature space induced by yet
117 /// another kernel. This is useful for multi-task and transfer
118 /// learning. It reduces the definition of a kernel on tasks to
119 /// that of a kernel on inputs, plus a single bandwidth parameter
120 /// for the Gaussian kernel of distributions.
121 ///
122 /// \par
123 /// Given unlabaled data \f$ x_i, t_i \f$ where the x-component
124 /// is an input and the t-component is a task index, the kernel
125 /// on tasks t and t' is defined as
/// \f[
/// k(t, t') = \exp \left( -\gamma \cdot \left\| \mu_t - \mu_{t'} \right\|^2 \right),
/// \qquad \mu_t = \frac{1}{\ell_t} \sum_{i | t_i = t} k'(x_i, \cdot)
/// \f]
129 /// where k' is an arbitrary kernel on inputs.
130 ///
131 template <class InputTypeT >
132 class GaussianTaskKernel : public DiscreteKernel
133 {
134 private:
135  typedef DiscreteKernel base_type;
136 public:
137  typedef InputTypeT InputType;
138  typedef MultiTaskSample<InputType> MultiTaskSampleType;
139  typedef AbstractKernelFunction<InputType> KernelType;
140 
141  /// \brief Construction of a Gaussian kernel on tasks.
142  ///
143  /// \param data unlabeled data from multiple tasks
144  /// \param tasks number of tasks in the problem
145  /// \param inputkernel kernel on inputs based on which task similarity is defined
146  /// \param gamma Gaussian bandwidth parameter (also refer to the member functions setGamma and setSigma).
148  Data<MultiTaskSampleType> const& data,
149  std::size_t tasks,
150  KernelType& inputkernel,
151  double gamma)
152  : DiscreteKernel(RealMatrix(tasks, tasks,0.0))
153  , m_data(data)
154  , m_inputkernel(inputkernel)
155  , m_gamma(gamma){
156  computeMatrix();
157  }
158 
159  /// \brief From INameable: return the class name.
160  std::string name() const
161  { return "GaussianTaskKernel"; }
162 
163  RealVector parameterVector() const
164  {
165  const std::size_t n = m_inputkernel.numberOfParameters();
166  RealVector ret(n + 1);
167  init(ret)<<parameters(m_inputkernel),m_gamma;
168  return ret;
169  }
170 
171  void setParameterVector(RealVector const& newParameters){
172  init(newParameters)>>parameters(m_inputkernel),m_gamma;
173  computeMatrix();
174  }
175 
176  std::size_t numberOfParameters() const{
177  return m_inputkernel.numberOfParameters() + 1;
178  }
179 
180  std::size_t numberOfTasks() const
181  { return size(); }
182 
183  /// \brief Kernel bandwidth parameter.
184  double gamma() const
185  { return m_gamma; }
186 
187  /// \brief Kernel width parameter, equivalent to the bandwidth parameter.
188  ///
189  /// The bandwidth gamma and the width sigma are connected: \f$ gamma = 1 / (2 \cdot sigma^2) \f$.
190  double sigma() const
191  { return (1.0 / std::sqrt(2 * m_gamma)); }
192 
193  // \brief Set the kernel bandwidth parameter.
194  void setGamma(double gamma)
195  {
196  SHARK_ASSERT(gamma > 0.0);
197  m_gamma = gamma;
198  }
199 
200  /// \brief Set the kernel width (equivalent to setting the bandwidth).
201  ///
202  /// The bandwidth gamma and the width sigma are connected: \f$ gamma = 1 / (2 \cdot sigma^2) \f$.
203  void setWidth(double sigma)
204  {
205  SHARK_ASSERT(sigma > 0.0);
206  m_gamma = 1.0 / (2.0 * sigma * sigma);
207  }
208 
209  /// From ISerializable.
210  void read(InArchive& ar)
211  {
212  base_type::read(ar);
213  ar >> m_gamma;
214  }
215 
216  /// From ISerializable.
217  void write(OutArchive& ar) const
218  {
219  base_type::write(ar);
220  ar << m_gamma;
221  }
222 
223 protected:
224 
225  /// \brief Compute the Gram matrix of the task kernel.
226  ///
227  /// \par
228  /// Here is the real meat. This function implements the
229  /// kernel function defined in<br/>
230  /// Learning Marginal Predictors: Transfer to an Unlabeled Task.
231  /// G. Blanchard, G. Lee, C. Scott.
232  ///
233  /// \par
234  /// In a first step the function computes the inner products
235  /// of the task-wise empirical distributions, represented by
236  /// their mean elements in the kernel-induced feature space.
237  /// In a second step this information is used for the computation
238  /// of squared distances between empirical distribution, which
239  /// allows for the straightforward computation of a Gaussian
240  /// kernel.
242  {
243  // count number of examples for each task
244  const std::size_t tasks = numberOfTasks();
245  std::size_t elements = m_data.numberOfElements();
246  std::vector<std::size_t> ell(tasks, 0);
247  for (std::size_t i=0; i<elements; i++)
248  ell[m_data.element(i).task]++;
249 
250  // compute inner products between mean elements of empirical distributions
251  for (std::size_t i=0; i<elements; i++)
252  {
253  const std::size_t task_i = m_data.element(i).task;
254  for (std::size_t j=0; j<i; j++)
255  {
256  const std::size_t task_j = m_data.element(j).task;
257  const double k = m_inputkernel.eval(m_data.element(i).input, m_data.element(j).input);
258  base_type::m_matrix(task_i, task_j) += k;
259  base_type::m_matrix(task_j, task_i) += k;
260  }
261  const double k = m_inputkernel.eval(m_data.element(i).input, m_data.element(i).input);
262  base_type::m_matrix(task_i, task_i) += k;
263  }
264  for (std::size_t i=0; i<tasks; i++)
265  {
266  if (ell[i] == 0) continue;
267  for (std::size_t j=0; j<tasks; j++)
268  {
269  if (ell[j] == 0) continue;
270  base_type::m_matrix(i, j) /= (double)(ell[i] * ell[j]);
271  }
272  }
273 
274  // compute Gaussian kernel
275  for (std::size_t i=0; i<tasks; i++)
276  {
277  const double norm2_i = base_type::m_matrix(i, i);
278  for (std::size_t j=0; j<i; j++)
279  {
280  const double norm2_j = base_type::m_matrix(j, j);
281  const double dist2 = norm2_i + norm2_j - 2.0 * base_type::m_matrix(i, j);
282  const double k = std::exp(-m_gamma * dist2);
283  base_type::m_matrix(i, j) = base_type::m_matrix(j, i) = k;
284  }
285  }
286  for (std::size_t i=0; i<tasks; i++) base_type::m_matrix(i, i) = 1.0;
287  }
288 
289 
290  Data<MultiTaskSampleType > const& m_data; ///< multi-task data
291  KernelType& m_inputkernel; ///< kernel on inputs
292  double m_gamma; ///< bandwidth of the Gaussian task kernel
293 };
294 
295 
296 ///
297 /// \brief Special kernel function for multi-task and transfer learning.
298 ///
299 /// \par
300 /// This class is a convenience wrapper for the product of an
301 /// input kernel and a kernel on tasks. It also encapsulates
302 /// the projection from multi-task learning data (see class
303 /// MultiTaskSample) to inputs and task indices.
304 ///
305 template <class InputTypeT>
307 : private detail::MklKernelBase<MultiTaskSample<InputTypeT> >
308 , public ProductKernel< MultiTaskSample<InputTypeT> >
309 {
310 private:
311  typedef detail::MklKernelBase<MultiTaskSample<InputTypeT> > base_type1;
312  typedef ProductKernel< MultiTaskSample<InputTypeT> > base_type2;
313 public:
314  typedef AbstractKernelFunction<InputTypeT> InputKernelType;
315  /// \brief Constructor.
316  ///
317  /// \param inputkernel kernel on inputs
318  /// \param taskkernel kernel on task indices
320  InputKernelType* inputkernel,
321  DiscreteKernel* taskkernel)
322  :base_type1(boost::fusion::make_vector(inputkernel,taskkernel))
323  ,base_type2(base_type1::makeKernelVector())
324  {}
325 
326  /// \brief From INameable: return the class name.
327  std::string name() const
328  { return "MultiTaskKernel"; }
329 };
330 
331 } // namespace shark {
332 
333 #endif