CrossEntropyIndependent.h
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Error measure for classification tasks of non exclusive attributes
6  * that can be used for model training.
7  *
8  *
9  *
10  * \author -
11  * \date -
12  *
13  *
14  * \par Copyright 1995-2015 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://image.diku.dk/shark/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 
#ifndef SHARK_OBJECTIVEFUNCTIONS_LOSS_CROSS_ENTROPY_INDEPENDENT_H
#define SHARK_OBJECTIVEFUNCTIONS_LOSS_CROSS_ENTROPY_INDEPENDENT_H

#include <shark/ObjectiveFunctions/Loss/AbstractLoss.h>
#include <cmath>

namespace shark{
/*!
 * \brief Error measure for classification tasks of non-exclusive attributes
 * that can be used for model training.
 *
 * If the model is meant to return a vector whose components correspond to
 * multiple mutually independent attributes, where the \em k-th component of
 * the output vector represents the probability of the presence of the \em k-th
 * attribute given the input vector, 'CrossEntropyIndependent' is the adequate
 * error measure for model training. For \em C > 1, where \em C is the dimension
 * of the model's output and every output dimension represents a single
 * attribute or class, the error is given by the formula
 * \f[
 * E = - \sum_{i=1}^N \sum_{k=1}^{C} \{tar^i_k \ln model_k(in^i) + (1-tar^i_k) \ln
 * (1-model_k(in^i))\}
 * \f]
 * where \em i runs over all input patterns.
 * This error functional is differentiable and can therefore be used for
 * training. In the case of a single output dimension, 'CrossEntropyIndependent'
 * computes the true cross entropy for two classes, using the formalism
 * \f[
 * E = - \sum_{i=1}^N \{tar^i \ln model(in^i) + (1-tar^i) \ln
 * (1-model(in^i))\}
 * \f]
 * For theoretical reasons the logistic sigmoid is the suggested activation
 * function for the output neurons of a neural network trained with this loss;
 * in principle, any sigmoidal activation whose image is the interval \em [0,1]
 * could be applied to the output neurons. Note, however, that for numerical
 * stability this implementation evaluates the logistic sigmoid internally,
 * i.e. it expects the raw (pre-sigmoid) model output.
 * In this implementation every target value must be chosen from {0,1}
 * (binary encoding). For detailed information refer to
 * (C.M. Bishop, Neural Networks for Pattern Recognition, Clarendon Press 1996, Chapter 6.8.)
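 *
 * As a small worked example of the first formula (numbers purely
 * illustrative): for a single pattern with \em C = 2 attributes, target
 * vector (1,0) and predicted probabilities (0.8, 0.3), the error is
 * \f[
 * E = -\{\ln 0.8 + \ln(1-0.3)\} \approx 0.223 + 0.357 = 0.580
 * \f]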
 */
class CrossEntropyIndependent : public AbstractLoss<unsigned int,RealVector>
{
    typedef AbstractLoss<unsigned int,RealVector> base_type;

    // This code uses a different formula to compute the binary case for one output.
    // It should be numerically more stable.
    // Formula: ln(1 + exp(-y*x)) with y = -1/+1
    double evalError(unsigned int target, double exponential, double value) const {
        double label = 2 * static_cast<double>(target) - 1; // converts the label from 0/1 to -1/+1
        if (value * label < -100){
            // below this, we might get numerical instabilities,
            // but we know that ln(1 + exp(x)) converges to x for large arguments
            return -value * label;
        }
        if (target == 0)
            exponential = 1 / exponential;

        return std::log(1 + exponential);
    }
92 
93 public:
95  {
98  }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "CrossEntropyIndependent"; }

    // annoyingness of C++ templates
    using base_type::eval;

    double eval(unsigned int const& target, RealVector const& prediction) const
    {
        double error = 0;
        for (std::size_t c = 0; c != prediction.size(); c++){
            double exponential = std::exp(-prediction(c));
            error += evalError(target, exponential, prediction(c));
        }
        return error;
    }

    double evalDerivative(unsigned int const& target, RealVector const& prediction, RealVector& gradient) const
    {
        gradient.resize(prediction.size());

        double error = 0;
        for (std::size_t c = 0; c != prediction.size(); c++){
            double exponential = std::exp(-prediction(c));
            double sigmoid = 1 / (1 + exponential);
            gradient(c) = sigmoid - target;
            error += evalError(target, exponential, prediction(c));
        }
        return error;
    }
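
    // Derivation note for the gradient above and the Hessian below: with
    // t in {0,1} and sigma(x) = 1/(1 + exp(-x)), the per-component loss
    // -t*ln(sigma(x)) - (1-t)*ln(1 - sigma(x)) has first derivative
    // sigma(x) - t and second derivative sigma(x)*(1 - sigma(x)) w.r.t. x.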

    double evalDerivative(
        unsigned int const& target,
        RealVector const& prediction,
        RealVector& gradient,
        base_type::MatrixType& hessian) const
    {
        gradient.resize(prediction.size());
        // The Hessian of this loss is diagonal; size it to the full
        // output dimension before writing to it.
        hessian.resize(prediction.size(), prediction.size());
        hessian.clear();

        double error = 0;
        for (std::size_t c = 0; c != prediction.size(); c++){
            double exponential = std::exp(-prediction(c));
            double sigmoid = 1 / (1 + exponential);
            gradient(c) = sigmoid - target;
            hessian(c, c) = std::max(0.0, sigmoid * (1 - sigmoid));
            error += evalError(target, exponential, prediction(c));
        }
        return error;
    }
};
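
/*
 * Minimal usage sketch (values purely hypothetical, not part of the
 * library API): evaluate the loss and its gradient for a single binary
 * target and a one-dimensional raw model output.
 *
 *   CrossEntropyIndependent loss;
 *   unsigned int target = 1;
 *   RealVector prediction(1);
 *   prediction(0) = 0.5;                                // raw (pre-sigmoid) activation
 *   double error = loss.eval(target, prediction);       // ln(1+exp(-0.5)) ~ 0.474
 *   RealVector gradient;
 *   loss.evalDerivative(target, prediction, gradient);  // gradient(0) = sigma(0.5)-1 ~ -0.378
 */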

}
#endif