NegativeAUC.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Functions for measuring the area under the (ROC) curve
6  *
7  *
8  *
9  * \author Christian Igel
10  * \date 2011
11  *
12  *
13  * \par Copyright 1995-2015 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://image.diku.dk/shark/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 
34 #ifndef SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_AUC_H
35 #define SHARK_OBJECTIVEFUNCTIONS_NEGATIVE_AUC_H
36 
37 
40 
41 namespace shark {
42 ///
43 /// \brief Negative area under the curve
44 ///
45 /// This class computes the area under the ROC (receiver operating characteristic) curve.
46 /// It implements the algorithm described in:
47 /// Tom Fawcett. ROC Graphs: Notes and Practical Considerations for Researchers. 2004
48 ///
49 /// The area is negated so that optimizing the AUC corresponds to a minimization task.
50 ///
51 template<class LabelType = unsigned int, class OutputType = RealVector>
52 class NegativeAUC : public AbstractCost<LabelType, OutputType>
53 {
54 public:
56 
57  /// Constructor.
58  /// \param invert: if set to true, the role of positive and negative class are switched
59  NegativeAUC(bool invert = false) {
60  m_invert = invert;
61  }
62 
63  /// \brief From INameable: return the class name.
64  std::string name() const
65  { return "NegativeAUC"; }
66 
67  /// \brief Computes area under the curve.
68  /// \param target: class label, 0 or 1
69  /// \param prediction: prediction by classifier, OutputType-valued vector
70  /// \param column: indicates the column of the prediction vector interpreted as probability of positive class
71  double eval(Data<LabelType> const& target, Data<OutputType> const& prediction, unsigned int column) const {
72  SHARK_CHECK(dataDimension(prediction) > column,"[NegativeAUC::eval] column number too large");
73 
74  std::size_t elements = target.numberOfElements();
75 
76  unsigned P = 0; // positive examples
77  unsigned N = 0; // negative examples
78  std::vector<AUCPair> L(elements); // list of predictions and labels
79 
80  for(std::size_t i=0; i!= elements; i++) { // build list
81  LabelType t = target.element(i);
82  // negate predictions if m_invert is set
83  if(!m_invert)
84  L[i] = AUCPair(prediction.element(i)(column), t);
85  else
86  L[i] = AUCPair(-prediction.element(i)(column), t);
87  // count positive and negative examples
88  if(t > 0)
89  P++;
90  else
91  N++;
92  }
93 
94  std::sort (L.begin(), L.end(),std::greater<AUCPair>() ); // sort in decreasing order
95 
96  double A = 0; // area
97  unsigned TP = 0; // true positives
98  unsigned FP = 0; // false positives
99  unsigned TPPrev = 0; // previous true positives
100  unsigned FPPrev = 0; // previous false positives
101  double predictionPrev = -std::numeric_limits<double>::max(); // previous prediction
102  for(std::size_t i=0; i != elements; i++) {
103  if(L[i].key != predictionPrev){
104  A += trapArea(FP/double(N),FPPrev/double(N),TP/double(P),TPPrev/double(P));
105  predictionPrev = L[i].key;
106  FPPrev = FP;
107  TPPrev = TP;
108  }
109  if(L[i].value > 0)
110  TP++; // positive example
111  else
112  FP++; // negative example
113  }
114  // deviation from the original algorithm description: A += trapArea(1, FPPrev, 1, TPPrev);
115  A += trapArea(FP/double(N), FPPrev/double(N), TP/double(P), TPPrev/double(P));
116 
117  //~ A /= double(N*P);
118  return -A;
119  }
120 
121  /// \brief Computes area under the curve. If the prediction vector is
122  /// 1-dimensional, the "positive" class is mapped to larger values. If
123  /// the prediction vector is 2-dimensional, the second dimension is
124  /// viewed as the "positive" class. For higher dimensional vectors, an
125  /// exception is thrown. In such a case, the column has to be
126  /// explicitly specified as an additional parameter.
127  ///
128  /// \param target: class label, 0 or 1
129  /// \param prediction: prediction by classifier, OutputType-valued vector
130  double eval(Data<LabelType> const& target, Data<OutputType> const& prediction) const {
131  SHARK_CHECK(prediction.numberOfElements() >= 1,"[NegativeAUC::eval] empty prediction set");
132 
133  std::size_t dim = dataDimension(prediction);
134  if(dim == 1)
135  return eval(target, prediction, 0);
136  else if(dim == 2)
137  return eval(target, prediction, 1);
138 
139  throw SHARKEXCEPTION("[NegativeAUC::eval] no default value for column");
140  return 0.;
141  }
142 
143 
144 protected:
145  double trapArea(double x1, double x2, double y1, double y2) const {
146  double base = std::abs(x1-x2);
147  double heightAvg = (y1+y2)/2;
148  return base * heightAvg;
149  }
150 
151  bool m_invert;
152 };
153 
154 ///
155 /// \brief Negative Wilcoxon-Mann-Whitney statistic
156 ///
157 /// This class computes the Wilcoxon-Mann-Whitney statistic, which is
158 /// an unbiased estimate of the area under the ROC curve.
159 ///
160 /// See, for example:
161 /// Corinna Cortes, Mehryar Mohri. Confidence Intervals for the Area under the ROC Curve. NIPS, 2004
162 ///
163 /// The area is negated so that optimizing the AUC corresponds to a minimization task.
164 ///
165 template<class LabelType = unsigned int, class OutputType = LabelType>
166 class NegativeWilcoxonMannWhitneyStatistic : public AbstractCost<LabelType, OutputType>
167 {
168 public:
169  /// Constructor.
170  /// \param invert: if set to true, the role of positive and negative class are switched
172  m_invert = invert;
173  }
174 
175  /// \brief From INameable: return the class name.
176  std::string name() const
177  { return "NegativeWilcoxonMannWhitneyStatistic"; }
178 
179  /// \brief Computes Wilcoxon-Mann-Whitney statistic.
180  /// \param target: interpreted as binary class label
181  /// \param prediction: interpreted as binary class label
182  /// \param column: indicates the column of the prediction vector interpreted as probability of positive class
183  double eval(Data<LabelType> const& target, Data<OutputType> const& prediction, unsigned int column) const {
184  SHARK_CHECK(prediction(0).size() > column,"[NegativeWilcoxonMannWhitneyStatistic::eval] column number too large");
185  std::vector<double> pos, neg;
186  for(std::size_t i=0; i<prediction.size(); i++) {
187  if(!m_invert){
188  if(target(i) > 0)
189  pos.push_back(prediction.element(i)(column));
190  else
191  neg.push_back(prediction.element(i)(column));
192  }else{
193  if(target(i) > 0)
194  pos.push_back(-prediction.element(i)(column));
195  else
196  neg.push_back(-prediction.element(i)(column));
197  }
198  }
199  std::size_t m = pos.size();
200  std::size_t n = neg.size();
201 
202  std::sort (pos.begin(), pos.end());
203  std::sort (neg.begin(), neg.end());
204 
205  double A = 0;
206  for(std::size_t i = 0, j = 0; i != m; i++) {
207  A += j;
208  for(std::size_t j = 0; j != n; j++) {
209  if(pos[i] > neg[j])
210  A++;
211  else
212  break;
213  }
214  }
215 
216 #ifdef DEBUG
217  // most naive implementation
218  double verifyA = 0.;
219  for(std::size_t i=0; i<m; i++) {
220  for(std::size_t j=0; j<n; j++) {
221  if(pos[i] > neg[j]) verifyA++;
222  }
223  }
224  if (A!=verifyA) {
225  throw( shark::Exception( "shark::WilcoxonMannWhitneyStatistic::eval: error in algorithm, efficient and naive implementation do no coincide", __FILE__, __LINE__ ) );
226  }
227 #endif
228 
229  return -A / (n*m);
230  }
231 
232  double eval(Data<LabelType> const& target, Data<OutputType> const& prediction) const {
233  SHARK_CHECK(prediction.numberOfElements() >= 1,"[NegativeAUC::eval] empty prediction set");
234 
235  std::size_t dim = dataDimension(prediction);
236  if(dim == 1)
237  return eval(target, prediction, 0);
238  else if(dim == 2)
239  return eval(target, prediction, 1);
240 
241  throw SHARKEXCEPTION("[NegativeAUC::eval] no default value for column");
242  return 0.;
243  }
244 
245 private:
246  bool m_invert;
247 };
248 
249 
250 }
251 #endif