WilcoxonRankSumTest.h
Go to the documentation of this file.
1 /*!
2  * \brief Wilcoxon Ranksum test implementation
3  *
4  * \author T.Voss
5  * \date 2011
6  *
7  *
8  * \par Copyright 1995-2015 Shark Development Team
9  *
10  * <BR><HR>
11  * This file is part of Shark.
12  * <http://image.diku.dk/shark/>
13  *
14  * Shark is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation, either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * Shark is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 #ifndef WILCOXONRANKSUMTEST_H
29 #define WILCOXONRANKSUMTEST_H
30 
31 #include <boost/math/special_functions/binomial.hpp>
32 #include <boost/math/special_functions/erf.hpp>
33 
34 namespace shark {
35  /// \brief Wilcoxon rank-sum test / Mann–Whitney U test.
36  ///
37  /// Non-parametric statistical hypothesis test for assessing
38  /// whether two independent samples of observations have
39  /// equally large elements.
41  /// Stores result of Wilcoxon rank-sum test.
42  struct Result {
43  double m_pH0;
46  };
47  /// Stores information about an observation.
48  struct Element {
49  enum Sample {
51  SAMPLE_B
52  };
53  double m_value;
54  double m_rank;
56 
57  bool operator<( const Element & rhs ) const {
58  return( m_value < rhs.m_value );
59  }
60  };
61 
62  std::size_t faculty( std::size_t n ) {
63  if( n == 1 )
64  return( 1 );
65 
66  return( n * faculty( n-1 ) );
67  }
68 
69  double frequency( double u, int sampleSizeA, int sampleSizeB ) {
70  if( u < 0. || sampleSizeA < 0 || sampleSizeB < 0 )
71  return( 0. );
72 
73  if( u == 0 && sampleSizeA >= 0 && sampleSizeB >= 0 )
74  return( 1 );
75 
76  return( frequency( u - sampleSizeB, sampleSizeA - 1, sampleSizeB ) + frequency( u, sampleSizeA, sampleSizeB - 1 ) );
77  }
78 
79  template<typename SampleType>
80  Result operator()( const SampleType & x, const SampleType & y ) {
81 
82  Result result;
83 
84  std::vector<Element> combinedSample( x.size() + y.size() );
85 
86  for( unsigned int i = 0; i < x.size(); i++ ) {
87  combinedSample[i].m_value = x.at( i );
88  combinedSample[i].m_sample = Element::SAMPLE_A;
89  }
90  for( unsigned int i = 0; i < y.size(); i++ ) {
91  combinedSample[i + x.size()].m_value = y.at( i );
92  combinedSample[i + x.size()].m_sample = Element::SAMPLE_B;
93  }
94 
95  std::sort( combinedSample.begin(), combinedSample.end() );
96 
97  std::pair< std::vector<Element>::iterator, std::vector<Element>::iterator > p;
98  std::vector<Element>::iterator it = combinedSample.begin();
99  std::size_t rank = 1;
100  while( it != combinedSample.end() ) {
101  p = std::equal_range( it, combinedSample.end(), *it );
102  it->m_rank = rank;
103  std::size_t c = std::distance( p.first, p.second );
104  if( c == 1 ) {
105  ++it;
106  rank += c;
107  continue;
108  }
109  it = p.first;
110  while( it != p.second ) {
111  it->m_rank = rank + c/2.;
112  ++it;
113  }
114 
115  rank += c;
116  }
117 
118  // std::copy( combinedSample.begin(), combinedSample.end(), std::ostream_iterator<Element>( std::cout, "" ) );
119 
120  double wA = 0.;
121  double wB = 0.;
122 
123  for( it = combinedSample.begin(); it != combinedSample.end(); ++it ) {
124  if( it->m_sample == Element::SAMPLE_A )
125  wA += it->m_rank;
126  else
127  wB += it->m_rank;
128  }
129 
130  // Check on consistency
131  /*if( wA + wB != ( 0.5 * (x.size() + y.size()) * (x.size() + y.size() + 1) ) )
132  throw( Exception( "Wilcoxon sums are inconsistent", __LINE__, __FILE__ ) );*/
133 
134  double uA = wA - x.size() * ( x.size() + 1 ) / 2.;
135  double uB = wB - y.size() * ( y.size() + 1 ) / 2.;
136 
137  double pA = 1.;
138  double pB = 1.;
139 
140  std::cout << "ua: " << uA << ", ub: " << uB << std::endl;
141 
142  double cases = boost::math::binomial_coefficient<double>( x.size() + y.size(), x.size() );
143  if( cases < 0 || cases > 1000000 ) {
144  std::cout << "Normal approximation" << std::endl;
145  // normal approximation
146  double mu = (0.5 * x.size()) * y.size();
147  double sigma = ::sqrt( ( ( x.size() + 1.0) * x.size() ) * y.size() / 12.0 );
148  double Za = (uA - mu) / sigma;
149  double Zb = (uB - mu) / sigma;
150  pA = 0.5 * boost::math::erfc(-Za / M_SQRT2);
151  pB = 0.5 * boost::math::erfc(-Zb / M_SQRT2);
152  } else {
153  pA = ( faculty( x.size() ) * faculty( y.size() ) ) / ( faculty( x.size() + y.size() ) ) * frequency( uA, x.size(), y.size() );
154  pB = ( faculty( x.size() ) * faculty( y.size() ) ) / ( faculty( x.size() + y.size() ) ) * frequency( uB, x.size(), y.size() );
155  }
156 
157  result.m_pH1 = 1.-pA;
158  result.m_pH2 = 1.-pB;
159 
160  return( result );
161  }
162 
163  };
164 
165  template<typename Stream>
166  Stream & operator<<( Stream & s, const WilcoxonRankSumTest::Element & element ) {
167  s << "Element=" << element.m_value << " " << element.m_rank << " " << element.m_sample << std::endl;
168  }
169 
170 }
171 
172 #endif // WILCOXONRANKSUMTEST_H