PCA.cpp
Go to the documentation of this file.
2 
3 //header needed for data generation
5 #include <boost/foreach.hpp>//just for beauty :)
6 
7 using namespace shark;
8 using namespace std;
9 
10 ///In this test, we will use PCA to calculate the
11 ///eigenvectors of a scatter matrix and do a
12 ///reduction of the subspace to the space
13 ///spanned by the two eigenvectors with the biggest
14 ///eigenvalues.
15 
16 ///the principal components of our multivariate data distribution
17 ///we will use them later for checking
18 double principalComponents[3][3] =
19 {
20  { 5, 0, 0},
21  { 0, 2, 2},
22  { 0,-1, 1}
23 };
24 
25 std::size_t numberOfExamples = 30000;
26 
27 ///The test distribution is just a multivariate Gaussian.
29 {
30  RealVector mean(3);
31  mean(0) = 1;
32  mean(1) = -1;
33  mean(2) = 3;
34 
35  // to create the covariance matrix we first put the
36  // copy the principal components in the matrix
37  // and than use an outer product
38  RealMatrix covariance(3,3);
39  for(int i = 0; i != 3; ++i)
40  {
41  for(int j = 0; j != 3; ++j)
42  {
43  covariance(i,j) = principalComponents[i][j];
44  }
45  }
46  covariance = prod(trans(covariance),covariance);
47 
48  //now we can create the distribution
49  MultiVariateNormalDistribution distribution(covariance);
50 
51  //and we sample from it
52  std::vector<RealVector> data(numberOfExamples);
53  BOOST_FOREACH(RealVector& sample, data)
54  {
55  //first element is the sample, second is the underlying uniform gaussian
56  sample = mean + distribution(Rng::globalRng).first;
57  }
58  return createDataFromRange(data);
59 }
60 
61 
62 int main(){
63 
64  // We first create our problem. Since the PCA is a unsupervised Method,
65  // We use UnlabeledData instead of Datas.
67 
68  // With the definition of the model, we declare, how many
69  // principal components we want. If we want all, we set
70  // inputs=outputs = 3, but since want to do a reduction, we
71  // use only 2 in the second argument. The third argument is
72  // the bias. pca needs a bias to work.
73  LinearModel<> pcaModel(3,2,true);
74 
75  // Now we can construct the PCA.
76  // We can decide whether we want a whitened output or not.
77  // For testing purposes, we don't want whitening in this
78  // example.
79  PCA pca;
80  pca.setWhitening(false);
81  pca.train(pcaModel,data);
82 
83 
84 
85  //Print the rescaled results.
86  //Should be the same as principalComponents, except for sign change
87  //and numerical errors.
88  cout << "RESULTS: " << std::endl;
89  cout << "======== " << std::endl << std::endl;
90  cout << "principal component 1: " << row(pcaModel.matrix(),0) * sqrt(pca.eigenvalues()(0)) << std::endl;
91  cout << "principal component 2: " << row(pcaModel.matrix(),1) * sqrt( pca.eigenvalues()(1) ) << std::endl;
92 
93 }