Datasets.cpp
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Data Containers (dataset usage examples)
6  *
7  * This file is part of the tutorial "Data Containers".
8  * By itself, it does not do anything particularly useful.
9  *
10  * \author T. Glasmachers
11  * \date 2014
12  *
13  *
14  * \par Copyright 1995-2015 Shark Development Team
15  *
16  * <BR><HR>
17  * This file is part of Shark.
18  * <http://image.diku.dk/shark/>
19  *
20  * Shark is free software: you can redistribute it and/or modify
21  * it under the terms of the GNU Lesser General Public License as published
22  * by the Free Software Foundation, either version 3 of the License, or
23  * (at your option) any later version.
24  *
25  * Shark is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU Lesser General Public License
31  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
32  *
33  */
34 //===========================================================================
35 
36 #include <shark/Data/Dataset.h>
37 
38 #include <shark/Data/DataView.h>
39 
42 
43 
44 using namespace shark;
45 
46 
47 class F
48 {
49 public:
50  typedef RealVector result_type;
51  RealVector operator () (RealVector x) const
52  { return (2.0 * x); }
53 };
54 
/// Example functor for transformLabels(): maps a label y to y + 1.
class G
{
public:
	/// Type returned by operator(), published for the dataset transform helpers.
	typedef unsigned int result_type;

	/// \param y  input label
	/// \return   y + 1 (unsigned arithmetic, so the maximal value wraps to 0)
	unsigned int operator () (unsigned int y) const
	{ return y + 1; }
};
62 
63  class Add
64  {
65  public:
66  Add(RealVector offset) : m_offset(offset) {}
67 
68  typedef RealVector result_type; // do not forget to specify the result type
69 
70  RealVector operator () (RealVector input) const { // const is important
71  return (input + m_offset);
72  }
73 
74  private:
75  RealVector m_offset;
76  };
77 
78 
79 int main()
80 {
81 
82 {
83  std::vector<RealVector> points;
85 }
86 {
87  std::vector<RealVector> inputs;
88  std::vector<unsigned int> labels;
90 }
91 {
92  Data<RealVector> data(1000, RealVector(5));
93 }
94 {
95  Data<RealVector> data(1000, RealVector(5), 100);
96 }
97 {
98  Data<RealVector> data;
99  Data<RealVector> data2(data);
100  data = data2;
101  data.makeIndependent();
102 }
103 {
104  Data<RealVector> data;
105  typedef Data<RealVector>::batch_range Batches;
106  Batches batches = data.batches();
107 
108  std::cout << batches.size() << std::endl;
109  for (Batches::iterator pos = batches.begin(); pos != batches.end(); ++pos) {
110  std::cout << *pos << std::endl;
111  }
112 }
113 {
114  Data<RealVector> data;
117  BOOST_FOREACH(BatchRef batch, data.batches()) {
118  std::cout << batch << std::endl;
119  }
120  for (std::size_t i = 0; i != data.numberOfBatches(); ++i) {
121  std::cout << data.batch(i) << std::endl;
122  }
123  BOOST_FOREACH(BatchRef batch, data.batches()) {
124  for(std::size_t i=0; i != boost::size(batch); ++i) {
125  std::cout << shark::get(batch,i ); // prints element i of the batch
126  }
127  }
128  typedef Data<RealVector>::element_range Elements;
130 
131  // 1: explicit iterator loop using the range over the elements
132  Elements elements = data.elements();
133  for (Elements::iterator pos = elements.begin(); pos != elements.end(); ++pos) {
134  std::cout << *pos << std::endl;
135  }
136 
137  // 2: BOOST_FOREACH
138  BOOST_FOREACH(ElementRef element, data.elements()) {
139  std::cout << element << std::endl;
140  }
141 }
142 {
143  Data<unsigned int> data;
144  std::size_t classes = numberOfClasses(data); // maximal class label minus one
145  std::vector<std::size_t> sizes = classSizes(data); // number of occurrences of every class label
146 
147  Data<RealVector> dataVec;
148  std::size_t dim = dataDimension(dataVec); // dimensionality of the data points
149 }
150 {
152  std::size_t classes = numberOfClasses(data); // maximal class label minus one
153  std::vector<std::size_t> sizes = classSizes(data); // number of occurrences of every class label
154  std::size_t dim = inputDimension(data); // dimensionality of the data points
155 }
156 {
157  F f;
158  G g;
159  Data<RealVector> data; // initial data set
160  data = transform(data, f); // applies f to each element
161 
162  LabeledData<RealVector, unsigned int> labeledData; // initial labeled dataset
163  labeledData = transformInputs(labeledData, f); // applies f to each input
164  labeledData = transformLabels(labeledData, g); // applies g to each label
165 
166  // a linear model, for example for whitening
167  LinearModel<> model;
168  // application of the model to the data
169  labeledData = transformInputs(labeledData, model);
170  // or an alternate shortcut:
171  data = model(data);
172 }
173 {
174  Data<RealVector> data;
175  RealVector v(3); v(0) = 1.0; v(1) = 3.0; v(2) = -0.5;
176  data = transform(data, Add(v));
177 }
178 {
179  Data<unsigned int> dataset;
180  DataView<Data<unsigned int> > view(dataset);
181  for (std::size_t i=0; i != view.size(); ++i) {
182  std::cout << view[i] << std::endl;
183  }
184  std::vector<std::size_t> indices;
185  // somehow choose a set of indices
186  Data<unsigned int> subsetData = toDataset(subset(view, indices));
187 }
188 {
189  Data<unsigned int> dataset;
190  DataView<Data<unsigned int> > view(dataset);
191  std::vector<std::size_t> indices;
192  std::size_t maximumBatchSize = 100;
193  Data<unsigned int> subsetData = toDataset(subset(view, indices), maximumBatchSize);
194 }
195 {
198  std::cout << numberOfClasses(view) << " " << inputDimension(view) << std::endl;
199 }
200 
201 }