LabelOrder.h
Go to the documentation of this file.
//===========================================================================
/*!
 *
 *
 * \brief Relabels a given dataset so that its labels are 0..N-1, and restores the original labels again (vice versa)
 *
 *
 *
 * \author Aydin Demircioglu
 * \date 2014
 *
 *
 * \par Copyright 1995-2015 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://image.diku.dk/shark/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================
34 
35 
36 #ifndef SHARK_LABELORDER_H
37 #define SHARK_LABELORDER_H
38 
#include <shark/Core/Exception.h>
#include <shark/Core/INameable.h>

#include <shark/Data/Dataset.h>

#include <cstddef>
#include <limits>
#include <string>
#include <vector>
43 
44 
45 
46 
47 namespace shark
48 {
49 
50 
51 /// \brief This will normalize the labels of a given dataset to 0..N-1
52 ///
53 /// \par This will normalize the labels of a given dataset to 0..N-1
54 /// and store the ordering in a member variable.
55 /// After processing, the dataset will afterwards have labels ranging
56 /// from 0 to N-1, with N the number of classes, so usual Shark
57 /// trainers can work with it.
58 /// One can then revert the original labeling just by calling restoreOriginalLabels
59 class LabelOrder : public INameable
60 {
61 private:
62 
63 public:
64 
65 
66  LabelOrder() {};
67 
68 
69  virtual ~LabelOrder() {};
70 
71 
72  /// \brief From INameable: return the class name.
73  std::string name() const
74  { return "LabelOrder"; }
75 
76 
77  /// \brief This will normalize the labels and store the ordering in the
78  /// member variables. The dataset will afterwards have labels ranging
79  /// from 0 to N-1, with N the number of classes.
80  /// This will overwrite any previously stored label ordering in the object.
81  ///
82  /// \param[in,out] dataset dataset that will be relabeled
83 
85  {
86  // determine the min and max labels of the given dataset
87  int minLabel = std::numeric_limits<int>::max();
88  int maxLabel = -1;
89  for(std::size_t i = 0; i < dataset.numberOfElements(); ++i)
90  {
91  int label = dataset.labels().element(i);
92 
93  // we react allergic to negative labels
94  if(label < 0)
95  throw SHARKEXCEPTION("Negative label found. Will not process negative labels!");
96 
97  if(label < minLabel)
98  minLabel = label;
99  if(label > maxLabel)
100  maxLabel = label;
101  }
102 
103  // now we create an vector that can hold the label ordering
104  m_labelOrder.clear();
105 
106  // and one array that tracks what we already encountered
107  std::vector<unsigned int> foundLabels(maxLabel - minLabel + 1, -1);
108 
109  // and insert all labels we encounter
110  unsigned int currentPosition = 0;
111  for(std::size_t i = 0; i < dataset.numberOfElements(); i++)
112  {
113  // is it a new label?
114  unsigned int label = dataset.labels().element(i);
115  if(foundLabels[label - minLabel] == -1)
116  {
117  foundLabels[label - minLabel] = currentPosition;
118  m_labelOrder.push_back(label);
119  currentPosition++;
120  }
121  }
122 
123  // now map every label
124  for(std::size_t i = 0; i < dataset.numberOfElements(); i++)
125  {
126  int label = dataset.labels().element(i);
127  dataset.labels().element(i) = foundLabels[label - minLabel];
128  }
129  }
130 
131 
132 
133  /// \brief This will restore the original labels of the dataset. This
134  /// must be called with data compatible the original dataset, so that the labels will
135  /// fit. The label ordering will not be destroyed after calling this function, so
136  /// it can be called multiple times, e.g. to testsets or similar data.
137  ///
138  /// \param[in,out] dataset dataset to relabel (restore labels)
139 
141  {
142  // now map every label
143  for(std::size_t i = 0; i < dataset.numberOfElements(); ++i)
144  {
145  int label = dataset.labels().element(i);
146 
147  // check if the reordering fit the data
148  if(label >= (int) m_labelOrder.size())
149  throw SHARKEXCEPTION("Dataset labels does not fit to the stored ordering!");
150 
151  // relabel
152  label = m_labelOrder[label];
153  dataset.labels().element(i) = label;
154  }
155  }
156 
157 
158 
159  /// \brief Get label ordering directly
160  ///
161  /// \param[out] labelOrder vector to store the current label order.
162 
163  void getLabelOrder(std::vector<int> &labelOrder)
164  {
165  labelOrder = m_labelOrder;
166  }
167 
168 
169 
170  /// \brief Get label ordering directly
171  ///
172  /// \param[out] labelOrder vector to store the current label order.
173 
174  void getLabelOrder (std::vector<unsigned int> &labelOrder)
175  {
176  labelOrder = std::vector<unsigned int>( m_labelOrder.begin(), m_labelOrder.end() );
177  }
178 
179 
180  /// \brief Set label ordering directly
181  ///
182  /// \param[in] labelOrder vector with the new label order
183 
184  void setLabelOrder(std::vector<int> &labelOrder)
185  {
186  m_labelOrder = labelOrder;
187  }
188 
189 
190  /// \brief Set label ordering directly
191  ///
192  /// \param[in] labelOrder vector with the new label order
193  void setLabelOrder (std::vector<unsigned int> &labelOrder)
194  {
195  m_labelOrder = std::vector<int>( labelOrder.begin(), labelOrder.end() );
196  }
197 
198 
199 protected:
200 
201  std::vector<int> m_labelOrder;
202 };
203 
204 }
205 
206 #endif
207