Shark machine learning library
About Shark
News!
Contribute
Credits and copyright
Downloads
Getting Started
Installation
Using the docs
Documentation
Tutorials
Quick references
Class list
Global functions
FAQ
Showroom
include
shark
Algorithms
Trainers
NormalizeComponentsUnitInterval.h
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Data normalization to the unit interval
6
*
7
*
8
*
9
*
10
* \author T. Glasmachers
11
* \date 2010, 2013
12
*
13
*
14
* \par Copyright 1995-2015 Shark Development Team
15
*
16
* <BR><HR>
17
* This file is part of Shark.
18
* <http://image.diku.dk/shark/>
19
*
20
* Shark is free software: you can redistribute it and/or modify
21
* it under the terms of the GNU Lesser General Public License as published
22
* by the Free Software Foundation, either version 3 of the License, or
23
* (at your option) any later version.
24
*
25
* Shark is distributed in the hope that it will be useful,
26
* but WITHOUT ANY WARRANTY; without even the implied warranty of
27
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28
* GNU Lesser General Public License for more details.
29
*
30
* You should have received a copy of the GNU Lesser General Public License
31
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
32
*
33
*/
34
//===========================================================================
35
36
37
#ifndef SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITINTERVAL_H
38
#define SHARK_ALGORITHMS_TRAINERS_NORMALIZECOMPONENTSUNITINTERVAL_H
39
40
41
#include <
shark/Models/Normalizer.h
>
42
#include <
shark/Algorithms/Trainers/AbstractTrainer.h
>
43
44
namespace
shark
{
45
46
47
///
48
/// \brief Train a model to normalize the components of a dataset to fit into the unit inverval
49
///
50
/// \par
51
/// Normalizing the components of a dataset works via
52
/// training a LinearMap model. This model is then
53
/// applied to the dataset in order to perform the
54
/// normalization. The same model can be applied to
55
/// different datasets.
56
///
57
/// \par
58
/// The typical use case is that the AffineLinearMap
59
/// model is trained on the training data. Later, as
60
/// "test" data comes in, the same model is used, of
61
/// course without being recalibrated. Thus, the model
62
/// used for normalization must be independent of the
63
/// dataset it was trained on.
64
///
65
/// \par
66
/// Note that the transformation represented by this
67
/// trainer destroys sparsity of the data. Therefore
68
/// one may prefer NormalizeComponentsUnitVariance
69
/// particularly on sparse data.
70
///
71
template
<
class
DataType = RealVector>
72
class
NormalizeComponentsUnitInterval
:
public
AbstractUnsupervisedTrainer
< Normalizer<DataType> >
73
{
74
public
:
75
typedef
AbstractUnsupervisedTrainer< Normalizer<DataType>
>
base_type
;
76
77
NormalizeComponentsUnitInterval
()
78
{ }
79
80
/// \brief From INameable: return the class name.
81
std::string
name
()
const
82
{
return
"NormalizeComponentsUnitInterval"
; }
83
84
void
train
(
Normalizer<DataType>
& model,
UnlabeledData<DataType>
const
& input)
85
{
86
//SHARK_CHECK(model.hasOffset(), "[NormalizeComponentsUnitInterval::train] model must have an offset term");
87
std:: size_t ic = input.
numberOfElements
();
88
SHARK_CHECK
(ic >= 2,
"[NormalizeComponentsUnitInterval::train] input needs to consist of at least two points"
);
89
std::size_t dc =
dataDimension
(input);
90
91
RealVector
min
= input.
element
(0);
92
RealVector
max
= input.
element
(0);
93
for
(std::size_t i=1; i != ic; i++){
94
for
(std::size_t d = 0; d != dc; d++){
95
double
x = input.
element
(i)(d);
96
min
(d) =
std::min
(
min
(d), x);
97
max
(d) =
std::max
(
max
(d), x);
98
}
99
}
100
101
RealVector diagonal(dc);
102
RealVector offset(dc);
103
104
for
(std::size_t d=0; d != dc; d++)
105
{
106
if
(
min
(d) ==
max
(d))
107
{
108
diagonal(d) = 0.0;
109
offset(d) = -
min
(d) + 0.5;
110
}
111
else
112
{
113
double
n = 1.0 / (
max
(d) -
min
(d));
114
diagonal(d) = n;
115
offset(d) = -
min
(d) * n;
116
}
117
}
118
119
model.
setStructure
(diagonal, offset);
120
}
121
};
122
123
124
}
125
#endif