Shark machine learning library
About Shark
News!
Contribute
Credits and copyright
Downloads
Getting Started
Installation
Using the docs
Documentation
Tutorials
Quick references
Class list
Global functions
FAQ
Showroom
include
shark
Models
Clustering
Centroids.h
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Clusters defined by centroids.
6
*
7
*
8
*
9
* \author T. Glasmachers
10
* \date 2011
11
*
12
*
13
* \par Copyright 1995-2015 Shark Development Team
14
*
15
* <BR><HR>
16
* This file is part of Shark.
17
* <http://image.diku.dk/shark/>
18
*
19
* Shark is free software: you can redistribute it and/or modify
20
* it under the terms of the GNU Lesser General Public License as published
21
* by the Free Software Foundation, either version 3 of the License, or
22
* (at your option) any later version.
23
*
24
* Shark is distributed in the hope that it will be useful,
25
* but WITHOUT ANY WARRANTY; without even the implied warranty of
26
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27
* GNU Lesser General Public License for more details.
28
*
29
* You should have received a copy of the GNU Lesser General Public License
30
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
31
*
32
*/
33
//===========================================================================
34
35
#ifndef SHARK_MODELS_CLUSTERING_CENTROIDS_H
36
#define SHARK_MODELS_CLUSTERING_CENTROIDS_H
37
38
#include <
shark/Core/DLLSupport.h
>
39
#include <
shark/Models/Clustering/AbstractClustering.h
>
40
#include <
shark/Data/Dataset.h
>
41
42
43
namespace
shark
{
44
45
46
/// \brief Clusters defined by centroids.
47
///
48
/// \par
49
/// Centroids are an elementary way to define clusters by means
50
/// of the one-nearest-neighbor rule. This rule defines a hard
51
/// clustering decision.
52
///
53
/// \par
54
/// The Centroids class uses inverse distances to compute soft
55
/// clustering memberships. This is arbitrary and can be changed
56
/// by overriding the membershipKernel function.
57
///
58
class
Centroids
:
public
AbstractClustering
<RealVector>
59
{
60
typedef
AbstractClustering<RealVector>
base_type
;
61
62
public
:
63
/// Default constructor
64
SHARK_EXPORT_SYMBOL
Centroids
();
65
66
/// Constructor
67
///
68
/// \param centroids number of centroids in the model (initially zero)
69
/// \param dimension dimension of the input space, and thus of the centroids
70
SHARK_EXPORT_SYMBOL
Centroids
(std::size_t
centroids
, std::size_t
dimension
);
71
72
/// Constructor
73
///
74
/// \param centroids centroid vectors
75
SHARK_EXPORT_SYMBOL
Centroids
(
Data<RealVector>
const
& centroids);
76
77
/// \brief From INameable: return the class name.
78
std::string
name
()
const
79
{
return
"Centroids"
; }
80
81
/// from IParameterizable
82
SHARK_EXPORT_SYMBOL
RealVector
parameterVector
()
const
;
83
84
/// from IParameterizable
85
SHARK_EXPORT_SYMBOL
void
setParameterVector
(RealVector
const
& newParameters);
86
87
/// from IParameterizable
88
SHARK_EXPORT_SYMBOL
std::size_t
numberOfParameters
()
const
;
89
90
/// return the dimension of the inputs
91
std::size_t
dimension
()
const
92
{
93
return
dataDimension
(
m_centroids
);
94
}
95
96
/// return the number of centroids in the model
97
SHARK_EXPORT_SYMBOL
std::size_t
numberOfClusters
()
const
;
98
99
/// read access to the centroid vectors
100
Data<RealVector>
const
&
centroids
()
const
{
101
return
m_centroids
;
102
}
103
104
/// overwrite the centroid vectors
105
void
setCentroids
(
Data<RealVector>
const
& newCentroids){
106
m_centroids
= newCentroids;
107
}
108
109
/// from ISerializable
110
SHARK_EXPORT_SYMBOL
void
read
(
InArchive
& archive);
111
112
/// from ISerializable
113
SHARK_EXPORT_SYMBOL
void
write
(
OutArchive
& archive)
const
;
114
115
/// from AbstractClustering: Compute cluster memberships.
116
SHARK_EXPORT_SYMBOL
RealVector
softMembership
(RealVector
const
& pattern)
const
;
117
/// From AbstractClustering: Compute cluster memberships for a batch of patterns.
118
SHARK_EXPORT_SYMBOL
RealMatrix
softMembership
(
BatchInputType
const
& patterns)
const
;
119
120
/// Computes the distances of each pattern to all cluster centers
121
SHARK_EXPORT_SYMBOL
RealMatrix
distances
(
BatchInputType
const
& patterns)
const
;
122
123
124
/// initialize centroids from labeled data: take the first
125
/// data points with different labels; if there are more
126
/// centroids than classes, the remaining centroids are filled
127
/// with the first elements in the data set
128
///
129
/// \param data dataset from which to take the centroids
130
/// \param noClusters number of centroids in the model, default 0 is mapped to the number of classes in the data set
131
/// \param noClasses number of clases in the dataset, default 0 means that the number is computed
132
SHARK_EXPORT_SYMBOL
void
initFromData
(
ClassificationDataset
const
& data, std::size_t noClusters = 0, std::size_t noClasses = 0);
133
134
/// initialize centroids from unlabeled data:
135
/// take a random subset of data points
136
///
137
/// \param dataset dataset from which to take the centroids
138
/// \param noClusters number of centroids in the model
139
SHARK_EXPORT_SYMBOL
void
initFromData
(
Data<RealVector>
const
& dataset, std::size_t noClusters);
140
141
protected
:
142
/// Compute unnormalized membership from distance.
143
/// The default implementation is to return exp(-distance)
144
SHARK_EXPORT_SYMBOL
virtual
double
membershipKernel
(
double
dist)
const
;
145
146
/// centroid vectors
147
Data<RealVector>
m_centroids
;
148
};
149
150
151
}
152
#endif