Centroids.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Clusters defined by centroids.
6  *
7  *
8  *
9  * \author T. Glasmachers
10  * \date 2011
11  *
12  *
13  * \par Copyright 1995-2015 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://image.diku.dk/shark/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
35 #ifndef SHARK_MODELS_CLUSTERING_CENTROIDS_H
36 #define SHARK_MODELS_CLUSTERING_CENTROIDS_H
37 
38 #include <shark/Core/DLLSupport.h>
40 #include <shark/Data/Dataset.h>
41 
42 
43 namespace shark {
44 
45 
46 /// \brief Clusters defined by centroids.
47 ///
48 /// \par
49 /// Centroids are an elementary way to define clusters by means
50 /// of the one-nearest-neighbor rule. This rule defines a hard
51 /// clustering decision.
52 ///
53 /// \par
54 /// The Centroids class uses inverse distances to compute soft
55 /// clustering memberships. This is arbitrary and can be changed
56 /// by overriding the membershipKernel function.
57 ///
58 class Centroids : public AbstractClustering<RealVector>
59 {
61 
62 public:
63  /// Default constructor. NOTE(review): no declaration follows this comment in this listing (embedded line 64 is absent) - confirm against the original header.
65 
66  /// Constructor
67  ///
68  /// \param centroids number of centroids in the model (initially zero)
69  /// \param dimension dimension of the input space, and thus of the centroids
70  SHARK_EXPORT_SYMBOL Centroids(std::size_t centroids, std::size_t dimension);
71 
72  /// Constructor
73  ///
74  /// \param centroids centroid vectors. NOTE(review): the matching constructor declaration is absent from this listing (embedded line 75) - confirm against the original header.
76 
77  /// \brief From INameable: return the class name.
78  std::string name() const
79  { return "Centroids"; }
80 
81  /// from IParameterizable
82  SHARK_EXPORT_SYMBOL RealVector parameterVector() const;
83 
84  /// from IParameterizable
85  SHARK_EXPORT_SYMBOL void setParameterVector(RealVector const& newParameters);
86 
87  /// from IParameterizable
88  SHARK_EXPORT_SYMBOL std::size_t numberOfParameters() const;
89 
90  /// return the dimension of the inputs, obtained by calling dataDimension() on the stored centroids
91  std::size_t dimension() const
92  {
93  return dataDimension(m_centroids);
94  }
95 
96  /// return the number of centroids in the model
97  SHARK_EXPORT_SYMBOL std::size_t numberOfClusters() const;
98 
99  /// read-only access to the stored centroid vectors (returned by const reference)
100  Data<RealVector> const& centroids() const{
101  return m_centroids;
102  }
103 
104  /// overwrite the stored centroid vectors by assigning newCentroids to them
105  void setCentroids(Data<RealVector> const& newCentroids){
106  m_centroids = newCentroids;
107  }
108 
109  /// from ISerializable
110  SHARK_EXPORT_SYMBOL void read(InArchive& archive);
111 
112  /// from ISerializable
113  SHARK_EXPORT_SYMBOL void write(OutArchive& archive) const;
114 
115  /// From AbstractClustering: Compute cluster memberships for a single pattern.
116  SHARK_EXPORT_SYMBOL RealVector softMembership(RealVector const& pattern) const;
117  /// From AbstractClustering: Compute cluster memberships for a batch of patterns.
118  SHARK_EXPORT_SYMBOL RealMatrix softMembership(BatchInputType const& patterns) const;
119 
120  /// Computes the distances of each pattern to all cluster centers
121  SHARK_EXPORT_SYMBOL RealMatrix distances(BatchInputType const& patterns) const;
122 
123 
124  /// initialize centroids from labeled data: take the first
125  /// data points with different labels; if there are more
126  /// centroids than classes, the remaining centroids are filled
127  /// with the first elements in the data set
128  ///
129  /// \param data dataset from which to take the centroids
130  /// \param noClusters number of centroids in the model, default 0 is mapped to the number of classes in the data set
131  /// \param noClasses number of classes in the dataset, default 0 means that the number is computed
132  SHARK_EXPORT_SYMBOL void initFromData(ClassificationDataset const& data, std::size_t noClusters = 0, std::size_t noClasses = 0);
133 
134  /// initialize centroids from unlabeled data:
135  /// take a random subset of data points
136  ///
137  /// \param dataset dataset from which to take the centroids
138  /// \param noClusters number of centroids in the model
139  SHARK_EXPORT_SYMBOL void initFromData(Data<RealVector> const& dataset, std::size_t noClusters);
140 
141 protected:
142  /// Compute unnormalized membership from distance.
143  /// The default implementation is to return exp(-distance). NOTE(review): the class description above says inverse distances are used - confirm which one matches the implementation.
144  SHARK_EXPORT_SYMBOL virtual double membershipKernel(double dist) const;
145 
146  /// centroid vectors. NOTE(review): the m_centroids member declaration is absent from this listing (embedded line 147) although it is used above - confirm against the original header.
148 };
149 
150 
151 }
152 #endif