AbstractClustering.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Super class for clustering definitions.
6  *
7  *
8  *
9  * \author T. Glasmachers
10  * \date 2011
11  *
12  *
13  * \par Copyright 1995-2015 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://image.diku.dk/shark/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
35 #ifndef SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
36 #define SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
37 
38 
40 #include <shark/Core/Flags.h>
41 #include <shark/Core/INameable.h>
44 
45 
46 namespace shark {
47 
48 
49 ///
50 /// \brief Base class for clustering.
51 ///
52 /// \par
53 /// Clustering algorithms vary widely in the data structures
54 /// on which they operate. For example, simple centroid-based
55 /// approaches such as k-means are mutually incompatible with
56 /// tree-based hierarchical clustering. This interface class
57 /// is the attempt to cast different cluster descriptions into
58 /// a minimal common interface that is useful for prediction.
59 ///
60 /// \par
61 /// There are at least two common types of predictions made
62 /// with clusterings. The first one is the assignment of the
63 /// best matching cluster to a patters, such as in vector
64 /// quantization or unsupervised clustering. This is often
65 /// referred to as "hard clustering". The second one is the
66 /// computation of a membership function ("soft clustering").
67 /// The membership of a pattern to a cluster is non-negative,
68 /// and the total membership should add to one.
69 ///
70 /// \par
71 /// This interface makes minimal assumptions to allow for
72 /// these types of predictions. It assumes that clusters can
73 /// be enumbered (identified by an index), and that at least
74 /// a membership test can be made (hard clustering). It is
75 /// optional to provide a membership function. Only one of
76 /// the two interfaces for best matching cluster or membership
77 /// function need to be implemented, since the best matching
78 /// cluster can be deduced from the membership function.
79 /// However, often the best matching cluster can be computed
80 /// more efficiently than the membership function. In these
81 /// cases both interface functions should be implemented.
82 ///
83 /// \par
84 /// The purpose of this interface is to act as a common
85 /// super class to different data structures describing the
86 /// outcome of a clustering operation. The interface allows
87 /// all of these data structures to be used in the two
88 /// clustering model classes: HardClusteringModel and
89 /// SoftClusteringModel.
90 ///
91 template <class InputT>
93 {
94 public:
95  typedef InputT InputType;
96  typedef unsigned int OutputType;
99 
100  enum Feature {
102  };
104 
105  /// Tests whether the clustering can compute a (soft)
106  /// member ship function, describing the membership
107  /// of a sample to the different clusters.
110  }
111 
112  /// return the number of clusters
113  virtual std::size_t numberOfClusters() const = 0;
114 
115  /// \brief Compute best matching cluster.
116  ///
117  /// \par
118  /// This function should be overriden by sub-classes to
119  /// compute the cluster best matching the input pattern.
120  /// The (typically slow) default implementation is to
121  /// create a batch of size 1 and return the result of the batch call to hardMembership
122  virtual unsigned int hardMembership(InputType const& pattern) const{
124  get(b,0) = pattern;
125  return hardMembership(b)(0);
126  }
127 
128  /// \brief Compute best matching cluster for a batch of inputs.
129  ///
130  /// \par
131  /// This function should be overriden by sub-classes to
132  /// compute the cluster best matching the input pattern.
133  /// The (typically slow) default implementation is to
134  /// return the arg max of the soft membership function for every pattern.
135  virtual BatchOutputType hardMembership(BatchInputType const& patterns) const{
136  std::size_t numPatterns = boost::size(patterns);
137  RealMatrix f = softMembership(patterns);
138  SHARK_ASSERT(f.size2() > 0);
139  SHARK_ASSERT(f.size1() == numPatterns);
140  BatchOutputType outputs(numPatterns);
141  for(std::size_t i = 0; i != numPatterns;++i){
142  RealMatrixRow membership(f,i);
143  outputs(i) = (unsigned int)(std::max_element(membership.begin(),membership.end())-membership.begin());
144  }
145  return outputs;
146  }
147 
148  /// \brief Compute cluster membership function.
149  ///
150  /// \par
151  /// This function should be overriden by sub-classes to
152  /// compute the membership of a pattern to the clusters.
153  /// The default implementation creates a batch of size 1
154  /// and calls the batched version. If this is not overriden, an xception is thrown.
155  virtual RealVector softMembership(InputType const& pattern) const{
157  }
158 
159  /// \brief Compute cluster membership function.
160  ///
161  /// \par
162  /// This function should be overriden by sub-classes to
163  /// compute the membership of a pattern to the clusters.
164  /// This default implementation throws an exception.
165  virtual RealMatrix softMembership(BatchInputType const& patterns) const{
167  }
168 
169  /// empty default implementation of ISerializable::read
170  void read(InArchive& archive) {}
171 
172  /// empty default implementation of ISerializable::write
173  void write(OutArchive& archive) const {}
174 };
175 
176 
177 }
178 #endif