36 #ifndef SHARK_ALGORITHMS_KMEANS_H 37 #define SHARK_ALGORITHMS_KMEANS_H 78 SHARK_EXPORT_SYMBOL std::size_t
kMeans(Data<RealVector>
const& data, std::size_t k, Centroids& centroids, std::size_t maxIterations = 0);
107 template<
class InputType>
114 UIntVector clusterMembership(ell);
115 UIntVector clusterSizes(k,0);
116 RealVector ckck(k,0);
119 for(
unsigned int i = 0; i != ell; ++i){
120 clusterMembership(i) = i % k;
123 std::random_shuffle(clusterMembership.begin(),clusterMembership.end(),
uni);
124 for(std::size_t i = 0; i != ell; ++i){
125 ++clusterSizes(clusterMembership(i));
129 std::size_t iter = 0;
131 for(; iter != maxIterations && !equal; ++iter) {
137 for(std::size_t i = 0; i != ell; ++i){
138 std::size_t c1 = clusterMembership(i);
139 for(std::size_t j = 0; j != ell; ++j){
140 std::size_t c2 = clusterMembership(j);
141 if(c1 != c2)
continue;
142 ckck(c1) += kernelMatrix(i,j);
147 UIntVector newClusterMembership(kernelMatrix.size1());
148 RealVector currentDistances(k);
149 for(std::size_t i = 0; i != ell; ++i){
152 noalias(currentDistances) = ckck;
153 for(std::size_t j = 0; j != ell; ++j){
154 std::size_t c = clusterMembership(j);
155 currentDistances(c) -= 2* kernelMatrix(i,j)/clusterSizes(c);
158 newClusterMembership(i) = (
unsigned int)
arg_min(currentDistances);
160 equal = boost::equal(
161 newClusterMembership,
164 noalias(clusterMembership) = newClusterMembership;
166 clusterSizes.clear();
167 for(std::size_t i = 0; i != ell; ++i){
168 ++clusterSizes(clusterMembership(i));
172 for(
unsigned int i = 0; i != k; ++i){
173 if(clusterSizes(i) == 0){
174 std::size_t elem =
uni(ell-1);
175 --clusterSizes(clusterMembership(elem));
176 clusterMembership(elem)=i;
185 expansion.
offset() = -ckck;
186 expansion.
alpha().clear();
187 for(std::size_t i = 0; i != ell; ++i){
188 std::size_t c = clusterMembership(i);
189 expansion.
alpha()(i,c) = 2.0 / clusterSizes(c);