// Spaces:
// Running
// Running
// File size: 1,224 Bytes
#include<vector>
#include "clustering.h"
#include "IndexFlat.h"
#include <random>
#include <cstring>
#include <cmath>
/**
 * Runs a fixed-iteration k-means over `n` points of dimension `d`.
 *
 * Centroids are seeded by copying `k` input points chosen uniformly at random
 * (with replacement) using a std::mt19937 seeded with `seed`. Each of the 15
 * iterations then (1) assigns every point to its nearest centroid via an
 * IndexFlatL2 built over the current centroids and (2) replaces each centroid
 * by the mean of the points assigned to it. A centroid that receives no
 * points keeps its previous value.
 *
 * @param d         Dimension of every point and centroid.
 * @param n         Number of input points; `x` is read as n * d floats,
 *                  point i starting at x + i * d.
 * @param k         Number of centroids; `centroids` is written as k * d floats.
 * @param x         Input points.
 * @param centroids Output buffer for the centroids.
 * @param seed      Seed for the random initial selection.
 *
 * If `d`, `n` or `k` is not positive, or either pointer is null, the function
 * returns without touching `centroids`.
 */
void kmean_clustering(int d, int n, int k, const float *x, float *centroids, int seed){
    // uniform_int_distribution(0, n - 1) requires n >= 1, and the loops below
    // assume non-empty dimensions, so reject degenerate input up front.
    if (d <= 0 || n <= 0 || k <= 0 || x == nullptr || centroids == nullptr) {
        return;
    }

    // Do all offset arithmetic in size_t: n * d or k * d can exceed INT_MAX.
    const std::size_t dim = static_cast<std::size_t>(d);
    const std::size_t num_points = static_cast<std::size_t>(n);
    const std::size_t num_clusters = static_cast<std::size_t>(k);

    // Seed the centroids with randomly chosen input points.
    std::mt19937 gen(seed);
    std::uniform_int_distribution<int> distr(0, n - 1);
    for (std::size_t c = 0; c < num_clusters; c++) {
        const std::size_t rand_idx = static_cast<std::size_t>(distr(gen));
        std::memcpy(centroids + c * dim, x + rand_idx * dim, dim * sizeof(float));
    }

    const int niter = 15;
    std::vector<int> assign(num_points);
    std::vector<float> distances(num_points);

    // Scratch buffers are allocated once and reset at the top of each iteration.
    std::vector<float> sums;
    std::vector<int> counts;

    for (int iter = 0; iter < niter; iter++) {
        // Assignment step: nearest centroid (k = 1 neighbour) for every point.
        IndexFlatL2 index(d);
        index.add(k, centroids);
        index.search(n, x, 1, distances.data(), assign.data());

        sums.assign(num_clusters * dim, 0.0f);
        counts.assign(num_clusters, 0);

        // Accumulate per-cluster coordinate sums and member counts.
        for (std::size_t i = 0; i < num_points; i++) {
            const int label = assign[i];
            // Defensive: never index out of bounds on an unexpected label.
            // Whether the index can emit one is not visible from this file.
            if (label < 0 || label >= k) {
                continue;
            }
            const std::size_t c = static_cast<std::size_t>(label);
            counts[c] += 1;
            const float *point = x + i * dim;
            float *sum = sums.data() + c * dim;
            for (std::size_t m = 0; m < dim; m++) {
                sum[m] += point[m];
            }
        }

        // Update step: move each non-empty cluster's centroid to its mean.
        for (std::size_t c = 0; c < num_clusters; c++) {
            if (counts[c] > 0) {
                for (std::size_t m = 0; m < dim; m++) {
                    centroids[c * dim + m] = sums[c * dim + m] / counts[c];
                }
            }
        }
    }
}