// IndexIVFPQ implementation: constructor, train(), a routine that stores encoded
// vectors under caller-supplied ids, and search().
//
// NOTE(review): this copy of the file looks damaged. Everything that originally
// sat between a '<' and a later '>' appears to be missing (header names in the
// #include directives, template arguments, several loop conditions and bodies),
// and the text stops in the middle of search(). Restore the file from version
// control before building or editing it. The notes below describe only what is
// still visible.
//
// Constructor IndexIVFPQ(d, nbucket, m):
//   Stores d, m and nbucket, constructs `router` with d and `pq` with (d, m),
//   and resizes `codes` and `ids` to nbucket entries each.
//
// train(n, x, subsampling, seed):
//   Returns immediately when `trained` is already set. Resizes
//   `coarse_centroids` to nbucket * d. When n exceeds maxtrain (150000) and
//   `subsampling` is true, it seeds a std::mt19937 with `seed`, sets up a
//   uniform integer distribution over [0, n-1] and allocates a sample buffer of
//   maxtrain * d elements; the sampling loop itself and whatever follows it are
//   lost. Further on it allocates an n * d `residuals` buffer and calls
//   router.search(n, x, 1, ...) to get one label per input vector; the loop
//   that consumes those labels is lost.
//
// Routine with missing signature (reads `xids`; presumably the add-with-ids
// entry point - TODO confirm against the header):
//   Calls router.search(n, x, 1, ...), prints three diagnostic lines to
//   std::cout (expected centroid count nbucket * d, actual
//   coarse_centroids.size(), codes.size()), then for each input encodes
//   residuals + i * d with pq.encode into an m-element buffer, appends that
//   buffer to codes[drawerid] and pushes xids[i] onto ids[drawerid]. How
//   `drawerid` and the residual values are computed is not visible here.
//
// search(n, query, k, nprobe, distances, labels):
//   Calls router.search with nprobe to fill n * nprobe coarse assignments and
//   distances. For each query it keeps a `max_heap` and a d-element
//   `query_residual`; for each probe it fills an m * 256 `distance_table` via
//   pq.compute_distance_table. The heap is then drained from the highest index
//   down to 0 into `subdist` / `sublbs` (top().first is written as the distance,
//   top().second as the label). A final loop starting at `count` is cut off, so
//   its purpose cannot be stated from this text.
#include "IndexIVFPQ.h" #include "IndexIVF.h" #include "clustering.h" #include #include #include #include #include IndexIVFPQ::IndexIVFPQ(int d, int nbucket, int m): d(d), m(m), nbucket(nbucket), router(d), pq(d, m){ codes.resize(nbucket); ids.resize(nbucket); }; void IndexIVFPQ::train(int n, const float *x, bool subsampling, int seed){ if(trained)return; coarse_centroids.resize(nbucket*d); int maxtrain = 150000; if(n>maxtrain && subsampling){ std::mt19937 gen(seed); std::uniform_int_distributiondis(0,n-1); std::vector sample_buffer(maxtrain * d); for(int i=0; iresiduals(n*d); std::vector distances(n); std::vector labels(n); router.search(n,x,1,distances.data(), labels.data()); for(int i = 0;iresiduals(n*d); std::vector distances(n); std::vector labels(n); router.search(n,x,1,distances.data(), labels.data()); std::cout << "expected centroids size: " << nbucket * d << std::endl; std::cout << "actual centroids size: " << coarse_centroids.size() << std::endl; std::cout << "codes vector size: " << codes.size() << std::endl; for(int i = 0;i zipvect(m); pq.encode(residuals.data()+(i*d), zipvect.data()); codes[drawerid].insert(codes[drawerid].end(), zipvect.begin(), zipvect.end()); ids[drawerid].push_back(xids[i]); } } void IndexIVFPQ::search(int n, const float *query, int k, int nprobe, float* distances, int64_t* labels){ std::vector assign(n*nprobe); std::vector coarse_distances(n*nprobe); router.search(n,query, nprobe, coarse_distances.data(),assign.data()); for(int i = 0; i> max_heap; std::vector query_residual(d); for(int p=0; p distance_table(m*256); pq.compute_distance_table(query_residual.data(), distance_table.data()); for(int v = 0; v=0; c--){ subdist[c] = max_heap.top().first; sublbs[c] = max_heap.top().second; max_heap.pop(); } for(int fod = count; fod