| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifndef FLANN_KMEANS_INDEX_H_ |
| | #define FLANN_KMEANS_INDEX_H_ |
| |
|
| | #include <algorithm> |
| | #include <string> |
| | #include <map> |
| | #include <cassert> |
| | #include <limits> |
| | #include <cmath> |
| |
|
| | #include "FLANN/general.h" |
| | #include "FLANN/algorithms/nn_index.h" |
| | #include "FLANN/algorithms/dist.h" |
| | #include <FLANN/algorithms/center_chooser.h> |
| | #include "FLANN/util/matrix.h" |
| | #include "FLANN/util/result_set.h" |
| | #include "FLANN/util/heap.h" |
| | #include "FLANN/util/allocator.h" |
| | #include "FLANN/util/random.h" |
| | #include "FLANN/util/saving.h" |
| | #include "FLANN/util/logger.h" |
| |
|
| |
|
| |
|
| | namespace flann |
| | { |
| |
|
| | struct KMeansIndexParams : public IndexParams |
| | { |
| | KMeansIndexParams(int branching = 32, int iterations = 11, |
| | flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM, float cb_index = 0.2 ) |
| | { |
| | (*this)["algorithm"] = FLANN_INDEX_KMEANS; |
| | |
| | (*this)["branching"] = branching; |
| | |
| | (*this)["iterations"] = iterations; |
| | |
| | (*this)["centers_init"] = centers_init; |
| | |
| | (*this)["cb_index"] = cb_index; |
| | } |
| | }; |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | template <typename Distance> |
| | class KMeansIndex : public NNIndex<Distance> |
| | { |
| | public: |
| | typedef typename Distance::ElementType ElementType; |
| | typedef typename Distance::ResultType DistanceType; |
| |
|
| | typedef NNIndex<Distance> BaseClass; |
| |
|
| | typedef bool needs_vector_space_distance; |
| |
|
| |
|
| |
|
| | flann_algorithm_t getType() const |
| | { |
| | return FLANN_INDEX_KMEANS; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | KMeansIndex(const Matrix<ElementType>& inputData, const IndexParams& params = KMeansIndexParams(), |
| | Distance d = Distance()) |
| | : BaseClass(params,d), root_(NULL), memoryCounter_(0) |
| | { |
| | branching_ = get_param(params,"branching",32); |
| | iterations_ = get_param(params,"iterations",11); |
| | if (iterations_<0) { |
| | iterations_ = (std::numeric_limits<int>::max)(); |
| | } |
| | centers_init_ = get_param(params,"centers_init",FLANN_CENTERS_RANDOM); |
| | cb_index_ = get_param(params,"cb_index",0.4f); |
| |
|
| | initCenterChooser(); |
| | setDataset(inputData); |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | KMeansIndex(const IndexParams& params = KMeansIndexParams(), Distance d = Distance()) |
| | : BaseClass(params, d), root_(NULL), memoryCounter_(0) |
| | { |
| | branching_ = get_param(params,"branching",32); |
| | iterations_ = get_param(params,"iterations",11); |
| | if (iterations_<0) { |
| | iterations_ = (std::numeric_limits<int>::max)(); |
| | } |
| | centers_init_ = get_param(params,"centers_init",FLANN_CENTERS_RANDOM); |
| | cb_index_ = get_param(params,"cb_index",0.4f); |
| |
|
| | initCenterChooser(); |
| | } |
| |
|
| |
|
| | KMeansIndex(const KMeansIndex& other) : BaseClass(other), |
| | branching_(other.branching_), |
| | iterations_(other.iterations_), |
| | centers_init_(other.centers_init_), |
| | cb_index_(other.cb_index_), |
| | memoryCounter_(other.memoryCounter_) |
| | { |
| | initCenterChooser(); |
| |
|
| | copyTree(root_, other.root_); |
| | } |
| |
|
| | KMeansIndex& operator=(KMeansIndex other) |
| | { |
| | this->swap(other); |
| | return *this; |
| | } |
| |
|
| |
|
| | void initCenterChooser() |
| | { |
| | switch(centers_init_) { |
| | case FLANN_CENTERS_RANDOM: |
| | chooseCenters_ = new RandomCenterChooser<Distance>(distance_, points_); |
| | break; |
| | case FLANN_CENTERS_GONZALES: |
| | chooseCenters_ = new GonzalesCenterChooser<Distance>(distance_, points_); |
| | break; |
| | case FLANN_CENTERS_KMEANSPP: |
| | chooseCenters_ = new KMeansppCenterChooser<Distance>(distance_, points_); |
| | break; |
| | default: |
| | throw FLANNException("Unknown algorithm for choosing initial centers."); |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | virtual ~KMeansIndex() |
| | { |
| | delete chooseCenters_; |
| | freeIndex(); |
| | } |
| |
|
| | BaseClass* clone() const |
| | { |
| | return new KMeansIndex(*this); |
| | } |
| |
|
| |
|
| | void set_cb_index( float index) |
| | { |
| | cb_index_ = index; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | int usedMemory() const |
| | { |
| | return pool_.usedMemory+pool_.wastedMemory+memoryCounter_; |
| | } |
| |
|
| | using BaseClass::buildIndex; |
| |
|
| | void addPoints(const Matrix<ElementType>& points, float rebuild_threshold = 2) |
| | { |
| | assert(points.cols==veclen_); |
| | size_t old_size = size_; |
| |
|
| | extendDataset(points); |
| |
|
| | if (rebuild_threshold>1 && size_at_build_*rebuild_threshold<size_) { |
| | buildIndex(); |
| | } |
| | else { |
| | for (size_t i=0;i<points.rows;++i) { |
| | DistanceType dist = distance_(root_->pivot, points[i], veclen_); |
| | addPointToTree(root_, old_size + i, dist); |
| | } |
| | } |
| | } |
| |
|
| | template<typename Archive> |
| | void serialize(Archive& ar) |
| | { |
| | ar.setObject(this); |
| |
|
| | ar & *static_cast<NNIndex<Distance>*>(this); |
| |
|
| | ar & branching_; |
| | ar & iterations_; |
| | ar & memoryCounter_; |
| | ar & cb_index_; |
| | ar & centers_init_; |
| |
|
| | if (Archive::is_loading::value) { |
| | root_ = new(pool_) Node(); |
| | } |
| | ar & *root_; |
| |
|
| | if (Archive::is_loading::value) { |
| | index_params_["algorithm"] = getType(); |
| | index_params_["branching"] = branching_; |
| | index_params_["iterations"] = iterations_; |
| | index_params_["centers_init"] = centers_init_; |
| | index_params_["cb_index"] = cb_index_; |
| | } |
| | } |
| |
|
| | void saveIndex(FILE* stream) |
| | { |
| | serialization::SaveArchive sa(stream); |
| | sa & *this; |
| | } |
| |
|
| | void loadIndex(FILE* stream) |
| | { |
| | freeIndex(); |
| | serialization::LoadArchive la(stream); |
| | la & *this; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const |
| | { |
| | if (removed_) { |
| | findNeighborsWithRemoved<true>(result, vec, searchParams); |
| | } |
| | else { |
| | findNeighborsWithRemoved<false>(result, vec, searchParams); |
| | } |
| |
|
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int getClusterCenters(Matrix<DistanceType>& centers) |
| | { |
| | int numClusters = centers.rows; |
| | if (numClusters<1) { |
| | throw FLANNException("Number of clusters must be at least 1"); |
| | } |
| |
|
| | DistanceType variance; |
| | std::vector<NodePtr> clusters(numClusters); |
| |
|
| | int clusterCount = getMinVarianceClusters(root_, clusters, numClusters, variance); |
| |
|
| | Logger::info("Clusters requested: %d, returning %d\n",numClusters, clusterCount); |
| |
|
| | for (int i=0; i<clusterCount; ++i) { |
| | DistanceType* center = clusters[i]->pivot; |
| | for (size_t j=0; j<veclen_; ++j) { |
| | centers[i][j] = center[j]; |
| | } |
| | } |
| |
|
| | return clusterCount; |
| | } |
| |
|
| | protected: |
| | |
| | |
| | |
| | void buildIndexImpl() |
| | { |
| | chooseCenters_->setDataSize(veclen_); |
| |
|
| | if (branching_<2) { |
| | throw FLANNException("Branching factor must be at least 2"); |
| | } |
| |
|
| | std::vector<int> indices(size_); |
| | for (size_t i=0; i<size_; ++i) { |
| | indices[i] = int(i); |
| | } |
| |
|
| | root_ = new(pool_) Node(); |
| | computeNodeStatistics(root_, indices); |
| | computeClustering(root_, &indices[0], (int)size_, branching_); |
| | } |
| |
|
| | private: |
| |
|
| | struct PointInfo |
| | { |
| | size_t index; |
| | ElementType* point; |
| | private: |
| | template<typename Archive> |
| | void serialize(Archive& ar) |
| | { |
| | typedef KMeansIndex<Distance> Index; |
| | Index* obj = static_cast<Index*>(ar.getObject()); |
| |
|
| | ar & index; |
| | |
| |
|
| | if (Archive::is_loading::value) point = obj->points_[index]; |
| | } |
| | friend struct serialization::access; |
| | }; |
| |
|
| | |
| | |
| | |
| | struct Node |
| | { |
| | |
| | |
| | |
| | DistanceType* pivot=NULL; |
| | |
| | |
| | |
| | DistanceType radius; |
| | |
| | |
| | |
| | DistanceType variance; |
| | |
| | |
| | |
| | int size; |
| | |
| | |
| | |
| | std::vector<Node*> childs; |
| | |
| | |
| | |
| | std::vector<PointInfo> points; |
| | |
| | |
| | |
| | |
| |
|
| | ~Node() |
| | { |
| | delete[] pivot; |
| | if (!childs.empty()) { |
| | for (size_t i=0; i<childs.size(); ++i) { |
| | childs[i]->~Node(); |
| | } |
| | } |
| | } |
| |
|
| | template<typename Archive> |
| | void serialize(Archive& ar) |
| | { |
| | typedef KMeansIndex<Distance> Index; |
| | Index* obj = static_cast<Index*>(ar.getObject()); |
| |
|
| | if (Archive::is_loading::value) { |
| | delete[] pivot; |
| | pivot = new DistanceType[obj->veclen_]; |
| | } |
| | ar & serialization::make_binary_object(pivot, obj->veclen_*sizeof(DistanceType)); |
| | ar & radius; |
| | ar & variance; |
| | ar & size; |
| |
|
| | size_t childs_size; |
| | if (Archive::is_saving::value) { |
| | childs_size = childs.size(); |
| | } |
| | ar & childs_size; |
| |
|
| | if (childs_size==0) { |
| | ar & points; |
| | } |
| | else { |
| | if (Archive::is_loading::value) { |
| | childs.resize(childs_size); |
| | } |
| | for (size_t i=0;i<childs_size;++i) { |
| | if (Archive::is_loading::value) { |
| | childs[i] = new(obj->pool_) Node(); |
| | } |
| | ar & *childs[i]; |
| | } |
| | } |
| | } |
| | friend struct serialization::access; |
| | }; |
| | typedef Node* NodePtr; |
| |
|
| | |
| | |
| | |
| | typedef BranchStruct<NodePtr, DistanceType> BranchSt; |
| |
|
| |
|
| | |
| | |
| | |
| | void freeIndex() |
| | { |
| | if (root_) root_->~Node(); |
| | root_ = NULL; |
| | pool_.free(); |
| | } |
| |
|
| | void copyTree(NodePtr& dst, const NodePtr& src) |
| | { |
| | dst = new(pool_) Node(); |
| | dst->pivot = new DistanceType[veclen_]; |
| | std::copy(src->pivot, src->pivot+veclen_, dst->pivot); |
| | dst->radius = src->radius; |
| | dst->variance = src->variance; |
| | dst->size = src->size; |
| |
|
| | if (src->childs.size()==0) { |
| | dst->points = src->points; |
| | } |
| | else { |
| | dst->childs.resize(src->childs.size()); |
| | for (size_t i=0;i<src->childs.size();++i) { |
| | copyTree(dst->childs[i], src->childs[i]); |
| | } |
| | } |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void computeNodeStatistics(NodePtr node, const std::vector<int>& indices) |
| | { |
| | size_t size = indices.size(); |
| |
|
| | DistanceType* mean = new DistanceType[veclen_]; |
| | memoryCounter_ += int(veclen_*sizeof(DistanceType)); |
| | memset(mean,0,veclen_*sizeof(DistanceType)); |
| |
|
| | for (size_t i=0; i<size; ++i) { |
| | ElementType* vec = points_[indices[i]]; |
| | for (size_t j=0; j<veclen_; ++j) { |
| | mean[j] += vec[j]; |
| | } |
| | } |
| | DistanceType div_factor = DistanceType(1)/size; |
| | for (size_t j=0; j<veclen_; ++j) { |
| | mean[j] *= div_factor; |
| | } |
| |
|
| | DistanceType radius = 0; |
| | DistanceType variance = 0; |
| | for (size_t i=0; i<size; ++i) { |
| | DistanceType dist = distance_(mean, points_[indices[i]], veclen_); |
| | if (dist>radius) { |
| | radius = dist; |
| | } |
| | variance += dist; |
| | } |
| | variance /= size; |
| |
|
| | node->variance = variance; |
| | node->radius = radius; |
| | delete[] node->pivot; |
| | node->pivot = mean; |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void computeClustering(NodePtr node, int* indices, int indices_length, int branching) |
| | { |
| | node->size = indices_length; |
| |
|
| | if (indices_length < branching) { |
| | node->points.resize(indices_length); |
| | for (int i=0;i<indices_length;++i) { |
| | node->points[i].index = indices[i]; |
| | node->points[i].point = points_[indices[i]]; |
| | } |
| | node->childs.clear(); |
| | return; |
| | } |
| |
|
| | std::vector<int> centers_idx(branching); |
| | int centers_length; |
| | (*chooseCenters_)(branching, indices, indices_length, ¢ers_idx[0], centers_length); |
| |
|
| | if (centers_length<branching) { |
| | node->points.resize(indices_length); |
| | for (int i=0;i<indices_length;++i) { |
| | node->points[i].index = indices[i]; |
| | node->points[i].point = points_[indices[i]]; |
| | } |
| | node->childs.clear(); |
| | return; |
| | } |
| |
|
| |
|
| | Matrix<double> dcenters(new double[branching*veclen_],branching,veclen_); |
| | for (int i=0; i<centers_length; ++i) { |
| | ElementType* vec = points_[centers_idx[i]]; |
| | for (size_t k=0; k<veclen_; ++k) { |
| | dcenters[i][k] = double(vec[k]); |
| | } |
| | } |
| |
|
| | std::vector<DistanceType> radiuses(branching,0); |
| | std::vector<int> count(branching,0); |
| |
|
| | |
| | std::vector<int> belongs_to(indices_length); |
| | for (int i=0; i<indices_length; ++i) { |
| |
|
| | DistanceType sq_dist = distance_(points_[indices[i]], dcenters[0], veclen_); |
| | belongs_to[i] = 0; |
| | for (int j=1; j<branching; ++j) { |
| | DistanceType new_sq_dist = distance_(points_[indices[i]], dcenters[j], veclen_); |
| | if (sq_dist>new_sq_dist) { |
| | belongs_to[i] = j; |
| | sq_dist = new_sq_dist; |
| | } |
| | } |
| | if (sq_dist>radiuses[belongs_to[i]]) { |
| | radiuses[belongs_to[i]] = sq_dist; |
| | } |
| | count[belongs_to[i]]++; |
| | } |
| |
|
| | bool converged = false; |
| | int iteration = 0; |
| | while (!converged && iteration<iterations_) { |
| | converged = true; |
| | iteration++; |
| |
|
| | |
| | for (int i=0; i<branching; ++i) { |
| | memset(dcenters[i],0,sizeof(double)*veclen_); |
| | radiuses[i] = 0; |
| | } |
| | for (int i=0; i<indices_length; ++i) { |
| | ElementType* vec = points_[indices[i]]; |
| | double* center = dcenters[belongs_to[i]]; |
| | for (size_t k=0; k<veclen_; ++k) { |
| | center[k] += vec[k]; |
| | } |
| | } |
| | for (int i=0; i<branching; ++i) { |
| | int cnt = count[i]; |
| | double div_factor = 1.0/cnt; |
| | for (size_t k=0; k<veclen_; ++k) { |
| | dcenters[i][k] *= div_factor; |
| | } |
| | } |
| |
|
| | |
| | for (int i=0; i<indices_length; ++i) { |
| | DistanceType sq_dist = distance_(points_[indices[i]], dcenters[0], veclen_); |
| | int new_centroid = 0; |
| | for (int j=1; j<branching; ++j) { |
| | DistanceType new_sq_dist = distance_(points_[indices[i]], dcenters[j], veclen_); |
| | if (sq_dist>new_sq_dist) { |
| | new_centroid = j; |
| | sq_dist = new_sq_dist; |
| | } |
| | } |
| | if (sq_dist>radiuses[new_centroid]) { |
| | radiuses[new_centroid] = sq_dist; |
| | } |
| | if (new_centroid != belongs_to[i]) { |
| | count[belongs_to[i]]--; |
| | count[new_centroid]++; |
| | belongs_to[i] = new_centroid; |
| |
|
| | converged = false; |
| | } |
| | } |
| |
|
| | for (int i=0; i<branching; ++i) { |
| | |
| | |
| | if (count[i]==0) { |
| | int j = (i+1)%branching; |
| | while (count[j]<=1) { |
| | j = (j+1)%branching; |
| | } |
| |
|
| | for (int k=0; k<indices_length; ++k) { |
| | if (belongs_to[k]==j) { |
| | belongs_to[k] = i; |
| | count[j]--; |
| | count[i]++; |
| | break; |
| | } |
| | } |
| | converged = false; |
| | } |
| | } |
| |
|
| | } |
| |
|
| | std::vector<DistanceType*> centers(branching); |
| |
|
| | for (int i=0; i<branching; ++i) { |
| | centers[i] = new DistanceType[veclen_]; |
| | memoryCounter_ += veclen_*sizeof(DistanceType); |
| | for (size_t k=0; k<veclen_; ++k) { |
| | centers[i][k] = (DistanceType)dcenters[i][k]; |
| | } |
| | } |
| |
|
| |
|
| | |
| | node->childs.resize(branching); |
| | int start = 0; |
| | int end = start; |
| | for (int c=0; c<branching; ++c) { |
| | int s = count[c]; |
| |
|
| | DistanceType variance = 0; |
| | for (int i=0; i<indices_length; ++i) { |
| | if (belongs_to[i]==c) { |
| | variance += distance_(centers[c], points_[indices[i]], veclen_); |
| | std::swap(indices[i],indices[end]); |
| | std::swap(belongs_to[i],belongs_to[end]); |
| | end++; |
| | } |
| | } |
| | variance /= s; |
| |
|
| | node->childs[c] = new(pool_) Node(); |
| | node->childs[c]->radius = radiuses[c]; |
| | node->childs[c]->pivot = centers[c]; |
| | node->childs[c]->variance = variance; |
| | computeClustering(node->childs[c],indices+start, end-start, branching); |
| | start=end; |
| | } |
| |
|
| | delete[] dcenters.ptr(); |
| | } |
| |
|
| |
|
| | template<bool with_removed> |
| | void findNeighborsWithRemoved(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) const |
| | { |
| |
|
| | int maxChecks = searchParams.checks; |
| |
|
| | if (maxChecks==FLANN_CHECKS_UNLIMITED) { |
| | findExactNN<with_removed>(root_, result, vec); |
| | } |
| | else { |
| | |
| | Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_); |
| |
|
| | int checks = 0; |
| | findNN<with_removed>(root_, result, vec, checks, maxChecks, heap); |
| |
|
| | BranchSt branch; |
| | while (heap->popMin(branch) && (checks<maxChecks || !result.full())) { |
| | NodePtr node = branch.node; |
| | findNN<with_removed>(node, result, vec, checks, maxChecks, heap); |
| | } |
| |
|
| | delete heap; |
| | } |
| |
|
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | template<bool with_removed> |
| | void findNN(NodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks, |
| | Heap<BranchSt>* heap) const |
| | { |
| | |
| | { |
| | DistanceType bsq = distance_(vec, node->pivot, veclen_); |
| | DistanceType rsq = node->radius; |
| | DistanceType wsq = result.worstDist(); |
| |
|
| | DistanceType val = bsq-rsq-wsq; |
| | DistanceType val2 = val*val-4*rsq*wsq; |
| |
|
| | |
| | if ((val>0)&&(val2>0)) { |
| | return; |
| | } |
| | } |
| |
|
| | if (node->childs.empty()) { |
| | if (checks>=maxChecks) { |
| | if (result.full()) return; |
| | } |
| | for (int i=0; i<node->size; ++i) { |
| | PointInfo& point_info = node->points[i]; |
| | int index = point_info.index; |
| | if (with_removed) { |
| | if (removed_points_.test(index)) continue; |
| | } |
| | DistanceType dist = distance_(point_info.point, vec, veclen_); |
| | result.addPoint(dist, index); |
| | ++checks; |
| | } |
| | } |
| | else { |
| | int closest_center = exploreNodeBranches(node, vec, heap); |
| | findNN<with_removed>(node->childs[closest_center],result,vec, checks, maxChecks, heap); |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int exploreNodeBranches(NodePtr node, const ElementType* q, Heap<BranchSt>* heap) const |
| | { |
| | std::vector<DistanceType> domain_distances(branching_); |
| | int best_index = 0; |
| | domain_distances[best_index] = distance_(q, node->childs[best_index]->pivot, veclen_); |
| | for (int i=1; i<branching_; ++i) { |
| | domain_distances[i] = distance_(q, node->childs[i]->pivot, veclen_); |
| | if (domain_distances[i]<domain_distances[best_index]) { |
| | best_index = i; |
| | } |
| | } |
| |
|
| | |
| | for (int i=0; i<branching_; ++i) { |
| | if (i != best_index) { |
| | domain_distances[i] -= cb_index_*node->childs[i]->variance; |
| |
|
| | |
| | |
| | |
| | |
| | heap->insert(BranchSt(node->childs[i],domain_distances[i])); |
| | } |
| | } |
| |
|
| | return best_index; |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | template<bool with_removed> |
| | void findExactNN(NodePtr node, ResultSet<DistanceType>& result, const ElementType* vec) const |
| | { |
| | |
| | { |
| | DistanceType bsq = distance_(vec, node->pivot, veclen_); |
| | DistanceType rsq = node->radius; |
| | DistanceType wsq = result.worstDist(); |
| |
|
| | DistanceType val = bsq-rsq-wsq; |
| | DistanceType val2 = val*val-4*rsq*wsq; |
| |
|
| | |
| | if ((val>0)&&(val2>0)) { |
| | return; |
| | } |
| | } |
| |
|
| | if (node->childs.empty()) { |
| | for (int i=0; i<node->size; ++i) { |
| | PointInfo& point_info = node->points[i]; |
| | int index = point_info.index; |
| | if (with_removed) { |
| | if (removed_points_.test(index)) continue; |
| | } |
| | DistanceType dist = distance_(point_info.point, vec, veclen_); |
| | result.addPoint(dist, index); |
| | } |
| | } |
| | else { |
| | std::vector<int> sort_indices(branching_); |
| | getCenterOrdering(node, vec, sort_indices); |
| |
|
| | for (int i=0; i<branching_; ++i) { |
| | findExactNN<with_removed>(node->childs[sort_indices[i]],result,vec); |
| | } |
| |
|
| | } |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | void getCenterOrdering(NodePtr node, const ElementType* q, std::vector<int>& sort_indices) const |
| | { |
| | std::vector<DistanceType> domain_distances(branching_); |
| | for (int i=0; i<branching_; ++i) { |
| | DistanceType dist = distance_(q, node->childs[i]->pivot, veclen_); |
| |
|
| | int j=0; |
| | while (domain_distances[j]<dist && j<i) j++; |
| | for (int k=i; k>j; --k) { |
| | domain_distances[k] = domain_distances[k-1]; |
| | sort_indices[k] = sort_indices[k-1]; |
| | } |
| | domain_distances[j] = dist; |
| | sort_indices[j] = i; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | DistanceType getDistanceToBorder(DistanceType* p, DistanceType* c, DistanceType* q) const |
| | { |
| | DistanceType sum = 0; |
| | DistanceType sum2 = 0; |
| |
|
| | for (int i=0; i<veclen_; ++i) { |
| | DistanceType t = c[i]-p[i]; |
| | sum += t*(q[i]-(c[i]+p[i])/2); |
| | sum2 += t*t; |
| | } |
| |
|
| | return sum*sum/sum2; |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int getMinVarianceClusters(NodePtr root, std::vector<NodePtr>& clusters, int clusters_length, DistanceType& varianceValue) const |
| | { |
| | int clusterCount = 1; |
| | clusters[0] = root; |
| |
|
| | DistanceType meanVariance = root->variance*root->size; |
| |
|
| | while (clusterCount<clusters_length) { |
| | DistanceType minVariance = (std::numeric_limits<DistanceType>::max)(); |
| | int splitIndex = -1; |
| |
|
| | for (int i=0; i<clusterCount; ++i) { |
| | if (!clusters[i]->childs.empty()) { |
| |
|
| | DistanceType variance = meanVariance - clusters[i]->variance*clusters[i]->size; |
| |
|
| | for (int j=0; j<branching_; ++j) { |
| | variance += clusters[i]->childs[j]->variance*clusters[i]->childs[j]->size; |
| | } |
| | if (variance<minVariance) { |
| | minVariance = variance; |
| | splitIndex = i; |
| | } |
| | } |
| | } |
| |
|
| | if (splitIndex==-1) break; |
| | if ( (branching_+clusterCount-1) > clusters_length) break; |
| |
|
| | meanVariance = minVariance; |
| |
|
| | |
| | NodePtr toSplit = clusters[splitIndex]; |
| | clusters[splitIndex] = toSplit->childs[0]; |
| | for (int i=1; i<branching_; ++i) { |
| | clusters[clusterCount++] = toSplit->childs[i]; |
| | } |
| | } |
| |
|
| | varianceValue = meanVariance/root->size; |
| | return clusterCount; |
| | } |
| |
|
| | void addPointToTree(NodePtr node, size_t index, DistanceType dist_to_pivot) |
| | { |
| | ElementType* point = points_[index]; |
| | if (dist_to_pivot>node->radius) { |
| | node->radius = dist_to_pivot; |
| | } |
| | |
| | node->variance = (node->size*node->variance+dist_to_pivot)/(node->size+1); |
| | node->size++; |
| |
|
| | if (node->childs.empty()) { |
| | PointInfo point_info; |
| | point_info.index = index; |
| | point_info.point = point; |
| | node->points.push_back(point_info); |
| |
|
| | std::vector<int> indices(node->points.size()); |
| | for (size_t i=0;i<node->points.size();++i) { |
| | indices[i] = node->points[i].index; |
| | } |
| | computeNodeStatistics(node, indices); |
| | if (indices.size()>=size_t(branching_)) { |
| | computeClustering(node, &indices[0], indices.size(), branching_); |
| | } |
| | } |
| | else { |
| | |
| | int closest = 0; |
| | DistanceType dist = distance_(node->childs[closest]->pivot, point, veclen_); |
| | for (size_t i=1;i<size_t(branching_);++i) { |
| | DistanceType crt_dist = distance_(node->childs[i]->pivot, point, veclen_); |
| | if (crt_dist<dist) { |
| | dist = crt_dist; |
| | closest = i; |
| | } |
| | } |
| | addPointToTree(node->childs[closest], index, dist); |
| | } |
| | } |
| |
|
| |
|
| | void swap(KMeansIndex& other) |
| | { |
| | std::swap(branching_, other.branching_); |
| | std::swap(iterations_, other.iterations_); |
| | std::swap(centers_init_, other.centers_init_); |
| | std::swap(cb_index_, other.cb_index_); |
| | std::swap(root_, other.root_); |
| | std::swap(pool_, other.pool_); |
| | std::swap(memoryCounter_, other.memoryCounter_); |
| | std::swap(chooseCenters_, other.chooseCenters_); |
| | } |
| |
|
| |
|
| | private: |
| | |
| | int branching_; |
| |
|
| | |
| | int iterations_; |
| |
|
| | |
| | flann_centers_init_t centers_init_; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | float cb_index_; |
| |
|
| | |
| | |
| | |
| | NodePtr root_; |
| |
|
| | |
| | |
| | |
| | PooledAllocator pool_; |
| |
|
| | |
| | |
| | |
| | int memoryCounter_; |
| |
|
| | |
| | |
| | |
| | CenterChooser<Distance>* chooseCenters_; |
| |
|
| | USING_BASECLASS_SYMBOLS |
| | }; |
| |
|
| | } |
| |
|
| | #endif |
| |
|