| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | #ifndef FLANN_LSH_INDEX_H_ |
| | #define FLANN_LSH_INDEX_H_ |
| |
|
| | #include <algorithm> |
| | #include <cassert> |
| | #include <cstring> |
| | #include <map> |
| | #include <vector> |
| |
|
| | #include "FLANN/general.h" |
| | #include "FLANN/algorithms/nn_index.h" |
| | #include "FLANN/util/matrix.h" |
| | #include "FLANN/util/result_set.h" |
| | #include "FLANN/util/heap.h" |
| | #include "FLANN/util/lsh_table.h" |
| | #include "FLANN/util/allocator.h" |
| | #include "FLANN/util/random.h" |
| | #include "FLANN/util/saving.h" |
| |
|
| | namespace flann |
| | { |
| |
|
| | struct LshIndexParams : public IndexParams |
| | { |
| | LshIndexParams(unsigned int table_number = 12, unsigned int key_size = 20, unsigned int multi_probe_level = 2) |
| | { |
| | (* this)["algorithm"] = FLANN_INDEX_LSH; |
| | |
| | (*this)["table_number"] = table_number; |
| | |
| | (*this)["key_size"] = key_size; |
| | |
| | (*this)["multi_probe_level"] = multi_probe_level; |
| | } |
| | }; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Locality-Sensitive Hashing index. Points are hashed into table_number_
 * tables using key_size_-bit bucket keys; a query probes its own bucket plus
 * the multi-probe perturbations precomputed in xor_masks_. The distance
 * results are referred to as Hamming distances throughout this file, so the
 * index is intended for binary descriptors — confirm against the Distance
 * functor used.
 */
template<typename Distance>
class LshIndex : public NNIndex<Distance>
{
public:
    typedef typename Distance::ElementType ElementType;
    typedef typename Distance::ResultType DistanceType;

    typedef NNIndex<Distance> BaseClass;
| |
|
| | |
| | |
| | |
| | |
| | LshIndex(const IndexParams& params = LshIndexParams(), Distance d = Distance()) : |
| | BaseClass(params, d) |
| | { |
| | table_number_ = get_param<unsigned int>(index_params_,"table_number",12); |
| | key_size_ = get_param<unsigned int>(index_params_,"key_size",20); |
| | multi_probe_level_ = get_param<unsigned int>(index_params_,"multi_probe_level",2); |
| |
|
| | fill_xor_mask(0, key_size_, multi_probe_level_, xor_masks_); |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | LshIndex(const Matrix<ElementType>& input_data, const IndexParams& params = LshIndexParams(), Distance d = Distance()) : |
| | BaseClass(params, d) |
| | { |
| | table_number_ = get_param<unsigned int>(index_params_,"table_number",12); |
| | key_size_ = get_param<unsigned int>(index_params_,"key_size",20); |
| | multi_probe_level_ = get_param<unsigned int>(index_params_,"multi_probe_level",2); |
| |
|
| | fill_xor_mask(0, key_size_, multi_probe_level_, xor_masks_); |
| |
|
| | setDataset(input_data); |
| | } |
| |
|
    /** Copy constructor: duplicates the base-class state plus all LSH-specific
     *  state (hash tables, table/key/probe parameters, precomputed XOR masks). */
    LshIndex(const LshIndex& other) : BaseClass(other),
        tables_(other.tables_),
        table_number_(other.table_number_),
        key_size_(other.key_size_),
        multi_probe_level_(other.multi_probe_level_),
        xor_masks_(other.xor_masks_)
    {
    }
| |
|
    /** Copy-and-swap assignment: `other` is taken by value, so the copy has
     *  already been made; swapping with it leaves *this with the new state. */
    LshIndex& operator=(LshIndex other)
    {
        this->swap(other);
        return *this;
    }
| |
|
    /** Destructor; freeIndex() is currently a no-op (tables_ frees itself). */
    virtual ~LshIndex()
    {
        freeIndex();
    }
| |
|
| |
|
    /** Returns a deep copy of this index (caller owns the result). */
    BaseClass* clone() const
    {
        return new LshIndex(*this);
    }

    // Bring the base-class buildIndex() overloads into scope alongside
    // buildIndexImpl() below.
    using BaseClass::buildIndex;
| |
|
| | void addPoints(const Matrix<ElementType>& points, float rebuild_threshold = 2) |
| | { |
| | assert(points.cols==veclen_); |
| | size_t old_size = size_; |
| |
|
| | extendDataset(points); |
| |
|
| | if (rebuild_threshold>1 && size_at_build_*rebuild_threshold<size_) { |
| | buildIndex(); |
| | } |
| | else { |
| | for (unsigned int i = 0; i < table_number_; ++i) { |
| | lsh::LshTable<ElementType>& table = tables_[i]; |
| | for (size_t i=old_size;i<size_;++i) { |
| | table.add(i, points_[i]); |
| | } |
| | } |
| | } |
| | } |
| |
|
| |
|
    /** Identifies this index type as LSH. */
    flann_algorithm_t getType() const
    {
        return FLANN_INDEX_LSH;
    }
| |
|
| |
|
    /** Serializes or deserializes the index through `ar` (direction depends on
     *  the Archive type). The member order below is the on-disk format — do
     *  not reorder. */
    template<typename Archive>
    void serialize(Archive& ar)
    {
        ar.setObject(this);

        // Base-class state first (dataset bookkeeping, removed-points info, ...).
        ar & *static_cast<NNIndex<Distance>*>(this);

        ar & table_number_;
        ar & key_size_;
        ar & multi_probe_level_;

        ar & xor_masks_;
        ar & tables_;

        // On load, mirror the restored values back into the parameter map so
        // callers querying index_params_ see the loaded configuration.
        if (Archive::is_loading::value) {
            index_params_["algorithm"] = getType();
            index_params_["table_number"] = table_number_;
            index_params_["key_size"] = key_size_;
            index_params_["multi_probe_level"] = multi_probe_level_;
        }
    }
| |
|
    /** Writes the index to an open stream via the serialize() format above. */
    void saveIndex(FILE* stream)
    {
        serialization::SaveArchive sa(stream);
        sa & *this;
    }
| |
|
    /** Reads the index from an open stream via the serialize() format above. */
    void loadIndex(FILE* stream)
    {
        serialization::LoadArchive la(stream);
        la & *this;
    }
| |
|
| | |
| | |
| | |
| | |
    /** Approximate memory used by the index, in bytes.
     *  NOTE(review): this counts only one int per point; the storage of the
     *  hash tables themselves is not included — confirm whether that rough
     *  estimate is intentional. */
    int usedMemory() const
    {
        return size_ * sizeof(int);
    }
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
    /** K-nearest-neighbor search for a batch of queries, results written into
     *  preallocated matrices.
     *
     * @param queries query points, one per row (veclen_ columns)
     * @param indices output: neighbor ids, row i holds the neighbors of query i
     * @param dists output: corresponding distances
     * @param knn number of neighbors to return per query
     * @param params search parameters (cores, sorted, use_heap)
     * @return total number of neighbors found across all queries
     */
    int knnSearch(const Matrix<ElementType>& queries,
                  Matrix<size_t>& indices,
                  Matrix<DistanceType>& dists,
                  size_t knn,
                  const SearchParams& params) const
    {
        assert(queries.cols == veclen_);
        assert(indices.rows >= queries.rows);
        assert(dists.rows >= queries.rows);
        assert(indices.cols >= knn);
        assert(dists.cols >= knn);

        int count = 0;
        if (params.use_heap==FLANN_True) {
            // Heap variant: KNNUniqueResultSet de-duplicates candidates, which
            // matters for LSH since a point can surface from several tables.
#pragma omp parallel num_threads(params.cores)
            {
                KNNUniqueResultSet<DistanceType> resultSet(knn);
#pragma omp for schedule(dynamic) reduction(+:count)
                for (int i = 0; i < (int)queries.rows; i++) {
                    resultSet.clear();
                    findNeighbors(resultSet, queries[i], params);
                    size_t n = std::min(resultSet.size(), knn);
                    resultSet.copy(indices[i], dists[i], n, params.sorted);
                    // Translate internal point indices to user-visible ids,
                    // in place on the output row.
                    indices_to_ids(indices[i], indices[i], n);
                    count += n;
                }
            }
        }
        else {
#pragma omp parallel num_threads(params.cores)
            {
                KNNResultSet<DistanceType> resultSet(knn);
#pragma omp for schedule(dynamic) reduction(+:count)
                for (int i = 0; i < (int)queries.rows; i++) {
                    resultSet.clear();
                    findNeighbors(resultSet, queries[i], params);
                    size_t n = std::min(resultSet.size(), knn);
                    resultSet.copy(indices[i], dists[i], n, params.sorted);
                    indices_to_ids(indices[i], indices[i], n);
                    count += n;
                }
            }
        }

        return count;
    }
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
    /** K-nearest-neighbor search for a batch of queries, results returned in
     *  per-query vectors that are resized to the number of neighbors actually
     *  found (which can be fewer than knn).
     *
     * @param queries query points, one per row (veclen_ columns)
     * @param indices output: neighbor ids per query (resized as needed)
     * @param dists output: corresponding distances (resized as needed)
     * @param knn number of neighbors to return per query
     * @param params search parameters (cores, sorted, use_heap)
     * @return total number of neighbors found across all queries
     */
    int knnSearch(const Matrix<ElementType>& queries,
                  std::vector< std::vector<size_t> >& indices,
                  std::vector<std::vector<DistanceType> >& dists,
                  size_t knn,
                  const SearchParams& params) const
    {
        assert(queries.cols == veclen_);
        if (indices.size() < queries.rows ) indices.resize(queries.rows);
        if (dists.size() < queries.rows ) dists.resize(queries.rows);

        int count = 0;
        if (params.use_heap==FLANN_True) {
            // Heap variant: KNNUniqueResultSet de-duplicates candidates, which
            // matters for LSH since a point can surface from several tables.
#pragma omp parallel num_threads(params.cores)
            {
                KNNUniqueResultSet<DistanceType> resultSet(knn);
#pragma omp for schedule(dynamic) reduction(+:count)
                for (int i = 0; i < (int)queries.rows; i++) {
                    resultSet.clear();
                    findNeighbors(resultSet, queries[i], params);
                    size_t n = std::min(resultSet.size(), knn);
                    indices[i].resize(n);
                    dists[i].resize(n);
                    // Guard n > 0: &v[0] on an empty vector is undefined.
                    if (n > 0) {
                        resultSet.copy(&indices[i][0], &dists[i][0], n, params.sorted);
                        indices_to_ids(&indices[i][0], &indices[i][0], n);
                    }
                    count += n;
                }
            }
        }
        else {
#pragma omp parallel num_threads(params.cores)
            {
                KNNResultSet<DistanceType> resultSet(knn);
#pragma omp for schedule(dynamic) reduction(+:count)
                for (int i = 0; i < (int)queries.rows; i++) {
                    resultSet.clear();
                    findNeighbors(resultSet, queries[i], params);
                    size_t n = std::min(resultSet.size(), knn);
                    indices[i].resize(n);
                    dists[i].resize(n);
                    if (n > 0) {
                        resultSet.copy(&indices[i][0], &dists[i][0], n, params.sorted);
                        indices_to_ids(&indices[i][0], &indices[i][0], n);
                    }
                    count += n;
                }
            }
        }

        return count;
    }
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
    /** Finds the neighbors of a single query vector.
     * @param result result set collecting (distance, index) candidates
     * @param vec the query descriptor
     * The SearchParams argument is unused: probing is fixed at index-build
     * time by multi_probe_level_, not tunable per query.
     */
    void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& ) const
    {
        getNeighbors(vec, result);
    }
| |
|
| | protected: |
| |
|
| | |
| | |
| | |
| | void buildIndexImpl() |
| | { |
| | tables_.resize(table_number_); |
| | std::vector<std::pair<size_t,ElementType*> > features; |
| | features.reserve(points_.size()); |
| | for (size_t i=0;i<points_.size();++i) { |
| | features.push_back(std::make_pair(i, points_[i])); |
| | } |
| | for (unsigned int i = 0; i < table_number_; ++i) { |
| | lsh::LshTable<ElementType>& table = tables_[i]; |
| | table = lsh::LshTable<ElementType>(veclen_, key_size_); |
| |
|
| | |
| | table.add(features); |
| | } |
| | } |
| |
|
    /** No-op: tables_ and xor_masks_ release their own storage. Kept so the
     *  destructor matches the structure of the other index implementations. */
    void freeIndex()
    {
    }
| |
|
| |
|
private:
    /** (score, point index) candidate pair used by the heap-based search. */
    typedef std::pair<float, unsigned int> ScoreIndexPair;
    /** Orders candidate pairs by their point index (the .second member). */
    struct SortScoreIndexPairOnSecond
    {
        bool operator()(const ScoreIndexPair& left, const ScoreIndexPair& right) const
        {
            return left.second < right.second;
        }
    };
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | void fill_xor_mask(lsh::BucketKey key, int lowest_index, unsigned int level, |
| | std::vector<lsh::BucketKey>& xor_masks) |
| | { |
| | xor_masks.push_back(key); |
| | if (level == 0) return; |
| | for (int index = lowest_index - 1; index >= 0; --index) { |
| | |
| | lsh::BucketKey new_key = key | (lsh::BucketKey(1) << index); |
| | fill_xor_mask(new_key, index, level - 1, xor_masks); |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void getNeighbors(const ElementType* vec, bool do_radius, float radius, bool do_k, unsigned int k_nn, |
| | float& checked_average) |
| | { |
| | static std::vector<ScoreIndexPair> score_index_heap; |
| |
|
| | if (do_k) { |
| | unsigned int worst_score = std::numeric_limits<unsigned int>::max(); |
| | typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin(); |
| | typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end(); |
| | for (; table != table_end; ++table) { |
| | size_t key = table->getKey(vec); |
| | std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin(); |
| | std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end(); |
| | for (; xor_mask != xor_mask_end; ++xor_mask) { |
| | size_t sub_key = key ^ (*xor_mask); |
| | const lsh::Bucket* bucket = table->getBucketFromKey(sub_key); |
| | if (bucket == 0) continue; |
| |
|
| | |
| | std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin(); |
| | std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end(); |
| | DistanceType hamming_distance; |
| |
|
| | |
| | for (; training_index < last_training_index; ++training_index) { |
| | if (removed_ && removed_points_.test(*training_index)) continue; |
| | hamming_distance = distance_(vec, points_[*training_index].point, veclen_); |
| |
|
| | if (hamming_distance < worst_score) { |
| | |
| | score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index)); |
| | std::push_heap(score_index_heap.begin(), score_index_heap.end()); |
| |
|
| | if (score_index_heap.size() > (unsigned int)k_nn) { |
| | |
| | std::pop_heap(score_index_heap.begin(), score_index_heap.end()); |
| | score_index_heap.pop_back(); |
| | |
| | worst_score = score_index_heap.front().first; |
| | } |
| | } |
| | } |
| | } |
| | } |
| | } |
| | else { |
| | typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin(); |
| | typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end(); |
| | for (; table != table_end; ++table) { |
| | size_t key = table->getKey(vec); |
| | std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin(); |
| | std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end(); |
| | for (; xor_mask != xor_mask_end; ++xor_mask) { |
| | size_t sub_key = key ^ (*xor_mask); |
| | const lsh::Bucket* bucket = table->getBucketFromKey(sub_key); |
| | if (bucket == 0) continue; |
| |
|
| | |
| | std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin(); |
| | std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end(); |
| | DistanceType hamming_distance; |
| |
|
| | |
| | for (; training_index < last_training_index; ++training_index) { |
| | if (removed_ && removed_points_.test(*training_index)) continue; |
| | |
| | hamming_distance = distance_(vec, points_[*training_index].point, veclen_); |
| | if (hamming_distance < radius) score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index)); |
| | } |
| | } |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | void getNeighbors(const ElementType* vec, ResultSet<DistanceType>& result) const |
| | { |
| | typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin(); |
| | typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end(); |
| | for (; table != table_end; ++table) { |
| | size_t key = table->getKey(vec); |
| | std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin(); |
| | std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end(); |
| | for (; xor_mask != xor_mask_end; ++xor_mask) { |
| | size_t sub_key = key ^ (*xor_mask); |
| | const lsh::Bucket* bucket = table->getBucketFromKey(sub_key); |
| | if (bucket == 0) continue; |
| |
|
| | |
| | std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin(); |
| | std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end(); |
| | DistanceType hamming_distance; |
| |
|
| | |
| | for (; training_index < last_training_index; ++training_index) { |
| | if (removed_ && removed_points_.test(*training_index)) continue; |
| | |
| | hamming_distance = distance_(vec, points_[*training_index], veclen_); |
| | result.addPoint(hamming_distance, *training_index); |
| | } |
| | } |
| | } |
| | } |
| |
|
| |
|
    /** Swaps all state with another index (backs the copy-and-swap
     *  assignment operator above). */
    void swap(LshIndex& other)
    {
        BaseClass::swap(other);
        std::swap(tables_, other.tables_);
        std::swap(size_at_build_, other.size_at_build_);
        std::swap(table_number_, other.table_number_);
        std::swap(key_size_, other.key_size_);
        std::swap(multi_probe_level_, other.multi_probe_level_);
        std::swap(xor_masks_, other.xor_masks_);
    }
| |
|
| | |
| | std::vector<lsh::LshTable<ElementType> > tables_; |
| |
|
| | |
| | unsigned int table_number_; |
| | |
| | unsigned int key_size_; |
| | |
| | unsigned int multi_probe_level_; |
| |
|
| | |
| | std::vector<lsh::BucketKey> xor_masks_; |
| |
|
| | USING_BASECLASS_SYMBOLS |
| | }; |
| | } |
| |
|
| | #endif |
| |
|