| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | #ifndef FLANN_UTIL_CUDA_RESULTSET_H |
| | #define FLANN_UTIL_CUDA_RESULTSET_H |
| |
|
| | #include <FLANN/util/cuda/heap.h> |
| | #include <limits> |
| |
|
| | __device__ __forceinline__ |
| | float infinity() |
| | { |
| | return __int_as_float(0x7f800000); |
| | } |
| |
|
| | #ifndef INFINITY |
| | #define INFINITY infinity() |
| | #endif |
| |
|
| | namespace flann |
| | { |
| | namespace cuda |
| | { |
| | |
| | template< typename DistanceType > |
| | struct SingleResultSet |
| | { |
| | int bestIndex; |
| | DistanceType bestDist; |
| | const DistanceType epsError; |
| |
|
| | __device__ __host__ |
| | SingleResultSet( DistanceType eps ) : bestIndex(-1),bestDist(INFINITY), epsError(eps){ } |
| |
|
| | __device__ |
| | inline float |
| | worstDist() |
| | { |
| | return bestDist; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | insert(int index, DistanceType dist) |
| | { |
| | if( dist <= bestDist ) { |
| | bestIndex=index; |
| | bestDist=dist; |
| | } |
| | } |
| |
|
| | DistanceType* resultDist; |
| | int* resultIndex; |
| |
|
| | __device__ |
| | inline void |
| | setResultLocation( DistanceType* dists, int* index, int thread, int stride ) |
| | { |
| | resultDist=dists+thread*stride; |
| | resultIndex=index+thread*stride; |
| | if( stride != 1 ) { |
| | for( int i=1; i<stride; i++ ) { |
| | resultDist[i]=INFINITY; |
| | resultIndex[i]=-1; |
| | } |
| | } |
| | } |
| |
|
| | __device__ |
| | inline void |
| | finish() |
| | { |
| | resultDist[0]=bestDist; |
| | resultIndex[0]=bestIndex; |
| | } |
| | }; |
| |
|
| | template< typename DistanceType > |
| | struct GreaterThan |
| | { |
| | __device__ |
| | bool operator()(DistanceType a, DistanceType b) |
| | { |
| | return a>b; |
| | } |
| | }; |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | template< typename DistanceType, bool useHeap > |
| | struct KnnResultSet |
| | { |
| | int foundNeighbors; |
| | DistanceType largestHeapDist; |
| | int maxDistIndex; |
| | const int k; |
| | const bool sorted; |
| | const DistanceType epsError; |
| |
|
| |
|
| | __device__ __host__ |
| | KnnResultSet(int knn, bool sortResults, DistanceType eps) : foundNeighbors(0),largestHeapDist(INFINITY),k(knn), sorted(sortResults), epsError(eps){ } |
| |
|
| | |
| | |
| |
|
| | __device__ |
| | inline DistanceType |
| | worstDist() |
| | { |
| | return largestHeapDist; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | insert(int index, DistanceType dist) |
| | { |
| | if( foundNeighbors<k ) { |
| | resultDist[foundNeighbors]=dist; |
| | resultIndex[foundNeighbors]=index; |
| | if( foundNeighbors==k-1) { |
| | if( useHeap ) { |
| | flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>()); |
| | largestHeapDist=resultDist[0]; |
| | } |
| | else { |
| | findLargestDistIndex(); |
| | } |
| |
|
| | } |
| | foundNeighbors++; |
| | } |
| | else if( dist < largestHeapDist ) { |
| | if( useHeap ) { |
| | resultDist[0]=dist; |
| | resultIndex[0]=index; |
| | flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>()); |
| | largestHeapDist=resultDist[0]; |
| | } |
| | else { |
| | resultDist[maxDistIndex]=dist; |
| | resultIndex[maxDistIndex]=index; |
| | findLargestDistIndex(); |
| | } |
| |
|
| | } |
| | } |
| |
|
| | __device__ |
| | void |
| | findLargestDistIndex( ) |
| | { |
| | largestHeapDist=resultDist[0]; |
| | maxDistIndex=0; |
| | for( int i=1; i<k; i++ ) |
| | if( resultDist[i] > largestHeapDist ) { |
| | maxDistIndex=i; |
| | largestHeapDist=resultDist[i]; |
| | } |
| | } |
| |
|
| | float* resultDist; |
| | int* resultIndex; |
| |
|
| | __device__ |
| | inline void |
| | setResultLocation( DistanceType* dists, int* index, int thread, int stride ) |
| | { |
| | resultDist=dists+stride*thread; |
| | resultIndex=index+stride*thread; |
| | for( int i=0; i<stride; i++ ) { |
| | resultDist[i]=INFINITY; |
| | resultIndex[i]=-1; |
| | |
| | |
| | } |
| | } |
| |
|
| | __host__ __device__ |
| | inline void |
| | finish() |
| | { |
| | if( sorted ) { |
| | if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>()); |
| | for( int i=k-1; i>0; i-- ) { |
| | flann::cuda::swap( resultDist[0], resultDist[i] ); |
| | flann::cuda::swap( resultIndex[0], resultIndex[i] ); |
| | flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() ); |
| | } |
| | } |
| | } |
| | }; |
| |
|
| | template <typename DistanceType> |
| | struct CountingRadiusResultSet |
| | { |
| | int count_; |
| | DistanceType radius_sq_; |
| | int max_neighbors_; |
| |
|
| | __device__ __host__ |
| | CountingRadiusResultSet(DistanceType radius, int max_neighbors) : count_(0),radius_sq_(radius), max_neighbors_(max_neighbors){ } |
| |
|
| | __device__ |
| | inline DistanceType |
| | worstDist() |
| | { |
| | return radius_sq_; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | insert(int index, float dist) |
| | { |
| | if( dist < radius_sq_ ) { |
| | count_++; |
| | } |
| | } |
| |
|
| | int* resultIndex; |
| |
|
| | __device__ |
| | inline void |
| | setResultLocation( DistanceType* , int* count, int thread, int stride ) |
| | { |
| | resultIndex=count+thread*stride; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | finish() |
| | { |
| | if(( max_neighbors_<=0) ||( count_<=max_neighbors_) ) resultIndex[0]=count_; |
| | else resultIndex[0]=max_neighbors_; |
| | } |
| | }; |
| |
|
| | template<typename DistanceType, bool useHeap> |
| | struct RadiusKnnResultSet |
| | { |
| | int foundNeighbors; |
| | DistanceType largestHeapDist; |
| | int maxDistElem; |
| | const int k; |
| | const bool sorted; |
| | const DistanceType radius_sq_; |
| | int* segment_starts_; |
| | |
| |
|
| |
|
| | __device__ __host__ |
| | RadiusKnnResultSet(DistanceType radius, int knn, int* segment_starts, bool sortResults) : foundNeighbors(0),largestHeapDist(radius),k(knn), sorted(sortResults), radius_sq_(radius),segment_starts_(segment_starts) { } |
| |
|
| | |
| | |
| |
|
| | __device__ |
| | inline DistanceType |
| | worstDist() |
| | { |
| | return largestHeapDist; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | insert(int index, DistanceType dist) |
| | { |
| | if( dist < radius_sq_ ) { |
| | if( foundNeighbors<k ) { |
| | resultDist[foundNeighbors]=dist; |
| | resultIndex[foundNeighbors]=index; |
| | if(( foundNeighbors==k-1) && useHeap) { |
| | if( useHeap ) { |
| | flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>()); |
| | largestHeapDist=resultDist[0]; |
| | } |
| | else { |
| | findLargestDistIndex(); |
| | } |
| | } |
| | foundNeighbors++; |
| |
|
| | } |
| | else if( dist < largestHeapDist ) { |
| | if( useHeap ) { |
| | resultDist[0]=dist; |
| | resultIndex[0]=index; |
| | flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>()); |
| | largestHeapDist=resultDist[0]; |
| | } |
| | else { |
| | resultDist[maxDistElem]=dist; |
| | resultIndex[maxDistElem]=index; |
| | findLargestDistIndex(); |
| | } |
| | } |
| | } |
| | } |
| |
|
| | __device__ |
| | void |
| | findLargestDistIndex( ) |
| | { |
| | largestHeapDist=resultDist[0]; |
| | maxDistElem=0; |
| | for( int i=1; i<k; i++ ) |
| | if( resultDist[i] > largestHeapDist ) { |
| | maxDistElem=i; |
| | largestHeapDist=resultDist[i]; |
| | } |
| | } |
| |
|
| |
|
| | DistanceType* resultDist; |
| | int* resultIndex; |
| |
|
| | __device__ |
| | inline void |
| | setResultLocation( DistanceType* dists, int* index, int thread, int ) |
| | { |
| | resultDist=dists+segment_starts_[thread]; |
| | resultIndex=index+segment_starts_[thread]; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | finish() |
| | { |
| | if( sorted ) { |
| | if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>()); |
| | for( int i=foundNeighbors-1; i>0; i-- ) { |
| | flann::cuda::swap( resultDist[0], resultDist[i] ); |
| | flann::cuda::swap( resultIndex[0], resultIndex[i] ); |
| | flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() ); |
| | } |
| | } |
| | } |
| | }; |
| |
|
| | |
| | template <typename DistanceType, bool useHeap> |
| | struct KnnRadiusResultSet |
| | { |
| | int foundNeighbors; |
| | DistanceType largestHeapDist; |
| | int maxDistIndex; |
| | const int k; |
| | const bool sorted; |
| | const DistanceType epsError; |
| | const DistanceType radius_sq; |
| |
|
| |
|
| | __device__ __host__ |
| | KnnRadiusResultSet(int knn, bool sortResults, DistanceType eps, DistanceType radius) : foundNeighbors(0),largestHeapDist(radius),k(knn), sorted(sortResults), epsError(eps),radius_sq(radius){ } |
| |
|
| | |
| | |
| |
|
| | __device__ |
| | inline DistanceType |
| | worstDist() |
| | { |
| | return largestHeapDist; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | insert(int index, DistanceType dist) |
| | { |
| | if( dist < largestHeapDist ) { |
| | if( foundNeighbors<k ) { |
| | resultDist[foundNeighbors]=dist; |
| | resultIndex[foundNeighbors]=index; |
| | if( foundNeighbors==k-1 ) { |
| | if( useHeap ) { |
| | flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>()); |
| | largestHeapDist=resultDist[0]; |
| | } |
| | else { |
| | findLargestDistIndex(); |
| | } |
| | } |
| | foundNeighbors++; |
| | } |
| | else { |
| | if( useHeap ) { |
| | resultDist[0]=dist; |
| | resultIndex[0]=index; |
| | flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>()); |
| | largestHeapDist=resultDist[0]; |
| | } |
| | else { |
| | resultDist[maxDistIndex]=dist; |
| | resultIndex[maxDistIndex]=index; |
| | findLargestDistIndex(); |
| | } |
| | } |
| | } |
| | } |
| | __device__ |
| | void |
| | findLargestDistIndex( ) |
| | { |
| | largestHeapDist=resultDist[0]; |
| | maxDistIndex=0; |
| | for( int i=1; i<k; i++ ) |
| | if( resultDist[i] > largestHeapDist ) { |
| | maxDistIndex=i; |
| | largestHeapDist=resultDist[i]; |
| | } |
| | } |
| |
|
| | DistanceType* resultDist; |
| | int* resultIndex; |
| |
|
| | __device__ |
| | inline void |
| | setResultLocation( DistanceType* dists, int* index, int thread, int stride ) |
| | { |
| | resultDist=dists+stride*thread; |
| | resultIndex=index+stride*thread; |
| | for( int i=0; i<stride; i++ ) { |
| | resultDist[i]=INFINITY; |
| | resultIndex[i]=-1; |
| | |
| | |
| | } |
| | } |
| |
|
| | __device__ |
| | inline void |
| | finish() |
| | { |
| | if( sorted ) { |
| | if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>()); |
| | for( int i=k-1; i>0; i-- ) { |
| | flann::cuda::swap( resultDist[0], resultDist[i] ); |
| | flann::cuda::swap( resultIndex[0], resultIndex[i] ); |
| | flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() ); |
| | } |
| | } |
| | } |
| | }; |
| |
|
| | |
| | |
| | |
| | template< typename DistanceType > |
| | struct RadiusResultSet |
| | { |
| | DistanceType radius_sq_; |
| | int* segment_starts_; |
| | int count_; |
| | bool sorted_; |
| |
|
| | __device__ __host__ |
| | RadiusResultSet(DistanceType radius, int* segment_starts, bool sorted) : radius_sq_(radius), segment_starts_(segment_starts), count_(0), sorted_(sorted){ } |
| |
|
| | __device__ |
| | inline DistanceType |
| | worstDist() |
| | { |
| | return radius_sq_; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | insert(int index, DistanceType dist) |
| | { |
| | if( dist < radius_sq_ ) { |
| | resultIndex[count_]=index; |
| | resultDist[count_]=dist; |
| | count_++; |
| | } |
| | } |
| |
|
| | int* resultIndex; |
| | DistanceType* resultDist; |
| |
|
| | __device__ |
| | inline void |
| | setResultLocation( DistanceType* dists, int* index, int thread, int ) |
| | { |
| | resultIndex=index+segment_starts_[thread]; |
| | resultDist=dists+segment_starts_[thread]; |
| | } |
| |
|
| | __device__ |
| | inline void |
| | finish() |
| | { |
| | if( sorted_ ) { |
| | flann::cuda::heap::make_heap( resultDist,resultIndex, count_, GreaterThan<DistanceType>() ); |
| | for( int i=count_-1; i>0; i-- ) { |
| | flann::cuda::swap( resultDist[0], resultDist[i] ); |
| | flann::cuda::swap( resultIndex[0], resultIndex[i] ); |
| | flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() ); |
| | } |
| | } |
| | } |
| | }; |
| | } |
| | } |
| |
|
| | #endif |
| |
|