|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef COORDINATE_MAP_GPU_CUH |
|
|
#define COORDINATE_MAP_GPU_CUH |
|
|
|
|
|
#include "3rdparty/concurrent_unordered_map.cuh" |
|
|
#include "3rdparty/hash/helper_functions.cuh" |
|
|
#include "allocators.cuh" |
|
|
#include "coordinate_map.hpp" |
|
|
#include "coordinate_map_functors.cuh" |
|
|
#include "kernel_map.cuh" |
|
|
#include "storage.cuh" |
|
|
|
|
|
#include <torch/extension.h> |
|
|
|
|
|
namespace minkowski { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename coordinate_type, template <typename T> |
|
|
class TemplatedAllocator = |
|
|
detail::c10_allocator> |
|
|
class CoordinateMapGPU |
|
|
: public CoordinateMap<coordinate_type, TemplatedAllocator> { |
|
|
public: |
|
|
|
|
|
using base_type = CoordinateMap<coordinate_type, TemplatedAllocator>; |
|
|
using self_type = CoordinateMapGPU<coordinate_type, TemplatedAllocator>; |
|
|
using size_type = typename base_type::size_type; |
|
|
using index_type = typename base_type::index_type; |
|
|
using stride_type = typename base_type::stride_type; |
|
|
|
|
|
using map_allocator_type = TemplatedAllocator<thrust::pair<coordinate<coordinate_type>, index_type>>; |
|
|
using byte_allocator_type = TemplatedAllocator<char>; |
|
|
|
|
|
|
|
|
using key_type = coordinate<coordinate_type>; |
|
|
using mapped_type = index_type; |
|
|
using hasher_type = detail::coordinate_murmur3<coordinate_type>; |
|
|
using key_equal_type = detail::coordinate_equal_to<coordinate_type>; |
|
|
using map_type = concurrent_unordered_map<key_type, |
|
|
mapped_type, |
|
|
hasher_type, |
|
|
key_equal_type, |
|
|
map_allocator_type>; |
|
|
using value_type = typename map_type::value_type; |
|
|
|
|
|
|
|
|
using kernel_map_type = gpu_kernel_map<index_type, byte_allocator_type>; |
|
|
|
|
|
|
|
|
using iterator = typename map_type::iterator; |
|
|
using const_iterator = typename map_type::const_iterator; |
|
|
|
|
|
|
|
|
using index_storage_type = gpu_storage<default_types::index_type, byte_allocator_type>; |
|
|
using coordnate_storage_type = gpu_storage<coordinate_type, byte_allocator_type>; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public: |
|
|
CoordinateMapGPU() = delete; |
|
|
CoordinateMapGPU(size_type const number_of_coordinates, |
|
|
size_type const coordinate_size, |
|
|
size_type const hashtable_occupancy = 50, |
|
|
stride_type const stride = {1}, |
|
|
map_allocator_type map_alloc = map_allocator_type(), |
|
|
byte_allocator_type byte_alloc = byte_allocator_type()) |
|
|
: base_type(number_of_coordinates, coordinate_size, stride, byte_alloc), |
|
|
m_hashtable_occupancy{hashtable_occupancy}, |
|
|
m_capacity(0), |
|
|
m_hasher(hasher_type{coordinate_size}), |
|
|
m_equal(key_equal_type{coordinate_size}), |
|
|
m_unused_key(coordinate<coordinate_type>{nullptr}), |
|
|
m_unused_element(std::numeric_limits<coordinate_type>::max()), |
|
|
m_map_allocator(map_alloc) { |
|
|
|
|
|
reserve(number_of_coordinates); |
|
|
|
|
|
LOG_DEBUG("tensor_stride", base_type::m_tensor_stride); |
|
|
m_device_tensor_stride.from_vector(base_type::m_tensor_stride); |
|
|
|
|
|
LOG_DEBUG("device tensor_stride set"); |
|
|
static_assert( |
|
|
sizeof(index_type) == sizeof(size_type), |
|
|
"kernel_map shared memory requires the type sizes to be the same"); |
|
|
static_assert( |
|
|
sizeof(coordinate_type) == sizeof(size_type), |
|
|
"kernel_map shared memory requires the type sizes to be the same"); |
|
|
} |
|
|
|
|
|
template <bool remap> |
|
|
void insert(coordinate_iterator<coordinate_type> key_first, |
|
|
coordinate_iterator<coordinate_type> key_last); |
|
|
|
|
|
template <bool remap> |
|
|
std::pair<index_storage_type, index_storage_type> |
|
|
insert_and_map(coordinate_iterator<coordinate_type> key_first, |
|
|
coordinate_iterator<coordinate_type> key_last); |
|
|
|
|
|
std::pair<index_storage_type, index_storage_type> |
|
|
find(coordinate_iterator<coordinate_type> key_first, |
|
|
coordinate_iterator<coordinate_type> key_last) const; |
|
|
|
|
|
inline void reserve(size_type size) { |
|
|
if (size > m_capacity) { |
|
|
|
|
|
base_type::reserve(size); |
|
|
|
|
|
LOG_DEBUG("Reserve map of", |
|
|
compute_hash_table_size(size, m_hashtable_occupancy), |
|
|
"for concurrent_unordered_map of size", size); |
|
|
m_map = map_type::create( |
|
|
compute_hash_table_size(size, m_hashtable_occupancy), |
|
|
m_unused_element, m_unused_key, m_hasher, m_equal, m_map_allocator); |
|
|
LOG_DEBUG("Done concurrent_unordered_map creation"); |
|
|
CUDA_TRY(cudaStreamSynchronize(0)); |
|
|
m_capacity = size; |
|
|
LOG_DEBUG("Reserved concurrent_unordered_map"); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self_type stride(stride_type const &stride) const; |
|
|
self_type stride_region(cpu_kernel_region<coordinate_type> &kernel, |
|
|
stride_type const &out_tensor_stride) const; |
|
|
self_type origin() const; |
|
|
self_type prune(bool const *keep_begin, bool const *keep_end) const; |
|
|
self_type |
|
|
merge(std::vector<std::reference_wrapper<self_type>> const &maps) const; |
|
|
|
|
|
kernel_map_type kernel_map(self_type const &out_coordinate_map, |
|
|
gpu_kernel_region<coordinate_type> const &kernel, |
|
|
CUDAKernelMapMode::Mode kernel_map_mode, |
|
|
uint32_t thread_dim = CUDA_NUM_THREADS) const; |
|
|
kernel_map_type stride_map(self_type const &out_coordinate_map, |
|
|
stride_type const &out_tensor_stride, |
|
|
uint32_t thread_dim = CUDA_NUM_THREADS) const; |
|
|
kernel_map_type origin_map(self_type const &origin_coordinate_map, |
|
|
uint32_t thread_dim = CUDA_NUM_THREADS) const; |
|
|
std::vector<at::Tensor> |
|
|
interpolation_map_weight(at::Tensor const &tfield) const; |
|
|
|
|
|
template <typename coordinate_field_type> |
|
|
std::pair<at::Tensor, at::Tensor> |
|
|
field_map(coordinate_field_type const *p_tfield, |
|
|
size_type const num_tfield) const; |
|
|
|
|
|
std::vector<at::Tensor> |
|
|
union_map(std::vector<std::reference_wrapper<self_type>> const &maps, |
|
|
uint32_t thread_dim = CUDA_NUM_THREADS) const; |
|
|
|
|
|
|
|
|
inline size_type size() const { return m_size; } |
|
|
void copy_coordinates(coordinate_type *dst_coordinate) const; |
|
|
std::string to_string() const { |
|
|
Formatter o; |
|
|
o << "CoordinateMapGPU:" << size() << "x" << m_coordinate_size; |
|
|
return o.str(); |
|
|
} |
|
|
inline map_type const const_hash_map() const { return *m_map.get(); }; |
|
|
|
|
|
|
|
|
void initialize_valid_indices(size_t const N_unique); |
|
|
|
|
|
|
|
|
using base_type::const_coordinate_data; |
|
|
using base_type::coordinate_data; |
|
|
using base_type::coordinate_size; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private: |
|
|
using base_type::m_byte_allocator; |
|
|
using base_type::m_coordinate_size; |
|
|
|
|
|
size_type m_hashtable_occupancy; |
|
|
size_type m_capacity; |
|
|
size_type m_size; |
|
|
hasher_type const m_hasher; |
|
|
key_equal_type const m_equal; |
|
|
key_type const m_unused_key; |
|
|
mapped_type const m_unused_element; |
|
|
index_storage_type m_valid_row_index; |
|
|
index_storage_type m_valid_map_index; |
|
|
index_storage_type m_inverse_row_index; |
|
|
index_storage_type m_device_tensor_stride; |
|
|
map_allocator_type m_map_allocator; |
|
|
std::shared_ptr<map_type> m_map; |
|
|
}; |
|
|
|
|
|
template <typename coordinate_field_type, typename coordinate_int_type, |
|
|
template <typename T> class TemplatedAllocator = |
|
|
detail::c10_allocator> |
|
|
class CoordinateFieldMapGPU |
|
|
: public CoordinateMap<coordinate_field_type, TemplatedAllocator> { |
|
|
|
|
|
public: |
|
|
using base_type = CoordinateMap<coordinate_field_type, TemplatedAllocator>; |
|
|
using coordinate_map_type = |
|
|
CoordinateMapGPU<coordinate_int_type, TemplatedAllocator>; |
|
|
using self_type = |
|
|
CoordinateFieldMapGPU<coordinate_field_type, coordinate_int_type, |
|
|
TemplatedAllocator>; |
|
|
using size_type = typename base_type::size_type; |
|
|
using index_type = typename base_type::index_type; |
|
|
using stride_type = typename base_type::stride_type; |
|
|
using byte_allocator_type = TemplatedAllocator<char>; |
|
|
using map_allocator_type = TemplatedAllocator< |
|
|
thrust::pair<coordinate<coordinate_int_type>, index_type>>; |
|
|
using kernel_map_type = gpu_kernel_map<index_type, byte_allocator_type>; |
|
|
|
|
|
|
|
|
using key_type = coordinate<coordinate_int_type>; |
|
|
using mapped_type = index_type; |
|
|
using hasher_type = detail::coordinate_murmur3<coordinate_int_type>; |
|
|
using key_equal_type = detail::coordinate_equal_to<coordinate_int_type>; |
|
|
using int_hash_map_type = |
|
|
concurrent_unordered_map<key_type, |
|
|
mapped_type, |
|
|
hasher_type, |
|
|
key_equal_type, |
|
|
map_allocator_type>; |
|
|
public: |
|
|
CoordinateFieldMapGPU() = delete; |
|
|
CoordinateFieldMapGPU(size_type const number_of_coordinates, |
|
|
size_type const coordinate_size, |
|
|
stride_type const &stride = {1}, |
|
|
byte_allocator_type alloc = byte_allocator_type()) |
|
|
: base_type(number_of_coordinates, coordinate_size, stride, alloc), |
|
|
m_size(number_of_coordinates) { |
|
|
base_type::reserve(number_of_coordinates); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void insert(coordinate_field_type const *coordinate_begin, |
|
|
coordinate_field_type const *coordinate_end) { |
|
|
size_type N = (coordinate_end - coordinate_begin) / m_coordinate_size; |
|
|
base_type::allocate(N); |
|
|
|
|
|
CUDA_CHECK(cudaMemcpy(base_type::coordinate_data(), coordinate_begin, |
|
|
N * m_coordinate_size * sizeof(coordinate_field_type), |
|
|
cudaMemcpyDeviceToDevice)); |
|
|
} |
|
|
|
|
|
void copy_coordinates(coordinate_field_type *dst_coordinate) const { |
|
|
CUDA_CHECK( |
|
|
cudaMemcpy(dst_coordinate, base_type::const_coordinate_data(), |
|
|
size() * m_coordinate_size * sizeof(coordinate_field_type), |
|
|
cudaMemcpyDeviceToDevice)); |
|
|
} |
|
|
|
|
|
void quantize_coordinates(coordinate_int_type *p_dst_coordinates, |
|
|
stride_type const &tensor_stride) const; |
|
|
|
|
|
using base_type::const_coordinate_data; |
|
|
using base_type::coordinate_data; |
|
|
|
|
|
coordinate_map_type origin() const; |
|
|
|
|
|
kernel_map_type origin_map(coordinate_map_type const &origin_coordinate_map, |
|
|
uint32_t thread_dim = CUDA_NUM_THREADS) const; |
|
|
|
|
|
inline size_type size() const noexcept { return m_size; } |
|
|
std::string to_string() const { |
|
|
Formatter o; |
|
|
o << "CoordinateFieldMapGPU:" << size() << "x" << m_coordinate_size; |
|
|
return o.str(); |
|
|
} |
|
|
|
|
|
private: |
|
|
using base_type::m_byte_allocator; |
|
|
using base_type::m_coordinate_size; |
|
|
size_type m_size; |
|
|
}; |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
|