/* * Copyright (c) 2020 NVIDIA CORPORATION. * Copyright (c) Chris Choy (chrischoy@ai.stanford.edu). * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * Please cite "4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural * Networks", CVPR'19 (https://arxiv.org/abs/1904.08755) if you use any part * of the code. 
*/ #include "coordinate_map_manager.hpp" #include "coordinate_map_key.hpp" #include "errors.hpp" #include "kernel_region.hpp" #include "utils.hpp" #include #include #include namespace py = pybind11; namespace minkowski { namespace detail { default_types::stride_type zeros(size_t const len) { return _fill_vec<0>(len); } default_types::stride_type ones(size_t const len) { return _fill_vec<1>(len); } } // namespace detail /******************************* * Initialization *******************************/ namespace detail { template struct insert_and_map_functor { std::pair operator()(coordinate_map_key_type &map_key, at::Tensor const &th_coordinate, CoordinateMapManager &manager) { LOG_DEBUG("initialize_and_map"); uint32_t const N = th_coordinate.size(0); uint32_t const coordinate_size = th_coordinate.size(1); coordinate_type *p_coordinate = th_coordinate.data_ptr(); auto map = CoordinateMapCPU( N, coordinate_size, map_key.first); auto map_inverse_map = map.template insert_and_map( p_coordinate, p_coordinate + N * coordinate_size); LOG_DEBUG("mapping size:", map_inverse_map.first.size()); // insert moves map THRUST_CHECK(manager.insert(map_key, map)); auto const &mapping = map_inverse_map.first; auto const &inverse_mapping = map_inverse_map.second; // return tensors at::Tensor th_mapping = torch::empty( {(int64_t)mapping.size()}, torch::TensorOptions().requires_grad(false).dtype(torch::kInt64)); at::Tensor th_inverse_mapping = torch::empty( {(int64_t)inverse_mapping.size()}, torch::TensorOptions().requires_grad(false).dtype(torch::kInt64)); // copy_n to int to long int64_t *p_mapping = th_mapping.data_ptr(); for (default_types::index_type i = 0; i < mapping.size(); ++i) { p_mapping[i] = mapping[i]; } int64_t *p_inverse_mapping = th_inverse_mapping.data_ptr(); for (default_types::index_type i = 0; i < inverse_mapping.size(); ++i) { p_inverse_mapping[i] = inverse_mapping[i]; } return std::make_pair(std::move(th_mapping), std::move(th_inverse_mapping)); } }; template 
struct insert_field_functor< coordinate_type, coordinate_field_type, std::allocator, CoordinateMapCPU, CoordinateFieldMapCPU> { void operator()(coordinate_map_key_type &map_key, at::Tensor const &th_coordinate, CoordinateMapManager &manager) { LOG_DEBUG("insert field"); uint32_t const N = th_coordinate.size(0); uint32_t const coordinate_size = th_coordinate.size(1); coordinate_field_type *p_coordinate = th_coordinate.data_ptr(); auto map = CoordinateFieldMapCPU(N, coordinate_size, map_key.first); THRUST_CHECK(map.insert(p_coordinate, p_coordinate + N * coordinate_size)); LOG_DEBUG("insert map with tensor_stride", map_key.first); manager.insert_field_map(map_key, map); } }; } // namespace detail /* * coords: coordinates in IntTensor * mapping: output mapping in IntTensor * tensor_strides: current tensor strides this coords will be initializeds * force_creation: even when there's a duplicate coords with the same tensor * strides. * force_remap: if there's duplicate coords, remap * allow_duplicate_coords: create map when there are duplicates in the * coordinates */ template class TemplatedAllocator, template class A> class CoordinateMapType> py::object CoordinateMapManager:: insert_field(at::Tensor const &coordinates, default_types::stride_type const tensor_stride, std::string const string_id) { torch::TensorArg arg_coordinate(coordinates, "coordinates", 0); torch::CheckedFrom c = "initialize"; torch::checkContiguous(c, arg_coordinate); // must match coordinate_type torch::checkScalarType(c, arg_coordinate, torch::kFloat); torch::checkBackend(c, arg_coordinate.tensor, detail::is_cpu_coordinate_map::value ? 
torch::Backend::CPU : torch::Backend::CUDA); torch::checkDim(c, arg_coordinate, 2); auto const coordinate_size = (index_type)coordinates.size(1); // Basic assertions ASSERT(coordinate_size - 1 == tensor_stride.size(), "The coordinate dimension (coordinate_size - 1):", coordinate_size - 1, " must match the size of tensor stride:", ArrToString(tensor_stride)); // generate the map_key coordinate_map_key_type map_key = std::make_pair(tensor_stride, string_id); if (m_field_coordinates.find(map_key) != m_field_coordinates.end()) { LOG_DEBUG("CoordinateMapKey collision detected:", map_key, "generating new string id."); map_key = get_random_string_id(tensor_stride, string_id); } LOG_DEBUG("initializing a map with tensor stride:", map_key.first, "string id:", map_key.second); // Create the concurrent coords map detail::insert_field_functor()(map_key, coordinates, *this); py::object py_key = py::cast(new CoordinateMapKey(coordinate_size, map_key)); return py_key; } /* to_sparse_and_map */ /* * coords: coordinates in IntTensor * tensor_strides: current tensor strides this coords will be initializeds */ template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair> CoordinateMapManager:: field_to_sparse_insert_and_map( CoordinateMapKey const *p_in_field_map_key, default_types::stride_type const sparse_tensor_stride, std::string const sparse_tensor_string_id) { auto const coordinate_size = p_in_field_map_key->get_coordinate_size(); // Basic assertions ASSERT(coordinate_size - 1 == sparse_tensor_stride.size(), "The coordinate dimension (coordinate_size - 1):", coordinate_size - 1, " must match the size of tensor stride:", ArrToString(sparse_tensor_stride)); // Find coordinate field auto const it = m_field_coordinates.find(p_in_field_map_key->get_key()); ASSERT(it != m_field_coordinates.end(), ERROR_MAP_NOT_FOUND); auto const &field_map = it->second; auto options = torch::TensorOptions().dtype(torch::kInt).requires_grad(false); if 
(!detail::is_cpu_coordinate_map::value) { #ifndef CPU_ONLY auto device_id = at::cuda::current_device(); options = options.device(torch::kCUDA, device_id); #else ASSERT(false, ERROR_CPU_ONLY); #endif } // generate the map_key coordinate_map_key_type map_key = std::make_pair(sparse_tensor_stride, sparse_tensor_string_id); if (m_coordinate_maps.find(map_key) != m_coordinate_maps.end()) { LOG_DEBUG("CoordinateMapKey collision detected:", map_key, "generating new string id."); map_key = get_random_string_id(sparse_tensor_stride, sparse_tensor_string_id); } LOG_DEBUG("initializing a field map with tensor stride:", map_key.first, "string id:", map_key.second); // Quantize the field with tensor stride. // The coordinate must be a tensor. Wrap a pointer with a tensor. at::Tensor int_coordinates = at::empty({field_map.size(), coordinate_size}, options); field_map.quantize_coordinates(int_coordinates.data_ptr(), sparse_tensor_stride); auto const map_inverse_map = detail::insert_and_map_functor()( map_key, int_coordinates, *this); auto const field_to_sparse_map_key = std::pair{ p_in_field_map_key->get_key(), map_key}; auto result = m_field_to_sparse_maps.insert( std::pair< const std::pair, const std::pair>{field_to_sparse_map_key, map_inverse_map}); LOG_DEBUG("field to sparse tensor map insertion", result.second); py::object py_key = py::cast(new CoordinateMapKey(coordinate_size, map_key)); return std::make_pair(py_key, map_inverse_map); } /* * Return existing field to sparse map. 
Raise an except if the map doesn't * exist */ template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair CoordinateMapManager:: get_field_to_sparse_map(CoordinateMapKey const *p_field_key, CoordinateMapKey const *p_sparse_key) const { auto key = std::pair{ p_field_key->get_key(), p_sparse_key->get_key()}; auto it = m_field_to_sparse_maps.find(key); ASSERT(it != m_field_to_sparse_maps.end(), "Field To Sparse Map doesn't exist"); return it->second; } /* * Create a field to sparse map if it doesn't exist. */ template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair CoordinateMapManager:: field_to_sparse_map(CoordinateMapKey const *p_in_field_map_key, CoordinateMapKey const *p_out_sparse_map_key) { auto const coordinate_size = p_in_field_map_key->get_coordinate_size(); // Basic assertions ASSERT(coordinate_size == p_out_sparse_map_key->get_coordinate_size(), "The coordinate dimension mismatch.", coordinate_size, "!=", p_out_sparse_map_key->get_coordinate_size()); // Find coordinate field auto const it_field = m_field_coordinates.find(p_in_field_map_key->get_key()); ASSERT(it_field != m_field_coordinates.end(), ERROR_MAP_NOT_FOUND); auto const &field_map = it_field->second; auto const it_sparse = m_coordinate_maps.find(p_out_sparse_map_key->get_key()); ASSERT(it_sparse != m_coordinate_maps.end(), ERROR_MAP_NOT_FOUND); auto const &sparse_map = it_sparse->second; auto options = torch::TensorOptions().dtype(torch::kInt).requires_grad(false); if (!detail::is_cpu_coordinate_map::value) { #ifndef CPU_ONLY auto device_id = at::cuda::current_device(); options = options.device(torch::kCUDA, device_id); #else ASSERT(false, ERROR_CPU_ONLY); #endif } auto const map_inverse_map = sparse_map.field_map(field_map.const_coordinate_data(), field_map.size()); auto const field_to_sparse_map_key = std::pair{ p_in_field_map_key->get_key(), p_out_sparse_map_key->get_key()}; auto result = m_field_to_sparse_maps.insert( std::pair< const 
std::pair, const std::pair>{field_to_sparse_map_key, map_inverse_map}); LOG_DEBUG("field to sparse tensor map insertion", result.second); return map_inverse_map; } /* * coords: coordinates in IntTensor * tensor_strides: current tensor strides this coords will be initializeds */ template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair> CoordinateMapManager:: insert_and_map(at::Tensor const &coordinate, default_types::stride_type const tensor_stride, std::string const string_id) { torch::TensorArg arg_coordinate(coordinate, "coordinates", 0); torch::CheckedFrom c = "initialize"; torch::checkContiguous(c, arg_coordinate); // must match coordinate_type torch::checkScalarType(c, arg_coordinate, torch::kInt); torch::checkBackend(c, arg_coordinate.tensor, detail::is_cpu_coordinate_map::value ? torch::Backend::CPU : torch::Backend::CUDA); torch::checkDim(c, arg_coordinate, 2); auto const coordinate_size = (index_type)coordinate.size(1); // Basic assertions ASSERT(coordinate_size - 1 == tensor_stride.size(), "The coordinate dimension (coordinate_size - 1):", coordinate_size - 1, " must match the size of tensor stride:", ArrToString(tensor_stride)); // generate the map_key coordinate_map_key_type map_key = std::make_pair(tensor_stride, string_id); if (m_coordinate_maps.find(map_key) != m_coordinate_maps.end()) { LOG_DEBUG("CoordinateMapKey collision detected:", map_key, "generating new string id."); map_key = get_random_string_id(tensor_stride, string_id); } LOG_DEBUG("initializing a map with tensor stride:", map_key.first, "string id:", map_key.second); // Create the concurrent coords map auto const map_inverse_map = detail::insert_and_map_functor()( map_key, coordinate, *this); LOG_DEBUG("map_inverse_map initialized"); py::object py_key = py::cast(new CoordinateMapKey(coordinate_size, map_key)); LOG_DEBUG("py key initialized"); return std::make_pair(py_key, map_inverse_map); } // stride template class TemplatedAllocator, template class A> 
class CoordinateMapType> std::pair CoordinateMapManager< coordinate_type, coordinate_field_type, TemplatedAllocator, CoordinateMapType>::stride(coordinate_map_key_type const &in_map_key, stride_type const &kernel_stride, std::string const string_id) { ASSERT(exists(in_map_key), ERROR_MAP_NOT_FOUND); // check if the key exists. LOG_DEBUG("In tensor stride:", in_map_key.first, "kernel stride:", kernel_stride); coordinate_map_key_type out_map_key( detail::stride_tensor_stride(in_map_key.first, kernel_stride, false), string_id == "" ? in_map_key.second : string_id); LOG_DEBUG("Out stride map key:", out_map_key); bool const exists_out_map = exists(out_map_key); if (!exists_out_map) { // operator[] required mapped_type(), which is not defined. // ASSERTION already checked that in_map_key exists. map_type const &in_map = m_coordinate_maps.find(in_map_key)->second; map_type out_map = in_map.stride(kernel_stride); insert(out_map_key, out_map); } // (key, new map generated flag) return std::make_pair(out_map_key, !exists_out_map); } template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair CoordinateMapManager:: stride_region(coordinate_map_key_type const &in_map_key, cpu_kernel_region &kernel, stride_type const &out_tensor_stride, bool const expand_coordinates) { ASSERT(exists(in_map_key), ERROR_MAP_NOT_FOUND); LOG_DEBUG("stride_region"); // kernel.tensor_stride must be set to out tensor stride. // stride_type out_tensor_stride{kernel.tensor_stride(), // kernel.tensor_stride() + // kernel.coordinate_size() - 1}; // check if the key exists. 
coordinate_map_key_type out_map_key(out_tensor_stride, ""); bool const exists_out_map = exists(out_map_key); if (!exists_out_map || expand_coordinates) { LOG_DEBUG("Create a new stride region map for tensor_stride:", out_tensor_stride); map_type const &in_map = m_coordinate_maps.find(in_map_key)->second; map_type out_map = in_map.stride_region(kernel, out_tensor_stride); if (exists_out_map) { LOG_DEBUG("coordinate map exists for tensor_stride:", out_tensor_stride); out_map_key = get_random_string_id(out_tensor_stride, ""); LOG_DEBUG("created a random key:", out_map_key); } insert(out_map_key, out_map); } // (key, new map generated flag) return std::make_pair(out_map_key, !exists_out_map || expand_coordinates); } template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair CoordinateMapManager::origin() { ASSERT(m_coordinate_maps.size() > 0, "No coordinate map found"); // check if the key exists. map_type const &random_map = m_coordinate_maps.begin()->second; stride_type origin_tensor_stride(random_map.coordinate_size() - 1); std::for_each(origin_tensor_stride.begin(), origin_tensor_stride.end(), [](auto &i) { i = 0; }); LOG_DEBUG("origin tensor stride:", origin_tensor_stride); coordinate_map_key_type origin_map_key(origin_tensor_stride, ""); bool const exists_origin_map = exists(origin_map_key); if (!exists_origin_map) { LOG_DEBUG("origin coordinate map not found"); map_type const *p_min_coordinate_map{nullptr}; size_type min_size = std::numeric_limits::max(); for (auto map_it = m_coordinate_maps.begin(); map_it != m_coordinate_maps.end(); ++map_it) { if (min_size > map_it->second.size()) { p_min_coordinate_map = &(map_it->second); } } if (p_min_coordinate_map != nullptr) { map_type origin_map = p_min_coordinate_map->origin(); LOG_DEBUG("origin map with size:", origin_map.size(), " inserted"); insert(origin_map_key, origin_map); } else { ASSERT(false, "Invalid origin map"); } } LOG_DEBUG("return origin()"); // (key, new map generated 
flag) return std::make_pair(origin_map_key, !exists_origin_map); } template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair CoordinateMapManager::origin_field() { ASSERT(m_field_coordinates.size() > 0, "No coordinate map found"); // check if the key exists. field_map_type const &random_map = m_field_coordinates.begin()->second; stride_type origin_tensor_stride(random_map.coordinate_size() - 1); std::for_each(origin_tensor_stride.begin(), origin_tensor_stride.end(), [](auto &i) { i = 0; }); LOG_DEBUG("origin tensor stride:", origin_tensor_stride); coordinate_map_key_type origin_map_key(origin_tensor_stride, ""); bool const exists_origin_map = exists(origin_map_key); if (!exists_origin_map) { LOG_DEBUG("origin coordinate map not found"); field_map_type const *p_min_coordinate_map{nullptr}; size_type min_size = std::numeric_limits::max(); for (auto map_it = m_field_coordinates.begin(); map_it != m_field_coordinates.end(); ++map_it) { if (min_size > map_it->second.size()) { p_min_coordinate_map = &(map_it->second); } } if (p_min_coordinate_map != nullptr) { map_type origin_map = p_min_coordinate_map->origin(); LOG_DEBUG("origin map with size:", origin_map.size(), " inserted"); insert(origin_map_key, origin_map); } else { ASSERT(false, "Invalid origin map"); } } // (key, new map generated flag) return std::make_pair(origin_map_key, !exists_origin_map); } template class TemplatedAllocator, template class A> class CoordinateMapType> coordinate_map_key_type CoordinateMapManager::prune(coordinate_map_key_type const &in_key, bool const *keep_begin, bool const *keep_end) { auto const map_it = m_coordinate_maps.find(in_key); ASSERT(map_it != m_coordinate_maps.end(), ERROR_MAP_NOT_FOUND); // create a coordinate_map_key coordinate_map_key_type map_key = std::make_pair(in_key.first, "pruned"); if (m_coordinate_maps.find(map_key) != m_coordinate_maps.end()) { map_key = get_random_string_id(map_key.first, map_key.second); } map_type pruned_map = 
map_it->second.prune(keep_begin, keep_end); LOG_DEBUG("pruned map with size:", pruned_map.size(), " inserted"); insert(map_key, pruned_map); return map_key; } // Kernel map namespace detail { template struct kernel_map_functor { cpu_kernel_map operator()(CoordinateMapCPU const &in_map, CoordinateMapCPU const &out_map, CUDAKernelMapMode::Mode kernel_map_mode, cpu_kernel_region &kernel) { return in_map.kernel_map(out_map, kernel); } }; template struct stride_map_functor { cpu_kernel_map operator()(CoordinateMapCPU const &in_map, CoordinateMapCPU const &out_map, default_types::stride_type const &out_tensor_stride) { return in_map.stride_map(out_map, out_tensor_stride); } }; // a partial specialization functor for kernel map in/out swap template <> struct swap_in_out_map_functor { cpu_kernel_map operator()(cpu_kernel_map const &kernel_map) { return std::make_pair(kernel_map.second, kernel_map.first); } }; template struct empty_map_functor { cpu_kernel_map operator()() { return cpu_kernel_map{}; } }; } // namespace detail /* * Given tensor_stride_src and tensor_stride_dst, find the respective coord_maps * and return the indices of the coord_map_ind in coord_map_dst */ template class TemplatedAllocator, template class A> class CoordinateMapType> typename CoordinateMapManager::kernel_map_type const & CoordinateMapManager< coordinate_type, coordinate_field_type, TemplatedAllocator, CoordinateMapType>::kernel_map(CoordinateMapKey const *p_in_map_key, CoordinateMapKey const *p_out_map_key) { // when kernel has volume 1 auto const &map_it = m_coordinate_maps.find(p_in_map_key->get_key()); ASSERT(map_it != m_coordinate_maps.end(), ERROR_MAP_NOT_FOUND); auto const coordinate_size = map_it->second.coordinate_size(); auto const one_vec = detail::ones(coordinate_size - 1); auto const offset = torch::empty( {0}, torch::TensorOptions().dtype(torch::kInt32).requires_grad(false)); return kernel_map(p_in_map_key, p_out_map_key, one_vec, one_vec, one_vec, RegionType::HYPER_CUBE, offset, 
false, false); } /* * Given tensor_stride_src and tensor_stride_dst, find the respective coord_maps * and return the indices of the coord_map_ind in coord_map_dst */ template class TemplatedAllocator, template class A> class CoordinateMapType> typename CoordinateMapManager::kernel_map_type const & CoordinateMapManager< coordinate_type, coordinate_field_type, TemplatedAllocator, CoordinateMapType>::kernel_map(CoordinateMapKey const *p_in_map_key, CoordinateMapKey const *p_out_map_key, stride_type const &kernel_size, // stride_type const &kernel_stride, stride_type const &kernel_dilation, RegionType::Type const region_type, at::Tensor const &offset, bool is_transpose, bool is_pool) { ASSERT(region_type != RegionType::CUSTOM, "Not implemented yet."); if (region_type == RegionType::CUSTOM) ASSERT(offset.is_cuda() == !detail::is_cpu_coordinate_map::value, "Invalid device for offset"); size_type kernel_dim = kernel_size.size(); ASSERT(kernel_dim == kernel_stride.size(), "kernel size mismatch"); ASSERT(kernel_dim == kernel_dilation.size(), "kernel size mismatch"); // in_coords_key->tensor_stride * kernel_stride == // out_coords_key->tensor_stride kernel_map_key_type const kernel_map_key = std::make_tuple(p_in_map_key->get_key(), p_out_map_key->get_key(), // maps kernel_size, kernel_stride, kernel_dilation, // kernels region_type, is_transpose, is_pool); const auto &kernel_map_iter = m_kernel_maps.find(kernel_map_key); LOG_DEBUG("set kernel map key for kernel map:", p_in_map_key->get_key(), "->", p_out_map_key->get_key()); if (kernel_map_iter == m_kernel_maps.end()) { // create a kernel map if it exists auto const in_map_it = m_coordinate_maps.find(p_in_map_key->get_key()); auto const out_map_it = m_coordinate_maps.find(p_out_map_key->get_key()); ASSERT(in_map_it != m_coordinate_maps.end(), "in_map", ERROR_MAP_NOT_FOUND); ASSERT(out_map_it != m_coordinate_maps.end(), "out_map", ERROR_MAP_NOT_FOUND); auto const &in_map = in_map_it->second; auto const &out_map = 
out_map_it->second; LOG_DEBUG("coordinate_size:", in_map.coordinate_size(), "in tensor_stride:", in_map.get_tensor_stride(), "out tensor_stride:", out_map.get_tensor_stride()); // +1 for batch index ASSERT(kernel_dim + 1 == in_map.coordinate_size(), "kernel size mismatch"); ASSERT(kernel_dim + 1 == out_map.coordinate_size(), "kernel size mismatch"); // If either coordinate map is empty if (in_map.size() == 0 || out_map.size() == 0) { return detail::empty_map_functor()(); } if (!is_transpose) { if (is_pool && (kernel_stride == kernel_size)) { LOG_DEBUG("generating stride_map"); auto const stride_map = detail::stride_map_functor()( in_map, out_map, out_map.get_tensor_stride()); m_kernel_maps[kernel_map_key] = std::move(stride_map); } else { LOG_DEBUG("generating kernel map"); // Default kernel map LOG_DEBUG( "kernel region with kernel: ", PtrToString(kernel_size.data(), in_map.coordinate_size() - 1)); LOG_DEBUG( "kernel region with dilation: ", PtrToString(kernel_dilation.data(), in_map.coordinate_size() - 1)); auto kernel_region = cpu_kernel_region( region_type, // in_map.coordinate_size(), // in_map.get_tensor_stride().data(), // kernel_size.data(), // kernel_dilation.data(), // 0, offset.data_ptr(), offset.size(0)); auto const kernel_map = detail::kernel_map_functor()( in_map, out_map, m_kernel_map_mode, kernel_region); LOG_DEBUG("kernel_map done"); m_kernel_maps[kernel_map_key] = std::move(kernel_map); LOG_DEBUG("kernel_map saved"); } } else { // is_transpose == true // Check first if the out2in kernel map exists // // Create temporary key for the flipped in/out kernel_map_key_type const swapped_kernel_map_key = std::make_tuple( p_out_map_key->get_key(), p_in_map_key->get_key(), // maps kernel_size, kernel_stride, kernel_dilation, // kernels region_type, false, is_pool); // Check if the temporary key exists and return swapped in/out if (m_kernel_maps.find(swapped_kernel_map_key) != m_kernel_maps.end()) { // copy the in out maps from the existing maps 
LOG_DEBUG("found existing kernel_map_key for transposed kernel map"); m_kernel_maps[kernel_map_key] = detail::swap_in_out_map_functor()( m_kernel_maps[swapped_kernel_map_key]); } else { // create in out kernel if it doesn't exist LOG_DEBUG("No existing kernel_map_key for transposed kernel map"); if (is_pool && kernel_stride == kernel_size) { // e.g. out_map has tensor stride 2 in_map has tensor stride 4. // Thus, create a stride map from 2 to 4, out to in. auto const stride_map = detail::stride_map_functor()( out_map, in_map, in_map.get_tensor_stride()); // TODO Replace the kernel_map values to shared pointers. m_kernel_maps[kernel_map_key] = detail::swap_in_out_map_functor()(stride_map); } else { // Default kernel map auto kernel_region = cpu_kernel_region( region_type, // out_map.coordinate_size(), // out_map.get_tensor_stride().data(), // kernel_size.data(), // kernel_dilation.data(), // 0, offset.data_ptr(), offset.size(0), true // is_transpose ); // out to in kernel map auto const kernel_map = detail::kernel_map_functor()( out_map, in_map, m_kernel_map_mode, kernel_region); LOG_DEBUG("kernel_map done"); m_kernel_maps[kernel_map_key] = detail::swap_in_out_map_functor()( std::move(kernel_map)); LOG_DEBUG("kernel_map saved"); } } } } #ifdef DEBUG else { LOG_DEBUG("kernel map found"); } #endif // TODO check if it copies or moves the internal data return m_kernel_maps[kernel_map_key]; } namespace detail { template struct origin_map_functor { std::pair> operator()(CoordinateMapCPU const &origin_coordinate_map, cpu_kernel_map const &origin_map) { auto options = torch::TensorOptions().dtype(torch::kLong).requires_grad(false); auto const out_size = origin_coordinate_map.size(); auto const coordinate_size = origin_coordinate_map.coordinate_size(); at::Tensor batch_indices = torch::empty({origin_coordinate_map.size()}, options); int64_t *p_batch_indices = batch_indices.data_ptr(); LOG_DEBUG("Copying", origin_coordinate_map.size(), "batch indices"); for 
(default_types::index_type i = 0; i < out_size; ++i) { p_batch_indices[i] = origin_coordinate_map.const_coordinate_data()[i * coordinate_size]; } // WARNING: this is an inclusive max index coordinate_type const max_batch_index = *std::max_element(p_batch_indices, p_batch_indices + out_size); std::vector in_maps; for (auto i = 0; i <= max_batch_index; ++i) { at::Tensor row_indices = torch::empty({0}, options); in_maps.push_back(std::move(row_indices)); } ASSERT(origin_map.first.size() == origin_map.second.size(), "invalid kernel_map"); LOG_DEBUG("Iterating over", origin_map.first.size(), "unique maps"); for (uint32_t out_row_index = 0; out_row_index < origin_map.first.size(); ++out_row_index) { auto const &in_map = origin_map.first[out_row_index]; int32_t const curr_size = in_map.size(); ASSERT(curr_size > 0, "invalid kernel map for index", out_row_index); auto const curr_batch_index = p_batch_indices[out_row_index]; ASSERT(curr_batch_index <= max_batch_index, "invalid batch index"); at::Tensor &row_indices = in_maps[curr_batch_index]; row_indices.resize_({curr_size}); int64_t *p_row_indices = row_indices.data_ptr(); LOG_DEBUG("Copying", curr_size, "elements to batch index", curr_batch_index, "and row index", out_row_index); for (auto i = 0; i < curr_size; ++i) { p_row_indices[i] = in_map[i]; } } return std::make_pair(batch_indices, in_maps); } }; } // namespace detail template class TemplatedAllocator, template class A> class CoordinateMapType> typename CoordinateMapManager::kernel_map_type const & CoordinateMapManager::origin_map(CoordinateMapKey const *p_in_map_key) { ASSERT(exists(p_in_map_key), ERROR_MAP_NOT_FOUND); kernel_map_key_type const kernel_map_key = origin_map_key(p_in_map_key->get_key()); coordinate_map_key_type const origin_key = std::get<1>(kernel_map_key); if (m_kernel_maps.find(kernel_map_key) == m_kernel_maps.end()) { auto const key = origin().first; auto const &origin_coordinate_map = m_coordinate_maps.find(key)->second; auto origin_map = 
m_coordinate_maps.find(p_in_map_key->get_key()) ->second.origin_map(origin_coordinate_map); m_kernel_maps[kernel_map_key] = std::move(origin_map); } return m_kernel_maps[kernel_map_key]; } template class TemplatedAllocator, template class A> class CoordinateMapType> typename CoordinateMapManager::kernel_map_type const & CoordinateMapManager::origin_field_map(CoordinateMapKey const *p_in_map_key) { ASSERT(exists_field(p_in_map_key), ERROR_MAP_NOT_FOUND); kernel_map_key_type const kernel_map_key = origin_map_key(p_in_map_key->get_key()); coordinate_map_key_type const origin_key = std::get<1>(kernel_map_key); if (m_field_kernel_maps.find(kernel_map_key) == m_field_kernel_maps.end()) { auto const key = origin_field().first; auto const &origin_coordinate_map = m_coordinate_maps.find(key)->second; auto origin_map = m_field_coordinates.find(p_in_map_key->get_key()) ->second.origin_map(origin_coordinate_map); m_field_kernel_maps[kernel_map_key] = std::move(origin_map); } return m_field_kernel_maps[kernel_map_key]; } namespace detail { template struct stride_map2tensor_functor { std::pair operator()(cpu_kernel_map const &stride_kernel_map) { ASSERT(stride_kernel_map.first.size() == 1, "Invalid kernel_map"); ASSERT(stride_kernel_map.first.size() == stride_kernel_map.second.size(), "invalid kernel_map"); auto const &in_map = stride_kernel_map.first[0]; auto const &out_map = stride_kernel_map.second[0]; auto options = torch::TensorOptions().dtype(torch::kLong).requires_grad(false); int64_t const out_size = (int64_t)in_map.size(); at::Tensor th_in_map = torch::empty({out_size}, options); at::Tensor th_out_map = torch::empty({out_size}, options); int64_t *p_in_map = th_in_map.data_ptr(); int64_t *p_out_map = th_out_map.data_ptr(); // from int32_t to long type for (uint32_t i = 0; i < out_size; ++i) p_in_map[i] = in_map[i]; for (uint32_t i = 0; i < out_size; ++i) p_out_map[i] = out_map[i]; return std::make_pair(std::move(th_in_map), std::move(th_out_map)); } }; } // namespace 
// NOTE(review): from here to the end of the file the text looks
// whitespace-collapsed. Several definitions share one physical line, so every
// line comment runs to the end of its physical line, and the preprocessor
// directives (#ifndef / #else / #endif) no longer start a line. Many template
// argument lists also look stripped (e.g. "std::pair" with no arguments,
// "data_ptr()" with no element type, "template class CoordinateMapManager;").
// The code tokens below are left exactly as found; the missing pieces cannot
// be recovered from this chunk and should be restored from the original file.
//
// stride_map_th(p_in_map_key, p_strided_map_key)
//   Purpose: return, as tensors, the kernel map between an input coordinate
//   map and a strided coordinate map; the kernel map is built on first use and
//   cached in m_kernel_maps.
//   Preconditions: both keys must exist, otherwise ASSERT fires with
//   ERROR_MAP_NOT_FOUND. For every axis i, strided_map_stride[i] must be an
//   exact multiple of in_map_stride[i] (ASSERT with a descriptive message).
//   Steps:
//     1. kernel_stride[i] = strided_map_stride[i] / in_map_stride[i].
//     2. Cache key = tuple(in key, strided key, kernel_stride, kernel_stride,
//        detail::ones(coordinate_size - 1), RegionType::HYPER_CUBE,
//        is_transpose = 0, is_pool = true).
//     3. On a cache miss, detail::stride_map_functor is called with in_map,
//        strided_map and strided_map.get_tensor_stride() (not kernel_stride),
//        and the result is stored under the cache key.
//     4. detail::stride_map2tensor_functor converts the cached entry.
//   NOTE(review): the loop indexes strided_map_stride using the length of
//   in_map_stride and never compares the two lengths -- confirm they always
//   match. std::move(stride_map) is applied to an "auto const" local, so it
//   cannot select a move assignment and falls back to a copy.
//   The physical line ends with the template header and return type of
//   origin_map_th; its name and body follow on the next line.
detail template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair CoordinateMapManager::stride_map_th(CoordinateMapKey const *p_in_map_key, CoordinateMapKey const *p_strided_map_key) { ASSERT(exists(p_in_map_key), ERROR_MAP_NOT_FOUND); ASSERT(exists(p_strided_map_key), ERROR_MAP_NOT_FOUND); map_type const &in_map = m_coordinate_maps.find(p_in_map_key->get_key())->second; map_type const &strided_map = m_coordinate_maps.find(p_strided_map_key->get_key())->second; // Get tensor strides and find kernel stride size // Check if the kernel map key exists auto const &in_map_stride = in_map.get_tensor_stride(); auto const &strided_map_stride = strided_map.get_tensor_stride(); stride_type kernel_stride(in_map_stride.size()); for (index_type i = 0; i < kernel_stride.size(); ++i) { ASSERT(strided_map_stride[i] % in_map_stride[i] == 0, "The tensor stride of the strided map must be divisible by the " "tensor stride of the input map. strided_map_stride:", ArrToString(strided_map_stride), " in_map_stride:", ArrToString(in_map_stride)); kernel_stride[i] = strided_map_stride[i] / in_map_stride[i]; } auto const one_vec = detail::ones(in_map.coordinate_size() - 1); kernel_map_key_type const kernel_map_key = std::make_tuple( p_in_map_key->get_key(), p_strided_map_key->get_key(), // maps kernel_stride, kernel_stride, one_vec, // kernels RegionType::HYPER_CUBE /* region_type */, 0 /* is_transpose */, true /* is_pool */); if (m_kernel_maps.find(kernel_map_key) == m_kernel_maps.end()) { LOG_DEBUG("Creating stride kernel map with kernel size:", ArrToString(kernel_stride)); auto const stride_map = detail::stride_map_functor()( in_map, strided_map, strided_map.get_tensor_stride()); m_kernel_maps[kernel_map_key] = std::move(stride_map); } // copy the kernel map to tensors return detail::stride_map2tensor_functor()( m_kernel_maps[kernel_map_key]); } template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair> 
// origin_map_th(p_in_map_key)
//   Obtains the kernel map from origin_map(p_in_map_key), fetches the
//   coordinate map stored under origin().first, and passes both to
//   detail::origin_map_functor, whose result is returned.
// origin_field_map_th(p_in_map_key)
//   Same flow, using origin_field_map(p_in_map_key) and origin_field().first.
//   NOTE(review): both functions dereference
//   m_coordinate_maps.find(origin_key) without comparing against end();
//   presumably origin() / origin_field() guarantee the entry -- confirm. The
//   local reference named origin_map also hides the member function of the
//   same name for the rest of the body.
// interpolation_map_weight(tfield, p_in_map_key)
//   Asserts that the key exists (ERROR_MAP_NOT_FOUND), then forwards tfield
//   to interpolation_map_weight of the stored coordinate map.
//
// The block-commented sections that follow hold disabled code written against
// CoordsManager / CoordsKey names (createUnionCoords, getUnionMapHashKey,
// getPruningInOutMaps, getUnionInOutMaps); presumably a superseded API kept
// for reference -- confirm before deleting.
//
// merge(map_keys)
//   Requires more than one key. For each key: asserts it exists, stores a
//   reference to its map, and lowers merged_map_tensor_stride (initialised
//   from map_keys[0].first) to the element-wise minimum with that map's
//   tensor stride. A new key is created with
//   get_random_string_id(merged_map_tensor_stride, "merge"), the first map's
//   merge(maps) produces the merged map, which is inserted; the new key is
//   returned.
//   NOTE(review): the inner loop reads map_keys[0].first.size() entries from
//   every map's tensor stride without checking that each map has that many.
//   "int k" is compared against the result of size() (presumably unsigned).
//   The return value of insert(...) is ignored here, whereas the
//   insert_and_map_functor earlier in this file wraps it in THRUST_CHECK.
// union_map(map_keys)
//   Calls merge(map_keys), collects references to the input maps again, and
//   returns the pair (merged key, merged_map.union_map(maps)).
// union_map_th(p_map_keys, p_out_key)
//   Requires p_out_key to have no key set ("Out coordinate map key should be
//   uninitialized"), extracts get_key() from every input, calls union_map,
//   writes the merged key into p_out_key and returns the second element of
//   the pair.
CoordinateMapManager::origin_map_th(CoordinateMapKey const *p_in_map_key) { kernel_map_type const &kernel_map = origin_map(p_in_map_key); coordinate_map_key_type const origin_key = origin().first; map_type const &origin_map = m_coordinate_maps.find(origin_key)->second; return detail::origin_map_functor()( origin_map, kernel_map); } template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair> CoordinateMapManager:: origin_field_map_th(CoordinateMapKey const *p_in_map_key) { kernel_map_type const &kernel_map = origin_field_map(p_in_map_key); coordinate_map_key_type const origin_key = origin_field().first; map_type const &origin_map = m_coordinate_maps.find(origin_key)->second; return detail::origin_map_functor()( origin_map, kernel_map); } // Interpolation map template class TemplatedAllocator, template class A> class CoordinateMapType> std::vector CoordinateMapManager:: interpolation_map_weight(at::Tensor const &tfield, CoordinateMapKey const *p_in_map_key) { ASSERT(exists(p_in_map_key), ERROR_MAP_NOT_FOUND); return m_coordinate_maps.find(p_in_map_key->get_key()) ->second.interpolation_map_weight(tfield); } /*********************************/ /* template uint64_t CoordsManager::createUnionCoords(vector py_in_coords_keys, py::object py_out_coords_key) { vector p_in_coords_keys; CoordsKey *p_in_coords_key = py_in_coords_keys[0].cast(); auto tensor_strides = p_in_coords_key->getTensorStride(); for (const auto &py_in_coords_key : py_in_coords_keys) { // Set the tensor strides to the smallest elements. 
p_in_coords_key = py_in_coords_key.cast(); p_in_coords_keys.push_back(p_in_coords_key); transform(tensor_strides.begin(), // In1 begin tensor_strides.end(), // In1 end p_in_coords_key->getTensorStride().begin(), // In2 begin tensor_strides.begin(), // out begin [](int a, int b) -> int { return std::min(a, b); } // binary op ); const uint64_t in_coords_key = p_in_coords_key->getKey(); ASSERT(existsCoordsKey(in_coords_key), "The coord map doesn't exist for the given coords_key: ", to_string(in_coords_key), "."); } CoordsKey *p_out_coords_key = py_out_coords_key.cast(); vector>> in_coords_maps; for (const CoordsKey *p_in_coords_key : p_in_coords_keys) { CoordsMap &curr_map = coords_maps[p_in_coords_key->getKey()]; in_coords_maps.push_back(ref(curr_map)); } // set a random coords key const uint64_t out_coords_key = getRandomCoordsKey(); // Set the pycoordskey using the last coords_key p_out_coords_key->setDimension(p_in_coords_key->getDimension()); p_out_coords_key->setKey(out_coords_key); p_out_coords_key->setTensorStride(tensor_strides); coords_maps[out_coords_key] = CoordsMap::union_coords(in_coords_maps); return out_coords_key; } template const InOutMapKey CoordsManager::getUnionMapHashKey(vector py_in_coords_keys, py::object py_out_coords_key) const { CoordsKey *p_out_coords_key = py_out_coords_key.cast(); ASSERT(py_in_coords_keys.size() > 1, "Number of input coords must be > 1"); vector p_in_coords_keys; // We use sum of coords key (even with overflow, it will be unique with high // prob). We use sum to make the key invariant to the order of the keys. 
uint64_t sum_in_coords_key = 0; CoordsKey *p_in_coords_key = py_in_coords_keys[0].cast(); for (auto &py_in_coords_key : py_in_coords_keys) { p_in_coords_key = py_in_coords_key.cast(); const uint64_t in_coords_key = p_in_coords_key->getKey(); ASSERT(existsCoordsKey(in_coords_key), "The coord map doesn't exist for the given coords_key: ", to_string(in_coords_key), "."); sum_in_coords_key += in_coords_key; } ASSERT(p_out_coords_key->key_set, "Key is not set. out_coords_key: ", to_string(p_out_coords_key->getKey())); const uint64_t out_coords_key = p_out_coords_key->getKey(); const vector zero_vec(p_in_coords_key->getDimension(), 0); const uint64_t zero_hash = hash_vec(zero_vec); InOutMapKey map_key = {sum_in_coords_key, out_coords_key, zero_hash, zero_hash, zero_hash, 0, false, true}; return map_key; } */ /** * Entry function for coords map generation and the associated kernel maps. */ /* template const InOutMapsRefPair CoordsManager::getPruningInOutMaps(at::Tensor use_feat, py::object py_in_coords_key, py::object py_out_coords_key) { CoordsKey *p_in_coords_key = py_in_coords_key.cast(); CoordsKey *p_out_coords_key = py_out_coords_key.cast(); // Create output coordinates if it doesn't exist if (!p_out_coords_key->key_set) { // The following function setup py_out_coords_key createPrunedCoords(use_feat, py_in_coords_key, py_out_coords_key); } const uint64_t in_coords_key = p_in_coords_key->getKey(); const uint64_t out_coords_key = p_out_coords_key->getKey(); // Use the map key for origin hash map (stride, dilation, kernel are all // NULL) const InOutMapKey map_key = getOriginMapHashKey(py_in_coords_key, py_out_coords_key); // For non transpose case // make a kernel mapping. The kernel will be saved with the map_key. 
if (in_maps.find(map_key) == in_maps.end()) { const auto in_out = coords_maps[in_coords_key].pruned_kernel_map( coords_maps[out_coords_key]); in_maps[map_key] = in_out.first; out_maps[map_key] = in_out.second; } return make_pair(ref(in_maps[map_key]), ref(out_maps[map_key])); } template const InOutMapsRefPair CoordsManager::getUnionInOutMaps(vector py_in_coords_keys, py::object py_out_coords_key) { CoordsKey *p_out_coords_key = py_out_coords_key.cast(); // Create output coordinates if it doesn't exist if (!p_out_coords_key->key_set) createUnionCoords(py_in_coords_keys, py_out_coords_key); const uint64_t out_coords_key = p_out_coords_key->getKey(); // Map key for origin hash map const InOutMapKey map_key = getUnionMapHashKey(py_in_coords_keys, py_out_coords_key); vector>> in_coords_maps; for (const auto &py_in_coords_key : py_in_coords_keys) { const CoordsKey *p_in_coords_key = py_in_coords_key.cast(); uint64_t in_coords_key = p_in_coords_key->getKey(); in_coords_maps.push_back(ref(coords_maps[in_coords_key])); } // For non transpose case // make a kernel mapping. The kernel will be saved with the map_key. if (in_maps.find(map_key) == in_maps.end()) { const auto in_out = CoordsMap::union_map( in_coords_maps, coords_maps[out_coords_key]); in_maps[map_key] = in_out.first; out_maps[map_key] = in_out.second; } return make_pair(ref(in_maps[map_key]), ref(out_maps[map_key])); } */ template class TemplatedAllocator, template class A> class CoordinateMapType> coordinate_map_key_type CoordinateMapManager:: merge(std::vector const &map_keys) { ASSERT(map_keys.size() > 1, "Got one or zero map. 
Merge at least 2 maps."); // Aggregate all coords maps std::vector> maps; auto const tensor_stride_size = map_keys[0].first.size(); stride_type merged_map_tensor_stride{map_keys[0].first}; for (const auto &key : map_keys) { ASSERT(exists(key), ERROR_MAP_NOT_FOUND); auto &map = m_coordinate_maps.find(key)->second; maps.push_back(map); for (int k = 0; k < tensor_stride_size; ++k) { merged_map_tensor_stride[k] = std::min(merged_map_tensor_stride[k], map.get_tensor_stride()[k]); } } // Create a merged map with the smallest tensor stride coordinate_map_key_type merged_map_key = get_random_string_id(merged_map_tensor_stride, "merge"); map_type const &map = m_coordinate_maps.find(map_keys[0])->second; map_type merged_map = map.merge(maps); insert(merged_map_key, merged_map); return merged_map_key; } template class TemplatedAllocator, template class A> class CoordinateMapType> std::pair> CoordinateMapManager:: union_map(std::vector const &map_keys) { // Create a merged map auto const merged_key = merge(map_keys); map_type const &merged_map = m_coordinate_maps.find(merged_key)->second; std::vector> maps; for (const auto &key : map_keys) { ASSERT(exists(key), ERROR_MAP_NOT_FOUND); maps.push_back(std::ref(m_coordinate_maps.find(key)->second)); } return std::make_pair(merged_key, merged_map.union_map(maps)); } template class TemplatedAllocator, template class A> class CoordinateMapType> std::vector CoordinateMapManager:: union_map_th(std::vector const &p_map_keys, CoordinateMapKey *p_out_key) { ASSERT(!p_out_key->is_key_set(), "Out coordinate map key should be uninitialized"); std::vector map_keys; map_keys.reserve(p_map_keys.size()); std::for_each( p_map_keys.begin(), p_map_keys.end(), [&](CoordinateMapKey *p_key) { map_keys.push_back(p_key->get_key()); }); auto union_pair = union_map(map_keys); p_out_key->set_key(union_pair.first); return union_pair.second; } /* Helper functions */ template class TemplatedAllocator, template class A> class CoordinateMapType> at::Tensor 
// get_coordinates(p_key) const
//   Asserts that the key exists, then allocates a torch::kInt tensor with
//   requires_grad(false) and shape {map.size(), map.coordinate_size()} and
//   fills it through map.copy_coordinates(...). When
//   detail::is_cpu_coordinate_map<...>::value is false, the tensor options are
//   switched to the current CUDA device; in a CPU_ONLY build that branch
//   asserts with ERROR_CPU_ONLY instead.
//   The exists(p_key) assertion and the following find()/end() assertion
//   check the same condition twice.
// detail::kernel_map_to_tensors::operator()(cpu_kernel_map const &)
//   Walks kernel_map.first (in maps) and kernel_map.second (out maps) in
//   parallel. For each index k whose in map has N > 0 entries it creates a
//   torch::kInt tensor of shape {2, N}, copies the in map into the first N
//   elements and the out map into the next N, and stores the tensor under
//   th_kernel_maps[k]. Indices with an empty in map get no entry.
//   NOTE(review): the local "at::Tensor kernel_map" inside the loop shadows
//   the parameter of the same name, and "auto k = 0" deduces int while being
//   compared with in_maps.size(). out_maps[k] is assumed to hold at least N
//   entries; nothing here verifies it.
// kernel_map_th(p_in_map_key, p_out_map_key, kernel_size, kernel_stride,
//               kernel_dilation, region_type, offset, is_transpose, is_pool)
//   Forwards every argument unchanged to kernel_map(...) and converts the
//   returned kernel map with detail::kernel_map_to_tensors. Its parameter
//   list starts on the next physical line and continues on the one after.
// get_coordinate_field(p_key) const
//   Looks p_key up in m_field_coordinates (ASSERT with ERROR_MAP_NOT_FOUND
//   when absent) and returns a {map.size(), map.coordinate_size()} tensor
//   filled by map.copy_coordinates(...). The dtype is torch::kFloat or
//   torch::kDouble, chosen by a std::is_same test whose arguments are not
//   visible in this chunk. Device selection mirrors get_coordinates.
// The file ends with an explicit instantiation of CoordinateMapManager (its
// template arguments are missing here) and the closing brace of namespace
// minkowski.
CoordinateMapManager::get_coordinates(CoordinateMapKey const *p_key) const { ASSERT(exists(p_key), ERROR_MAP_NOT_FOUND); auto const it = m_coordinate_maps.find(p_key->get_key()); ASSERT(it != m_coordinate_maps.end(), ERROR_MAP_NOT_FOUND); auto const &map = it->second; auto const nrows = map.size(); auto const ncols = map.coordinate_size(); LOG_DEBUG("coordinate map nrows:", nrows, "ncols:", ncols); // CPU torch.IntTensor auto options = torch::TensorOptions().dtype(torch::kInt).requires_grad(false); if (!detail::is_cpu_coordinate_map::value) { #ifndef CPU_ONLY auto device_id = at::cuda::current_device(); options = options.device(torch::kCUDA, device_id); #else ASSERT(false, ERROR_CPU_ONLY); #endif } at::Tensor coordinates = torch::empty({(int64_t)nrows, (int64_t)ncols}, options); LOG_DEBUG("Initialized coordinates"); // copy to the out coords map.copy_coordinates(coordinates.template data_ptr()); LOG_DEBUG("Copied coordinates"); return coordinates; } namespace detail { template struct kernel_map_to_tensors { std::unordered_map operator()(cpu_kernel_map const &kernel_map) { const auto &in_maps = kernel_map.first; const auto &out_maps = kernel_map.second; auto options = torch::TensorOptions().dtype(torch::kInt).requires_grad(false); std::unordered_map th_kernel_maps; for (auto k = 0; k < in_maps.size(); ++k) { const auto &in_map = in_maps[k]; const auto &out_map = out_maps[k]; const int64_t N = in_map.size(); if (N > 0) { at::Tensor kernel_map = torch::empty({2, N}, options); int32_t *p_kernel_map = kernel_map.data_ptr(); std::copy_n(&in_map[0], N, p_kernel_map); std::copy_n(&out_map[0], N, p_kernel_map + N); th_kernel_maps[k] = std::move(kernel_map); } } return th_kernel_maps; } }; } // namespace detail template class TemplatedAllocator, template class A> class CoordinateMapType> std::unordered_map CoordinateMapManager< coordinate_type, coordinate_field_type, TemplatedAllocator, CoordinateMapType>::kernel_map_th(CoordinateMapKey const *p_in_map_key, CoordinateMapKey 
const *p_out_map_key, stride_type const &kernel_size, // stride_type const &kernel_stride, stride_type const &kernel_dilation, RegionType::Type const region_type, at::Tensor const &offset, bool is_transpose, bool is_pool) { auto const &curr_kernel_map = kernel_map(p_in_map_key, p_out_map_key, // maps kernel_size, kernel_stride, kernel_dilation, // kernels region_type, offset, is_transpose, is_pool); return detail::kernel_map_to_tensors()( curr_kernel_map); } template class TemplatedAllocator, template class A> class CoordinateMapType> at::Tensor CoordinateMapManager:: get_coordinate_field(CoordinateMapKey const *p_key) const { auto const it = m_field_coordinates.find(p_key->get_key()); ASSERT(it != m_field_coordinates.end(), ERROR_MAP_NOT_FOUND); auto const &map = it->second; auto const nrows = map.size(); auto const ncols = map.coordinate_size(); auto options = torch::TensorOptions() .dtype(std::is_same::value ? torch::kFloat : torch::kDouble) .requires_grad(false); if (!detail::is_cpu_coordinate_map::value) { #ifndef CPU_ONLY auto device_id = at::cuda::current_device(); options = options.device(torch::kCUDA, device_id); #else ASSERT(false, ERROR_CPU_ONLY); #endif } at::Tensor coordinates = torch::empty({(int64_t)nrows, (int64_t)ncols}, options); // copy to the out coords map.copy_coordinates(coordinates.template data_ptr()); return coordinates; } template class CoordinateMapManager; } // end namespace minkowski