|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "dispatcher.hpp" |
|
|
#include "types.hpp" |
|
|
|
|
|
#include <torch/extension.h> |
|
|
#include <torch/script.h> |
|
|
|
|
|
#ifndef CPU_ONLY |
|
|
#include <ATen/cuda/CUDAUtils.h> |
|
|
#endif |
|
|
|
|
|
namespace minkowski { |
|
|
|
|
|
// CPU forward max-pooling kernel (defined elsewhere; declaration only).
// Called from max_pool_fw below with:
//   p_in_feat    — input features, row-major, nchannel columns
//   p_out_feat   — zero-initialized output features (out_nrows x nchannel)
//   p_mask_index — zero-initialized buffer (out_nrows x nchannel) that
//                  receives the argmax input-row indices used by the
//                  backward pass
//   nchannel     — number of feature channels (in_feat.size(1))
//   p_in_maps / p_out_maps — paired input/output row-index arrays
//   map_size     — number of (in, out) pairs (in_map.numel())
// NOTE(review): presumably computes the channel-wise maximum over all input
// rows mapped to each output row — confirm against the definition.
template <typename Dtype, typename MaskItype, typename MapItype>
void max_pooling_forward_pointer_kernel_cpu(Dtype const *p_in_feat,
                                            Dtype *p_out_feat,
                                            MaskItype *p_mask_index,
                                            size_t const nchannel,
                                            MapItype const *const p_in_maps,
                                            MapItype const *const p_out_maps,
                                            size_t const map_size);
|
|
|
|
|
// CPU backward max-pooling kernel (defined elsewhere; declaration only).
// Called from max_pool_bw below with:
//   p_grad_in_feat  — zero-initialized gradient w.r.t. input
//                     (in_nrows x nchannel)
//   p_grad_out_feat — incoming gradient (out_nrows x nchannel)
//   p_mask_index    — argmax indices recorded by the forward pass
//   nchannel        — number of feature channels (grad_out_feat.size(1))
// NOTE(review): presumably scatters each output gradient entry to the
// input row selected by p_mask_index — confirm against the definition.
template <typename Dtype, typename MaskItype>
void MaxPoolingBackwardKernelCPU(Dtype *p_grad_in_feat, size_t const in_nrows,
                                 Dtype const *p_grad_out_feat,
                                 size_t const out_nrows,
                                 MaskItype const *p_mask_index,
                                 size_t const nchannel);
|
|
|
|
|
#ifndef CPU_ONLY
// GPU forward max-pooling kernel launcher (defined elsewhere; declaration
// only). Mirrors the CPU variant above; the d_* pointers are device
// pointers obtained via Tensor::data_ptr in max_pool_fw.
//   d_in_map / d_out_map — paired input/output row-index arrays (non-const:
//                          the kernel may reorder them when !is_sorted —
//                          TODO confirm against the definition)
//   nmap       — number of (in, out) pairs (in_map.numel())
//   out_nrows  — number of output rows
//   nchannel   — number of feature channels
//   d_max_index — receives argmax input-row indices for the backward pass
//   is_sorted  — whether the map pairs are already sorted by output index
template <typename Dtype, typename MaskItype, typename MapItype>
void max_pool_forward_pointer_kernel_gpu(
    MapItype *d_in_map,
    MapItype *d_out_map,
    size_t const nmap,
    Dtype const *d_in_feat,
    Dtype *d_out_feat,
    size_t const out_nrows,
    size_t const nchannel,
    MaskItype *d_max_index,
    bool const is_sorted
);

// GPU backward max-pooling kernel launcher (defined elsewhere; declaration
// only). Same contract as MaxPoolingBackwardKernelCPU, on device memory.
template <typename Dtype, typename MaskItype>
void MaxPoolingBackwardKernelGPU(Dtype *d_grad_in_feat, size_t const in_nrows,
                                 const Dtype *d_grad_out_feat,
                                 size_t const out_nrows,
                                 const MaskItype *d_max_index,
                                 size_t const nchannel);
#endif
|
|
|
|
|
/**
 * Forward sparse max pooling.
 *
 * Dispatches to the CPU or GPU kernel based on in_feat's device, over the
 * integer dtype of in_map and the floating dtype of in_feat.
 *
 * @param in_map    integer tensor of input row indices, paired element-wise
 *                  with out_map (in_map.numel() pairs total).
 * @param out_map   integer tensor of output row indices, same length as
 *                  in_map.
 * @param in_feat   floating-point feature tensor with in_feat.size(1)
 *                  channels.
 * @param out_nrows number of rows in the pooled output.
 * @param is_sorted whether the (in, out) map pairs are sorted by output
 *                  index; forwarded to the GPU kernel only.
 * @return {out_feat, max_index}: out_feat is the (out_nrows, nchannel)
 *         pooled feature tensor; max_index is an (out_nrows, nchannel)
 *         integer tensor of argmax indices consumed by max_pool_bw.
 */
std::pair<torch::Tensor, torch::Tensor>
max_pool_fw(torch::Tensor const &in_map,
            torch::Tensor const &out_map,
            torch::Tensor const &in_feat,
            int const out_nrows, bool const is_sorted) {

  // Zero-initialize both outputs; rows never touched by the kernels keep 0.
  at::Tensor out_feat =
      at::zeros({out_nrows, in_feat.size(1)}, in_feat.options());
  // max_index inherits in_map's integer dtype/device and never needs grad.
  at::Tensor max_index = torch::zeros({out_nrows, in_feat.size(1)},
                                      in_map.options().requires_grad(false));

  if (in_feat.device().is_cuda()) {
#ifdef CPU_ONLY
    AT_ERROR("Please compile again with CUDA support");
#else
    ASSERT(in_map.is_cuda(), "in_map must be a CUDA tensor.");
    // Fixed copy-paste diagnostic: this assertion checks out_map, so the
    // message must name out_map (it previously said "kernel").
    ASSERT(out_map.is_cuda(), "out_map must be a CUDA tensor.");
    ASSERT(at::cuda::check_device({in_map, out_map, in_feat}),
           "all inputs must be on the same device");

    MINK_DISPATCH_INTEGER_TYPES(
        in_map.scalar_type(), integer_t, "max_pool_forward_gpu", [&] {
          LOG_DEBUG("Integer size", sizeof(integer_t));
          AT_DISPATCH_FLOATING_TYPES(
              in_feat.scalar_type(), "max_pool_forward_gpu", [&] {
                max_pool_forward_pointer_kernel_gpu<scalar_t, integer_t,
                                                    integer_t>(
                    in_map.data_ptr<integer_t>(), out_map.data_ptr<integer_t>(),
                    in_map.numel(), in_feat.data_ptr<scalar_t>(),
                    out_feat.data_ptr<scalar_t>(), out_nrows, in_feat.size(1),
                    max_index.data_ptr<integer_t>(), is_sorted);
              });
        });
#endif
  } else {
    MINK_DISPATCH_INTEGER_TYPES(
        in_map.scalar_type(), integer_t, "max_pool_forward_cpu", [&] {
          LOG_DEBUG("Integer size", sizeof(integer_t));
          AT_DISPATCH_FLOATING_TYPES(
              in_feat.scalar_type(), "max_pool_forward_cpu", [&] {
                max_pooling_forward_pointer_kernel_cpu<scalar_t, integer_t,
                                                       integer_t>(
                    in_feat.data_ptr<scalar_t>(), out_feat.data_ptr<scalar_t>(),
                    max_index.data_ptr<integer_t>(), in_feat.size(1),
                    in_map.data_ptr<integer_t>(), out_map.data_ptr<integer_t>(),
                    in_map.numel());
              });
        });
  }
  return {out_feat, max_index};
}
|
|
|
|
|
/**
 * Backward sparse max pooling.
 *
 * Dispatches to the CPU or GPU kernel based on grad_out_feat's device, over
 * the integer dtype of mask_index and the floating dtype of grad_out_feat.
 *
 * @param grad_out_feat gradient w.r.t. the pooled output,
 *                      (out_nrows, nchannel) floating-point tensor.
 * @param mask_index    argmax indices produced by max_pool_fw,
 *                      (out_nrows, nchannel) integer tensor.
 * @param in_nrows      number of rows of the forward pass's input features.
 * @return gradient w.r.t. the input features, an (in_nrows, nchannel)
 *         zero-initialized tensor filled in by the kernel.
 */
torch::Tensor max_pool_bw(torch::Tensor const &grad_out_feat,
                          torch::Tensor const &mask_index,
                          int const in_nrows) {
  int const out_nrows = grad_out_feat.size(0);
  // Zero-initialize: input rows that were never the argmax keep gradient 0.
  at::Tensor grad_in_feat =
      at::zeros({in_nrows, grad_out_feat.size(1)}, grad_out_feat.options());

  if (grad_out_feat.device().is_cuda()) {
#ifdef CPU_ONLY
    AT_ERROR("Please compile again with CUDA support");
#else
    // Fixed copy-paste diagnostic: this assertion checks mask_index, so the
    // message must name mask_index (it previously said "kernel").
    ASSERT(mask_index.is_cuda(), "mask_index must be a CUDA tensor.");
    ASSERT(at::cuda::check_device({mask_index, grad_out_feat}),
           "all inputs must be on the same device");
    MINK_DISPATCH_INTEGER_TYPES(
        mask_index.scalar_type(), integer_t, "max_pool_backward_gpu", [&] {
          AT_DISPATCH_FLOATING_TYPES(
              grad_out_feat.scalar_type(), "max_pool_backward_gpu", [&] {
                MaxPoolingBackwardKernelGPU<scalar_t, integer_t>(
                    grad_in_feat.data_ptr<scalar_t>(), in_nrows,
                    grad_out_feat.data_ptr<scalar_t>(), out_nrows,
                    mask_index.data_ptr<integer_t>(), grad_out_feat.size(1));
              });
        });
#endif
  } else {
    MINK_DISPATCH_INTEGER_TYPES(
        mask_index.scalar_type(), integer_t, "max_pool_backward_cpu", [&] {
          AT_DISPATCH_FLOATING_TYPES(
              grad_out_feat.scalar_type(), "max_pool_backward_cpu", [&] {
                MaxPoolingBackwardKernelCPU<scalar_t, integer_t>(
                    grad_in_feat.data_ptr<scalar_t>(), in_nrows,
                    grad_out_feat.data_ptr<scalar_t>(), out_nrows,
                    mask_index.data_ptr<integer_t>(), grad_out_feat.size(1));
              });
        });
  }

  return grad_in_feat;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|