|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "dispatcher.hpp" |
|
|
#include "types.hpp" |
|
|
|
|
|
#include <torch/extension.h> |
|
|
#include <torch/script.h> |
|
|
|
|
|
#ifndef CPU_ONLY |
|
|
#include <ATen/cuda/CUDAUtils.h> |
|
|
#endif |
|
|
|
|
|
namespace minkowski { |
|
|
|
|
|
// CPU forward max-pooling kernel (defined elsewhere; declaration only).
// Called from max_pool_fw below with:
//   p_in_feat    — input features, row-major, nchannel columns
//   p_out_feat   — zero-initialized output features (out_nrows x nchannel)
//   p_mask_index — zero-initialized buffer (out_nrows x nchannel) that
//                  receives the argmax input-row indices used by the
//                  backward pass
//   nchannel     — number of feature channels (in_feat.size(1))
//   p_in_maps / p_out_maps — paired input/output row-index arrays
//   map_size     — number of (in, out) pairs (in_map.numel())
// NOTE(review): presumably computes the channel-wise maximum over all input
// rows mapped to each output row — confirm against the definition.
template <typename Dtype, typename MaskItype, typename MapItype>
void max_pooling_forward_pointer_kernel_cpu(Dtype const *p_in_feat,
                                            Dtype *p_out_feat,
                                            MaskItype *p_mask_index,
                                            size_t const nchannel,
                                            MapItype const *const p_in_maps,
                                            MapItype const *const p_out_maps,
                                            size_t const map_size);
|
|
|
|
|
// CPU backward max-pooling kernel (defined elsewhere; declaration only).
// Called from max_pool_bw below with:
//   p_grad_in_feat  — zero-initialized gradient w.r.t. input
//                     (in_nrows x nchannel)
//   p_grad_out_feat — incoming gradient (out_nrows x nchannel)
//   p_mask_index    — argmax indices recorded by the forward pass
//   nchannel        — number of feature channels (grad_out_feat.size(1))
// NOTE(review): presumably scatters each output gradient entry to the
// input row selected by p_mask_index — confirm against the definition.
template <typename Dtype, typename MaskItype>
void MaxPoolingBackwardKernelCPU(Dtype *p_grad_in_feat, size_t const in_nrows,
                                 Dtype const *p_grad_out_feat,
                                 size_t const out_nrows,
                                 MaskItype const *p_mask_index,
                                 size_t const nchannel);
|
|
|
|
|
#ifndef CPU_ONLY
// GPU forward max-pooling kernel launcher (defined elsewhere; declaration
// only). Mirrors the CPU variant above; the d_* pointers are device
// pointers obtained via Tensor::data_ptr in max_pool_fw.
//   d_in_map / d_out_map — paired input/output row-index arrays (non-const:
//                          the kernel may reorder them when !is_sorted —
//                          TODO confirm against the definition)
//   nmap       — number of (in, out) pairs (in_map.numel())
//   out_nrows  — number of output rows
//   nchannel   — number of feature channels
//   d_max_index — receives argmax input-row indices for the backward pass
//   is_sorted  — whether the map pairs are already sorted by output index
template <typename Dtype, typename MaskItype, typename MapItype>
void max_pool_forward_pointer_kernel_gpu(
    MapItype *d_in_map,
    MapItype *d_out_map,
    size_t const nmap,
    Dtype const *d_in_feat,
    Dtype *d_out_feat,
    size_t const out_nrows,
    size_t const nchannel,
    MaskItype *d_max_index,
    bool const is_sorted
);

// GPU backward max-pooling kernel launcher (defined elsewhere; declaration
// only). Same contract as MaxPoolingBackwardKernelCPU, on device memory.
template <typename Dtype, typename MaskItype>
void MaxPoolingBackwardKernelGPU(Dtype *d_grad_in_feat, size_t const in_nrows,
                                 const Dtype *d_grad_out_feat,
                                 size_t const out_nrows,
                                 const MaskItype *d_max_index,
                                 size_t const nchannel);
#endif
|
|
|
|
|
/**
 * Forward sparse max pooling.
 *
 * Dispatches to the CPU or GPU kernel based on in_feat's device, over the
 * integer dtype of in_map and the floating dtype of in_feat.
 *
 * @param in_map    integer tensor of input row indices, paired element-wise
 *                  with out_map (in_map.numel() pairs total).
 * @param out_map   integer tensor of output row indices, same length as
 *                  in_map.
 * @param in_feat   floating-point feature tensor with in_feat.size(1)
 *                  channels.
 * @param out_nrows number of rows in the pooled output.
 * @param is_sorted whether the (in, out) map pairs are sorted by output
 *                  index; forwarded to the GPU kernel only.
 * @return {out_feat, max_index}: out_feat is the (out_nrows, nchannel)
 *         pooled feature tensor; max_index is an (out_nrows, nchannel)
 *         integer tensor of argmax indices consumed by max_pool_bw.
 */
std::pair<torch::Tensor, torch::Tensor>
max_pool_fw(torch::Tensor const &in_map,
            torch::Tensor const &out_map,
            torch::Tensor const &in_feat,
            int const out_nrows, bool const is_sorted) {

  // Zero-initialize both outputs; rows never touched by the kernels keep 0.
  at::Tensor out_feat =
      at::zeros({out_nrows, in_feat.size(1)}, in_feat.options());
  // max_index inherits in_map's integer dtype/device and never needs grad.
  at::Tensor max_index = torch::zeros({out_nrows, in_feat.size(1)},
                                      in_map.options().requires_grad(false));

  if (in_feat.device().is_cuda()) {
#ifdef CPU_ONLY
    AT_ERROR("Please compile again with CUDA support");
#else
    ASSERT(in_map.is_cuda(), "in_map must be a CUDA tensor.");
    // Fixed copy-paste diagnostic: this assertion checks out_map, so the
    // message must name out_map (it previously said "kernel").
    ASSERT(out_map.is_cuda(), "out_map must be a CUDA tensor.");
    ASSERT(at::cuda::check_device({in_map, out_map, in_feat}),
           "all inputs must be on the same device");

    MINK_DISPATCH_INTEGER_TYPES(
        in_map.scalar_type(), integer_t, "max_pool_forward_gpu", [&] {
          LOG_DEBUG("Integer size", sizeof(integer_t));
          AT_DISPATCH_FLOATING_TYPES(
              in_feat.scalar_type(), "max_pool_forward_gpu", [&] {
                max_pool_forward_pointer_kernel_gpu<scalar_t, integer_t,
                                                    integer_t>(
                    in_map.data_ptr<integer_t>(), out_map.data_ptr<integer_t>(),
                    in_map.numel(), in_feat.data_ptr<scalar_t>(),
                    out_feat.data_ptr<scalar_t>(), out_nrows, in_feat.size(1),
                    max_index.data_ptr<integer_t>(), is_sorted);
              });
        });
#endif
  } else {
    MINK_DISPATCH_INTEGER_TYPES(
        in_map.scalar_type(), integer_t, "max_pool_forward_cpu", [&] {
          LOG_DEBUG("Integer size", sizeof(integer_t));
          AT_DISPATCH_FLOATING_TYPES(
              in_feat.scalar_type(), "max_pool_forward_cpu", [&] {
                max_pooling_forward_pointer_kernel_cpu<scalar_t, integer_t,
                                                       integer_t>(
                    in_feat.data_ptr<scalar_t>(), out_feat.data_ptr<scalar_t>(),
                    max_index.data_ptr<integer_t>(), in_feat.size(1),
                    in_map.data_ptr<integer_t>(), out_map.data_ptr<integer_t>(),
                    in_map.numel());
              });
        });
  }
  return {out_feat, max_index};
}
|
|
|
|
|
/**
 * Backward sparse max pooling.
 *
 * Dispatches to the CPU or GPU kernel based on grad_out_feat's device, over
 * the integer dtype of mask_index and the floating dtype of grad_out_feat.
 *
 * @param grad_out_feat gradient w.r.t. the pooled output,
 *                      (out_nrows, nchannel) floating-point tensor.
 * @param mask_index    argmax indices produced by max_pool_fw,
 *                      (out_nrows, nchannel) integer tensor.
 * @param in_nrows      number of rows of the forward pass's input features.
 * @return gradient w.r.t. the input features, an (in_nrows, nchannel)
 *         zero-initialized tensor filled in by the kernel.
 */
torch::Tensor max_pool_bw(torch::Tensor const &grad_out_feat,
                          torch::Tensor const &mask_index,
                          int const in_nrows) {
  int const out_nrows = grad_out_feat.size(0);
  // Zero-initialize: input rows that were never the argmax keep gradient 0.
  at::Tensor grad_in_feat =
      at::zeros({in_nrows, grad_out_feat.size(1)}, grad_out_feat.options());

  if (grad_out_feat.device().is_cuda()) {
#ifdef CPU_ONLY
    AT_ERROR("Please compile again with CUDA support");
#else
    // Fixed copy-paste diagnostic: this assertion checks mask_index, so the
    // message must name mask_index (it previously said "kernel").
    ASSERT(mask_index.is_cuda(), "mask_index must be a CUDA tensor.");
    ASSERT(at::cuda::check_device({mask_index, grad_out_feat}),
           "all inputs must be on the same device");
    MINK_DISPATCH_INTEGER_TYPES(
        mask_index.scalar_type(), integer_t, "max_pool_backward_gpu", [&] {
          AT_DISPATCH_FLOATING_TYPES(
              grad_out_feat.scalar_type(), "max_pool_backward_gpu", [&] {
                MaxPoolingBackwardKernelGPU<scalar_t, integer_t>(
                    grad_in_feat.data_ptr<scalar_t>(), in_nrows,
                    grad_out_feat.data_ptr<scalar_t>(), out_nrows,
                    mask_index.data_ptr<integer_t>(), grad_out_feat.size(1));
              });
        });
#endif
  } else {
    MINK_DISPATCH_INTEGER_TYPES(
        mask_index.scalar_type(), integer_t, "max_pool_backward_cpu", [&] {
          AT_DISPATCH_FLOATING_TYPES(
              grad_out_feat.scalar_type(), "max_pool_backward_cpu", [&] {
                MaxPoolingBackwardKernelCPU<scalar_t, integer_t>(
                    grad_in_feat.data_ptr<scalar_t>(), in_nrows,
                    grad_out_feat.data_ptr<scalar_t>(), out_nrows,
                    mask_index.data_ptr<integer_t>(), grad_out_feat.size(1));
              });
        });
  }

  return grad_in_feat;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|