|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef CPU_CONVOLUTION |
|
|
#define CPU_CONVOLUTION |
|
|
|
|
|
#include <cstring>
#include <vector>

#include "math_functions.hpp"
#include "types.hpp"
|
|
|
|
|
namespace minkowski { |
|
|
|
|
|
template <typename Dtype, typename Itype> |
|
|
void ConvolutionForwardKernelCPU(const Dtype *p_in_feat, int in_nchannel, |
|
|
Dtype *p_out_feat, int out_nchannel, |
|
|
const Dtype *p_kernel, |
|
|
const cpu_in_maps &in_maps, |
|
|
const cpu_out_maps &out_maps) { |
|
|
int kernel_volume, n_active_in_volume, row; |
|
|
std::vector<Dtype> input_buffer, output_buffer; |
|
|
|
|
|
|
|
|
kernel_volume = in_maps.size(); |
|
|
|
|
|
|
|
|
|
|
|
for (int k = 0; k < kernel_volume; k++) { |
|
|
n_active_in_volume = in_maps[k].size(); |
|
|
if (n_active_in_volume == 0) |
|
|
continue; |
|
|
|
|
|
input_buffer.resize(n_active_in_volume * in_nchannel); |
|
|
output_buffer.resize(n_active_in_volume * out_nchannel); |
|
|
|
|
|
|
|
|
for (row = 0; row < n_active_in_volume; row++) |
|
|
std::memcpy(&input_buffer[row * in_nchannel], |
|
|
p_in_feat + in_maps[k][row] * in_nchannel, |
|
|
sizeof(Dtype) * in_nchannel); |
|
|
|
|
|
|
|
|
cpu_gemm<Dtype>(CblasColMajor, CblasNoTrans, CblasNoTrans, |
|
|
out_nchannel, |
|
|
n_active_in_volume, |
|
|
in_nchannel, |
|
|
1, |
|
|
&p_kernel[k * in_nchannel * out_nchannel], |
|
|
&input_buffer[0], |
|
|
0, |
|
|
&output_buffer[0]); |
|
|
|
|
|
|
|
|
for (row = 0; row < n_active_in_volume; row++) { |
|
|
Dtype *dst = &p_out_feat[out_maps[k][row] * out_nchannel]; |
|
|
Dtype *src = &output_buffer[row * out_nchannel]; |
|
|
cpu_add<Dtype>(out_nchannel, src, dst, dst); |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
template <typename Dtype, typename Itype> |
|
|
void ConvolutionBackwardKernelCPU(const Dtype *p_in_feat, Dtype *p_grad_in_feat, |
|
|
int in_nchannel, const Dtype *p_grad_out_feat, |
|
|
int out_nchannel, const Dtype *p_kernel, |
|
|
Dtype *p_grad_kernel, |
|
|
const cpu_in_maps &in_maps, |
|
|
const cpu_out_maps &out_maps) { |
|
|
int kernel_volume, n_active_in_volume, row; |
|
|
std::vector<Dtype> input_buffer, output_buffer; |
|
|
|
|
|
|
|
|
kernel_volume = in_maps.size(); |
|
|
|
|
|
|
|
|
for (int k = 0; k < kernel_volume; k++) { |
|
|
n_active_in_volume = in_maps[k].size(); |
|
|
if (n_active_in_volume == 0) |
|
|
continue; |
|
|
|
|
|
input_buffer.resize(n_active_in_volume * in_nchannel); |
|
|
output_buffer.resize(n_active_in_volume * out_nchannel); |
|
|
|
|
|
|
|
|
for (row = 0; row < n_active_in_volume; row++) |
|
|
std::memcpy(&output_buffer[row * out_nchannel], |
|
|
&p_grad_out_feat[out_maps[k][row] * out_nchannel], |
|
|
sizeof(Dtype) * out_nchannel); |
|
|
|
|
|
cpu_gemm<Dtype>(CblasColMajor, CblasTrans, CblasNoTrans, |
|
|
in_nchannel, |
|
|
n_active_in_volume, |
|
|
out_nchannel, |
|
|
1, |
|
|
&p_kernel[k * in_nchannel * out_nchannel], |
|
|
&output_buffer[0], |
|
|
0, |
|
|
&input_buffer[0] |
|
|
); |
|
|
|
|
|
|
|
|
for (row = 0; row < n_active_in_volume; row++) { |
|
|
Dtype *src = &input_buffer[row * in_nchannel]; |
|
|
Dtype *dst = &p_grad_in_feat[in_maps[k][row] * in_nchannel]; |
|
|
cpu_add<Dtype>(in_nchannel, src, dst, dst); |
|
|
} |
|
|
|
|
|
|
|
|
for (row = 0; row < n_active_in_volume; row++) |
|
|
std::memcpy(&input_buffer[row * in_nchannel], |
|
|
p_in_feat + in_maps[k][row] * in_nchannel, |
|
|
sizeof(Dtype) * in_nchannel); |
|
|
|
|
|
cpu_gemm<Dtype>(CblasColMajor, CblasNoTrans, CblasTrans, |
|
|
out_nchannel, |
|
|
in_nchannel, |
|
|
n_active_in_volume, |
|
|
1, |
|
|
&output_buffer[0], |
|
|
&input_buffer[0], |
|
|
1, |
|
|
&p_grad_kernel[k * in_nchannel * out_nchannel] |
|
|
); |
|
|
} |
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
|