|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef CPU_BROADCAST |
|
|
#define CPU_BROADCAST |
|
|
|
|
|
#include "math_functions.hpp" |
|
|
#include "types.hpp" |
|
|
#include "utils.hpp" |
|
|
|
|
|
namespace minkowski { |
|
|
|
|
|
template <typename Dtype, typename Itype> |
|
|
void BroadcastForwardKernelCPU(const Dtype *p_in_feat, uint32_t in_nrows, |
|
|
const Dtype *p_in_feat_global, |
|
|
uint32_t in_nrows_global, Dtype *p_out_feat, |
|
|
uint32_t nchannel, BroadcastMode::Type const op, |
|
|
const cpu_in_maps &in_maps, |
|
|
const cpu_out_maps &glob_maps) { |
|
|
Dtype *p_curr_out_feat; |
|
|
const Dtype *p_curr_in_feat_global; |
|
|
|
|
|
|
|
|
uint32_t num_map = 0; |
|
|
for (const auto &in_map : in_maps) |
|
|
num_map += in_map.size(); |
|
|
ASSERT(num_map == in_nrows, "The number of in-out map,", num_map, |
|
|
" mismatches the number of features,", in_nrows); |
|
|
|
|
|
|
|
|
std::memcpy(p_out_feat, p_in_feat, sizeof(Dtype) * in_nrows * nchannel); |
|
|
|
|
|
|
|
|
switch (op) { |
|
|
case BroadcastMode::ELEMENTWISE_ADDITON: |
|
|
for (uint32_t k = 0; k < in_maps.size(); ++k) { |
|
|
for (uint32_t row = 0; row < in_maps[k].size(); ++row) { |
|
|
p_curr_out_feat = p_out_feat + in_maps[k][row] * nchannel; |
|
|
p_curr_in_feat_global = p_in_feat_global + glob_maps[k][row] * nchannel; |
|
|
cpu_add<Dtype>(nchannel, p_curr_in_feat_global, p_curr_out_feat, |
|
|
p_curr_out_feat); |
|
|
} |
|
|
} |
|
|
break; |
|
|
case BroadcastMode::ELEMENTWISE_MULTIPLICATION: |
|
|
for (uint32_t k = 0; k < in_maps.size(); ++k) { |
|
|
for (uint32_t row = 0; row < in_maps[k].size(); ++row) { |
|
|
p_curr_out_feat = p_out_feat + in_maps[k][row] * nchannel; |
|
|
p_curr_in_feat_global = p_in_feat_global + glob_maps[k][row] * nchannel; |
|
|
cpu_mul<Dtype>(nchannel, p_curr_in_feat_global, p_curr_out_feat, |
|
|
p_curr_out_feat); |
|
|
} |
|
|
} |
|
|
break; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
default: |
|
|
throw std::invalid_argument(Formatter() << "Operation not supported: " |
|
|
<< std::to_string(op)); |
|
|
} |
|
|
} |
|
|
|
|
|
template <typename Dtype, typename Itype> |
|
|
void BroadcastBackwardKernelCPU(const Dtype *p_in_feat, |
|
|
Dtype *p_grad_in_feat, uint32_t in_nrows, |
|
|
const Dtype *p_in_feat_global, |
|
|
Dtype *p_grad_in_feat_global, |
|
|
uint32_t in_nrows_global, |
|
|
const Dtype *p_grad_out_feat, |
|
|
uint32_t nchannel, |
|
|
BroadcastMode::Type const op, |
|
|
const cpu_in_maps &in_maps, |
|
|
const cpu_out_maps &glob_maps) { |
|
|
Dtype *p_curr_grad_in_feat, *p_curr_grad_in_feat_global; |
|
|
const Dtype *p_curr_in_feat_global, *p_curr_in_feat, *p_curr_grad_out_feat; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::memcpy(p_grad_in_feat, p_grad_out_feat, |
|
|
sizeof(Dtype) * in_nrows * nchannel); |
|
|
|
|
|
|
|
|
switch (op) { |
|
|
case BroadcastMode::ELEMENTWISE_ADDITON: |
|
|
|
|
|
for (uint32_t k = 0; k < in_maps.size(); ++k) { |
|
|
for (uint32_t row = 0; row < in_maps[k].size(); ++row) { |
|
|
p_curr_grad_out_feat = p_grad_out_feat + in_maps[k][row] * nchannel; |
|
|
p_curr_grad_in_feat_global = |
|
|
p_grad_in_feat_global + glob_maps[k][row] * nchannel; |
|
|
cpu_add<Dtype>(nchannel, p_curr_grad_out_feat, |
|
|
p_curr_grad_in_feat_global, p_curr_grad_in_feat_global); |
|
|
} |
|
|
} |
|
|
break; |
|
|
case BroadcastMode::ELEMENTWISE_MULTIPLICATION: |
|
|
for (uint32_t k = 0; k < in_maps.size(); ++k) { |
|
|
for (uint32_t row = 0; row < in_maps[k].size(); ++row) { |
|
|
|
|
|
p_curr_in_feat = p_in_feat + in_maps[k][row] * nchannel; |
|
|
p_curr_grad_in_feat = p_grad_in_feat + in_maps[k][row] * nchannel; |
|
|
p_curr_grad_in_feat_global = |
|
|
p_grad_in_feat_global + glob_maps[k][row] * nchannel; |
|
|
p_curr_grad_out_feat = p_grad_out_feat + in_maps[k][row] * nchannel; |
|
|
p_curr_in_feat_global = p_in_feat_global + glob_maps[k][row] * nchannel; |
|
|
|
|
|
|
|
|
cpu_mul<Dtype>(nchannel, p_curr_in_feat_global, p_curr_grad_out_feat, |
|
|
p_curr_grad_in_feat); |
|
|
|
|
|
for (uint32_t j = 0; j < nchannel; j++) { |
|
|
p_curr_grad_in_feat_global[j] += |
|
|
p_curr_grad_out_feat[j] * p_curr_in_feat[j]; |
|
|
} |
|
|
} |
|
|
} |
|
|
break; |
|
|
default: |
|
|
throw std::invalid_argument(Formatter() << "Operation not supported: " |
|
|
<< std::to_string(op)); |
|
|
} |
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
#endif |
|
|
|