| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifndef ARM_ACTIVATION_H |
| | #define ARM_ACTIVATION_H |
| |
|
| | #include "fused_activation.h" |
| |
|
| | #if __ARM_NEON |
| | #include <arm_neon.h> |
| | #include "neon_mathfun.h" |
| |
|
| | static inline float32x4_t activation_ps(float32x4_t _v, int activation_type, const ncnn::Mat& activation_params) |
| | { |
| | if (activation_type == 1) |
| | { |
| | const float32x4_t _zero = vdupq_n_f32(0.f); |
| | _v = vmaxq_f32(_v, _zero); |
| | } |
| | else if (activation_type == 2) |
| | { |
| | const float32x4_t _zero = vdupq_n_f32(0.f); |
| | const float32x4_t _slope = vdupq_n_f32(activation_params[0]); |
| | const uint32x4_t _lemask = vcleq_f32(_v, _zero); |
| | float32x4_t _ps = vmulq_f32(_v, _slope); |
| | _v = vbslq_f32(_lemask, _ps, _v); |
| | } |
| | else if (activation_type == 3) |
| | { |
| | const float32x4_t _min = vdupq_n_f32(activation_params[0]); |
| | const float32x4_t _max = vdupq_n_f32(activation_params[1]); |
| | _v = vmaxq_f32(_v, _min); |
| | _v = vminq_f32(_v, _max); |
| | } |
| | else if (activation_type == 4) |
| | { |
| | _v = sigmoid_ps(_v); |
| | } |
| | else if (activation_type == 5) |
| | { |
| | _v = vmulq_f32(_v, tanh_ps(log_ps(vaddq_f32(exp_ps(_v), vdupq_n_f32(1.f))))); |
| | } |
| | else if (activation_type == 6) |
| | { |
| | const float alpha = activation_params[0]; |
| | const float beta = activation_params[1]; |
| | const float32x4_t _zero = vdupq_n_f32(0.f); |
| | const float32x4_t _one = vdupq_n_f32(1.f); |
| | float32x4_t _ans = vdupq_n_f32(beta); |
| | _ans = vmlaq_n_f32(_ans, _v, alpha); |
| | _ans = vmaxq_f32(_ans, _zero); |
| | _ans = vminq_f32(_ans, _one); |
| | _v = vmulq_f32(_ans, _v); |
| | } |
| |
|
| | return _v; |
| | } |
| |
|
| | #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC |
| | #include "neon_mathfun_fp16s.h" |
| |
|
| | static inline __fp16 activation_ss(__fp16 v, int activation_type, const ncnn::Mat& activation_params) |
| | { |
| | if (activation_type == 1) |
| | { |
| | v = std::max(v, (__fp16)0.f); |
| | } |
| | else if (activation_type == 2) |
| | { |
| | const __fp16 slope = (__fp16)(activation_params[0]); |
| | v = v > 0.f ? v : v * slope; |
| | } |
| | else if (activation_type == 3) |
| | { |
| | const __fp16 min = (__fp16)(activation_params[0]); |
| | const __fp16 max = (__fp16)(activation_params[1]); |
| | if (v < min) |
| | v = min; |
| | if (v > max) |
| | v = max; |
| | } |
| | else if (activation_type == 4) |
| | { |
| | v = (__fp16)1.f / ((__fp16)1.f + expf(-v)); |
| | } |
| | else if (activation_type == 5) |
| | { |
| | v = v * tanhf(logf(expf(v) + (__fp16)1.f)); |
| | } |
| | else if (activation_type == 6) |
| | { |
| | const __fp16 alpha = (__fp16)(activation_params[0]); |
| | const __fp16 beta = (__fp16)(activation_params[1]); |
| | const __fp16 lower = -beta / alpha; |
| | const __fp16 upper = ((__fp16)1.f / alpha) + lower; |
| | if (v < lower) |
| | v = (__fp16)0.f; |
| | else if (v > upper) |
| | ; |
| | else |
| | v = v * (v * alpha + beta); |
| | } |
| |
|
| | return v; |
| | } |
| |
|
| | static inline float16x4_t activation_ps(float16x4_t _v, int activation_type, const ncnn::Mat& activation_params) |
| | { |
| | if (activation_type == 1) |
| | { |
| | const float16x4_t _zero = vdup_n_f16(0.f); |
| | _v = vmax_f16(_v, _zero); |
| | } |
| | else if (activation_type == 2) |
| | { |
| | const float16x4_t _zero = vdup_n_f16(0.f); |
| | const float16x4_t _slope = vdup_n_f16((__fp16)activation_params[0]); |
| | const uint16x4_t _lemask = vcle_f16(_v, _zero); |
| | float16x4_t _ps = vmul_f16(_v, _slope); |
| | _v = vbsl_f16(_lemask, _ps, _v); |
| | } |
| | else if (activation_type == 3) |
| | { |
| | const float16x4_t _min = vdup_n_f16((__fp16)activation_params[0]); |
| | const float16x4_t _max = vdup_n_f16((__fp16)activation_params[1]); |
| | _v = vmax_f16(_v, _min); |
| | _v = vmin_f16(_v, _max); |
| | } |
| | else if (activation_type == 4) |
| | { |
| | _v = sigmoid_ps(_v); |
| | } |
| | else if (activation_type == 5) |
| | { |
| | _v = vmul_f16(_v, tanh_ps(log_ps(vadd_f16(exp_ps(_v), vdup_n_f16(1.f))))); |
| | } |
| | else if (activation_type == 6) |
| | { |
| | const __fp16 alpha = (__fp16)activation_params[0]; |
| | const __fp16 beta = (__fp16)activation_params[1]; |
| | const float16x4_t _zero = vdup_n_f16(0.f); |
| | const float16x4_t _one = vdup_n_f16(1.f); |
| | float16x4_t _ans = vdup_n_f16(beta); |
| | _ans = vfma_n_f16(_ans, _v, alpha); |
| | _ans = vmax_f16(_ans, _zero); |
| | _ans = vmin_f16(_ans, _one); |
| | _v = vmul_f16(_ans, _v); |
| | } |
| |
|
| | return _v; |
| | } |
| |
|
| | static inline float16x8_t activation_ps(float16x8_t _v, int activation_type, const ncnn::Mat& activation_params) |
| | { |
| | if (activation_type == 1) |
| | { |
| | const float16x8_t _zero = vdupq_n_f16(0.f); |
| | _v = vmaxq_f16(_v, _zero); |
| | } |
| | else if (activation_type == 2) |
| | { |
| | const float16x8_t _zero = vdupq_n_f16(0.f); |
| | const float16x8_t _slope = vdupq_n_f16((__fp16)activation_params[0]); |
| | const uint16x8_t _lemask = vcleq_f16(_v, _zero); |
| | float16x8_t _ps = vmulq_f16(_v, _slope); |
| | _v = vbslq_f16(_lemask, _ps, _v); |
| | } |
| | else if (activation_type == 3) |
| | { |
| | const float16x8_t _min = vdupq_n_f16((__fp16)activation_params[0]); |
| | const float16x8_t _max = vdupq_n_f16((__fp16)activation_params[1]); |
| | _v = vmaxq_f16(_v, _min); |
| | _v = vminq_f16(_v, _max); |
| | } |
| | else if (activation_type == 4) |
| | { |
| | _v = sigmoid_ps(_v); |
| | } |
| | else if (activation_type == 5) |
| | { |
| | _v = vmulq_f16(_v, tanh_ps(log_ps(vaddq_f16(exp_ps(_v), vdupq_n_f16(1.f))))); |
| | } |
| | else if (activation_type == 6) |
| | { |
| | const __fp16 alpha_fp16 = (__fp16)activation_params[0]; |
| | const __fp16 beta_fp16 = (__fp16)activation_params[1]; |
| | const float16x8_t _zero = vdupq_n_f16(0.f); |
| | const float16x8_t _one = vdupq_n_f16(1.f); |
| | float16x8_t _ans = vdupq_n_f16(beta_fp16); |
| | _ans = vfmaq_n_f16(_ans, _v, alpha_fp16); |
| | _ans = vmaxq_f16(_ans, _zero); |
| | _ans = vminq_f16(_ans, _one); |
| | _v = vmulq_f16(_ans, _v); |
| | } |
| | return _v; |
| | } |
| | #endif |
| | #endif |
| |
|
| | #endif |
| |
|