|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef _NONE_H_ |
|
|
#define _NONE_H_ |
|
|
|
|
|
#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math_types.h" |
|
|
|
|
|
#ifdef __cplusplus |
|
|
extern "C" |
|
|
{ |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) |
|
|
/**
 * @brief  Count the leading zero bits of a 32-bit word.
 * @param  data  value to examine
 * @return number of zero bits above the most significant set bit;
 *         32 when data is 0
 */
__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
{
  uint32_t zeros = 0U;

  if (data == 0U)
  {
    return 32U;
  }

  /* Slide the word left until its top bit is set, counting the steps. */
  while ((data & 0x80000000U) == 0U)
  {
    data <<= 1U;
    zeros++;
  }

  return zeros;
}
|
|
|
|
|
/**
 * @brief  Saturate a signed value to a signed range of `sat` bits.
 * @param  val  value to saturate
 * @param  sat  bit width of the target range (1..32)
 * @return val limited to [-(2^(sat-1)), 2^(sat-1) - 1]; val is returned
 *         unchanged when sat is outside 1..32
 */
__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
{
  int32_t upper;
  int32_t lower;

  if ((sat < 1U) || (sat > 32U))
  {
    return val;
  }

  upper = (int32_t)((1U << (sat - 1U)) - 1U);
  lower = -1 - upper;

  if (val > upper)
  {
    return upper;
  }
  if (val < lower)
  {
    return lower;
  }
  return val;
}
|
|
|
|
|
/**
 * @brief  Saturate a signed value to an unsigned range of `sat` bits.
 * @param  val  value to saturate
 * @param  sat  bit width of the target range (0..31)
 * @return val limited to [0, 2^sat - 1]; when sat is greater than 31 the
 *         value is returned converted to uint32_t without saturation
 */
__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
{
  uint32_t limit;

  if (sat > 31U)
  {
    return (uint32_t)val;
  }

  limit = (1U << sat) - 1U;

  if (val > (int32_t)limit)
  {
    return limit;
  }
  if (val < 0)
  {
    return 0U;
  }
  return (uint32_t)val;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  Rotate a 32-bit word right.
 * @param  op1  value to rotate
 * @param  op2  number of bit positions; only op2 modulo 32 is used
 * @return op1 rotated right by (op2 % 32) bits
 */
__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
{
  const uint32_t amount = op2 % 32U;

  /* A zero rotation is returned directly: shifting by 32 would be undefined. */
  return (amount == 0U) ? op1
                        : ((op1 >> amount) | (op1 << (32U - amount)));
}
|
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  Saturate a q63_t value to the q31_t range.
 * @param  x  value to clip
 * @return x when its upper 32 bits are just the sign extension of its lower
 *         32 bits; otherwise 0x7FFFFFFF for positive x or its bitwise
 *         complement for negative x
 */
__STATIC_FORCEINLINE q31_t clip_q63_to_q31(
  q63_t x)
{
  const q31_t upper_word = (q31_t) (x >> 32);
  const q31_t lower_word = (q31_t) x;

  if (upper_word == (lower_word >> 31))
  {
    return lower_word;
  }

  /* x >> 63 is 0 or -1, which selects the positive or negative limit. */
  return (q31_t) (0x7FFFFFFF ^ ((q31_t) (x >> 63)));
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  Reduce a q63_t value to q15_t.
 * @param  x  value to convert
 * @return 0x7FFF (positive x) or its complement (negative x) when x does not
 *         fit in 32 bits; otherwise (x >> 15) converted to q15_t
 *
 * NOTE(review): the range test is a 32-bit test, so (x >> 15) can still be
 * wider than 16 bits and is then truncated by the cast rather than
 * saturated - confirm this is what callers expect.
 */
__STATIC_FORCEINLINE q15_t clip_q63_to_q15(
  q63_t x)
{
  const q31_t upper_word = (q31_t) (x >> 32);
  const q31_t lower_sign = (q31_t) x >> 31;

  if (upper_word != lower_sign)
  {
    /* x >> 63 is 0 or -1, which selects the positive or negative limit. */
    return (q15_t) (0x7FFF ^ ((q15_t) (x >> 63)));
  }

  return (q15_t) (x >> 15);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  Saturate a q31_t value to the q7_t range.
 * @param  x  value to clip
 * @return x when it lies in [-128, 127]; 0x7F when x is larger and the
 *         complement of 0x7F (-128) when x is smaller
 *
 * The range test compares x >> 8 with x >> 7, which are equal exactly when
 * bits 31..7 are all copies of the sign bit, i.e. when x fits in 8 bits.
 * (Testing bits 24/23 instead would let every value that fits in 24 bits
 * through and then wrap it in the final cast.)
 */
__STATIC_FORCEINLINE q7_t clip_q31_to_q7(
  q31_t x)
{
  if ((q31_t) (x >> 8) != ((q31_t) x >> 7))
  {
    /* x >> 31 is 0 or -1, which selects the positive or negative limit. */
    return (q7_t) (0x7F ^ ((q7_t) (x >> 31)));
  }

  return (q7_t) x;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  Saturate a q31_t value to the q15_t range.
 * @param  x  value to clip
 * @return x when bits 31..15 are all copies of the sign bit (x fits in 16
 *         bits); otherwise 0x7FFF for positive x or its complement for
 *         negative x
 */
__STATIC_FORCEINLINE q15_t clip_q31_to_q15(
  q31_t x)
{
  const q31_t above_16 = (q31_t) (x >> 16);
  const q31_t above_15 = (q31_t) x >> 15;

  if (above_16 != above_15)
  {
    /* x >> 31 is 0 or -1, which selects the positive or negative limit. */
    return (q15_t) (0x7FFF ^ ((q15_t) (x >> 31)));
  }

  return (q15_t) x;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  Multiply a q63_t value by a q31_t value, dropping the lowest 32
 *         bits of the product.
 * @param  x  64-bit operand
 * @param  y  32-bit operand
 * @return ((low 32 bits of x) * y >> 32) + ((x >> 32) * y)
 */
__STATIC_FORCEINLINE q63_t mult32x64(
  q63_t x,
  q31_t y)
{
  /* The low word is treated as a non-negative 32-bit quantity. */
  const q63_t low_term  = ((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32;
  const q63_t high_term = (q63_t) (x >> 32) * y;

  return low_term + high_term;
}
|
|
|
|
|
|
|
|
/*
 * Rounded multiply-accumulate keeping the top 32 bits:
 *   a = ((a << 32) + x * y + 0x80000000) >> 32, evaluated in q63_t.
 * Every argument is parenthesised so that an expression argument such as
 * "p + q" is multiplied as a whole. `a` must be an lvalue.
 */
#define multAcc_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))
|
|
|
|
|
|
|
|
/*
 * Rounded multiply-subtract keeping the top 32 bits:
 *   a = ((a << 32) - x * y + 0x80000000) >> 32, evaluated in q63_t.
 * Every argument is parenthesised so that an expression argument such as
 * "p + q" is multiplied as a whole. `a` must be an lvalue.
 */
#define multSub_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))
|
|
|
|
|
|
|
|
/*
 * Rounded 32x32 multiply keeping the top 32 bits:
 *   a = (x * y + 0x80000000) >> 32, evaluated in q63_t.
 * Every argument is parenthesised so that an expression argument such as
 * "p + q" is multiplied as a whole. `a` must be an lvalue.
 */
#define mult_32x32_keep32_R(a, x, y) \
    ((a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32))
|
|
|
|
|
|
|
|
/*
 * Multiply-accumulate keeping the top 32 bits of the product (no rounding):
 *   a += (x * y) >> 32, with the product evaluated in q63_t.
 * Every argument is parenthesised so that an expression argument such as
 * "p + q" is multiplied as a whole. `a` must be an lvalue.
 */
#define multAcc_32x32_keep32(a, x, y) \
    ((a) += (q31_t) (((q63_t) (x) * (y)) >> 32))
|
|
|
|
|
|
|
|
/*
 * Multiply-subtract keeping the top 32 bits of the product (no rounding):
 *   a -= (x * y) >> 32, with the product evaluated in q63_t.
 * Every argument is parenthesised so that an expression argument such as
 * "p + q" is multiplied as a whole. `a` must be an lvalue.
 */
#define multSub_32x32_keep32(a, x, y) \
    ((a) -= (q31_t) (((q63_t) (x) * (y)) >> 32))
|
|
|
|
|
|
|
|
/*
 * 32x32 multiply keeping the top 32 bits of the product (no rounding):
 *   a = (x * y) >> 32, with the product evaluated in q63_t.
 * Every argument is parenthesised so that an expression argument such as
 * "p + q" is multiplied as a whole. `a` must be an lvalue.
 */
#define mult_32x32_keep32(a, x, y) \
    ((a) = (q31_t) (((q63_t) (x) * (y) ) >> 32))
|
|
|
|
|
|
|
|
#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) |
|
|
|
|
|
|
|
|
|
|
|
/*
 * Pack halfwords, bottom/top: the result takes bits 15..0 from ARG1 and
 * bits 31..16 from (ARG2 << ARG3). The result has type int32_t.
 * The shift is done on uint32_t (left-shifting a negative or overflowing
 * signed value is undefined) and ARG3 is parenthesised so that an expression
 * shift count is applied as a whole.
 */
#define __PKHBT(ARG1, ARG2, ARG3) ( (int32_t)( (((uint32_t)(ARG1)          ) & 0x0000FFFFU) | \
                                               (((uint32_t)(ARG2) << (ARG3)) & 0xFFFF0000U) ) )
|
|
/*
 * Pack halfwords, top/bottom: the result takes bits 31..16 from ARG1 and
 * bits 15..0 from (ARG2 >> ARG3), where ARG2 is shifted as an int32_t.
 * The result has type int32_t. ARG3 is parenthesised so that an expression
 * shift count is applied as a whole.
 */
#define __PKHTB(ARG1, ARG2, ARG3) ( (int32_t)( ((uint32_t)(ARG1) & 0xFFFF0000U) | \
                                               ((uint32_t)((int32_t)(ARG2) >> (ARG3)) & 0x0000FFFFU) ) )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SADD16: two signed 16-bit additions.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low halfword = low(x) + low(y), high halfword = high(x) + high(y),
 *         each truncated to 16 bits (no saturation)
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 */
__STATIC_FORCEINLINE uint32_t __SADD16(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)(x_lo + y_lo) & 0xFFFFU;
  const uint32_t s = (uint32_t)(x_hi + y_hi) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Pack four 8-bit values into one 32-bit word (result type int32_t).
 * Without ARM_MATH_BIG_ENDIAN v0 lands in bits 7..0 and v3 in bits 31..24;
 * with it the order is reversed.
 * The shifts are done on uint32_t: left-shifting a negative or overflowing
 * signed value (e.g. a negative v3 by 24) is undefined in C.
 */
#ifndef ARM_MATH_BIG_ENDIAN
#define __PACKq7(v0,v1,v2,v3) ( (int32_t)( (((uint32_t)(v0) <<  0) & 0x000000FFU) | \
                                           (((uint32_t)(v1) <<  8) & 0x0000FF00U) | \
                                           (((uint32_t)(v2) << 16) & 0x00FF0000U) | \
                                           (((uint32_t)(v3) << 24) & 0xFF000000U) ) )
#else
#define __PACKq7(v0,v1,v2,v3) ( (int32_t)( (((uint32_t)(v3) <<  0) & 0x000000FFU) | \
                                           (((uint32_t)(v2) <<  8) & 0x0000FF00U) | \
                                           (((uint32_t)(v1) << 16) & 0x00FF0000U) | \
                                           (((uint32_t)(v0) << 24) & 0xFF000000U) ) )
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if !defined (ARM_MATH_DSP) || defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __QADD8: four signed 8-bit saturating additions.
 * @param  x  first operand, four packed signed bytes
 * @param  y  second operand, four packed signed bytes
 * @return the four per-byte sums, each passed through __SSAT(..., 8) and
 *         stored back in the byte position it came from
 *
 * Bytes are sign-extended by converting through int8_t and the result is
 * assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 */
__STATIC_FORCEINLINE uint32_t __QADD8(
  uint32_t x,
  uint32_t y)
{
  uint32_t packed = 0U;
  uint32_t lane;

  for (lane = 0U; lane < 32U; lane += 8U)
  {
    const q31_t a = (int8_t)((x >> lane) & 0xFFU);
    const q31_t b = (int8_t)((y >> lane) & 0xFFU);

    packed |= ((uint32_t)__SSAT(a + b, 8) & 0xFFU) << lane;
  }

  return packed;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __QSUB8: four signed 8-bit saturating subtractions.
 * @param  x  minuend, four packed signed bytes
 * @param  y  subtrahend, four packed signed bytes
 * @return the four per-byte differences x - y, each passed through
 *         __SSAT(..., 8) and stored back in the byte position it came from
 *
 * Bytes are sign-extended by converting through int8_t and the result is
 * assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 */
__STATIC_FORCEINLINE uint32_t __QSUB8(
  uint32_t x,
  uint32_t y)
{
  uint32_t packed = 0U;
  uint32_t lane;

  for (lane = 0U; lane < 32U; lane += 8U)
  {
    const q31_t a = (int8_t)((x >> lane) & 0xFFU);
    const q31_t b = (int8_t)((y >> lane) & 0xFFU);

    packed |= ((uint32_t)__SSAT(a - b, 8) & 0xFFU) << lane;
  }

  return packed;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __QADD16: two signed 16-bit saturating additions.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low halfword = __SSAT(low(x) + low(y), 16),
 *         high halfword = __SSAT(high(x) + high(y), 16)
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 */
__STATIC_FORCEINLINE uint32_t __QADD16(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)__SSAT(x_lo + y_lo, 16) & 0xFFFFU;
  const uint32_t s = (uint32_t)__SSAT(x_hi + y_hi, 16) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SHADD16: two signed 16-bit halving additions.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low halfword = (low(x) + low(y)) >> 1,
 *         high halfword = (high(x) + high(y)) >> 1
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 * The halving itself still relies on >> of a negative value being an
 * arithmetic shift, as the original expression did.
 */
__STATIC_FORCEINLINE uint32_t __SHADD16(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)((x_lo + y_lo) >> 1) & 0xFFFFU;
  const uint32_t s = (uint32_t)((x_hi + y_hi) >> 1) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __QSUB16: two signed 16-bit saturating subtractions.
 * @param  x  minuend, two packed signed halfwords
 * @param  y  subtrahend, two packed signed halfwords
 * @return low halfword = __SSAT(low(x) - low(y), 16),
 *         high halfword = __SSAT(high(x) - high(y), 16)
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 */
__STATIC_FORCEINLINE uint32_t __QSUB16(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)__SSAT(x_lo - y_lo, 16) & 0xFFFFU;
  const uint32_t s = (uint32_t)__SSAT(x_hi - y_hi, 16) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SHSUB16: two signed 16-bit halving subtractions.
 * @param  x  minuend, two packed signed halfwords
 * @param  y  subtrahend, two packed signed halfwords
 * @return low halfword = (low(x) - low(y)) >> 1,
 *         high halfword = (high(x) - high(y)) >> 1
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 * The halving itself still relies on >> of a negative value being an
 * arithmetic shift, as the original expression did.
 */
__STATIC_FORCEINLINE uint32_t __SHSUB16(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)((x_lo - y_lo) >> 1) & 0xFFFFU;
  const uint32_t s = (uint32_t)((x_hi - y_hi) >> 1) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __QASX: saturating add/subtract with exchange.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low halfword = __SSAT(low(x) - high(y), 16),
 *         high halfword = __SSAT(high(x) + low(y), 16)
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 */
__STATIC_FORCEINLINE uint32_t __QASX(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)__SSAT(x_lo - y_hi, 16) & 0xFFFFU;
  const uint32_t s = (uint32_t)__SSAT(x_hi + y_lo, 16) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SHASX: halving add/subtract with exchange.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low halfword = (low(x) - high(y)) >> 1,
 *         high halfword = (high(x) + low(y)) >> 1
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 * The halving itself still relies on >> of a negative value being an
 * arithmetic shift, as the original expression did.
 */
__STATIC_FORCEINLINE uint32_t __SHASX(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)((x_lo - y_hi) >> 1) & 0xFFFFU;
  const uint32_t s = (uint32_t)((x_hi + y_lo) >> 1) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __QSAX: saturating subtract/add with exchange.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low halfword = __SSAT(low(x) + high(y), 16),
 *         high halfword = __SSAT(high(x) - low(y), 16)
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 */
__STATIC_FORCEINLINE uint32_t __QSAX(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)__SSAT(x_lo + y_hi, 16) & 0xFFFFU;
  const uint32_t s = (uint32_t)__SSAT(x_hi - y_lo, 16) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SHSAX: halving subtract/add with exchange.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low halfword = (low(x) + high(y)) >> 1,
 *         high halfword = (high(x) - low(y)) >> 1
 *
 * Halfwords are sign-extended by converting through int16_t and the result
 * is assembled in unsigned arithmetic, so no signed value is shifted left
 * (undefined in C when the value is negative or the result overflows).
 * The halving itself still relies on >> of a negative value being an
 * arithmetic shift, as the original expression did.
 */
__STATIC_FORCEINLINE uint32_t __SHSAX(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  const uint32_t r = (uint32_t)((x_lo + y_hi) >> 1) & 0xFFFFU;
  const uint32_t s = (uint32_t)((x_hi - y_lo) >> 1) & 0xFFFFU;

  return (s << 16) | r;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMUSDX: dual 16-bit multiply with exchange,
 *         subtracting the products.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low(x) * high(y) - high(x) * low(y), as a 32-bit pattern
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left (undefined in C when the value is negative
 * or the result overflows); the difference is formed modulo 2^32.
 */
__STATIC_FORCEINLINE uint32_t __SMUSDX(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  return (uint32_t)(x_lo * y_hi) - (uint32_t)(x_hi * y_lo);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMUADX: dual 16-bit multiply with exchange,
 *         adding the products.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low(x) * high(y) + high(x) * low(y), as a 32-bit pattern
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left. The products are added modulo 2^32: when
 * all four halfwords are -32768 the sum is 2^31, which would overflow a
 * signed 32-bit addition.
 */
__STATIC_FORCEINLINE uint32_t __SMUADX(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  return (uint32_t)(x_lo * y_hi) + (uint32_t)(x_hi * y_lo);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  32-bit saturating addition.
 * @param  x  first operand
 * @param  y  second operand
 * @return x + y computed in q63_t and then reduced with clip_q63_to_q31()
 */
__STATIC_FORCEINLINE int32_t __QADD(
  int32_t x,
  int32_t y)
{
  /* Widen first so the sum itself cannot overflow. */
  const q63_t wide_sum = (q63_t)x + (q31_t)y;

  return (int32_t)clip_q63_to_q31(wide_sum);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  32-bit saturating subtraction.
 * @param  x  minuend
 * @param  y  subtrahend
 * @return x - y computed in q63_t and then reduced with clip_q63_to_q31()
 */
__STATIC_FORCEINLINE int32_t __QSUB(
  int32_t x,
  int32_t y)
{
  /* Widen first so the difference itself cannot overflow. */
  const q63_t wide_diff = (q63_t)x - (q31_t)y;

  return (int32_t)clip_q63_to_q31(wide_diff);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMLAD: dual 16-bit multiply, add both
 *         products to an accumulator.
 * @param  x    first operand, two packed signed halfwords
 * @param  y    second operand, two packed signed halfwords
 * @param  sum  accumulator
 * @return low(x) * low(y) + high(x) * high(y) + sum, modulo 2^32
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left, and the accumulation is done in unsigned
 * arithmetic so that wrap-around is well defined (a signed addition would
 * overflow, which is undefined in C).
 */
__STATIC_FORCEINLINE uint32_t __SMLAD(
  uint32_t x,
  uint32_t y,
  uint32_t sum)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  return (uint32_t)(x_lo * y_lo) + (uint32_t)(x_hi * y_hi) + sum;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMLADX: dual 16-bit multiply with exchange,
 *         add both products to an accumulator.
 * @param  x    first operand, two packed signed halfwords
 * @param  y    second operand, two packed signed halfwords
 * @param  sum  accumulator
 * @return low(x) * high(y) + high(x) * low(y) + sum, modulo 2^32
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left, and the accumulation is done in unsigned
 * arithmetic so that wrap-around is well defined (a signed addition would
 * overflow, which is undefined in C).
 */
__STATIC_FORCEINLINE uint32_t __SMLADX(
  uint32_t x,
  uint32_t y,
  uint32_t sum)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  return (uint32_t)(x_lo * y_hi) + (uint32_t)(x_hi * y_lo) + sum;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMLSDX: dual 16-bit multiply with exchange,
 *         add the difference of the products to an accumulator.
 * @param  x    first operand, two packed signed halfwords
 * @param  y    second operand, two packed signed halfwords
 * @param  sum  accumulator
 * @return low(x) * high(y) - high(x) * low(y) + sum, modulo 2^32
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left, and the accumulation is done in unsigned
 * arithmetic so that wrap-around is well defined (a signed addition would
 * overflow, which is undefined in C).
 */
__STATIC_FORCEINLINE uint32_t __SMLSDX(
  uint32_t x,
  uint32_t y,
  uint32_t sum)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  return (uint32_t)(x_lo * y_hi) - (uint32_t)(x_hi * y_lo) + sum;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMLALD: dual 16-bit multiply, add both
 *         products to a 64-bit accumulator.
 * @param  x    first operand, two packed signed halfwords
 * @param  y    second operand, two packed signed halfwords
 * @param  sum  64-bit accumulator
 * @return low(x) * low(y) + high(x) * high(y) + sum, modulo 2^64
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left. Each product is widened to 64 bits before
 * it is added, so the pair cannot overflow a 32-bit intermediate, and the
 * accumulation is unsigned so that wrap-around is well defined.
 */
__STATIC_FORCEINLINE uint64_t __SMLALD(
  uint32_t x,
  uint32_t y,
  uint64_t sum)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  /* Converting a negative product to uint64_t sign-extends it modulo 2^64. */
  return sum + (uint64_t)(q63_t)(x_lo * y_lo) + (uint64_t)(q63_t)(x_hi * y_hi);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMLALDX: dual 16-bit multiply with exchange,
 *         add both products to a 64-bit accumulator.
 * @param  x    first operand, two packed signed halfwords
 * @param  y    second operand, two packed signed halfwords
 * @param  sum  64-bit accumulator
 * @return low(x) * high(y) + high(x) * low(y) + sum, modulo 2^64
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left. Each product is widened to 64 bits before
 * it is added, so the pair cannot overflow a 32-bit intermediate, and the
 * accumulation is unsigned so that wrap-around is well defined.
 */
__STATIC_FORCEINLINE uint64_t __SMLALDX(
  uint32_t x,
  uint32_t y,
  uint64_t sum)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  /* Converting a negative product to uint64_t sign-extends it modulo 2^64. */
  return sum + (uint64_t)(q63_t)(x_lo * y_hi) + (uint64_t)(q63_t)(x_hi * y_lo);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMUAD: dual 16-bit multiply, adding the
 *         products.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low(x) * low(y) + high(x) * high(y), as a 32-bit pattern
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left. The products are added modulo 2^32: when
 * all four halfwords are -32768 the sum is 2^31, which would overflow a
 * signed 32-bit addition.
 */
__STATIC_FORCEINLINE uint32_t __SMUAD(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  return (uint32_t)(x_lo * y_lo) + (uint32_t)(x_hi * y_hi);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMUSD: dual 16-bit multiply, subtracting
 *         the products.
 * @param  x  first operand, two packed signed halfwords
 * @param  y  second operand, two packed signed halfwords
 * @return low(x) * low(y) - high(x) * high(y), as a 32-bit pattern
 *
 * Halfwords are sign-extended by converting through int16_t instead of
 * shifting a signed value left (undefined in C when the value is negative
 * or the result overflows); the difference is formed modulo 2^32.
 */
__STATIC_FORCEINLINE uint32_t __SMUSD(
  uint32_t x,
  uint32_t y)
{
  const q31_t x_lo = (int16_t)(x & 0xFFFFU);
  const q31_t x_hi = (int16_t)(x >> 16);
  const q31_t y_lo = (int16_t)(y & 0xFFFFU);
  const q31_t y_hi = (int16_t)(y >> 16);

  return (uint32_t)(x_lo * y_lo) - (uint32_t)(x_hi * y_hi);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SXTB16: sign-extend two bytes to halfwords.
 * @param  x  source word
 * @return low halfword = byte 0 of x sign-extended to 16 bits,
 *         high halfword = byte 2 of x sign-extended to 16 bits
 *
 * Bytes are sign-extended by converting through int8_t and then handled as
 * uint32_t, so no signed value is shifted left (undefined in C when the
 * value is negative or the result overflows).
 */
__STATIC_FORCEINLINE uint32_t __SXTB16(
  uint32_t x)
{
  const uint32_t byte0 = (uint32_t)(q31_t)(int8_t)(x & 0xFFU);
  const uint32_t byte2 = (uint32_t)(q31_t)(int8_t)((x >> 16) & 0xFFU);

  /* The unsigned shift discards the upper sign-extension bits of byte2. */
  return (byte0 & 0x0000FFFFU) | (byte2 << 16);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SMMLA: 32x32 multiply, add the most
 *         significant word of the product to an accumulator.
 * @param  x    first operand
 * @param  y    second operand
 * @param  sum  accumulator
 * @return sum + (upper 32 bits of the 64-bit product x * y), wrapped to
 *         32 bits
 *
 * The addition is carried out on uint32_t so that wrap-around is well
 * defined; adding the two int32_t values directly can overflow, which is
 * undefined in C.
 */
__STATIC_FORCEINLINE int32_t __SMMLA(
  int32_t x,
  int32_t y,
  int32_t sum)
{
  const int32_t product_high = (int32_t) (((int64_t) x * y) >> 32);

  return (int32_t) ((uint32_t) sum + (uint32_t) product_high);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  Extract two bytes of val1 at a bit offset and sign-extend each to
 *         a halfword.
 * @param  val1    source word
 * @param  rotate  bit offset of the first byte; only the low 5 bits are used
 * @return low halfword = sign-extended byte taken at bit offset `rotate`,
 *         high halfword = sign-extended byte taken at bit offset
 *         (rotate + 16) modulo 32
 *
 * Both offsets are reduced modulo 32 so the shift count is always valid,
 * and the high halfword is positioned with an unsigned shift (left-shifting
 * a negative int16_t is undefined in C).
 * NOTE(review): each byte is taken with a plain right shift, so this equals
 * a rotate-then-extract only when both bytes lie inside the word, as for
 * offsets 0, 8, 16 and 24 - confirm callers only pass those.
 */
__STATIC_FORCEINLINE uint32_t __SXTB16_RORn(uint32_t val1, uint32_t rotate)
{
  const uint32_t low_shift  = rotate & 31U;
  const uint32_t high_shift = (rotate + 16U) & 31U;

  const int16_t a16 = (int8_t)((val1 >> low_shift) & 0xffU);
  const int16_t b16 = (int8_t)((val1 >> high_shift) & 0xffU);

  return ((uint32_t)b16 << 16) | ((uint32_t)a16 & 0xffffU);
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * @brief  C implementation of __SXTAB16: sign-extend two bytes of val2 and
 *         add them to the two halfwords of val1.
 * @param  val1  two packed halfwords (the addends)
 * @param  val2  word whose byte 0 and byte 2 are sign-extended and added
 * @return low halfword = low(val1) + sign-extended byte 0 of val2,
 *         high halfword = high(val1) + sign-extended byte 2 of val2,
 *         each wrapped to 16 bits
 *
 * The sums are formed modulo 2^32 on uint32_t and then masked to 16 bits,
 * which gives the same 16-bit wrap-around without left-shifting a negative
 * int16_t (undefined in C).
 */
__STATIC_FORCEINLINE uint32_t __SXTAB16(uint32_t val1, uint32_t val2)
{
  const int8_t a = (int8_t)(val2 & 0xffU);
  const int8_t b = (int8_t)((val2 >> 16) & 0xffU);

  /* Converting a negative int8_t to uint32_t sign-extends it modulo 2^32. */
  const uint32_t low  = (val1 + (uint32_t)a) & 0xffffU;
  const uint32_t high = ((val1 >> 16) + (uint32_t)b) & 0xffffU;

  return (high << 16) | low;
}
|
|
|
|
|
#endif |
|
|
|
|
|
#ifdef __cplusplus |
|
|
} |
|
|
#endif |
|
|
|
|
|
#endif |
|
|
|