/* * Copyright (c) 2022 EdgeImpulse Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an "AS * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language * governing permissions and limitations under the License. * * SPDX-License-Identifier: Apache-2.0 */ #ifndef _EIDSP_NUMPY_H_ #define _EIDSP_NUMPY_H_ // it's valid to include the SDK without a model, but there's information that we need // in model_metadata.h (like the FFT tables used). // if the compiler does not support the __has_include directive we'll assume that the // file exists. #ifndef __has_include #define __has_include 1 #endif // __has_include #include #include #include #include #include "ei_vector.h" #include #include "numpy_types.h" #include "config.hpp" #include "returntypes.hpp" #include "memory.hpp" #include "ei_utils.h" #include "dct/fast-dct-fft.h" #include "kissfft/kiss_fftr.h" #if __has_include("model-parameters/model_metadata.h") #include "model-parameters/model_metadata.h" #endif #if EIDSP_USE_CMSIS_DSP #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math.h" #include "edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs.h" #endif // For the following CMSIS includes, we want to use the C fallback, so include whether or not we set the CMSIS flag #include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h" #ifdef __MBED__ #include "mbed.h" #else #include #endif // __MBED__ #define EI_MAX_UINT16 65535 namespace ei { using fvec = ei_vector; using ivec = ei_vector; // clang-format off // lookup table for quantized values between 0.0f and 1.0f static constexpr float quantized_values_one_zero[] = { (0.0f / 1.0f), (1.0f / 100.0f), (2.0f / 100.0f), (3.0f / 100.0f), (4.0f / 100.0f), (1.0f / 22.0f), (1.0f / 21.0f), (1.0f / 20.0f), (1.0f / 19.0f), (1.0f / 18.0f), (1.0f / 17.0f), (6.0f / 100.0f), (1.0f / 16.0f), (1.0f / 15.0f), (7.0f / 100.0f), (1.0f / 14.0f), (1.0f / 13.0f), (8.0f / 100.0f), (1.0f / 12.0f), (9.0f / 100.0f), (1.0f / 11.0f), (2.0f / 21.0f), (1.0f / 10.0f), (2.0f / 19.0f), (11.0f / 100.0f), (1.0f / 9.0f), (2.0f / 17.0f), (12.0f / 100.0f), (1.0f / 8.0f), (13.0f / 100.0f), (2.0f / 15.0f), (3.0f / 22.0f), (14.0f / 100.0f), (1.0f / 7.0f), (3.0f / 20.0f), (2.0f / 13.0f), (3.0f / 19.0f), (16.0f / 100.0f), (1.0f / 6.0f), (17.0f / 100.0f), (3.0f / 17.0f), (18.0f / 100.0f), (2.0f / 11.0f), (3.0f / 16.0f), (19.0f / 100.0f), (4.0f / 21.0f), (1.0f / 5.0f), (21.0f / 100.0f), (4.0f / 19.0f), (3.0f / 14.0f), (22.0f / 100.0f), (2.0f / 9.0f), (5.0f / 22.0f), (23.0f / 100.0f), (3.0f / 13.0f), (4.0f / 17.0f), (5.0f / 21.0f), (24.0f / 100.0f), (1.0f / 4.0f), (26.0f / 100.0f), (5.0f / 19.0f), (4.0f / 15.0f), (27.0f / 100.0f), (3.0f / 11.0f), (5.0f / 18.0f), (28.0f / 100.0f), (2.0f / 7.0f), (29.0f / 100.0f), (5.0f / 17.0f), (3.0f / 10.0f), (4.0f / 13.0f), (31.0f / 100.0f), (5.0f / 16.0f), (6.0f / 19.0f), (7.0f / 22.0f), (32.0f / 100.0f), (33.0f / 100.0f), (1.0f / 3.0f), (34.0f / 100.0f), (7.0f / 20.0f), (6.0f / 17.0f), (5.0f / 14.0f), (36.0f / 100.0f), (4.0f / 11.0f), (7.0f / 19.0f), (37.0f / 100.0f), (3.0f / 8.0f), (38.0f / 100.0f), (8.0f / 21.0f), (5.0f / 13.0f), (7.0f / 18.0f), (39.0f / 100.0f), (2.0f / 5.0f), (9.0f / 22.0f), (41.0f / 100.0f), (7.0f / 17.0f), (5.0f / 12.0f), (42.0f / 100.0f), (8.0f / 19.0f), (3.0f / 7.0f), (43.0f / 100.0f), (7.0f / 16.0f), (44.0f / 100.0f), (4.0f / 9.0f), (9.0f / 20.0f), (5.0f / 11.0f), (46.0f / 100.0f), (6.0f / 13.0f), (7.0f / 15.0f), (47.0f / 100.0f), (8.0f / 17.0f), (9.0f / 19.0f), (10.0f / 21.0f), (48.0f / 100.0f), (49.0f / 100.0f), (1.0f / 2.0f), (51.0f / 100.0f), (52.0f / 100.0f), (11.0f / 21.0f), (10.0f / 19.0f), (9.0f / 17.0f), (53.0f / 100.0f), (8.0f / 15.0f), (7.0f / 13.0f), (54.0f / 100.0f), (6.0f / 11.0f), (11.0f / 20.0f), (5.0f / 9.0f), (56.0f / 100.0f), (9.0f / 16.0f), (57.0f / 100.0f), (4.0f / 7.0f), (11.0f / 19.0f), (58.0f / 100.0f), (7.0f / 12.0f), (10.0f / 17.0f), (59.0f / 100.0f), (13.0f / 22.0f), (3.0f / 5.0f), (61.0f / 100.0f), (11.0f / 18.0f), (8.0f / 13.0f), (13.0f / 21.0f), (62.0f / 100.0f), (5.0f / 8.0f), (63.0f / 100.0f), (12.0f / 19.0f), (7.0f / 11.0f), (64.0f / 100.0f), (9.0f / 14.0f), (11.0f / 17.0f), (13.0f / 20.0f), (66.0f / 100.0f), (2.0f / 3.0f), (67.0f / 100.0f), (68.0f / 100.0f), (15.0f / 22.0f), (13.0f / 19.0f), (11.0f / 16.0f), (69.0f / 100.0f), (9.0f / 13.0f), (7.0f / 10.0f), (12.0f / 17.0f), (71.0f / 100.0f), (5.0f / 7.0f), (72.0f / 100.0f), (13.0f / 18.0f), (8.0f / 11.0f), (73.0f / 100.0f), (11.0f / 15.0f), (14.0f / 19.0f), (74.0f / 100.0f), (3.0f / 4.0f), (76.0f / 100.0f), (16.0f / 21.0f), (13.0f / 17.0f), (10.0f / 13.0f), (77.0f / 100.0f), (17.0f / 22.0f), (7.0f / 9.0f), (78.0f / 100.0f), (11.0f / 14.0f), (15.0f / 19.0f), (79.0f / 100.0f), (4.0f / 5.0f), (17.0f / 21.0f), (81.0f / 100.0f), (13.0f / 16.0f), (9.0f / 11.0f), (82.0f / 100.0f), (14.0f / 17.0f), (83.0f / 100.0f), (5.0f / 6.0f), (84.0f / 100.0f), (16.0f / 19.0f), (11.0f / 13.0f), (17.0f / 20.0f), (6.0f / 7.0f), (86.0f / 100.0f), (19.0f / 22.0f), (13.0f / 15.0f), (87.0f / 100.0f), (7.0f / 8.0f), (88.0f / 100.0f), (15.0f / 17.0f), (8.0f / 9.0f), (89.0f / 100.0f), (17.0f / 19.0f), (9.0f / 10.0f), (19.0f / 21.0f), (10.0f / 11.0f), (91.0f / 100.0f), (11.0f / 12.0f), (92.0f / 100.0f), (12.0f / 13.0f), (13.0f / 14.0f), (93.0f / 100.0f), (14.0f / 15.0f), (15.0f / 16.0f), (94.0f / 100.0f), (16.0f / 17.0f), (17.0f / 18.0f), (18.0f / 19.0f), (19.0f / 20.0f), (20.0f / 21.0f), (21.0f / 22.0f), (96.0f / 100.0f), (97.0f / 100.0f), (98.0f / 100.0f), (99.0f / 100.0f), (1.0f / 1.0f) , 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; // clang-format on class numpy { public: static float sqrt(float x) { #if EIDSP_USE_CMSIS_DSP float temp; arm_sqrt_f32(x, &temp); return temp; #else return sqrtf(x); #endif } /** * Roll array elements along a given axis. * Elements that roll beyond the last position are re-introduced at the first. * @param input_array * @param input_array_size * @param shift The number of places by which elements are shifted. * @returns EIDSP_OK if OK */ static int roll(float *input_array, size_t input_array_size, int shift) { if (shift < 0) { shift = input_array_size + shift; } if (shift == 0) { return EIDSP_OK; } // so we need to allocate a buffer of the size of shift... EI_DSP_MATRIX(shift_matrix, 1, shift); // we copy from the end of the buffer into the shift buffer memcpy(shift_matrix.buffer, input_array + input_array_size - shift, shift * sizeof(float)); // now we do a memmove to shift the array memmove(input_array + shift, input_array, (input_array_size - shift) * sizeof(float)); // and copy the shift buffer back to the beginning of the array memcpy(input_array, shift_matrix.buffer, shift * sizeof(float)); return EIDSP_OK; } /** * Roll array elements along a given axis. * Elements that roll beyond the last position are re-introduced at the first. * @param input_array * @param input_array_size * @param shift The number of places by which elements are shifted. * @returns EIDSP_OK if OK */ static int roll(int *input_array, size_t input_array_size, int shift) { if (shift < 0) { shift = input_array_size + shift; } if (shift == 0) { return EIDSP_OK; } // so we need to allocate a buffer of the size of shift... EI_DSP_MATRIX(shift_matrix, 1, shift); // we copy from the end of the buffer into the shift buffer memcpy(shift_matrix.buffer, input_array + input_array_size - shift, shift * sizeof(int)); // now we do a memmove to shift the array memmove(input_array + shift, input_array, (input_array_size - shift) * sizeof(int)); // and copy the shift buffer back to the beginning of the array memcpy(input_array, shift_matrix.buffer, shift * sizeof(int)); return EIDSP_OK; } /** * Roll array elements along a given axis. * Elements that roll beyond the last position are re-introduced at the first. * @param input_array * @param input_array_size * @param shift The number of places by which elements are shifted. * @returns EIDSP_OK if OK */ static int roll(int16_t *input_array, size_t input_array_size, int shift) { if (shift < 0) { shift = input_array_size + shift; } if (shift == 0) { return EIDSP_OK; } // so we need to allocate a buffer of the size of shift... EI_DSP_MATRIX(shift_matrix, 1, shift); // we copy from the end of the buffer into the shift buffer memcpy(shift_matrix.buffer, input_array + input_array_size - shift, shift * sizeof(int16_t)); // now we do a memmove to shift the array memmove(input_array + shift, input_array, (input_array_size - shift) * sizeof(int16_t)); // and copy the shift buffer back to the beginning of the array memcpy(input_array, shift_matrix.buffer, shift * sizeof(int16_t)); return EIDSP_OK; } static float sum(float *input_array, size_t input_array_size) { float res = 0.0f; for (size_t ix = 0; ix < input_array_size; ix++) { res += input_array[ix]; } return res; } /** * Multiply two matrices (MxN * NxK matrix) * @param matrix1 Pointer to matrix1 (MxN) * @param matrix2 Pointer to matrix2 (NxK) * @param out_matrix Pointer to out matrix (MxK) * @returns EIDSP_OK if OK */ static int dot(matrix_t *matrix1, matrix_t *matrix2, matrix_t *out_matrix) { if (matrix1->cols != matrix2->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } // no. of rows in matrix1 determines the if (matrix1->rows != out_matrix->rows || matrix2->cols != out_matrix->cols) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } #if EIDSP_USE_CMSIS_DSP if (matrix1->rows > EI_MAX_UINT16 || matrix1->cols > EI_MAX_UINT16 || matrix2->rows > EI_MAX_UINT16 || matrix2->cols > EI_MAX_UINT16 || out_matrix->rows > EI_MAX_UINT16 || out_matrix->cols > EI_MAX_UINT16) { return EIDSP_NARROWING; } const arm_matrix_instance_f32 m1 = { static_cast(matrix1->rows), static_cast(matrix1->cols), matrix1->buffer }; const arm_matrix_instance_f32 m2 = { static_cast(matrix2->rows), static_cast(matrix2->cols), matrix2->buffer }; arm_matrix_instance_f32 mo = { static_cast(out_matrix->rows), static_cast(out_matrix->cols), out_matrix->buffer }; int status = arm_mat_mult_f32(&m1, &m2, &mo); if (status != ARM_MATH_SUCCESS) { EIDSP_ERR(status); } #else memset(out_matrix->buffer, 0, out_matrix->rows * out_matrix->cols * sizeof(float)); for (size_t i = 0; i < matrix1->rows; i++) { dot_by_row(i, matrix1->buffer + (i * matrix1->cols), matrix1->cols, matrix2, out_matrix); } #endif return EIDSP_OK; } /** * Multiply two matrices (MxN * NxK matrix) * @param matrix1 Pointer to matrix1 (MxN) * @param matrix2 Pointer to quantized matrix2 (NxK) * @param out_matrix Pointer to out matrix (MxK) * @returns EIDSP_OK if OK */ static int dot(matrix_t *matrix1, quantized_matrix_t *matrix2, matrix_t *out_matrix) { if (matrix1->cols != matrix2->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } // no. of rows in matrix1 determines the if (matrix1->rows != out_matrix->rows || matrix2->cols != out_matrix->cols) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } memset(out_matrix->buffer, 0, out_matrix->rows * out_matrix->cols * sizeof(float)); for (size_t i = 0; i < matrix1->rows; i++) { dot_by_row(i, matrix1->buffer + (i * matrix1->cols), matrix1->cols, matrix2, out_matrix); } return EIDSP_OK; } /** * Multiply two matrices lazily per row in matrix 1 (MxN * NxK matrix) * @param i matrix1 row index * @param row matrix1 row * @param matrix1_cols matrix1 row size (1xN) * @param matrix2 Pointer to matrix2 (NxK) * @param out_matrix Pointer to out matrix (MxK) * @returns EIDSP_OK if OK */ static int dot_by_row(int i, float *row, uint32_t matrix1_cols, matrix_t *matrix2, matrix_t *out_matrix) { if (matrix1_cols != matrix2->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } #if EIDSP_USE_CMSIS_DSP if (matrix1_cols > EI_MAX_UINT16 || matrix2->rows > EI_MAX_UINT16 || matrix2->cols > EI_MAX_UINT16 || out_matrix->cols > EI_MAX_UINT16) { return EIDSP_NARROWING; } const arm_matrix_instance_f32 m1 = { 1, static_cast(matrix1_cols), row }; const arm_matrix_instance_f32 m2 = { static_cast(matrix2->rows), static_cast(matrix2->cols), matrix2->buffer }; arm_matrix_instance_f32 mo = { 1, static_cast(out_matrix->cols), out_matrix->buffer + (i * out_matrix->cols) }; int status = arm_mat_mult_f32(&m1, &m2, &mo); if (status != ARM_MATH_SUCCESS) { EIDSP_ERR(status); } #else for (size_t j = 0; j < matrix2->cols; j++) { float tmp = 0.0f; for (size_t k = 0; k < matrix1_cols; k++) { tmp += row[k] * matrix2->buffer[k * matrix2->cols + j]; } out_matrix->buffer[i * matrix2->cols + j] += tmp; } #endif return EIDSP_OK; } /** * Multiply two matrices lazily per row in matrix 1 (MxN * NxK matrix) * @param i matrix1 row index * @param row matrix1 row * @param matrix1_cols matrix1 row size * @param matrix2 Pointer to matrix2 (NxK) * @param out_matrix Pointer to out matrix (MxK) * @returns EIDSP_OK if OK */ static int dot_by_row(int i, float *row, size_t matrix1_cols, quantized_matrix_t *matrix2, matrix_t *out_matrix) { if (matrix1_cols != matrix2->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (uint16_t j = 0; j < matrix2->cols; j++) { float tmp = 0.0; for (uint16_t k = 0; k < matrix1_cols; k++) { uint8_t u8 = matrix2->buffer[k * matrix2->cols + j]; if (u8) { // this matrix appears to be very sparsely populated tmp += row[k] * quantized_values_one_zero[u8]; } } out_matrix->buffer[i * matrix2->cols + j] = tmp; } return EIDSP_OK; } static void transpose_in_place(matrix_t *matrix) { size_t size = matrix->cols * matrix->rows - 1; float temp; // temp for swap size_t next; // next item to swap size_t cycleBegin; // index of start of cycle size_t i; // location in matrix size_t all_done_mark = 1; ei_vector done(size+1,false); i = 1; // Note that matrix[0] and last element of matrix won't move while (1) { cycleBegin = i; temp = matrix->buffer[i]; do { size_t col = i % matrix->cols; size_t row = i / matrix->cols; // swap row and col to make new idx, b/c we want to know where in the transposed matrix next = col*matrix->rows + row; float temp2 = matrix->buffer[next]; matrix->buffer[next] = temp; temp = temp2; done[next] = true; i = next; } while (i != cycleBegin); // start next cycle by find next not done for (i = all_done_mark; done[i]; i++) { all_done_mark++; // move the high water mark so we don't look again if(i>=size) { goto LOOP_END; } } } LOOP_END: // finally, swap the row and column dimensions std::swap(matrix->rows, matrix->cols); } /** * Transpose an array, souce is destination (from MxN to NxM) * Note: this temporary allocates a copy of the matrix on the heap. * @param matrix * @param rows * @param columns * @deprecated You probably want to use transpose_in_place * @returns EIDSP_OK if OK */ static int transpose(matrix_t *matrix) { int r = transpose(matrix->buffer, matrix->cols, matrix->rows); if (r != 0) { return r; } uint16_t old_rows = matrix->rows; uint16_t old_cols = matrix->cols; matrix->rows = old_cols; matrix->cols = old_rows; return EIDSP_OK; } /** * Transpose an array, source is destination (from MxN to NxM) * @param matrix * @param rows * @param columns * @deprecated You probably want to use transpose_in_place * @returns EIDSP_OK if OK */ static int transpose(float *matrix, int rows, int columns) { EI_DSP_MATRIX(temp_matrix, rows, columns); if (!temp_matrix.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } #if EIDSP_USE_CMSIS_DSP if (rows > EI_MAX_UINT16 || columns > EI_MAX_UINT16) { return EIDSP_NARROWING; } const arm_matrix_instance_f32 i_m = { static_cast(columns), static_cast(rows), matrix }; arm_matrix_instance_f32 o_m = { static_cast(rows), static_cast(columns), temp_matrix.buffer }; arm_status status = arm_mat_trans_f32(&i_m, &o_m); if (status != ARM_MATH_SUCCESS) { return status; } #else for (int j = 0; j < rows; j++){ for (int i = 0; i < columns; i++){ temp_matrix.buffer[j * columns + i] = matrix[i * rows + j]; } } #endif memcpy(matrix, temp_matrix.buffer, rows * columns * sizeof(float)); return EIDSP_OK; } /** * Transpose an array in place (from MxN to NxM) * Note: this temporary allocates a copy of the matrix on the heap. * @param matrix * @param rows * @param columns * @returns EIDSP_OK if OK */ static int transpose(quantized_matrix_t *matrix) { int r = transpose(matrix->buffer, matrix->cols, matrix->rows); if (r != 0) { return r; } uint16_t old_rows = matrix->rows; uint16_t old_cols = matrix->cols; matrix->rows = old_cols; matrix->cols = old_rows; return EIDSP_OK; } /** * Transpose an array in place (from MxN to NxM) * @param matrix * @param rows * @param columns * @returns EIDSP_OK if OK */ static int transpose(uint8_t *matrix, int rows, int columns) { // dequantization function is not used actually... EI_DSP_QUANTIZED_MATRIX(temp_matrix, rows, columns, &dequantize_zero_one); if (!temp_matrix.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } for (int j = 0; j < rows; j++){ for (int i = 0; i < columns; i++){ temp_matrix.buffer[j * columns + i] = matrix[i * rows + j]; } } memcpy(matrix, temp_matrix.buffer, rows * columns * sizeof(uint8_t)); return EIDSP_OK; } /** * Return the Discrete Cosine Transform of arbitrary type sequence 2. * @param input Input array (of size N) * @param N number of items in input and output array * @returns EIDSP_OK if OK */ static int dct2(float *input, size_t N, DCT_NORMALIZATION_MODE normalization = DCT_NORMALIZATION_NONE) { if (N == 0) { return EIDSP_OK; } int ret = ei::dct::transform(input, N); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } // for some reason the output is 2x too low... for (size_t ix = 0; ix < N; ix++) { input[ix] *= 2; } if (normalization == DCT_NORMALIZATION_ORTHO) { input[0] = input[0] * sqrt(1.0f / static_cast(4 * N)); for (size_t ix = 1; ix < N; ix++) { input[ix] = input[ix] * sqrt(1.0f / static_cast(2 * N)); } } return EIDSP_OK; } /** * Discrete Cosine Transform of arbitrary type sequence 2 on a matrix. * @param matrix * @returns EIDSP_OK if OK */ static int dct2(matrix_t *matrix, DCT_NORMALIZATION_MODE normalization = DCT_NORMALIZATION_NONE) { for (size_t row = 0; row < matrix->rows; row++) { int r = dct2(matrix->buffer + (row * matrix->cols), matrix->cols, normalization); if (r != EIDSP_OK) { return r; } } return EIDSP_OK; } /** * Quantize a float value between zero and one * @param value Float value */ static uint8_t quantize_zero_one(float value) { const size_t length = sizeof(quantized_values_one_zero) / sizeof(float); // look in the table for (size_t ix = 0; ix < length; ix++) { if (quantized_values_one_zero[ix] == value) return ix; } // no match? if (value < quantized_values_one_zero[0]) { return quantized_values_one_zero[0]; } if (value > quantized_values_one_zero[length - 1]) { return quantized_values_one_zero[length - 1]; } int lo = 0; int hi = length - 1; while (lo <= hi) { int mid = (hi + lo) / 2; if (value < quantized_values_one_zero[mid]) { hi = mid - 1; } else if (value > quantized_values_one_zero[mid]) { lo = mid + 1; } else { return quantized_values_one_zero[mid]; } } // lo == hi + 1 return (quantized_values_one_zero[lo] - value) < (value - quantized_values_one_zero[hi]) ? lo : hi; } /** * Dequantize a float value between zero and one * @param value */ static float dequantize_zero_one(uint8_t value) { return quantized_values_one_zero[value]; } /** * Pad an array. * Pads with the reflection of the vector mirrored along the edge of the array. * @param input Input matrix (MxN) * @param output Output matrix of size (M+pad_before+pad_after x N) * @param pad_before Number of items to pad before * @param pad_after Number of items to pad after * @returns 0 if OK */ static int pad_1d_symmetric(matrix_t *input, matrix_t *output, uint16_t pad_before, uint16_t pad_after) { if (output->cols != input->cols) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output->rows != input->rows + pad_before + pad_after) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (input->rows == 0) { EIDSP_ERR(EIDSP_INPUT_MATRIX_EMPTY); } uint32_t pad_before_index = 0; bool pad_before_direction_up = true; for (int32_t ix = pad_before - 1; ix >= 0; ix--) { memcpy(output->buffer + (input->cols * ix), input->buffer + (pad_before_index * input->cols), input->cols * sizeof(float)); if (pad_before_index == 0 && !pad_before_direction_up) { pad_before_direction_up = true; } else if (pad_before_index == input->rows - 1 && pad_before_direction_up) { pad_before_direction_up = false; } else if (pad_before_direction_up) { pad_before_index++; } else { pad_before_index--; } } memcpy(output->buffer + (input->cols * pad_before), input->buffer, input->rows * input->cols * sizeof(float)); int32_t pad_after_index = input->rows - 1; bool pad_after_direction_up = false; for (int32_t ix = 0; ix < pad_after; ix++) { memcpy(output->buffer + (input->cols * (ix + pad_before + input->rows)), input->buffer + (pad_after_index * input->cols), input->cols * sizeof(float)); if (pad_after_index == 0 && !pad_after_direction_up) { pad_after_direction_up = true; } else if (pad_after_index == static_cast(input->rows) - 1 && pad_after_direction_up) { pad_after_direction_up = false; } else if (pad_after_direction_up) { pad_after_index++; } else { pad_after_index--; } } return EIDSP_OK; } /** * Scale a matrix in place * @param matrix * @param scale * @returns 0 if OK */ static int scale(matrix_t *matrix, float scale) { if (scale == 1.0f) return EIDSP_OK; #if EIDSP_USE_CMSIS_DSP if (matrix->rows > EI_MAX_UINT16 || matrix->cols > EI_MAX_UINT16) { return EIDSP_NARROWING; } const arm_matrix_instance_f32 mi = { static_cast(matrix->rows), static_cast(matrix->cols), matrix->buffer }; arm_matrix_instance_f32 mo = { static_cast(matrix->rows), static_cast(matrix->cols), matrix->buffer }; int status = arm_mat_scale_f32(&mi, scale, &mo); if (status != ARM_MATH_SUCCESS) { return status; } #else for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { matrix->buffer[ix] *= scale; } #endif return EIDSP_OK; } /** * Scale a matrix in place, per row * @param matrix Input matrix (MxN) * @param scale_matrix Scale matrix (Mx1) * @returns 0 if OK */ static int scale(matrix_t *matrix, matrix_t *scale_matrix) { if (matrix->rows != scale_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (scale_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < matrix->rows; row++) { EI_DSP_MATRIX_B(temp, 1, matrix->cols, matrix->buffer + (row * matrix->cols)); int ret = scale(&temp, scale_matrix->buffer[row]); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } } return EIDSP_OK; } /** * Add on matrix in place * @param matrix * @param addition * @returns 0 if OK */ static int add(matrix_t *matrix, float addition) { for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { matrix->buffer[ix] += addition; } return EIDSP_OK; } /** * Add on a matrix in place, per row * @param matrix Input matrix (MxN) * @param add Scale matrix (Mx1) * @returns 0 if OK */ static int add(matrix_t *matrix, matrix_t *add_matrix) { if (matrix->rows != add_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (add_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < matrix->rows; row++) { EI_DSP_MATRIX_B(temp, 1, matrix->cols, matrix->buffer + (row * matrix->cols)); int ret = add(&temp, add_matrix->buffer[row]); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } } return EIDSP_OK; } /** * Subtract from matrix in place * @param matrix * @param subtraction * @returns 0 if OK */ static int subtract(matrix_t *matrix, float subtraction) { for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { matrix->buffer[ix] -= subtraction; } return EIDSP_OK; } /** * Add on a matrix in place, per row * @param matrix Input matrix (MxN) * @param add Scale matrix (Mx1) * @returns 0 if OK */ static int subtract(matrix_t *matrix, matrix_t *subtract_matrix) { if (matrix->rows != subtract_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (subtract_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < matrix->rows; row++) { EI_DSP_MATRIX_B(temp, 1, matrix->cols, matrix->buffer + (row * matrix->cols)); int ret = subtract(&temp, subtract_matrix->buffer[row]); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } } return EIDSP_OK; } /** * Calculate the root mean square of a matrix, one per row * @param matrix Matrix of size (MxN) * @param output_matrix Matrix of size (Mx1) * @returns 0 if OK */ static int rms(matrix_t *matrix, matrix_t *output_matrix) { if (matrix->rows != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < matrix->rows; row++) { #if EIDSP_USE_CMSIS_DSP float rms_result; arm_rms_f32(matrix->buffer + (row * matrix->cols), matrix->cols, &rms_result); output_matrix->buffer[row] = rms_result; #else float sum = 0.0; for(size_t ix = 0; ix < matrix->cols; ix++) { float v = matrix->buffer[(row * matrix->cols) + ix]; sum += v * v; } output_matrix->buffer[row] = sqrt(sum / static_cast(matrix->cols)); #endif } return EIDSP_OK; } /** * Calculate the mean over a matrix per row * @param input_matrix Input matrix (MxN) * @param output_matrix Output matrix (Mx1) */ static int mean(matrix_t *input_matrix, matrix_t *output_matrix) { if (input_matrix->rows != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < input_matrix->rows; row++) { #if EIDSP_USE_CMSIS_DSP float mean; arm_mean_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &mean); output_matrix->buffer[row] = mean; #else float sum = 0.0f; for (size_t col = 0; col < input_matrix->cols; col++) { sum += input_matrix->buffer[( row * input_matrix->cols ) + col]; } output_matrix->buffer[row] = sum / input_matrix->cols; #endif } return EIDSP_OK; } /** * Calculate the mean over a matrix on axis 0 * @param input_matrix Input matrix (MxN) * @param output_matrix Output matrix (Nx1) * @returns 0 if OK */ static int mean_axis0(matrix_t *input_matrix, matrix_t *output_matrix) { if (input_matrix->cols != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t col = 0; col < input_matrix->cols; col++) { // Note - not using CMSIS-DSP here // gathering up the current columnand moving it into sequential memory to use // SIMD to calculate the mean would take more time than the simple loop // so disable this case. The alternative is to use 2 transposes and on a "big" ARM // platform that will take more time float sum = 0.0f; for (size_t row = 0; row < input_matrix->rows; row++) { sum += input_matrix->buffer[( row * input_matrix->cols ) + col]; } output_matrix->buffer[col] = sum / input_matrix->rows; } return EIDSP_OK; } /** * Calculate the standard deviation over a matrix on axis 0 * @param input_matrix Input matrix (MxN) * @param output_matrix Output matrix (Nx1) * @returns 0 if OK */ static int std_axis0(matrix_t *input_matrix, matrix_t *output_matrix) { #if EIDSP_USE_CMSIS_DSP return std_axis0_CMSIS(input_matrix, output_matrix); #else if (input_matrix->cols != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t col = 0; col < input_matrix->cols; col++) { float sum = 0.0f; for (size_t row = 0; row < input_matrix->rows; row++) { sum += input_matrix->buffer[(row * input_matrix->cols) + col]; } float mean = sum / input_matrix->rows; float std = 0.0f; float tmp; for (size_t row = 0; row < input_matrix->rows; row++) { tmp = input_matrix->buffer[(row * input_matrix->cols) + col] - mean; std += tmp * tmp; } output_matrix->buffer[col] = sqrt(std / input_matrix->rows); } return EIDSP_OK; #endif } /** * Get the minimum value in a matrix per row * @param input_matrix Input matrix (MxN) * @param output_matrix Output matrix (Mx1) */ static int min(matrix_t *input_matrix, matrix_t *output_matrix) { if (input_matrix->rows != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < input_matrix->rows; row++) { #if EIDSP_USE_CMSIS_DSP float min; uint32_t ix; arm_min_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &min, &ix); output_matrix->buffer[row] = min; #else float min = FLT_MAX; for (size_t col = 0; col < input_matrix->cols; col++) { float v = input_matrix->buffer[( row * input_matrix->cols ) + col]; if (v < min) { min = v; } } output_matrix->buffer[row] = min; #endif } return EIDSP_OK; } /** * Get the maximum value in a matrix per row * @param input_matrix Input matrix (MxN) * @param output_matrix Output matrix (Mx1) */ static int max(matrix_t *input_matrix, matrix_t *output_matrix) { if (input_matrix->rows != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < input_matrix->rows; row++) { #if EIDSP_USE_CMSIS_DSP float max; uint32_t ix; arm_max_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &max, &ix); output_matrix->buffer[row] = max; #else float max = -FLT_MAX; for (size_t col = 0; col < input_matrix->cols; col++) { float v = input_matrix->buffer[( row * input_matrix->cols ) + col]; if (v > max) { max = v; } } output_matrix->buffer[row] = max; #endif } return EIDSP_OK; } /** * Get the stdev value in a matrix per row * @param input_matrix Input matrix (MxN) * @param output_matrix Output matrix (Mx1) */ static int stdev(matrix_t *input_matrix, matrix_t *output_matrix) { if (input_matrix->rows != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < input_matrix->rows; row++) { #if EIDSP_USE_CMSIS_DSP float std; float var; cmsis_arm_variance(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, &var); arm_sqrt_f32(var, &std); output_matrix->buffer[row] = std; #else float sum = 0.0f; for (size_t col = 0; col < input_matrix->cols; col++) { sum += input_matrix->buffer[(row * input_matrix->cols) + col]; } float mean = sum / input_matrix->cols; float std = 0.0f; for (size_t col = 0; col < input_matrix->cols; col++) { float diff; diff = input_matrix->buffer[(row * input_matrix->cols) + col] - mean; std += diff * diff; } output_matrix->buffer[row] = sqrt(std / input_matrix->cols); #endif } return EIDSP_OK; } /** * Get the skewness value in a matrix per row * @param input_matrix Input matrix (MxN) * @param output_matrix Output matrix (Mx1) */ static int skew(matrix_t *input_matrix, matrix_t *output_matrix) { if (input_matrix->rows != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < input_matrix->rows; row++) { #if EIDSP_USE_CMSIS_DSP float mean; float var; // Calculate the mean & variance arm_mean_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &mean); cmsis_arm_variance(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, &var); // Calculate m_3 float m_3; cmsis_arm_third_moment(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, mean, &m_3); // Calculate (variance)^(3/2) arm_sqrt_f32(var * var * var, &var); // Calculate skew = (m_3) / (variance)^(3/2) if (var == 0.0f) { output_matrix->buffer[row] = 0.0f; } else { output_matrix->buffer[row] = m_3 / var; } #else float sum = 0.0f; float mean; // Calculate the mean for (size_t col = 0; col < input_matrix->cols; col++) { sum += input_matrix->buffer[( row * input_matrix->cols ) + col]; } mean = sum / input_matrix->cols; // Calculate the m values float m_3 = 0.0f; float m_2 = 0.0f; for (size_t col = 0; col < input_matrix->cols; col++) { float diff; diff = input_matrix->buffer[( row * input_matrix->cols ) + col] - mean; m_3 += diff * diff * diff; m_2 += diff * diff; } m_3 = m_3 / input_matrix->cols; m_2 = m_2 / input_matrix->cols; // Calculate (m_2)^(3/2) m_2 = sqrt(m_2 * m_2 * m_2); // Calculate skew = (m_3) / (m_2)^(3/2) if (m_2 == 0.0f) { output_matrix->buffer[row] = 0.0f; } else { output_matrix->buffer[row] = m_3 / m_2; } #endif } return EIDSP_OK; } /** * Get the kurtosis value in a matrix per row * @param input_matrix Input matrix (MxN) * @param output_matrix Output matrix (Mx1) */ static int kurtosis(matrix_t *input_matrix, matrix_t *output_matrix) { if (input_matrix->rows != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } for (size_t row = 0; row < input_matrix->rows; row++) { #if EIDSP_USE_CMSIS_DSP float mean; float var; // Calculate mean & variance arm_mean_f32(input_matrix->buffer + (row * input_matrix->cols), input_matrix->cols, &mean); cmsis_arm_variance(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, &var); // Calculate m_4 float m_4; cmsis_arm_fourth_moment(&input_matrix->buffer[(row * input_matrix->cols)], input_matrix->cols, mean, &m_4); // Calculate Fisher kurtosis = (m_4 / variance^2) - 3 var = var * var; if (var == 0.0f) { output_matrix->buffer[row] = -3.0f; } else { output_matrix->buffer[row] = (m_4 / var) - 3.0f; } #else // Calculate the mean float mean = 0.0f; float sum = 0.0f; for (size_t col = 0; col < input_matrix->cols; col++) { sum += input_matrix->buffer[( row * input_matrix->cols ) + col]; } mean = sum / input_matrix->cols; // Calculate m_4 & variance float m_4 = 0.0f; float variance = 0.0f; for (size_t col = 0; col < input_matrix->cols; col++) { float diff; diff = input_matrix->buffer[(row * input_matrix->cols) + col] - mean; float square_diff = diff * diff; variance += square_diff; m_4 += square_diff * square_diff; } m_4 = m_4 / input_matrix->cols; variance = variance / input_matrix->cols; // Square the variance variance = variance * variance; // Calculate Fisher kurtosis = (m_4 / variance^2) - 3 if (variance == 0.0f) { output_matrix->buffer[row] = -3.0f; } else { output_matrix->buffer[row] = (m_4 / variance) - 3.0f; } #endif } return EIDSP_OK; } /** * Compute the one-dimensional discrete Fourier Transform for real input. * This function computes the one-dimensional n-point discrete Fourier Transform (DFT) of * a real-valued array by means of an efficient algorithm called the Fast Fourier Transform (FFT). * @param src Source buffer * @param src_size Size of the source buffer * @param output Output buffer * @param output_size Size of the output buffer, should be n_fft / 2 + 1 * @returns 0 if OK */ static int rfft(const float *src, size_t src_size, float *output, size_t output_size, size_t n_fft) { size_t n_fft_out_features = (n_fft / 2) + 1; if (output_size != n_fft_out_features) { EIDSP_ERR(EIDSP_BUFFER_SIZE_MISMATCH); } // truncate if needed if (src_size > n_fft) { src_size = n_fft; } // declare input and output arrays EI_DSP_MATRIX(fft_input, 1, n_fft); if (!fft_input.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } // copy from src to fft_input memcpy(fft_input.buffer, src, src_size * sizeof(float)); // pad to the rigth with zeros memset(fft_input.buffer + src_size, 0, (n_fft - src_size) * sizeof(kiss_fft_scalar)); #if EIDSP_USE_CMSIS_DSP if (n_fft != 32 && n_fft != 64 && n_fft != 128 && n_fft != 256 && n_fft != 512 && n_fft != 1024 && n_fft != 2048 && n_fft != 4096) { int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } } else { // hardware acceleration only works for the powers above... arm_rfft_fast_instance_f32 rfft_instance; int status = cmsis_rfft_init_f32(&rfft_instance, n_fft); if (status != ARM_MATH_SUCCESS) { return status; } EI_DSP_MATRIX(fft_output, 1, n_fft); if (!fft_output.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } arm_rfft_fast_f32(&rfft_instance, fft_input.buffer, fft_output.buffer, 0); output[0] = fft_output.buffer[0]; output[n_fft_out_features - 1] = fft_output.buffer[1]; size_t fft_output_buffer_ix = 2; for (size_t ix = 1; ix < n_fft_out_features - 1; ix += 1) { float rms_result; arm_rms_f32(fft_output.buffer + fft_output_buffer_ix, 2, &rms_result); output[ix] = rms_result * sqrt(2); fft_output_buffer_ix += 2; } } #else int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } #endif return EIDSP_OK; } /** * Compute the one-dimensional discrete Fourier Transform for real input. * This function computes the one-dimensional n-point discrete Fourier Transform (DFT) of * a real-valued array by means of an efficient algorithm called the Fast Fourier Transform (FFT). * @param src Source buffer * @param src_size Size of the source buffer * @param output Output buffer * @param output_size Size of the output buffer, should be n_fft / 2 + 1 * @returns 0 if OK */ static int rfft(const float *src, size_t src_size, fft_complex_t *output, size_t output_size, size_t n_fft) { size_t n_fft_out_features = (n_fft / 2) + 1; if (output_size != n_fft_out_features) { EIDSP_ERR(EIDSP_BUFFER_SIZE_MISMATCH); } // truncate if needed if (src_size > n_fft) { src_size = n_fft; } // declare input and output arrays float *fft_input_buffer = NULL; if (src_size == n_fft) { fft_input_buffer = (float*)src; } EI_DSP_MATRIX_B(fft_input, 1, n_fft, fft_input_buffer); if (!fft_input.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } if (!fft_input_buffer) { // copy from src to fft_input memcpy(fft_input.buffer, src, src_size * sizeof(float)); // pad to the rigth with zeros memset(fft_input.buffer + src_size, 0, (n_fft - src_size) * sizeof(float)); } #if EIDSP_USE_CMSIS_DSP if (n_fft != 32 && n_fft != 64 && n_fft != 128 && n_fft != 256 && n_fft != 512 && n_fft != 1024 && n_fft != 2048 && n_fft != 4096) { int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } } else { // hardware acceleration only works for the powers above... arm_rfft_fast_instance_f32 rfft_instance; int status = cmsis_rfft_init_f32(&rfft_instance, n_fft); if (status != ARM_MATH_SUCCESS) { return status; } EI_DSP_MATRIX(fft_output, 1, n_fft); if (!fft_output.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } arm_rfft_fast_f32(&rfft_instance, fft_input.buffer, fft_output.buffer, 0); output[0].r = fft_output.buffer[0]; output[0].i = 0.0f; output[n_fft_out_features - 1].r = fft_output.buffer[1]; output[n_fft_out_features - 1].i = 0.0f; size_t fft_output_buffer_ix = 2; for (size_t ix = 1; ix < n_fft_out_features - 1; ix += 1) { output[ix].r = fft_output.buffer[fft_output_buffer_ix]; output[ix].i = fft_output.buffer[fft_output_buffer_ix + 1]; fft_output_buffer_ix += 2; } } #else int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } #endif return EIDSP_OK; } /** * Return evenly spaced numbers over a specified interval. * Returns num evenly spaced samples, calculated over the interval [start, stop]. * The endpoint of the interval can optionally be excluded. * * Based on https://github.com/ntessore/algo/blob/master/linspace.c * Licensed in public domain (see LICENSE in repository above) * * @param start The starting value of the sequence. * @param stop The end value of the sequence. * @param number Number of samples to generate. * @param out Out array, with size `number` * @returns 0 if OK */ static int linspace(float start, float stop, uint32_t number, float *out) { if (number < 1 || !out) { EIDSP_ERR(EIDSP_PARAMETER_INVALID); } if (number == 1) { out[0] = start; return EIDSP_OK; } // step size float step = (stop - start) / (number - 1); // do steps for (uint32_t ix = 0; ix < number - 1; ix++) { out[ix] = start + ix * step; } // last entry always stop out[number - 1] = stop; return EIDSP_OK; } /** * Return evenly spaced q31 numbers over a specified interval. * Returns num evenly spaced samples, calculated over the interval [start, stop]. * The endpoint of the interval can optionally be excluded. * * Based on https://github.com/ntessore/algo/blob/master/linspace.c * Licensed in public domain (see LICENSE in repository above) * * @param start The starting value of the sequence. * @param stop The end value of the sequence. * @param number Number of samples to generate. * @param out Out array, with size `number` * @returns 0 if OK */ static int linspace(EIDSP_i32 start, EIDSP_i32 stop, uint32_t number, EIDSP_i32 *out) { if (number < 1 || !out) { EIDSP_ERR(EIDSP_PARAMETER_INVALID); } if (number == 1) { out[0] = start; return EIDSP_OK; } // step size EIDSP_i32 step = (stop - start) / (number - 1); // do steps for (uint32_t ix = 0; ix < number - 1; ix++) { out[ix] = start + ix * step; } // last entry always stop out[number - 1] = stop; return EIDSP_OK; } /** * Convert an int32_t buffer into a float buffer, maps to -1..1 * @param input * @param output * @param length * @returns 0 if OK */ static int int32_to_float(const EIDSP_i32 *input, float *output, size_t length) { #if EIDSP_USE_CMSIS_DSP arm_q31_to_float((q31_t *)input, output, length); #else for (size_t ix = 0; ix < length; ix++) { output[ix] = (float)(input[ix]) / 2147483648.f; } #endif return EIDSP_OK; } /** * Convert an float buffer into a fixedpoint 32 bit buffer, input values are * limited between -1 and 1 * @param input * @param output * @param length * @returns 0 if OK */ static int float_to_int32(const float *input, EIDSP_i32 *output, size_t length) { #if EIDSP_USE_CMSIS_DSP arm_float_to_q31((float *)input, (q31_t *)output, length); #else for (size_t ix = 0; ix < length; ix++) { output[ix] = (EIDSP_i32)saturate((int64_t)(input[ix] * 2147483648.f), 32); } #endif return EIDSP_OK; } /** * Convert an int16_t buffer into a float buffer, maps to -1..1 * @param input * @param output * @param length * @returns 0 if OK */ static int int16_to_float(const EIDSP_i16 *input, float *output, size_t length) { #if EIDSP_USE_CMSIS_DSP arm_q15_to_float((q15_t *)input, output, length); #else for (size_t ix = 0; ix < length; ix++) { output[ix] = (float)(input[ix]) / 32768.f; } #endif return EIDSP_OK; } /** * Convert an float buffer into a fixedpoint 16 bit buffer, input values are * limited between -1 and 1 * @param input * @param output * @param length * @returns 0 if OK */ static int float_to_int16(const float *input, EIDSP_i16 *output, size_t length) { #if EIDSP_USE_CMSIS_DSP arm_float_to_q15((float *)input, output, length); #else for (size_t ix = 0; ix < length; ix++) { output[ix] = (EIDSP_i16)saturate((int32_t)(input[ix] * 32768.f), 16); } #endif return EIDSP_OK; } /** * Convert an int8_t buffer into a float buffer, maps to -1..1 * @param input * @param output * @param length * @returns 0 if OK */ static int int8_to_float(const EIDSP_i8 *input, float *output, size_t length) { #if EIDSP_USE_CMSIS_DSP arm_q7_to_float((q7_t *)input, output, length); #else for (size_t ix = 0; ix < length; ix++) { output[ix] = (float)(input[ix]) / 128; } #endif return EIDSP_OK; } #if EIDSP_SIGNAL_C_FN_POINTER == 0 /** * Create a signal structure from a buffer. * This is useful for data that you keep in memory anyway. If you need to load from * flash, then create the structure yourself. * @param data Buffer, make sure to keep this pointer alive * @param data_size Size of the buffer * @param signal Output signal * @returns EIDSP_OK if ok */ static int signal_from_buffer(const float *data, size_t data_size, signal_t *signal) { signal->total_length = data_size; #ifdef __MBED__ signal->get_data = mbed::callback(&numpy::signal_get_data, data); #else signal->get_data = [data](size_t offset, size_t length, float *out_ptr) { return numpy::signal_get_data(data, offset, length, out_ptr); }; #endif return EIDSP_OK; } #endif #if defined ( __GNUC__ ) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstrict-aliasing" #endif /** * > 50% faster then the math.h log() function * in return for a small loss in accuracy (0.00001 average diff with log()) * From: https://stackoverflow.com/questions/39821367/very-fast-approximate-logarithm-natural-log-function-in-c/39822314#39822314 * Licensed under the CC BY-SA 3.0 * @param a Input number * @returns Natural log value of a */ __attribute__((always_inline)) static inline float log(float a) { int32_t g = (int32_t) * ((int32_t *)&a); int32_t e = (g - 0x3f2aaaab) & 0xff800000; g = g - e; float m = (float) * ((float *)&g); float i = (float)e * 1.19209290e-7f; // 0x1.0p-23 /* m in [2/3, 4/3] */ float f = m - 1.0f; float s = f * f; /* Compute log1p(f) for f in [-1/3, 1/3] */ float r = fmaf(0.230836749f, f, -0.279208571f); // 0x1.d8c0f0p-3, -0x1.1de8dap-2 float t = fmaf(0.331826031f, f, -0.498910338f); // 0x1.53ca34p-2, -0x1.fee25ap-2 r = fmaf(r, s, t); r = fmaf(r, s, f); r = fmaf(i, 0.693147182f, r); // 0x1.62e430p-1 // log(2) return r; } /** * Fast log10 and log2 functions, significantly faster than the ones from math.h (~6x for log10 on M4F) * From https://community.arm.com/developer/tools-software/tools/f/armds-forum/4292/cmsis-dsp-new-functionality-proposal/22621#22621 * @param a Input number * @returns Log2 value of a */ __attribute__((always_inline)) static inline float log2(float a) { int e; float f = frexpf(fabsf(a), &e); float y = 1.23149591368684f; y *= f; y += -4.11852516267426f; y *= f; y += 6.02197014179219f; y *= f; y += -3.13396450166353f; y += e; return y; } /** * Fast log10 and log2 functions, significantly faster than the ones from math.h (~6x for log10 on M4F) * From https://community.arm.com/developer/tools-software/tools/f/armds-forum/4292/cmsis-dsp-new-functionality-proposal/22621#22621 * @param a Input number * @returns Log10 value of a */ __attribute__((always_inline)) static inline float log10(float a) { return numpy::log2(a) * 0.3010299956639812f; } #if defined ( __GNUC__ ) #pragma GCC diagnostic pop #endif /** * Calculate the natural log value of a matrix. Does an in-place replacement. * @param matrix Matrix (MxN) * @returns 0 if OK */ static int log(matrix_t *matrix) { for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { matrix->buffer[ix] = numpy::log(matrix->buffer[ix]); } return EIDSP_OK; } /** * Calculate the log10 of a matrix. Does an in-place replacement. * @param matrix Matrix (MxN) * @returns 0 if OK */ static int log10(matrix_t *matrix) { for (uint32_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { matrix->buffer[ix] = numpy::log10(matrix->buffer[ix]); } return EIDSP_OK; } /** * @brief Signed Saturate * * @param[in] val The value to be saturated * @param[in] sat Bit position to saturate to (1..32) * * @return Saturated value */ static int32_t saturate(int64_t val, uint32_t sat) { if ((sat >= 1U) && (sat <= 32U)) { int64_t max = (int64_t)((1U << (sat - 1U)) - 1U); int64_t min = -1 - max; if (val > max) { return (int32_t)max; } else if (val < min) { return (int32_t)min; } } return (int32_t)val; } /** * Normalize a matrix to 0..1. Does an in-place replacement. * Normalization done per row. * @param matrix */ static int normalize(matrix_t *matrix) { // Python implementation: // matrix = (matrix - np.min(matrix)) / (np.max(matrix) - np.min(matrix)) int r; matrix_t temp_matrix(1, matrix->rows * matrix->cols, matrix->buffer); matrix_t min_matrix(1, 1); if (!min_matrix.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } r = min(&temp_matrix, &min_matrix); if (r != EIDSP_OK) { EIDSP_ERR(r); } matrix_t max_matrix(1, 1); if (!max_matrix.buffer) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } r = max(&temp_matrix, &max_matrix); if (r != EIDSP_OK) { EIDSP_ERR(r); } float min_max_diff = (max_matrix.buffer[0] - min_matrix.buffer[0]); /* Prevent divide by 0 by setting minimum value for divider */ float row_scale = min_max_diff < 0.001 ? 1.0f : 1.0f / min_max_diff; r = subtract(&temp_matrix, min_matrix.buffer[0]); if (r != EIDSP_OK) { EIDSP_ERR(r); } r = scale(&temp_matrix, row_scale); if (r != EIDSP_OK) { EIDSP_ERR(r); } return EIDSP_OK; } /** * Clip (limit) the values in an array. Does an in-place replacement. * Values outside the interval are clipped to the interval edges. * For example, if an interval of [0, 1] is specified, values smaller than 0 become 0, * and values larger than 1 become 1. * @param matrix * @param min Min value to be clipped * @param max Max value to be clipped */ static int clip(matrix_t *matrix, float min, float max) { if (max < min) { EIDSP_ERR(EIDSP_PARAMETER_INVALID); } for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { if (matrix->buffer[ix] < min) { matrix->buffer[ix] = min; } else if (matrix->buffer[ix] > max) { matrix->buffer[ix] = max; } } return EIDSP_OK; } /** * Cut the data behind the comma on a matrix. Does an in-place replacement. * E.g. around([ 3.01, 4.89 ]) becomes [3, 4] * @param matrix */ static int round(matrix_t *matrix) { for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) { matrix->buffer[ix] = ::round(matrix->buffer[ix]); } return EIDSP_OK; } static int software_rfft(float *fft_input, float *output, size_t n_fft, size_t n_fft_out_features) { kiss_fft_cpx *fft_output = (kiss_fft_cpx*)ei_dsp_malloc(n_fft_out_features * sizeof(kiss_fft_cpx)); if (!fft_output) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } size_t kiss_fftr_mem_length; // create fftr context kiss_fftr_cfg cfg = kiss_fftr_alloc(n_fft, 0, NULL, NULL, &kiss_fftr_mem_length); if (!cfg) { ei_dsp_free(fft_output, n_fft_out_features * sizeof(kiss_fft_cpx)); EIDSP_ERR(EIDSP_OUT_OF_MEM); } ei_dsp_register_alloc(kiss_fftr_mem_length, cfg); // execute the rfft operation kiss_fftr(cfg, fft_input, fft_output); // and write back to the output for (size_t ix = 0; ix < n_fft_out_features; ix++) { output[ix] = sqrt(pow(fft_output[ix].r, 2) + pow(fft_output[ix].i, 2)); } ei_dsp_free(cfg, kiss_fftr_mem_length); ei_dsp_free(fft_output, n_fft_out_features * sizeof(kiss_fft_cpx)); return EIDSP_OK; } static int software_rfft(float *fft_input, fft_complex_t *output, size_t n_fft, size_t n_fft_out_features) { // create fftr context size_t kiss_fftr_mem_length; kiss_fftr_cfg cfg = kiss_fftr_alloc(n_fft, 0, NULL, NULL, &kiss_fftr_mem_length); if (!cfg) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } ei_dsp_register_alloc(kiss_fftr_mem_length, cfg); // execute the rfft operation kiss_fftr(cfg, fft_input, (kiss_fft_cpx*)output); ei_dsp_free(cfg, kiss_fftr_mem_length); return EIDSP_OK; } static int signal_get_data(const float *in_buffer, size_t offset, size_t length, float *out_ptr) { memcpy(out_ptr, in_buffer + offset, length * sizeof(float)); return 0; } static int signal_get_data_i16(int16_t *in_buffer, size_t offset, size_t length, int16_t *out_ptr) { memcpy(out_ptr, in_buffer + offset, length * sizeof(int16_t)); return 0; } #if EIDSP_USE_CMSIS_DSP /** * @brief The CMSIS std variance function with the same behaviour as the NumPy * implementation * @details Variance in CMSIS version is calculated using fSum / (float32_t)(blockSize - 1) * @param[in] pSrc Pointer to float block * @param[in] blockSize Number of floats in block * @param pResult The variance */ static void cmsis_arm_variance(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult) { uint32_t blkCnt; float32_t sum = 0.0f; float32_t fSum = 0.0f; float32_t fMean, fValue; const float32_t *pInput = pSrc; if (blockSize <= 1U) { *pResult = 0; return; } blkCnt = blockSize >> 2U; while (blkCnt > 0U) { sum += *pInput++; sum += *pInput++; sum += *pInput++; sum += *pInput++; blkCnt--; } /* Loop unrolling: Compute remaining outputs */ blkCnt = blockSize % 0x4U; while (blkCnt > 0U) { sum += *pInput++; blkCnt--; } fMean = sum / (float32_t)blockSize; pInput = pSrc; /* Loop unrolling: Compute 4 outputs at a time */ blkCnt = blockSize >> 2U; while (blkCnt > 0U) { fValue = *pInput++ - fMean; fSum += fValue * fValue; fValue = *pInput++ - fMean; fSum += fValue * fValue; fValue = *pInput++ - fMean; fSum += fValue * fValue; fValue = *pInput++ - fMean; fSum += fValue * fValue; blkCnt--; } /* Loop unrolling: Compute remaining outputs */ blkCnt = blockSize % 0x4U; while (blkCnt > 0U) { fValue = *pInput++ - fMean; fSum += fValue * fValue; blkCnt--; } /* Variance */ *pResult = fSum / (float32_t)(blockSize); } /** * @brief Copy of the numpy version explicitely using the CMSIS lib * for STD and Matrix transpose * @param input_matrix The input matrix * @param output_matrix The output matrix * * @return EIDSP error */ static int std_axis0_CMSIS(matrix_t *input_matrix, matrix_t *output_matrix) { arm_matrix_instance_f32 arm_in_matrix, arm_transposed_matrix; if (input_matrix->cols != output_matrix->rows) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } if (output_matrix->cols != 1) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } /* Copy input matrix to arm matrix */ arm_in_matrix.numRows = input_matrix->rows; arm_in_matrix.numCols = input_matrix->cols; arm_in_matrix.pData = &input_matrix->buffer[0]; /* Create transposed matrix */ arm_transposed_matrix.numRows = input_matrix->cols; arm_transposed_matrix.numCols = input_matrix->rows; arm_transposed_matrix.pData = (float *)ei_calloc(input_matrix->cols * input_matrix->rows * sizeof(float), 1); if (arm_transposed_matrix.pData == NULL) { EIDSP_ERR(EIDSP_OUT_OF_MEM); } int ret = arm_mat_trans_f32(&arm_in_matrix, &arm_transposed_matrix); if (ret != EIDSP_OK) { EIDSP_ERR(ret); } for (size_t row = 0; row < arm_transposed_matrix.numRows; row++) { float std; float var; cmsis_arm_variance(arm_transposed_matrix.pData + (row * arm_transposed_matrix.numCols), arm_transposed_matrix.numCols, &var); arm_sqrt_f32(var, &std); output_matrix->buffer[row] = std; } ei_free(arm_transposed_matrix.pData); return EIDSP_OK; } /** * @brief A copy of the CMSIS power function, adapted to calculate the third central moment * @details Calculates the sum of cubes of a block with the mean value subtracted. * @param[in] pSrc Pointer to float block * @param[in] blockSize Number of floats in block * @param[in] mean The mean to subtract from each value before cubing * @param pResult The third central moment of the input */ static void cmsis_arm_third_moment(const float32_t * pSrc, uint32_t blockSize, float32_t mean, float32_t * pResult) { uint32_t blkCnt; float32_t sum = 0.0f; float32_t in; /* Loop unrolling: Compute 4 outputs at a time */ blkCnt = blockSize >> 2U; while (blkCnt > 0U) { /* Compute Power and store result in a temporary variable, sum. */ in = *pSrc++; in = in - mean; sum += in * in * in; in = *pSrc++; in = in - mean; sum += in * in * in; in = *pSrc++; in = in - mean; sum += in * in * in; in = *pSrc++; in = in - mean; sum += in * in * in; /* Decrement loop counter */ blkCnt--; } /* Loop unrolling: Compute remaining outputs */ blkCnt = blockSize % 0x4U; while (blkCnt > 0U) { /* Compute Power and store result in a temporary variable, sum. */ in = *pSrc++; in = in - mean; sum += in * in * in; /* Decrement loop counter */ blkCnt--; } sum = sum / blockSize; /* Store result to destination */ *pResult = sum; } /** * @brief A copy of the CMSIS power function, adapted to calculate the fourth central moment * @details Calculates the sum of fourth powers of a block with the mean value subtracted. * @param[in] pSrc Pointer to float block * @param[in] blockSize Number of floats in block * @param[in] mean The mean to subtract from each value before calculating fourth power * @param pResult The fourth central moment of the input */ static void cmsis_arm_fourth_moment(const float32_t * pSrc, uint32_t blockSize, float32_t mean, float32_t * pResult) { uint32_t blkCnt; float32_t sum = 0.0f; float32_t in; /* Loop unrolling: Compute 4 outputs at a time */ blkCnt = blockSize >> 2U; while (blkCnt > 0U) { /* Compute Power and store result in a temporary variable, sum. */ in = *pSrc++; in = in - mean; float square; square = in * in; sum += square * square; in = *pSrc++; in = in - mean; square = in * in; sum += square * square; in = *pSrc++; in = in - mean; square = in * in; sum += square * square; in = *pSrc++; in = in - mean; square = in * in; sum += square * square; /* Decrement loop counter */ blkCnt--; } /* Loop unrolling: Compute remaining outputs */ blkCnt = blockSize % 0x4U; while (blkCnt > 0U) { /* Compute Power and store result in a temporary variable, sum. */ in = *pSrc++; in = in - mean; float square; square = in * in; sum += square * square; /* Decrement loop counter */ blkCnt--; } sum = sum / blockSize; /* Store result to destination */ *pResult = sum; } #endif // EIDSP_USE_CMSIS_DSP static uint8_t count_leading_zeros(uint32_t data) { if (data == 0U) { return 32U; } uint32_t count = 0U; uint32_t mask = 0x80000000U; while ((data & mask) == 0U) { count += 1U; mask = mask >> 1U; } return count; } static void sqrt_q15(int16_t in, int16_t *pOut) { int32_t bits_val1; int16_t number, temp1, var1, signBits1, half; float temp_float1; union { int32_t fracval; float floatval; } tempconv; number = in; /* If the input is a positive number then compute the signBits. */ if (number > 0) { signBits1 = count_leading_zeros(number) - 17; /* Shift by the number of signBits1 */ if ((signBits1 % 2) == 0) { number = number << signBits1; } else { number = number << (signBits1 - 1); } /* Calculate half value of the number */ half = number >> 1; /* Store the number for later use */ temp1 = number; /* Convert to float */ temp_float1 = number * 3.051757812500000e-005f; /* Store as integer */ tempconv.floatval = temp_float1; bits_val1 = tempconv.fracval; /* Subtract the shifted value from the magic number to give intial guess */ bits_val1 = 0x5f3759df - (bits_val1 >> 1); /* gives initial guess */ /* Store as float */ tempconv.fracval = bits_val1; temp_float1 = tempconv.floatval; /* Convert to integer format */ var1 = (int32_t)(temp_float1 * 16384); /* 1st iteration */ var1 = ((int16_t)( (int32_t)var1 * (0x3000 - ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >> 15)) << 2; /* 2nd iteration */ var1 = ((int16_t)( (int32_t)var1 * (0x3000 - ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >> 15)) << 2; /* 3rd iteration */ var1 = ((int16_t)( (int32_t)var1 * (0x3000 - ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >> 15)) << 2; /* Multiply the inverse square root with the original value */ var1 = ((int16_t)(((int32_t)temp1 * var1) >> 15)) << 1; /* Shift the output down accordingly */ if ((signBits1 % 2) == 0) { var1 = var1 >> (signBits1 / 2); } else { var1 = var1 >> ((signBits1 - 1) / 2); } *pOut = var1; } /* If the number is a negative number then store zero as its square root value */ else { *pOut = 0; } } #if EIDSP_USE_CMSIS_DSP /** * Initialize a CMSIS-DSP fast rfft structure * We do it this way as this means we can compile out fast_init calls which hints the compiler * to which tables can be removed */ static int cmsis_rfft_init_f32(arm_rfft_fast_instance_f32 *rfft_instance, const size_t n_fft) { // ARM cores (ex M55) with Helium extensions (MVEF) need special treatment (Issue 2843) #if EI_CLASSIFIER_HAS_FFT_INFO == 1 && !defined(ARM_MATH_MVEF) && !defined(EI_CLASSIFIER_LOAD_ALL_FFTS) arm_status status; switch (n_fft) { #if EI_CLASSIFIER_LOAD_FFT_32 == 1 case 32: { arm_cfft_instance_f32 *S = &(rfft_instance->Sint); S->fftLen = 16U; S->pTwiddle = NULL; S->bitRevLength = arm_cfft_sR_f32_len16.bitRevLength; S->pBitRevTable = arm_cfft_sR_f32_len16.pBitRevTable; S->pTwiddle = arm_cfft_sR_f32_len16.pTwiddle; rfft_instance->fftLenRFFT = 32U; rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32; status = ARM_MATH_SUCCESS; break; } #endif #if EI_CLASSIFIER_LOAD_FFT_64 == 1 case 64: { arm_cfft_instance_f32 *S = &(rfft_instance->Sint); S->fftLen = 32U; S->pTwiddle = NULL; S->bitRevLength = arm_cfft_sR_f32_len32.bitRevLength; S->pBitRevTable = arm_cfft_sR_f32_len32.pBitRevTable; S->pTwiddle = arm_cfft_sR_f32_len32.pTwiddle; rfft_instance->fftLenRFFT = 64U; rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64; status = ARM_MATH_SUCCESS; break; } #endif #if EI_CLASSIFIER_LOAD_FFT_128 == 1 case 128: { arm_cfft_instance_f32 *S = &(rfft_instance->Sint); S->fftLen = 64U; S->pTwiddle = NULL; S->bitRevLength = arm_cfft_sR_f32_len64.bitRevLength; S->pBitRevTable = arm_cfft_sR_f32_len64.pBitRevTable; S->pTwiddle = arm_cfft_sR_f32_len64.pTwiddle; rfft_instance->fftLenRFFT = 128U; rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128; status = ARM_MATH_SUCCESS; break; } #endif #if EI_CLASSIFIER_LOAD_FFT_256 == 1 case 256: { arm_cfft_instance_f32 *S = &(rfft_instance->Sint); S->fftLen = 128U; S->pTwiddle = NULL; S->bitRevLength = arm_cfft_sR_f32_len128.bitRevLength; S->pBitRevTable = arm_cfft_sR_f32_len128.pBitRevTable; S->pTwiddle = arm_cfft_sR_f32_len128.pTwiddle; rfft_instance->fftLenRFFT = 256U; rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256; status = ARM_MATH_SUCCESS; break; } #endif #if EI_CLASSIFIER_LOAD_FFT_512 == 1 case 512: { arm_cfft_instance_f32 *S = &(rfft_instance->Sint); S->fftLen = 256U; S->pTwiddle = NULL; S->bitRevLength = arm_cfft_sR_f32_len256.bitRevLength; S->pBitRevTable = arm_cfft_sR_f32_len256.pBitRevTable; S->pTwiddle = arm_cfft_sR_f32_len256.pTwiddle; rfft_instance->fftLenRFFT = 512U; rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512; status = ARM_MATH_SUCCESS; break; } #endif #if EI_CLASSIFIER_LOAD_FFT_1024 == 1 case 1024: { arm_cfft_instance_f32 *S = &(rfft_instance->Sint); S->fftLen = 512U; S->pTwiddle = NULL; S->bitRevLength = arm_cfft_sR_f32_len512.bitRevLength; S->pBitRevTable = arm_cfft_sR_f32_len512.pBitRevTable; S->pTwiddle = arm_cfft_sR_f32_len512.pTwiddle; rfft_instance->fftLenRFFT = 1024U; rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024; status = ARM_MATH_SUCCESS; break; } #endif #if EI_CLASSIFIER_LOAD_FFT_2048 == 1 case 2048: { arm_cfft_instance_f32 *S = &(rfft_instance->Sint); S->fftLen = 1024U; S->pTwiddle = NULL; S->bitRevLength = arm_cfft_sR_f32_len1024.bitRevLength; S->pBitRevTable = arm_cfft_sR_f32_len1024.pBitRevTable; S->pTwiddle = arm_cfft_sR_f32_len1024.pTwiddle; rfft_instance->fftLenRFFT = 2048U; rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048; status = ARM_MATH_SUCCESS; break; } #endif #if EI_CLASSIFIER_LOAD_FFT_4096 == 1 case 4096: { arm_cfft_instance_f32 *S = &(rfft_instance->Sint); S->fftLen = 2048U; S->pTwiddle = NULL; S->bitRevLength = arm_cfft_sR_f32_len2048.bitRevLength; S->pBitRevTable = arm_cfft_sR_f32_len2048.pBitRevTable; S->pTwiddle = arm_cfft_sR_f32_len2048.pTwiddle; rfft_instance->fftLenRFFT = 4096U; rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096; status = ARM_MATH_SUCCESS; break; } #endif default: return EIDSP_FFT_TABLE_NOT_LOADED; } return status; #else return arm_rfft_fast_init_f32(rfft_instance, n_fft); #endif } #endif // #if EIDSP_USE_CMSIS_DSP /** * Power spectrum of a frame * @param frame Row of a frame * @param frame_size Size of the frame * @param out_buffer Out buffer, size should be fft_points * @param out_buffer_size Buffer size * @param fft_points (int): The length of FFT. If fft_length is greater than frame_len, the frames will be zero-padded. * @returns EIDSP_OK if OK */ static int power_spectrum( float *frame, size_t frame_size, float *out_buffer, size_t out_buffer_size, uint16_t fft_points) { if (out_buffer_size != static_cast(fft_points / 2 + 1)) { EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH); } int r = numpy::rfft(frame, frame_size, out_buffer, out_buffer_size, fft_points); if (r != EIDSP_OK) { return r; } for (size_t ix = 0; ix < out_buffer_size; ix++) { out_buffer[ix] = (1.0 / static_cast(fft_points)) * (out_buffer[ix] * out_buffer[ix]); } return EIDSP_OK; } static int welch_max_hold( float *input, size_t input_size, float *output, size_t start_bin, size_t stop_bin, size_t fft_points, bool do_overlap) { // save off one point to put back, b/c we're going to calculate in place float saved_point = 0; bool do_saved_point = false; size_t fft_out_size = fft_points / 2 + 1; float *fft_out; ei_unique_ptr_t p_fft_out(nullptr, ei_free); if (input_size < fft_points) { fft_out = (float *)ei_calloc(fft_out_size, sizeof(float)); p_fft_out.reset(fft_out); } else { // set input as output for in place operation fft_out = input; // save off one point to put back, b/c we're going to calculate in place saved_point = input[fft_points / 2]; do_saved_point = true; } // init the output to zeros memset(output, 0, sizeof(float) * (stop_bin - start_bin)); int input_ix = 0; while (input_ix < (int)input_size) { // Figure out if we need any zero padding size_t n_input_points = input_ix + fft_points <= input_size ? fft_points : input_size - input_ix; EI_TRY(power_spectrum( input + input_ix, n_input_points, fft_out, fft_points / 2 + 1, fft_points)); int j = 0; // keep the max of the last frame and everything before for (size_t i = start_bin; i < stop_bin; i++) { output[j] = std::max(output[j], fft_out[i]); j++; } if (do_overlap) { if (do_saved_point) { // This step only matters first time through input[fft_points / 2] = saved_point; do_saved_point = false; } input_ix += fft_points / 2; } else { input_ix += fft_points; } } return EIDSP_OK; } static float variance(float *input, size_t size) { // Use CMSIS either way. Will fall back to straight C when needed float temp; #if EIDSP_USE_CMSIS_DSP arm_var_f32(input, size, &temp); #else float mean = 0.0f; for (size_t i = 0; i < size; i++) { mean += input[i]; } mean /= size; temp = 0.0f; for (size_t i = 0; i < size; i++) { temp += (input[i] - mean) * (input[i] - mean); } temp /= (size - 1); #endif return temp; } /** * This function handle the issue with zero values if the are exposed * to become an argument for any log function. * @param input Array * @param input_size Size of array * @returns void */ static void zero_handling(float *input, size_t input_size) { for (size_t ix = 0; ix < input_size; ix++) { if (input[ix] == 0) { input[ix] = 1e-10; } } } /** * This function handle the issue with zero values if the are exposed * to become an argument for any log function. * @param input Matrix * @returns void */ static void zero_handling(matrix_t *input) { zero_handling(input->buffer, input->rows * input->cols); } __attribute__((unused)) static void scale(fvec& v, float scale) { for (auto& x : v) { x *= scale; } } __attribute__((unused)) static void sub(fvec& v, float b) { for (auto& x : v) { x -= b; } } __attribute__((unused)) static void mul(float* y, const float* x, float* b, size_t n) { for (size_t i = 0; i < n; i++) { y[i] = x[i] * b[i]; } } __attribute__((unused)) static fvec diff(const float* v, size_t n) { fvec d(n - 1); for (size_t i = 0; i < d.size(); i++) { d[i] = v[i + 1] - v[i]; } return d; } __attribute__((unused)) static float sum(const float* v, size_t n) { float sum = 0; for (size_t i = 0; i < n; i++) { sum += v[i]; } return sum; } static float mean(const fvec& v) { float mean = 0; for (auto x : v) { mean += x; } mean /= v.size(); return mean; } static float mean(const float* v, size_t n) { float mean = 0; for (size_t i = 0; i < n; i++) { mean += v[i]; } mean /= n; return mean; } static float median(const float* v, size_t n) { fvec vc(n); std::copy(v, v + n, vc.begin()); std::sort(vc.begin(), vc.end()); if (vc.size() % 2 == 0) { return (vc[vc.size() / 2 - 1] + vc[vc.size() / 2]) / 2; } return vc[vc.size() / 2]; } __attribute__((unused)) static float median(const fvec& v) { return median(v.data(), v.size()); } static float stddev(const float* v, size_t n, float m /* mean */, int ddof = 0) { float var = 0; for (size_t i = 0; i < n; i++) { var += (v[i] - m) * (v[i] - m); } var /= n - ddof; return sqrt(var); } __attribute__((unused)) static float stddev(const float* v, size_t n) { return stddev(v, n, mean(v, n), 0); } __attribute__((unused)) static float stddev(const float* v, size_t n, int ddof) { return stddev(v, n, mean(v, n), ddof); } __attribute__((unused)) static float stddev(const fvec& v, int ddof = 0) { return stddev(v.data(), v.size(), mean(v), ddof); } static float rms(const float* v, size_t n) { float rms = 0; for (size_t i = 0; i < n; i++) { rms += v[i] * v[i]; } rms /= n; return sqrt(rms); } __attribute__((unused)) static float rms(const fvec& v) { return rms(v.data(), v.size()); } template static float max(const ei_vector& v) { return *std::max_element(v.begin(), v.end()); } __attribute__((unused)) static float max(const float* v, size_t n) { return *std::max_element(v, v + n); } template static float min(const ei_vector& v) { return *std::min_element(v.begin(), v.end()); } __attribute__((unused)) static float min(const float* v, size_t n) { return *std::min_element(v, v + n); } __attribute__((unused)) static int argmax(const fvec& v, int start, int end) { return std::max_element(v.begin() + start, v.begin() + end) - v.begin(); } __attribute__((unused)) static fvec divide(float num, const float* den, size_t n) { fvec v(n); for (size_t i = 0; i < n; i++) { v[i] = num / den[i]; } return v; } __attribute__((unused)) static ivec histogram(const float* x, size_t n, int a, int b, int inc) { int num_bins = (b - a) / inc; ivec bins(num_bins, 0); for (size_t i = 0; i < n; i++) { int bin = (int)((x[i] - a) / inc); if (bin >= 0 && bin < num_bins) { bins[bin]++; } } return bins; } __attribute__((unused)) static fvec cumsum(const float* v, size_t n) { fvec c(n); c[0] = v[0]; for (size_t i = 1; i < n; i++) { c[i] = c[i - 1] + v[i]; } return c; } __attribute__((unused)) static fvec arrange(float start, float end, float step) { assert(start < end); assert(step > 0); fvec v((size_t)((end - start) / step)); for (size_t i = 0; i < v.size(); i++) { v[i] = start + i * step; } return v; } __attribute__((unused)) static void add(fvec& v, fvec& b) { for (size_t i = 0; i < v.size(); i++) { v[i] += b[i]; } } __attribute__((unused)) static float trapz(const fvec& x, const fvec& y, size_t lo, size_t hi) { float area = 0; for (size_t i = lo; i < hi; i++) { area += (x[i + 1] - x[i]) * (y[i + 1] + y[i]) / 2; } return area; } __attribute__((unused)) static fvec quantile(const fvec& v, size_t start, size_t end, const fvec& q) { end = std::min(end, v.size()); fvec vc(end - start); std::copy(v.begin() + start, v.begin() + end, vc.begin()); std::sort(vc.begin(), vc.end()); fvec res(q.size()); for (size_t i = 0; i < q.size(); i++) { res[i] = vc[q[i] * vc.size()]; } return res; } __attribute__((unused)) static fvec quantile(const float* v, size_t n, const fvec& q) { fvec vc(n); std::copy(v, v + n, vc.begin()); std::sort(vc.begin(), vc.end()); fvec res(q.size()); for (size_t i = 0; i < q.size(); i++) { res[i] = vc[q[i] * vc.size()]; } return res; } static float dot(const float* x, const float* y, size_t n) { float res = 0; for (size_t i = 0; i < n; i++) { res += x[i] * y[i]; } return res; } __attribute__((unused)) static float cosine_similarity(const fvec& x, const fvec& y) { float xy = dot(x.data(), y.data(), x.size()); float magx = dot(x.data(), x.data(), x.size()); float magy = dot(y.data(), y.data(), y.size()); xy /= sqrt(magx * magy); return xy; } __attribute__((unused)) static void ln(fvec& v) { for (auto& x : v) { x = log(x); } } static size_t next_power_of_2(size_t x) { size_t res = 1; while (res < x) { res *= 2; } return res; } static void detrend(float* data, size_t n) { // Calculate the mean of the data points float mean = 0.0; for (size_t i = 0; i < n; i++) { mean += data[i]; } mean /= n; // Calculate the slope of the best-fit line float x_mean = (n + 1) / 2.0; float y_mean = mean; float numerator = 0.0; float denominator = 0.0; for (size_t i = 0; i < n; i++) { numerator += (i + 1 - x_mean) * (data[i] - y_mean); denominator += (i + 1 - x_mean) * (i + 1 - x_mean); } float slope = numerator / denominator; // Subtract the best-fit line from the data points to get the detrended data for (size_t i = 0; i < n; i++) { data[i] = data[i] - (slope * (i + 1)); } // Calculate the mean of the detrended data float detrended_mean = 0.0; for (size_t i = 0; i < n; i++) { detrended_mean += data[i]; } detrended_mean /= n; // Subtract the mean of the detrended data from each element for (size_t i = 0; i < n; i++) { data[i] -= detrended_mean; } } static fvec detrend(const fvec& data) { auto ret = data; detrend(ret.data(), ret.size()); return ret; } }; struct fmat { ei_matrix* mat = nullptr; fmat(size_t rows, size_t cols) { mat = new ei_matrix(rows, cols); assert(mat); } ~fmat() { delete mat; } void resize(size_t rows, size_t cols) { delete mat; mat = new ei_matrix(rows, cols); } float* operator[](size_t i) { if (mat == nullptr || i >= mat->rows) { return nullptr; } return mat->get_row_ptr(i); } void fill(float x) { if (mat == nullptr) { return; } for (size_t i = 0; i < mat->rows; i++) { for (size_t j = 0; j < mat->cols; j++) { (*this)[i][j] = x; } } } void fill_col(size_t col, float x) { if (mat == nullptr) { return; } for (size_t i = 0; i < mat->rows; i++) { (*this)[i][col] = x; } } void fill_row(size_t row, float x) { if (mat == nullptr) { return; } for (size_t i = 0; i < mat->cols; i++) { (*this)[row][i] = x; } } }; } // namespace ei #endif // _EIDSP_NUMPY_H_