luisomoreau's picture
Upload 1028 files
b7b614e
/*
* Copyright (c) 2022 EdgeImpulse Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS
* IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _EIDSP_NUMPY_H_
#define _EIDSP_NUMPY_H_
// It is valid to include the SDK without a model, but there is information that we need
// in model_metadata.h (like the FFT tables used).
// If the compiler does not support the __has_include operator we assume that the file
// exists: the fallback is a function-like macro that always evaluates to 1.
// (An object-like `#define __has_include 1` would turn `__has_include("x.h")` into
// `1("x.h")`, which is not a valid #if expression.)
#ifndef __has_include
#define __has_include(x) 1
#endif // __has_include
#include <stdint.h>
#include <string.h>
#include <stddef.h>
#include <cfloat>
#include "ei_vector.h"
#include <algorithm>
#include "numpy_types.h"
#include "config.hpp"
#include "returntypes.hpp"
#include "memory.hpp"
#include "ei_utils.h"
#include "dct/fast-dct-fft.h"
#include "kissfft/kiss_fftr.h"
#if __has_include("model-parameters/model_metadata.h")
#include "model-parameters/model_metadata.h"
#endif
#if EIDSP_USE_CMSIS_DSP
#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_math.h"
#include "edge-impulse-sdk/CMSIS/DSP/Include/arm_const_structs.h"
#endif
// For the following CMSIS includes, we want to use the C fallback, so include whether or not we set the CMSIS flag
#include "edge-impulse-sdk/CMSIS/DSP/Include/dsp/statistics_functions.h"
#ifdef __MBED__
#include "mbed.h"
#else
#include <functional>
#endif // __MBED__
#define EI_MAX_UINT16 65535
namespace ei {
// Shorthand aliases for ei_vector instantiated with float and int elements.
using fvec = ei_vector<float>;
using ivec = ei_vector<int>;
// clang-format off
// Lookup table for quantized values between 0.0f and 1.0f.
// A uint8_t code is used as an index into this table (see dequantize_zero_one()).
// quantize_zero_one() runs a binary search over it, so the entries must stay in
// ascending order. The trailing 1.0f entries pad the end of the table
// (presumably so that every uint8_t code maps to a valid entry - TODO confirm the length).
static constexpr float quantized_values_one_zero[] = { (0.0f / 1.0f), (1.0f / 100.0f), (2.0f / 100.0f), (3.0f / 100.0f), (4.0f / 100.0f), (1.0f / 22.0f), (1.0f / 21.0f), (1.0f / 20.0f), (1.0f / 19.0f), (1.0f / 18.0f), (1.0f / 17.0f), (6.0f / 100.0f), (1.0f / 16.0f), (1.0f / 15.0f), (7.0f / 100.0f), (1.0f / 14.0f), (1.0f / 13.0f), (8.0f / 100.0f), (1.0f / 12.0f), (9.0f / 100.0f), (1.0f / 11.0f), (2.0f / 21.0f), (1.0f / 10.0f), (2.0f / 19.0f), (11.0f / 100.0f), (1.0f / 9.0f), (2.0f / 17.0f), (12.0f / 100.0f), (1.0f / 8.0f), (13.0f / 100.0f), (2.0f / 15.0f), (3.0f / 22.0f), (14.0f / 100.0f), (1.0f / 7.0f), (3.0f / 20.0f), (2.0f / 13.0f), (3.0f / 19.0f), (16.0f / 100.0f), (1.0f / 6.0f), (17.0f / 100.0f), (3.0f / 17.0f), (18.0f / 100.0f), (2.0f / 11.0f), (3.0f / 16.0f), (19.0f / 100.0f), (4.0f / 21.0f), (1.0f / 5.0f), (21.0f / 100.0f), (4.0f / 19.0f), (3.0f / 14.0f), (22.0f / 100.0f), (2.0f / 9.0f), (5.0f / 22.0f), (23.0f / 100.0f), (3.0f / 13.0f), (4.0f / 17.0f), (5.0f / 21.0f), (24.0f / 100.0f), (1.0f / 4.0f), (26.0f / 100.0f), (5.0f / 19.0f), (4.0f / 15.0f), (27.0f / 100.0f), (3.0f / 11.0f), (5.0f / 18.0f), (28.0f / 100.0f), (2.0f / 7.0f), (29.0f / 100.0f), (5.0f / 17.0f), (3.0f / 10.0f), (4.0f / 13.0f), (31.0f / 100.0f), (5.0f / 16.0f), (6.0f / 19.0f), (7.0f / 22.0f), (32.0f / 100.0f), (33.0f / 100.0f), (1.0f / 3.0f), (34.0f / 100.0f), (7.0f / 20.0f), (6.0f / 17.0f), (5.0f / 14.0f), (36.0f / 100.0f), (4.0f / 11.0f), (7.0f / 19.0f), (37.0f / 100.0f), (3.0f / 8.0f), (38.0f / 100.0f), (8.0f / 21.0f), (5.0f / 13.0f), (7.0f / 18.0f), (39.0f / 100.0f), (2.0f / 5.0f), (9.0f / 22.0f), (41.0f / 100.0f), (7.0f / 17.0f), (5.0f / 12.0f), (42.0f / 100.0f), (8.0f / 19.0f), (3.0f / 7.0f), (43.0f / 100.0f), (7.0f / 16.0f), (44.0f / 100.0f), (4.0f / 9.0f), (9.0f / 20.0f), (5.0f / 11.0f), (46.0f / 100.0f), (6.0f / 13.0f), (7.0f / 15.0f), (47.0f / 100.0f), (8.0f / 17.0f), (9.0f / 19.0f), (10.0f / 21.0f), (48.0f / 100.0f), (49.0f / 100.0f), (1.0f / 2.0f), (51.0f / 100.0f), (52.0f / 
100.0f), (11.0f / 21.0f), (10.0f / 19.0f), (9.0f / 17.0f), (53.0f / 100.0f), (8.0f / 15.0f), (7.0f / 13.0f), (54.0f / 100.0f), (6.0f / 11.0f), (11.0f / 20.0f), (5.0f / 9.0f), (56.0f / 100.0f), (9.0f / 16.0f), (57.0f / 100.0f), (4.0f / 7.0f), (11.0f / 19.0f), (58.0f / 100.0f), (7.0f / 12.0f), (10.0f / 17.0f), (59.0f / 100.0f), (13.0f / 22.0f), (3.0f / 5.0f), (61.0f / 100.0f), (11.0f / 18.0f), (8.0f / 13.0f), (13.0f / 21.0f), (62.0f / 100.0f), (5.0f / 8.0f), (63.0f / 100.0f), (12.0f / 19.0f), (7.0f / 11.0f), (64.0f / 100.0f), (9.0f / 14.0f), (11.0f / 17.0f), (13.0f / 20.0f), (66.0f / 100.0f), (2.0f / 3.0f), (67.0f / 100.0f), (68.0f / 100.0f), (15.0f / 22.0f), (13.0f / 19.0f), (11.0f / 16.0f), (69.0f / 100.0f), (9.0f / 13.0f), (7.0f / 10.0f), (12.0f / 17.0f), (71.0f / 100.0f), (5.0f / 7.0f), (72.0f / 100.0f), (13.0f / 18.0f), (8.0f / 11.0f), (73.0f / 100.0f), (11.0f / 15.0f), (14.0f / 19.0f), (74.0f / 100.0f), (3.0f / 4.0f), (76.0f / 100.0f), (16.0f / 21.0f), (13.0f / 17.0f), (10.0f / 13.0f), (77.0f / 100.0f), (17.0f / 22.0f), (7.0f / 9.0f), (78.0f / 100.0f), (11.0f / 14.0f), (15.0f / 19.0f), (79.0f / 100.0f), (4.0f / 5.0f), (17.0f / 21.0f), (81.0f / 100.0f), (13.0f / 16.0f), (9.0f / 11.0f), (82.0f / 100.0f), (14.0f / 17.0f), (83.0f / 100.0f), (5.0f / 6.0f), (84.0f / 100.0f), (16.0f / 19.0f), (11.0f / 13.0f), (17.0f / 20.0f), (6.0f / 7.0f), (86.0f / 100.0f), (19.0f / 22.0f), (13.0f / 15.0f), (87.0f / 100.0f), (7.0f / 8.0f), (88.0f / 100.0f), (15.0f / 17.0f), (8.0f / 9.0f), (89.0f / 100.0f), (17.0f / 19.0f), (9.0f / 10.0f), (19.0f / 21.0f), (10.0f / 11.0f), (91.0f / 100.0f), (11.0f / 12.0f), (92.0f / 100.0f), (12.0f / 13.0f), (13.0f / 14.0f), (93.0f / 100.0f), (14.0f / 15.0f), (15.0f / 16.0f), (94.0f / 100.0f), (16.0f / 17.0f), (17.0f / 18.0f), (18.0f / 19.0f), (19.0f / 20.0f), (20.0f / 21.0f), (21.0f / 22.0f), (96.0f / 100.0f), (97.0f / 100.0f), (98.0f / 100.0f), (99.0f / 100.0f), (1.0f / 1.0f) ,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
// clang-format on
class numpy {
public:
/**
 * Square root of a single float.
 * Uses arm_sqrt_f32 when EIDSP_USE_CMSIS_DSP is enabled, sqrtf otherwise.
 * @param x Input value
 * @returns The square root of x
 */
static float sqrt(float x) {
#if EIDSP_USE_CMSIS_DSP
    float root;
    arm_sqrt_f32(x, &root);
    return root;
#else
    const float root = sqrtf(x);
    return root;
#endif
}
/**
 * Roll array elements in place.
 * Elements that roll beyond the last position are re-introduced at the first.
 * The shift is taken modulo the array size, so negative shifts and shifts larger
 * than the array wrap around instead of addressing memory outside the array.
 * @param input_array Array to roll (modified in place)
 * @param input_array_size Number of elements in input_array
 * @param shift The number of places by which elements are shifted
 *              (positive: towards the end, negative: towards the start).
 * @returns EIDSP_OK if OK
 */
static int roll(float *input_array, size_t input_array_size, int shift) {
    // nothing to move in an empty array (and the modulo below needs a non-zero size)
    if (input_array_size == 0) {
        return EIDSP_OK;
    }
    // reduce the shift into the range [0, input_array_size)
    const long long size = static_cast<long long>(input_array_size);
    long long wrapped = static_cast<long long>(shift) % size;
    if (wrapped < 0) {
        wrapped += size;
    }
    if (wrapped == 0) {
        return EIDSP_OK;
    }
    // making element (size - wrapped) the new first element moves every element
    // `wrapped` places towards the end; std::rotate does this without a scratch buffer
    float *new_first = input_array + (input_array_size - static_cast<size_t>(wrapped));
    std::rotate(input_array, new_first, input_array + input_array_size);
    return EIDSP_OK;
}
/**
 * Roll array elements in place (int variant).
 * Elements that roll beyond the last position are re-introduced at the first.
 * The shift is taken modulo the array size, so negative shifts and shifts larger
 * than the array wrap around instead of addressing memory outside the array.
 * @param input_array Array to roll (modified in place)
 * @param input_array_size Number of elements in input_array
 * @param shift The number of places by which elements are shifted
 *              (positive: towards the end, negative: towards the start).
 * @returns EIDSP_OK if OK
 */
static int roll(int *input_array, size_t input_array_size, int shift) {
    // nothing to move in an empty array (and the modulo below needs a non-zero size)
    if (input_array_size == 0) {
        return EIDSP_OK;
    }
    // reduce the shift into the range [0, input_array_size)
    const long long size = static_cast<long long>(input_array_size);
    long long wrapped = static_cast<long long>(shift) % size;
    if (wrapped < 0) {
        wrapped += size;
    }
    if (wrapped == 0) {
        return EIDSP_OK;
    }
    // making element (size - wrapped) the new first element moves every element
    // `wrapped` places towards the end; std::rotate does this without a scratch buffer
    int *new_first = input_array + (input_array_size - static_cast<size_t>(wrapped));
    std::rotate(input_array, new_first, input_array + input_array_size);
    return EIDSP_OK;
}
/**
 * Roll array elements in place (int16_t variant).
 * Elements that roll beyond the last position are re-introduced at the first.
 * The shift is taken modulo the array size, so negative shifts and shifts larger
 * than the array wrap around instead of addressing memory outside the array.
 * @param input_array Array to roll (modified in place)
 * @param input_array_size Number of elements in input_array
 * @param shift The number of places by which elements are shifted
 *              (positive: towards the end, negative: towards the start).
 * @returns EIDSP_OK if OK
 */
static int roll(int16_t *input_array, size_t input_array_size, int shift) {
    // nothing to move in an empty array (and the modulo below needs a non-zero size)
    if (input_array_size == 0) {
        return EIDSP_OK;
    }
    // reduce the shift into the range [0, input_array_size)
    const long long size = static_cast<long long>(input_array_size);
    long long wrapped = static_cast<long long>(shift) % size;
    if (wrapped < 0) {
        wrapped += size;
    }
    if (wrapped == 0) {
        return EIDSP_OK;
    }
    // making element (size - wrapped) the new first element moves every element
    // `wrapped` places towards the end; std::rotate does this without a scratch buffer
    int16_t *new_first = input_array + (input_array_size - static_cast<size_t>(wrapped));
    std::rotate(input_array, new_first, input_array + input_array_size);
    return EIDSP_OK;
}
/**
 * Sum all elements of a float array, accumulating front to back.
 * @param input_array Array to sum
 * @param input_array_size Number of elements in input_array
 * @returns The sum (0.0f for an empty array)
 */
static float sum(float *input_array, size_t input_array_size) {
    float total = 0.0f;
    const float *cursor = input_array;
    size_t remaining = input_array_size;
    while (remaining > 0) {
        total += *cursor;
        cursor++;
        remaining--;
    }
    return total;
}
/**
 * Multiply two matrices (MxN * NxK matrix)
 * @param matrix1 Pointer to matrix1 (MxN)
 * @param matrix2 Pointer to matrix2 (NxK)
 * @param out_matrix Pointer to out matrix (MxK), overwritten with the product
 * @returns EIDSP_OK if OK, EIDSP_MATRIX_SIZE_MISMATCH when the dimensions do not line up,
 *          EIDSP_NARROWING (CMSIS-DSP build) when a dimension does not fit in 16 bits
 */
static int dot(matrix_t *matrix1, matrix_t *matrix2, matrix_t *out_matrix) {
    // inner dimensions must agree
    if (matrix1->cols != matrix2->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    // no. of rows in matrix1 and no. of cols in matrix2 determine the output shape
    if (matrix1->rows != out_matrix->rows || matrix2->cols != out_matrix->cols) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

#if EIDSP_USE_CMSIS_DSP
    // arm_matrix_instance_f32 is filled with 16-bit dimensions below, so refuse anything larger
    if (matrix1->rows > EI_MAX_UINT16 || matrix1->cols > EI_MAX_UINT16 || matrix2->rows > EI_MAX_UINT16 ||
        matrix2->cols > EI_MAX_UINT16 || out_matrix->rows > EI_MAX_UINT16 || out_matrix->cols > EI_MAX_UINT16) {
        return EIDSP_NARROWING;
    }

    const arm_matrix_instance_f32 m1 = { static_cast<uint16_t>(matrix1->rows), static_cast<uint16_t>(matrix1->cols), matrix1->buffer };
    const arm_matrix_instance_f32 m2 = { static_cast<uint16_t>(matrix2->rows), static_cast<uint16_t>(matrix2->cols), matrix2->buffer };
    arm_matrix_instance_f32 mo = { static_cast<uint16_t>(out_matrix->rows), static_cast<uint16_t>(out_matrix->cols), out_matrix->buffer };
    int status = arm_mat_mult_f32(&m1, &m2, &mo);
    if (status != ARM_MATH_SUCCESS) {
        EIDSP_ERR(status);
    }
#else
    // dot_by_row accumulates into the output in this build, so start from zero
    memset(out_matrix->buffer, 0, out_matrix->rows * out_matrix->cols * sizeof(float));

    for (size_t i = 0; i < matrix1->rows; i++) {
        int ret = dot_by_row(i,
            matrix1->buffer + (i * matrix1->cols),
            matrix1->cols,
            matrix2,
            out_matrix);
        // do not silently drop a failure reported for a single row
        if (ret != EIDSP_OK) {
            EIDSP_ERR(ret);
        }
    }
#endif

    return EIDSP_OK;
}
/**
 * Multiply a float matrix with a quantized matrix (MxN * NxK matrix)
 * @param matrix1 Pointer to matrix1 (MxN)
 * @param matrix2 Pointer to quantized matrix2 (NxK)
 * @param out_matrix Pointer to out matrix (MxK), overwritten with the product
 * @returns EIDSP_OK if OK, EIDSP_MATRIX_SIZE_MISMATCH when the dimensions do not line up
 */
static int dot(matrix_t *matrix1,
    quantized_matrix_t *matrix2,
    matrix_t *out_matrix)
{
    // inner dimensions must agree
    if (matrix1->cols != matrix2->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    // no. of rows in matrix1 and no. of cols in matrix2 determine the output shape
    if (matrix1->rows != out_matrix->rows || matrix2->cols != out_matrix->cols) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    memset(out_matrix->buffer, 0, out_matrix->rows * out_matrix->cols * sizeof(float));

    for (size_t i = 0; i < matrix1->rows; i++) {
        int ret = dot_by_row(i,
            matrix1->buffer + (i * matrix1->cols),
            matrix1->cols,
            matrix2,
            out_matrix);
        // do not silently drop a failure reported for a single row
        if (ret != EIDSP_OK) {
            EIDSP_ERR(ret);
        }
    }

    return EIDSP_OK;
}
/**
 * Compute one output row of a matrix product (row i of MxN * NxK).
 * In the CMSIS-DSP build the output row is overwritten by arm_mat_mult_f32; in the
 * plain C++ build the result is ADDED to the existing output row, so the caller is
 * expected to zero the output first (as dot() does).
 * @param i matrix1 row index (selects the row of out_matrix that is written)
 * @param row matrix1 row (N values)
 * @param matrix1_cols matrix1 row size (1xN)
 * @param matrix2 Pointer to matrix2 (NxK)
 * @param out_matrix Pointer to out matrix (MxK)
 * @returns EIDSP_OK if OK
 */
static int dot_by_row(int i, float *row, uint32_t matrix1_cols, matrix_t *matrix2, matrix_t *out_matrix) {
    if (matrix1_cols != matrix2->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

#if EIDSP_USE_CMSIS_DSP
    // the CMSIS matrix instances below are filled with 16-bit dimensions
    const bool dims_fit = matrix1_cols <= EI_MAX_UINT16 && matrix2->rows <= EI_MAX_UINT16 &&
        matrix2->cols <= EI_MAX_UINT16 && out_matrix->cols <= EI_MAX_UINT16;
    if (!dims_fit) {
        return EIDSP_NARROWING;
    }

    const arm_matrix_instance_f32 lhs = { 1, static_cast<uint16_t>(matrix1_cols), row };
    const arm_matrix_instance_f32 rhs = { static_cast<uint16_t>(matrix2->rows), static_cast<uint16_t>(matrix2->cols), matrix2->buffer };
    arm_matrix_instance_f32 dst = { 1, static_cast<uint16_t>(out_matrix->cols), out_matrix->buffer + (i * out_matrix->cols) };
    int status = arm_mat_mult_f32(&lhs, &rhs, &dst);
    if (status != ARM_MATH_SUCCESS) {
        EIDSP_ERR(status);
    }
#else
    for (size_t out_col = 0; out_col < matrix2->cols; out_col++) {
        // dot product of the row with column out_col of matrix2
        float acc = 0.0f;
        for (size_t inner = 0; inner < matrix1_cols; inner++) {
            acc += row[inner] * matrix2->buffer[inner * matrix2->cols + out_col];
        }
        out_matrix->buffer[i * matrix2->cols + out_col] += acc;
    }
#endif

    return EIDSP_OK;
}
/**
 * Compute one output row of a matrix product against a quantized matrix
 * (row i of MxN * NxK). The output row is overwritten.
 * @param i matrix1 row index (selects the row of out_matrix that is written)
 * @param row matrix1 row (N values)
 * @param matrix1_cols matrix1 row size
 * @param matrix2 Pointer to quantized matrix2 (NxK)
 * @param out_matrix Pointer to out matrix (MxK)
 * @returns EIDSP_OK if OK
 */
static int dot_by_row(int i, float *row, size_t matrix1_cols,
    quantized_matrix_t *matrix2, matrix_t *out_matrix)
{
    if (matrix1_cols != matrix2->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    // size_t counters: a uint16_t counter wraps at 65535 and would never reach a larger bound
    for (size_t j = 0; j < matrix2->cols; j++) {
        float tmp = 0.0f;
        for (size_t k = 0; k < matrix1_cols; k++) {
            const uint8_t u8 = matrix2->buffer[k * matrix2->cols + j];
            if (u8) { // this matrix appears to be very sparsely populated
                tmp += row[k] * quantized_values_one_zero[u8];
            }
        }
        out_matrix->buffer[i * matrix2->cols + j] = tmp;
    }

    return EIDSP_OK;
}
/**
 * Transpose a matrix in place (from MxN to NxM) by following permutation cycles.
 * A temporary ei_vector<bool> with one flag per element tracks which elements
 * have already been moved; the matrix data itself is not copied.
 * @param matrix Matrix to transpose; rows and cols are swapped on return
 */
static void transpose_in_place(matrix_t *matrix) {
    // A matrix with fewer than two elements is already its own transpose. The cycle
    // walk below starts at index 1, which would be outside the buffer in that case
    // (and `count - 1` would wrap around for an empty matrix).
    const size_t element_count = matrix->cols * matrix->rows;
    if (element_count < 2) {
        std::swap(matrix->rows, matrix->cols);
        return;
    }

    size_t size = element_count - 1; // index of the last element
    float temp; // temp for swap
    size_t next; // next item to swap
    size_t cycleBegin; // index of start of cycle
    size_t i; // location in matrix
    size_t all_done_mark = 1;
    ei_vector<bool> done(size+1,false);

    i = 1; // Note that matrix[0] and last element of matrix won't move
    while (1)
    {
        cycleBegin = i;
        temp = matrix->buffer[i];
        do
        {
            size_t col = i % matrix->cols;
            size_t row = i / matrix->cols;
            // swap row and col to make new idx, b/c we want to know where in the transposed matrix
            next = col*matrix->rows + row;
            // drop the carried value into its destination and pick up what was there
            float temp2 = matrix->buffer[next];
            matrix->buffer[next] = temp;
            temp = temp2;
            done[next] = true;
            i = next;
        }
        while (i != cycleBegin);

        // start next cycle by find next not done
        for (i = all_done_mark; done[i]; i++) {
            all_done_mark++; // move the high water mark so we don't look again
            if(i>=size) { goto LOOP_END; }
        }
    }
    LOOP_END:
    // finally, swap the row and column dimensions
    std::swap(matrix->rows, matrix->cols);
}
/**
 * Transpose a matrix, source is destination (from MxN to NxM)
 * Note: the transpose(float *, int, int) overload this delegates to allocates a
 * temporary copy of the matrix.
 * @param matrix Matrix to transpose; rows and cols are swapped on success
 * @deprecated You probably want to use transpose_in_place
 * @returns EIDSP_OK if OK
 */
static int transpose(matrix_t *matrix) {
    int r = transpose(matrix->buffer, matrix->cols, matrix->rows);
    if (r != 0) {
        return r;
    }

    // swap at the members' own width; going through uint16_t temporaries would
    // truncate dimensions above 65535
    std::swap(matrix->rows, matrix->cols);

    return EIDSP_OK;
}
/**
 * Transpose a float array, source is destination.
 * The input is read as a (columns x rows) row-major array and is overwritten with
 * its (rows x columns) transpose. A temporary matrix of the same size is allocated
 * for the duration of the call.
 * @param matrix Array of rows * columns floats
 * @param rows Number of rows of the transposed result
 * @param columns Number of columns of the transposed result
 * @deprecated You probably want to use transpose_in_place
 * @returns EIDSP_OK if OK
 */
static int transpose(float *matrix, int rows, int columns) {
    EI_DSP_MATRIX(temp_matrix, rows, columns);
    if (!temp_matrix.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

#if EIDSP_USE_CMSIS_DSP
    // the CMSIS matrix instances below are filled with 16-bit dimensions
    const bool dims_fit = rows <= EI_MAX_UINT16 && columns <= EI_MAX_UINT16;
    if (!dims_fit) {
        return EIDSP_NARROWING;
    }

    const arm_matrix_instance_f32 source = {
        static_cast<uint16_t>(columns),
        static_cast<uint16_t>(rows),
        matrix
    };
    arm_matrix_instance_f32 target = {
        static_cast<uint16_t>(rows),
        static_cast<uint16_t>(columns),
        temp_matrix.buffer
    };
    arm_status status = arm_mat_trans_f32(&source, &target);
    if (status != ARM_MATH_SUCCESS) {
        return status;
    }
#else
    // fill the temporary in row-major order, reading the source column-wise
    float *write_ptr = temp_matrix.buffer;
    for (int out_row = 0; out_row < rows; out_row++) {
        for (int out_col = 0; out_col < columns; out_col++) {
            *write_ptr = matrix[out_col * rows + out_row];
            write_ptr++;
        }
    }
#endif

    // copy the transposed data back over the source
    memcpy(matrix, temp_matrix.buffer, rows * columns * sizeof(float));

    return EIDSP_OK;
}
/**
 * Transpose a quantized matrix, source is destination (from MxN to NxM)
 * Note: the transpose(uint8_t *, int, int) overload this delegates to allocates a
 * temporary copy of the matrix.
 * @param matrix Quantized matrix to transpose; rows and cols are swapped on success
 * @returns EIDSP_OK if OK
 */
static int transpose(quantized_matrix_t *matrix) {
    int r = transpose(matrix->buffer, matrix->cols, matrix->rows);
    if (r != 0) {
        return r;
    }

    // swap at the members' own width; uint16_t temporaries would narrow the
    // dimensions if the members are wider than 16 bits
    std::swap(matrix->rows, matrix->cols);

    return EIDSP_OK;
}
/**
 * Transpose a uint8_t array, source is destination.
 * The input is read as a (columns x rows) row-major array and is overwritten with
 * its (rows x columns) transpose. A temporary quantized matrix of the same size is
 * allocated for the duration of the call.
 * @param matrix Array of rows * columns bytes
 * @param rows Number of rows of the transposed result
 * @param columns Number of columns of the transposed result
 * @returns EIDSP_OK if OK
 */
static int transpose(uint8_t *matrix, int rows, int columns) {
    // dequantization function is not used actually...
    EI_DSP_QUANTIZED_MATRIX(temp_matrix, rows, columns, &dequantize_zero_one);
    if (!temp_matrix.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    // fill the temporary in row-major order, reading the source column-wise
    uint8_t *write_ptr = temp_matrix.buffer;
    for (int out_row = 0; out_row < rows; out_row++) {
        for (int out_col = 0; out_col < columns; out_col++) {
            *write_ptr = matrix[out_col * rows + out_row];
            write_ptr++;
        }
    }

    // copy the transposed data back over the source
    memcpy(matrix, temp_matrix.buffer, rows * columns * sizeof(uint8_t));

    return EIDSP_OK;
}
/**
 * Return the Discrete Cosine Transform of arbitrary type sequence 2.
 * The transform is computed in place.
 * @param input Input array (of size N), overwritten with the result
 * @param N number of items in input and output array
 * @param normalization DCT_NORMALIZATION_ORTHO additionally scales element 0 by
 *                      sqrt(1/(4N)) and all other elements by sqrt(1/(2N))
 * @returns EIDSP_OK if OK
 */
static int dct2(float *input, size_t N, DCT_NORMALIZATION_MODE normalization = DCT_NORMALIZATION_NONE) {
    if (N == 0) {
        return EIDSP_OK;
    }

    int ret = ei::dct::transform(input, N);
    if (ret != EIDSP_OK) {
        EIDSP_ERR(ret);
    }

    // for some reason the output is 2x too low...
    for (size_t ix = 0; ix < N; ix++) {
        input[ix] *= 2;
    }

    if (normalization == DCT_NORMALIZATION_ORTHO) {
        input[0] = input[0] * sqrt(1.0f / static_cast<float>(4 * N));
        // the factor is the same for every remaining element: compute it once
        const float ortho_scale = sqrt(1.0f / static_cast<float>(2 * N));
        for (size_t ix = 1; ix < N; ix++) {
            input[ix] = input[ix] * ortho_scale;
        }
    }

    return EIDSP_OK;
}
/**
 * Discrete Cosine Transform of arbitrary type sequence 2 on a matrix.
 * Every row is transformed in place and independently; processing stops at the
 * first row that fails.
 * @param matrix Matrix whose rows are transformed
 * @param normalization Normalization mode forwarded to the per-row transform
 * @returns EIDSP_OK if OK, otherwise the error of the failing row
 */
static int dct2(matrix_t *matrix, DCT_NORMALIZATION_MODE normalization = DCT_NORMALIZATION_NONE) {
    size_t row = 0;
    while (row < matrix->rows) {
        float *row_data = matrix->buffer + (row * matrix->cols);
        const int status = dct2(row_data, matrix->cols, normalization);
        if (status != EIDSP_OK) {
            return status;
        }
        row++;
    }

    return EIDSP_OK;
}
/**
 * Quantize a float value between zero and one.
 * Returns the index into quantized_values_one_zero of the entry closest to the
 * value: an exact match yields its first index, values outside the table range
 * are clamped to the first / last index.
 * @param value Float value
 * @returns Index into quantized_values_one_zero
 */
static uint8_t quantize_zero_one(float value) {
    const size_t length = sizeof(quantized_values_one_zero) / sizeof(float);

    // NaN cannot be ordered against the table; map it to code 0
    if (value != value) {
        return 0;
    }

    // look in the table
    for (size_t ix = 0; ix < length; ix++) {
        if (quantized_values_one_zero[ix] == value) return static_cast<uint8_t>(ix);
    }

    // no match? clamp to the table bounds - return the INDEX, not the table value
    if (value < quantized_values_one_zero[0]) {
        return 0;
    }
    if (value > quantized_values_one_zero[length - 1]) {
        return static_cast<uint8_t>(length - 1);
    }

    // binary search for the two neighbouring entries (the table is ascending)
    int lo = 0;
    int hi = length - 1;

    while (lo <= hi) {
        int mid = (hi + lo) / 2;

        if (value < quantized_values_one_zero[mid]) {
            hi = mid - 1;
        } else if (value > quantized_values_one_zero[mid]) {
            lo = mid + 1;
        } else {
            return static_cast<uint8_t>(mid);
        }
    }

    // lo == hi + 1: pick whichever neighbour is closer to the value
    return (quantized_values_one_zero[lo] - value) < (value - quantized_values_one_zero[hi]) ?
        lo :
        hi;
}
/**
 * Map a quantized code back to its float value between zero and one.
 * @param value Index into quantized_values_one_zero
 * @returns The table entry at that index
 */
static float dequantize_zero_one(uint8_t value) {
    const float *entry = &quantized_values_one_zero[value];
    return *entry;
}
/**
 * Pad a matrix with extra rows before and after.
 * The padding rows mirror the input along its edges: the source row index walks
 * away from the edge, repeats the row at either end of the input once and then
 * walks back.
 * @param input Input matrix (MxN)
 * @param output Output matrix of size (M+pad_before+pad_after x N)
 * @param pad_before Number of rows to pad before
 * @param pad_after Number of rows to pad after
 * @returns 0 if OK
 */
static int pad_1d_symmetric(matrix_t *input, matrix_t *output, uint16_t pad_before, uint16_t pad_after) {
    if (output->cols != input->cols) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (output->rows != input->rows + pad_before + pad_after) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (input->rows == 0) {
        EIDSP_ERR(EIDSP_INPUT_MATRIX_EMPTY);
    }

    const size_t row_bytes = input->cols * sizeof(float);

    // leading padding: fill from the row just before the data back to output row 0,
    // reading input rows starting at the first one
    uint32_t src_before = 0;
    bool before_ascending = true;
    for (int32_t dst_row = pad_before - 1; dst_row >= 0; dst_row--) {
        memcpy(output->buffer + (input->cols * dst_row),
            input->buffer + (src_before * input->cols),
            row_bytes);

        if (src_before == 0 && !before_ascending) {
            // back at the first row: turn around (the row is repeated once)
            before_ascending = true;
        }
        else if (src_before == input->rows - 1 && before_ascending) {
            // reached the last row: turn around (the row is repeated once)
            before_ascending = false;
        }
        else if (before_ascending) {
            src_before++;
        }
        else {
            src_before--;
        }
    }

    // the original data goes in between the two padded regions
    memcpy(output->buffer + (input->cols * pad_before),
        input->buffer,
        input->rows * input->cols * sizeof(float));

    // trailing padding: fill forwards, reading input rows starting at the last one
    int32_t src_after = input->rows - 1;
    bool after_ascending = false;
    for (int32_t offset = 0; offset < pad_after; offset++) {
        memcpy(output->buffer + (input->cols * (offset + pad_before + input->rows)),
            input->buffer + (src_after * input->cols),
            row_bytes);

        if (src_after == 0 && !after_ascending) {
            after_ascending = true;
        }
        else if (src_after == static_cast<int32_t>(input->rows) - 1 && after_ascending) {
            after_ascending = false;
        }
        else if (after_ascending) {
            src_after++;
        }
        else {
            src_after--;
        }
    }

    return EIDSP_OK;
}
/**
 * Multiply every element of a matrix by a constant, in place.
 * A scale of exactly 1.0f returns immediately without touching the matrix.
 * @param matrix Matrix to scale
 * @param scale Factor applied to every element
 * @returns 0 if OK
 */
static int scale(matrix_t *matrix, float scale) {
    if (scale == 1.0f) {
        return EIDSP_OK;
    }

#if EIDSP_USE_CMSIS_DSP
    // the CMSIS matrix instances below are filled with 16-bit dimensions
    const bool dims_fit = matrix->rows <= EI_MAX_UINT16 && matrix->cols <= EI_MAX_UINT16;
    if (!dims_fit) {
        return EIDSP_NARROWING;
    }

    // source and destination share the same buffer
    const arm_matrix_instance_f32 source = { static_cast<uint16_t>(matrix->rows), static_cast<uint16_t>(matrix->cols), matrix->buffer };
    arm_matrix_instance_f32 target = { static_cast<uint16_t>(matrix->rows), static_cast<uint16_t>(matrix->cols), matrix->buffer };
    int status = arm_mat_scale_f32(&source, scale, &target);
    if (status != ARM_MATH_SUCCESS) {
        return status;
    }
#else
    const size_t element_count = matrix->rows * matrix->cols;
    for (size_t pos = 0; pos < element_count; pos++) {
        matrix->buffer[pos] = matrix->buffer[pos] * scale;
    }
#endif
    return EIDSP_OK;
}
/**
 * Scale a matrix in place, with one factor per row
 * @param matrix Input matrix (MxN)
 * @param scale_matrix Scale matrix (Mx1); element r is the factor for row r
 * @returns 0 if OK
 */
static int scale(matrix_t *matrix, matrix_t *scale_matrix) {
    if (matrix->rows != scale_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (scale_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    size_t row = 0;
    while (row < matrix->rows) {
        // 1xN matrix over the existing row data, handed to the scalar overload
        EI_DSP_MATRIX_B(row_view, 1, matrix->cols, matrix->buffer + (row * matrix->cols));
        const float factor = scale_matrix->buffer[row];
        int ret = scale(&row_view, factor);
        if (ret != EIDSP_OK) {
            EIDSP_ERR(ret);
        }
        row++;
    }

    return EIDSP_OK;
}
/**
 * Add a constant to every element of a matrix, in place
 * @param matrix Matrix to modify
 * @param addition Value added to every element
 * @returns 0 if OK
 */
static int add(matrix_t *matrix, float addition) {
    const uint32_t element_count = matrix->rows * matrix->cols;
    uint32_t pos = 0;
    while (pos < element_count) {
        matrix->buffer[pos] = matrix->buffer[pos] + addition;
        pos++;
    }
    return EIDSP_OK;
}
/**
 * Add to a matrix in place, with one addend per row
 * @param matrix Input matrix (MxN)
 * @param add_matrix Addend matrix (Mx1); element r is added to every element of row r
 * @returns 0 if OK
 */
static int add(matrix_t *matrix, matrix_t *add_matrix) {
    if (matrix->rows != add_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (add_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    size_t row = 0;
    while (row < matrix->rows) {
        // 1xN matrix over the existing row data, handed to the scalar overload
        EI_DSP_MATRIX_B(row_view, 1, matrix->cols, matrix->buffer + (row * matrix->cols));
        const float addend = add_matrix->buffer[row];
        int ret = add(&row_view, addend);
        if (ret != EIDSP_OK) {
            EIDSP_ERR(ret);
        }
        row++;
    }

    return EIDSP_OK;
}
/**
 * Subtract a constant from every element of a matrix, in place
 * @param matrix Matrix to modify
 * @param subtraction Value subtracted from every element
 * @returns 0 if OK
 */
static int subtract(matrix_t *matrix, float subtraction) {
    const uint32_t element_count = matrix->rows * matrix->cols;
    uint32_t pos = 0;
    while (pos < element_count) {
        matrix->buffer[pos] = matrix->buffer[pos] - subtraction;
        pos++;
    }
    return EIDSP_OK;
}
/**
 * Subtract from a matrix in place, with one subtrahend per row
 * @param matrix Input matrix (MxN)
 * @param subtract_matrix Subtrahend matrix (Mx1); element r is subtracted from every element of row r
 * @returns 0 if OK
 */
static int subtract(matrix_t *matrix, matrix_t *subtract_matrix) {
    if (matrix->rows != subtract_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (subtract_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    size_t row = 0;
    while (row < matrix->rows) {
        // 1xN matrix over the existing row data, handed to the scalar overload
        EI_DSP_MATRIX_B(row_view, 1, matrix->cols, matrix->buffer + (row * matrix->cols));
        const float subtrahend = subtract_matrix->buffer[row];
        int ret = subtract(&row_view, subtrahend);
        if (ret != EIDSP_OK) {
            EIDSP_ERR(ret);
        }
        row++;
    }

    return EIDSP_OK;
}
/**
 * Calculate the root mean square of a matrix, one value per row
 * @param matrix Matrix of size (MxN)
 * @param output_matrix Matrix of size (Mx1), receives the RMS of each row
 * @returns 0 if OK
 */
static int rms(matrix_t *matrix, matrix_t *output_matrix) {
    if (matrix->rows != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t row = 0; row < matrix->rows; row++) {
        float *row_data = matrix->buffer + (row * matrix->cols);
#if EIDSP_USE_CMSIS_DSP
        float row_rms;
        arm_rms_f32(row_data, matrix->cols, &row_rms);
        output_matrix->buffer[row] = row_rms;
#else
        // mean of the squares, then the square root
        float squares = 0.0;
        for (size_t col = 0; col < matrix->cols; col++) {
            const float sample = row_data[col];
            squares += sample * sample;
        }
        output_matrix->buffer[row] = sqrt(squares / static_cast<float>(matrix->cols));
#endif
    }

    return EIDSP_OK;
}
/**
 * Calculate the mean over a matrix, one value per row
 * @param input_matrix Input matrix (MxN)
 * @param output_matrix Output matrix (Mx1), receives the mean of each row
 * @returns 0 if OK
 */
static int mean(matrix_t *input_matrix, matrix_t *output_matrix) {
    if (input_matrix->rows != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }
    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t row = 0; row < input_matrix->rows; row++) {
        float *row_data = input_matrix->buffer + (row * input_matrix->cols);
#if EIDSP_USE_CMSIS_DSP
        float row_mean;
        arm_mean_f32(row_data, input_matrix->cols, &row_mean);
        output_matrix->buffer[row] = row_mean;
#else
        float total = 0.0f;
        size_t col = 0;
        while (col < input_matrix->cols) {
            total += row_data[col];
            col++;
        }
        output_matrix->buffer[row] = total / input_matrix->cols;
#endif
    }

    return EIDSP_OK;
}
/**
 * Calculate the mean over a matrix on axis 0 (one value per column)
 * @param input_matrix Input matrix (MxN)
 * @param output_matrix Output matrix (Nx1), receives the mean of each column
 * @returns 0 if OK
 */
static int mean_axis0(matrix_t *input_matrix, matrix_t *output_matrix) {
    if (input_matrix->cols != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t col = 0; col < input_matrix->cols; col++) {
        // Note - not using CMSIS-DSP here.
        // Gathering the current column into sequential memory so that SIMD could
        // calculate the mean would take more time than this simple loop. The
        // alternative is to use 2 transposes, and on a "big" ARM platform that
        // will take more time as well.
        float column_total = 0.0f;
        size_t row = 0;
        while (row < input_matrix->rows) {
            column_total += input_matrix->buffer[( row * input_matrix->cols ) + col];
            row++;
        }

        output_matrix->buffer[col] = column_total / input_matrix->rows;
    }

    return EIDSP_OK;
}
/**
 * Calculate the standard deviation over a matrix on axis 0 (one value per column).
 * The squared deviations are divided by the number of rows.
 * The CMSIS-DSP build delegates to std_axis0_CMSIS.
 * @param input_matrix Input matrix (MxN)
 * @param output_matrix Output matrix (Nx1), receives the standard deviation of each column
 * @returns 0 if OK
 */
static int std_axis0(matrix_t *input_matrix, matrix_t *output_matrix) {
#if EIDSP_USE_CMSIS_DSP
    return std_axis0_CMSIS(input_matrix, output_matrix);
#else
    if (input_matrix->cols != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t col = 0; col < input_matrix->cols; col++) {
        // first pass: mean of the column
        float column_total = 0.0f;
        for (size_t row = 0; row < input_matrix->rows; row++) {
            column_total += input_matrix->buffer[(row * input_matrix->cols) + col];
        }
        const float column_mean = column_total / input_matrix->rows;

        // second pass: sum of squared deviations from that mean
        float squared_dev = 0.0f;
        for (size_t row = 0; row < input_matrix->rows; row++) {
            const float deviation = input_matrix->buffer[(row * input_matrix->cols) + col] - column_mean;
            squared_dev += deviation * deviation;
        }

        output_matrix->buffer[col] = sqrt(squared_dev / input_matrix->rows);
    }

    return EIDSP_OK;
#endif
}
/**
 * Get the minimum value in a matrix, one value per row
 * @param input_matrix Input matrix (MxN)
 * @param output_matrix Output matrix (Mx1), receives the minimum of each row
 * @returns 0 if OK
 */
static int min(matrix_t *input_matrix, matrix_t *output_matrix) {
    if (input_matrix->rows != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }
    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t row = 0; row < input_matrix->rows; row++) {
        float *row_data = input_matrix->buffer + (row * input_matrix->cols);
#if EIDSP_USE_CMSIS_DSP
        float lowest;
        uint32_t lowest_ix; // position of the minimum, not used
        arm_min_f32(row_data, input_matrix->cols, &lowest, &lowest_ix);
        output_matrix->buffer[row] = lowest;
#else
        // start above every representable value so the first element always wins
        float lowest = FLT_MAX;

        for (size_t col = 0; col < input_matrix->cols; col++) {
            const float candidate = row_data[col];
            if (candidate < lowest) {
                lowest = candidate;
            }
        }

        output_matrix->buffer[row] = lowest;
#endif
    }

    return EIDSP_OK;
}
/**
 * Get the maximum value in a matrix, one value per row
 * @param input_matrix Input matrix (MxN)
 * @param output_matrix Output matrix (Mx1), receives the maximum of each row
 * @returns 0 if OK
 */
static int max(matrix_t *input_matrix, matrix_t *output_matrix) {
    if (input_matrix->rows != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }
    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t row = 0; row < input_matrix->rows; row++) {
        float *row_data = input_matrix->buffer + (row * input_matrix->cols);
#if EIDSP_USE_CMSIS_DSP
        float highest;
        uint32_t highest_ix; // position of the maximum, not used
        arm_max_f32(row_data, input_matrix->cols, &highest, &highest_ix);
        output_matrix->buffer[row] = highest;
#else
        // start below every finite value so the first element always wins
        float highest = -FLT_MAX;

        for (size_t col = 0; col < input_matrix->cols; col++) {
            const float candidate = row_data[col];
            if (candidate > highest) {
                highest = candidate;
            }
        }

        output_matrix->buffer[row] = highest;
#endif
    }

    return EIDSP_OK;
}
/**
 * Get the standard deviation in a matrix, one value per row.
 * In the plain C++ build the squared deviations are divided by the number of columns.
 * @param input_matrix Input matrix (MxN)
 * @param output_matrix Output matrix (Mx1), receives the standard deviation of each row
 * @returns 0 if OK
 */
static int stdev(matrix_t *input_matrix, matrix_t *output_matrix) {
    if (input_matrix->rows != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }
    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t row = 0; row < input_matrix->rows; row++) {
        float *row_data = input_matrix->buffer + (row * input_matrix->cols);
#if EIDSP_USE_CMSIS_DSP
        float variance;
        float deviation;
        cmsis_arm_variance(row_data, input_matrix->cols, &variance);
        arm_sqrt_f32(variance, &deviation);
        output_matrix->buffer[row] = deviation;
#else
        // first pass: mean of the row
        float total = 0.0f;
        for (size_t col = 0; col < input_matrix->cols; col++) {
            total += row_data[col];
        }
        const float row_mean = total / input_matrix->cols;

        // second pass: sum of squared deviations from that mean
        float squared_dev = 0.0f;
        for (size_t col = 0; col < input_matrix->cols; col++) {
            const float delta = row_data[col] - row_mean;
            squared_dev += delta * delta;
        }

        output_matrix->buffer[row] = sqrt(squared_dev / input_matrix->cols);
#endif
    }

    return EIDSP_OK;
}
/**
 * Get the skewness value in a matrix, one value per row.
 * Computed as m_3 / (m_2)^(3/2), where m_2 and m_3 are the second and third
 * central moments of the row; a row with zero variance yields 0.
 * @param input_matrix Input matrix (MxN)
 * @param output_matrix Output matrix (Mx1), receives the skewness of each row
 * @returns 0 if OK
 */
static int skew(matrix_t *input_matrix, matrix_t *output_matrix) {
    if (input_matrix->rows != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }
    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t row = 0; row < input_matrix->rows; row++) {
        float *row_data = input_matrix->buffer + (row * input_matrix->cols);
#if EIDSP_USE_CMSIS_DSP
        // mean and variance of the row
        float row_mean;
        float variance;
        arm_mean_f32(row_data, input_matrix->cols, &row_mean);
        cmsis_arm_variance(row_data, input_matrix->cols, &variance);

        // third central moment
        float third_moment;
        cmsis_arm_third_moment(row_data, input_matrix->cols, row_mean, &third_moment);

        // (variance)^(3/2), stored back into `variance`
        arm_sqrt_f32(variance * variance * variance, &variance);

        // skew = (m_3) / (variance)^(3/2); a constant row has no skew
        output_matrix->buffer[row] = (variance == 0.0f) ? 0.0f : third_moment / variance;
#else
        // mean of the row
        float total = 0.0f;
        for (size_t col = 0; col < input_matrix->cols; col++) {
            total += row_data[col];
        }
        const float row_mean = total / input_matrix->cols;

        // accumulate the second and third central moments in one pass
        float third_moment = 0.0f;
        float second_moment = 0.0f;
        for (size_t col = 0; col < input_matrix->cols; col++) {
            const float delta = row_data[col] - row_mean;
            third_moment += delta * delta * delta;
            second_moment += delta * delta;
        }
        third_moment = third_moment / input_matrix->cols;
        second_moment = second_moment / input_matrix->cols;

        // (m_2)^(3/2)
        const float denominator = sqrt(second_moment * second_moment * second_moment);

        // skew = (m_3) / (m_2)^(3/2); a constant row has no skew
        output_matrix->buffer[row] = (denominator == 0.0f) ? 0.0f : third_moment / denominator;
#endif
    }

    return EIDSP_OK;
}
/**
 * Compute the Fisher (excess) kurtosis of every row of a matrix.
 * The result is (m_4 / variance^2) - 3, where m_4 is the fourth central moment
 * and the variance is divided by N. A row whose squared variance is exactly
 * zero yields -3.
 * @param input_matrix Input matrix (MxN)
 * @param output_matrix Output matrix (Mx1)
 * @returns EIDSP_OK if OK, EIDSP_MATRIX_SIZE_MISMATCH if the output shape is wrong
 */
static int kurtosis(matrix_t *input_matrix, matrix_t *output_matrix) {
    if (input_matrix->rows != output_matrix->rows || output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (size_t row = 0; row < input_matrix->rows; row++) {
        float *row_data = input_matrix->buffer + (row * input_matrix->cols);
#if EIDSP_USE_CMSIS_DSP
        // mean and variance of the row
        float mean;
        float var;
        arm_mean_f32(row_data, input_matrix->cols, &mean);
        cmsis_arm_variance(row_data, input_matrix->cols, &var);

        // fourth central moment
        float m_4;
        cmsis_arm_fourth_moment(row_data, input_matrix->cols, mean, &m_4);

        const float var_squared = var * var;
        output_matrix->buffer[row] =
            (var_squared == 0.0f) ? -3.0f : ((m_4 / var_squared) - 3.0f);
#else
        // mean of the row
        float total = 0.0f;
        for (size_t col = 0; col < input_matrix->cols; col++) {
            total += row_data[col];
        }
        const float mean = total / input_matrix->cols;

        // accumulate the variance and the fourth central moment in one pass
        float m_4 = 0.0f;
        float variance = 0.0f;
        for (size_t col = 0; col < input_matrix->cols; col++) {
            const float diff = row_data[col] - mean;
            const float square_diff = diff * diff;
            variance += square_diff;
            m_4 += square_diff * square_diff;
        }
        m_4 = m_4 / input_matrix->cols;
        variance = variance / input_matrix->cols;

        const float var_squared = variance * variance;
        output_matrix->buffer[row] =
            (var_squared == 0.0f) ? -3.0f : ((m_4 / var_squared) - 3.0f);
#endif
    }
    return EIDSP_OK;
}
/**
 * Compute the one-dimensional discrete Fourier Transform for real input and
 * return the magnitude of every output bin.
 * The source is truncated to n_fft samples if it is longer, and zero-padded on
 * the right if it is shorter.
 * @param src Source buffer
 * @param src_size Size of the source buffer
 * @param output Output buffer (receives one magnitude per bin)
 * @param output_size Size of the output buffer, should be n_fft / 2 + 1
 * @param n_fft Number of FFT points
 * @returns 0 if OK
 */
static int rfft(const float *src, size_t src_size, float *output, size_t output_size, size_t n_fft) {
    size_t n_fft_out_features = (n_fft / 2) + 1;
    if (output_size != n_fft_out_features) {
        EIDSP_ERR(EIDSP_BUFFER_SIZE_MISMATCH);
    }

    // truncate if needed
    if (src_size > n_fft) {
        src_size = n_fft;
    }

    // declare input and output arrays
    EI_DSP_MATRIX(fft_input, 1, n_fft);
    if (!fft_input.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    // copy from src to fft_input
    memcpy(fft_input.buffer, src, src_size * sizeof(float));
    // pad to the right with zeros; the buffer holds floats, so the padding is
    // sized in floats (same as the fft_complex_t overload)
    memset(fft_input.buffer + src_size, 0, (n_fft - src_size) * sizeof(float));

#if EIDSP_USE_CMSIS_DSP
    if (n_fft != 32 && n_fft != 64 && n_fft != 128 && n_fft != 256 &&
        n_fft != 512 && n_fft != 1024 && n_fft != 2048 && n_fft != 4096) {
        // any other length goes through the software implementation
        int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features);
        if (ret != EIDSP_OK) {
            EIDSP_ERR(ret);
        }
    }
    else {
        // hardware acceleration only works for the powers above...
        arm_rfft_fast_instance_f32 rfft_instance;
        int status = cmsis_rfft_init_f32(&rfft_instance, n_fft);
        if (status != ARM_MATH_SUCCESS) {
            return status;
        }

        EI_DSP_MATRIX(fft_output, 1, n_fft);
        if (!fft_output.buffer) {
            EIDSP_ERR(EIDSP_OUT_OF_MEM);
        }

        arm_rfft_fast_f32(&rfft_instance, fft_input.buffer, fft_output.buffer, 0);

        // the first two output values are taken as-is for the first and last
        // bin; every other bin is an interleaved pair starting at index 2
        output[0] = fft_output.buffer[0];
        output[n_fft_out_features - 1] = fft_output.buffer[1];

        size_t fft_output_buffer_ix = 2;
        for (size_t ix = 1; ix < n_fft_out_features - 1; ix += 1) {
            // rms of the pair times sqrt(2) equals the magnitude of the pair
            float rms_result;
            arm_rms_f32(fft_output.buffer + fft_output_buffer_ix, 2, &rms_result);
            output[ix] = rms_result * sqrt(2);

            fft_output_buffer_ix += 2;
        }
    }
#else
    int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features);
    if (ret != EIDSP_OK) {
        EIDSP_ERR(ret);
    }
#endif

    return EIDSP_OK;
}
/**
 * Compute the one-dimensional discrete Fourier Transform for real input and
 * return the complex value of every output bin.
 * The source is truncated to n_fft samples if it is longer, and zero-padded on
 * the right if it is shorter. When the source is exactly n_fft samples long the
 * source buffer itself is handed to the FFT as its input buffer.
 * @param src Source buffer
 * @param src_size Size of the source buffer
 * @param output Output buffer
 * @param output_size Size of the output buffer, should be n_fft / 2 + 1
 * @param n_fft Number of FFT points
 * @returns 0 if OK
 */
static int rfft(const float *src, size_t src_size, fft_complex_t *output, size_t output_size, size_t n_fft) {
    const size_t n_fft_out_features = (n_fft / 2) + 1;
    if (output_size != n_fft_out_features) {
        EIDSP_ERR(EIDSP_BUFFER_SIZE_MISMATCH);
    }

    // never consume more than n_fft samples
    if (src_size > n_fft) {
        src_size = n_fft;
    }

    // a source of exactly one frame is used directly as the input buffer,
    // otherwise NULL is passed and the samples are copied in below
    float *fft_input_buffer = (src_size == n_fft) ? (float*)src : NULL;

    EI_DSP_MATRIX_B(fft_input, 1, n_fft, fft_input_buffer);
    if (!fft_input.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    if (fft_input_buffer == NULL) {
        // bring the samples over, then zero everything to the right of them
        memcpy(fft_input.buffer, src, src_size * sizeof(float));
        memset(fft_input.buffer + src_size, 0, (n_fft - src_size) * sizeof(float));
    }

#if EIDSP_USE_CMSIS_DSP
    // the CMSIS path is only taken for these transform lengths
    const bool cmsis_length =
        (n_fft == 32 || n_fft == 64 || n_fft == 128 || n_fft == 256 ||
         n_fft == 512 || n_fft == 1024 || n_fft == 2048 || n_fft == 4096);

    if (!cmsis_length) {
        int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features);
        if (ret != EIDSP_OK) {
            EIDSP_ERR(ret);
        }
    }
    else {
        arm_rfft_fast_instance_f32 rfft_instance;
        int status = cmsis_rfft_init_f32(&rfft_instance, n_fft);
        if (status != ARM_MATH_SUCCESS) {
            return status;
        }

        EI_DSP_MATRIX(fft_output, 1, n_fft);
        if (!fft_output.buffer) {
            EIDSP_ERR(EIDSP_OUT_OF_MEM);
        }

        arm_rfft_fast_f32(&rfft_instance, fft_input.buffer, fft_output.buffer, 0);

        // the first two output values become the real parts of the first and
        // last bin; their imaginary parts are set to zero
        output[0].r = fft_output.buffer[0];
        output[0].i = 0.0f;
        output[n_fft_out_features - 1].r = fft_output.buffer[1];
        output[n_fft_out_features - 1].i = 0.0f;

        // the remaining bins are stored as interleaved (real, imaginary) pairs
        for (size_t ix = 1; ix < n_fft_out_features - 1; ix += 1) {
            const size_t pair_ix = 2 * ix;
            output[ix].r = fft_output.buffer[pair_ix];
            output[ix].i = fft_output.buffer[pair_ix + 1];
        }
    }
#else
    int ret = software_rfft(fft_input.buffer, output, n_fft, n_fft_out_features);
    if (ret != EIDSP_OK) {
        EIDSP_ERR(ret);
    }
#endif

    return EIDSP_OK;
}
/**
 * Return evenly spaced numbers over a specified interval.
 * Writes `number` evenly spaced samples over the closed interval [start, stop];
 * the final sample is always exactly `stop`. With a single sample only `start`
 * is written.
 *
 * Based on https://github.com/ntessore/algo/blob/master/linspace.c
 * Licensed in public domain (see LICENSE in repository above)
 *
 * @param start The starting value of the sequence.
 * @param stop The end value of the sequence.
 * @param number Number of samples to generate.
 * @param out Out array, with size `number`
 * @returns 0 if OK, EIDSP_PARAMETER_INVALID if `number` is 0 or `out` is NULL
 */
static int linspace(float start, float stop, uint32_t number, float *out)
{
    if (!out || number < 1) {
        EIDSP_ERR(EIDSP_PARAMETER_INVALID);
    }

    const uint32_t last = number - 1;
    if (last == 0) {
        out[0] = start;
        return EIDSP_OK;
    }

    // distance between two neighbouring samples
    const float step = (stop - start) / last;

    // every sample but the last is derived from the step size
    uint32_t ix = 0;
    while (ix < last) {
        out[ix] = start + ix * step;
        ix++;
    }

    // the end point is written verbatim so it is not affected by rounding
    out[last] = stop;

    return EIDSP_OK;
}
/**
 * Return evenly spaced q31 numbers over a specified interval.
 * Writes `number` evenly spaced samples over the closed interval [start, stop];
 * the final sample is always exactly `stop`. The step is an integer (the
 * quotient is truncated), and both ascending and descending ranges are
 * supported.
 *
 * Based on https://github.com/ntessore/algo/blob/master/linspace.c
 * Licensed in public domain (see LICENSE in repository above)
 *
 * @param start The starting value of the sequence.
 * @param stop The end value of the sequence.
 * @param number Number of samples to generate.
 * @param out Out array, with size `number`
 * @returns 0 if OK, EIDSP_PARAMETER_INVALID if `number` is 0 or `out` is NULL
 */
static int linspace(EIDSP_i32 start, EIDSP_i32 stop, uint32_t number, EIDSP_i32 *out)
{
    if (number < 1 || !out) {
        EIDSP_ERR(EIDSP_PARAMETER_INVALID);
    }

    if (number == 1) {
        out[0] = start;
        return EIDSP_OK;
    }

    // step size
    // All arithmetic is done in signed 64 bit: dividing the signed difference
    // by the unsigned (number - 1) would convert a negative difference to a
    // huge unsigned value, and (stop - start) itself can exceed 32 bits.
    const int64_t step = ((int64_t)stop - (int64_t)start) / (int64_t)(number - 1);

    // do steps
    for (uint32_t ix = 0; ix < number - 1; ix++) {
        // the value always lies between start and stop, so it fits in 32 bits
        out[ix] = (EIDSP_i32)((int64_t)start + ((int64_t)ix * step));
    }

    // last entry always stop
    out[number - 1] = stop;

    return EIDSP_OK;
}
/**
* Convert an int32_t buffer into a float buffer, maps to -1..1
* @param input
* @param output
* @param length
* @returns 0 if OK
*/
static int int32_to_float(const EIDSP_i32 *input, float *output, size_t length) {
#if EIDSP_USE_CMSIS_DSP
arm_q31_to_float((q31_t *)input, output, length);
#else
for (size_t ix = 0; ix < length; ix++) {
output[ix] = (float)(input[ix]) / 2147483648.f;
}
#endif
return EIDSP_OK;
}
/**
 * Convert a float buffer into a 32 bit fixed point buffer.
 * Input values are expected to lie between -1 and 1; in the software path each
 * value is scaled by 2^31 and saturated to the 32 bit range.
 * @param input Float source values
 * @param output Destination buffer, with room for `length` values
 * @param length Number of values to convert
 * @returns 0 if OK
 */
static int float_to_int32(const float *input, EIDSP_i32 *output, size_t length) {
#if EIDSP_USE_CMSIS_DSP
    arm_float_to_q31((float *)input, (q31_t *)output, length);
#else
    size_t ix = 0;
    while (ix < length) {
        const int64_t scaled = (int64_t)(input[ix] * 2147483648.f);
        output[ix] = (EIDSP_i32)saturate(scaled, 32);
        ix++;
    }
#endif
    return EIDSP_OK;
}
/**
* Convert an int16_t buffer into a float buffer, maps to -1..1
* @param input
* @param output
* @param length
* @returns 0 if OK
*/
static int int16_to_float(const EIDSP_i16 *input, float *output, size_t length) {
#if EIDSP_USE_CMSIS_DSP
arm_q15_to_float((q15_t *)input, output, length);
#else
for (size_t ix = 0; ix < length; ix++) {
output[ix] = (float)(input[ix]) / 32768.f;
}
#endif
return EIDSP_OK;
}
/**
 * Convert a float buffer into a 16 bit fixed point buffer.
 * Input values are expected to lie between -1 and 1; in the software path each
 * value is scaled by 2^15 and saturated to the 16 bit range.
 * @param input Float source values
 * @param output Destination buffer, with room for `length` values
 * @param length Number of values to convert
 * @returns 0 if OK
 */
static int float_to_int16(const float *input, EIDSP_i16 *output, size_t length) {
#if EIDSP_USE_CMSIS_DSP
    arm_float_to_q15((float *)input, output, length);
#else
    size_t ix = 0;
    while (ix < length) {
        const int32_t scaled = (int32_t)(input[ix] * 32768.f);
        output[ix] = (EIDSP_i16)saturate(scaled, 16);
        ix++;
    }
#endif
    return EIDSP_OK;
}
/**
* Convert an int8_t buffer into a float buffer, maps to -1..1
* @param input
* @param output
* @param length
* @returns 0 if OK
*/
static int int8_to_float(const EIDSP_i8 *input, float *output, size_t length) {
#if EIDSP_USE_CMSIS_DSP
arm_q7_to_float((q7_t *)input, output, length);
#else
for (size_t ix = 0; ix < length; ix++) {
output[ix] = (float)(input[ix]) / 128;
}
#endif
return EIDSP_OK;
}
#if EIDSP_SIGNAL_C_FN_POINTER == 0
/**
 * Wrap an in-memory float buffer in a signal structure.
 * The signal's get_data callback reads straight from `data`, so the buffer has
 * to stay valid for as long as the signal is in use. For data that lives
 * somewhere else (e.g. flash) build the structure yourself.
 * @param data Buffer, make sure to keep this pointer alive
 * @param data_size Size of the buffer
 * @param signal Output signal
 * @returns EIDSP_OK if ok
 */
static int signal_from_buffer(const float *data, size_t data_size, signal_t *signal)
{
    // the callback forwards to signal_get_data with `data` as its source
#ifdef __MBED__
    signal->get_data = mbed::callback(&numpy::signal_get_data, data);
#else
    signal->get_data = [data](size_t start, size_t count, float *dest) {
        return numpy::signal_get_data(data, start, count, dest);
    };
#endif
    signal->total_length = data_size;

    return EIDSP_OK;
}
#endif
#if defined ( __GNUC__ )
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif
/**
 * > 50% faster then the math.h log() function
 * in return for a small loss in accuracy (0.00001 average diff with log())
 * From: https://stackoverflow.com/questions/39821367/very-fast-approximate-logarithm-natural-log-function-in-c/39822314#39822314
 * Licensed under the CC BY-SA 3.0
 *
 * The float's bit pattern is split into an exponent part and a mantissa
 * m in [2/3, 4/3]; log(a) is then exponent * log(2) + log1p(m - 1), with
 * log1p approximated by a polynomial.
 * @param a Input number
 * @returns Natural log value of a
 */
__attribute__((always_inline)) static inline float log(float a)
{
    // reinterpret the bits of `a` as an integer; memcpy is used instead of a
    // pointer cast so the access does not violate strict aliasing
    int32_t g;
    memcpy(&g, &a, sizeof(g));

    // keep only the sign/exponent bits of the offset pattern, then remove
    // them from g so the remaining value lies in [2/3, 4/3]
    int32_t e = (g - 0x3f2aaaab) & 0xff800000;
    g = g - e;

    float m;
    memcpy(&m, &g, sizeof(m));
    float i = (float)e * 1.19209290e-7f; // 0x1.0p-23

    /* m in [2/3, 4/3] */
    float f = m - 1.0f;
    float s = f * f;

    /* Compute log1p(f) for f in [-1/3, 1/3] */
    float r = fmaf(0.230836749f, f, -0.279208571f); // 0x1.d8c0f0p-3, -0x1.1de8dap-2
    float t = fmaf(0.331826031f, f, -0.498910338f); // 0x1.53ca34p-2, -0x1.fee25ap-2
    r = fmaf(r, s, t);
    r = fmaf(r, s, f);
    r = fmaf(i, 0.693147182f, r); // 0x1.62e430p-1 // log(2)

    return r;
}
/**
 * Fast approximation of log2, significantly faster than the one from math.h
 * From https://community.arm.com/developer/tools-software/tools/f/armds-forum/4292/cmsis-dsp-new-functionality-proposal/22621#22621
 *
 * The absolute value of the input is split into mantissa and exponent with
 * frexpf; a cubic polynomial in the mantissa is evaluated and the exponent is
 * added to it. Because the absolute value is taken, the sign of the input is
 * ignored.
 * @param a Input number
 * @returns Log2 value of a
 */
__attribute__((always_inline)) static inline float log2(float a)
{
    int exponent;
    const float mantissa = frexpf(fabsf(a), &exponent);

    // cubic polynomial, evaluated one operation at a time
    float acc = 1.23149591368684f;
    acc = acc * mantissa;
    acc = acc - 4.11852516267426f;
    acc = acc * mantissa;
    acc = acc + 6.02197014179219f;
    acc = acc * mantissa;
    acc = acc - 3.13396450166353f;

    // add the exponent returned by frexpf
    acc = acc + exponent;

    return acc;
}
/**
 * Fast approximation of log10, built on top of the fast numpy::log2
 * From https://community.arm.com/developer/tools-software/tools/f/armds-forum/4292/cmsis-dsp-new-functionality-proposal/22621#22621
 * @param a Input number
 * @returns Log10 value of a
 */
__attribute__((always_inline)) static inline float log10(float a)
{
    // log10(a) = log10(2) * log2(a)
    const float log10_of_2 = 0.3010299956639812f;
    return log10_of_2 * numpy::log2(a);
}
#if defined ( __GNUC__ )
#pragma GCC diagnostic pop
#endif
/**
 * Replace every element of a matrix by its natural log, using the fast
 * numpy::log approximation. The matrix is modified in place.
 * @param matrix Matrix (MxN)
 * @returns 0 if OK
 */
static int log(matrix_t *matrix)
{
    float *cell = matrix->buffer;
    for (uint32_t remaining = matrix->rows * matrix->cols; remaining > 0; remaining--) {
        *cell = numpy::log(*cell);
        cell++;
    }

    return EIDSP_OK;
}
/**
 * Replace every element of a matrix by its log10, using the fast
 * numpy::log10 approximation. The matrix is modified in place.
 * @param matrix Matrix (MxN)
 * @returns 0 if OK
 */
static int log10(matrix_t *matrix)
{
    float *cell = matrix->buffer;
    for (uint32_t remaining = matrix->rows * matrix->cols; remaining > 0; remaining--) {
        *cell = numpy::log10(*cell);
        cell++;
    }

    return EIDSP_OK;
}
/**
 * @brief      Signed saturate
 *
 * Clamps a value to the range of a signed integer that is `sat` bits wide,
 * i.e. [-2^(sat-1), 2^(sat-1) - 1]. When `sat` is outside 1..32 no clamping
 * is done and the value is only converted to int32_t.
 *
 * @param[in]  val   The value to be saturated
 * @param[in]  sat   Bit position to saturate to (1..32)
 *
 * @return     Saturated value
 */
static int32_t saturate(int64_t val, uint32_t sat)
{
    // unsupported widths: hand the value back without clamping
    if (sat < 1U || sat > 32U) {
        return (int32_t)val;
    }

    const int64_t upper = ((int64_t)1 << (sat - 1U)) - 1;
    const int64_t lower = -1 - upper;

    if (val > upper) {
        return (int32_t)upper;
    }
    if (val < lower) {
        return (int32_t)lower;
    }
    return (int32_t)val;
}
/**
 * Normalize a matrix to 0..1. Does an in-place replacement.
 * The minimum and maximum are taken over the whole matrix (it is treated as a
 * single row), not per row. When the spread between them is smaller than 0.001
 * the values are only shifted by the minimum and not scaled.
 * @param matrix Matrix to normalize
 * @returns EIDSP_OK if OK
 */
static int normalize(matrix_t *matrix) {
    // NumPy equivalent:
    //  matrix = (matrix - np.min(matrix)) / (np.max(matrix) - np.min(matrix))

    int r;

    // single-row view over the buffer of the original matrix
    matrix_t temp_matrix(1, matrix->rows * matrix->cols, matrix->buffer);

    // smallest value
    matrix_t min_matrix(1, 1);
    if (!min_matrix.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }
    r = min(&temp_matrix, &min_matrix);
    if (r != EIDSP_OK) {
        EIDSP_ERR(r);
    }

    // largest value
    matrix_t max_matrix(1, 1);
    if (!max_matrix.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }
    r = max(&temp_matrix, &max_matrix);
    if (r != EIDSP_OK) {
        EIDSP_ERR(r);
    }

    // a (nearly) flat matrix would divide by zero, use a scale of 1 instead
    float min_max_diff = (max_matrix.buffer[0] - min_matrix.buffer[0]);
    float row_scale;
    if (min_max_diff < 0.001) {
        row_scale = 1.0f;
    }
    else {
        row_scale = 1.0f / min_max_diff;
    }

    // shift so the minimum becomes 0 ...
    r = subtract(&temp_matrix, min_matrix.buffer[0]);
    if (r != EIDSP_OK) {
        EIDSP_ERR(r);
    }

    // ... and scale so the maximum becomes 1
    r = scale(&temp_matrix, row_scale);
    if (r != EIDSP_OK) {
        EIDSP_ERR(r);
    }

    return EIDSP_OK;
}
/**
 * Limit every value of a matrix to the interval [min, max], in place.
 * Values below `min` become `min` and values above `max` become `max`; e.g.
 * with the interval [0, 1] a value of -0.5 becomes 0 and a value of 3 becomes 1.
 * @param matrix Matrix to clip
 * @param min Lower edge of the interval
 * @param max Upper edge of the interval
 * @returns EIDSP_OK if OK, EIDSP_PARAMETER_INVALID if max is smaller than min
 */
static int clip(matrix_t *matrix, float min, float max) {
    if (max < min) {
        EIDSP_ERR(EIDSP_PARAMETER_INVALID);
    }

    for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) {
        float *cell = &matrix->buffer[ix];
        if (*cell < min) {
            *cell = min;
        }
        else if (*cell > max) {
            *cell = max;
        }
    }

    return EIDSP_OK;
}
/**
 * Round every value of a matrix to the nearest whole number using ::round.
 * Does an in-place replacement.
 * E.g. round([ 3.01, 4.89 ]) becomes [3, 5]
 * @param matrix Matrix to round
 * @returns EIDSP_OK
 */
static int round(matrix_t *matrix) {
    for (size_t ix = 0; ix < matrix->rows * matrix->cols; ix++) {
        float *cell = &matrix->buffer[ix];
        *cell = ::round(*cell);
    }

    return EIDSP_OK;
}
static int software_rfft(float *fft_input, float *output, size_t n_fft, size_t n_fft_out_features) {
kiss_fft_cpx *fft_output = (kiss_fft_cpx*)ei_dsp_malloc(n_fft_out_features * sizeof(kiss_fft_cpx));
if (!fft_output) {
EIDSP_ERR(EIDSP_OUT_OF_MEM);
}
size_t kiss_fftr_mem_length;
// create fftr context
kiss_fftr_cfg cfg = kiss_fftr_alloc(n_fft, 0, NULL, NULL, &kiss_fftr_mem_length);
if (!cfg) {
ei_dsp_free(fft_output, n_fft_out_features * sizeof(kiss_fft_cpx));
EIDSP_ERR(EIDSP_OUT_OF_MEM);
}
ei_dsp_register_alloc(kiss_fftr_mem_length, cfg);
// execute the rfft operation
kiss_fftr(cfg, fft_input, fft_output);
// and write back to the output
for (size_t ix = 0; ix < n_fft_out_features; ix++) {
output[ix] = sqrt(pow(fft_output[ix].r, 2) + pow(fft_output[ix].i, 2));
}
ei_dsp_free(cfg, kiss_fftr_mem_length);
ei_dsp_free(fft_output, n_fft_out_features * sizeof(kiss_fft_cpx));
return EIDSP_OK;
}
static int software_rfft(float *fft_input, fft_complex_t *output, size_t n_fft, size_t n_fft_out_features)
{
// create fftr context
size_t kiss_fftr_mem_length;
kiss_fftr_cfg cfg = kiss_fftr_alloc(n_fft, 0, NULL, NULL, &kiss_fftr_mem_length);
if (!cfg) {
EIDSP_ERR(EIDSP_OUT_OF_MEM);
}
ei_dsp_register_alloc(kiss_fftr_mem_length, cfg);
// execute the rfft operation
kiss_fftr(cfg, fft_input, (kiss_fft_cpx*)output);
ei_dsp_free(cfg, kiss_fftr_mem_length);
return EIDSP_OK;
}
/**
 * Copy `length` floats, starting at element `offset` of in_buffer, to out_ptr.
 * No bounds checking is done on either buffer.
 * @returns 0 (always)
 */
static int signal_get_data(const float *in_buffer, size_t offset, size_t length, float *out_ptr)
{
    const float *first = &in_buffer[offset];
    const size_t n_bytes = length * sizeof(float);
    memcpy(out_ptr, first, n_bytes);
    return 0;
}
/**
 * Copy `length` int16 values, starting at element `offset` of in_buffer, to
 * out_ptr. No bounds checking is done on either buffer.
 * @returns 0 (always)
 */
static int signal_get_data_i16(int16_t *in_buffer, size_t offset, size_t length, int16_t *out_ptr)
{
    const int16_t *first = &in_buffer[offset];
    const size_t n_bytes = length * sizeof(int16_t);
    memcpy(out_ptr, first, n_bytes);
    return 0;
}
#if EIDSP_USE_CMSIS_DSP
/**
* @brief The CMSIS std variance function with the same behaviour as the NumPy
* implementation
* @details Variance in CMSIS version is calculated using fSum / (float32_t)(blockSize - 1)
* @param[in] pSrc Pointer to float block
* @param[in] blockSize Number of floats in block
* @param pResult The variance
*/
static void cmsis_arm_variance(const float32_t *pSrc, uint32_t blockSize, float32_t *pResult)
{
uint32_t blkCnt;
float32_t sum = 0.0f;
float32_t fSum = 0.0f;
float32_t fMean, fValue;
const float32_t *pInput = pSrc;
if (blockSize <= 1U) {
*pResult = 0;
return;
}
blkCnt = blockSize >> 2U;
while (blkCnt > 0U) {
sum += *pInput++;
sum += *pInput++;
sum += *pInput++;
sum += *pInput++;
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
while (blkCnt > 0U) {
sum += *pInput++;
blkCnt--;
}
fMean = sum / (float32_t)blockSize;
pInput = pSrc;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U) {
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
while (blkCnt > 0U) {
fValue = *pInput++ - fMean;
fSum += fValue * fValue;
blkCnt--;
}
/* Variance */
*pResult = fSum / (float32_t)(blockSize);
}
/**
 * @brief      Standard deviation per column, explicitly using the CMSIS lib
 *             for the square root and the matrix transpose
 * @details    The input is transposed into a temporary buffer so that every
 *             column becomes a contiguous row; the standard deviation (variance
 *             divided by N) of each of those rows is written to the output.
 *             The temporary buffer is released on every exit path.
 * @param      input_matrix   The input matrix (MxN)
 * @param      output_matrix  The output matrix (Nx1)
 *
 * @return     EIDSP error
 */
static int std_axis0_CMSIS(matrix_t *input_matrix, matrix_t *output_matrix)
{
    arm_matrix_instance_f32 arm_in_matrix, arm_transposed_matrix;

    if (input_matrix->cols != output_matrix->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (output_matrix->cols != 1) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    /* Copy input matrix to arm matrix */
    arm_in_matrix.numRows = input_matrix->rows;
    arm_in_matrix.numCols = input_matrix->cols;
    arm_in_matrix.pData = &input_matrix->buffer[0];

    /* Create transposed matrix */
    arm_transposed_matrix.numRows = input_matrix->cols;
    arm_transposed_matrix.numCols = input_matrix->rows;
    arm_transposed_matrix.pData = (float *)ei_calloc(input_matrix->cols * input_matrix->rows * sizeof(float), 1);

    if (arm_transposed_matrix.pData == NULL) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    int ret = arm_mat_trans_f32(&arm_in_matrix, &arm_transposed_matrix);
    if (ret != EIDSP_OK) {
        /* Do not leak the transposed buffer when the transpose fails */
        ei_free(arm_transposed_matrix.pData);
        EIDSP_ERR(ret);
    }

    /* One standard deviation per row of the transposed matrix */
    for (size_t row = 0; row < arm_transposed_matrix.numRows; row++) {
        float std;
        float var;

        cmsis_arm_variance(arm_transposed_matrix.pData + (row * arm_transposed_matrix.numCols),
                           arm_transposed_matrix.numCols, &var);
        arm_sqrt_f32(var, &std);

        output_matrix->buffer[row] = std;
    }

    ei_free(arm_transposed_matrix.pData);

    return EIDSP_OK;
}
/**
* @brief A copy of the CMSIS power function, adapted to calculate the third central moment
* @details Calculates the sum of cubes of a block with the mean value subtracted.
* @param[in] pSrc Pointer to float block
* @param[in] blockSize Number of floats in block
* @param[in] mean The mean to subtract from each value before cubing
* @param pResult The third central moment of the input
*/
static void cmsis_arm_third_moment(const float32_t * pSrc, uint32_t blockSize, float32_t mean, float32_t * pResult)
{
uint32_t blkCnt;
float32_t sum = 0.0f;
float32_t in;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U) {
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
in = in - mean;
sum += in * in * in;
in = *pSrc++;
in = in - mean;
sum += in * in * in;
in = *pSrc++;
in = in - mean;
sum += in * in * in;
in = *pSrc++;
in = in - mean;
sum += in * in * in;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
while (blkCnt > 0U) {
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
in = in - mean;
sum += in * in * in;
/* Decrement loop counter */
blkCnt--;
}
sum = sum / blockSize;
/* Store result to destination */
*pResult = sum;
}
/**
* @brief A copy of the CMSIS power function, adapted to calculate the fourth central moment
* @details Calculates the sum of fourth powers of a block with the mean value subtracted.
* @param[in] pSrc Pointer to float block
* @param[in] blockSize Number of floats in block
* @param[in] mean The mean to subtract from each value before calculating fourth power
* @param pResult The fourth central moment of the input
*/
static void cmsis_arm_fourth_moment(const float32_t * pSrc, uint32_t blockSize, float32_t mean, float32_t * pResult)
{
uint32_t blkCnt;
float32_t sum = 0.0f;
float32_t in;
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U) {
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
in = in - mean;
float square;
square = in * in;
sum += square * square;
in = *pSrc++;
in = in - mean;
square = in * in;
sum += square * square;
in = *pSrc++;
in = in - mean;
square = in * in;
sum += square * square;
in = *pSrc++;
in = in - mean;
square = in * in;
sum += square * square;
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
while (blkCnt > 0U) {
/* Compute Power and store result in a temporary variable, sum. */
in = *pSrc++;
in = in - mean;
float square;
square = in * in;
sum += square * square;
/* Decrement loop counter */
blkCnt--;
}
sum = sum / blockSize;
/* Store result to destination */
*pResult = sum;
}
#endif // EIDSP_USE_CMSIS_DSP
/**
 * Count the number of zero bits above the most significant set bit of a
 * 32 bit value.
 * @param data Value to inspect
 * @returns Number of leading zero bits (32 when data is 0)
 */
static uint8_t count_leading_zeros(uint32_t data)
{
    uint8_t zeros = 0U;

    // walk a single-bit probe down from bit 31 until it hits a set bit or
    // falls off the end (which only happens when data is 0)
    for (uint32_t probe = 0x80000000U; probe != 0U && (data & probe) == 0U; probe >>= 1U) {
        zeros++;
    }

    return zeros;
}
/**
 * Fixed point (16 bit) square root.
 * For a positive input the value is first shifted left by an even number of
 * bits, an inverse square root is estimated from the float bit pattern and
 * refined in three fixed point iterations, then multiplied with the shifted
 * input and shifted back. For zero or negative input the result is 0.
 * @param in Input value
 * @param pOut Receives the square root
 */
static void sqrt_q15(int16_t in, int16_t *pOut)
{
    int32_t bits_val1;
    int16_t number, temp1, var1, signBits1, half;
    float temp_float1;
    /* Gives access to the bit pattern of a float as a 32 bit integer */
    union {
        int32_t fracval;
        float floatval;
    } tempconv;
    number = in;
    /* If the input is a positive number then compute the signBits. */
    if (number > 0) {
        /* 17 = the 16 extra bits of the 32 bit count plus the sign bit */
        signBits1 = count_leading_zeros(number) - 17;
        /* Shift by the number of signBits1 */
        /* (rounded down to an even count, so it can be halved exactly below) */
        if ((signBits1 % 2) == 0) {
            number = number << signBits1;
        } else {
            number = number << (signBits1 - 1);
        }
        /* Calculate half value of the number */
        half = number >> 1;
        /* Store the number for later use */
        temp1 = number;
        /* Convert to float */
        /* (the constant is 2^-15) */
        temp_float1 = number * 3.051757812500000e-005f;
        /* Store as integer */
        tempconv.floatval = temp_float1;
        bits_val1 = tempconv.fracval;
        /* Subtract the shifted value from the magic number to give initial guess */
        bits_val1 = 0x5f3759df - (bits_val1 >> 1); /* gives initial guess */
        /* Store as float */
        tempconv.fracval = bits_val1;
        temp_float1 = tempconv.floatval;
        /* Convert to integer format */
        /* (scaled by 16384 = 2^14) */
        var1 = (int32_t)(temp_float1 * 16384);
        /* 1st iteration */
        /* NOTE(review): each iteration looks like a fixed point Newton-Raphson
           step for the inverse square root, var1 * (1.5 - half * var1^2) with
           0x3000 as 1.5 - confirm against the CMSIS-DSP original */
        var1 =
            ((int16_t)(
                (int32_t)var1 *
                    (0x3000 -
                     ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >>
                15))
            << 2;
        /* 2nd iteration */
        var1 =
            ((int16_t)(
                (int32_t)var1 *
                    (0x3000 -
                     ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >>
                15))
            << 2;
        /* 3rd iteration */
        var1 =
            ((int16_t)(
                (int32_t)var1 *
                    (0x3000 -
                     ((int16_t)((((int16_t)(((int32_t)var1 * var1) >> 15)) * (int32_t)half) >> 15))) >>
                15))
            << 2;
        /* Multiply the inverse square root with the original value */
        var1 = ((int16_t)(((int32_t)temp1 * var1) >> 15)) << 1;
        /* Shift the output down accordingly */
        /* (half of the even shift that was applied to the input) */
        if ((signBits1 % 2) == 0) {
            var1 = var1 >> (signBits1 / 2);
        } else {
            var1 = var1 >> ((signBits1 - 1) / 2);
        }
        *pOut = var1;
    }
    /* If the number is zero or negative then store zero as its square root value */
    else {
        *pOut = 0;
    }
}
#if EIDSP_USE_CMSIS_DSP
/**
 * Initialize a CMSIS-DSP fast rfft structure
 * We do it this way as this means we can compile out fast_init calls which hints the compiler
 * to which tables can be removed
 *
 * When FFT info is available (and neither ARM_MATH_MVEF nor
 * EI_CLASSIFIER_LOAD_ALL_FFTS is defined) the instance is filled in by hand,
 * and only for the lengths whose EI_CLASSIFIER_LOAD_FFT_<n> flag is 1. In all
 * other builds the call is forwarded to arm_rfft_fast_init_f32.
 *
 * @param rfft_instance Instance to initialize
 * @param n_fft Length of the real FFT
 * @returns ARM_MATH_SUCCESS when a table for n_fft was compiled in,
 *          EIDSP_FFT_TABLE_NOT_LOADED when it was not, or the result of
 *          arm_rfft_fast_init_f32 in the fallback build
 */
static int cmsis_rfft_init_f32(arm_rfft_fast_instance_f32 *rfft_instance, const size_t n_fft)
{
// ARM cores (ex M55) with Helium extensions (MVEF) need special treatment (Issue 2843)
#if EI_CLASSIFIER_HAS_FFT_INFO == 1 && !defined(ARM_MATH_MVEF) && !defined(EI_CLASSIFIER_LOAD_ALL_FFTS)
    arm_status status;
    // every case below sets up the embedded complex FFT instance with half the
    // real FFT length, plus the rfft twiddle table for the full length
    switch (n_fft) {
#if EI_CLASSIFIER_LOAD_FFT_32 == 1
        case 32: {
            arm_cfft_instance_f32 *S = &(rfft_instance->Sint);
            S->fftLen = 16U;
            S->pTwiddle = NULL;
            S->bitRevLength = arm_cfft_sR_f32_len16.bitRevLength;
            S->pBitRevTable = arm_cfft_sR_f32_len16.pBitRevTable;
            S->pTwiddle = arm_cfft_sR_f32_len16.pTwiddle;
            rfft_instance->fftLenRFFT = 32U;
            rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32;
            status = ARM_MATH_SUCCESS;
            break;
        }
#endif
#if EI_CLASSIFIER_LOAD_FFT_64 == 1
        case 64: {
            arm_cfft_instance_f32 *S = &(rfft_instance->Sint);
            S->fftLen = 32U;
            S->pTwiddle = NULL;
            S->bitRevLength = arm_cfft_sR_f32_len32.bitRevLength;
            S->pBitRevTable = arm_cfft_sR_f32_len32.pBitRevTable;
            S->pTwiddle = arm_cfft_sR_f32_len32.pTwiddle;
            rfft_instance->fftLenRFFT = 64U;
            rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64;
            status = ARM_MATH_SUCCESS;
            break;
        }
#endif
#if EI_CLASSIFIER_LOAD_FFT_128 == 1
        case 128: {
            arm_cfft_instance_f32 *S = &(rfft_instance->Sint);
            S->fftLen = 64U;
            S->pTwiddle = NULL;
            S->bitRevLength = arm_cfft_sR_f32_len64.bitRevLength;
            S->pBitRevTable = arm_cfft_sR_f32_len64.pBitRevTable;
            S->pTwiddle = arm_cfft_sR_f32_len64.pTwiddle;
            rfft_instance->fftLenRFFT = 128U;
            rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128;
            status = ARM_MATH_SUCCESS;
            break;
        }
#endif
#if EI_CLASSIFIER_LOAD_FFT_256 == 1
        case 256: {
            arm_cfft_instance_f32 *S = &(rfft_instance->Sint);
            S->fftLen = 128U;
            S->pTwiddle = NULL;
            S->bitRevLength = arm_cfft_sR_f32_len128.bitRevLength;
            S->pBitRevTable = arm_cfft_sR_f32_len128.pBitRevTable;
            S->pTwiddle = arm_cfft_sR_f32_len128.pTwiddle;
            rfft_instance->fftLenRFFT = 256U;
            rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256;
            status = ARM_MATH_SUCCESS;
            break;
        }
#endif
#if EI_CLASSIFIER_LOAD_FFT_512 == 1
        case 512: {
            arm_cfft_instance_f32 *S = &(rfft_instance->Sint);
            S->fftLen = 256U;
            S->pTwiddle = NULL;
            S->bitRevLength = arm_cfft_sR_f32_len256.bitRevLength;
            S->pBitRevTable = arm_cfft_sR_f32_len256.pBitRevTable;
            S->pTwiddle = arm_cfft_sR_f32_len256.pTwiddle;
            rfft_instance->fftLenRFFT = 512U;
            rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512;
            status = ARM_MATH_SUCCESS;
            break;
        }
#endif
#if EI_CLASSIFIER_LOAD_FFT_1024 == 1
        case 1024: {
            arm_cfft_instance_f32 *S = &(rfft_instance->Sint);
            S->fftLen = 512U;
            S->pTwiddle = NULL;
            S->bitRevLength = arm_cfft_sR_f32_len512.bitRevLength;
            S->pBitRevTable = arm_cfft_sR_f32_len512.pBitRevTable;
            S->pTwiddle = arm_cfft_sR_f32_len512.pTwiddle;
            rfft_instance->fftLenRFFT = 1024U;
            rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024;
            status = ARM_MATH_SUCCESS;
            break;
        }
#endif
#if EI_CLASSIFIER_LOAD_FFT_2048 == 1
        case 2048: {
            arm_cfft_instance_f32 *S = &(rfft_instance->Sint);
            S->fftLen = 1024U;
            S->pTwiddle = NULL;
            S->bitRevLength = arm_cfft_sR_f32_len1024.bitRevLength;
            S->pBitRevTable = arm_cfft_sR_f32_len1024.pBitRevTable;
            S->pTwiddle = arm_cfft_sR_f32_len1024.pTwiddle;
            rfft_instance->fftLenRFFT = 2048U;
            rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048;
            status = ARM_MATH_SUCCESS;
            break;
        }
#endif
#if EI_CLASSIFIER_LOAD_FFT_4096 == 1
        case 4096: {
            arm_cfft_instance_f32 *S = &(rfft_instance->Sint);
            S->fftLen = 2048U;
            S->pTwiddle = NULL;
            S->bitRevLength = arm_cfft_sR_f32_len2048.bitRevLength;
            S->pBitRevTable = arm_cfft_sR_f32_len2048.pBitRevTable;
            S->pTwiddle = arm_cfft_sR_f32_len2048.pTwiddle;
            rfft_instance->fftLenRFFT = 4096U;
            rfft_instance->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096;
            status = ARM_MATH_SUCCESS;
            break;
        }
#endif
        // no table compiled in for this length (or the length is not supported)
        default:
            return EIDSP_FFT_TABLE_NOT_LOADED;
    }
    return status;
#else
    // let CMSIS-DSP select the tables itself
    return arm_rfft_fast_init_f32(rfft_instance, n_fft);
#endif
}
#endif // #if EIDSP_USE_CMSIS_DSP
/**
 * Power spectrum of a frame
 * Every bin holds (1 / fft_points) * magnitude^2, where the magnitude comes
 * from numpy::rfft.
 * @param frame Row of a frame
 * @param frame_size Size of the frame
 * @param out_buffer Out buffer, size should be fft_points / 2 + 1
 * @param out_buffer_size Buffer size
 * @param fft_points (int): The length of FFT. If fft_length is greater than frame_len, the frames will be zero-padded.
 * @returns EIDSP_OK if OK
 */
static int power_spectrum(
    float *frame,
    size_t frame_size,
    float *out_buffer,
    size_t out_buffer_size,
    uint16_t fft_points)
{
    const size_t expected_size = static_cast<size_t>(fft_points / 2 + 1);
    if (out_buffer_size != expected_size) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    // magnitudes first ...
    int r = numpy::rfft(frame, frame_size, out_buffer, out_buffer_size, fft_points);
    if (r != EIDSP_OK) {
        return r;
    }

    // ... then square and scale them in place
    for (size_t ix = 0; ix < out_buffer_size; ix++) {
        const float magnitude = out_buffer[ix];
        out_buffer[ix] = (1.0 / static_cast<float>(fft_points)) * (magnitude * magnitude);
    }

    return EIDSP_OK;
}
static int welch_max_hold(
float *input,
size_t input_size,
float *output,
size_t start_bin,
size_t stop_bin,
size_t fft_points,
bool do_overlap)
{
// save off one point to put back, b/c we're going to calculate in place
float saved_point = 0;
bool do_saved_point = false;
size_t fft_out_size = fft_points / 2 + 1;
float *fft_out;
ei_unique_ptr_t p_fft_out(nullptr, ei_free);
if (input_size < fft_points) {
fft_out = (float *)ei_calloc(fft_out_size, sizeof(float));
p_fft_out.reset(fft_out);
}
else {
// set input as output for in place operation
fft_out = input;
// save off one point to put back, b/c we're going to calculate in place
saved_point = input[fft_points / 2];
do_saved_point = true;
}
// init the output to zeros
memset(output, 0, sizeof(float) * (stop_bin - start_bin));
int input_ix = 0;
while (input_ix < (int)input_size) {
// Figure out if we need any zero padding
size_t n_input_points = input_ix + fft_points <= input_size ? fft_points
: input_size - input_ix;
EI_TRY(power_spectrum(
input + input_ix,
n_input_points,
fft_out,
fft_points / 2 + 1,
fft_points));
int j = 0;
// keep the max of the last frame and everything before
for (size_t i = start_bin; i < stop_bin; i++) {
output[j] = std::max(output[j], fft_out[i]);
j++;
}
if (do_overlap) {
if (do_saved_point) {
// This step only matters first time through
input[fft_points / 2] = saved_point;
do_saved_point = false;
}
input_ix += fft_points / 2;
}
else {
input_ix += fft_points;
}
}
return EIDSP_OK;
}
/**
 * Sample variance of a buffer (squared deviations from the mean, divided by size - 1).
 * @param input Array
 * @param size Size of array
 * @returns The variance, or 0 when fewer than two samples are given
 */
static float variance(float *input, size_t size)
{
    // With fewer than two samples the size - 1 divisor is zero (or wraps around).
    // Report 0 instead, which is also what CMSIS-DSP's arm_var_f32 yields for
    // such inputs, so both build variants agree.
    if (size < 2) {
        return 0.0f;
    }

    float temp;
#if EIDSP_USE_CMSIS_DSP
    arm_var_f32(input, size, &temp);
#else
    // plain C++ path used when CMSIS-DSP is not enabled
    float mean = 0.0f;
    for (size_t i = 0; i < size; i++) {
        mean += input[i];
    }
    mean /= size;

    temp = 0.0f;
    for (size_t i = 0; i < size; i++) {
        temp += (input[i] - mean) * (input[i] - mean);
    }
    temp /= (size - 1);
#endif
    return temp;
}
/**
 * Replaces every element that compares equal to zero with 1e-10, so that the
 * buffer can afterwards be used as the argument of a log function.
 * @param input Array, modified in place
 * @param input_size Size of array
 * @returns void
 */
static void zero_handling(float *input, size_t input_size)
{
    float *const stop = input + input_size;
    for (float *cur = input; cur != stop; ++cur) {
        if (*cur == 0) {
            *cur = 1e-10;
        }
    }
}
/**
 * Replaces zero values in a matrix so that it can afterwards be used as the
 * argument of a log function. Forwards the whole rows * cols buffer to the
 * array overload.
 * @param input Matrix, modified in place
 * @returns void
 */
static void zero_handling(matrix_t *input)
{
    float *const values = input->buffer;
    zero_handling(values, input->rows * input->cols);
}
/**
 * Multiplies every element of a vector by a constant, in place.
 * @param v Vector to modify
 * @param scale Factor applied to each element
 */
__attribute__((unused)) static void scale(fvec& v, float scale) {
    for (size_t i = 0; i < v.size(); i++) {
        v[i] = v[i] * scale;
    }
}
/**
 * Subtracts a constant from every element of a vector, in place.
 * @param v Vector to modify
 * @param b Value subtracted from each element
 */
__attribute__((unused)) static void sub(fvec& v, float b) {
    for (size_t i = 0; i < v.size(); i++) {
        v[i] = v[i] - b;
    }
}
/**
 * Element-wise product of two arrays: y[i] = x[i] * b[i].
 * @param y Output array, n elements
 * @param x First input array, n elements
 * @param b Second input array, n elements
 * @param n Number of elements
 */
__attribute__((unused)) static void mul(float* y, const float* x, float* b, size_t n) {
    for (size_t ix = 0; ix != n; ++ix) {
        const float lhs = x[ix];
        const float rhs = b[ix];
        y[ix] = lhs * rhs;
    }
}
/**
 * First-order difference of an array: d[i] = v[i + 1] - v[i].
 * @param v Input array, n elements
 * @param n Number of elements
 * @returns Vector with n - 1 differences; empty when n is 0 or 1
 */
__attribute__((unused)) static fvec diff(const float* v, size_t n) {
    // n - 1 is unsigned: for n == 0 it would wrap around to a huge length
    if (n < 2) {
        return fvec();
    }

    fvec d(n - 1);
    for (size_t i = 0; i < d.size(); i++) {
        d[i] = v[i + 1] - v[i];
    }
    return d;
}
/**
 * Sum of all elements of an array, accumulated front to back in a float.
 * @param v Input array
 * @param n Number of elements
 * @returns The sum (0 for an empty array)
 */
__attribute__((unused)) static float sum(const float* v, size_t n) {
    float total = 0;
    const float* const stop = v + n;
    for (const float* cur = v; cur != stop; ++cur) {
        total += *cur;
    }
    return total;
}
static float mean(const fvec& v) {
float mean = 0;
for (auto x : v) {
mean += x;
}
mean /= v.size();
return mean;
}
/**
 * Arithmetic mean of an array.
 * @param v Input array
 * @param n Number of elements
 * @returns Sum of the elements divided by n
 */
static float mean(const float* v, size_t n) {
    float total = 0;
    const float* const stop = v + n;
    for (const float* cur = v; cur != stop; ++cur) {
        total += *cur;
    }
    return total / n;
}
static float median(const float* v, size_t n) {
fvec vc(n);
std::copy(v, v + n, vc.begin());
std::sort(vc.begin(), vc.end());
if (vc.size() % 2 == 0) {
return (vc[vc.size() / 2 - 1] + vc[vc.size() / 2]) / 2;
}
return vc[vc.size() / 2];
}
__attribute__((unused)) static float median(const fvec& v) {
return median(v.data(), v.size());
}
/**
 * Standard deviation of an array around a given mean.
 * @param v Input array
 * @param n Number of elements
 * @param m Mean to measure the deviations against
 * @param ddof Value subtracted from n to form the divisor (0 by default)
 * @returns Square root of the summed squared deviations divided by n - ddof
 */
static float stddev(const float* v, size_t n, float m /* mean */, int ddof = 0) {
    float sq_sum = 0;
    for (size_t i = 0; i < n; i++) {
        const float dev = v[i] - m;
        sq_sum += dev * dev;
    }
    sq_sum /= n - ddof;
    return sqrt(sq_sum);
}
__attribute__((unused)) static float stddev(const float* v, size_t n) {
return stddev(v, n, mean(v, n), 0);
}
__attribute__((unused)) static float stddev(const float* v, size_t n, int ddof) {
return stddev(v, n, mean(v, n), ddof);
}
/**
 * Standard deviation of a vector around its own mean.
 * @param v Input vector
 * @param ddof Value subtracted from the element count to form the divisor (0 by default)
 * @returns The standard deviation
 */
__attribute__((unused)) static float stddev(const fvec& v, int ddof = 0) {
    const float center = mean(v);
    return stddev(v.data(), v.size(), center, ddof);
}
/**
 * Root mean square of an array.
 * @param v Input array
 * @param n Number of elements
 * @returns Square root of the mean of the squared elements
 */
static float rms(const float* v, size_t n) {
    float sq_sum = 0;
    const float* const stop = v + n;
    for (const float* cur = v; cur != stop; ++cur) {
        sq_sum += (*cur) * (*cur);
    }
    sq_sum /= n;
    return sqrt(sq_sum);
}
__attribute__((unused)) static float rms(const fvec& v) {
return rms(v.data(), v.size());
}
/**
 * Largest element of a vector, converted to float.
 * @param v Input vector; the result is dereferenced unconditionally, so it
 *          must not be empty
 * @returns The largest element
 */
template <typename T>
static float max(const ei_vector<T>& v) {
    const auto largest = std::max_element(v.begin(), v.end());
    return *largest;
}
/**
 * Largest element of an array.
 * @param v Input array
 * @param n Number of elements; the result is dereferenced unconditionally,
 *          so n must be at least 1
 * @returns The largest element
 */
__attribute__((unused)) static float max(const float* v, size_t n) {
    const float* const stop = v + n;
    const float* const largest = std::max_element(v, stop);
    return *largest;
}
/**
 * Smallest element of a vector, converted to float.
 * @param v Input vector; the result is dereferenced unconditionally, so it
 *          must not be empty
 * @returns The smallest element
 */
template <typename T>
static float min(const ei_vector<T>& v) {
    const auto smallest = std::min_element(v.begin(), v.end());
    return *smallest;
}
/**
 * Smallest element of an array.
 * @param v Input array
 * @param n Number of elements; the result is dereferenced unconditionally,
 *          so n must be at least 1
 * @returns The smallest element
 */
__attribute__((unused)) static float min(const float* v, size_t n) {
    const float* const stop = v + n;
    const float* const smallest = std::min_element(v, stop);
    return *smallest;
}
/**
 * Index of the largest element within a sub-range of a vector.
 * @param v Input vector
 * @param start First index of the range (inclusive)
 * @param end Last index of the range (exclusive)
 * @returns Index, counted from the start of v, of the largest element in [start, end)
 */
__attribute__((unused)) static int argmax(const fvec& v, int start, int end) {
    const auto first = v.begin();
    const auto best = std::max_element(first + start, first + end);
    return best - first;
}
/**
 * Divides a constant by every element of an array.
 * @param num Numerator
 * @param den Array of denominators, n elements
 * @param n Number of elements
 * @returns Vector of n values, where element i is num / den[i]
 */
__attribute__((unused)) static fvec divide(float num, const float* den, size_t n) {
    fvec quotients(n);
    for (size_t ix = 0; ix != n; ++ix) {
        const float denominator = den[ix];
        quotients[ix] = num / denominator;
    }
    return quotients;
}
/**
 * Histogram with equally sized bins: bin k counts the values x for which
 * (x - a) / inc falls in [k, k + 1).
 * @param x Input array
 * @param n Number of elements
 * @param a Start of the first bin
 * @param b End of the binned range; the number of bins is (b - a) / inc
 * @param inc Bin width
 * @returns Vector with the count per bin; empty when inc is 0 or when the
 *          parameters yield no bins. Values outside the range are ignored.
 */
__attribute__((unused)) static ivec histogram(const float* x, size_t n, int a, int b, int inc) {
    if (inc == 0) {
        // would divide by zero below
        return ivec();
    }
    int num_bins = (b - a) / inc;
    if (num_bins <= 0) {
        // a negative count must not reach the vector constructor
        return ivec();
    }

    ivec bins(num_bins, 0);
    for (size_t i = 0; i < n; i++) {
        const float pos = (x[i] - a) / inc;
        // Range-check before the integer conversion: the cast truncates toward
        // zero, so positions in (-1, 0) would otherwise be counted in bin 0.
        // Written as !(pos >= 0) so NaN is rejected as well.
        if (!(pos >= 0.0f) || pos >= num_bins) {
            continue;
        }
        bins[(int)pos]++;
    }
    return bins;
}
/**
 * Cumulative sum of an array: c[i] = v[0] + ... + v[i].
 * @param v Input array
 * @param n Number of elements
 * @returns Vector with n running totals; empty when n is 0
 */
__attribute__((unused)) static fvec cumsum(const float* v, size_t n) {
    fvec c(n);
    if (n == 0) {
        // nothing to seed the running total with; c[0] would be out of bounds
        return c;
    }

    c[0] = v[0];
    for (size_t i = 1; i < n; i++) {
        c[i] = c[i - 1] + v[i];
    }
    return c;
}
/**
 * Evenly spaced values start, start + step, start + 2 * step, ...
 * The number of values is (end - start) / step truncated to an integer.
 * @param start First value; asserted to be smaller than end
 * @param end Upper limit of the range
 * @param step Spacing between values; asserted to be positive
 * @returns Vector with the generated values
 */
__attribute__((unused)) static fvec arrange(float start, float end, float step) {
    assert(start < end);
    assert(step > 0);

    const size_t count = (size_t)((end - start) / step);
    fvec values(count);
    for (size_t ix = 0; ix != count; ++ix) {
        values[ix] = start + ix * step;
    }
    return values;
}
/**
 * Adds b to v element by element, in place.
 * @param v Vector to modify
 * @param b Vector to add; indexed for every element of v, so it must be at
 *          least as long as v
 */
__attribute__((unused)) static void add(fvec& v, fvec& b) {
    const size_t count = v.size();
    for (size_t ix = 0; ix != count; ++ix) {
        v[ix] = v[ix] + b[ix];
    }
}
__attribute__((unused)) static float trapz(const fvec& x, const fvec& y, size_t lo, size_t hi) {
float area = 0;
for (size_t i = lo; i < hi; i++) {
area += (x[i + 1] - x[i]) * (y[i + 1] + y[i]) / 2;
}
return area;
}
/**
 * Quantiles of the sub-range [start, end) of a vector, taken from a sorted
 * copy without interpolation: quantile q maps to sorted element q * count.
 * @param v Input vector
 * @param start First index of the range (inclusive)
 * @param end Last index of the range (exclusive); clamped to v.size()
 * @param q Requested quantiles, as fractions
 * @returns One value per entry of q. Fractions of 1 or more map to the largest
 *          element, fractions of 0 or less to the smallest. When the range is
 *          empty all results are 0.
 */
__attribute__((unused)) static fvec quantile(const fvec& v, size_t start, size_t end, const fvec& q) {
    end = std::min(end, v.size());
    fvec res(q.size());
    if (start >= end) {
        // end - start is unsigned and would wrap around; nothing to rank either
        return res;
    }

    fvec vc(end - start);
    std::copy(v.begin() + start, v.begin() + end, vc.begin());
    std::sort(vc.begin(), vc.end());

    const size_t last = vc.size() - 1;
    for (size_t i = 0; i < q.size(); i++) {
        size_t idx = 0;
        if (q[i] >= 1.0f) {
            // q * count would index one past the end
            idx = last;
        }
        else if (q[i] > 0.0f) {
            idx = (size_t)(q[i] * vc.size());
            if (idx > last) {
                idx = last;
            }
        }
        res[i] = vc[idx];
    }
    return res;
}
/**
 * Quantiles of an array, taken from a sorted copy without interpolation:
 * quantile q maps to sorted element q * n.
 * @param v Input array
 * @param n Number of elements
 * @param q Requested quantiles, as fractions
 * @returns One value per entry of q. Fractions of 1 or more map to the largest
 *          element, fractions of 0 or less to the smallest. When n is 0 all
 *          results are 0.
 */
__attribute__((unused)) static fvec quantile(const float* v, size_t n, const fvec& q) {
    fvec res(q.size());
    if (n == 0) {
        // nothing to rank; indexing the empty copy would be out of bounds
        return res;
    }

    fvec vc(n);
    std::copy(v, v + n, vc.begin());
    std::sort(vc.begin(), vc.end());

    const size_t last = vc.size() - 1;
    for (size_t i = 0; i < q.size(); i++) {
        size_t idx = 0;
        if (q[i] >= 1.0f) {
            // q * count would index one past the end
            idx = last;
        }
        else if (q[i] > 0.0f) {
            idx = (size_t)(q[i] * vc.size());
            if (idx > last) {
                idx = last;
            }
        }
        res[i] = vc[idx];
    }
    return res;
}
/**
 * Dot product of two arrays.
 * @param x First array, n elements
 * @param y Second array, n elements
 * @param n Number of elements
 * @returns Sum of x[i] * y[i] (0 for empty arrays)
 */
static float dot(const float* x, const float* y, size_t n) {
    float acc = 0;
    for (size_t ix = 0; ix != n; ++ix) {
        const float product = x[ix] * y[ix];
        acc += product;
    }
    return acc;
}
/**
 * Cosine similarity of two vectors: dot(x, y) / sqrt(dot(x, x) * dot(y, y)).
 * @param x First vector; its size decides how many elements of y enter dot(x, y)
 * @param y Second vector
 * @returns The cosine similarity
 */
__attribute__((unused)) static float cosine_similarity(const fvec& x, const fvec& y) {
    float similarity = dot(x.data(), y.data(), x.size());
    const float x_sq = dot(x.data(), x.data(), x.size());
    const float y_sq = dot(y.data(), y.data(), y.size());
    similarity /= sqrt(x_sq * y_sq);
    return similarity;
}
/**
 * Replaces every element of a vector by the result of log() on it, in place.
 * @param v Vector to modify
 */
__attribute__((unused)) static void ln(fvec& v) {
    for (size_t i = 0; i < v.size(); i++) {
        v[i] = log(v[i]);
    }
}
/**
 * Smallest power of two that is greater than or equal to x.
 * @param x Value to round up
 * @returns The power of two (1 for x == 0 and x == 1), or 0 when no power of
 *          two >= x fits in a size_t
 */
static size_t next_power_of_2(size_t x) {
    // largest power of two representable in a size_t (only the top bit set)
    const size_t top_bit = ~(((size_t)-1) >> 1);
    if (x > top_bit) {
        // doubling would wrap around to 0 and the loop below would never end
        return 0;
    }

    size_t res = 1;
    while (res < x) {
        res *= 2;
    }
    return res;
}
/**
 * Removes the best-fit straight line from the data, in place: the slope of the
 * line is subtracted first, then the mean of what remains.
 * @param data Samples to detrend, modified in place
 * @param n Number of samples. Nothing is done for n == 0; a single sample
 *          ends up as 0.
 */
static void detrend(float* data, size_t n) {
    if (n == 0) {
        // avoids dividing by zero below
        return;
    }

    // Calculate the mean of the data points
    float mean = 0.0;
    for (size_t i = 0; i < n; i++) {
        mean += data[i];
    }
    mean /= n;

    // Calculate the slope of the best-fit line, with x running from 1 to n
    float x_mean = (n + 1) / 2.0;
    float y_mean = mean;
    float numerator = 0.0;
    float denominator = 0.0;
    for (size_t i = 0; i < n; i++) {
        numerator += (i + 1 - x_mean) * (data[i] - y_mean);
        denominator += (i + 1 - x_mean) * (i + 1 - x_mean);
    }
    // With a single sample there is no spread in x: 0 / 0 would turn the data
    // into NaN, so treat the line as flat instead
    float slope = (denominator == 0.0f) ? 0.0f : numerator / denominator;

    // Subtract the best-fit line from the data points to get the detrended data
    for (size_t i = 0; i < n; i++) {
        data[i] = data[i] - (slope * (i + 1));
    }

    // Calculate the mean of the detrended data
    float detrended_mean = 0.0;
    for (size_t i = 0; i < n; i++) {
        detrended_mean += data[i];
    }
    detrended_mean /= n;

    // Subtract the mean of the detrended data from each element
    for (size_t i = 0; i < n; i++) {
        data[i] -= detrended_mean;
    }
}
/**
 * Detrended copy of a vector; the input is left untouched.
 * @param data Samples to detrend
 * @returns A copy of data that has been run through the in-place detrend
 */
static fvec detrend(const fvec& data) {
    fvec detrended(data);
    detrend(detrended.data(), detrended.size());
    return detrended;
}
};
struct fmat {
ei_matrix* mat = nullptr;
fmat(size_t rows, size_t cols) {
mat = new ei_matrix(rows, cols);
assert(mat);
}
~fmat() {
delete mat;
}
void resize(size_t rows, size_t cols) {
delete mat;
mat = new ei_matrix(rows, cols);
}
float* operator[](size_t i) {
if (mat == nullptr || i >= mat->rows) {
return nullptr;
}
return mat->get_row_ptr(i);
}
void fill(float x) {
if (mat == nullptr) {
return;
}
for (size_t i = 0; i < mat->rows; i++) {
for (size_t j = 0; j < mat->cols; j++) {
(*this)[i][j] = x;
}
}
}
void fill_col(size_t col, float x) {
if (mat == nullptr) {
return;
}
for (size_t i = 0; i < mat->rows; i++) {
(*this)[i][col] = x;
}
}
void fill_row(size_t row, float x) {
if (mat == nullptr) {
return;
}
for (size_t i = 0; i < mat->cols; i++) {
(*this)[row][i] = x;
}
}
};
} // namespace ei
#endif // _EIDSP_NUMPY_H_