|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef _EIDSP_SPEECHPY_FEATURE_H_ |
|
|
#define _EIDSP_SPEECHPY_FEATURE_H_ |
|
|
|
|
|
#include <stdint.h> |
|
|
#include "../../porting/ei_classifier_porting.h" |
|
|
#include "../ei_utils.h" |
|
|
#include "functions.hpp" |
|
|
#include "processing.hpp" |
|
|
#include "../memory.hpp" |
|
|
#include "../returntypes.hpp" |
|
|
#include "../ei_vector.h" |
|
|
|
|
|
namespace ei { |
|
|
namespace speechpy { |
|
|
|
|
|
class feature { |
|
|
public: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Build a bank of triangular filters and store it in `filterbanks`.
 *
 * num_filter + 2 edge points are spaced linearly between
 * functions::frequency_to_mel(low_freq) and functions::frequency_to_mel(high_freq),
 * converted back with functions::mel_to_frequency, clamped to [low_freq, high_freq]
 * and mapped to a column index with floor((coefficients + 1) * hz / sampling_freq).
 * Filter i then covers the columns between edge i and edge i + 2, shaped by
 * functions::triangle around edge i + 1.
 *
 * @param filterbanks Output matrix. Must be num_filter x coefficients on entry.
 *                    The buffer is zeroed before the filters are written.
 *                    When output_transposed is true, its rows/cols fields are
 *                    swapped before returning successfully.
 * @param num_filter Number of filters to generate
 * @param coefficients Number of columns per filter (in the non-transposed layout)
 * @param sampling_freq Sampling frequency used to map a frequency to a column index
 * @param low_freq Lowest edge frequency
 * @param high_freq Highest edge frequency
 * @param output_transposed When true, element (filter, column) is stored at
 *                          column * num_filter + filter instead of
 *                          filter * coefficients + column
 * @returns EIDSP_OK on success, EIDSP_MATRIX_SIZE_MISMATCH when `filterbanks` has
 *          the wrong dimensions, EIDSP_OUT_OF_MEM when a scratch buffer cannot
 *          be allocated
 */
static int filterbanks(
#if EIDSP_QUANTIZE_FILTERBANK
    quantized_matrix_t *filterbanks,
#else
    matrix_t *filterbanks,
#endif
    uint16_t num_filter, int coefficients, uint32_t sampling_freq,
    uint32_t low_freq, uint32_t high_freq,
    bool output_transposed = false
    )
{
    // Validate the output dimensions BEFORE allocating anything, so that the
    // early return below cannot leak a scratch buffer.
    if (filterbanks->rows != num_filter || filterbanks->cols != static_cast<uint32_t>(coefficients)) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    const size_t mels_mem_size = (num_filter + 2) * sizeof(float);
    const size_t hertz_mem_size = (num_filter + 2) * sizeof(float);
    const size_t freq_index_mem_size = (num_filter + 2) * sizeof(int);

    float *mels = (float*)ei_dsp_malloc(mels_mem_size);
    if (!mels) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    // Start from an all-zero bank; only the columns covered by a filter are written below.
#if EIDSP_QUANTIZE_FILTERBANK
    memset(filterbanks->buffer, 0, filterbanks->rows * filterbanks->cols * sizeof(uint8_t));
#else
    memset(filterbanks->buffer, 0, filterbanks->rows * filterbanks->cols * sizeof(float));
#endif

    // num_filter + 2 edge points, evenly spaced between the converted low and high frequency.
    numpy::linspace(
        functions::frequency_to_mel(static_cast<float>(low_freq)),
        functions::frequency_to_mel(static_cast<float>(high_freq)),
        num_filter + 2,
        mels);

    // Convert the edge points back to frequencies, clamped to [low_freq, high_freq].
    float *hertz = (float*)ei_dsp_malloc(hertz_mem_size);
    if (!hertz) {
        ei_dsp_free(mels, mels_mem_size);
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }
    for (uint16_t ix = 0; ix < num_filter + 2; ix++) {
        hertz[ix] = functions::mel_to_frequency(mels[ix]);
        if (hertz[ix] < low_freq) {
            hertz[ix] = low_freq;
        }
        if (hertz[ix] > high_freq) {
            hertz[ix] = high_freq;
        }

        // The last edge is nudged slightly downwards, which can move the floor()
        // below to the previous index.
        // NOTE(review): presumably this mirrors the reference (SpeechPy) output - confirm.
        if (ix == num_filter + 2 - 1) {
            hertz[ix] -= 0.001;
        }
    }
    ei_dsp_free(mels, mels_mem_size);

    // Map every edge frequency to a column index.
    int *freq_index = (int*)ei_dsp_malloc(freq_index_mem_size);
    if (!freq_index) {
        ei_dsp_free(hertz, hertz_mem_size);
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }
    for (uint16_t ix = 0; ix < num_filter + 2; ix++) {
        freq_index[ix] = static_cast<int>(floor((coefficients + 1) * hertz[ix] / sampling_freq));
    }
    ei_dsp_free(hertz, hertz_mem_size);

    for (size_t i = 0; i < num_filter; i++) {
        int left = freq_index[i];
        int middle = freq_index[i + 1];
        int right = freq_index[i + 2];

        // Scratch row holding the column positions left..right, turned into
        // filter weights in place by functions::triangle.
        EI_DSP_MATRIX(z, 1, (right - left + 1));
        if (!z.buffer) {
            ei_dsp_free(freq_index, freq_index_mem_size);
            EIDSP_ERR(EIDSP_OUT_OF_MEM);
        }
        numpy::linspace(left, right, (right - left + 1), z.buffer);
        functions::triangle(z.buffer, (right - left + 1), left, middle, right);

        // Copy the weights into the output, in row-major or transposed layout.
        for (int zx = 0; zx < (right - left + 1); zx++) {
            size_t index = (i * filterbanks->cols) + (left + zx);

            if (output_transposed) {
                index = ((left + zx) * filterbanks->rows) + i;
            }

#if EIDSP_QUANTIZE_FILTERBANK
            filterbanks->buffer[index] = numpy::quantize_zero_one(z.buffer[zx]);
#else
            filterbanks->buffer[index] = z.buffer[zx];
#endif
        }
    }

    // The data was written transposed; make the dimensions describe that layout.
    if (output_transposed) {
        const auto original_rows = filterbanks->rows;
        filterbanks->rows = filterbanks->cols;
        filterbanks->cols = original_rows;
    }

    ei_dsp_free(freq_index, freq_index_mem_size);

    return EIDSP_OK;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Map a frequency to a bin index.
 *
 * @param fft_size Size used for the mapping (fft_size + 1 is the scale factor)
 * @param hertz Frequency to map
 * @param sampling_freq Sampling frequency
 * @returns floor((fft_size + 1) * hertz / sampling_freq) as an int
 */
static int get_fft_bin_from_hertz(uint16_t fft_size, float hertz, uint32_t sampling_freq)
{
    // Scale first, then divide: same evaluation order as the one-line formula.
    const float scaled_frequency = (fft_size + 1) * hertz;
    const float fractional_bin = scaled_frequency / sampling_freq;
    return static_cast<int>(floor(fractional_bin));
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Compute filterbank energy features for a signal, one row per frame.
 *
 * The signal is split into frames by processing::stack_frames. For every frame
 * the power spectrum is computed and each of the num_filters triangular filters
 * is applied to it on the fly (no filterbank matrix is materialised). The
 * filter edges come from num_filters + 2 points spaced linearly between
 * functions::frequency_to_mel(low_frequency) and
 * functions::frequency_to_mel(high_frequency).
 *
 * @param out_features Output matrix, must be (number of frames) x num_filters
 * @param out_energies Optional (may be null) output of (number of frames) x 1;
 *                     receives the sum of the power spectrum of every frame
 *                     (1e-10 when that sum is exactly 0)
 * @param signal Input signal
 * @param sampling_frequency Sampling frequency of the signal
 * @param frame_length Frame length, passed to processing::stack_frames
 * @param frame_stride Frame stride, passed to processing::stack_frames
 * @param num_filters Number of filters / output columns
 * @param fft_length FFT length; the power spectrum has fft_length / 2 + 1 bins
 * @param low_frequency Lowest filter edge. For version < 4 a value of 0 is replaced by 300
 * @param high_frequency Highest filter edge. 0 selects sampling_frequency / 2
 * @param version Implementation version; also forwarded to processing::stack_frames
 * @returns EIDSP_OK on success, EIDSP_MATRIX_SIZE_MISMATCH / EIDSP_OUT_OF_MEM or
 *          the error code of a failing helper otherwise
 */
static int mfe(matrix_t *out_features, matrix_t *out_energies,
    signal_t *signal,
    uint32_t sampling_frequency,
    float frame_length, float frame_stride, uint16_t num_filters,
    uint16_t fft_length, uint32_t low_frequency, uint32_t high_frequency,
    uint16_t version
    )
{
    int ret = 0;

    if (high_frequency == 0) {
        high_frequency = sampling_frequency / 2;
    }

    if (version < 4) {
        if (low_frequency == 0) {
            low_frequency = 300;
        }
    }

    stack_frames_info_t stack_frame_info = { 0 };
    stack_frame_info.signal = signal;

    ret = processing::stack_frames(
        &stack_frame_info,
        sampling_frequency,
        frame_length,
        frame_stride,
        false,
        version
    );
    if (ret != 0) {
        EIDSP_ERR(ret);
    }

    if (stack_frame_info.frame_ixs.size() != out_features->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (num_filters != out_features->cols) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (out_energies) {
        if (stack_frame_info.frame_ixs.size() != out_energies->rows || out_energies->cols != 1) {
            EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
        }
    }

    for (uint32_t i = 0; i < out_features->rows * out_features->cols; i++) {
        *(out_features->buffer + i) = 0;
    }

    const size_t power_spectrum_frame_size = (fft_length / 2 + 1);

    // One scratch buffer is used twice: first as float edge values, then (in place)
    // as uint16_t bin indices. bins[ix] is only written after mels[ix] has been
    // read, and a uint16_t slot never reaches past the float slot just consumed.
    const int MELS_SIZE = num_filters + 2;
    float *mels = (float*)ei_calloc(MELS_SIZE, sizeof(float));
    EI_ERR_AND_RETURN_ON_NULL(mels, EIDSP_OUT_OF_MEM);
    ei_unique_ptr_t mels_guard(mels, ei_free);
    uint16_t *bins = reinterpret_cast<uint16_t*>(mels);

    numpy::linspace(
        functions::frequency_to_mel(static_cast<float>(low_frequency)),
        functions::frequency_to_mel(static_cast<float>(high_frequency)),
        num_filters + 2,
        mels);

    uint16_t max_bin = version >= 4 ? fft_length : power_spectrum_frame_size;

    // All edges except the last one: convert back to a frequency, clamp, map to a bin.
    for (uint16_t ix = 0; ix < MELS_SIZE-1; ix++) {
        mels[ix] = functions::mel_to_frequency(mels[ix]);
        if (mels[ix] < low_frequency) {
            mels[ix] = low_frequency;
        }
        if (mels[ix] > high_frequency) {
            mels[ix] = high_frequency;
        }
        bins[ix] = get_fft_bin_from_hertz(max_bin, mels[ix], sampling_frequency);
    }

    // The last edge is additionally nudged down by 0.001 before it is mapped.
    // NOTE(review): presumably to reproduce the reference implementation's bin
    // for the top edge - confirm.
    mels[MELS_SIZE-1] = functions::mel_to_frequency(mels[MELS_SIZE-1]);
    if (mels[MELS_SIZE-1] > high_frequency) {
        mels[MELS_SIZE-1] = high_frequency;
    }
    mels[MELS_SIZE-1] -= 0.001;
    bins[MELS_SIZE-1] = get_fft_bin_from_hertz(max_bin, mels[MELS_SIZE-1], sampling_frequency);

    EI_DSP_MATRIX(power_spectrum_frame, 1, power_spectrum_frame_size);
    if (!power_spectrum_frame.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    // Frame buffer, reused for every frame.
    EI_DSP_MATRIX(signal_frame, 1, stack_frame_info.frame_length);
    if (!signal_frame.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    const size_t frame_size = stack_frame_info.frame_length;
    const size_t total_length = stack_frame_info.signal->total_length;

    for (size_t ix = 0; ix < stack_frame_info.frame_ixs.size(); ix++) {
        size_t signal_offset = stack_frame_info.frame_ixs.at(ix);
        size_t signal_length = frame_size;
        if (signal_offset + signal_length > total_length) {
            // The frame runs past the end of the signal: read only what is left
            // and zero the remainder of the frame. (The previous unsigned
            // arithmetic wrapped around and made the read longer, not shorter.)
            signal_length = (signal_offset < total_length) ? (total_length - signal_offset) : 0;
            for (size_t j = signal_length; j < frame_size; j++) {
                signal_frame.buffer[j] = 0.0f;
            }
        }

        if (signal_length > 0) {
            ret = stack_frame_info.signal->get_data(
                signal_offset,
                signal_length,
                signal_frame.buffer
            );
            if (ret != 0) {
                EIDSP_ERR(ret);
            }
        }

        ret = numpy::power_spectrum(
            signal_frame.buffer,
            stack_frame_info.frame_length,
            power_spectrum_frame.buffer,
            power_spectrum_frame_size,
            fft_length
        );

        if (ret != 0) {
            EIDSP_ERR(ret);
        }

        float energy = numpy::sum(power_spectrum_frame.buffer, power_spectrum_frame_size);
        if (energy == 0) {
            energy = 1e-10;
        }

        if (out_energies) {
            out_energies->buffer[ix] = energy;
        }

        auto row_ptr = out_features->get_row_ptr(ix);
        for (size_t i = 0; i < num_filters; i++) {
            size_t left = bins[i];
            size_t middle = bins[i+1];
            size_t right = bins[i+2];

            assert(right < power_spectrum_frame_size);

            // The centre bin has weight 1; the bins strictly between left/middle
            // and middle/right are weighted linearly (rising, then falling).
            row_ptr[i] = power_spectrum_frame.buffer[middle];

            for (size_t bin = left+1; bin < right; bin++) {
                if (bin < middle) {
                    row_ptr[i] +=
                        ((static_cast<float>(bin) - left) / (middle - left)) *
                        power_spectrum_frame.buffer[bin];
                }

                if (bin > middle) {
                    row_ptr[i] +=
                        ((right - static_cast<float>(bin)) / (right - middle)) *
                        power_spectrum_frame.buffer[bin];
                }
            }
        }
    }

    numpy::zero_handling(out_features);

    return EIDSP_OK;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Compute filterbank energy features for a signal using an explicit filterbank matrix.
 *
 * The signal is split into frames by processing::stack_frames. A transposed
 * filterbank of num_filters filters over fft_length / 2 + 1 coefficients is
 * built once with feature::filterbanks; for every frame the power spectrum is
 * computed and multiplied with that filterbank through numpy::dot_by_row.
 *
 * @param out_features Output matrix, must be (number of frames) x num_filters
 * @param out_energies Optional (may be null) output of (number of frames) x 1;
 *                     receives the sum of the power spectrum of every frame
 *                     (1e-10 when that sum is exactly 0)
 * @param signal Input signal
 * @param sampling_frequency Sampling frequency of the signal
 * @param frame_length Frame length, passed to processing::stack_frames
 * @param frame_stride Frame stride, passed to processing::stack_frames
 * @param num_filters Number of filters / output columns
 * @param fft_length FFT length; the power spectrum has fft_length / 2 + 1 bins
 * @param low_frequency Lowest filter edge. 0 is replaced by 300
 * @param high_frequency Highest filter edge. 0 selects sampling_frequency / 2
 * @param version Implementation version, forwarded to processing::stack_frames
 * @returns EIDSP_OK on success, EIDSP_MATRIX_SIZE_MISMATCH / EIDSP_OUT_OF_MEM or
 *          the error code of a failing helper otherwise
 */
static int mfe_v3(matrix_t *out_features, matrix_t *out_energies,
    signal_t *signal,
    uint32_t sampling_frequency,
    float frame_length, float frame_stride, uint16_t num_filters,
    uint16_t fft_length, uint32_t low_frequency, uint32_t high_frequency,
    uint16_t version
    )
{
    int ret = 0;

    if (high_frequency == 0) {
        high_frequency = sampling_frequency / 2;
    }

    if (low_frequency == 0) {
        low_frequency = 300;
    }

    stack_frames_info_t stack_frame_info = { 0 };
    stack_frame_info.signal = signal;

    ret = processing::stack_frames(
        &stack_frame_info,
        sampling_frequency,
        frame_length,
        frame_stride,
        false,
        version
    );
    if (ret != 0) {
        EIDSP_ERR(ret);
    }

    if (stack_frame_info.frame_ixs.size() != out_features->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (num_filters != out_features->cols) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    if (out_energies) {
        if (stack_frame_info.frame_ixs.size() != out_energies->rows || out_energies->cols != 1) {
            EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
        }
    }

    for (uint32_t i = 0; i < out_features->rows * out_features->cols; i++) {
        *(out_features->buffer + i) = 0;
    }

    uint16_t coefficients = fft_length / 2 + 1;

    // Build the (transposed) filterbank once; it is shared by all frames.
#if EIDSP_QUANTIZE_FILTERBANK
    EI_DSP_QUANTIZED_MATRIX(filterbanks, num_filters, coefficients, &numpy::dequantize_zero_one);
#else
    EI_DSP_MATRIX(filterbanks, num_filters, coefficients);
#endif
    if (!filterbanks.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    ret = feature::filterbanks(
        &filterbanks, num_filters, coefficients, sampling_frequency, low_frequency, high_frequency, true);
    if (ret != 0) {
        EIDSP_ERR(ret);
    }

    // Loop-invariant sizes.
    const size_t power_spectrum_frame_size = (fft_length / 2 + 1);
    const size_t frame_size = stack_frame_info.frame_length;
    const size_t total_length = stack_frame_info.signal->total_length;

    for (size_t ix = 0; ix < stack_frame_info.frame_ixs.size(); ix++) {
        EI_DSP_MATRIX(power_spectrum_frame, 1, power_spectrum_frame_size);
        if (!power_spectrum_frame.buffer) {
            EIDSP_ERR(EIDSP_OUT_OF_MEM);
        }

        EI_DSP_MATRIX(signal_frame, 1, stack_frame_info.frame_length);
        if (!signal_frame.buffer) {
            EIDSP_ERR(EIDSP_OUT_OF_MEM);
        }

        size_t signal_offset = stack_frame_info.frame_ixs.at(ix);
        size_t signal_length = frame_size;
        if (signal_offset + signal_length > total_length) {
            // The frame runs past the end of the signal: read only what is left
            // and zero the remainder of the frame. (The previous unsigned
            // arithmetic wrapped around and made the read longer, not shorter.)
            signal_length = (signal_offset < total_length) ? (total_length - signal_offset) : 0;
            for (size_t j = signal_length; j < frame_size; j++) {
                signal_frame.buffer[j] = 0.0f;
            }
        }

        if (signal_length > 0) {
            ret = stack_frame_info.signal->get_data(
                signal_offset,
                signal_length,
                signal_frame.buffer
            );
            if (ret != 0) {
                EIDSP_ERR(ret);
            }
        }

        ret = numpy::power_spectrum(
            signal_frame.buffer,
            stack_frame_info.frame_length,
            power_spectrum_frame.buffer,
            power_spectrum_frame_size,
            fft_length
        );

        if (ret != 0) {
            EIDSP_ERR(ret);
        }

        float energy = numpy::sum(power_spectrum_frame.buffer, power_spectrum_frame_size);
        if (energy == 0) {
            energy = 1e-10;
        }

        if (out_energies) {
            out_energies->buffer[ix] = energy;
        }

        // Row ix of out_features = power spectrum of this frame x filterbank.
        ret = numpy::dot_by_row(
            ix,
            power_spectrum_frame.buffer,
            power_spectrum_frame_size,
            &filterbanks,
            out_features
        );

        if (ret != 0) {
            EIDSP_ERR(ret);
        }
    }

    numpy::zero_handling(out_features);

    return EIDSP_OK;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Compute a spectrogram: the power spectrum of every frame of the signal.
 *
 * The signal is split into frames by processing::stack_frames and the power
 * spectrum of frame ix is written straight into row ix of out_features.
 *
 * @param out_features Output matrix, must be (number of frames) x (fft_length / 2 + 1)
 * @param signal Input signal
 * @param sampling_frequency Sampling frequency of the signal
 * @param frame_length Frame length, passed to processing::stack_frames
 * @param frame_stride Frame stride, passed to processing::stack_frames
 * @param fft_length FFT length
 * @param version Implementation version. From version 3 on, a frame containing
 *                any sample outside [-1, 1] is scaled by 1 / 32768 first
 * @returns EIDSP_OK on success, EIDSP_MATRIX_SIZE_MISMATCH / EIDSP_OUT_OF_MEM or
 *          the error code of a failing helper otherwise
 */
static int spectrogram(matrix_t *out_features,
    signal_t *signal, float sampling_frequency,
    float frame_length, float frame_stride, uint16_t fft_length,
    uint16_t version
    )
{
    int ret = 0;

    stack_frames_info_t stack_frame_info = { 0 };
    stack_frame_info.signal = signal;

    ret = processing::stack_frames(
        &stack_frame_info,
        sampling_frequency,
        frame_length,
        frame_stride,
        false,
        version
    );
    if (ret != 0) {
        EIDSP_ERR(ret);
    }

    if (stack_frame_info.frame_ixs.size() != out_features->rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    uint16_t coefficients = fft_length / 2 + 1;

    if (coefficients != out_features->cols) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    for (uint32_t i = 0; i < out_features->rows * out_features->cols; i++) {
        *(out_features->buffer + i) = 0;
    }

    const size_t frame_size = stack_frame_info.frame_length;
    const size_t total_length = stack_frame_info.signal->total_length;

    for (size_t ix = 0; ix < stack_frame_info.frame_ixs.size(); ix++) {
        EI_DSP_MATRIX(signal_frame, 1, stack_frame_info.frame_length);
        if (!signal_frame.buffer) {
            EIDSP_ERR(EIDSP_OUT_OF_MEM);
        }

        size_t signal_offset = stack_frame_info.frame_ixs.at(ix);
        size_t signal_length = frame_size;
        if (signal_offset + signal_length > total_length) {
            // The frame runs past the end of the signal: read only what is left
            // and zero the remainder of the frame. (The previous unsigned
            // arithmetic wrapped around and made the read longer, not shorter.)
            signal_length = (signal_offset < total_length) ? (total_length - signal_offset) : 0;
            for (size_t j = signal_length; j < frame_size; j++) {
                signal_frame.buffer[j] = 0.0f;
            }
        }

        if (signal_length > 0) {
            ret = stack_frame_info.signal->get_data(
                signal_offset,
                signal_length,
                signal_frame.buffer
            );
            if (ret != 0) {
                EIDSP_ERR(ret);
            }
        }

        if (version >= 3) {
            // Samples outside [-1, 1] are taken as a sign that the frame is not
            // normalised yet; in that case the whole frame is divided by 32768.
            bool all_between_min_1_and_1 = true;
            // Separate index name: the frame index `ix` must stay visible below.
            for (size_t sample_ix = 0; sample_ix < signal_frame.rows * signal_frame.cols; sample_ix++) {
                if (signal_frame.buffer[sample_ix] < -1.0f || signal_frame.buffer[sample_ix] > 1.0f) {
                    all_between_min_1_and_1 = false;
                    break;
                }
            }

            if (!all_between_min_1_and_1) {
                ret = numpy::scale(&signal_frame, 1.0f / 32768.0f);
                if (ret != 0) {
                    EIDSP_ERR(ret);
                }
            }
        }

        // Write the power spectrum directly into row ix of the output.
        ret = numpy::power_spectrum(
            signal_frame.buffer,
            stack_frame_info.frame_length,
            out_features->buffer + (ix * coefficients),
            coefficients,
            fft_length
        );

        if (ret != 0) {
            EIDSP_ERR(ret);
        }
    }

    numpy::zero_handling(out_features);

    return EIDSP_OK;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Work out the dimensions of the matrix that mfe() fills for a given signal.
 *
 * @param signal_length Length of the signal
 * @param sampling_frequency Sampling frequency of the signal
 * @param frame_length Frame length, passed to processing::calculate_no_of_stack_frames
 * @param frame_stride Frame stride, passed to processing::calculate_no_of_stack_frames
 * @param num_filters Number of filters (becomes the column count)
 * @param version Implementation version, passed to processing::calculate_no_of_stack_frames
 * @returns rows = number of stack frames, cols = num_filters
 */
static matrix_size_t calculate_mfe_buffer_size(
    size_t signal_length,
    uint32_t sampling_frequency,
    float frame_length, float frame_stride, uint16_t num_filters,
    uint16_t version)
{
    // One output row per frame; zero padding is disabled (the `false` argument).
    const int32_t frame_count = processing::calculate_no_of_stack_frames(
        signal_length, sampling_frequency, frame_length, frame_stride, false, version);
    const int32_t filter_count = num_filters;

    matrix_size_t dimensions;
    dimensions.rows = static_cast<uint32_t>(frame_count);
    dimensions.cols = static_cast<uint32_t>(filter_count);
    return dimensions;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Compute cepstral coefficients for a signal, one row per frame.
 *
 * Steps, as performed below: filterbank energies through mfe(), element-wise
 * numpy::log, numpy::dct2 with DCT_NORMALIZATION_ORTHO, optional replacement of
 * the first coefficient, and finally a copy of the first num_cepstral columns
 * of every row into out_features.
 *
 * @param out_features Output matrix, must be (number of frames) x num_cepstral
 * @param signal Input signal
 * @param sampling_frequency Sampling frequency of the signal
 * @param frame_length Frame length, forwarded to mfe()
 * @param frame_stride Frame stride, forwarded to mfe()
 * @param num_cepstral Number of coefficients to keep per frame; must not exceed num_filters
 * @param num_filters Number of filters used for the intermediate mfe() features
 * @param fft_length FFT length, forwarded to mfe()
 * @param low_frequency Lowest filter edge, forwarded to mfe()
 * @param high_frequency Highest filter edge, forwarded to mfe()
 * @param dc_elimination When true, the first coefficient of every frame is
 *                       replaced by the log of that frame's energy
 * @param version Implementation version, forwarded to mfe()
 * @returns EIDSP_OK on success, EIDSP_MATRIX_SIZE_MISMATCH / EIDSP_OUT_OF_MEM or
 *          the error code of a failing helper otherwise
 */
static int mfcc(matrix_t *out_features, signal_t *signal,
    uint32_t sampling_frequency, float frame_length, float frame_stride,
    uint8_t num_cepstral, uint16_t num_filters, uint16_t fft_length,
    uint32_t low_frequency, uint32_t high_frequency, bool dc_elimination,
    uint16_t version)
{
    if (out_features->cols != num_cepstral) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    // The final copy takes num_cepstral values out of rows that are only
    // num_filters wide; asking for more would read past the end of each row
    // (and past the end of the buffer for the last row).
    if (num_cepstral > num_filters) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    matrix_size_t mfe_matrix_size =
        calculate_mfe_buffer_size(
            signal->total_length,
            sampling_frequency,
            frame_length,
            frame_stride,
            num_filters,
            version);

    if (out_features->rows != mfe_matrix_size.rows) {
        EIDSP_ERR(EIDSP_MATRIX_SIZE_MISMATCH);
    }

    int ret = EIDSP_OK;

    // Intermediate filterbank energies: (number of frames) x num_filters.
    EI_DSP_MATRIX(features_matrix, mfe_matrix_size.rows, mfe_matrix_size.cols);
    if (!features_matrix.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    // Total energy per frame: (number of frames) x 1.
    EI_DSP_MATRIX(energy_matrix, mfe_matrix_size.rows, 1);
    if (!energy_matrix.buffer) {
        EIDSP_ERR(EIDSP_OUT_OF_MEM);
    }

    ret = mfe(&features_matrix, &energy_matrix, signal,
        sampling_frequency, frame_length, frame_stride, num_filters, fft_length,
        low_frequency, high_frequency, version);
    if (ret != EIDSP_OK) {
        EIDSP_ERR(ret);
    }

    // Log of the filterbank energies, in place.
    ret = numpy::log(&features_matrix);
    if (ret != EIDSP_OK) {
        EIDSP_ERR(ret);
    }

    // DCT over the log energies, in place.
    ret = numpy::dct2(&features_matrix, DCT_NORMALIZATION_ORTHO);
    if (ret != EIDSP_OK) {
        EIDSP_ERR(ret);
    }

    // Replace the first coefficient of every frame with the log of its energy.
    if (dc_elimination) {
        for (size_t row = 0; row < features_matrix.rows; row++) {
            features_matrix.buffer[row * features_matrix.cols] = numpy::log(energy_matrix.buffer[row]);
        }
    }

    // Keep only the first num_cepstral coefficients of every frame.
    for (size_t row = 0; row < features_matrix.rows; row++) {
        for(int i = 0; i < num_cepstral; i++) {
            *(out_features->buffer + (num_cepstral * row) + i) = *(features_matrix.buffer + (features_matrix.cols * row) + i);
        }
    }

    return EIDSP_OK;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Work out the dimensions of the matrix that mfcc() fills for a given signal.
 *
 * @param signal_length Length of the signal
 * @param sampling_frequency Sampling frequency of the signal
 * @param frame_length Frame length, passed to processing::calculate_no_of_stack_frames
 * @param frame_stride Frame stride, passed to processing::calculate_no_of_stack_frames
 * @param num_cepstral Number of cepstral coefficients (becomes the column count)
 * @param version Implementation version, passed to processing::calculate_no_of_stack_frames
 * @returns rows = number of stack frames, cols = num_cepstral
 */
static matrix_size_t calculate_mfcc_buffer_size(
    size_t signal_length,
    uint32_t sampling_frequency,
    float frame_length, float frame_stride, uint16_t num_cepstral,
    uint16_t version)
{
    // One output row per frame; zero padding is disabled (the `false` argument).
    const int32_t frame_count = processing::calculate_no_of_stack_frames(
        signal_length, sampling_frequency, frame_length, frame_stride, false, version);
    const int32_t coefficient_count = num_cepstral;

    matrix_size_t dimensions;
    dimensions.rows = static_cast<uint32_t>(frame_count);
    dimensions.cols = static_cast<uint32_t>(coefficient_count);
    return dimensions;
}
|
|
}; |
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
#endif |
|
|
|