| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifdef HAVE_CONFIG_H |
| | #include <config.h> |
| | #endif |
| |
|
| | #include <math.h> |
| |
|
| | #include <pocketsphinx.h> |
| |
|
| | #include "util/ckd_alloc.h" |
| | #include "util/strfuncs.h" |
| |
|
| | #include "fe/fe_noise.h" |
| | #include "fe/fe_internal.h" |
| |
|
| | |
/*
 * Tuning constants for the noise tracker.  The descriptions follow how
 * each value is used by the code in this file.
 */

/* Number of neighbouring channels on each side averaged when smoothing gains. */
#define SMOOTH_WINDOW            4

/* Weight kept from the previous frame when smoothing the power. */
#define LAMBDA_POWER             0.7

/* Envelope memory when the input is at or above the current envelope. */
#define LAMBDA_A                 0.995

/* Envelope memory when the input is below the current envelope. */
#define LAMBDA_B                 0.5

/* Per-frame decay applied to the running peak in temporal masking. */
#define LAMBDA_T                 0.85

/* Scale applied to the decayed peak when a channel is masked. */
#define MU_T                     0.2

/* Upper bound on the per-channel gain; its reciprocal is the lower bound. */
#define MAX_GAIN                 20

/* The three constants below are not referenced by the code visible here. */
#define SLOW_PEAK_FORGET_FACTOR  0.9995
#define SLOW_PEAK_LEARN_FACTOR   0.9
#define SPEECH_VOLUME_RANGE      8.0
| |
|
| | struct noise_stats_s { |
| | |
| | powspec_t *power; |
| | |
| | powspec_t *noise; |
| | |
| | powspec_t *floor; |
| | |
| | powspec_t *peak; |
| | |
| | powspec_t *signal, *gain; |
| |
|
| | |
| | int undefined; |
| | |
| | int num_filters; |
| |
|
| | |
| | powspec_t slow_peak_sum; |
| |
|
| | |
| | powspec_t lambda_power; |
| | powspec_t comp_lambda_power; |
| | powspec_t lambda_a; |
| | powspec_t comp_lambda_a; |
| | powspec_t lambda_b; |
| | powspec_t comp_lambda_b; |
| | powspec_t lambda_t; |
| | powspec_t mu_t; |
| | powspec_t max_gain; |
| | powspec_t inv_max_gain; |
| |
|
| | powspec_t smooth_scaling[2 * SMOOTH_WINDOW + 3]; |
| | }; |
| |
|
| | static void |
| | fe_lower_envelope(noise_stats_t *noise_stats, const powspec_t *buf, powspec_t *floor_buf, int32 num_filt) |
| | { |
| | int i; |
| |
|
| | for (i = 0; i < num_filt; i++) { |
| | #ifndef FIXED_POINT |
| | if (buf[i] >= floor_buf[i]) { |
| | floor_buf[i] = |
| | noise_stats->lambda_a * floor_buf[i] + noise_stats->comp_lambda_a * buf[i]; |
| | } |
| | else { |
| | floor_buf[i] = |
| | noise_stats->lambda_b * floor_buf[i] + noise_stats->comp_lambda_b * buf[i]; |
| | } |
| | #else |
| | if (buf[i] >= floor_buf[i]) { |
| | floor_buf[i] = fe_log_add(noise_stats->lambda_a + floor_buf[i], |
| | noise_stats->comp_lambda_a + buf[i]); |
| | } |
| | else { |
| | floor_buf[i] = fe_log_add(noise_stats->lambda_b + floor_buf[i], |
| | noise_stats->comp_lambda_b + buf[i]); |
| | } |
| | #endif |
| | } |
| | } |
| |
|
| | |
| | static void |
| | fe_temp_masking(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * peak, int32 num_filt) |
| | { |
| | powspec_t cur_in; |
| | int i; |
| |
|
| | for (i = 0; i < num_filt; i++) { |
| | cur_in = buf[i]; |
| |
|
| | #ifndef FIXED_POINT |
| | peak[i] *= noise_stats->lambda_t; |
| | if (buf[i] < noise_stats->lambda_t * peak[i]) |
| | buf[i] = peak[i] * noise_stats->mu_t; |
| | #else |
| | peak[i] += noise_stats->lambda_t; |
| | if (buf[i] < noise_stats->lambda_t + peak[i]) |
| | buf[i] = peak[i] + noise_stats->mu_t; |
| | #endif |
| |
|
| | if (cur_in > peak[i]) |
| | peak[i] = cur_in; |
| | } |
| | } |
| |
|
| | |
| | static void |
| | fe_weight_smooth(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * coefs, int32 num_filt) |
| | { |
| | int i, j; |
| | int l1, l2; |
| | powspec_t coef; |
| |
|
| | for (i = 0; i < num_filt; i++) { |
| | l1 = ((i - SMOOTH_WINDOW) > 0) ? (i - SMOOTH_WINDOW) : 0; |
| | l2 = ((i + SMOOTH_WINDOW) < |
| | (num_filt - 1)) ? (i + SMOOTH_WINDOW) : (num_filt - 1); |
| |
|
| | #ifndef FIXED_POINT |
| | (void)noise_stats; |
| | coef = 0; |
| | for (j = l1; j <= l2; j++) { |
| | coef += coefs[j]; |
| | } |
| | buf[i] = buf[i] * (coef / (l2 - l1 + 1)); |
| | #else |
| | coef = MIN_FIXLOG; |
| | for (j = l1; j <= l2; j++) { |
| | coef = fe_log_add(coef, coefs[j]); |
| | } |
| | buf[i] = buf[i] + coef - noise_stats->smooth_scaling[l2 - l1 + 1]; |
| | #endif |
| |
|
| | } |
| | } |
| |
|
| | noise_stats_t * |
| | fe_init_noisestats(int num_filters) |
| | { |
| | int i; |
| | noise_stats_t *noise_stats; |
| |
|
| | noise_stats = (noise_stats_t *) ckd_calloc(1, sizeof(noise_stats_t)); |
| |
|
| | noise_stats->power = |
| | (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); |
| | noise_stats->noise = |
| | (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); |
| | noise_stats->floor = |
| | (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); |
| | noise_stats->peak = |
| | (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); |
| |
|
| | noise_stats->undefined = TRUE; |
| | noise_stats->num_filters = num_filters; |
| |
|
| | #ifndef FIXED_POINT |
| | noise_stats->lambda_power = LAMBDA_POWER; |
| | noise_stats->comp_lambda_power = 1 - LAMBDA_POWER; |
| | noise_stats->lambda_a = LAMBDA_A; |
| | noise_stats->comp_lambda_a = 1 - LAMBDA_A; |
| | noise_stats->lambda_b = LAMBDA_B; |
| | noise_stats->comp_lambda_b = 1 - LAMBDA_B; |
| | noise_stats->lambda_t = LAMBDA_T; |
| | noise_stats->mu_t = MU_T; |
| | noise_stats->max_gain = MAX_GAIN; |
| | noise_stats->inv_max_gain = 1.0 / MAX_GAIN; |
| | |
| | for (i = 1; i < 2 * SMOOTH_WINDOW + 1; i++) { |
| | noise_stats->smooth_scaling[i] = 1.0 / i; |
| | } |
| | #else |
| | noise_stats->lambda_power = FLOAT2FIX(log(LAMBDA_POWER)); |
| | noise_stats->comp_lambda_power = FLOAT2FIX(log(1 - LAMBDA_POWER)); |
| | noise_stats->lambda_a = FLOAT2FIX(log(LAMBDA_A)); |
| | noise_stats->comp_lambda_a = FLOAT2FIX(log(1 - LAMBDA_A)); |
| | noise_stats->lambda_b = FLOAT2FIX(log(LAMBDA_B)); |
| | noise_stats->comp_lambda_b = FLOAT2FIX(log(1 - LAMBDA_B)); |
| | noise_stats->lambda_t = FLOAT2FIX(log(LAMBDA_T)); |
| | noise_stats->mu_t = FLOAT2FIX(log(MU_T)); |
| | noise_stats->max_gain = FLOAT2FIX(log(MAX_GAIN)); |
| | noise_stats->inv_max_gain = FLOAT2FIX(log(1.0 / MAX_GAIN)); |
| |
|
| | for (i = 1; i < 2 * SMOOTH_WINDOW + 3; i++) { |
| | noise_stats->smooth_scaling[i] = FLOAT2FIX(log(i)); |
| | } |
| | #endif |
| |
|
| | noise_stats->signal = (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); |
| | noise_stats->gain = (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); |
| |
|
| | return noise_stats; |
| | } |
| |
|
| | void |
| | fe_reset_noisestats(noise_stats_t * noise_stats) |
| | { |
| | if (noise_stats) |
| | noise_stats->undefined = TRUE; |
| | } |
| |
|
| | void |
| | fe_free_noisestats(noise_stats_t * noise_stats) |
| | { |
| | ckd_free(noise_stats->signal); |
| | ckd_free(noise_stats->gain); |
| | ckd_free(noise_stats->power); |
| | ckd_free(noise_stats->noise); |
| | ckd_free(noise_stats->floor); |
| | ckd_free(noise_stats->peak); |
| | ckd_free(noise_stats); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | void |
| | fe_remove_noise(fe_t * fe) |
| | { |
| | noise_stats_t *noise_stats; |
| | powspec_t *mfspec; |
| | int32 i, num_filts; |
| |
|
| | if (fe->noise_stats == NULL) |
| | return; |
| |
|
| | noise_stats = fe->noise_stats; |
| | mfspec = fe->mfspec; |
| | num_filts = noise_stats->num_filters; |
| |
|
| | if (noise_stats->undefined) { |
| | noise_stats->slow_peak_sum = FIX2FLOAT(0.0); |
| | for (i = 0; i < num_filts; i++) { |
| | noise_stats->power[i] = mfspec[i]; |
| | #ifndef FIXED_POINT |
| | noise_stats->noise[i] = mfspec[i] / noise_stats->max_gain; |
| | noise_stats->floor[i] = mfspec[i] / noise_stats->max_gain; |
| | noise_stats->peak[i] = 0.0; |
| | #else |
| | noise_stats->noise[i] = mfspec[i] - noise_stats->max_gain;; |
| | noise_stats->floor[i] = mfspec[i] - noise_stats->max_gain; |
| | noise_stats->peak[i] = MIN_FIXLOG; |
| | #endif |
| | } |
| | noise_stats->undefined = FALSE; |
| | } |
| |
|
| | |
| | for (i = 0; i < num_filts; i++) { |
| | #ifndef FIXED_POINT |
| | noise_stats->power[i] = |
| | noise_stats->lambda_power * noise_stats->power[i] + noise_stats->comp_lambda_power * mfspec[i]; |
| | #else |
| | noise_stats->power[i] = fe_log_add(noise_stats->lambda_power + noise_stats->power[i], |
| | noise_stats->comp_lambda_power + mfspec[i]); |
| | #endif |
| | } |
| |
|
| | |
| | fe_lower_envelope(noise_stats, noise_stats->power, noise_stats->noise, num_filts); |
| |
|
| | |
| | for (i = 0; i < num_filts; i++) { |
| | #ifndef FIXED_POINT |
| | noise_stats->signal[i] = noise_stats->power[i] - noise_stats->noise[i]; |
| | if (noise_stats->signal[i] < 1.0) |
| | noise_stats->signal[i] = 1.0; |
| | #else |
| | noise_stats->signal[i] = fe_log_sub(noise_stats->power[i], noise_stats->noise[i]); |
| | #endif |
| | } |
| |
|
| | |
| | |
| | fe_lower_envelope(noise_stats, noise_stats->signal, noise_stats->floor, num_filts); |
| |
|
| | fe_temp_masking(noise_stats, noise_stats->signal, noise_stats->peak, num_filts); |
| |
|
| | for (i = 0; i < num_filts; i++) { |
| | if (noise_stats->signal[i] < noise_stats->floor[i]) |
| | noise_stats->signal[i] = noise_stats->floor[i]; |
| | } |
| |
|
| | #ifndef FIXED_POINT |
| | for (i = 0; i < num_filts; i++) { |
| | if (noise_stats->signal[i] < noise_stats->max_gain * noise_stats->power[i]) |
| | noise_stats->gain[i] = noise_stats->signal[i] / noise_stats->power[i]; |
| | else |
| | noise_stats->gain[i] = noise_stats->max_gain; |
| | if (noise_stats->gain[i] < noise_stats->inv_max_gain) |
| | noise_stats->gain[i] = noise_stats->inv_max_gain; |
| | } |
| | #else |
| | for (i = 0; i < num_filts; i++) { |
| | noise_stats->gain[i] = noise_stats->signal[i] - noise_stats->power[i]; |
| | if (noise_stats->gain[i] > noise_stats->max_gain) |
| | noise_stats->gain[i] = noise_stats->max_gain; |
| | if (noise_stats->gain[i] < noise_stats->inv_max_gain) |
| | noise_stats->gain[i] = noise_stats->inv_max_gain; |
| | } |
| | #endif |
| |
|
| | |
| | fe_weight_smooth(noise_stats, mfspec, noise_stats->gain, num_filts); |
| | } |
| |
|