/* Test voice activity detection.
*
* MIT license (c) 2022, see LICENSE for more information.
*
* Author: David Huggins-Daines <dhdaines@gmail.com>
*/
#include <pocketsphinx.h>
#include "util/ckd_alloc.h"
#include "test_macros.h"
/*
 * Expected per-frame classifications, one string per VAD mode: the
 * array index is the mode passed to ps_vad_init().  Each character
 * stands for one frame, '1' where the frame should be classified as
 * speech and '0' otherwise.
 */
static const char *const expecteds[] = {
    "011110111111111111111111111100",
    "011110111111111111111111111100",
    "000000111111111111111111110000",
    "000000111111111111111100000000"
};
/* Number of VAD modes exercised (one per entry in expecteds). */
static const int n_modes = sizeof(expecteds) / sizeof(expecteds[0]);
/*
 * Sample rates at which the VAD is exercised.  The table is only ever
 * read, so it is declared const.
 */
static const int sample_rates[] = {
    8000,
    16000,
    32000,
    48000,
    11025,
    22050,
    44100
};
/* Number of entries in sample_rates. */
static const int n_sample_rates = sizeof(sample_rates) / sizeof(sample_rates[0]);
/*
 * Open the VAD test audio as a stream of raw samples at sample_rate.
 *
 * The stored file is 8000 Hz, so that rate is served directly with
 * fopen() (the result may be NULL; the caller must check it).  Any
 * other rate is produced by piping the file through the external
 * "sox" command with popen(); failure to build or launch the command
 * is fatal.
 *
 * The returned stream must be released with close_data() using the
 * same sample_rate, because a pipe and a plain file are closed
 * differently.
 */
static FILE *
open_data(int sample_rate)
{
    char *soxcmd;
    int len;
    FILE *sox;

#define SOXCMD "sox -q -r 8000 -c 1 -b 16 -e signed-integer -t raw -D -G " \
    DATADIR "/vad/test-audio.raw -r %d -t raw -"
    if (sample_rate == 8000)
        return fopen(DATADIR "/vad/test-audio.raw", "rb");

    /* First pass only measures the formatted command length. */
    len = snprintf(NULL, 0, SOXCMD, sample_rate);
    /* A negative length means formatting failed; without this check
     * a second failure would compare equal to len and hand popen()
     * an unwritten buffer. */
    if (len < 0)
        E_FATAL_SYSTEM("snprintf() failed");
    if ((soxcmd = malloc((size_t)len + 1)) == NULL)
        E_FATAL_SYSTEM("Failed to allocate string");
    if (snprintf(soxcmd, (size_t)len + 1, SOXCMD, sample_rate) != len)
        E_FATAL_SYSTEM("snprintf() failed");
    if ((sox = popen(soxcmd, "r")) == NULL)
        E_FATAL_SYSTEM("Failed to popen(%s)", soxcmd);
    /* The command string is no longer needed once the pipe is open. */
    free(soxcmd);
    return sox;
}
/*
 * Release a stream obtained from open_data().
 *
 * Streams for any rate other than 8000 are closed with pclose();
 * the 8000 stream is closed with fclose().
 */
static void
close_data(FILE *fh, int sample_rate)
{
    if (sample_rate != 8000) {
        pclose(fh);
        return;
    }
    fclose(fh);
}
/*
 * Run every VAD mode over the test audio at one sample rate.
 *
 * For each mode a VAD is created with a requested frame length of
 * 0.03 s, the audio is read one frame at a time, and each frame's
 * classification is recorded as '1' (speech) or '0' in a string.  The
 * string is compared against expecteds[mode] only when the rate is
 * not 48000 and the VAD's actual frame length is exactly 0.03.
 *
 * Returns 0 when it runs to completion; failed checks are handled by
 * the TEST_* macros from test_macros.h.
 */
static int
test_sample_rate(int sample_rate)
{
    ps_vad_t *vader;
    short *frame;
    int i;

    /* Test VAD modes with py-webrtcvad test data. */
    for (i = 0; i < n_modes; ++i) {
        FILE *fh;
        size_t frame_size;
        size_t n_frames = 0;
        size_t max_frames;
        char *classification;

        E_INFO("Sample rate %d, mode %d\n", sample_rate, i);
        /* Extra space for approximate rates: room for twice the
         * expected number of frames, plus the terminating NUL which
         * the zero-filled allocation provides. */
        max_frames = strlen(expecteds[i]) * 2;
        classification = ckd_calloc(max_frames + 1, sizeof(*classification));
        vader = ps_vad_init(i, sample_rate, 0.03);
        TEST_ASSERT(vader);
        frame_size = ps_vad_frame_size(vader);
        frame = ckd_calloc(frame_size, sizeof(*frame));
        TEST_ASSERT(frame);
        fh = open_data(sample_rate);
        TEST_ASSERT(fh);
        /* Only whole frames are classified; a short final read ends
         * the loop. */
        while (fread(frame, sizeof(*frame), frame_size, fh) == frame_size) {
            int is_speech = ps_vad_classify(vader, frame);
            TEST_ASSERT(is_speech != PS_VAD_ERROR);
            /* Fail the test rather than write past the buffer if the
             * audio yields more frames than anticipated. */
            TEST_ASSERT(n_frames < max_frames);
            classification[n_frames++] =
                (is_speech == PS_VAD_SPEECH) ? '1' : '0';
        }
        E_INFO("true: %s\n", expecteds[i]);
        E_INFO("pred: %s\n", classification);
        if (sample_rate != 48000 /* Has problems for some reason */
            && ps_vad_frame_length(vader) == 0.03) { /* skip approximate rates */
            TEST_EQUAL(0, strcmp(expecteds[i], classification));
        }
        ckd_free(classification);
        ps_vad_free(vader);
        ckd_free(frame);
        close_data(fh, sample_rate);
    }
    return 0;
}
/*
 * Test driver: checks default initialization and reference counting
 * of the VAD, runs test_sample_rate() for every entry in sample_rates,
 * then checks that unreasonable sample rates are rejected.
 *
 * Returns 0 when it runs to completion; failed checks are handled by
 * the TEST_* macros from test_macros.h.
 */
int
main(int argc, char *argv[])
{
    ps_vad_t *vader;
    int i;

    (void)argc;
    (void)argv;
    err_set_loglevel(ERR_INFO);

    /* Test initialization with default parameters. */
    vader = ps_vad_init(0, 0, 0);
    TEST_ASSERT(vader);
    /* Retain and release, should still be there. */
    TEST_ASSERT((vader = ps_vad_retain(vader)));
    TEST_ASSERT(ps_vad_free(vader));
    /* Test default frame size. */
    TEST_EQUAL(ps_vad_frame_size(vader),
               (int)(PS_VAD_DEFAULT_SAMPLE_RATE * PS_VAD_DEFAULT_FRAME_LENGTH));
    TEST_EQUAL_FLOAT(ps_vad_frame_length(vader), PS_VAD_DEFAULT_FRAME_LENGTH);
    TEST_ASSERT(ps_vad_free(vader) == 0);

    /* Test a variety of sample rates. */
    for (i = 0; i < n_sample_rates; ++i) {
        /* Keep the call outside the macro so it runs exactly once,
         * and do not silently drop its result. */
        int rv = test_sample_rate(sample_rates[i]);
        TEST_EQUAL(0, rv);
    }

    /* Test rejection of unreasonable sample rates. */
    vader = ps_vad_init(0, 42, 0.03);
    TEST_ASSERT(vader == NULL);
    vader = ps_vad_init(0, 96000, 0.03);
    TEST_ASSERT(vader == NULL);
    return 0;
}
|