| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifdef HAVE_CONFIG_H |
| | #include <config.h> |
| | #endif |
| |
|
| | #include <stdio.h> |
| | #include <string.h> |
| |
|
| | #include <pocketsphinx.h> |
| |
|
| | #include "util/cmd_ln.h" |
| | #include "util/ckd_alloc.h" |
| | #include "util/byteorder.h" |
| | #include "util/strfuncs.h" |
| | #include "util/pio.h" |
| | #include "fe/yin.h" |
| |
|
| | #include "pocketsphinx_internal.h" |
| |
|
| | static ps_arg_t defn[] = { |
| | { "i", |
| | ARG_STRING, |
| | NULL, |
| | "Single audio input file" }, |
| |
|
| | { "o", |
| | ARG_STRING, |
| | NULL, |
| | "Single text output file (standard output will be used if not given)" }, |
| | |
| | { "c", |
| | ARG_STRING, |
| | NULL, |
| | "Control file for batch processing" }, |
| | |
| | { "nskip", |
| | ARG_INTEGER, |
| | "0", |
| | "If a control file was specified, the number of utterances to skip at the head of the file" }, |
| | |
| | { "runlen", |
| | ARG_INTEGER, |
| | "-1", |
| | "If a control file was specified, the number of utterances to process (see -nskip too)" }, |
| | |
| | { "di", |
| | ARG_STRING, |
| | NULL, |
| | "Input directory, input file names are relative to this, if defined" }, |
| | |
| | { "ei", |
| | ARG_STRING, |
| | NULL, |
| | "Input extension to be applied to all input files" }, |
| | |
| | { "do", |
| | ARG_STRING, |
| | NULL, |
| | "Output directory, output files are relative to this" }, |
| | |
| | { "eo", |
| | ARG_STRING, |
| | NULL, |
| | "Output extension to be applied to all output files" }, |
| | |
| | { "nist", |
| | ARG_BOOLEAN, |
| | "no", |
| | "Defines input format as NIST sphere" }, |
| | |
| | { "raw", |
| | ARG_BOOLEAN, |
| | "no", |
| | "Defines input format as raw binary data" }, |
| | |
| | { "mswav", |
| | ARG_BOOLEAN, |
| | "no", |
| | "Defines input format as Microsoft Wav (RIFF)" }, |
| |
|
| | { "samprate", |
| | ARG_INTEGER, |
| | "0", |
| | "Sampling rate of audio data (will be determined automatically if 0)" }, |
| |
|
| | { "input_endian", |
| | ARG_STRING, |
| | NULL, |
| | "Endianness of audio data (will be determined automatically if not given)" }, |
| |
|
| | { "fshift", |
| | ARG_FLOATING, |
| | "0.01", |
| | "Frame shift: number of seconds between each analysis frame." }, |
| |
|
| | { "flen", |
| | ARG_FLOATING, |
| | "0.025", |
| | "Number of seconds in each analysis frame (needs to be greater than twice the longest period you wish to detect - to detect down to 80Hz you need a frame length of 2.0/80 = 0.025)." }, |
| |
|
| | { "smooth_window", |
| | ARG_INTEGER, |
| | "2", |
| | "Number of frames on either side of the current frame to use for smoothing." }, |
| |
|
| | { "voice_thresh", |
| | ARG_FLOATING, |
| | "0.1", |
| | "Threshold of normalized difference under which to search for the fundamental period." }, |
| |
|
| | { "search_range", |
| | ARG_FLOATING, |
| | "0.2", |
| | "Fraction of the best local estimate to use as a search range for smoothing." }, |
| |
|
| | { NULL, 0, NULL, NULL } |
| | }; |
| |
|
| | static int extract_pitch(const char *in, const char *out, cmd_ln_t *config); |
| | static int run_control_file(const char *ctl, cmd_ln_t *config); |
| |
|
| | static int |
| | guess_file_type(char const *file, FILE *infh, ps_config_t *config) |
| | { |
| | char header[4]; |
| |
|
| | fseek(infh, 0, SEEK_SET); |
| | if (fread(header, 1, 4, infh) != 4) { |
| | E_ERROR_SYSTEM("Failed to read 4 byte header"); |
| | return -1; |
| | } |
| | if (0 == memcmp(header, "RIFF", 4)) { |
| | E_INFO("%s appears to be a WAV file\n", file); |
| | if (ps_config_typeof(config, "mswav") != 0) { |
| | ps_config_set_bool(config, "mswav", TRUE); |
| | ps_config_set_bool(config, "nist", FALSE); |
| | ps_config_set_bool(config, "raw", FALSE); |
| | } |
| | } |
| | else if (0 == memcmp(header, "NIST", 4)) { |
| | E_INFO("%s appears to be a NIST SPHERE file\n", file); |
| | if (ps_config_typeof(config, "mswav") != 0) { |
| | ps_config_set_bool(config, "mswav", FALSE); |
| | ps_config_set_bool(config, "nist", TRUE); |
| | ps_config_set_bool(config, "raw", FALSE); |
| | } |
| | } |
| | else { |
| | E_INFO("%s appears to be raw data\n", file); |
| | if (ps_config_typeof(config, "mswav") != 0) { |
| | ps_config_set_bool(config, "mswav", FALSE); |
| | ps_config_set_bool(config, "nist", FALSE); |
| | ps_config_set_bool(config, "raw", TRUE); |
| | } |
| | } |
| | fseek(infh, 0, SEEK_SET); |
| | return 0; |
| | } |
| |
|
| | int |
| | main(int argc, char *argv[]) |
| | { |
| | cmd_ln_t *config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE); |
| |
|
| | if (config == NULL) { |
| | |
| | err_set_loglevel(ERR_INFO); |
| | cmd_ln_log_help_r(NULL, defn); |
| | return 1; |
| | } |
| |
|
| | |
| | if (ps_config_str(config, "c")) { |
| | if (run_control_file(ps_config_str(config, "c"), config) < 0) |
| | return 1; |
| | } |
| | else { |
| | if (extract_pitch(ps_config_str(config, "i"), |
| | ps_config_str(config, "o"), |
| | config) < 0) |
| | return 1; |
| | } |
| |
|
| | ps_config_free(config); |
| | return 0; |
| | } |
| |
|
| | static int |
| | extract_pitch(const char *in, const char *out, cmd_ln_t *config) |
| | { |
| | FILE *infh = NULL, *outfh = NULL; |
| | size_t flen, fshift, nsamps; |
| | int16 *buf = NULL; |
| | yin_t *yin = NULL; |
| | uint16 period, bestdiff; |
| | int32 sps; |
| |
|
| | if (out) { |
| | if ((outfh = fopen(out, "w")) == NULL) { |
| | E_ERROR_SYSTEM("Failed to open %s for writing", out); |
| | goto error_out; |
| | } |
| | } |
| | else { |
| | outfh = stdout; |
| | } |
| | if ((infh = fopen(in, "rb")) == NULL) { |
| | E_ERROR_SYSTEM("Failed to open %s for reading", in); |
| | goto error_out; |
| | } |
| |
|
| | |
| | |
| | if (!(ps_config_bool(config, "raw") |
| | || ps_config_bool(config, "mswav") |
| | || ps_config_bool(config, "nist"))) { |
| | if (guess_file_type(in, infh, config) < 0) |
| | goto error_out; |
| | } |
| | |
| | |
| | |
| | if (ps_config_bool(config, "mswav")) { |
| | if (ps_config_wavfile(config, infh, in) < 0) |
| | goto error_out; |
| | } |
| | else if (ps_config_bool(config, "nist")) { |
| | if (ps_config_nistfile(config, infh, in) < 0) |
| | goto error_out; |
| | } |
| | else if (ps_config_bool(config, "raw")) { |
| | |
| | if (ps_config_str(config, "input_endian") == NULL) { |
| | ps_config_set_str(config, "input_endian", "little"); |
| | } |
| | if (ps_config_int(config, "samprate") == 0) |
| | ps_config_set_int(config, "samprate", 16000); |
| | } |
| |
|
| | |
| | sps = ps_config_int(config, "samprate"); |
| | flen = (size_t)(0.5 + sps * ps_config_float(config, "flen")); |
| | fshift = (size_t)(0.5 + sps * ps_config_float(config, "fshift")); |
| | yin = yin_init(flen, ps_config_float(config, "voice_thresh"), |
| | ps_config_float(config, "search_range"), |
| | ps_config_int(config, "smooth_window")); |
| | if (yin == NULL) { |
| | E_ERROR("Failed to initialize YIN\n"); |
| | goto error_out; |
| | } |
| | buf = ckd_calloc(flen, sizeof(*buf)); |
| | |
| | if (fread(buf, sizeof(*buf), flen, infh) != flen) { |
| | |
| | } |
| | yin_start(yin); |
| | nsamps = 0; |
| | while (!feof(infh)) { |
| | |
| | yin_write(yin, buf); |
| | if (yin_read(yin, &period, &bestdiff)) { |
| | fprintf(outfh, "%.3f %.2f %.2f\n", |
| | |
| | (double)nsamps/sps, |
| | |
| | bestdiff > 32768 ? 0.0 : 1.0 - (double)bestdiff / 32768, |
| | |
| | period == 0 ? sps : (double)sps / period); |
| | nsamps += fshift; |
| | } |
| | |
| | memmove(buf, buf + fshift, (flen - fshift) * sizeof(*buf)); |
| | if (fread(buf + flen - fshift, sizeof(*buf), fshift, infh) != fshift) { |
| | |
| | } |
| | } |
| | yin_end(yin); |
| | |
| | while (yin_read(yin, &period, &bestdiff)) { |
| | fprintf(outfh, "%.3f %.2f %.2f\n", |
| | |
| | (double)nsamps/sps, |
| | |
| | bestdiff > 32768 ? 0.0 : 1.0 - (double)bestdiff / 32768, |
| | |
| | period == 0 ? sps : (double)sps / period); |
| | } |
| |
|
| | if (yin) |
| | yin_free(yin); |
| | ckd_free(buf); |
| | fclose(infh); |
| | if (outfh && outfh != stdout) |
| | fclose(outfh); |
| | return 0; |
| |
|
| | error_out: |
| | if (yin) |
| | yin_free(yin); |
| | ckd_free(buf); |
| | if (infh) fclose(infh); |
| | if (outfh && outfh != stdout) |
| | fclose(outfh); |
| | return -1; |
| | } |
| |
|
| | static int |
| | run_control_file(const char *ctl, cmd_ln_t *config) |
| | { |
| | FILE *ctlfh; |
| | char *line; |
| | char *di, *dout, *ei, *eio; |
| | size_t len; |
| | int rv, guess_type, guess_sps, guess_endian; |
| | int32 skip, runlen; |
| |
|
| | skip = ps_config_int(config, "nskip"); |
| | runlen = ps_config_int(config, "runlen"); |
| |
|
| | |
| | guess_type = !(ps_config_bool(config, "raw") |
| | || ps_config_bool(config, "mswav") |
| | || ps_config_bool(config, "nist")); |
| | |
| | guess_sps = (ps_config_int(config, "samprate") == 0); |
| | |
| | guess_endian = (ps_config_str(config, "input_endian") == NULL); |
| |
|
| | if ((ctlfh = fopen(ctl, "r")) == NULL) { |
| | E_ERROR_SYSTEM("Failed to open control file %s", ctl); |
| | return -1; |
| | } |
| | if (ps_config_str(config, "di")) |
| | di = string_join(ps_config_str(config, "di"), "/", NULL); |
| | else |
| | di = ckd_salloc(""); |
| | if (ps_config_str(config, "do")) |
| | dout = string_join(ps_config_str(config, "do"), "/", NULL); |
| | else |
| | dout = ckd_salloc(""); |
| | if (ps_config_str(config, "ei")) |
| | ei = string_join(".", ps_config_str(config, "ei"), NULL); |
| | else |
| | ei = ckd_salloc(""); |
| | if (ps_config_str(config, "eo")) |
| | eio = string_join(".", ps_config_str(config, "eo"), NULL); |
| | else |
| | eio = ckd_salloc(""); |
| | rv = 0; |
| | while ((line = fread_line(ctlfh, &len)) != NULL) { |
| | char *infile, *outfile; |
| |
|
| | if (skip-- > 0) { |
| | ckd_free(line); |
| | continue; |
| | } |
| | if (runlen == 0) { |
| | ckd_free(line); |
| | break; |
| | } |
| | --runlen; |
| |
|
| | if (line[len-1] == '\n') |
| | line[len-1] = '\0'; |
| |
|
| | infile = string_join(di, line, ei, NULL); |
| | outfile = string_join(dout, line, eio, NULL); |
| |
|
| | |
| | if (guess_type) { |
| | ps_config_set_bool(config, "nist", FALSE); |
| | ps_config_set_bool(config, "mswav", FALSE); |
| | ps_config_set_bool(config, "raw", FALSE); |
| | } |
| | if (guess_sps) |
| | ps_config_set_int(config, "samprate", 0); |
| | if (guess_endian) |
| | ps_config_set_str(config, "input_endian", NULL); |
| |
|
| | rv = extract_pitch(infile, outfile, config); |
| |
|
| | ckd_free(infile); |
| | ckd_free(outfile); |
| | ckd_free(line); |
| |
|
| | if (rv != 0) |
| | break; |
| | } |
| | ckd_free(di); |
| | ckd_free(dout); |
| | ckd_free(ei); |
| | ckd_free(eio); |
| | fclose(ctlfh); |
| | ps_config_free(config); |
| | return rv; |
| | } |
| |
|