| /* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ | |
| /* ==================================================================== | |
| * Copyright (c) 2022 David Huggins-Daines. All rights reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions | |
| * are met: | |
| * | |
| * 1. Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * | |
| * 2. Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in | |
| * the documentation and/or other materials provided with the | |
| * distribution. | |
| * | |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | |
| * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * ==================================================================== | |
| */ | |
| /** | |
| * @file vad.h | |
| * @brief Simple voice activity detection | |
| * | |
| * Because doxygen is Bad Software, the actual documentation can only | |
| * exist in \ref ps_vad_t. Sorry about that. | |
| */ | |
| extern "C" { | |
| } | |
| /** | |
| * @struct ps_vad_t pocketsphinx/vad.h | |
| * @brief Voice activity detector. | |
| * | |
| * This typedef only forward-declares struct ps_vad_s; operate on it | |
| * through the ps_vad_*() functions. Objects are created with | |
| * ps_vad_init(), shared with ps_vad_retain() and released with | |
| * ps_vad_free(). | |
| */ | |
| typedef struct ps_vad_s ps_vad_t; | |
| /** | |
| * @enum ps_vad_mode_e pocketsphinx/vad.h | |
| * @brief Voice activity detection "aggressiveness" levels. | |
| * | |
| * Passed as the mode argument of ps_vad_init(). The numeric values | |
| * run from PS_VAD_LOOSE (0) to PS_VAD_STRICT (3); as documented for | |
| * ps_vad_init(), stricter values are less likely to misclassify | |
| * non-speech as speech. | |
| */ | |
| typedef enum ps_vad_mode_e { | |
| PS_VAD_LOOSE = 0, | |
| PS_VAD_MEDIUM_LOOSE = 1, | |
| PS_VAD_MEDIUM_STRICT = 2, | |
| PS_VAD_STRICT = 3 | |
| } ps_vad_mode_t; | |
| /** | |
| * @enum ps_vad_class_e pocketsphinx/vad.h | |
| * @brief Classification of input frames returned by ps_vad_classify(). | |
| * | |
| * PS_VAD_ERROR (-1) is the failure indication; PS_VAD_NOT_SPEECH (0) | |
| * and PS_VAD_SPEECH (1) are the two possible classifications of a | |
| * frame. | |
| */ | |
| typedef enum ps_vad_class_e { | |
| PS_VAD_ERROR = -1, | |
| PS_VAD_NOT_SPEECH = 0, | |
| PS_VAD_SPEECH = 1 | |
| } ps_vad_class_t; | |
| /** | |
| * Default sampling rate for voice activity detector | |
| */ | |
| /** | |
| * Default frame length for voice activity detector | |
| */ | |
| /** | |
| * Initialize voice activity detection. | |
| * | |
| * @memberof ps_vad_t | |
| * @param mode "Aggressiveness" of voice activity detection. Stricter | |
| * values (see ps_vad_mode_t) are less likely to | |
| * misclassify non-speech as speech. | |
| * @param sample_rate Sampling rate of input, or 0 for default (which can | |
| * be obtained with ps_vad_sample_rate()). Only 8000, | |
| * 16000, 32000, 48000 are directly supported. See | |
| * ps_vad_set_input_params() for more information. | |
| * @param frame_length Frame length in seconds, or 0.0 for the default. Only | |
| * 0.01, 0.02, 0.03 currently supported. **Actual** value | |
| * may differ, you must use ps_vad_frame_length() to | |
| * obtain it. | |
| * @return VAD object or NULL on failure (invalid parameter for instance). | |
| * Release it with ps_vad_free() when it is no longer needed. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_vad_t *ps_vad_init(ps_vad_mode_t mode, int sample_rate, double frame_length); | |
| /** | |
| * Retain a pointer to voice activity detector. | |
| * | |
| * Each retained pointer should eventually be released with | |
| * ps_vad_free(), which decrements the reference count again. | |
| * | |
| * @memberof ps_vad_t | |
| * @param vad Voice activity detector. | |
| * @return Voice activity detector with incremented reference count. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_vad_t *ps_vad_retain(ps_vad_t *vad); | |
| /** | |
| * Release a pointer to voice activity detector. | |
| * | |
| * The detector is reference counted (see ps_vad_retain()); a return | |
| * value of 0 means the object has been freed and the pointer must no | |
| * longer be used. | |
| * | |
| * @memberof ps_vad_t | |
| * @param vad Voice activity detector. | |
| * @return New reference count (0 if freed). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_vad_free(ps_vad_t *vad); | |
| /** | |
| * Set the input parameters for voice activity detection. | |
| * | |
| * @memberof ps_vad_t | |
| * @param vad Voice activity detector. | |
| * @param sample_rate Sampling rate of input, or 0 for default (which can | |
| * be obtained with ps_vad_sample_rate()). Only 8000, | |
| * 16000, 32000, 48000 are directly supported, others | |
| * will use the closest supported rate (within reason). | |
| * Note that this means that the actual frame length | |
| * may not be exactly the one requested, so you must | |
| * always use the one returned by ps_vad_frame_size() | |
| * (in samples) or ps_vad_frame_length() (in seconds). | |
| * @param frame_length Requested frame length in seconds, or 0.0 for the | |
| * default. Only 0.01, 0.02, 0.03 currently supported. | |
| * **Actual frame length may be different, you must | |
| * always use ps_vad_frame_length() to obtain it.** | |
| * @return 0 for success or -1 on error. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_vad_set_input_params(ps_vad_t *vad, int sample_rate, double frame_length); | |
| /** | |
| * Get the sampling rate expected by voice activity detection. | |
| * | |
| * This is also how to find out the default rate that is used when 0 | |
| * is passed as sample_rate to ps_vad_init() or | |
| * ps_vad_set_input_params(). | |
| * | |
| * @memberof ps_vad_t | |
| * @param vad Voice activity detector. | |
| * @return Expected sampling rate. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_vad_sample_rate(ps_vad_t *vad); | |
| /** | |
| * Get the number of samples expected by voice activity detection. | |
| * | |
| * You **must** always ensure that the buffers passed to | |
| * ps_vad_classify() contain this number of samples (zero-pad them if | |
| * necessary). | |
| * | |
| * Query this after ps_vad_init() or ps_vad_set_input_params() rather | |
| * than computing it yourself, since the actual frame length may | |
| * differ from the one requested. The same quantity expressed in | |
| * seconds is given by ps_vad_frame_length(). | |
| * | |
| * @memberof ps_vad_t | |
| * @param vad Voice activity detector. | |
| * @return Size, in samples, of the frames passed to ps_vad_classify(). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| size_t ps_vad_frame_size(ps_vad_t *vad); | |
| /** | |
| * Get the *actual* length of a frame in seconds. | |
| * | |
| * This may differ from the value requested in ps_vad_set_input_params(). | |
| */ | |
| /** | |
| * Classify a frame as speech or not speech. | |
| * | |
| * @memberof ps_vad_t | |
| * @param vad Voice activity detector. | |
| * @param frame Frame of input, **must** contain the number of | |
| * samples returned by ps_vad_frame_size(). The buffer is | |
| * passed as const and holds samples of the project's | |
| * int16 type (presumably signed 16-bit integers; | |
| * confirm against the int16 typedef). | |
| * @return PS_VAD_SPEECH, PS_VAD_NOT_SPEECH, or PS_VAD_ERROR (see | |
| * ps_vad_class_t). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_vad_class_t ps_vad_classify(ps_vad_t *vad, const int16 *frame); | |
| } | |