| /* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ | |
| /* ==================================================================== | |
| * Copyright (c) 2022 David Huggins-Daines. All rights reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions | |
| * are met: | |
| * | |
| * 1. Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * | |
| * 2. Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in | |
| * the documentation and/or other materials provided with the | |
| * distribution. | |
| * | |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED | |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | |
| * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * ==================================================================== | |
| */ | |
| /** | |
| * @file endpointer.h | |
| * @brief VAD-based endpointer for PocketSphinx | |
| * | |
| * Because doxygen is Bad Software, the actual documentation can only | |
| * exist in \ref ps_endpointer_t. Sorry about that. | |
| */ | |
| extern "C" { | |
| } | |
| /** | |
| * @struct ps_endpointer_t pocketsphinx/endpointer.h | |
| * @brief Simple voice activity detection based endpointing | |
| * | |
| * Only a forward declaration of the underlying structure appears | |
| * here, so the type is used as an opaque handle: obtain one from | |
| * ps_endpointer_init() and pass it to the other ps_endpointer_*() | |
| * functions. | |
| */ | |
| typedef struct ps_endpointer_s ps_endpointer_t; | |
| /** | |
| * Default window in seconds of audio to use for speech start/end decision. | |
| */ | |
| /** | |
| * Default ratio of frames in window to trigger start/end decision. | |
| */ | |
| /** | |
| * Initialize endpointing. | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param window Seconds of audio to use in speech start/end decision, | |
| * or 0 to use the default (PS_ENDPOINTER_DEFAULT_WINDOW). | |
| * @param ratio Ratio of frames needed to trigger start/end decision, | |
| * or 0 for the default (PS_ENDPOINTER_DEFAULT_RATIO). | |
| * @param mode "Aggressiveness" of voice activity detection. Stricter | |
| * values (see ps_vad_mode_t) are less likely to | |
| * misclassify non-speech as speech. | |
| * @param sample_rate Sampling rate of input, or 0 for default (which can | |
| * be obtained with ps_vad_sample_rate()). Only 8000, | |
| * 16000, 32000, 48000 are directly supported, others | |
| * will use the closest supported rate (within reason). | |
| * Note that this means that the actual frame length | |
| * may not be exactly the one requested, so you must | |
| * always use the one returned by | |
| * ps_endpointer_frame_size() | |
| * (in samples) or ps_endpointer_frame_length() (in | |
| * seconds). | |
| * @param frame_length Requested frame length in seconds, or 0.0 for the | |
| * default. Only 0.01, 0.02, 0.03 currently supported. | |
| * **Actual frame length may be different, you must | |
| * always use ps_endpointer_frame_length() to obtain it.** | |
| * @return Endpointer object or NULL on failure (invalid parameter for | |
| * instance). The object is reference counted: see | |
| * ps_endpointer_retain() and ps_endpointer_free(). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_endpointer_t *ps_endpointer_init(double window, | |
| double ratio, | |
| ps_vad_mode_t mode, | |
| int sample_rate, double frame_length); | |
| /** | |
| * Retain a pointer to endpointer. | |
| * | |
| * Increments the reference count; each retained pointer should be | |
| * balanced by a later call to ps_endpointer_free(). | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param ep Endpointer. | |
| * @return Endpointer with incremented reference count. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_endpointer_t *ps_endpointer_retain(ps_endpointer_t *ep); | |
| /** | |
| * Release a pointer to endpointer. | |
| * | |
| * Drops one reference. The return value reports the remaining | |
| * reference count; a result of 0 means the endpointer was freed and | |
| * the pointer must no longer be used. | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param ep Endpointer. | |
| * @return New reference count (0 if freed). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_endpointer_free(ps_endpointer_t *ep); | |
| /** | |
| * Get the voice activity detector used by the endpointer. | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param ep Endpointer. | |
| * @return VAD object. The endpointer retains ownership of this | |
| * object, so you must use ps_vad_retain() if you wish to use it | |
| * outside of the lifetime of the endpointer. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_vad_t *ps_endpointer_vad(ps_endpointer_t *ep); | |
| /** | |
| * Get the frame size (in samples) consumed by the endpointer. | |
| * | |
| * Multiply this by 2 to get the size of the frame buffer required. | |
| */ | |
| /** | |
| * Get the frame length (in seconds) consumed by the endpointer. | |
| */ | |
| /** | |
| * Get the sample rate required by the endpointer. | |
| */ | |
| /** | |
| * Process a frame of audio, returning a frame if in a speech region. | |
| * | |
| * Every call must supply a full frame; a final, shorter frame at the | |
| * end of the input is handled by ps_endpointer_end_stream() instead. | |
| * Combine the return value with ps_endpointer_in_speech() to detect | |
| * transitions between speech and non-speech. | |
| * | |
| * Note that the endpointer is *not* thread-safe. You must call all | |
| * endpointer functions from the same thread. | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param ep Endpointer. | |
| * @param frame Frame of data, must contain ps_endpointer_frame_size() | |
| * samples. | |
| * @return NULL if no speech available, or pointer to a frame of | |
| * ps_endpointer_frame_size() samples (no more and no less). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const int16 *ps_endpointer_process(ps_endpointer_t *ep, | |
| const int16 *frame); | |
| /** | |
| * Process remaining samples at end of stream. | |
| * | |
| * Unlike ps_endpointer_process(), this accepts a partial frame and | |
| * reports the number of samples it returns through out_nsamp. | |
| * | |
| * Note that the endpointer is *not* thread-safe. You must call all | |
| * endpointer functions from the same thread. | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param ep Endpointer. | |
| * @param frame Frame of data, must contain ps_endpointer_frame_size() | |
| * samples or fewer. | |
| * @param nsamp Number of samples in frame. | |
| * @param out_nsamp Output, number of samples available. | |
| * @return Pointer to available samples, or NULL if none available. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const int16 *ps_endpointer_end_stream(ps_endpointer_t *ep, | |
| const int16 *frame, | |
| size_t nsamp, | |
| size_t *out_nsamp); | |
| /** | |
| * Get the current state (speech/not-speech) of the endpointer. | |
| * | |
| * This function can be used to detect speech/non-speech transitions. | |
| * If it returns 0, and a subsequent call to ps_endpointer_process() | |
| * returns non-NULL, this indicates a transition to speech. | |
| * Conversely, if ps_endpointer_process() returns non-NULL and a | |
| * subsequent call to this function returns 0, this indicates a | |
| * transition to non-speech. | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param ep Endpointer. | |
| * @return Non-zero if in a speech segment after processing the last | |
| * frame of data, 0 if not in a speech segment. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_endpointer_in_speech(ps_endpointer_t *ep); | |
| /** | |
| * Get the start time of the last speech segment. | |
| * | |
| * NOTE(review): the unit is not stated in this header; presumably | |
| * seconds, like the window and frame_length parameters of | |
| * ps_endpointer_init() -- confirm against the implementation. | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param ep Endpointer. | |
| * @return Start time of the last speech segment. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| double ps_endpointer_speech_start(ps_endpointer_t *ep); | |
| /** | |
| * Get the end time of the last speech segment. | |
| * | |
| * NOTE(review): the unit is not stated in this header; presumably | |
| * seconds, like the window and frame_length parameters of | |
| * ps_endpointer_init() -- confirm against the implementation. | |
| * | |
| * @memberof ps_endpointer_t | |
| * @param ep Endpointer. | |
| * @return End time of the last speech segment. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| double ps_endpointer_speech_end(ps_endpointer_t *ep); | |
| } | |