pocketsphinx-20.04

37a92a9 about 3 years ago

6.62 kB

	/* -- c-basic-offset:4; indent-tabs-mode: nil -- */
	/* ====================================================================
	* Copyright (c) 2022 David Huggins-Daines. All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	*
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in
	* the documentation and/or other materials provided with the
	* distribution.
	*
	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
	* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
	* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
	* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
	* OF THE POSSIBILITY OF SUCH DAMAGE.
	* ====================================================================
	*/
	/**
	* @file vad.h
	* @brief Simple voice activity detection
	*
	* Because doxygen is Bad Software, the actual documentation can only
	* exist in \ref ps_vad_t. Sorry about that.
	*/

	#ifndef __PS_VAD_H__
	#define __PS_VAD_H__


	#include <pocketsphinx/prim_type.h>
	#include <pocketsphinx/export.h>

	#ifdef __cplusplus
	extern "C" {
	#endif
	#if 0
	}
	#endif

	/**
	* @struct ps_vad_t pocketsphinx/vad.h
	* @brief Voice activity detector.
	*/
	typedef struct ps_vad_s ps_vad_t;

	/**
	* @enum ps_vad_mode_e pocketsphinx/vad.h
	* @brief Voice activity detection "aggressiveness" levels.
	*/
	typedef enum ps_vad_mode_e {
	PS_VAD_LOOSE = 0,
	PS_VAD_MEDIUM_LOOSE = 1,
	PS_VAD_MEDIUM_STRICT = 2,
	PS_VAD_STRICT = 3
	} ps_vad_mode_t;

	/**
	* @enum ps_vad_class_e pocketsphinx/vad.h
	* @brief Classification of input frames returned by ps_vad_classify().
	*/
	typedef enum ps_vad_class_e {
	PS_VAD_ERROR = -1,
	PS_VAD_NOT_SPEECH = 0,
	PS_VAD_SPEECH = 1
	} ps_vad_class_t;

	/**
	* Default sampling rate for voice activity detector
	*/
	#define PS_VAD_DEFAULT_SAMPLE_RATE 16000
	/**
	* Default frame length for voice activity detector
	*/
	#define PS_VAD_DEFAULT_FRAME_LENGTH 0.03

	/**
	* Initialize voice activity detection.
	*
	* @memberof ps_vad_t
	* @param mode "Aggressiveness" of voice activity detection. Stricter
	* values (see ps_vad_mode_t) are less likely to
	* misclassify non-speech as speech.
	* @param sample_rate Sampling rate of input, or 0 for default (which can
	* be obtained with ps_vad_sample_rate()). Only 8000,
	* 16000, 32000, 48000 are directly supported. See
	* ps_vad_set_input_params() for more information.
	* @param frame_length Frame length in seconds, or 0.0 for the default. Only
	* 0.01, 0.02, 0.03 currently supported. Actual value
	* may differ, you must use ps_vad_frame_length() to
	* obtain it.
	* @return VAD object or NULL on failure (invalid parameter for instance).
	*/
	POCKETSPHINX_EXPORT
	ps_vad_t *ps_vad_init(ps_vad_mode_t mode, int sample_rate, double frame_length);

	/**
	* Retain a pointer to voice activity detector.
	*
	* @memberof ps_vad_t
	* @param vad Voice activity detector.
	* @return Voice activity detector with incremented reference count.
	*/
	POCKETSPHINX_EXPORT
	ps_vad_t ps_vad_retain(ps_vad_t vad);

	/**
	* Release a pointer to voice activity detector.
	*
	* @memberof ps_vad_t
	* @param vad Voice activity detector.
	* @return New reference count (0 if freed).
	*/
	POCKETSPHINX_EXPORT
	int ps_vad_free(ps_vad_t *vad);

	/**
	* Set the input parameters for voice activity detection.
	*
	* @memberof ps_vad_t
	* @param sample_rate Sampling rate of input, or 0 for default (which can
	* be obtained with ps_vad_sample_rate()). Only 8000,
	* 16000, 32000, 48000 are directly supported, others
	* will use the closest supported rate (within reason).
	* Note that this means that the actual frame length
	* may not be exactly the one requested, so you must
	* always use the one returned by ps_vad_frame_size()
	* (in samples) or ps_vad_frame_length() (in seconds).
	* @param frame_length Requested frame length in seconds, or 0.0 for the
	* default. Only 0.01, 0.02, 0.03 currently supported.
	* **Actual frame length may be different, you must
	* always use ps_vad_frame_length() to obtain it.**
	* @return 0 for success or -1 on error.
	*/
	POCKETSPHINX_EXPORT
	int ps_vad_set_input_params(ps_vad_t *vad, int sample_rate, double frame_length);

	/**
	* Get the sampling rate expected by voice activity detection.
	*
	* @memberof ps_vad_t
	* @param vad Voice activity detector.
	* @return Expected sampling rate.
	*/
	POCKETSPHINX_EXPORT
	int ps_vad_sample_rate(ps_vad_t *vad);

	/**
	* Get the number of samples expected by voice activity detection.
	*
	* You must always ensure that the buffers passed to
	* ps_vad_classify() contain this number of samples (zero-pad them if
	* necessary).
	*
	* @memberof ps_vad_t
	* @param vad Voice activity detector.
	* @return Size, in samples, of the frames passed to ps_vad_classify().
	*/
	POCKETSPHINX_EXPORT
	size_t ps_vad_frame_size(ps_vad_t *vad);

	/**
	* Get the actual length of a frame in seconds.
	*
	* This may differ from the value requested in ps_vad_set_input_params().
	*/
	#define ps_vad_frame_length(vad) ((double)ps_vad_frame_size(vad) / ps_vad_sample_rate(vad))

	/**
	* Classify a frame as speech or not speech.
	*
	* @memberof ps_vad_t
	* @param vad Voice activity detector.
	* @param frame Frame of input, must contain the number of
	* samples returned by ps_vad_frame_size().
	* @return PS_VAD_SPEECH, PS_VAD_NOT_SPEECH, or PS_VAD_ERROR (see
	* ps_vad_class_t).
	*/
	POCKETSPHINX_EXPORT
	ps_vad_class_t ps_vad_classify(ps_vad_t vad, const int16 frame);

	#ifdef __cplusplus
	}
	#endif

	#endif /* __PS_VAD_H__ */