| /* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ | |
| /* ==================================================================== | |
| * Copyright (c) 1999-2008 Carnegie Mellon University. All rights | |
| * reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions | |
| * are met: | |
| * | |
| * 1. Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * | |
| * 2. Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in | |
| * the documentation and/or other materials provided with the | |
| * distribution. | |
| * | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND | |
| * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY | |
| * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| * ==================================================================== | |
| * | |
| */ | |
| /** | |
| * @file pocketsphinx.h Main header file for the PocketSphinx decoder. | |
| * | |
| * This is the only header file you should need to include in order to | |
| * write code using PocketSphinx. The documentation for its various | |
| * functions and structures is actually located on the pages for those | |
| * structures, and because Doxygen does not seem smart enough to put | |
| * links in the "Typedefs" list above, here they are for your | |
| * convenience: | |
| * | |
| * - \ref ps_config_t | |
| * - \ref ps_arg_t | |
| * - \ref ps_decoder_t | |
| * - \ref ps_nbest_t | |
| * - \ref ps_seg_t | |
| * | |
| * There are also a few other structures you should be aware of, which | |
| * can be useful in writing speech applications: | |
| * | |
| * - \ref ps_endpointer_t | |
| * - \ref ps_vad_t | |
| * - \ref jsgf_t | |
| * - \ref ngram_model_t | |
| * | |
| * Finally, to learn about switching language models and grammars, see | |
| * <pocketsphinx/search.h> | |
| */ | |
| /* System headers */ | |
| /* PocketSphinx utility headers */ | |
| /* PocketSphinx API headers */ | |
| /* Namum manglium ii domum */ | |
| extern "C" { | |
| } | |
| /* Transparent structures */ | |
| /** | |
| * @enum ps_type_e | |
| * @brief Types of configuration parameters. | |
| */ | |
| typedef enum ps_type_e { | |
| ARG_REQUIRED = (1<<0), /**< Bit indicating required argument. */ | |
| ARG_INTEGER = (1<<1), /**< Integer up to 64 bits. */ | |
| ARG_FLOATING = (1<<2), /**< Double-precision floating point. */ | |
| ARG_STRING = (1<<3), /**< String. */ | |
| ARG_BOOLEAN = (1<<4), /**< Boolean (true/false). */ | |
| REQARG_INTEGER = (ARG_INTEGER | ARG_REQUIRED), /**< Required integer argument. */ | |
| REQARG_FLOATING = (ARG_FLOATING | ARG_REQUIRED), /**< Required floating-point argument. */ | |
| REQARG_STRING = (ARG_STRING | ARG_REQUIRED), /**< Required string argument. */ | |
| REQARG_BOOLEAN = (ARG_BOOLEAN | ARG_REQUIRED) /**< Required boolean argument. */ | |
| } ps_type_t; | |
| /** | |
| * @typedef ps_type_t | |
| * @brief Types of configuration parameters. | |
| */ | |
| /** | |
| * @struct ps_arg_t | |
| * @brief Definition of a configuration parameter. | |
| */ | |
| typedef struct ps_arg_s { | |
| char const *name; /**< Name of the command line switch (NULL in the entry terminating an array of definitions) */ | |
| int type; /**< Type of the argument, as ps_type_t bits (e.g. ARG_STRING or REQARG_INTEGER) */ | |
| char const *deflt; /**< Default value (as a character string), or NULL if none */ | |
| char const *doc; /**< Documentation/description string */ | |
| } ps_arg_t; | |
| /* Opaque structures */ | |
| /** | |
| * @struct ps_config_t | |
| * @brief Configuration object. | |
| */ | |
| typedef struct cmd_ln_s ps_config_t; | |
| /** | |
| * @struct ps_decoder_t | |
| * @brief Speech recognizer object. | |
| */ | |
| typedef struct ps_decoder_s ps_decoder_t; | |
| /** | |
| * @struct ps_nbest_t | |
| * @brief N-best hypothesis iterator object. | |
| */ | |
| typedef struct ps_astar_s ps_nbest_t; | |
| /** | |
| * @struct ps_seg_t | |
| * @brief Segmentation iterator object. | |
| */ | |
| typedef struct ps_seg_s ps_seg_t; | |
| /** | |
| * Create a configuration with default values. | |
| * | |
| * @memberof ps_config_t | |
| * @param defn Array of ps_arg_t defining and describing parameters, | |
| * terminated by a ps_arg_t with `name == NULL`. You should usually | |
| * just pass NULL here, which will result in the standard set of | |
| * parameters being used. | |
| * @return Newly created configuration or NULL on failure (should not | |
| * happen, but check it anyway). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_config_t *ps_config_init(const ps_arg_t *defn); | |
| /** | |
| * Retain a pointer to a configuration object. | |
| * @memberof ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_config_t *ps_config_retain(ps_config_t *config); | |
| /** | |
| * Release a configuration object. | |
| * @memberof ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_config_free(ps_config_t *config); | |
| /** | |
| * Validate configuration. | |
| * | |
| * Currently this just checks that you haven't specified multiple | |
| * types of grammars or language models at the same time. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object to validate. | |
| * @return 0 for success, <0 for failure. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_config_validate(ps_config_t *config); | |
| /** | |
| * Create or update a configuration by parsing slightly extended JSON. | |
| * | |
| * This function parses a JSON object in non-strict mode to produce a | |
| * ps_config_t. Configuration parameters are given *without* a | |
| * leading dash, and do not need to be quoted, nor does the object | |
| * need to be enclosed in curly braces, nor are commas necessary | |
| * between key/value pairs. Basically, it's degenerate YAML. So, for | |
| * example, this is accepted: | |
| * | |
| * hmm: fr-fr | |
| * samprate: 8000 | |
| * keyphrase: "hello world" | |
| * | |
| * Of course, valid JSON is also accepted, but who wants to use that. | |
| * | |
| * Well, mostly. Unicode escape sequences (e.g. `"\u0020"`) are *not* | |
| * supported at the moment, so please don't use them. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Previously existing ps_config_t to update, or NULL to | |
| * create a new one. | |
| * @param json JSON serialized object as null-terminated UTF-8, | |
| * containing configuration parameters. | |
| * @return Newly created configuration or NULL on failure (such as | |
| * invalid or missing parameters). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_config_t *ps_config_parse_json(ps_config_t *config, const char *json); | |
| /** | |
| * Construct JSON from a configuration object. | |
| * | |
| * Unlike ps_config_parse_json(), this actually produces valid JSON ;-) | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object | |
| * @return Newly created null-terminated JSON string. The ps_config_t | |
| * retains ownership of this pointer, which is only valid until the | |
| * next call to ps_config_serialize_json(). You must copy it if you | |
| * wish to retain it. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const char *ps_config_serialize_json(ps_config_t *config); | |
| /** | |
| * Access the type of a configuration parameter. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object. | |
| * @param name Name of the parameter to retrieve. | |
| * @return the type of the parameter (as a combination of the ARG_* | |
| * bits), or 0 if no such parameter exists. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_type_t ps_config_typeof(ps_config_t *config, char const *name); | |
| /** | |
| * Access the value of a configuration parameter. | |
| * | |
| * To actually do something with the value, you will need to know its | |
| * type, which can be obtained with ps_config_typeof(). This function | |
| * is thus mainly useful for dynamic language bindings, and you should | |
| * use ps_config_int(), ps_config_float(), or ps_config_str() instead. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object. | |
| * @param name Name of the parameter to retrieve. | |
| * @return Pointer to the parameter's value, or NULL if the parameter | |
| * does not exist. Note that a string parameter can also have NULL as | |
| * a value, in which case the `ptr` field in the return value is NULL. | |
| * This pointer (and any pointers inside it) is owned by the ps_config_t. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const anytype_t *ps_config_get(ps_config_t *config, const char *name); | |
| /** | |
| * Set or unset the value of a configuration parameter. | |
| * | |
| * This will coerce the value to the proper type, so you can, for | |
| * example, pass it a string with ARG_STRING as the type when adding | |
| * options from the command-line. Note that the return pointer will | |
| * *not* be the same as the one passed in the value. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object. | |
| * @param name Name of the parameter to set. Must exist. | |
| * @param val Pointer to the value (strings will be copied) inside an | |
| * anytype_t union. On 64-bit little-endian platforms, you *can* cast | |
| * a pointer to int, long, double, or char* here, but that doesn't | |
| * necessarily mean that you *should*. As a convenience, you can pass | |
| * NULL here to reset a parameter to its default value. | |
| * @param t Type of the value in `val`, will be coerced to the type of | |
| * the actual parameter if necessary. | |
| * @return Pointer to the parameter's value, or NULL on failure | |
| * (unknown parameter, usually). This pointer (and any pointers | |
| * inside it) is owned by the ps_config_t. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const anytype_t *ps_config_set(ps_config_t *config, const char *name, | |
| const anytype_t *val, ps_type_t t); | |
| /** | |
| * Get an integer-valued parameter. | |
| * | |
| * If the parameter does not have an integer or boolean type, this | |
| * will print an error and return 0. So don't do that. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object. | |
| * @param name Name of the parameter to retrieve. | |
| * @return Value of the parameter, or 0 if it does not have an | |
| * integer or boolean type. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| long ps_config_int(ps_config_t *config, const char *name); | |
| /** | |
| * Get a boolean-valued parameter. | |
| * | |
| * If the parameter does not have an integer or boolean type, this | |
| * will print an error and return 0. The return value is either 0 or | |
| * 1 (if the parameter has an integer type, any non-zero value will | |
| * return 1). | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object. | |
| * @param name Name of the parameter to retrieve. | |
| * @return 0 or 1 according to the parameter's value, or 0 if it does | |
| * not have an integer or boolean type. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_config_bool(ps_config_t *config, const char *name); | |
| /** | |
| * Get a floating-point parameter. | |
| * | |
| * If the parameter does not have a floating-point type, this will | |
| * print an error and return 0. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object. | |
| * @param name Name of the parameter to retrieve. | |
| * @return Value of the parameter, or 0 if it does not have a | |
| * floating-point type. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| double ps_config_float(ps_config_t *config, const char *name); | |
| /** | |
| * Get a string parameter. | |
| * | |
| * If the parameter does not have a string type, this will print an | |
| * error and return NULL. Notably, it will *NOT* format an integer or | |
| * float for you, because that would involve allocating memory. So | |
| * don't do that. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration object. | |
| * @param name Name of the parameter to retrieve. | |
| * @return Value of the parameter, or NULL if it does not have a | |
| * string type. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const char *ps_config_str(ps_config_t *config, const char *name); | |
| /** | |
| * Set an integer-valued parameter. | |
| * | |
| * If the parameter does not have an integer or boolean type, this | |
| * will convert `val` appropriately. | |
| * | |
| * @memberof ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const anytype_t *ps_config_set_int(ps_config_t *config, const char *name, long val); | |
| /** | |
| * Set a boolean-valued parameter. | |
| * | |
| * If the parameter does not have an integer or boolean type, this | |
| * will convert `val` appropriately. | |
| * | |
| * @memberof ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const anytype_t *ps_config_set_bool(ps_config_t *config, const char *name, int val); | |
| /** | |
| * Set a floating-point parameter. | |
| * | |
| * If the parameter does not have a floating-point type, this will | |
| * convert `val` appropriately. | |
| * | |
| * @memberof ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const anytype_t *ps_config_set_float(ps_config_t *config, const char *name, double val); | |
| /** | |
| * Set a string-valued parameter. | |
| * | |
| * If the parameter does not have a string type, this will convert | |
| * `val` appropriately. For boolean parameters, any string matching | |
| * `/^[yt1]/` will be true, while any string matching `/^[nf0]/` will | |
| * be false. NULL is also false. | |
| * | |
| * This function is used for configuration from JSON, you may want to | |
| * use it for your own configuration files too. | |
| * | |
| * @memberof ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const anytype_t *ps_config_set_str(ps_config_t *config, const char *name, const char *val); | |
| /** | |
| * Set configuration parameters (actually just sample rate) from a | |
| * sound file. | |
| * | |
| * If the file is unreadable, unsupported or incompatible with the | |
| * existing feature extraction parameters, this will print an error | |
| * message and fail (return -1). | |
| * | |
| * If it is of an unknown type, it will be treated as raw data. So | |
| * beware! Currently we only support WAV and NIST Sphere files. We | |
| * attempt to recognize Ogg, MP3 (but not really, because it is very | |
| * difficult to do reliably), and FLAC, but do not support them. For | |
| * everything else, there's SoX (tm). | |
| * | |
| * Currently, the file must be seekable, so you can't use this on | |
| * standard input, for instance. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration to update from file. | |
| * @param fh Previously opened file handle. | |
| * @param file Name of open file handle for logging (optional, can be NULL) | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_config_soundfile(ps_config_t *config, FILE *fh, const char *file); | |
| /** | |
| * Read a WAV header and set configuration parameters. | |
| * | |
| * This works like ps_config_soundfile() but assumes that you already | |
| * know it's a WAV file. | |
| * | |
| * Unlike ps_config_soundfile(), the file does *not* have to be seekable. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration to update from file. | |
| * @param infh Previously opened file handle. | |
| * @param file Name of open file handle for logging (optional, can be NULL) | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_config_wavfile(ps_config_t *config, FILE *infh, const char *file); | |
| /** | |
| * Read a NIST header and set configuration parameters. | |
| * | |
| * This works like ps_config_soundfile() but assumes that you already | |
| * know it's a NIST file. | |
| * | |
| * Unlike ps_config_soundfile(), the file does *not* have to be seekable. | |
| * | |
| * @memberof ps_config_t | |
| * @param config Configuration to update from file. | |
| * @param infh Previously opened file handle. | |
| * @param file Name of open file handle for logging (optional, can be NULL) | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_config_nistfile(ps_config_t *config, FILE *infh, const char *file); | |
| /** | |
| * Sets default acoustic and language model if they are not set explicitly. | |
| * | |
| * This function fills in the configuration with the default acoustic | |
| * and language models and dictionary, if (and this is a badly | |
| * implemented heuristic) they do not seem to be already filled in. | |
| * It is preferable for you to call this *before* doing any other | |
| * configuration to avoid confusion. | |
| * | |
| * The default models are looked for in the directory returned by | |
| * ps_default_modeldir(), or, if the `POCKETSPHINX_PATH` environment | |
| * variable is set, this function will look there instead. | |
| * | |
| * If no global model directory was defined at compilation time (this | |
| * is useful for relocatable installs such as the Python module) and | |
| * `POCKETSPHINX_PATH` is not set, this will simply do nothing. | |
| * | |
| * @memberof ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| void ps_default_search_args(ps_config_t *config); | |
| /** | |
| * Sets default file paths and parameters based on configuration. | |
| * | |
| * @memberof ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| void ps_expand_model_config(ps_config_t *config); | |
| /** | |
| * Gets the system default model directory, if any exists. | |
| * | |
| * @relates ps_config_t | |
| * @return system model directory defined at compile time, or NULL if | |
| * not defined (usually in a relocatable installation such as | |
| * a Python module). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const char *ps_default_modeldir(void); | |
| /** | |
| * Initialize the decoder from a configuration object. | |
| * | |
| * @memberof ps_decoder_t | |
| * @note The decoder retains ownership of the pointer | |
| * <code>config</code>, so if you are not going to use it | |
| * elsewhere, you can free it. | |
| * @param config a configuration object. If NULL, the | |
| * decoder will be allocated but not initialized. You can | |
| * proceed to initialize it with ps_reinit(). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_decoder_t *ps_init(ps_config_t *config); | |
| /** | |
| * Reinitialize the decoder with updated configuration. | |
| * | |
| * This function allows you to switch the acoustic model, dictionary, | |
| * or other configuration without creating an entirely new decoding | |
| * object. | |
| * | |
| * @note Since the acoustic model will be reloaded, changes made to | |
| * feature extraction parameters may be overridden if a `feat.params` | |
| * file is present. | |
| * @note Any searches created with ps_set_search() or words added to | |
| * the dictionary with ps_add_word() will also be lost. To avoid this | |
| * you can use ps_reinit_feat(). | |
| * @note The decoder retains ownership of the pointer | |
| * <code>config</code>, so you should free it when no longer used. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @param config An optional new configuration to use. If this is | |
| * NULL, the previous configuration will be reloaded, | |
| * with any changes applied. | |
| * @return 0 for success, <0 for failure. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_reinit(ps_decoder_t *ps, ps_config_t *config); | |
| /** | |
| * Reinitialize only the feature computation with updated configuration. | |
| * | |
| * This function allows you to switch the feature computation | |
| * parameters without otherwise affecting the decoder configuration. | |
| * For example, if you change the sample rate or the frame rate, and | |
| * do not want to reconfigure the rest of the decoder. | |
| * | |
| * Note that if you have set a custom cepstral mean with ps_set_cmn(), | |
| * it will be overridden. | |
| * | |
| * @note The decoder retains ownership of the pointer `config`, so you | |
| * should free it when no longer used. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @param config An optional new configuration to use. If this is | |
| * NULL, the previous configuration will be reloaded, | |
| * with any changes to feature computation applied. | |
| * @return 0 for success, <0 for failure (usually an invalid parameter) | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_reinit_feat(ps_decoder_t *ps, ps_config_t *config); | |
| /** | |
| * Get the current cepstral mean as a string. | |
| * | |
| * This is the string representation of the current cepstral mean, | |
| * which represents the acoustic channel conditions in live | |
| * recognition. This can be used to initialize the decoder with the | |
| * `cmninit` option, e.g.: | |
| * | |
| * config = ps_config_parse_json(NULL, "cmninit: 42,-1,0"); | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder | |
| * @param update Update the cepstral mean using data processed so far. | |
| * @return String representation of cepstral mean, as | |
| * `ps_config_int(config, "ceplen")` comma-separated | |
| * numbers. This pointer is owned by the decoder and only | |
| * valid until the next call to ps_get_cmn(), ps_set_cmn() or | |
| * ps_end_utt(). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| const char *ps_get_cmn(ps_decoder_t *ps, int update); | |
| /** | |
| * Set the current cepstral mean from a string. | |
| * | |
| * This does the same thing as setting `cmninit` with | |
| * ps_config_set_str() and running `ps_reinit_feat()` but is more | |
| * efficient, and can also be done in the middle of an utterance if | |
| * you like. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder | |
| * @param cmn String representation of cepstral mean, as up to | |
| * `ps_config_int(config, "ceplen")` comma-separated numbers | |
| * (any missing values will be zero-filled). | |
| * @return 0 for success or -1 for invalid input. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_set_cmn(ps_decoder_t *ps, const char *cmn); | |
| /** | |
| * Returns the argument definitions used in ps_config_init(). | |
| * | |
| * This is here to avoid exporting global data, which is problematic | |
| * on Win32 and Symbian (and possibly other platforms). | |
| * | |
| * @related ps_config_t | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_arg_t const *ps_args(void); | |
| /** | |
| * Retain a pointer to the decoder. | |
| * | |
| * This increments the reference count on the decoder, allowing it to | |
| * be shared between multiple parent objects. In general you will not | |
| * need to use this function, ever. It is mainly here for the | |
| * convenience of scripting language bindings. | |
| * | |
| * @memberof ps_decoder_t | |
| * @return pointer to retained decoder. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_decoder_t *ps_retain(ps_decoder_t *ps); | |
| /** | |
| * Finalize the decoder. | |
| * | |
| * This releases all resources associated with the decoder. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder to be freed. | |
| * @return New reference count (0 if freed). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_free(ps_decoder_t *ps); | |
| /** | |
| * Get the configuration object for this decoder. | |
| * | |
| * @memberof ps_decoder_t | |
| * @return The configuration object for this decoder. The decoder | |
| * owns this pointer, so you should not attempt to free it | |
| * manually. Use ps_config_retain() if you wish to reuse it | |
| * elsewhere. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_config_t *ps_get_config(ps_decoder_t *ps); | |
| /** | |
| * Get the log-math computation object for this decoder. | |
| * | |
| * @memberof ps_decoder_t | |
| * @return The log-math object for this decoder. The decoder owns | |
| * this pointer, so you should not attempt to free it | |
| * manually. Use logmath_retain() if you wish to reuse it | |
| * elsewhere. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| logmath_t *ps_get_logmath(ps_decoder_t *ps); | |
| /** | |
| * Adapt current acoustic model using a linear transform. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param mllr The new transform to use, or NULL to update the | |
| * existing transform. The decoder retains ownership of | |
| * this pointer, so you may free it if you no longer need | |
| * it. | |
| * @return The updated transform object for this decoder, or | |
| * NULL on failure. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_mllr_t *ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr); | |
| /** | |
| * Reload the pronunciation dictionary from a file. | |
| * | |
| * This function replaces the current pronunciation dictionary with | |
| * the one stored in `dictfile`. This also causes the active search | |
| * module(s) to be reinitialized, in the same manner as calling | |
| * ps_add_word() with update=TRUE. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param dictfile Path to dictionary file to load. | |
| * @param fdictfile Path to filler dictionary to load, or NULL to keep | |
| * the existing filler dictionary. | |
| * @param format Format of the dictionary file, or NULL to determine | |
| * automatically (currently unused, should be NULL) | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_load_dict(ps_decoder_t *ps, char const *dictfile, | |
| char const *fdictfile, char const *format); | |
| /** | |
| * Dump the current pronunciation dictionary to a file. | |
| * | |
| * This function dumps the current pronunciation dictionary to a text file. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param dictfile Path to file where dictionary will be written. | |
| * @param format Format of the dictionary file, or NULL for the | |
| * default (text) format (currently unused, should be NULL) | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format); | |
| /** | |
| * Add a word to the pronunciation dictionary. | |
| * | |
| * This function adds a word to the pronunciation dictionary and the | |
| * current language model (but, obviously, not to the current FSG if | |
| * FSG mode is enabled). If the word is already present in one or the | |
| * other, it does whatever is necessary to ensure that the word can be | |
| * recognized. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param word Word string to add. | |
| * @param phones Whitespace-separated list of phoneme strings | |
| * describing pronunciation of <code>word</code>. | |
| * @param update If TRUE, update the search module (whichever one is | |
| * currently active) to recognize the newly added word. | |
| * If adding multiple words, it is more efficient to | |
| * pass FALSE here in all but the last word. | |
| * @return The internal ID (>= 0) of the newly added word, or <0 on | |
| * failure. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_add_word(ps_decoder_t *ps, | |
| char const *word, | |
| char const *phones, | |
| int update); | |
| /** | |
| * Look up a word in the dictionary and return phone transcription | |
| * for it. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Pocketsphinx decoder | |
| * @param word Word to look for | |
| * | |
| * @return Whitespace-separated phone string describing the pronunciation of the <code>word</code> | |
| * or NULL if word is not present in the dictionary. The string is | |
| * allocated and must be freed by the user. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| char *ps_lookup_word(ps_decoder_t *ps, | |
| const char *word); | |
| /** | |
| * Decode a raw audio stream. | |
| * | |
| * No headers are recognized in this file. The configuration | |
| * parameters <tt>-samprate</tt> and <tt>-input_endian</tt> are used | |
| * to determine the sampling rate and endianness of the stream, | |
| * respectively. Audio is always assumed to be 16-bit signed PCM. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @param rawfh Previously opened file stream. | |
| * @param maxsamps Maximum number of samples to read from rawfh, or -1 | |
| * to read until end-of-file. | |
| * @return Number of samples of audio. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, | |
| long maxsamps); | |
| /** | |
| * Decode a senone score dump file. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder | |
| * @param senfh Previously opened file handle positioned at start of file. | |
| * @return Number of frames read. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh); | |
| /** | |
| * Start processing of the stream of speech. | |
| * | |
| * @deprecated This function is retained for compatibility, but its | |
| * only effect is to reset the noise removal statistics, which are | |
| * otherwise retained across utterances. You do not need to call it. | |
| * | |
| * @memberof ps_decoder_t | |
| * @return 0 for success, <0 on error. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_start_stream(ps_decoder_t *ps); | |
| /** | |
| * Check in-speech status of decoder. | |
| * | |
| * @deprecated This function is retained for compatibility but should | |
| * not be considered a reliable voice activity detector. It will | |
| * always return 1 between calls to ps_start_utt() and ps_end_utt(). | |
| * You probably want ps_endpointer_t, but for single frames of data | |
| * you can also use \ref ps_vad_t. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @return 1 if last buffer contained speech, 0 - otherwise | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_get_in_speech(ps_decoder_t *ps); | |
| /** | |
| * Start utterance processing. | |
| * | |
| * This function should be called before any utterance data is passed | |
| * to the decoder. It marks the start of a new utterance and | |
| * reinitializes internal data structures. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder to be started. | |
| * @return 0 for success, <0 on error. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_start_utt(ps_decoder_t *ps); | |
| /** | |
| * Decode raw audio data. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @param data Audio data, as 16-bit linear PCM. | |
| * @param n_samples Number of samples (not bytes) in `data`. | |
| * @param no_search If non-zero, perform feature extraction but don't | |
| * do any recognition yet. This may be necessary if | |
| * your processor has trouble doing recognition in | |
| * real-time. | |
| * @param full_utt If non-zero, this block of data is a full utterance | |
| * worth of data. This may allow the recognizer to | |
| * produce more accurate results. | |
| * @return Number of frames of data searched, or <0 for error. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_process_raw(ps_decoder_t *ps, | |
| int16 const *data, | |
| size_t n_samples, | |
| int no_search, | |
| int full_utt); | |
| /** | |
| * Decode acoustic feature data. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @param data Acoustic feature data, a 2-dimensional array of 32-bit | |
| * floating-point values. Note that this is not a standard | |
| * 2-dimensional C array but rather an array of pointers to | |
| * floats, each of which is one vector (or frame) of | |
| * `ps_config_int(config, "ceplen")` values. | |
| * @param n_frames Number of vectors in `data`. | |
| * @param no_search If non-zero, perform feature extraction but don't | |
| * do any recognition yet. This may be necessary if | |
| * your processor has trouble doing recognition in | |
| * real-time. | |
| * @param full_utt If non-zero, this block of data is a full utterance | |
| * worth of data. This may allow the recognizer to | |
| * produce more accurate results. | |
| * @return Number of frames of data searched, or <0 for error. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_process_cep(ps_decoder_t *ps, | |
| float32 **data, | |
| int n_frames, | |
| int no_search, | |
| int full_utt); | |
| /** | |
| * Get the number of frames of data searched. | |
| * | |
| * Note that there is a delay between this and the number of frames of | |
| * audio which have been input to the system. This is due to the fact | |
| * that acoustic features are computed using a sliding window of | |
| * audio, and dynamic features are computed over a sliding window of | |
| * acoustic features. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @return Number of frames of speech data which have been recognized | |
| * so far. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_get_n_frames(ps_decoder_t *ps); | |
| /** | |
| * End utterance processing. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @return 0 for success, <0 on error | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int ps_end_utt(ps_decoder_t *ps); | |
| /** | |
| * Get hypothesis string and path score. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @param out_best_score Output: path score corresponding to returned string. | |
| * @return String containing best hypothesis at this point in | |
| * decoding. NULL if no hypothesis is available. This string is owned | |
| * by the decoder and only valid for the current hypothesis, so you | |
| * should copy it if you need to hold onto it. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score); | |
| /** | |
| * Get the log posterior probability of the best hypothesis. | |
| * | |
| * @note Unless the `bestpath` parameter is enabled, this function will | |
| * always return zero (corresponding to a posterior probability of | |
| * 1.0). Even if `bestpath` is enabled, it will also return zero when | |
| * called on a partial result. Ongoing research into effective | |
| * confidence annotation for partial hypotheses may result in these | |
| * restrictions being lifted in future versions. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @return Log posterior probability of the best hypothesis; zero | |
| * corresponds to a probability of 1.0. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int32 ps_get_prob(ps_decoder_t *ps); | |
| /** | |
| * Get word lattice. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @return Word lattice object containing all hypotheses so far. NULL | |
| * if no hypotheses are available. This pointer is owned by | |
| * the decoder and you should not attempt to free it manually. | |
| * It is only valid until the next utterance, unless you use | |
| * ps_lattice_retain() to retain it. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_lattice_t *ps_get_lattice(ps_decoder_t *ps); | |
| /** | |
| * Get an iterator over the word segmentation for the best hypothesis. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @return Iterator over the best hypothesis at this point in | |
| * decoding. NULL if no hypothesis is available. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_seg_t *ps_seg_iter(ps_decoder_t *ps); | |
| /** | |
| * Get the next segment in a word segmentation. | |
| * | |
| * @memberof ps_seg_t | |
| * @param seg Segment iterator. | |
| * @return Updated iterator with the next segment. NULL at end of | |
| * utterance (the iterator will be freed in this case). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_seg_t *ps_seg_next(ps_seg_t *seg); | |
| /** | |
| * Get word string from a segmentation iterator. | |
| * | |
| * @memberof ps_seg_t | |
| * @param seg Segment iterator. | |
| * @return Read-only string giving string name of this segment. This | |
| * is only valid until the next call to ps_seg_next(). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| char const *ps_seg_word(ps_seg_t *seg); | |
| /** | |
| * Get inclusive start and end frames from a segmentation iterator. | |
| * | |
| * @note These frame numbers are inclusive, i.e. the end frame refers | |
| * to the last frame in which the given word or other segment was | |
| * active. Therefore, the actual duration is *out_ef - *out_sf + 1. | |
| * | |
| * @memberof ps_seg_t | |
| * @param seg Segment iterator. | |
| * @param out_sf Output: First frame index in segment. | |
| * @param out_ef Output: Last frame index in segment. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef); | |
| /** | |
| * Get language, acoustic, and posterior probabilities from a | |
| * segmentation iterator. | |
| * | |
| * @note Unless the `bestpath` parameter is enabled, this function will | |
| * always return zero (corresponding to a posterior probability of | |
| * 1.0). Even if `bestpath` is enabled, it will also return zero when | |
| * called on a partial result. Ongoing research into effective | |
| * confidence annotation for partial hypotheses may result in these | |
| * restrictions being lifted in future versions. | |
| * | |
| * @memberof ps_seg_t | |
| * @param seg Segment iterator. | |
| * @param out_ascr Output: acoustic model score for this segment. | |
| * @param out_lscr Output: language model score for this segment. | |
| * @param out_lback Output: language model backoff mode for this | |
| * segment (i.e. the number of words used in | |
| * calculating lscr). This field is, of course, only | |
| * meaningful for N-Gram models. | |
| * @return Log posterior probability of current segment. Log is | |
| * expressed in the log-base used in the decoder. To convert | |
| * to linear floating-point, use logmath_exp(ps_get_logmath(), | |
| * pprob), where pprob is the value returned by this function. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback); | |
| /** | |
| * Finish iterating over a word segmentation early, freeing resources. | |
| * | |
| * This is only needed when you stop iterating before ps_seg_next() | |
| * has returned NULL, since the iterator is freed in that case. | |
| * | |
| * @memberof ps_seg_t | |
| * @param seg Segment iterator. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| void ps_seg_free(ps_seg_t *seg); | |
| /** | |
| * Get an iterator over the best hypotheses. The function may also | |
| * return a NULL which means that there is no hypothesis available for this | |
| * utterance. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @return Iterator over N-best hypotheses or NULL if no hypothesis is available | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_nbest_t *ps_nbest(ps_decoder_t *ps); | |
| /** | |
| * Move an N-best list iterator forward. | |
| * | |
| * @memberof ps_nbest_t | |
| * @param nbest N-best iterator. | |
| * @return Updated N-best iterator, or NULL if no more hypotheses are | |
| * available (iterator is freed in this case). | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_nbest_t *ps_nbest_next(ps_nbest_t *nbest); | |
| /** | |
| * Get the hypothesis string from an N-best list iterator. | |
| * | |
| * @memberof ps_nbest_t | |
| * @param nbest N-best iterator. | |
| * @param out_score Output: Path score for this hypothesis. | |
| * @return String containing next best hypothesis. Note that this | |
| * pointer is only valid for the current iteration. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| char const *ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score); | |
| /** | |
| * Get the word segmentation from an N-best list iterator. | |
| * | |
| * @memberof ps_nbest_t | |
| * @param nbest N-best iterator. | |
| * @return Iterator over the next best hypothesis. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| ps_seg_t *ps_nbest_seg(ps_nbest_t *nbest); | |
| /** | |
| * Finish N-best search early, releasing resources. | |
| * | |
| * @memberof ps_nbest_t | |
| * @param nbest N-best iterator. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| void ps_nbest_free(ps_nbest_t *nbest); | |
| /** | |
| * Get performance information for the current utterance. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @param out_nspeech Output: Number of seconds of speech. | |
| * @param out_ncpu Output: Number of seconds of CPU time used. | |
| * @param out_nwall Output: Number of seconds of wall time used. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, | |
| double *out_ncpu, double *out_nwall); | |
| /** | |
| * Get overall performance information. | |
| * | |
| * @memberof ps_decoder_t | |
| * @param ps Decoder. | |
| * @param out_nspeech Output: Number of seconds of speech. | |
| * @param out_ncpu Output: Number of seconds of CPU time used. | |
| * @param out_nwall Output: Number of seconds of wall time used. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, | |
| double *out_ncpu, double *out_nwall); | |
| /** | |
| * @mainpage PocketSphinx API Documentation | |
| * @author David Huggins-Daines <dhdaines@gmail.com> | |
| * @version 5.0.0 | |
| * @date October 5, 2022 | |
| * | |
| * @tableofcontents{HTML:1} | |
| * | |
| * @section intro_sec Introduction | |
| * | |
| * This is the documentation for the PocketSphinx speech recognition | |
| * engine. The main API calls are documented in \ref ps_decoder_t and | |
| * \ref ps_config_t. The organization of this document is not optimal | |
| * due to the limitations of Doxygen, so if you know of a better tool | |
| * for documenting object-oriented interfaces in C, please let me know. | |
| * | |
| * @section install_sec Installation | |
| * | |
| * To install from source, you will need a C compiler and a recent | |
| * version of CMake. If you wish to use an integrated development | |
| * environment, Visual Studio Code will automate most of this process | |
| * for you once you have installed C++ and CMake support as described | |
| * at https://code.visualstudio.com/docs/languages/cpp | |
| * | |
| * The easiest way to program PocketSphinx is with the Python module. | |
| * See http://pocketsphinx.readthedocs.io/ for installation and usage | |
| * instructions. | |
| * | |
| * @subsection unix_install Unix-like systems | |
| * | |
| * From the top-level source directory, use CMake to generate a build | |
| * directory: | |
| * | |
| * cmake -S . -B build | |
| * | |
| * Now you can compile and run the tests, and install the code: | |
| * | |
| * cmake --build build | |
| * cmake --build build --target check | |
| * cmake --build build --target install | |
| * | |
| * By default CMake will try to install things in `/usr/local`, which | |
| * you might not have access to. If you want to install somewhere | |
| * else you need to set `CMAKE_INSTALL_PREFIX` *when running cmake for | |
| * the first time*, for example: | |
| * | |
| * cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local | |
| * | |
| * @subsection windows_install Windows | |
| * | |
| * On Windows, the process is similar, but you will need to tell CMake | |
| * what build tool you are using with the `-G` option, and there are | |
| * many of them. The build is known to work with `nmake` but it is | |
| * easiest just to use Visual Studio Code, which should automatically | |
| * detect and offer to run the build when you add the source directory | |
| * to your list of directories. Once built, you will find the EXE | |
| * files in `build\Debug` or `build\Release` depending on your build | |
| * type. | |
| * | |
| * @subsection build_options Compilation options | |
| * | |
| * By default, PocketSphinx does *not* build shared libraries, as | |
| * there are not very many executables, and the library is quite smol. | |
| * If you insist on building them, you can add `BUILD_SHARED_LIBS=ON` | |
| * to the CMake configuration. This is done either in the CMake GUI, | |
| * in Visual Studio Code, or with the `-D` option to the first CMake | |
| * command-line above, e.g.: | |
| * | |
| * cmake -S . -B build -DBUILD_SHARED_LIBS=ON | |
| * | |
| * GStreamer support is not built by default, but can be enabled with | |
| * `BUILD_GSTREAMER=ON`. | |
| * | |
| * PocketSphinx uses a mixture of fixed and floating-point computation | |
| * by default, but can be configured to use fixed-point (nearly) | |
| * exclusively with `FIXED_POINT=ON`. | |
| * | |
| * @section programming_sec Using the Library | |
| * | |
| * Minimally, to do speech recognition, you must first create a | |
| * configuration, using \ref ps_config_t and its associated functions. | |
| * This configuration is then passed to ps_init() to initialize the | |
| * decoder, which is returned as a \ref ps_decoder_t. Note that you must | |
| * ultimately release the configuration with ps_config_free() to avoid | |
| * memory leaks. | |
| * | |
| * At this point, you can start an "utterance" (a section of speech | |
| * you wish to recognize) with ps_start_utt() and pass audio data to | |
| * the decoder with ps_process_raw(). When finished, call | |
| * ps_end_utt() to finalize recognition. The result can then be | |
| * obtained with ps_get_hyp(). To get a detailed word segmentation, | |
| * use ps_seg_iter(). To get the N-best results, use ps_nbest(). | |
| * | |
| * When you no longer need the decoder, release its memory with | |
| * ps_free(). | |
| * | |
| * A concrete example can be found in \ref simple.c. | |
| * | |
| * You may, however, wish to do more interesting things like | |
| * segmenting and recognizing speech from an audio stream. As | |
| * described below, PocketSphinx will *not* handle the details of | |
| * microphone input for you, because doing this in a reliable and | |
| * portable way is outside the scope of a speech recognizer. In | |
| * theory, [PortAudio](http://www.portaudio.com/) should work across | |
| * many platforms. An example using it is in \ref live_portaudio.c. | |
| * | |
| * On Windows, an example of using the [Waveform Audio | |
| * API](https://learn.microsoft.com/en-us/windows/win32/multimedia/waveform-audio) | |
| * can be found in \ref live_win32.c. | |
| * | |
| * On GNU/Linux and some other platforms, audio might be handled by | |
| * the PulseAudio library/server, in which case you can also use the | |
| * technique in \ref live_pulseaudio.c. | |
| * | |
| * Finally, if you have `sox` on your platform, you can simply use the | |
| * method shown in \ref live.c. | |
| * | |
| * @section faq_sec Frequently Asked Questions | |
| * | |
| * @subsection faq_api My code no longer compiles! Why? | |
| * | |
| * Some APIs were intentionally broken by the 5.0.0 release. The most | |
| * likely culprit here is the configuration API, where the old | |
| * "options" which started with a `-` are now "parameters" which do | |
| * not, and instead of a `cmd_ln_t` it is now a `ps_config_t`. There | |
| * is no backward compatibility; you have to change your code | |
| * manually. This is straightforward for the most part. For example, | |
| * instead of writing: | |
| * | |
| * cmdln = cmd_ln_init(NULL, "-samprate", "16000", NULL); | |
| * cmd_ln_set_int32_r(NULL, "-maxwpf", 40); | |
| * | |
| * You should write: | |
| * | |
| * config = ps_config_init(NULL); | |
| * ps_config_set_int(config, "samprate", 16000); | |
| * ps_config_set_int(config, "maxwpf", 40); | |
| * | |
| * Another likely suspect is the \ref pocketsphinx/search.h | |
| * "search module API" where the function names have been changed to be more | |
| * intuitive. Wherever you had `ps_set_search` you can use | |
| * ps_activate_search(); it is the same function. Likewise, anything | |
| * that was `ps_set_*` is now `ps_add_*`, e.g. ps_add_lm(), | |
| * ps_add_fsg(), ps_add_keyphrase(). | |
| * | |
| * @subsection faq_path What does ERROR: "acmod.c, line NN: ..." mean? | |
| * | |
| * In general you will get "Acoustic model definition is not | |
| * specified" or "Folder does not contain acoustic model definition" | |
| * errors if PocketSphinx cannot find a model. If you are trying to | |
| * use the default model, perhaps you have not installed | |
| * PocketSphinx. Unfortunately it is not designed to run "in-place", | |
| * but you can get around this by setting the `POCKETSPHINX_PATH` | |
| * environment variable, e.g. | |
| * | |
| * cmake --build build | |
| * POCKETSPHINX_PATH=$PWD/model build/pocketsphinx single foo.wav | |
| * | |
| * @subsection faq_blank There is literally no output! | |
| * | |
| * If by this you mean it doesn't spew copious logging output like it | |
| * used to, you can solve this by passing `-loglevel INFO` on the | |
| * command-line, or setting the `loglevel` parameter to `"INFO"`, or | |
| * calling err_set_loglevel() with `ERR_INFO`. | |
| * | |
| * If you mean that you just don't have any recognition result, you | |
| * may have forgotten to configure a dictionary. Or see \ref | |
| * faq_error "below" for other reasons the output could be blank. | |
| * | |
| * @subsection faq_audio Why doesn't my audio device work? | |
| * | |
| * Because it's an audio device. They don't work, at least for things | |
| * other than making annoying "beep boop" noises and playing Video | |
| * Games. More generally, I cannot solve this problem for you, | |
| * because every single computer, operating system, sound card, | |
| * microphone, phase of the moon, and day of the week is different | |
| * when it comes to recording audio. That's why I suggest you use | |
| * SoX, because (a) it usually works, and (b) whoever wrote it seems | |
| * to have retired long ago, so you can't bother them. | |
| * | |
| * @subsection faq_error The recognized text is wrong. | |
| * | |
| * That's not a question! But since this isn't Jeopardy, and my name | |
| * is not Watson, I'll try to answer it anyway. Be aware that the | |
| * answer depends on many things, first and foremost what you mean by | |
| * "wrong". | |
| * | |
| * If it *sounds* the same, e.g. "wreck a nice beach" when you said | |
| * "recognize speech" then the issue is that the **language model** is | |
| * not appropriate for the task, domain, dialect, or whatever it is | |
| * you're trying to recognize. You may wish to consider writing a | |
| * JSGF grammar and using it instead of the default language model | |
| * (with the `jsgf` parameter). Or you can get an N-best list or word | |
| * lattice and rescore it with a better language model, such as a | |
| * recurrent neural network or a human being. | |
| * | |
| * If it is total nonsense, or if it is just blank, or if it's the | |
| * same word repeated, e.g. "a a a a a a", then there is likely a | |
| * problem with the input audio. The sampling rate could be wrong, or | |
| * even if it's correct, you may have narrow-band data. Try to look | |
| * at the spectrogram (Audacity can show you this) and see if it looks | |
| * empty or flat below the frequency in the `upperf` parameter. | |
| * Alternately it could just be very noisy. In particular, if the | |
| * noise consists of other people talking, automatic speech | |
| * recognition will nearly always fail. | |
| * | |
| * @subsection faq_tech Why don't you support (pick one or more: WFST, fMLLR, SAT, DNN, CTC, LAS, CNN, RNN, LSTM, etc)? | |
| * | |
| * Not because there's anything wrong with those things (except LAS, | |
| * which is kind of a dumb idea) but simply because PocketSphinx does | |
| * not do them, or anything like them, and there is no point in adding | |
| * them to it when other systems exist. Many of them are also heavily | |
| * dependent on distasteful and wasteful platforms like C++, CUDA, | |
| * TensorFlow, PyTorch, and so on. | |
| * | |
| * @section thanks_sec Acknowledgements | |
| * | |
| * PocketSphinx was originally released by David Huggins-Daines, but | |
| * is largely based on the previous Sphinx-II and Sphinx-III systems, | |
| * developed by a large number of contributors at Carnegie Mellon | |
| * University, and released as open source under a BSD-like license | |
| * thanks to Kevin Lenzo. For some time, it was maintained by | |
| * Nickolay Shmyrev and others at Alpha Cephei, Inc. See the | |
| * `AUTHORS` file for a list of contributors. | |
| */ | |
| } /* extern "C" */ | |