| /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ | |
| /* ==================================================================== | |
| * Copyright (c) 1999-2004 Carnegie Mellon University. All rights | |
| * reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions | |
| * are met: | |
| * | |
| * 1. Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * | |
| * 2. Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in | |
| * the documentation and/or other materials provided with the | |
| * distribution. | |
| * | |
| * This work was supported in part by funding from the Defense Advanced | |
| * Research Projects Agency and the National Science Foundation of the | |
| * United States of America, and the CMU Sphinx Speech Consortium. | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND | |
| * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY | |
| * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| * ==================================================================== | |
| * | |
| */ | |
| /** \file dict.h | |
| * \brief Operations on dictionary. | |
| */ | |
| extern "C" { | |
| } | |
| /** | |
| \struct dictword_t | |
| \brief A structure for one dictionary word (one pronunciation entry). | |
| */ | |
typedef struct dictword_s { | |
| char *word; /**< ASCII word string */ | |
| s3cipid_t *ciphone; /**< Pronunciation, as an array of CI phone ids */ | |
| int32 pronlen; /**< Pronunciation length, i.e. number of phones in ciphone */ | |
| s3wid_t alt; /**< Next alternative pronunciation id, NOT_S3WID if none */ | |
| s3wid_t basewid; /**< Base pronunciation id */ | |
| } dictword_t; | |
| /** | |
| \struct dict_t | |
| \brief A structure for a dictionary. | |
| */ | |
| typedef struct dict_s { | |
| int refcnt; /**< Reference count. NOTE(review): presumably managed by dict_retain()/dict_free() - confirm */ | |
| bin_mdef_t *mdef; /**< Model definition used for phone IDs; NULL if none used */ | |
| dictword_t *word; /**< Array of entries in dictionary */ | |
| hash_table_t *ht; /**< Hash table for mapping word strings to word ids */ | |
| int32 max_words; /**< #Entries allocated in dict, including empty slots */ | |
| int32 n_word; /**< #Occupied entries in dict; ie, excluding empty slots */ | |
| int32 filler_start; /**< First filler word id (read from filler dict) */ | |
| int32 filler_end; /**< Last filler word id (read from filler dict) */ | |
| s3wid_t startwid; /**< FOR INTERNAL-USE ONLY */ | |
| s3wid_t finishwid; /**< FOR INTERNAL-USE ONLY */ | |
| s3wid_t silwid; /**< FOR INTERNAL-USE ONLY */ | |
| int nocase; /**< NOTE(review): presumably nonzero when word lookup ignores case (cf. -dictcase) - confirm */ | |
| } dict_t; | |
| /** | |
| * Initialize a new dictionary. | |
| * | |
| * If config and mdef are supplied, then the dictionary will be read | |
| * from the files specified by the -dict and -fdict options in config, | |
| * with case sensitivity determined by the -dictcase option. | |
| * | |
| * Otherwise an empty case-sensitive dictionary will be created. | |
| * | |
| * Return a pointer to the new dict_t if successful, NULL otherwise. | |
| */ | |
| dict_t *dict_init(ps_config_t *config, /**< Configuration (-dict, -fdict, -dictcase) or NULL */ | |
| bin_mdef_t *mdef /**< For looking up CI phone IDs (or NULL) */ | |
| ); | |
| /** | |
| * Write dictionary to a file. | |
| * | |
| * dict is the dictionary to write and filename names the output file. | |
| * NOTE(review): the accepted values of format and the meaning of the | |
| * returned int are not visible in this header - confirm against the | |
| * implementation before relying on them. | |
| */ | |
| int dict_write(dict_t *dict, char const *filename, char const *format); | |
| /** Look up a word string: return its word id if present in d, otherwise return BAD_S3WID. */ | |
| POCKETSPHINX_EXPORT | |
| s3wid_t dict_wordid(dict_t *d, const char *word); | |
| /** | |
| * Return 1 if w is a filler word, 0 if not. | |
| * | |
| * A filler word is one that was read in from the filler dictionary; | |
| * however, the sentence START and FINISH words are not filler words. | |
| */ | |
| int dict_filler_word(dict_t *d, /**< The dictionary structure */ | |
| s3wid_t w /**< The word ID */ | |
| ); | |
| /** | |
| * Test if w is a "real" word, i.e. neither a filler word nor START/FINISH. | |
| * | |
| * NOTE(review): presumably returns nonzero for a real word and 0 | |
| * otherwise, mirroring dict_filler_word() - confirm. | |
| */ | |
| POCKETSPHINX_EXPORT | |
| int dict_real_word(dict_t *d, /**< The dictionary structure */ | |
| s3wid_t w /**< The word ID */ | |
| ); | |
| /** | |
| * Add a word with the given ciphone pronunciation list to the dictionary. | |
| * | |
| * The pronunciation is passed as the array p holding np phone ids. | |
| * | |
| * Return value: Result word id if successful, BAD_S3WID otherwise. | |
| */ | |
| s3wid_t dict_add_word(dict_t *d, /**< The dictionary structure. */ | |
| char const *word, /**< The word. */ | |
| s3cipid_t const *p, /**< The pronunciation. */ | |
| int32 np /**< Number of phones. */ | |
| ); | |
| /** | |
| * Look up the name of one phone in a word's pronunciation. | |
| * | |
| * Return value: CI phone string for the given word, phone position. | |
| */ | |
| const char *dict_ciphone_str(dict_t *d, /**< In: Dictionary to look up */ | |
| s3wid_t wid, /**< In: Component word being looked up */ | |
| int32 pos /**< In: Pronunciation phone position */ | |
| ); | |
| /** Packaged macro access to dictionary members */ | |
| /** | |
| * Number of "real words" in the dictionary. | |
| * | |
| * This is the number of words that are not fillers, <s>, or </s>. | |
| */ | |
| /* Hard-coded special words */ | |
| /** | |
| * If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative | |
| * pronunciation specification), strip that trailing portion from it. | |
| * | |
| * Note that the given string is modified in place, so word must point to | |
| * writable storage. | |
| * | |
| * Return value: If string was modified, the character position at which the original string | |
| * was truncated; otherwise -1. | |
| */ | |
| int32 dict_word2basestr(char *word); | |
| /** | |
| * Retain a pointer to a dict_t. | |
| * | |
| * NOTE(review): presumably increments the reference count and returns | |
| * the same pointer d - confirm against the implementation. | |
| */ | |
| dict_t *dict_retain(dict_t *d); | |
| /** | |
| * Release a pointer to a dictionary. | |
| * | |
| * NOTE(review): the meaning of the returned int is not visible in this | |
| * header (presumably the remaining reference count) - confirm against | |
| * the implementation. | |
| */ | |
| int dict_free(dict_t *d); | |
| /** Report a dictionary structure. NOTE(review): where the report is written is not visible in this header - confirm. */ | |
| void dict_report(dict_t *d /**< A dictionary structure */ | |
| ); | |
| } /* extern "C" */ | |