| /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ | |
| /* ==================================================================== | |
| * Copyright (c) 1999-2013 Carnegie Mellon University. All rights | |
| * reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions | |
| * are met: | |
| * | |
| * 1. Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * | |
| * 2. Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in | |
| * the documentation and/or other materials provided with the | |
| * distribution. | |
| * | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND | |
| * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY | |
| * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| * ==================================================================== | |
| * | |
| */ | |
| /* | |
| * fsg_lextree.h -- The collection of all the lextrees for the entire FSM. | |
| * | |
| */ | |
| extern "C" { | |
| } | |
| /* | |
| * Compile-time constant determining the size of the | |
| * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.) | |
| * But it makes memory allocation simpler and more efficient. | |
| * Make it smaller (2) to save memory if your phoneset has less than | |
| * 64 phones. | |
| */ | |
| typedef struct fsg_pnode_ctxt_s { | |
| uint32 bv[FSG_PNODE_CTXT_BVSZ]; | |
| } fsg_pnode_ctxt_t; | |
| /* | |
| * All transitions (words) out of any given FSG state represented are by a | |
| * phonetic prefix lextree (except for epsilon or null transitions; they | |
| * are not part of the lextree). Lextree leaf nodes represent individual | |
| * FSG transitions, so no sharing is allowed at the leaf nodes. The FSG | |
| * transition probs are distributed along the lextree: the prob at a node | |
| * is the max of the probs of all leaf nodes (and, hence, FSG transitions) | |
| * reachable from that node. | |
| * | |
| * To conserve memory, the underlying HMMs with state-level information are | |
| * allocated only as needed. Root and leaf nodes must also account for all | |
| * the possible phonetic contexts, with an independent HMM for each distinct | |
| * context. | |
| */ | |
| typedef struct fsg_pnode_s { | |
| /* | |
| * If this is not a leaf node, the first successor (child) node. Otherwise | |
| * the parent FSG transition for which this is the leaf node (for figuring | |
| * the FSG destination state, and word emitted by the transition). A node | |
| * may have several children. The succ ptr gives just the first; the rest | |
| * are linked via the sibling ptr below. | |
| */ | |
| union { | |
| struct fsg_pnode_s *succ; | |
| fsg_link_t *fsglink; | |
| } next; | |
| /* | |
| * For simplicity of memory management (i.e., freeing the pnodes), all | |
| * pnodes allocated for all transitions out of a state are maintained in a | |
| * linear linked list through the alloc_next pointer. | |
| */ | |
| struct fsg_pnode_s *alloc_next; | |
| /* | |
| * The next node that is also a child of the parent of this node; NULL if | |
| * none. | |
| */ | |
| struct fsg_pnode_s *sibling; | |
| /* | |
| * The transition (log) probability to be incurred upon transitioning to | |
| * this node. (Transition probabilities are really associated with the | |
| * transitions. But a lextree node has exactly one incoming transition. | |
| * Hence, the prob can be associated with the node.) | |
| * This is a logs2(prob) value, and includes the language weight. | |
| */ | |
| int32 logs2prob; | |
| /* | |
| * The root and leaf positions associated with any transition have to deal | |
| * with multiple phonetic contexts. However, different contexts may result | |
| * in the same SSID (senone-seq ID), and can share a single pnode with that | |
| * SSID. But the pnode should track the set of context CI phones that share | |
| * it. Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For | |
| * simplicity of implementation, its size is a compile-time constant for | |
| * now.) Single phone words would need a 2-D array of context, but that's | |
| * too expensive. For now, they simply use SIL as right context, so only | |
| * the left context is properly modelled. | |
| * (For word-internal phones, this field is unused, of course.) | |
| */ | |
| fsg_pnode_ctxt_t ctxt; | |
| uint16 ci_ext; /* This node's CIphone as viewed externally (context) */ | |
| uint8 ppos; /* Phoneme position in pronunciation */ | |
| uint8 leaf; /* Whether this is a leaf node */ | |
| /* HMM-state-level stuff here */ | |
| hmm_context_t *ctx; | |
| hmm_t hmm; | |
| } fsg_pnode_t; | |
| /* Access macros */ | |
| /* | |
| * The following is macroized because its called very frequently | |
| * ::: uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub); | |
| */ | |
| /* | |
| * Subtract bitvector sub from bitvector src (src updated with the result). | |
| * Return 0 if result is all 0, non-zero otherwise. | |
| */ | |
| /** | |
| * Collection of lextrees for an FSG. | |
| */ | |
| typedef struct fsg_lextree_s { | |
| fsg_model_t *fsg; /**< The fsg for which this lextree is built. */ | |
| hmm_context_t *ctx; /**< HMM context structure. */ | |
| dict_t *dict; /**< Pronunciation dictionary for this FSG. */ | |
| dict2pid_t *d2p; /**< Context-dependent phone mappings for this FSG. */ | |
| bin_mdef_t *mdef; /**< Model definition (triphone mappings). */ | |
| /* | |
| * Left and right CIphone sets for each state. | |
| * Left context CIphones for a state S: If word W transitions into S, W's | |
| * final CIphone is in S's {lc}. Words transitioning out of S must consider | |
| * these left context CIphones. | |
| * Similarly, right contexts for state S: If word W transitions out of S, | |
| * W's first CIphone is in S's {rc}. Words transitioning into S must consider | |
| * these right contexts. | |
| * | |
| * NOTE: Words may transition into and out of S INDIRECTLY, with intermediate | |
| * null transitions. | |
| * NOTE: Single-phone words are difficult; only SILENCE right context is | |
| * modelled for them. | |
| * NOTE: Non-silence filler phones aren't included in these sets. Filler | |
| * words don't use context, and present the SILENCE phone as context to | |
| * adjacent words. | |
| */ | |
| int16 **lc; /**< Left context triphone mappings for FSG. */ | |
| int16 **rc; /**< Right context triphone mappings for FSG. */ | |
| fsg_pnode_t **root; /* root[s] = lextree representing all transitions | |
| out of state s. Note that the "tree" for each | |
| state is actually a collection of trees, linked | |
| via fsg_pnode_t.sibling (root[s]->sibling) */ | |
| fsg_pnode_t **alloc_head; /* alloc_head[s] = head of linear list of all | |
| pnodes allocated for state s */ | |
| int32 n_pnode; /* #HMM nodes in search structure */ | |
| int32 wip; | |
| int32 pip; | |
| } fsg_lextree_t; | |
| /* Access macros */ | |
| /** | |
| * Create, initialize, and return a new phonetic lextree for the given FSG. | |
| */ | |
| fsg_lextree_t *fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, | |
| dict2pid_t *d2p, | |
| bin_mdef_t *mdef, hmm_context_t *ctx, | |
| int32 wip, int32 pip); | |
| /** | |
| * Free lextrees for an FSG. | |
| */ | |
| void fsg_lextree_free(fsg_lextree_t *fsg); | |
| /** | |
| * Print an FSG lextree to a file for debugging. | |
| */ | |
| void fsg_lextree_dump(fsg_lextree_t *fsg, FILE *fh); | |
| /** | |
| * Mark the given pnode as inactive (for search). | |
| */ | |
| void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode); | |
| /** | |
| * Set all flags on in the given context bitvector. | |
| */ | |
| void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt); | |
| /** | |
| * Generic variant for arbitrary size | |
| */ | |
| uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub); | |
| } /* extern "C" */ | |