| /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ | |
| /* ==================================================================== | |
| * Copyright (c) 2015 Carnegie Mellon University. All rights | |
| * reserved. | |
| * | |
| * Redistribution and use in source and binary forms, with or without | |
| * modification, are permitted provided that the following conditions | |
| * are met: | |
| * | |
| * 1. Redistributions of source code must retain the above copyright | |
| * notice, this list of conditions and the following disclaimer. | |
| * | |
| * 2. Redistributions in binary form must reproduce the above copyright | |
| * notice, this list of conditions and the following disclaimer in | |
| * the documentation and/or other materials provided with the | |
| * distribution. | |
| * | |
| * This work was supported in part by funding from the Defense Advanced | |
| * Research Projects Agency and the National Science Foundation of the | |
| * United States of America, and the CMU Sphinx Speech Consortium. | |
| * | |
| * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND | |
| * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY | |
| * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| * | |
| * ==================================================================== | |
| * | |
| */ | |
| typedef struct ngram_raw_s { | |
| uint32 *words; /* array of word indexes, length corresponds to ngram order */ | |
| float32 prob; | |
| float32 backoff; | |
| uint32 order; | |
| } ngram_raw_t; | |
| typedef union { | |
| float32 f; | |
| int32 l; | |
| } dmp_weight_t; | |
/**
 * Raw ordered ngrams comparator.
 *
 * The signature matches the comparison callback taken by qsort()/bsearch().
 *
 * @param a_raw [in] first element to compare
 * @param b_raw [in] second element to compare
 * @return comparison result as an int
 *
 * NOTE(review): the pointee type of the arguments and the sign convention
 * of the result are not visible in this header -- confirm against the
 * implementation before relying on them.
 */
int ngram_ord_comparator(const void *a_raw, const void *b_raw);
| /** | |
| * Read ngrams of order > 1 from ARPA file | |
| * @param li [in] sphinxbase file line iterator that point to bigram description in ARPA file | |
| * @param wid [in] hashtable that maps string word representation to id | |
| * @param lmath [in] log math used for log convertions | |
| * @param counts [in] amount of ngrams for each order | |
| * @param order [in] maximum order of ngrams | |
| * @return raw ngrams of order bigger than 1 | |
| */ | |
| ngram_raw_t **ngrams_raw_read_arpa(lineiter_t ** li, logmath_t * lmath, | |
| uint32 * counts, int order, | |
| hash_table_t * wid); | |
| /** | |
| * Reads ngrams of order > 1 from DMP file. | |
| * @param fp [in] file to read from. Position in file corresponds to start of bigram description | |
| * @param lmath [in] log math used for log convertions | |
| * @param counts [in] amount of ngrams for each order | |
| * @param order [in] maximum order of ngrams | |
| * @param unigram_next [in] array of next word pointers for unigrams. Needed to define forst word of bigrams | |
| * @param do_swap [in] wether to do swap of bits | |
| * @return raw ngrams of order bigger than 1 | |
| */ | |
| ngram_raw_t **ngrams_raw_read_dmp(FILE * fp, logmath_t * lmath, | |
| uint32 * counts, int order, | |
| uint32 * unigram_next, uint8 do_swap); | |
| void ngrams_raw_free(ngram_raw_t ** raw_ngrams, uint32 * counts, | |
| int order); | |