/* pocketsphinx / src / lm / lm_trie.h (pocketsphinx, revision 5610573) */
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
* Copyright (c) 2015 Carnegie Mellon University. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* This work was supported in part by funding from the Defense Advanced
* Research Projects Agency and the National Science Foundation of the
* United States of America, and the CMU Sphinx Speech Consortium.
*
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ====================================================================
*
*/
#ifndef __LM_TRIE_H__
#define __LM_TRIE_H__
#include "util/pio.h"
#include "lm/bitarr.h"
#include "lm/ngram_model_internal.h"
#include "lm/lm_trie_quant.h"
/**
 * Per-unigram record: two floats and one 32-bit value.
 *
 * lm_trie_t refers to these through its `unigrams` pointer.
 */
typedef struct unigram_s {
float prob; /**< presumably the word's (log) probability -- TODO confirm scale/units */
float bo; /**< presumably the word's backoff weight -- TODO confirm */
uint32 next; /**< presumably the index where this word's successors start in the next order -- TODO confirm */
} unigram_t;
/**
 * Pair of 32-bit indices delimiting a range.
 *
 * Passed by value as the `range` argument of lm_trie_fill_raw_ngram().
 * NOTE(review): this header does not show whether `end` is inclusive or
 * one-past-the-end -- confirm against the implementation.
 */
typedef struct node_range_s {
uint32 begin; /**< first index of the range */
uint32 end; /**< last index of the range (inclusive vs. exclusive: TODO confirm) */
} node_range_t;
/**
 * Fields common to middle_t and longest_t, each of which embeds a base_t
 * as its first member.
 *
 * The per-field notes below are inferred from the names only and should be
 * verified against the implementation.
 */
typedef struct base_s {
uint8 word_bits; /**< presumably the number of bits used to store a word id */
uint8 total_bits; /**< presumably the total number of bits per stored record */
uint32 word_mask; /**< presumably a mask selecting the word-id bits */
uint8 *base; /**< byte pointer; presumably the start of this order's storage */
uint32 insert_index; /**< presumably the position of the next record to insert */
uint32 max_vocab; /**< presumably the largest word id / vocabulary size */
} base_t;
/**
 * Per-order descriptor that extends base_t with a mask, a quantization bit
 * count and an untyped link.
 *
 * lm_trie_t refers to these through `middle_begin` / `middle_end`.
 * NOTE(review): the name suggests the orders between unigrams and the
 * highest order -- confirm.
 */
typedef struct middle_s {
base_t base; /**< common fields, see base_t */
bitarr_mask_t next_mask; /**< mask of type bitarr_mask_t; presumably applied when reading the link to the next order -- TODO confirm */
uint8 quant_bits; /**< presumably the number of bits taken by quantized values -- TODO confirm */
void *next_source; /**< untyped pointer; presumably the descriptor of the next higher order -- TODO confirm pointee type */
} middle_t;
/**
 * Per-order descriptor that extends base_t with a quantization bit count
 * only (no link fields, unlike middle_t).
 *
 * lm_trie_t holds a single pointer to this type (`longest`).
 * NOTE(review): the name suggests the highest n-gram order -- confirm.
 */
typedef struct longest_s {
base_t base; /**< common fields, see base_t */
uint8 quant_bits; /**< presumably the number of bits taken by quantized values -- TODO confirm */
} longest_t;
/**
 * Top-level trie object handled by every lm_trie_* function declared in
 * this header.
 *
 * Ownership of the pointed-to memory is not visible here; see
 * lm_trie_free() in the implementation.
 */
typedef struct lm_trie_s {
uint8 *ngram_mem; /**< Appears to be a bit array as handled by lm/bitarr.h */
size_t ngram_mem_size; /**< presumably the size of ngram_mem in bytes -- TODO confirm */
unigram_t *unigrams; /**< pointer to unigram_t records; element count is not stored in this struct */
middle_t *middle_begin; /**< presumably the first middle_t of an array -- TODO confirm */
middle_t *middle_end; /**< presumably the end of that array (one-past-the-end vs. last: TODO confirm) */
longest_t *longest; /**< pointer to the longest_t descriptor */
lm_trie_quant_t *quant; /**< quantization state, type declared in lm/lm_trie_quant.h */
float backoff_cache[NGRAM_MAX_ORDER]; /**< NGRAM_MAX_ORDER floats; presumably scratch backoff values reused between scoring calls -- TODO confirm */
uint32 hist_cache[NGRAM_MAX_ORDER - 1]; /**< NGRAM_MAX_ORDER - 1 values; presumably the history that backoff_cache corresponds to -- TODO confirm */
} lm_trie_t;
/**
 * Creates an lm_trie structure.
 *
 * NOTE(review): an earlier version of this comment said the structure is
 * filled when a binary file is provided, but this function takes no file
 * argument; the FILE * parameter belongs to lm_trie_read_bin().
 *
 * @param unigram_count number of unigrams
 * @param order n-gram order of the model
 * @return pointer to the created trie; presumably released with
 *         lm_trie_free() -- confirm
 */
lm_trie_t *lm_trie_create(uint32 unigram_count, int order);
/**
 * Reads a trie from a binary stream.
 *
 * @param counts array of counts; presumably one entry per n-gram order,
 *               with at least `order` entries -- TODO confirm
 * @param order n-gram order of the model
 * @param fp stream to read from
 * @return pointer to the resulting trie; failure behavior is not visible
 *         from this header
 */
lm_trie_t *lm_trie_read_bin(uint32 * counts, int order, FILE * fp);
/**
 * Writes a trie to a binary stream.
 *
 * Returns nothing, so write errors are not reported through the return
 * value.
 *
 * @param trie trie to write
 * @param unigram_count number of unigrams; presumably the number of
 *                      entries in trie->unigrams to write -- TODO confirm
 * @param fp stream to write to
 */
void lm_trie_write_bin(lm_trie_t * trie, uint32 unigram_count, FILE * fp);
/**
 * Releases a trie.
 *
 * @param trie trie to release; whether NULL is accepted is not visible
 *             from this header -- confirm before relying on it
 */
void lm_trie_free(lm_trie_t * trie);
/**
 * Builds the contents of a trie from raw n-grams.
 *
 * @param trie trie to populate
 * @param raw_ngrams array of pointers to raw n-gram arrays; presumably one
 *                   array per order above unigrams -- TODO confirm indexing
 * @param counts array of counts; presumably per-order input n-gram counts
 *               -- TODO confirm
 * @param out_counts array of counts; presumably receives the per-order
 *                   counts actually stored -- TODO confirm
 * @param order n-gram order of the model
 */
void lm_trie_build(lm_trie_t * trie, ngram_raw_t ** raw_ngrams,
uint32 * counts, uint32 *out_counts, int order);
/**
 * Fills an array of raw n-grams from the contents of a trie.
 *
 * NOTE(review): the parameter roles below are inferred from names and
 * types only; the traversal itself is not visible from this header.
 *
 * @param trie trie to read from
 * @param raw_ngrams raw n-gram array; presumably the output -- TODO confirm
 * @param raw_ngram_idx pointer to an index; presumably the next free slot
 *                      in raw_ngrams, advanced by the call -- TODO confirm
 * @param counts array of counts; presumably per-order -- TODO confirm
 * @param range node range (begin/end pair) to process
 * @param hist array of word ids forming the current history
 * @param n_hist number of entries in hist
 * @param order presumably the order currently being processed
 * @param max_order presumably the order at which n-grams are emitted
 */
void lm_trie_fill_raw_ngram(lm_trie_t * trie,
ngram_raw_t * raw_ngrams, uint32 * raw_ngram_idx,
uint32 * counts, node_range_t range, uint32 * hist,
int n_hist, int order, int max_order);
/**
 * Scores a word given a history using the trie.
 *
 * @param trie trie to query
 * @param order n-gram order of the model
 * @param wid id of the word to score
 * @param hist array of history word ids; ordering (most recent first or
 *             last) is not visible from this header -- TODO confirm
 * @param n_hist number of entries in hist
 * @param n_used output; presumably receives the length of the n-gram that
 *               was actually matched -- TODO confirm
 * @return the score as a float; presumably a log probability in the same
 *         scale as unigram_t.prob -- TODO confirm
 */
float lm_trie_score(lm_trie_t * trie, int order, int32 wid, int32 * hist,
int32 n_hist, int32 * n_used);
#endif /* __LM_TRIE_H__ */