| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | #include <string.h> |
| | #include <assert.h> |
| |
|
| | #include <pocketsphinx.h> |
| |
|
| | #include "util/ckd_alloc.h" |
| | #include "util/listelem_alloc.h" |
| | #include "ngram_search.h" |
| | #include "ps_lattice_internal.h" |
| |
|
| | |
/*
 * Compile-time switch for channel evaluation.  With __CHAN_DUMP__ left at 0,
 * chan_v_eval() expands to hmm_vit_eval(); set it to a non-zero value to
 * route evaluation through hmm_dump_vit_eval(), which is also given stderr.
 */
#define __CHAN_DUMP__ 0
#if !__CHAN_DUMP__
#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
#else
#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
#endif
| |
|
| | static void |
| | ngram_fwdflat_expand_all(ngram_search_t *ngs) |
| | { |
| | int n_words, i; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | ngs->n_expand_words = 0; |
| | n_words = ps_search_n_words(ngs); |
| | bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs)); |
| | for (i = 0; i < n_words; ++i) { |
| | if (!ngram_model_set_known_wid(ngs->lmset, |
| | dict_basewid(ps_search_dict(ngs),i))) |
| | continue; |
| | ngs->fwdflat_wordlist[ngs->n_expand_words] = i; |
| | ngs->expand_word_list[ngs->n_expand_words] = i; |
| | bitvec_set(ngs->expand_word_flag, i); |
| | ngs->n_expand_words++; |
| | } |
| | E_INFO("Utterance vocabulary contains %d words\n", ngs->n_expand_words); |
| | ngs->expand_word_list[ngs->n_expand_words] = -1; |
| | ngs->fwdflat_wordlist[ngs->n_expand_words] = -1; |
| | } |
| |
|
| | static void |
| | ngram_fwdflat_allocate_1ph(ngram_search_t *ngs) |
| | { |
| | dict_t *dict = ps_search_dict(ngs); |
| | int n_words = ps_search_n_words(ngs); |
| | int i, w; |
| |
|
| | |
| | |
| | ngs->n_1ph_words = 0; |
| | for (w = 0; w < n_words; w++) { |
| | if (dict_is_single_phone(dict, w)) |
| | ++ngs->n_1ph_words; |
| | } |
| | ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words, |
| | sizeof(*ngs->single_phone_wid)); |
| | ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph)); |
| | i = 0; |
| | for (w = 0; w < n_words; w++) { |
| | if (!dict_is_single_phone(dict, w)) |
| | continue; |
| |
|
| | |
| | ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w); |
| | ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef); |
| | hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE, |
| | bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, |
| | ngs->rhmm_1ph[i].ciphone), |
| | bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, |
| | ngs->rhmm_1ph[i].ciphone)); |
| | ngs->rhmm_1ph[i].next = NULL; |
| | ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]); |
| | ngs->single_phone_wid[i] = w; |
| | i++; |
| | } |
| | } |
| |
|
| | static void |
| | ngram_fwdflat_free_1ph(ngram_search_t *ngs) |
| | { |
| | int i, w; |
| | int n_words = ps_search_n_words(ngs); |
| |
|
| | for (i = w = 0; w < n_words; ++w) { |
| | if (!dict_is_single_phone(ps_search_dict(ngs), w)) |
| | continue; |
| | hmm_deinit(&ngs->rhmm_1ph[i].hmm); |
| | ++i; |
| | } |
| | ckd_free(ngs->rhmm_1ph); |
| | ngs->rhmm_1ph = NULL; |
| | ckd_free(ngs->single_phone_wid); |
| | } |
| |
|
| | void |
| | ngram_fwdflat_init(ngram_search_t *ngs) |
| | { |
| | int n_words; |
| |
|
| | n_words = ps_search_n_words(ngs); |
| | ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist)); |
| | ngs->expand_word_flag = bitvec_alloc(n_words); |
| | ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list)); |
| | ngs->frm_wordlist = ckd_calloc(ngs->n_frame_alloc, sizeof(*ngs->frm_wordlist)); |
| | ngs->min_ef_width = ps_config_int(ps_search_config(ngs), "fwdflatefwid"); |
| | ngs->max_sf_win = ps_config_int(ps_search_config(ngs), "fwdflatsfwin"); |
| | E_INFO("fwdflat: min_ef_width = %d, max_sf_win = %d\n", |
| | ngs->min_ef_width, ngs->max_sf_win); |
| |
|
| | |
| | if (!ngs->fwdtree) { |
| | |
| | ngram_fwdflat_expand_all(ngs); |
| | |
| | ngram_fwdflat_allocate_1ph(ngs); |
| | } |
| | } |
| |
|
| | void |
| | ngram_fwdflat_deinit(ngram_search_t *ngs) |
| | { |
| | double n_speech = (double)ngs->n_tot_frame |
| | / ps_config_int(ps_search_config(ngs), "frate"); |
| |
|
| | E_INFO("TOTAL fwdflat %.2f CPU %.3f xRT\n", |
| | ngs->fwdflat_perf.t_tot_cpu, |
| | ngs->fwdflat_perf.t_tot_cpu / n_speech); |
| | E_INFO("TOTAL fwdflat %.2f wall %.3f xRT\n", |
| | ngs->fwdflat_perf.t_tot_elapsed, |
| | ngs->fwdflat_perf.t_tot_elapsed / n_speech); |
| |
|
| | |
| | if (!ngs->fwdtree) { |
| | ngram_fwdflat_free_1ph(ngs); |
| | } |
| | ckd_free(ngs->fwdflat_wordlist); |
| | bitvec_free(ngs->expand_word_flag); |
| | ckd_free(ngs->expand_word_list); |
| | ckd_free(ngs->frm_wordlist); |
| | } |
| |
|
| | int |
| | ngram_fwdflat_reinit(ngram_search_t *ngs) |
| | { |
| | |
| | int n_words; |
| |
|
| | ckd_free(ngs->fwdflat_wordlist); |
| | ckd_free(ngs->expand_word_list); |
| | bitvec_free(ngs->expand_word_flag); |
| | n_words = ps_search_n_words(ngs); |
| | ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist)); |
| | ngs->expand_word_flag = bitvec_alloc(n_words); |
| | ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list)); |
| | |
| | |
| | if (!ngs->fwdtree) { |
| | |
| | ngram_fwdflat_free_1ph(ngs); |
| | |
| | ckd_free(ngs->word_chan); |
| | ngs->word_chan = ckd_calloc(dict_size(ps_search_dict(ngs)), |
| | sizeof(*ngs->word_chan)); |
| | |
| | ngram_fwdflat_expand_all(ngs); |
| | |
| | ngram_fwdflat_allocate_1ph(ngs); |
| | } |
| | |
| | |
| | return 0; |
| | } |
| |
|
| | |
| | |
| | |
| | static void |
| | build_fwdflat_wordlist(ngram_search_t *ngs) |
| | { |
| | int32 i, f, sf, ef, wid, nwd; |
| | bptbl_t *bp; |
| | ps_latnode_t *node, *prevnode, *nextnode; |
| |
|
| | |
| | if (!ngs->fwdtree) |
| | return; |
| |
|
| | memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist)); |
| |
|
| | |
| | |
| | for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) { |
| | sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1; |
| | ef = bp->frame; |
| | wid = bp->wid; |
| |
|
| | |
| | |
| | if (!ngram_model_set_known_wid(ngs->lmset, |
| | dict_basewid(ps_search_dict(ngs), wid))) |
| | continue; |
| |
|
| | |
| | for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid); |
| | node = node->next); |
| |
|
| | |
| | if (node) |
| | node->lef = ef; |
| | else { |
| | |
| | node = listelem_malloc(ngs->latnode_alloc); |
| | node->wid = wid; |
| | node->fef = node->lef = ef; |
| |
|
| | node->next = ngs->frm_wordlist[sf]; |
| | ngs->frm_wordlist[sf] = node; |
| | } |
| | } |
| |
|
| | |
| | for (f = 0; f < ngs->n_frame; f++) { |
| | prevnode = NULL; |
| | for (node = ngs->frm_wordlist[f]; node; node = nextnode) { |
| | nextnode = node->next; |
| | |
| | if ((node->lef - node->fef < ngs->min_ef_width) || |
| | |
| | ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) { |
| | if (!prevnode) |
| | ngs->frm_wordlist[f] = nextnode; |
| | else |
| | prevnode->next = nextnode; |
| | listelem_free(ngs->latnode_alloc, node); |
| | } |
| | else |
| | prevnode = node; |
| | } |
| | } |
| |
|
| | |
| | nwd = 0; |
| | bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs)); |
| | for (f = 0; f < ngs->n_frame; f++) { |
| | for (node = ngs->frm_wordlist[f]; node; node = node->next) { |
| | if (!bitvec_is_set(ngs->word_active, node->wid)) { |
| | bitvec_set(ngs->word_active, node->wid); |
| | ngs->fwdflat_wordlist[nwd++] = node->wid; |
| | } |
| | } |
| | } |
| | ngs->fwdflat_wordlist[nwd] = -1; |
| | E_INFO("Utterance vocabulary contains %d words\n", nwd); |
| | } |
| |
|
| | |
| | |
| | |
| | static void |
| | build_fwdflat_chan(ngram_search_t *ngs) |
| | { |
| | int32 i, wid, p; |
| | root_chan_t *rhmm; |
| | chan_t *hmm, *prevhmm; |
| | dict_t *dict; |
| | dict2pid_t *d2p; |
| |
|
| | dict = ps_search_dict(ngs); |
| | d2p = ps_search_dict2pid(ngs); |
| |
|
| | |
| | for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) { |
| | wid = ngs->fwdflat_wordlist[i]; |
| |
|
| | |
| | if (dict_is_single_phone(dict, wid)) |
| | continue; |
| |
|
| | assert(ngs->word_chan[wid] == NULL); |
| |
|
| | |
| | |
| | |
| | rhmm = listelem_malloc(ngs->root_chan_alloc); |
| | rhmm->ci2phone = dict_second_phone(dict, wid); |
| | rhmm->ciphone = dict_first_phone(dict, wid); |
| | rhmm->next = NULL; |
| | hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE, |
| | bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone), |
| | bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, rhmm->ciphone)); |
| |
|
| | |
| | prevhmm = NULL; |
| | for (p = 1; p < dict_pronlen(dict, wid) - 1; p++) { |
| | hmm = listelem_malloc(ngs->chan_alloc); |
| | hmm->ciphone = dict_pron(dict, wid, p); |
| | hmm->info.rc_id = (p == dict_pronlen(dict, wid) - 1) ? 0 : -1; |
| | hmm->next = NULL; |
| | hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, |
| | dict2pid_internal(d2p,wid,p), |
| | bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, hmm->ciphone)); |
| |
|
| | if (prevhmm) |
| | prevhmm->next = hmm; |
| | else |
| | rhmm->next = hmm; |
| |
|
| | prevhmm = hmm; |
| | } |
| |
|
| | |
| | ngram_search_alloc_all_rc(ngs, wid); |
| |
|
| | |
| | if (prevhmm) |
| | prevhmm->next = ngs->word_chan[wid]; |
| | else |
| | rhmm->next = ngs->word_chan[wid]; |
| | ngs->word_chan[wid] = (chan_t *) rhmm; |
| | } |
| |
|
| | } |
| |
|
| | void |
| | ngram_fwdflat_start(ngram_search_t *ngs) |
| | { |
| | root_chan_t *rhmm; |
| | int i; |
| |
|
| | ptmr_reset(&ngs->fwdflat_perf); |
| | ptmr_start(&ngs->fwdflat_perf); |
| | build_fwdflat_wordlist(ngs); |
| | build_fwdflat_chan(ngs); |
| |
|
| | ngs->bpidx = 0; |
| | ngs->bss_head = 0; |
| |
|
| | for (i = 0; i < ps_search_n_words(ngs); i++) |
| | ngs->word_lat_idx[i] = NO_BP; |
| |
|
| | |
| | |
| | for (i = 0; i < ngs->n_1ph_words; i++) { |
| | int32 w = ngs->single_phone_wid[i]; |
| | rhmm = (root_chan_t *) ngs->word_chan[w]; |
| | hmm_clear(&rhmm->hmm); |
| | } |
| |
|
| | |
| | rhmm = (root_chan_t *) ngs->word_chan[ps_search_start_wid(ngs)]; |
| | hmm_enter(&rhmm->hmm, 0, NO_BP, 0); |
| | ngs->active_word_list[0][0] = ps_search_start_wid(ngs); |
| | ngs->n_active_word[0] = 1; |
| |
|
| | ngs->best_score = 0; |
| | ngs->renormalized = FALSE; |
| |
|
| | for (i = 0; i < ps_search_n_words(ngs); i++) |
| | ngs->last_ltrans[i].sf = -1; |
| |
|
| | if (!ngs->fwdtree) |
| | ngs->n_frame = 0; |
| |
|
| | ngs->st.n_fwdflat_chan = 0; |
| | ngs->st.n_fwdflat_words = 0; |
| | ngs->st.n_fwdflat_word_transition = 0; |
| | ngs->st.n_senone_active_utt = 0; |
| | } |
| |
|
| | static void |
| | compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx) |
| | { |
| | int32 i, nw, w; |
| | int32 *awl; |
| | root_chan_t *rhmm; |
| | chan_t *hmm; |
| |
|
| | acmod_clear_active(ps_search_acmod(ngs)); |
| |
|
| | nw = ngs->n_active_word[frame_idx & 0x1]; |
| | awl = ngs->active_word_list[frame_idx & 0x1]; |
| |
|
| | for (i = 0; i < nw; i++) { |
| | w = *(awl++); |
| | rhmm = (root_chan_t *)ngs->word_chan[w]; |
| | if (hmm_frame(&rhmm->hmm) == frame_idx) { |
| | acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm); |
| | } |
| |
|
| | for (hmm = rhmm->next; hmm; hmm = hmm->next) { |
| | if (hmm_frame(&hmm->hmm) == frame_idx) { |
| | acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm); |
| | } |
| | } |
| | } |
| | } |
| |
|
| | static void |
| | fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx) |
| | { |
| | int32 i, w, nw, bestscore; |
| | int32 *awl; |
| | root_chan_t *rhmm; |
| | chan_t *hmm; |
| |
|
| | nw = ngs->n_active_word[frame_idx & 0x1]; |
| | awl = ngs->active_word_list[frame_idx & 0x1]; |
| | bestscore = WORST_SCORE; |
| |
|
| | ngs->st.n_fwdflat_words += nw; |
| |
|
| | |
| | for (i = 0; i < nw; i++) { |
| | w = *(awl++); |
| | rhmm = (root_chan_t *) ngs->word_chan[w]; |
| | if (hmm_frame(&rhmm->hmm) == frame_idx) { |
| | int32 score = chan_v_eval(rhmm); |
| | if ((score BETTER_THAN bestscore) && (w != ps_search_finish_wid(ngs))) |
| | bestscore = score; |
| | ngs->st.n_fwdflat_chan++; |
| | } |
| |
|
| | for (hmm = rhmm->next; hmm; hmm = hmm->next) { |
| | if (hmm_frame(&hmm->hmm) == frame_idx) { |
| | int32 score = chan_v_eval(hmm); |
| | if (score BETTER_THAN bestscore) |
| | bestscore = score; |
| | ngs->st.n_fwdflat_chan++; |
| | } |
| | } |
| | } |
| |
|
| | ngs->best_score = bestscore; |
| | } |
| |
|
/*
 * Prune the channels of every word on the active list of frame_idx.
 *
 * Channels whose best score beats ngs->best_score + ngs->fwdflatbeam are
 * stamped with the next frame and their word is flagged in
 * ngs->word_active.  Their exit scores (plus ngs->pip) are propagated to
 * the following channel(s) with hmm_enter(), and word exits that beat
 * ngs->best_score + ngs->fwdflatwbeam are recorded through
 * ngram_search_save_bp().
 */
| | static void |
| | fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx) |
| | { |
| | int32 i, nw, cf, nf, w, pip, newscore, thresh, wordthresh; |
| | int32 *awl; |
| | root_chan_t *rhmm; |
| | chan_t *hmm, *nexthmm; |
| |
|
| | cf = frame_idx; |
| | nf = cf + 1; |
| | nw = ngs->n_active_word[cf & 0x1]; |
| | awl = ngs->active_word_list[cf & 0x1]; |
| | bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs)); |
| |
|
| | thresh = ngs->best_score + ngs->fwdflatbeam; |
| | wordthresh = ngs->best_score + ngs->fwdflatwbeam; |
| | pip = ngs->pip; |
| | E_DEBUG("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh); |
| |
|
| | /* Walk the active word list of the current frame. */ |
| | for (i = 0; i < nw; i++) { |
| | w = *(awl++); |
| | rhmm = (root_chan_t *) ngs->word_chan[w]; |
| | /* Root channel: kept only if stamped with this frame and within the HMM beam. */ |
| | if (hmm_frame(&rhmm->hmm) == cf |
| | && hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) { |
| | hmm_frame(&rhmm->hmm) = nf; |
| | bitvec_set(ngs->word_active, w); |
| |
|
| | /* Exit score of the root HMM, used for the transition out of it. */ |
| | newscore = hmm_out_score(&rhmm->hmm); |
| | if (rhmm->next) { |
| | assert(!dict_is_single_phone(ps_search_dict(ngs), w)); |
| |
|
| | newscore += pip; |
| | if (newscore BETTER_THAN thresh) { |
| | hmm = rhmm->next; |
| | /* Successor has rc_id >= 0: enter every channel from here to the end of the list. */ |
| | if (hmm->info.rc_id >= 0) { |
| | for (; hmm; hmm = hmm->next) { |
| | if ((hmm_frame(&hmm->hmm) < cf) |
| | || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) { |
| | hmm_enter(&hmm->hmm, newscore, |
| | hmm_out_history(&rhmm->hmm), nf); |
| | } |
| | } |
| | } |
| | /* Successor has rc_id < 0: enter only that one channel. */ |
| | else { |
| | if ((hmm_frame(&hmm->hmm) < cf) |
| | || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) { |
| | hmm_enter(&hmm->hmm, newscore, |
| | hmm_out_history(&rhmm->hmm), nf); |
| | } |
| | } |
| | } |
| | } |
| | else { |
| | assert(dict_is_single_phone(ps_search_dict(ngs), w)); |
| |
|
| | /* No successor channel: leaving the root HMM is itself a word exit. */ |
| | /* It is recorded with right-context id 0 when it beats the word beam. */ |
| | |
| | |
| | if (newscore BETTER_THAN wordthresh) { |
| | ngram_search_save_bp(ngs, cf, w, newscore, |
| | hmm_out_history(&rhmm->hmm), 0); |
| | } |
| | } |
| | } |
| |
|
| | /* Now the non-root channels of this word. */ |
| | for (hmm = rhmm->next; hmm; hmm = hmm->next) { |
| | if (hmm_frame(&hmm->hmm) >= cf) { |
| | /* Channel stays alive if its best score is within the HMM beam. */ |
| | if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) { |
| | hmm_frame(&hmm->hmm) = nf; |
| | bitvec_set(ngs->word_active, w); |
| |
|
| | newscore = hmm_out_score(&hmm->hmm); |
| | /* rc_id < 0: propagate the exit score to the following channel(s). */ |
| | if (hmm->info.rc_id < 0) { |
| | newscore += pip; |
| | if (newscore BETTER_THAN thresh) { |
| | nexthmm = hmm->next; |
| | /* Next channel has rc_id >= 0: enter it and all channels after it. */ |
| | if (nexthmm->info.rc_id >= 0) { |
| | for (; nexthmm; nexthmm = nexthmm->next) { |
| | if ((hmm_frame(&nexthmm->hmm) < cf) |
| | || (newscore BETTER_THAN |
| | hmm_in_score(&nexthmm->hmm))) { |
| | hmm_enter(&nexthmm->hmm, |
| | newscore, |
| | hmm_out_history(&hmm->hmm), |
| | nf); |
| | } |
| | } |
| | } |
| | /* Next channel has rc_id < 0: enter only that channel. */ |
| | else { |
| | if ((hmm_frame(&nexthmm->hmm) < cf) |
| | || (newscore BETTER_THAN |
| | hmm_in_score(&nexthmm->hmm))) { |
| | hmm_enter(&nexthmm->hmm, newscore, |
| | hmm_out_history(&hmm->hmm), nf); |
| | } |
| | } |
| | } |
| | } |
| | /* rc_id >= 0: leaving this channel is a word exit, saved with its rc_id. */ |
| | else { |
| | if (newscore BETTER_THAN wordthresh) { |
| | ngram_search_save_bp(ngs, cf, w, newscore, |
| | hmm_out_history(&hmm->hmm), |
| | hmm->info.rc_id); |
| | } |
| | } |
| | } |
| | /* Outside the beam and not stamped with the next frame: reset its scores. */ |
| | else if (hmm_frame(&hmm->hmm) != nf) { |
| | hmm_clear_scores(&hmm->hmm); |
| | } |
| | } |
| | } |
| | } |
| | } |
| |
|
| | static void |
| | get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win) |
| | { |
| | int32 f, sf, ef; |
| | ps_latnode_t *node; |
| |
|
| | if (!ngs->fwdtree) { |
| | ngs->st.n_fwdflat_word_transition += ngs->n_expand_words; |
| | return; |
| | } |
| |
|
| | sf = frm - win; |
| | if (sf < 0) |
| | sf = 0; |
| | ef = frm + win; |
| | if (ef > ngs->n_frame) |
| | ef = ngs->n_frame; |
| |
|
| | bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs)); |
| | ngs->n_expand_words = 0; |
| |
|
| | for (f = sf; f < ef; f++) { |
| | for (node = ngs->frm_wordlist[f]; node; node = node->next) { |
| | if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) { |
| | ngs->expand_word_list[ngs->n_expand_words++] = node->wid; |
| | bitvec_set(ngs->expand_word_flag, node->wid); |
| | } |
| | } |
| | } |
| | ngs->expand_word_list[ngs->n_expand_words] = -1; |
| | ngs->st.n_fwdflat_word_transition += ngs->n_expand_words; |
| | } |
| |
|
/*
 * Perform the cross-word transitions out of frame_idx.
 *
 * For every backpointer entry from ngs->bp_table_idx[frame_idx] up to
 * ngs->bpidx (entries for the finish word are skipped), the root channel
 * of each word on the expansion list is entered for frame_idx + 1 when
 * the combined score is within ngs->best_score + ngs->fwdflatbeam.
 * The silence word and the other filler words are then entered from the
 * best-scoring entry, and finally the root channels still stamped with
 * frame_idx have their scores cleared.
 */
| | static void |
| | fwdflat_word_transition(ngram_search_t *ngs, int frame_idx) |
| | { |
| | int32 cf, nf, b, thresh, pip, i, nw, w, newscore; |
| | int32 best_silrc_score = 0, best_silrc_bp = 0; |
| | bptbl_t *bp; |
| | int32 *rcss; |
| | root_chan_t *rhmm; |
| | int32 *awl; |
| | float32 lwf; |
| | dict_t *dict = ps_search_dict(ngs); |
| | dict2pid_t *d2p = ps_search_dict2pid(ngs); |
| |
|
| | cf = frame_idx; |
| | nf = cf + 1; |
| | thresh = ngs->best_score + ngs->fwdflatbeam; |
| | pip = ngs->pip; |
| | best_silrc_score = WORST_SCORE; |
| | lwf = ngs->fwdflat_fwdtree_lw_ratio; |
| |
|
| | /* Refresh ngs->expand_word_list with the candidate successor words, */ |
| | /* using a window of ngs->max_sf_win frames around the current frame. */ |
| | get_expand_wordlist(ngs, cf, ngs->max_sf_win); |
| |
|
| | /* Scan the backpointer entries from bp_table_idx[cf] up to bpidx. */ |
| | for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) { |
| | xwdssid_t *rssid; |
| | int32 silscore; |
| |
|
| | bp = ngs->bp_table + b; |
| | ngs->word_lat_idx[bp->wid] = NO_BP; |
| |
|
| | if (bp->wid == ps_search_finish_wid(ngs)) |
| | continue; |
| |
|
| | /* rcss addresses this entry's block of scores in ngs->bscore_stack. */ |
| | /* When last2_phone is -1 no right-context map is looked up and */ |
| | /* bp->score is used directly in the code below. */ |
| | rcss = ngs->bscore_stack + bp->s_idx; |
| | if (bp->last2_phone == -1) |
| | rssid = NULL; |
| | else |
| | rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone); |
| |
|
| | /* Try a transition into every word on the expansion list. */ |
| | for (i = 0; ngs->expand_word_list[i] >= 0; i++) { |
| | int32 n_used; |
| |
|
| | w = ngs->expand_word_list[i]; |
| |
|
| | /* Start from the score stored for the right context given by the */ |
| | /* first phone of w, or from the plain entry score without a map. */ |
| | if (rssid) |
| | newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]]; |
| | else |
| | newscore = bp->score; |
| | if (newscore == WORST_SCORE) |
| | continue; |
| | /* Add the ngram_tg_score() result, shifted by SENSCR_SHIFT and scaled by lwf. */ |
| | newscore += lwf |
| | * (ngram_tg_score(ngs->lmset, |
| | dict_basewid(dict, w), |
| | bp->real_wid, |
| | bp->prev_real_wid, |
| | &n_used) >> SENSCR_SHIFT); |
| | newscore += pip; |
| |
|
| | /* Enter the root channel of w if the new score is within the beam. */ |
| | if (newscore BETTER_THAN thresh) { |
| | rhmm = (root_chan_t *) ngs->word_chan[w]; |
| | if ((hmm_frame(&rhmm->hmm) < cf) |
| | || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { |
| | hmm_enter(&rhmm->hmm, newscore, b, nf); |
| | /* Set the root's first senone sequence ID from its two phones and */ |
| | /* the last phone of the word in this backpointer entry. */ |
| | hmm_mpx_ssid(&rhmm->hmm, 0) = |
| | dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, |
| | dict_last_phone(dict, bp->wid)); |
| | assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0))); |
| | E_DEBUG("ssid %d(%d,%d) = %d\n", |
| | rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone, |
| | hmm_mpx_ssid(&rhmm->hmm, 0)); |
| | bitvec_set(ngs->word_active, w); |
| | } |
| | } |
| | } |
| |
|
| | /* Score of this entry for a silence right context; remember the best one. */ |
| | if (rssid) |
| | silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]]; |
| | else |
| | silscore = bp->score; |
| | if (silscore BETTER_THAN best_silrc_score) { |
| | best_silrc_score = silscore; |
| | best_silrc_bp = b; |
| | } |
| | } |
| |
|
| | /* Transition into the silence word from the best entry found above. */ |
| | newscore = best_silrc_score + ngs->silpen + pip; |
| | if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) { |
| | w = ps_search_silence_wid(ngs); |
| | rhmm = (root_chan_t *) ngs->word_chan[w]; |
| | if ((hmm_frame(&rhmm->hmm) < cf) |
| | || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { |
| | hmm_enter(&rhmm->hmm, newscore, |
| | best_silrc_bp, nf); |
| | bitvec_set(ngs->word_active, w); |
| | } |
| | } |
| | /* Transition into every other filler word, using the filler penalty. */ |
| | newscore = best_silrc_score + ngs->fillpen + pip; |
| | if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) { |
| | for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) { |
| | if (w == ps_search_silence_wid(ngs)) |
| | continue; |
| |
|
| | rhmm = (root_chan_t *) ngs->word_chan[w]; |
| | /* Skip fillers that have no channel allocated. */ |
| | if (rhmm == NULL) |
| | continue; |
| | if ((hmm_frame(&rhmm->hmm) < cf) |
| | || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { |
| | hmm_enter(&rhmm->hmm, newscore, |
| | best_silrc_bp, nf); |
| | bitvec_set(ngs->word_active, w); |
| | } |
| | } |
| | } |
| |
|
| | /* Reset the root channels that are still stamped with the current frame. */ |
| | nw = ngs->n_active_word[cf & 0x1]; |
| | awl = ngs->active_word_list[cf & 0x1]; |
| | for (i = 0; i < nw; i++) { |
| | w = *(awl++); |
| | rhmm = (root_chan_t *) ngs->word_chan[w]; |
| | if (hmm_frame(&rhmm->hmm) == cf) { |
| | hmm_clear_scores(&rhmm->hmm); |
| | } |
| | } |
| | } |
| |
|
| | static void |
| | fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm) |
| | { |
| | root_chan_t *rhmm; |
| | chan_t *hmm; |
| | int32 i, nw, cf, w, *awl; |
| |
|
| | cf = frame_idx; |
| |
|
| | |
| | nw = ngs->n_active_word[cf & 0x1]; |
| | awl = ngs->active_word_list[cf & 0x1]; |
| | for (i = 0; i < nw; i++) { |
| | w = *(awl++); |
| | rhmm = (root_chan_t *) ngs->word_chan[w]; |
| | if (hmm_frame(&rhmm->hmm) == cf) { |
| | hmm_normalize(&rhmm->hmm, norm); |
| | } |
| | for (hmm = rhmm->next; hmm; hmm = hmm->next) { |
| | if (hmm_frame(&hmm->hmm) == cf) { |
| | hmm_normalize(&hmm->hmm, norm); |
| | } |
| | } |
| | } |
| |
|
| | ngs->renormalized = TRUE; |
| | } |
| |
|
| | int |
| | ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx) |
| | { |
| | int16 const *senscr; |
| | int32 nf, i, j; |
| | int32 *nawl; |
| |
|
| | |
| | if (!ps_search_acmod(ngs)->compallsen) |
| | compute_fwdflat_sen_active(ngs, frame_idx); |
| |
|
| | |
| | senscr = acmod_score(ps_search_acmod(ngs), &frame_idx); |
| | ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active; |
| |
|
| | |
| | ngram_search_mark_bptable(ngs, frame_idx); |
| |
|
| | |
| | |
| | if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE) |
| | return 0; |
| | |
| | if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) { |
| | E_INFO("Renormalizing Scores at frame %d, best score %d\n", |
| | frame_idx, ngs->best_score); |
| | fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score); |
| | } |
| |
|
| | ngs->best_score = WORST_SCORE; |
| | hmm_context_set_senscore(ngs->hmmctx, senscr); |
| |
|
| | |
| | fwdflat_eval_chan(ngs, frame_idx); |
| | |
| | fwdflat_prune_chan(ngs, frame_idx); |
| | |
| | fwdflat_word_transition(ngs, frame_idx); |
| |
|
| | |
| | nf = frame_idx + 1; |
| | nawl = ngs->active_word_list[nf & 0x1]; |
| | for (i = 0, j = 0; ngs->fwdflat_wordlist[i] >= 0; i++) { |
| | int32 wid = ngs->fwdflat_wordlist[i]; |
| | if (bitvec_is_set(ngs->word_active, wid) && wid < ps_search_start_wid(ngs)) { |
| | *(nawl++) = wid; |
| | j++; |
| | } |
| | } |
| | |
| | for (i = ps_search_start_wid(ngs); i < ps_search_n_words(ngs); i++) { |
| | if (bitvec_is_set(ngs->word_active, i)) { |
| | *(nawl++) = i; |
| | j++; |
| | } |
| | } |
| | if (!ngs->fwdtree) |
| | ++ngs->n_frame; |
| | ngs->n_active_word[nf & 0x1] = j; |
| |
|
| | |
| | return 1; |
| | } |
| |
|
| | |
| | |
| | |
| | static void |
| | destroy_fwdflat_wordlist(ngram_search_t *ngs) |
| | { |
| | ps_latnode_t *node, *tnode; |
| | int32 f; |
| |
|
| | if (!ngs->fwdtree) |
| | return; |
| |
|
| | for (f = 0; f < ngs->n_frame; f++) { |
| | for (node = ngs->frm_wordlist[f]; node; node = tnode) { |
| | tnode = node->next; |
| | listelem_free(ngs->latnode_alloc, node); |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | static void |
| | destroy_fwdflat_chan(ngram_search_t *ngs) |
| | { |
| | int32 i, wid; |
| |
|
| | for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) { |
| | root_chan_t *rhmm; |
| | chan_t *thmm; |
| | wid = ngs->fwdflat_wordlist[i]; |
| | if (dict_is_single_phone(ps_search_dict(ngs),wid)) |
| | continue; |
| | assert(ngs->word_chan[wid] != NULL); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | rhmm = (root_chan_t *)ngs->word_chan[wid]; |
| | thmm = rhmm->next; |
| | listelem_free(ngs->root_chan_alloc, rhmm); |
| | ngs->word_chan[wid] = thmm; |
| | ngram_search_free_all_rc(ngs, wid); |
| | } |
| | } |
| |
|
| | void |
| | ngram_fwdflat_finish(ngram_search_t *ngs) |
| | { |
| | int32 cf; |
| |
|
| | destroy_fwdflat_chan(ngs); |
| | destroy_fwdflat_wordlist(ngs); |
| | bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs)); |
| |
|
| | |
| | cf = ps_search_acmod(ngs)->output_frame; |
| | |
| | ngram_search_mark_bptable(ngs, cf); |
| |
|
| | ptmr_stop(&ngs->fwdflat_perf); |
| | |
| | if (cf > 0) { |
| | double n_speech = (double)(cf + 1) |
| | / ps_config_int(ps_search_config(ngs), "frate"); |
| | E_INFO("%8d words recognized (%d/fr)\n", |
| | ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1)); |
| | E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt, |
| | (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1)); |
| | E_INFO("%8d channels searched (%d/fr)\n", |
| | ngs->st.n_fwdflat_chan, ngs->st.n_fwdflat_chan / (cf + 1)); |
| | E_INFO("%8d words searched (%d/fr)\n", |
| | ngs->st.n_fwdflat_words, ngs->st.n_fwdflat_words / (cf + 1)); |
| | E_INFO("%8d word transitions (%d/fr)\n", |
| | ngs->st.n_fwdflat_word_transition, |
| | ngs->st.n_fwdflat_word_transition / (cf + 1)); |
| | E_INFO("fwdflat %.2f CPU %.3f xRT\n", |
| | ngs->fwdflat_perf.t_cpu, |
| | ngs->fwdflat_perf.t_cpu / n_speech); |
| | E_INFO("fwdflat %.2f wall %.3f xRT\n", |
| | ngs->fwdflat_perf.t_elapsed, |
| | ngs->fwdflat_perf.t_elapsed / n_speech); |
| | } |
| | } |
| |
|