| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | #include <pocketsphinx.h> |
| |
|
| | #include "phone_loop_search.h" |
| |
|
| | static int phone_loop_search_start(ps_search_t *search); |
| | static int phone_loop_search_step(ps_search_t *search, int frame_idx); |
| | static int phone_loop_search_finish(ps_search_t *search); |
| | static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); |
| | static void phone_loop_search_free(ps_search_t *search); |
| | static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score); |
| | static int32 phone_loop_search_prob(ps_search_t *search); |
| | static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search); |
| |
|
| | static ps_searchfuncs_t phone_loop_search_funcs = { |
| | phone_loop_search_start, |
| | phone_loop_search_step, |
| | phone_loop_search_finish, |
| | phone_loop_search_reinit, |
| | phone_loop_search_free, |
| | NULL, |
| | phone_loop_search_hyp, |
| | phone_loop_search_prob, |
| | phone_loop_search_seg_iter, |
| | }; |
| |
|
| | static int |
| | phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) |
| | { |
| | phone_loop_search_t *pls = (phone_loop_search_t *)search; |
| | cmd_ln_t *config = ps_search_config(search); |
| | acmod_t *acmod = ps_search_acmod(search); |
| | int i; |
| |
|
| | |
| | ps_search_base_reinit(search, dict, d2p); |
| |
|
| | |
| | if (pls->hmmctx) |
| | hmm_context_free(pls->hmmctx); |
| | pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), |
| | acmod->tmat->tp, NULL, acmod->mdef->sseq); |
| | if (pls->hmmctx == NULL) |
| | return -1; |
| |
|
| | |
| | pls->n_phones = bin_mdef_n_ciphone(acmod->mdef); |
| | pls->window = ps_config_int(config, "pl_window"); |
| | if (pls->penalties) |
| | ckd_free(pls->penalties); |
| | pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties)); |
| | if (pls->pen_buf) |
| | ckd_free_2d(pls->pen_buf); |
| | pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf)); |
| |
|
| | |
| | if (pls->hmms) { |
| | for (i = 0; i < pls->n_phones; ++i) |
| | hmm_deinit((hmm_t *)&pls->hmms[i]); |
| | ckd_free(pls->hmms); |
| | } |
| | pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms)); |
| | for (i = 0; i < pls->n_phones; ++i) { |
| | hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i], |
| | FALSE, |
| | bin_mdef_pid2ssid(acmod->mdef, i), |
| | bin_mdef_pid2tmatid(acmod->mdef, i)); |
| | } |
| | pls->penalty_weight = ps_config_float(config, "pl_weight"); |
| | pls->beam = logmath_log(acmod->lmath, ps_config_float(config, "pl_beam")) >> SENSCR_SHIFT; |
| | pls->pbeam = logmath_log(acmod->lmath, ps_config_float(config, "pl_pbeam")) >> SENSCR_SHIFT; |
| | pls->pip = logmath_log(acmod->lmath, ps_config_float(config, "pl_pip")) >> SENSCR_SHIFT; |
| | E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n", |
| | pls->beam, pls->pbeam, pls->pip); |
| |
|
| | return 0; |
| | } |
| |
|
| | ps_search_t * |
| | phone_loop_search_init(cmd_ln_t *config, |
| | acmod_t *acmod, |
| | dict_t *dict) |
| | { |
| | phone_loop_search_t *pls; |
| |
|
| | |
| | pls = (phone_loop_search_t *)ckd_calloc(1, sizeof(*pls)); |
| | ps_search_init(ps_search_base(pls), &phone_loop_search_funcs, |
| | PS_SEARCH_TYPE_PHONE_LOOP, PS_DEFAULT_PL_SEARCH, |
| | config, acmod, dict, NULL); |
| | phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls), |
| | ps_search_dict2pid(pls)); |
| |
|
| | return ps_search_base(pls); |
| | } |
| |
|
| | static void |
| | phone_loop_search_free_renorm(phone_loop_search_t *pls) |
| | { |
| | gnode_t *gn; |
| | for (gn = pls->renorm; gn; gn = gnode_next(gn)) |
| | ckd_free(gnode_ptr(gn)); |
| | glist_free(pls->renorm); |
| | pls->renorm = NULL; |
| | } |
| |
|
| | static void |
| | phone_loop_search_free(ps_search_t *search) |
| | { |
| | phone_loop_search_t *pls = (phone_loop_search_t *)search; |
| | int i; |
| |
|
| | ps_search_base_free(search); |
| | for (i = 0; i < pls->n_phones; ++i) |
| | hmm_deinit((hmm_t *)&pls->hmms[i]); |
| | phone_loop_search_free_renorm(pls); |
| | ckd_free_2d(pls->pen_buf); |
| | ckd_free(pls->hmms); |
| | ckd_free(pls->penalties); |
| | hmm_context_free(pls->hmmctx); |
| | ckd_free(pls); |
| | } |
| |
|
| | static int |
| | phone_loop_search_start(ps_search_t *search) |
| | { |
| | phone_loop_search_t *pls = (phone_loop_search_t *)search; |
| | int i; |
| |
|
| | |
| | for (i = 0; i < pls->n_phones; ++i) { |
| | hmm_t *hmm = (hmm_t *)&pls->hmms[i]; |
| | hmm_clear(hmm); |
| | hmm_enter(hmm, 0, -1, 0); |
| | } |
| | memset(pls->penalties, 0, pls->n_phones * sizeof(*pls->penalties)); |
| | for (i = 0; i < pls->window; i++) |
| | memset(pls->pen_buf[i], 0, pls->n_phones * sizeof(*pls->pen_buf[i])); |
| | phone_loop_search_free_renorm(pls); |
| | pls->best_score = 0; |
| | pls->pen_buf_ptr = 0; |
| |
|
| | return 0; |
| | } |
| |
|
| | static void |
| | renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm) |
| | { |
| | phone_loop_renorm_t *rn = (phone_loop_renorm_t *)ckd_calloc(1, sizeof(*rn)); |
| | int i; |
| |
|
| | pls->renorm = glist_add_ptr(pls->renorm, rn); |
| | rn->frame_idx = frame_idx; |
| | rn->norm = norm; |
| |
|
| | for (i = 0; i < pls->n_phones; ++i) { |
| | hmm_normalize((hmm_t *)&pls->hmms[i], norm); |
| | } |
| | } |
| |
|
| | static void |
| | evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx) |
| | { |
| | int32 bs = WORST_SCORE; |
| | int i; |
| |
|
| | hmm_context_set_senscore(pls->hmmctx, senscr); |
| |
|
| | for (i = 0; i < pls->n_phones; ++i) { |
| | hmm_t *hmm = (hmm_t *)&pls->hmms[i]; |
| | int32 score; |
| |
|
| | if (hmm_frame(hmm) < frame_idx) |
| | continue; |
| | score = hmm_vit_eval(hmm); |
| | if (score BETTER_THAN bs) { |
| | bs = score; |
| | } |
| | } |
| | pls->best_score = bs; |
| | } |
| |
|
| | static void |
| | store_scores(phone_loop_search_t *pls, int frame_idx) |
| | { |
| | int i, j, itr; |
| |
|
| | (void)frame_idx; |
| | for (i = 0; i < pls->n_phones; ++i) { |
| | hmm_t *hmm = (hmm_t *)&pls->hmms[i]; |
| | pls->pen_buf[pls->pen_buf_ptr][i] = (hmm_bestscore(hmm) - pls->best_score) * pls->penalty_weight; |
| | } |
| | pls->pen_buf_ptr++; |
| | pls->pen_buf_ptr = pls->pen_buf_ptr % pls->window; |
| |
|
| | |
| | for (i = 0; i < pls->n_phones; ++i) { |
| | pls->penalties[i] = WORST_SCORE; |
| | for (j = 0, itr = pls->pen_buf_ptr + 1; j < pls->window; j++, itr++) { |
| | itr = itr % pls->window; |
| | if (pls->pen_buf[itr][i] > pls->penalties[i]) |
| | pls->penalties[i] = pls->pen_buf[itr][i]; |
| | } |
| | } |
| | } |
| |
|
| | static void |
| | prune_hmms(phone_loop_search_t *pls, int frame_idx) |
| | { |
| | int32 thresh = pls->best_score + pls->beam; |
| | int nf = frame_idx + 1; |
| | int i; |
| |
|
| | |
| | for (i = 0; i < pls->n_phones; ++i) { |
| | hmm_t *hmm = (hmm_t *)&pls->hmms[i]; |
| |
|
| | if (hmm_frame(hmm) < frame_idx) |
| | continue; |
| | |
| | if (hmm_bestscore(hmm) BETTER_THAN thresh) { |
| | hmm_frame(hmm) = nf; |
| | } |
| | else |
| | hmm_clear_scores(hmm); |
| | } |
| | } |
| |
|
| | static void |
| | phone_transition(phone_loop_search_t *pls, int frame_idx) |
| | { |
| | int32 thresh = pls->best_score + pls->pbeam; |
| | int nf = frame_idx + 1; |
| | int i; |
| |
|
| | |
| | |
| | for (i = 0; i < pls->n_phones; ++i) { |
| | hmm_t *hmm = (hmm_t *)&pls->hmms[i]; |
| | int32 newphone_score; |
| | int j; |
| |
|
| | if (hmm_frame(hmm) != nf) |
| | continue; |
| |
|
| | newphone_score = hmm_out_score(hmm) + pls->pip; |
| | if (newphone_score BETTER_THAN thresh) { |
| | |
| | for (j = 0; j < pls->n_phones; ++j) { |
| | hmm_t *nhmm = (hmm_t *)&pls->hmms[j]; |
| |
|
| | if (hmm_frame(nhmm) < frame_idx |
| | || newphone_score BETTER_THAN hmm_in_score(nhmm)) { |
| | hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf); |
| | } |
| | } |
| | } |
| | } |
| | } |
| |
|
| | static int |
| | phone_loop_search_step(ps_search_t *search, int frame_idx) |
| | { |
| | phone_loop_search_t *pls = (phone_loop_search_t *)search; |
| | acmod_t *acmod = ps_search_acmod(search); |
| | int16 const *senscr; |
| | int i; |
| |
|
| | |
| | if (!ps_search_acmod(pls)->compallsen) { |
| | acmod_clear_active(ps_search_acmod(pls)); |
| | for (i = 0; i < pls->n_phones; ++i) |
| | acmod_activate_hmm(acmod, (hmm_t *)&pls->hmms[i]); |
| | } |
| |
|
| | |
| | senscr = acmod_score(acmod, &frame_idx); |
| |
|
| | |
| | if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) { |
| | E_INFO("Renormalizing Scores at frame %d, best score %d\n", |
| | frame_idx, pls->best_score); |
| | renormalize_hmms(pls, frame_idx, pls->best_score); |
| | } |
| |
|
| | |
| | evaluate_hmms(pls, senscr, frame_idx); |
| |
|
| | |
| | store_scores(pls, frame_idx); |
| |
|
| | |
| | prune_hmms(pls, frame_idx); |
| |
|
| | |
| | phone_transition(pls, frame_idx); |
| |
|
| | return 0; |
| | } |
| |
|
| | static int |
| | phone_loop_search_finish(ps_search_t *search) |
| | { |
| | |
| | (void)search; |
| | return 0; |
| | } |
| |
|
| | static char const * |
| | phone_loop_search_hyp(ps_search_t *search, int32 *out_score) |
| | { |
| | (void)search; |
| | (void)out_score; |
| | E_WARN("Hypotheses are not returned from phone loop search"); |
| | return NULL; |
| | } |
| |
|
| | static int32 |
| | phone_loop_search_prob(ps_search_t *search) |
| | { |
| | (void)search; |
| | |
| | E_WARN("Posterior probabilities are not returned from phone loop search"); |
| | return 0; |
| | } |
| |
|
| | static ps_seg_t * |
| | phone_loop_search_seg_iter(ps_search_t *search) |
| | { |
| | (void)search; |
| | E_WARN("Hypotheses are not returned from phone loop search"); |
| | return NULL; |
| | } |
| |
|