| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| #include <stdio.h> |
| #include <string.h> |
| #include <assert.h> |
|
|
| #include <pocketsphinx.h> |
|
|
| #include "util/pio.h" |
| #include "util/ckd_alloc.h" |
| #include "util/strfuncs.h" |
| #include "util/hash_table.h" |
| #include "util/bitvec.h" |
|
|
| #include "lm/fsg_model.h" |
|
|
| #define FSG_MODEL_BEGIN_DECL "FSG_BEGIN" |
| #define FSG_MODEL_END_DECL "FSG_END" |
| #define FSG_MODEL_N_DECL "N" |
| #define FSG_MODEL_NUM_STATES_DECL "NUM_STATES" |
| #define FSG_MODEL_S_DECL "S" |
| #define FSG_MODEL_START_STATE_DECL "START_STATE" |
| #define FSG_MODEL_F_DECL "F" |
| #define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE" |
| #define FSG_MODEL_T_DECL "T" |
| #define FSG_MODEL_TRANSITION_DECL "TRANSITION" |
| #define FSG_MODEL_COMMENT_CHAR '#' |
|
|
|
|
| static int32 |
| nextline_str2words(FILE * fp, int32 * lineno, |
| char **lineptr, char ***wordptr) |
| { |
| for (;;) { |
| size_t len; |
| int32 n; |
|
|
| ckd_free(*lineptr); |
| if ((*lineptr = fread_line(fp, &len)) == NULL) |
| return -1; |
|
|
| (*lineno)++; |
|
|
| if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR) |
| continue; |
|
|
| n = str2words(*lineptr, NULL, 0); |
| if (n == 0) |
| continue; |
|
|
| |
| if (*wordptr == NULL) |
| *wordptr = ckd_calloc(n, sizeof(**wordptr)); |
| else |
| *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr)); |
| return str2words(*lineptr, *wordptr, n); |
| } |
| } |
|
|
| void |
| fsg_model_trans_add(fsg_model_t * fsg, |
| int32 from, int32 to, int32 logp, int32 wid) |
| { |
| fsg_link_t *link; |
| glist_t gl; |
| gnode_t *gn; |
|
|
| if (fsg->trans[from].trans == NULL) |
| fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES); |
|
|
| |
| for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) { |
| link = (fsg_link_t *) gnode_ptr(gn); |
| if (link->wid == wid) { |
| if (link->logs2prob < logp) |
| link->logs2prob = logp; |
| return; |
| } |
| } |
|
|
| |
| link = listelem_malloc(fsg->link_alloc); |
| link->from_state = from; |
| link->to_state = to; |
| link->logs2prob = logp; |
| link->wid = wid; |
|
|
| |
| gl = glist_add_ptr(gl, (void *) link); |
| hash_table_replace_bkey(fsg->trans[from].trans, |
| (char const *) &link->to_state, |
| sizeof(link->to_state), gl); |
| } |
|
|
| int32 |
| fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, |
| int32 logp, int32 wid) |
| { |
| fsg_link_t *link, *link2; |
|
|
| (void)wid; |
| |
| if (logp > 0) { |
| E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n", |
| from, to); |
| } |
|
|
| |
| if (from == to) |
| return -1; |
|
|
| if (fsg->trans[from].null_trans == NULL) |
| fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES); |
|
|
| |
| link = fsg_model_null_trans(fsg, from, to); |
| if (link) { |
| if (link->logs2prob < logp) { |
| link->logs2prob = logp; |
| return 0; |
| } |
| else |
| return -1; |
| } |
|
|
| |
| link = listelem_malloc(fsg->link_alloc); |
| link->from_state = from; |
| link->to_state = to; |
| link->logs2prob = logp; |
| link->wid = -1; |
|
|
| link2 = (fsg_link_t *) |
| hash_table_enter_bkey(fsg->trans[from].null_trans, |
| (char const *) &link->to_state, |
| sizeof(link->to_state), link); |
| assert(link == link2); |
| (void)link2; |
|
|
| return 1; |
| } |
|
|
| int32 |
| fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, |
| int32 logp) |
| { |
| return fsg_model_tag_trans_add(fsg, from, to, logp, -1); |
| } |
|
|
| glist_t |
| fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls) |
| { |
| gnode_t *gn1; |
| int updated; |
| fsg_link_t *tl1, *tl2; |
| int32 k, n; |
|
|
| E_INFO("Computing transitive closure for null transitions\n"); |
|
|
| |
| |
| |
| |
| if (nulls == NULL) { |
| int i; |
| for (i = 0; i < fsg->n_state; ++i) { |
| hash_iter_t *itor; |
| hash_table_t *null_trans = fsg->trans[i].null_trans; |
| if (null_trans == NULL) |
| continue; |
| for (itor = hash_table_iter(null_trans); |
| itor != NULL; itor = hash_table_iter_next(itor)) { |
| nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent)); |
| } |
| } |
| } |
|
|
| |
| |
| |
| |
| n = 0; |
| do { |
| updated = FALSE; |
|
|
| for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) { |
| hash_iter_t *itor; |
|
|
| tl1 = (fsg_link_t *) gnode_ptr(gn1); |
| assert(tl1->wid < 0); |
|
|
| if (fsg->trans[tl1->to_state].null_trans == NULL) |
| continue; |
|
|
| for (itor = |
| hash_table_iter(fsg->trans[tl1->to_state].null_trans); |
| itor; itor = hash_table_iter_next(itor)) { |
|
|
| tl2 = (fsg_link_t *) hash_entry_val(itor->ent); |
|
|
| k = fsg_model_null_trans_add(fsg, |
| tl1->from_state, |
| tl2->to_state, |
| tl1->logs2prob + |
| tl2->logs2prob); |
| if (k >= 0) { |
| updated = TRUE; |
| if (k > 0) { |
| nulls = glist_add_ptr(nulls, (void *) |
| fsg_model_null_trans |
| (fsg, tl1->from_state, |
| tl2->to_state)); |
| n++; |
| } |
| } |
| } |
| } |
| } while (updated); |
|
|
| E_INFO("%d null transitions added\n", n); |
|
|
| return nulls; |
| } |
|
|
| glist_t |
| fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j) |
| { |
| void *val; |
|
|
| if (fsg->trans[i].trans == NULL) |
| return NULL; |
| if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j, |
| sizeof(j), &val) < 0) |
| return NULL; |
| return (glist_t) val; |
| } |
|
|
| fsg_link_t * |
| fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j) |
| { |
| void *val; |
|
|
| if (fsg->trans[i].null_trans == NULL) |
| return NULL; |
| if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j, |
| sizeof(j), &val) < 0) |
| return NULL; |
| return (fsg_link_t *) val; |
| } |
|
|
| fsg_arciter_t * |
| fsg_model_arcs(fsg_model_t * fsg, int32 i) |
| { |
| fsg_arciter_t *itor; |
|
|
| if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL) |
| return NULL; |
| itor = ckd_calloc(1, sizeof(*itor)); |
| if (fsg->trans[i].null_trans) |
| itor->null_itor = hash_table_iter(fsg->trans[i].null_trans); |
| if (fsg->trans[i].trans) |
| itor->itor = hash_table_iter(fsg->trans[i].trans); |
| if (itor->itor != NULL) |
| itor->gn = hash_entry_val(itor->itor->ent); |
| return itor; |
| } |
|
|
| fsg_link_t * |
| fsg_arciter_get(fsg_arciter_t * itor) |
| { |
| |
| if (itor->gn) |
| return (fsg_link_t *) gnode_ptr(itor->gn); |
| else if (itor->null_itor) |
| return (fsg_link_t *) hash_entry_val(itor->null_itor->ent); |
| else |
| return NULL; |
| } |
|
|
| fsg_arciter_t * |
| fsg_arciter_next(fsg_arciter_t * itor) |
| { |
| |
| if (itor->gn) { |
| itor->gn = gnode_next(itor->gn); |
| |
| if (itor->gn == NULL) { |
| itor->itor = hash_table_iter_next(itor->itor); |
| if (itor->itor != NULL) |
| itor->gn = hash_entry_val(itor->itor->ent); |
| else if (itor->null_itor == NULL) |
| goto stop_iteration; |
| } |
| } |
| else { |
| if (itor->null_itor == NULL) |
| goto stop_iteration; |
| itor->null_itor = hash_table_iter_next(itor->null_itor); |
| if (itor->null_itor == NULL) |
| goto stop_iteration; |
| } |
| return itor; |
| stop_iteration: |
| fsg_arciter_free(itor); |
| return NULL; |
|
|
| } |
|
|
| void |
| fsg_arciter_free(fsg_arciter_t * itor) |
| { |
| if (itor == NULL) |
| return; |
| hash_table_iter_free(itor->null_itor); |
| hash_table_iter_free(itor->itor); |
| ckd_free(itor); |
| } |
|
|
| int |
| fsg_model_word_id(fsg_model_t * fsg, char const *word) |
| { |
| int wid; |
|
|
| |
| for (wid = 0; wid < fsg->n_word; ++wid) { |
| if (0 == strcmp(fsg->vocab[wid], word)) |
| break; |
| } |
| |
| if (wid == fsg->n_word) |
| return -1; |
| return wid; |
| } |
|
|
| int |
| fsg_model_word_add(fsg_model_t * fsg, char const *word) |
| { |
| int wid, old_size; |
|
|
| |
| wid = fsg_model_word_id(fsg, word); |
| |
| if (wid == -1) { |
| wid = fsg->n_word; |
| if (fsg->n_word == fsg->n_word_alloc) { |
| old_size = fsg->n_word_alloc; |
| fsg->n_word_alloc += 10; |
| fsg->vocab = ckd_realloc(fsg->vocab, |
| fsg->n_word_alloc * |
| sizeof(*fsg->vocab)); |
| if (fsg->silwords) |
| fsg->silwords = |
| bitvec_realloc(fsg->silwords, old_size, |
| fsg->n_word_alloc); |
| if (fsg->altwords) |
| fsg->altwords = |
| bitvec_realloc(fsg->altwords, old_size, |
| fsg->n_word_alloc); |
| } |
| ++fsg->n_word; |
| fsg->vocab[wid] = ckd_salloc(word); |
| } |
| return wid; |
| } |
|
|
| int |
| fsg_model_add_silence(fsg_model_t * fsg, char const *silword, |
| int state, float32 silprob) |
| { |
| int32 logsilp; |
| int n_trans, silwid, src; |
|
|
| E_INFO("Adding silence transitions for %s to FSG\n", silword); |
|
|
| silwid = fsg_model_word_add(fsg, silword); |
| logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw); |
| if (fsg->silwords == NULL) |
| fsg->silwords = bitvec_alloc(fsg->n_word_alloc); |
| bitvec_set(fsg->silwords, silwid); |
|
|
| n_trans = 0; |
| if (state == -1) { |
| for (src = 0; src < fsg->n_state; src++) { |
| fsg_model_trans_add(fsg, src, src, logsilp, silwid); |
| ++n_trans; |
| } |
| } |
| else { |
| fsg_model_trans_add(fsg, state, state, logsilp, silwid); |
| ++n_trans; |
| } |
|
|
| E_INFO("Added %d silence word transitions\n", n_trans); |
| return n_trans; |
| } |
|
|
| int |
| fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, |
| char const *altword) |
| { |
| int i, basewid, altwid; |
| int ntrans; |
|
|
| |
| for (basewid = 0; basewid < fsg->n_word; ++basewid) |
| if (0 == strcmp(fsg->vocab[basewid], baseword)) |
| break; |
| if (basewid == fsg->n_word) { |
| E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword); |
| return -1; |
| } |
| altwid = fsg_model_word_add(fsg, altword); |
| if (fsg->altwords == NULL) |
| fsg->altwords = bitvec_alloc(fsg->n_word_alloc); |
| bitvec_set(fsg->altwords, altwid); |
| if (fsg_model_is_filler(fsg, basewid)) { |
| if (fsg->silwords == NULL) |
| fsg->silwords = bitvec_alloc(fsg->n_word_alloc); |
| bitvec_set(fsg->silwords, altwid); |
| } |
|
|
| E_DEBUG("Adding alternate word transitions (%s,%s) to FSG\n", |
| baseword, altword); |
|
|
| |
| |
| ntrans = 0; |
| for (i = 0; i < fsg->n_state; ++i) { |
| hash_iter_t *itor; |
| if (fsg->trans[i].trans == NULL) |
| continue; |
| for (itor = hash_table_iter(fsg->trans[i].trans); itor; |
| itor = hash_table_iter_next(itor)) { |
| glist_t trans; |
| gnode_t *gn; |
|
|
| trans = hash_entry_val(itor->ent); |
| for (gn = trans; gn; gn = gnode_next(gn)) { |
| fsg_link_t *fl = gnode_ptr(gn); |
| if (fl->wid == basewid) { |
| fsg_link_t *link; |
|
|
| |
| link = listelem_malloc(fsg->link_alloc); |
| link->from_state = fl->from_state; |
| link->to_state = fl->to_state; |
| link->logs2prob = fl->logs2prob; |
| link->wid = altwid; |
|
|
| trans = glist_add_ptr(trans, (void *) link); |
| ++ntrans; |
| } |
| } |
| hash_entry_val(itor->ent) = trans; |
| } |
| } |
|
|
| E_DEBUG("Added %d alternate word transitions\n", ntrans); |
| return ntrans; |
| } |
|
|
|
|
| fsg_model_t * |
| fsg_model_init(char const *name, logmath_t * lmath, float32 lw, |
| int32 n_state) |
| { |
| fsg_model_t *fsg; |
|
|
| |
| fsg = ckd_calloc(1, sizeof(*fsg)); |
| fsg->refcount = 1; |
| fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t)); |
| fsg->lmath = lmath; |
| fsg->name = name ? ckd_salloc(name) : NULL; |
| fsg->n_state = n_state; |
| fsg->lw = lw; |
|
|
| fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans)); |
|
|
| return fsg; |
| } |
|
|
| fsg_model_t * |
| fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw) |
| { |
| fsg_model_t *fsg; |
| hash_table_t *vocab; |
| hash_iter_t *itor; |
| int32 lastwid; |
| char **wordptr; |
| char *lineptr; |
| char *fsgname; |
| int32 lineno; |
| int32 n, i, j; |
| int n_state, n_trans, n_null_trans; |
| glist_t nulls; |
| float32 p; |
|
|
| lineno = 0; |
| vocab = hash_table_new(32, FALSE); |
| wordptr = NULL; |
| lineptr = NULL; |
| nulls = NULL; |
| fsgname = NULL; |
| fsg = NULL; |
|
|
| |
| for (;;) { |
| n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); |
| if (n < 0) { |
| E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL); |
| goto parse_error; |
| } |
|
|
| if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) { |
| if (n > 2) { |
| E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", |
| lineno); |
| goto parse_error; |
| } |
| break; |
| } |
| } |
| |
| |
| |
| if (n == 2) { |
| fsgname = ckd_salloc(wordptr[1]); |
| } |
| else { |
| E_WARN("FSG name is missing\n"); |
| fsgname = ckd_salloc("unknown"); |
| } |
|
|
| |
| n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); |
| if ((n != 2) |
| || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0) |
| && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0)) |
| || (sscanf(wordptr[1], "%d", &n_state) != 1) |
| || (n_state <= 0)) { |
| E_ERROR |
| ("Line[%d]: #states declaration line missing or malformed\n", |
| lineno); |
| goto parse_error; |
| } |
|
|
| |
| fsg = fsg_model_init(fsgname, lmath, lw, n_state); |
| ckd_free(fsgname); |
| fsgname = NULL; |
|
|
| |
| n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); |
| if ((n != 2) |
| || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0) |
| && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0)) |
| || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1) |
| || (fsg->start_state < 0) |
| || (fsg->start_state >= fsg->n_state)) { |
| E_ERROR |
| ("Line[%d]: start state declaration line missing or malformed\n", |
| lineno); |
| goto parse_error; |
| } |
|
|
| |
| n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); |
| if ((n != 2) |
| || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0) |
| && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0)) |
| || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1) |
| || (fsg->final_state < 0) |
| || (fsg->final_state >= fsg->n_state)) { |
| E_ERROR |
| ("Line[%d]: final state declaration line missing or malformed\n", |
| lineno); |
| goto parse_error; |
| } |
|
|
| |
| lastwid = 0; |
| n_trans = n_null_trans = 0; |
| for (;;) { |
| int32 wid, tprob; |
|
|
| n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); |
| if (n <= 0) { |
| E_ERROR("Line[%d]: transition or FSG_END statement expected\n", |
| lineno); |
| goto parse_error; |
| } |
|
|
| if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) { |
| break; |
| } |
|
|
| if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0) |
| || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) { |
|
|
|
|
| if (((n != 4) && (n != 5)) |
| || (sscanf(wordptr[1], "%d", &i) != 1) |
| || (sscanf(wordptr[2], "%d", &j) != 1) |
| || (i < 0) || (i >= fsg->n_state) |
| || (j < 0) || (j >= fsg->n_state)) { |
| E_ERROR |
| ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n", |
| lineno); |
| goto parse_error; |
| } |
|
|
| p = atof_c(wordptr[3]); |
| if ((p <= 0.0) || (p > 1.0)) { |
| E_ERROR |
| ("Line[%d]: transition spec malformed; Expecting float as transition probability\n", |
| lineno); |
| goto parse_error; |
| } |
| } |
| else { |
| E_ERROR("Line[%d]: transition or FSG_END statement expected\n", |
| lineno); |
| goto parse_error; |
| } |
|
|
| tprob = (int32) (logmath_log(lmath, p) * fsg->lw); |
| |
| if (n > 4) { |
| if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) { |
| (void) hash_table_enter_int32(vocab, |
| ckd_salloc(wordptr[4]), |
| lastwid); |
| wid = lastwid; |
| ++lastwid; |
| } |
| fsg_model_trans_add(fsg, i, j, tprob, wid); |
| ++n_trans; |
| } |
| else { |
| if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) { |
| ++n_null_trans; |
| nulls = |
| glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j)); |
| } |
| } |
| } |
|
|
| E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n", |
| fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans); |
|
|
|
|
| |
| fsg->n_word = hash_table_inuse(vocab); |
| fsg->n_word_alloc = fsg->n_word + 10; |
| fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab)); |
| for (itor = hash_table_iter(vocab); itor; |
| itor = hash_table_iter_next(itor)) { |
| char const *word = hash_entry_key(itor->ent); |
| int32 wid = (int32) (size_t) hash_entry_val(itor->ent); |
| fsg->vocab[wid] = (char *) word; |
| } |
| hash_table_free(vocab); |
|
|
| |
| |
| |
| |
| |
| nulls = fsg_model_null_trans_closure(fsg, nulls); |
| glist_free(nulls); |
|
|
| ckd_free(lineptr); |
| ckd_free(wordptr); |
|
|
| return fsg; |
|
|
| parse_error: |
| for (itor = hash_table_iter(vocab); itor; |
| itor = hash_table_iter_next(itor)) |
| ckd_free((char *) hash_entry_key(itor->ent)); |
| glist_free(nulls); |
| hash_table_free(vocab); |
| ckd_free(fsgname); |
| ckd_free(lineptr); |
| ckd_free(wordptr); |
| fsg_model_free(fsg); |
| return NULL; |
| } |
|
|
|
|
| fsg_model_t * |
| fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw) |
| { |
| FILE *fp; |
| fsg_model_t *fsg; |
|
|
| if ((fp = fopen(file, "r")) == NULL) { |
| E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); |
| return NULL; |
| } |
| fsg = fsg_model_read(fp, lmath, lw); |
| fclose(fp); |
| return fsg; |
| } |
|
|
| fsg_model_t * |
| fsg_model_retain(fsg_model_t * fsg) |
| { |
| ++fsg->refcount; |
| return fsg; |
| } |
|
|
| static void |
| trans_list_free(fsg_model_t * fsg, int32 i) |
| { |
| hash_iter_t *itor; |
|
|
| |
| |
| |
| if (fsg->trans[i].trans) { |
| for (itor = hash_table_iter(fsg->trans[i].trans); |
| itor; itor = hash_table_iter_next(itor)) { |
| glist_t gl = (glist_t) hash_entry_val(itor->ent); |
| glist_free(gl); |
| } |
| } |
| hash_table_free(fsg->trans[i].trans); |
| hash_table_free(fsg->trans[i].null_trans); |
| } |
|
|
| int |
| fsg_model_free(fsg_model_t * fsg) |
| { |
| int i; |
|
|
| if (fsg == NULL) |
| return 0; |
|
|
| if (--fsg->refcount > 0) |
| return fsg->refcount; |
|
|
| for (i = 0; i < fsg->n_word; ++i) |
| ckd_free(fsg->vocab[i]); |
| for (i = 0; i < fsg->n_state; ++i) |
| trans_list_free(fsg, i); |
| ckd_free(fsg->trans); |
| ckd_free(fsg->vocab); |
| listelem_alloc_free(fsg->link_alloc); |
| bitvec_free(fsg->silwords); |
| bitvec_free(fsg->altwords); |
| ckd_free(fsg->name); |
| ckd_free(fsg); |
| return 0; |
| } |
|
|
|
|
| void |
| fsg_model_write(fsg_model_t * fsg, FILE * fp) |
| { |
| int32 i; |
|
|
| fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL, |
| fsg->name ? fsg->name : ""); |
| fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state); |
| fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state); |
| fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state); |
|
|
| for (i = 0; i < fsg->n_state; i++) { |
| fsg_arciter_t *itor; |
|
|
| for (itor = fsg_model_arcs(fsg, i); itor; |
| itor = fsg_arciter_next(itor)) { |
| fsg_link_t *tl = fsg_arciter_get(itor); |
|
|
| fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL, |
| tl->from_state, tl->to_state, |
| logmath_exp(fsg->lmath, |
| (int32) (tl->logs2prob / fsg->lw)), |
| (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid)); |
| } |
| } |
|
|
| fprintf(fp, "%s\n", FSG_MODEL_END_DECL); |
|
|
| fflush(fp); |
| } |
|
|
| void |
| fsg_model_writefile(fsg_model_t * fsg, char const *file) |
| { |
| FILE *fp; |
|
|
| assert(fsg); |
|
|
| E_INFO("Writing FSG file '%s'\n", file); |
|
|
| if ((fp = fopen(file, "w")) == NULL) { |
| E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); |
| return; |
| } |
|
|
| fsg_model_write(fsg, fp); |
|
|
| fclose(fp); |
| } |
|
|
| static void |
| fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp) |
| { |
| fsg_arciter_t *itor; |
|
|
| for (itor = fsg_model_arcs(fsg, i); itor; |
| itor = fsg_arciter_next(itor)) { |
| fsg_link_t *tl = fsg_arciter_get(itor); |
| fprintf(fp, "%d %d %s %f\n", |
| tl->from_state, tl->to_state, |
| (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid), |
| -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw)); |
| } |
| } |
|
|
| void |
| fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp) |
| { |
| int i; |
|
|
| |
| fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp); |
|
|
| |
| for (i = 0; i < fsg->n_state; i++) { |
| if (i == fsg_model_start_state(fsg)) |
| continue; |
| fsg_model_write_fsm_trans(fsg, i, fp); |
| } |
|
|
| |
| fprintf(fp, "%d 0\n", fsg_model_final_state(fsg)); |
|
|
| fflush(fp); |
| } |
|
|
| void |
| fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file) |
| { |
| FILE *fp; |
|
|
| assert(fsg); |
|
|
| E_INFO("Writing FSM file '%s'\n", file); |
|
|
| if ((fp = fopen(file, "w")) == NULL) { |
| E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file); |
| return; |
| } |
|
|
| fsg_model_write_fsm(fsg, fp); |
|
|
| fclose(fp); |
| } |
|
|
| void |
| fsg_model_write_symtab(fsg_model_t * fsg, FILE * file) |
| { |
| int i; |
|
|
| fprintf(file, "<eps> 0\n"); |
| for (i = 0; i < fsg_model_n_word(fsg); ++i) { |
| fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1); |
| } |
| fflush(file); |
| } |
|
|
| void |
| fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file) |
| { |
| FILE *fp; |
|
|
| assert(fsg); |
|
|
| E_INFO("Writing FSM symbol table '%s'\n", file); |
|
|
| if ((fp = fopen(file, "w")) == NULL) { |
| E_ERROR("Failed to open symbol table '%s' for writing", file); |
| return; |
| } |
|
|
| fsg_model_write_symtab(fsg, fp); |
|
|
| fclose(fp); |
| } |
|
|
| static void |
| apply_closure(fsg_model_t *fsg, bitvec_t *active) |
| { |
| int state; |
|
|
| |
| for (state = 0; state < fsg_model_n_state(fsg); ++state) { |
| hash_table_t *null_trans; |
| hash_iter_t *itor; |
|
|
| if (!bitvec_is_set(active, state)) |
| continue; |
| null_trans = fsg->trans[state].null_trans; |
| if (null_trans == NULL) |
| continue; |
| |
| |
| for (itor = hash_table_iter(null_trans); |
| itor != NULL; itor = hash_table_iter_next(itor)) { |
| fsg_link_t *link = (fsg_link_t *)hash_entry_val(itor->ent); |
| bitvec_set(active, link->to_state); |
| E_INFO("epsilon %d -> %d\n", state, link->to_state); |
| } |
| } |
| } |
|
|
| int |
| fsg_model_accept(fsg_model_t *fsg, char const *words) |
| { |
| char *ptr, *mutable_words, *word, delimfound; |
| bitvec_t *active, *next; |
| int n, found = 0; |
|
|
| if (fsg == NULL || words == NULL) |
| return 0; |
|
|
| active = bitvec_alloc(fsg_model_n_state(fsg)); |
| next = bitvec_alloc(fsg_model_n_state(fsg)); |
| bitvec_set(active, fsg_model_start_state(fsg)); |
|
|
| |
| ptr = mutable_words = ckd_salloc(words); |
| while ((n = nextword(ptr, " \t\r\n\v\f", |
| &word, &delimfound)) >= 0) { |
| int wid = fsg_model_word_id(fsg, word); |
| int state; |
| bitvec_t *tmp; |
|
|
| E_INFO("word: %s\n", word); |
| |
| apply_closure(fsg, active); |
|
|
| |
| |
| if (wid < 0) { |
| |
| E_INFO("word %s not found!\n", word); |
| goto done; |
| } |
| |
| for (state = 0; state < fsg_model_n_state(fsg); ++state) { |
| fsg_arciter_t *itor; |
| if (!bitvec_is_set(active, state)) |
| continue; |
| for (itor = fsg_model_arcs(fsg, state); |
| itor != NULL; itor = fsg_arciter_next(itor)) { |
| fsg_link_t *link = fsg_arciter_get(itor); |
| |
| if (link->wid == wid) { |
| bitvec_set(next, link->to_state); |
| E_INFO("%s %d -> %d\n", |
| word, state, link->to_state); |
| } |
| } |
| } |
|
|
| |
| tmp = active; |
| active = next; |
| next = tmp; |
| bitvec_clear_all(next, fsg_model_n_state(fsg)); |
| |
| word[n] = delimfound; |
| ptr = word + n; |
| } |
| |
| |
| apply_closure(fsg, active); |
| found = bitvec_is_set(active, fsg_model_final_state(fsg)); |
|
|
| done: |
| bitvec_free(active); |
| bitvec_free(next); |
| ckd_free(mutable_words); |
| return found != 0; |
| } |
|
|