File size: 3,289 Bytes
5610573 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | #include "lm/ngram_model.h"
#include <pocketsphinx/logmath.h>
#include "util/strfuncs.h"
#include <pocketsphinx/err.h>
#include "test_macros.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Check a loaded language model against a fixed set of reference values.
 *
 * Looks up the words "<UNK>" and "absolute" by string and by word ID,
 * then compares unigram, bigram and trigram scores with hard-coded
 * expected values.  Intermediate scores are logged through E_INFO.
 *
 * NOTE(review): the TEST_* macros come from test_macros.h, which is not
 * visible here; presumably they terminate the test when a check fails --
 * confirm against that header.
 *
 * @param model Language model to verify; checked with TEST_ASSERT first.
 * @return 0 once every check has been executed.
 */
static int
test_lm_vals(ngram_model_t *model)
{
    int32 lscore;
    int32 hist_used;
    int32 unk_wid;

    TEST_ASSERT(model);

    /* String -> ID and ID -> string lookups must agree with each other. */
    TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
    TEST_EQUAL(strcmp(ngram_word(model, 0), "<UNK>"), 0);
    TEST_EQUAL(ngram_wid(model, "absolute"), 13);
    TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);

    /* Unigram scores: once by word string, once by word ID. */
    lscore = ngram_score(model, "<UNK>", NULL);
    E_INFO("%d\n", lscore);
    TEST_EQUAL_LOG(lscore, -75346);

    unk_wid = ngram_wid(model, "<UNK>");
    lscore = ngram_bg_score(model, unk_wid, NGRAM_INVALID_WID, &hist_used);
    E_INFO("%d\n", lscore);
    TEST_EQUAL_LOG(lscore, -75346);
    TEST_EQUAL(hist_used, 1);

    lscore = ngram_score(model, "sphinxtrain", NULL);
    E_INFO("%d\n", lscore);
    TEST_EQUAL_LOG(lscore, -64208);
    TEST_EQUAL_LOG(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
                                  NGRAM_INVALID_WID, &hist_used), -64208);
    TEST_EQUAL(hist_used, 1);

    /* Bigram score. */
    lscore = ngram_score(model, "huggins", "david", NULL);
    E_INFO("%d\n", lscore);
    TEST_EQUAL_LOG(lscore, -831);

    /* Trigram score. */
    lscore = ngram_score(model, "daines", "huggins", "david", NULL);
    E_INFO("%d\n", lscore);
    TEST_EQUAL_LOG(lscore, -9450);

    return 0;
}
/**
 * Round-trip a language model between ARPA and binary formats.
 *
 * Steps, in order:
 *   1. Read LMDIR "/100.lm.bz2" as ARPA, verify it, write "100.tmp.lm.bin".
 *   2. Unless DEBUG_ENDIAN is defined: read the pre-built
 *      LMDIR "/100.lm.bin", verify it, write "100.tmp.lm" as ARPA.
 *   3. Read back "100.tmp.lm.bin" and verify it.  With DEBUG_ENDIAN the
 *      ARPA file "100.tmp.lm" is written from this model instead.
 *   4. Read back "100.tmp.lm" and verify it.
 *   5. Convert ARPA -> BIN -> ARPA once more through the temporary files,
 *      verifying after each read.
 *   6. Read LMDIR "/turtle.ug.lm" and write it as "turtle.ug.tmp.lm.bin".
 *
 * The temporary files are written to the current working directory and
 * are not removed here.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return 0 when the end of the sequence is reached.
 */
int
main(int argc, char *argv[])
{
    logmath_t *lmath;
    ngram_model_t *model;

    (void)argc;
    (void)argv;

    err_set_loglevel(ERR_INFO);

    /* Initialize a logmath object to pass to ngram_read */
    lmath = logmath_init(1.0001, 0, 0);
    /* Every model below is read with this object, so check it up front. */
    TEST_ASSERT(lmath);

    E_INFO("Converting ARPA to BIN\n");
    model = ngram_model_read(NULL, LMDIR "/100.lm.bz2", NGRAM_ARPA, lmath);
    E_INFO("Verifying ARPA\n");
    test_lm_vals(model);
    E_INFO("Writing BIN\n");
    TEST_EQUAL(0, ngram_model_write(model, "100.tmp.lm.bin", NGRAM_BIN));
    ngram_model_free(model);

#ifdef DEBUG_ENDIAN
    E_INFO("Debugging endianness, will not use pre-existing model\n");
#else
    E_INFO("Converting BIN to ARPA\n");
    model = ngram_model_read(NULL, LMDIR "/100.lm.bin", NGRAM_BIN, lmath);
    test_lm_vals(model);
    TEST_EQUAL(0, ngram_model_write(model, "100.tmp.lm", NGRAM_ARPA));
    ngram_model_free(model);
#endif

    E_INFO("Testing converted BIN\n");
    model = ngram_model_read(NULL, "100.tmp.lm.bin", NGRAM_BIN, lmath);
    test_lm_vals(model);
#ifdef DEBUG_ENDIAN
    /* The pre-built binary was skipped above, so produce the ARPA file
     * that the following steps read from the freshly written binary. */
    TEST_EQUAL(0, ngram_model_write(model, "100.tmp.lm", NGRAM_ARPA));
#endif
    ngram_model_free(model);

    E_INFO("Testing converted ARPA\n");
    model = ngram_model_read(NULL, "100.tmp.lm", NGRAM_ARPA, lmath);
    test_lm_vals(model);
    ngram_model_free(model);

    E_INFO("Converting ARPA back to BIN\n");
    model = ngram_model_read(NULL, "100.tmp.lm", NGRAM_ARPA, lmath);
    test_lm_vals(model);
    TEST_EQUAL(0, ngram_model_write(model, "100.tmp.lm.bin", NGRAM_BIN));
    ngram_model_free(model);

    E_INFO("Converting BIN back to ARPA\n");
    model = ngram_model_read(NULL, "100.tmp.lm.bin", NGRAM_BIN, lmath);
    test_lm_vals(model);
    TEST_EQUAL(0, ngram_model_write(model, "100.tmp.lm", NGRAM_ARPA));
    ngram_model_free(model);

    E_INFO("Converting unigram ARPA to BIN\n");
    model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm", NGRAM_ARPA, lmath);
    /* This model is not passed through test_lm_vals(), which is where the
     * other loads get their non-NULL check, so check the load here before
     * handing the pointer to ngram_model_write(). */
    TEST_ASSERT(model);
    TEST_EQUAL(0, ngram_model_write(model, "turtle.ug.tmp.lm.bin", NGRAM_BIN));
    ngram_model_free(model);

    logmath_free(lmath);
    return 0;
}
|