pocketsphinx / src /ms_gauden.c
camenduru's picture
pocketsphinx
5610573
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
* Copyright (c) 1999-2004 Carnegie Mellon University. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* This work was supported in part by funding from the Defense Advanced
* Research Projects Agency and the National Science Foundation of the
* United States of America, and the CMU Sphinx Speech Consortium.
*
* THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
* ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
* NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ====================================================================
*
*/
#include <assert.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <pocketsphinx.h>
#include "util/bio.h"
#include "util/ckd_alloc.h"
#include "ms_gauden.h"
#define GAUDEN_PARAM_VERSION "1.0"
#ifndef M_PI
#define M_PI 3.1415926535897932385e0
#endif
#define WORST_DIST MAX_NEG_INT32
void
gauden_dump(const gauden_t * g)
{
int32 c;
for (c = 0; c < g->n_mgau; c++)
gauden_dump_ind(g, c);
}
void
gauden_dump_ind(const gauden_t * g, int senidx)
{
int32 f, d, i;
for (f = 0; f < g->n_feat; f++) {
E_INFO("Codebook %d, Feature %d (%dx%d):\n",
senidx, f, g->n_density, g->featlen[f]);
for (d = 0; d < g->n_density; d++) {
printf("m[%3d]", d);
for (i = 0; i < g->featlen[f]; i++)
printf(" %7.4f", MFCC2FLOAT(g->mean[senidx][f][d][i]));
printf("\n");
}
printf("\n");
for (d = 0; d < g->n_density; d++) {
printf("v[%3d]", d);
for (i = 0; i < g->featlen[f]; i++)
printf(" %d", (int)g->var[senidx][f][d][i]);
printf("\n");
}
printf("\n");
for (d = 0; d < g->n_density; d++)
printf("d[%3d] %d\n", d, (int)g->det[senidx][f][d]);
}
fflush(stderr);
}
/**
* Reads gaussian parameters from a file
*
* @param: out_param output parameter
* @
*
* @returns: allocated 4-d array of gaussians
*
*/
static float ****
gauden_param_read(const char *file_name,
int32 * out_n_mgau,
int32 * out_n_feat,
int32 * out_n_density,
int32 ** out_veclen)
{
char tmp;
FILE *fp;
int32 i, j, k, l, n, blk;
int32 n_mgau;
int32 n_feat;
int32 n_density;
int32 *veclen;
int32 byteswap, chksum_present;
float32 ****out;
float32 *buf;
char **argname, **argval;
uint32 chksum;
E_INFO("Reading mixture gaussian parameter: %s\n", file_name);
if ((fp = fopen(file_name, "rb")) == NULL) {
E_ERROR_SYSTEM("Failed to open file '%s' for reading", file_name);
return NULL;
}
/* Read header, including argument-value info and 32-bit byteorder magic */
if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) {
E_ERROR("Failed to read header from file '%s'\n", file_name);
fclose(fp);
return NULL;
}
/* Parse argument-value list */
chksum_present = 0;
for (i = 0; argname[i]; i++) {
if (strcmp(argname[i], "version") == 0) {
if (strcmp(argval[i], GAUDEN_PARAM_VERSION) != 0)
E_WARN("Version mismatch(%s): %s, expecting %s\n",
file_name, argval[i], GAUDEN_PARAM_VERSION);
}
else if (strcmp(argname[i], "chksum0") == 0) {
chksum_present = 1; /* Ignore the associated value */
}
}
bio_hdrarg_free(argname, argval);
argname = argval = NULL;
chksum = 0;
/* #Codebooks */
if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
E_ERROR("Failed to read number fo codebooks from %s\n", file_name);
fclose(fp);
return NULL;
}
*out_n_mgau = n_mgau;
/* #Features/codebook */
if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
E_ERROR("Failed to read number of features from %s\n", file_name);
fclose(fp);
return NULL;
}
*out_n_feat = n_feat;
/* #Gaussian densities/feature in each codebook */
if (bio_fread(&n_density, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
E_ERROR("fread(%s) (#density/codebook) failed\n", file_name);
}
*out_n_density = n_density;
/* #Dimensions in each feature stream */
veclen = ckd_calloc(n_feat, sizeof(uint32));
*out_veclen = veclen;
if (bio_fread(veclen, sizeof(int32), n_feat, fp, byteswap, &chksum) !=
n_feat) {
E_ERROR("fread(%s) (feature-lengths) failed\n", file_name);
fclose(fp);
return NULL;
}
/* blk = total vector length of all feature streams */
for (i = 0, blk = 0; i < n_feat; i++)
blk += veclen[i];
/* #Floats to follow; for the ENTIRE SET of CODEBOOKS */
if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1) {
E_ERROR("Failed to read number of parameters from %s\n", file_name);
fclose(fp);
return NULL;
}
if (n != n_mgau * n_density * blk) {
E_ERROR
("Number of parameters in %s(%d) doesn't match dimensions: %d x %d x %d\n",
file_name, n, n_mgau, n_density, blk);
fclose(fp);
return NULL;
}
/* Allocate memory for mixture gaussian densities if not already allocated */
out = (float32 ****) ckd_calloc_3d(n_mgau, n_feat, n_density,
sizeof(float32 *));
buf = (float32 *) ckd_calloc(n, sizeof(float32));
for (i = 0, l = 0; i < n_mgau; i++) {
for (j = 0; j < n_feat; j++) {
for (k = 0; k < n_density; k++) {
out[i][j][k] = &buf[l];
l += veclen[j];
}
}
}
/* Read mixture gaussian densities data */
if (bio_fread(buf, sizeof(float32), n, fp, byteswap, &chksum) != n) {
E_ERROR("Failed to read density data from file '%s'\n", file_name);
fclose(fp);
ckd_free_3d(out);
return NULL;
}
if (chksum_present)
bio_verify_chksum(fp, byteswap, chksum);
if (fread(&tmp, 1, 1, fp) == 1) {
E_ERROR("More data than expected in %s\n", file_name);
fclose(fp);
ckd_free_3d(out);
return NULL;
}
fclose(fp);
E_INFO("%d codebook, %d feature, size: \n", n_mgau, n_feat);
for (i = 0; i < n_feat; i++)
E_INFO(" %dx%d\n", n_density, veclen[i]);
return out;
}
static void
gauden_param_free(mfcc_t **** p)
{
ckd_free(p[0][0][0]);
ckd_free_3d(p);
}
/*
* Some of the gaussian density computation can be carried out in advance:
* log(determinant) calculation,
* 1/(2*var) in the exponent,
* NOTE; The density computation is performed in log domain.
*/
static int32
gauden_dist_precompute(gauden_t * g, logmath_t *lmath, float32 varfloor)
{
int32 i, m, f, d, flen;
mfcc_t *meanp;
mfcc_t *varp;
mfcc_t *detp;
int32 floored;
floored = 0;
/* Allocate space for determinants */
g->det = ckd_calloc_3d(g->n_mgau, g->n_feat, g->n_density, sizeof(***g->det));
for (m = 0; m < g->n_mgau; m++) {
for (f = 0; f < g->n_feat; f++) {
flen = g->featlen[f];
/* Determinants for all variance vectors in g->[m][f] */
for (d = 0, detp = g->det[m][f]; d < g->n_density; d++, detp++) {
*detp = 0;
for (i = 0, varp = g->var[m][f][d], meanp = g->mean[m][f][d];
i < flen; i++, varp++, meanp++) {
float32 *fvarp = (float32 *)varp;
#ifdef FIXED_POINT
float32 *fmp = (float32 *)meanp;
*meanp = FLOAT2MFCC(*fmp);
#endif
if (*fvarp < varfloor) {
*fvarp = varfloor;
++floored;
}
*detp += (mfcc_t)logmath_log(lmath,
1.0 / sqrt(*fvarp * 2.0 * M_PI));
/* Precompute this part of the exponential */
*varp = (mfcc_t)logmath_ln_to_log(lmath,
(1.0 / (*fvarp * 2.0)));
}
}
}
}
E_INFO("%d variance values floored\n", floored);
return 0;
}
gauden_t *
gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath)
{
int32 i, m, f, d, *flen;
gauden_t *g;
assert(meanfile != NULL);
assert(varfile != NULL);
assert(varfloor > 0.0);
g = (gauden_t *) ckd_calloc(1, sizeof(gauden_t));
g->lmath = logmath_retain(lmath);
g->mean = (mfcc_t ****)gauden_param_read(meanfile, &g->n_mgau, &g->n_feat, &g->n_density,
&g->featlen);
if (g->mean == NULL) {
return NULL;
}
g->var = (mfcc_t ****)gauden_param_read(varfile, &m, &f, &d, &flen);
if (g->var == NULL) {
return NULL;
}
/* Verify mean and variance parameter dimensions */
if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density)) {
E_ERROR
("Mixture-gaussians dimensions for means and variances differ\n");
ckd_free(flen);
gauden_free(g);
return NULL;
}
for (i = 0; i < g->n_feat; i++) {
if (g->featlen[i] != flen[i]) {
E_ERROR("Feature lengths for means and variances differ\n");
ckd_free(flen);
gauden_free(g);
return NULL;
}
}
ckd_free(flen);
gauden_dist_precompute(g, lmath, varfloor);
return g;
}
void
gauden_free(gauden_t * g)
{
if (g == NULL)
return;
if (g->mean)
gauden_param_free(g->mean);
if (g->var)
gauden_param_free(g->var);
if (g->det)
ckd_free_3d(g->det);
if (g->featlen)
ckd_free(g->featlen);
if (g->lmath)
logmath_free(g->lmath);
ckd_free(g);
}
/* See compute_dist below */
static int32
compute_dist_all(gauden_dist_t * out_dist, mfcc_t* obs, int32 featlen,
mfcc_t ** mean, mfcc_t ** var, mfcc_t * det,
int32 n_density)
{
int32 i, d;
for (d = 0; d < n_density; ++d) {
mfcc_t *m;
mfcc_t *v;
mfcc_t dval;
m = mean[d];
v = var[d];
dval = det[d];
for (i = 0; i < featlen; i++) {
mfcc_t diff;
#ifdef FIXED_POINT
/* Have to check for underflows here. */
mfcc_t pdval = dval;
diff = obs[i] - m[i];
dval -= MFCCMUL(MFCCMUL(diff, diff), v[i]);
if (dval > pdval) {
dval = WORST_SCORE;
break;
}
#else
diff = obs[i] - m[i];
/* The compiler really likes this to be a single
* expression, for whatever reason. */
dval -= diff * diff * v[i];
#endif
}
out_dist[d].dist = dval;
out_dist[d].id = d;
}
return 0;
}
/*
* Compute the top-N closest gaussians from the chosen set (mgau,feat)
* for the given input observation vector.
*/
static int32
compute_dist(gauden_dist_t * out_dist, int32 n_top,
mfcc_t * obs, int32 featlen,
mfcc_t ** mean, mfcc_t ** var, mfcc_t * det,
int32 n_density)
{
int32 i, j, d;
gauden_dist_t *worst;
/* Special case optimization when n_density <= n_top */
if (n_top >= n_density)
return (compute_dist_all
(out_dist, obs, featlen, mean, var, det, n_density));
for (i = 0; i < n_top; i++)
out_dist[i].dist = WORST_DIST;
worst = &(out_dist[n_top - 1]);
for (d = 0; d < n_density; d++) {
mfcc_t *m;
mfcc_t *v;
mfcc_t dval;
m = mean[d];
v = var[d];
dval = det[d];
for (i = 0; (i < featlen) && (dval >= worst->dist); i++) {
mfcc_t diff;
#ifdef FIXED_POINT
/* Have to check for underflows here. */
mfcc_t pdval = dval;
diff = obs[i] - m[i];
dval -= MFCCMUL(MFCCMUL(diff, diff), v[i]);
if (dval > pdval) {
dval = WORST_SCORE;
break;
}
#else
diff = obs[i] - m[i];
/* The compiler really likes this to be a single
* expression, for whatever reason. */
dval -= diff * diff * v[i];
#endif
}
if ((i < featlen) || (dval < worst->dist)) /* Codeword d worse than worst */
continue;
/* Codeword d at least as good as worst so far; insert in the ordered list */
for (i = 0; (i < n_top) && (dval < out_dist[i].dist); i++);
assert(i < n_top);
for (j = n_top - 1; j > i; --j)
out_dist[j] = out_dist[j - 1];
out_dist[i].dist = dval;
out_dist[i].id = d;
}
return 0;
}
/*
* Compute distances of the input observation from the top N codewords in the given
* codebook (g->{mean,var}[mgau]). The input observation, obs, includes vectors for
* all features in the codebook.
*/
int32
gauden_dist(gauden_t * g,
int mgau, int32 n_top, mfcc_t** obs, gauden_dist_t ** out_dist)
{
int32 f;
assert((n_top > 0) && (n_top <= g->n_density));
for (f = 0; f < g->n_feat; f++) {
compute_dist(out_dist[f], n_top,
obs[f], g->featlen[f],
g->mean[mgau][f], g->var[mgau][f], g->det[mgau][f],
g->n_density);
E_DEBUG("Top CW(%d,%d) = %d %d\n", mgau, f, out_dist[f][0].id,
(int)out_dist[f][0].dist >> SENSCR_SHIFT);
}
return 0;
}
int32
gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, ps_config_t *config)
{
int32 i, m, f, d, *flen;
/* Free data if already here */
if (g->mean)
gauden_param_free(g->mean);
if (g->var)
gauden_param_free(g->var);
if (g->det)
ckd_free_3d(g->det);
if (g->featlen)
ckd_free(g->featlen);
g->det = NULL;
g->featlen = NULL;
/* Reload means and variances (un-precomputed). */
g->mean = (mfcc_t ****)gauden_param_read(ps_config_str(config, "mean"), &g->n_mgau, &g->n_feat, &g->n_density,
&g->featlen);
g->var = (mfcc_t ****)gauden_param_read(ps_config_str(config, "var"), &m, &f, &d, &flen);
/* Verify mean and variance parameter dimensions */
if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density))
E_FATAL
("Mixture-gaussians dimensions for means and variances differ\n");
for (i = 0; i < g->n_feat; i++)
if (g->featlen[i] != flen[i])
E_FATAL("Feature lengths for means and variances differ\n");
ckd_free(flen);
/* Transform codebook for each stream s */
for (i = 0; i < g->n_mgau; ++i) {
for (f = 0; f < g->n_feat; ++f) {
float64 *temp;
temp = (float64 *) ckd_calloc(g->featlen[f], sizeof(float64));
/* Transform each density d in selected codebook */
for (d = 0; d < g->n_density; d++) {
int l;
for (l = 0; l < g->featlen[f]; l++) {
temp[l] = 0.0;
for (m = 0; m < g->featlen[f]; m++) {
/* FIXME: For now, only one class, hence the zeros below. */
temp[l] += mllr->A[f][0][l][m] * g->mean[i][f][d][m];
}
temp[l] += mllr->b[f][0][l];
}
for (l = 0; l < g->featlen[f]; l++) {
g->mean[i][f][d][l] = (float32) temp[l];
g->var[i][f][d][l] *= mllr->h[f][0][l];
}
}
ckd_free(temp);
}
}
/* Re-precompute (if we aren't adapting variances this isn't
* actually necessary...) */
gauden_dist_precompute(g, g->lmath, ps_config_float(config, "varfloor"));
return 0;
}