| | |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import json |
| | import math |
| |
|
| | import numpy as np |
| |
|
| |
|
def _load_json_cmvn(json_cmvn_file):
    """Read accumulated CMVN statistics from a json file and normalise them.

    The file must hold three entries: "mean_stat" (per-dimension sums),
    "var_stat" (per-dimension sums of squares) and "frame_num" (the number
    of frames the sums were accumulated over).

    Args:
        json_cmvn_file: path of the cmvn stats file in json format

    Returns:
        a numpy array with two rows: row 0 is the per-dimension mean,
        row 1 is the per-dimension inverse standard deviation
        (1 / sqrt(variance)), with the variance floored at 1.0e-20.
    """
    with open(json_cmvn_file) as stats_fp:
        stats = json.load(stats_fp)

    mean_acc = stats["mean_stat"]
    var_acc = stats["var_stat"]
    frame_total = stats["frame_num"]

    # Floor keeps the square root away from zero / negative variances.
    var_floor = 1.0e-20
    for dim, mean_sum in enumerate(mean_acc):
        mu = mean_sum / frame_total
        mean_acc[dim] = mu
        # E[x^2] - E[x]^2
        sigma_sq = var_acc[dim] / frame_total - mu * mu
        var_acc[dim] = 1.0 / math.sqrt(max(sigma_sq, var_floor))

    return np.array([mean_acc, var_acc])
| |
|
| |
|
def _load_kaldi_cmvn(kaldi_cmvn_file):
    """Load the kaldi format cmvn stats file and calculate cmvn

    The text file is expected to tokenise as
    ``[ sum_1 .. sum_D count  sqsum_1 .. sqsum_D 0 ]``.

    Args:
        kaldi_cmvn_file: kaldi text style global cmvn file, which
            is generated by:
            compute-cmvn-stats --binary=false scp:feats.scp global_cmvn

    Returns:
        a numpy array with two rows: row 0 is the per-dimension mean,
        row 1 is the per-dimension inverse standard deviation
        (1 / sqrt(variance)), with the variance floored at 1.0e-20.

    Raises:
        SystemExit: (code 1) if the file starts with the kaldi binary
            marker "\\0B".
        AssertionError: if the token layout does not match the expected
            bracketed text format.
    """
    # Imported here so the error path below cannot fail with NameError when
    # the module's top-level imports do not provide these names.
    import logging
    import sys

    # Sniff the marker in binary mode: decoding binary matrix data through a
    # text-mode handle may raise before the marker can even be compared.
    with open(kaldi_cmvn_file, "rb") as fid:
        is_binary = fid.read(2) == b"\0B"
    if is_binary:
        logging.error(
            "kaldi cmvn binary file is not supported, please "
            "recompute it by: compute-cmvn-stats --binary=false "
            " scp:feats.scp global_cmvn"
        )
        sys.exit(1)

    with open(kaldi_cmvn_file, "r") as fid:
        arr = fid.read().split()

    assert arr[0] == "[", "kaldi cmvn text file must start with '['"
    assert arr[-2] == "0", "second stats row must end with '0'"
    assert arr[-1] == "]", "kaldi cmvn text file must end with ']'"

    # Tokens: '[' + (D sums + count) + (D square sums + '0') + ']'.
    feat_dim = int((len(arr) - 2 - 2) / 2)
    means = [float(tok) for tok in arr[1:feat_dim + 1]]
    count = float(arr[feat_dim + 1])
    variance = [float(tok) for tok in arr[feat_dim + 2:2 * feat_dim + 2]]

    for i in range(len(means)):
        means[i] /= count
        # E[x^2] - E[x]^2, floored so the square root stays well defined.
        variance[i] = variance[i] / count - means[i] * means[i]
        if variance[i] < 1.0e-20:
            variance[i] = 1.0e-20
        variance[i] = 1.0 / math.sqrt(variance[i])
    cmvn = np.array([means, variance])
    return cmvn
| |
|
| |
|
def load_cmvn(cmvn_file, is_json):
    """Load global CMVN statistics from a json or kaldi text stats file.

    Args:
        cmvn_file: path of the cmvn stats file
        is_json: if true the file is parsed as json, otherwise as a
            kaldi text style cmvn file

    Returns:
        a tuple of the two rows produced by the selected loader: the
        per-dimension means and the per-dimension inverse standard
        deviations.
    """
    loader = _load_json_cmvn if is_json else _load_kaldi_cmvn
    stats = loader(cmvn_file)
    return stats[0], stats[1]
| |
|