english_mfa / acoustic /meta.json
mmcauliffe's picture
Upload folder using huggingface_hub
d379d5e verified
{
"phones": [
"a",
"aj",
"aw",
"aː",
"b",
"bʲ",
"c",
"cʰ",
"cʷ",
"d",
"dʒ",
"dʲ",
"d̪",
"e",
"ej",
"eː",
"f",
"fʲ",
"fʷ",
"h",
"i",
"iː",
"j",
"k",
"kp",
"kʰ",
"kʷ",
"l",
"m",
"mʲ",
"m̩",
"n",
"n̩",
"o",
"ow",
"oː",
"p",
"pʰ",
"pʲ",
"pʷ",
"s",
"t",
"tʃ",
"tʰ",
"tʲ",
"tʷ",
"t̪",
"u",
"uː",
"v",
"vʲ",
"vʷ",
"w",
"z",
"æ",
"ç",
"ð",
"ŋ",
"ɐ",
"ɑ",
"ɑː",
"ɒ",
"ɒː",
"ɔ",
"ɔj",
"ɖ",
"ə",
"əw",
"ɚ",
"ɛ",
"ɛː",
"ɜ",
"ɜː",
"ɝ",
"ɟ",
"ɟʷ",
"ɡ",
"ɡb",
"ɡʷ",
"ɪ",
"ɫ",
"ɫ̩",
"ɱ",
"ɲ",
"ɹ",
"ɾ",
"ɾʲ",
"ɾ̃",
"ʃ",
"ʈ",
"ʈʲ",
"ʈʷ",
"ʉ",
"ʉː",
"ʊ",
"ʋ",
"ʎ",
"ʒ",
"ʔ",
"θ"
],
"phone_mapping": {
"<eps>": 0,
"sil": 1,
"spn": 2,
"a": 3,
"aj": 4,
"aw": 5,
"aː": 6,
"b": 7,
"bʲ": 8,
"c": 9,
"cʰ": 10,
"cʷ": 11,
"d": 12,
"dʒ": 13,
"dʲ": 14,
"d̪": 15,
"e": 16,
"ej": 17,
"eː": 18,
"f": 19,
"fʲ": 20,
"fʷ": 21,
"h": 22,
"i": 23,
"iː": 24,
"j": 25,
"k": 26,
"kp": 27,
"kʰ": 28,
"kʷ": 29,
"l": 30,
"m": 31,
"mʲ": 32,
"m̩": 33,
"n": 34,
"n̩": 35,
"o": 36,
"ow": 37,
"oː": 38,
"p": 39,
"pʰ": 40,
"pʲ": 41,
"pʷ": 42,
"s": 43,
"t": 44,
"tʃ": 45,
"tʰ": 46,
"tʲ": 47,
"tʷ": 48,
"t̪": 49,
"u": 50,
"uː": 51,
"v": 52,
"vʲ": 53,
"vʷ": 54,
"w": 55,
"z": 56,
"æ": 57,
"ç": 58,
"ð": 59,
"ŋ": 60,
"ɐ": 61,
"ɑ": 62,
"ɑː": 63,
"ɒ": 64,
"ɒː": 65,
"ɔ": 66,
"ɔj": 67,
"ɖ": 68,
"ə": 69,
"əw": 70,
"ɚ": 71,
"ɛ": 72,
"ɛː": 73,
"ɜ": 74,
"ɜː": 75,
"ɝ": 76,
"ɟ": 77,
"ɟʷ": 78,
"ɡ": 79,
"ɡb": 80,
"ɡʷ": 81,
"ɪ": 82,
"ɫ": 83,
"ɫ̩": 84,
"ɱ": 85,
"ɲ": 86,
"ɹ": 87,
"ɾ": 88,
"ɾʲ": 89,
"ɾ̃": 90,
"ʃ": 91,
"ʈ": 92,
"ʈʲ": 93,
"ʈʷ": 94,
"ʉ": 95,
"ʉː": 96,
"ʊ": 97,
"ʋ": 98,
"ʎ": 99,
"ʒ": 100,
"ʔ": 101,
"θ": 102
},
"phone_groups": {
"0": [
"kp",
"p",
"pʰ",
"pʲ",
"pʷ"
],
"1": [
"b",
"bʲ",
"ɡb"
],
"2": [
"f",
"fʲ",
"fʷ"
],
"3": [
"v",
"vʲ",
"vʷ"
],
"4": [
"θ"
],
"5": [
"t̪"
],
"6": [
"ð"
],
"7": [
"d̪"
],
"8": [
"t",
"tʰ",
"tʲ",
"tʷ",
"ʈ",
"ʈʲ",
"ʈʷ"
],
"9": [
"ʔ"
],
"10": [
"d",
"dʲ",
"ɖ"
],
"11": [
"ɾ",
"ɾʲ"
],
"12": [
"tʃ"
],
"13": [
"dʒ"
],
"14": [
"ʃ"
],
"15": [
"ʒ"
],
"16": [
"s"
],
"17": [
"z"
],
"18": [
"ɹ"
],
"19": [
"m",
"m̩"
],
"20": [
"mʲ"
],
"21": [
"ɱ"
],
"22": [
"n",
"n̩"
],
"23": [
"ɲ"
],
"24": [
"ɾ̃"
],
"25": [
"ŋ"
],
"26": [
"l"
],
"27": [
"ɫ",
"ɫ̩"
],
"28": [
"ʎ"
],
"29": [
"ɟ",
"ɟʷ",
"ɡ",
"ɡʷ"
],
"30": [
"c",
"cʰ",
"cʷ"
],
"31": [
"k",
"kʰ",
"kʷ"
],
"32": [
"ç"
],
"33": [
"h"
],
"34": [
"ɐ",
"ə"
],
"35": [
"ɜ",
"ɜː"
],
"36": [
"ɚ",
"ɝ"
],
"37": [
"ʊ"
],
"38": [
"ɪ"
],
"39": [
"ɑ",
"ɑː"
],
"40": [
"ɒ",
"ɒː",
"ɔ"
],
"41": [
"a",
"aː"
],
"42": [
"æ"
],
"43": [
"aj"
],
"44": [
"aw"
],
"45": [
"i",
"iː"
],
"46": [
"j"
],
"47": [
"ɛ",
"ɛː"
],
"48": [
"e",
"ej",
"eː"
],
"49": [
"ʉ",
"ʉː"
],
"50": [
"u",
"uː"
],
"51": [
"w"
],
"52": [
"ʋ"
],
"53": [
"ɔj"
],
"54": [
"o",
"ow",
"oː",
"əw"
]
},
"version": "3.1.0",
"architecture": "gmm-hmm",
"train_date": "2024-06-12 12:16:18.584033",
"training": {
"audio_duration": 12862940.052134357,
"num_speakers": 75018,
"num_utterances": 2374755,
"num_oovs": 0,
"average_log_likelihood": -0.08382050453507844
},
"dictionaries": {
"names": [
"default",
"english_india_mfa",
"english_nigeria_mfa",
"english_uk_mfa",
"english_us_mfa",
"nonnative"
],
"default": "default",
"silence_word": "<eps>",
"use_g2p": false,
"oov_word": "<unk>",
"bracketed_word": "[bracketed]",
"laughter_word": "[laughter]",
"clitic_marker": "'",
"position_dependent_phones": false
},
"language": "unknown",
"features": {
"type": "mfcc",
"use_energy": true,
"frame_shift": 10,
"frame_length": 25,
"snip_edges": false,
"low_frequency": 20,
"high_frequency": 7800,
"sample_frequency": 16000,
"dither": 0.0001,
"energy_floor": 1.0,
"num_coefficients": 13,
"num_mel_bins": 23,
"cepstral_lifter": 22,
"preemphasis_coefficient": 0.97,
"uses_cmvn": true,
"uses_deltas": true,
"uses_voiced": false,
"uses_splices": false,
"uses_speaker_adaptation": true,
"use_pitch": false,
"use_voicing": false,
"min_f0": 50,
"max_f0": 800,
"delta_pitch": 0.005,
"penalty_factor": 0.1,
"silence_weight": 0.0,
"splice_left_context": 3,
"splice_right_context": 3
},
"oov_phone": "spn",
"optional_silence_phone": "sil",
"phone_set_type": "UNKNOWN",
"silence_probability": 0.17,
"initial_silence_probability": 0.17,
"final_silence_correction": 0.99,
"final_non_silence_correction": 0.2966666666666667
}