speecht5-korean / tokenizer.json
ahnhs2k's picture
Upload SpeechT5 Korean TTS artifacts
49fa254 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<bos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<eos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"<pad>": 0,
"<unk>": 1,
"<bos>": 2,
"<eos>": 3,
"ᄀ": 4,
"ᄁ": 5,
"ᄂ": 6,
"ᄃ": 7,
"ᄄ": 8,
"ᄅ": 9,
"ᄆ": 10,
"ᄇ": 11,
"ᄈ": 12,
"ᄉ": 13,
"ᄊ": 14,
"ᄋ": 15,
"ᄌ": 16,
"ᄍ": 17,
"ᄎ": 18,
"ᄏ": 19,
"ᄐ": 20,
"ᄑ": 21,
"ᄒ": 22,
"ᅡ": 23,
"ᅢ": 24,
"ᅣ": 25,
"ᅤ": 26,
"ᅥ": 27,
"ᅦ": 28,
"ᅧ": 29,
"ᅨ": 30,
"ᅩ": 31,
"ᅪ": 32,
"ᅫ": 33,
"ᅬ": 34,
"ᅭ": 35,
"ᅮ": 36,
"ᅯ": 37,
"ᅰ": 38,
"ᅱ": 39,
"ᅲ": 40,
"ᅳ": 41,
"ᅴ": 42,
"ᅵ": 43,
"ᆨ": 44,
"ᆩ": 45,
"ᆪ": 46,
"ᆫ": 47,
"ᆬ": 48,
"ᆭ": 49,
"ᆮ": 50,
"ᆯ": 51,
"ᆰ": 52,
"ᆱ": 53,
"ᆲ": 54,
"ᆳ": 55,
"ᆴ": 56,
"ᆵ": 57,
"ᆶ": 58,
"ᆷ": 59,
"ᆸ": 60,
"ᆹ": 61,
"ᆺ": 62,
"ᆻ": 63,
"ᆼ": 64,
"ᆽ": 65,
"ᆾ": 66,
"ᆿ": 67,
"ᇀ": 68,
"ᇁ": 69,
"ᇂ": 70
},
"unk_token": "<unk>"
}
}