ctaguchi's picture
Upload tokenizer
cfa1a7d verified
{
"added_tokens_decoder": {
"0": {
"content": "t'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"2": {
"content": "s'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"3": {
"content": "jy",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"4": {
"content": "n'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"13": {
"content": "sj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"18": {
"content": "ḗ",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"21": {
"content": "ā́",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"23": {
"content": "ndz",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"25": {
"content": "ī̌",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"26": {
"content": "ä́",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"29": {
"content": "zh",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"31": {
"content": "ū́",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"33": {
"content": "chj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"34": {
"content": "dy",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"35": {
"content": "tsj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"38": {
"content": "ū̀",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"41": {
"content": "ō̌",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"42": {
"content": "ndzh",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"44": {
"content": "jn",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"45": {
"content": "ä̌",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"47": {
"content": "ǚ",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"48": {
"content": "ë̌",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"49": {
"content": "tj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"52": {
"content": "ë̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"54": {
"content": "ḯ",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"55": {
"content": "ä̀",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"56": {
"content": "ö̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"57": {
"content": "ē",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"58": {
"content": "ā̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"61": {
"content": "pj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"62": {
"content": "ö̌",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"64": {
"content": "ǜ",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"65": {
"content": "ī̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"66": {
"content": "ō̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"67": {
"content": "ö̀",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"69": {
"content": "jw",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"70": {
"content": "ī",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"73": {
"content": "ū̌",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"77": {
"content": "ā̌",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"81": {
"content": "ī̀",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"82": {
"content": "kj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"83": {
"content": "ë́",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"86": {
"content": "j'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"87": {
"content": "ï̌",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"89": {
"content": "ö́",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"92": {
"content": "ō",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"93": {
"content": "ū̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"95": {
"content": "jñ",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"96": {
"content": "ä̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"98": {
"content": "ë̀",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"99": {
"content": "ü̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"102": {
"content": "ā̀",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"115": {
"content": "ū",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"116": {
"content": "ṓ",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"117": {
"content": "ǘ",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"123": {
"content": "ï̂",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"126": {
"content": "ṑ",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"127": {
"content": "jm",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"129": {
"content": "m'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"130": {
"content": "ts'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"133": {
"content": "'w",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"134": {
"content": "ñ'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"135": {
"content": "k'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"136": {
"content": "ch'",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"141": {
"content": "ā",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"145": {
"content": "ch",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"153": {
"content": "ï̀",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"156": {
"content": "ī́",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"160": {
"content": "...",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"161": {
"content": "[UNK]",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"162": {
"content": "[PAD]",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"163": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"164": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"do_lower_case": false,
"eos_token": "</s>",
"extra_special_tokens": {},
"model_max_length": 1000000000000000019884624838656,
"pad_token": "[PAD]",
"replace_word_delimiter_char": " ",
"target_lang": "mmc",
"tokenizer_class": "Wav2Vec2CTCTokenizer",
"unk_token": "[UNK]",
"word_delimiter_token": "|"
}