Upload tokenizer

cfa1a7d verified 4 months ago

12.5 kB

	{
	"added_tokens_decoder": {
	"0": {
	"content": "t'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"2": {
	"content": "s'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"3": {
	"content": "jy",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"4": {
	"content": "n'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"13": {
	"content": "sj",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"18": {
	"content": "ḗ",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"21": {
	"content": "ā́",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"23": {
	"content": "ndz",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"25": {
	"content": "ī̌",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"26": {
	"content": "ä́",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"29": {
	"content": "zh",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"31": {
	"content": "ū́",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"33": {
	"content": "chj",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"34": {
	"content": "dy",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"35": {
	"content": "tsj",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"38": {
	"content": "ū̀",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"41": {
	"content": "ō̌",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"42": {
	"content": "ndzh",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"44": {
	"content": "jn",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"45": {
	"content": "ä̌",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"47": {
	"content": "ǚ",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"48": {
	"content": "ë̌",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"49": {
	"content": "tj",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"52": {
	"content": "ë̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"54": {
	"content": "ḯ",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"55": {
	"content": "ä̀",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"56": {
	"content": "ö̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"57": {
	"content": "ē",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"58": {
	"content": "ā̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"61": {
	"content": "pj",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"62": {
	"content": "ö̌",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"64": {
	"content": "ǜ",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"65": {
	"content": "ī̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"66": {
	"content": "ō̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"67": {
	"content": "ö̀",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"69": {
	"content": "jw",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"70": {
	"content": "ī",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"73": {
	"content": "ū̌",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"77": {
	"content": "ā̌",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"81": {
	"content": "ī̀",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"82": {
	"content": "kj",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"83": {
	"content": "ë́",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"86": {
	"content": "j'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"87": {
	"content": "ï̌",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"89": {
	"content": "ö́",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"92": {
	"content": "ō",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"93": {
	"content": "ū̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"95": {
	"content": "jñ",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"96": {
	"content": "ä̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"98": {
	"content": "ë̀",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"99": {
	"content": "ü̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"102": {
	"content": "ā̀",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"115": {
	"content": "ū",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"116": {
	"content": "ṓ",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"117": {
	"content": "ǘ",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"123": {
	"content": "ï̂",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"126": {
	"content": "ṑ",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"127": {
	"content": "jm",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"129": {
	"content": "m'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"130": {
	"content": "ts'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"133": {
	"content": "'w",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"134": {
	"content": "ñ'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"135": {
	"content": "k'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"136": {
	"content": "ch'",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"141": {
	"content": "ā",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"145": {
	"content": "ch",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"153": {
	"content": "ï̀",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"156": {
	"content": "ī́",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"160": {
	"content": "...",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"161": {
	"content": "[UNK]",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"162": {
	"content": "[PAD]",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"163": {
	"content": "<s>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"164": {
	"content": "</s>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	}
	},
	"bos_token": "<s>",
	"clean_up_tokenization_spaces": false,
	"do_lower_case": false,
	"eos_token": "</s>",
	"extra_special_tokens": {},
	"model_max_length": 1000000000000000019884624838656,
	"pad_token": "[PAD]",
	"replace_word_delimiter_char": " ",
	"target_lang": "mmc",
	"tokenizer_class": "Wav2Vec2CTCTokenizer",
	"unk_token": "[UNK]",
	"word_delimiter_token": "\|"
	}