Upload tokenizer
ba3216f
verified
|
|
{ |
|
|
"version": "1.0", |
|
|
"truncation": null, |
|
|
"padding": null, |
|
|
"added_tokens": [ |
|
|
{ |
|
|
"id": 0, |
|
|
"content": "[PAD]", |
|
|
"single_word": false, |
|
|
"lstrip": false, |
|
|
"rstrip": false, |
|
|
"normalized": false, |
|
|
"special": true |
|
|
} |
|
|
], |
|
|
"normalizer": null, |
|
|
"pre_tokenizer": { |
|
|
"type": "Split", |
|
|
"pattern": { |
|
|
"Regex": "." |
|
|
}, |
|
|
"behavior": "Isolated", |
|
|
"invert": false |
|
|
}, |
|
|
"post_processor": null, |
|
|
"decoder": null, |
|
|
"model": { |
|
|
"type": "WordLevel", |
|
|
"vocab": { |
|
|
"[PAD]": 0, |
|
|
"'": 1, |
|
|
"ɕ": 2, |
|
|
"ɘ": 3, |
|
|
"ɝ": 4, |
|
|
"ɸ": 5, |
|
|
"ɹ": 6, |
|
|
"ɾ": 7, |
|
|
"ʁ": 8, |
|
|
"ʎ": 9, |
|
|
"ʏ": 10, |
|
|
"ʔ": 11, |
|
|
"A": 12, |
|
|
"B": 13, |
|
|
"C": 14, |
|
|
"D": 15, |
|
|
"E": 16, |
|
|
"F": 17, |
|
|
"G": 18, |
|
|
"H": 19, |
|
|
"I": 20, |
|
|
"J": 21, |
|
|
"K": 22, |
|
|
"L": 23, |
|
|
"M": 24, |
|
|
"N": 25, |
|
|
"O": 26, |
|
|
"P": 27, |
|
|
"Q": 28, |
|
|
"R": 29, |
|
|
"S": 30, |
|
|
"T": 31, |
|
|
"U": 32, |
|
|
"V": 33, |
|
|
"W": 34, |
|
|
"X": 35, |
|
|
"Y": 36, |
|
|
"Z": 37, |
|
|
"a": 38, |
|
|
"b": 39, |
|
|
"c": 40, |
|
|
"d": 41, |
|
|
"e": 42, |
|
|
"f": 43, |
|
|
"g": 44, |
|
|
"h": 45, |
|
|
"i": 46, |
|
|
"j": 47, |
|
|
"k": 48, |
|
|
"l": 49, |
|
|
"m": 50, |
|
|
"n": 51, |
|
|
"o": 52, |
|
|
"p": 53, |
|
|
"q": 54, |
|
|
"r": 55, |
|
|
"s": 56, |
|
|
"t": 57, |
|
|
"u": 58, |
|
|
"v": 59, |
|
|
"w": 60, |
|
|
"x": 61, |
|
|
"y": 62, |
|
|
"z": 63, |
|
|
"«": 64, |
|
|
"»": 65, |
|
|
"ɐ": 66, |
|
|
"ɑ": 67, |
|
|
"ɒ": 68, |
|
|
"ɔ": 69, |
|
|
"ə": 70, |
|
|
"ɛ": 71, |
|
|
"ɜ": 72, |
|
|
"ɡ": 73, |
|
|
"ɦ": 74, |
|
|
"ɪ": 75, |
|
|
"ɱ": 76, |
|
|
"ɲ": 77, |
|
|
"ʀ": 78, |
|
|
"ʃ": 79, |
|
|
"ʊ": 80, |
|
|
"ʋ": 81, |
|
|
"ʌ": 82, |
|
|
"ʒ": 83, |
|
|
"ʰ": 84, |
|
|
"ʲ": 85, |
|
|
"ʼ": 86, |
|
|
"ˀ": 87, |
|
|
"˂": 88, |
|
|
"˃": 89, |
|
|
"ː": 90, |
|
|
"ˑ": 91, |
|
|
"˵": 92, |
|
|
"˶": 93, |
|
|
"˹": 94, |
|
|
"˺": 95, |
|
|
"̃": 96, |
|
|
"̆": 97, |
|
|
"̊": 98, |
|
|
"̍": 99, |
|
|
"̝": 100, |
|
|
"̞": 101, |
|
|
"̥": 102, |
|
|
"̩": 103, |
|
|
"̪": 104, |
|
|
"̯": 105, |
|
|
"͜": 106, |
|
|
"͡": 107, |
|
|
"‴": 108, |
|
|
"‷": 109, |
|
|
"‿": 110, |
|
|
"⁽": 111, |
|
|
"⁾": 112, |
|
|
"\t": 113, |
|
|
"\n": 114, |
|
|
"\u000b": 115, |
|
|
"\f": 116, |
|
|
"\r": 117, |
|
|
"\u000e": 118, |
|
|
"\u000f": 119, |
|
|
"\u0010": 120, |
|
|
"\u0011": 121, |
|
|
"\u0012": 122, |
|
|
"\u0013": 123, |
|
|
"\u0014": 124, |
|
|
"\u0015": 125, |
|
|
"\u0016": 126, |
|
|
"\u0017": 127, |
|
|
"\u0018": 128, |
|
|
"\u0019": 129, |
|
|
"\u001a": 130, |
|
|
"\u001b": 131, |
|
|
"\u001c": 132, |
|
|
"\u001d": 133, |
|
|
"\u001e": 134, |
|
|
"\u001f": 135, |
|
|
" ": 136, |
|
|
"!": 137, |
|
|
"\"": 138, |
|
|
"#": 139, |
|
|
"$": 140, |
|
|
"%": 141, |
|
|
"&": 142, |
|
|
"(": 143, |
|
|
")": 144, |
|
|
"*": 145, |
|
|
"+": 146, |
|
|
",": 147, |
|
|
"-": 148, |
|
|
".": 149, |
|
|
"/": 150, |
|
|
"0": 151, |
|
|
"1": 152, |
|
|
"2": 153, |
|
|
"3": 154, |
|
|
"4": 155, |
|
|
"5": 156, |
|
|
"6": 157, |
|
|
"7": 158, |
|
|
"8": 159, |
|
|
"9": 160, |
|
|
":": 161, |
|
|
";": 162, |
|
|
"<": 163, |
|
|
"=": 164, |
|
|
">": 165, |
|
|
"?": 166, |
|
|
"@": 167, |
|
|
"[": 168, |
|
|
"\\": 169, |
|
|
"]": 170, |
|
|
"^": 171, |
|
|
"_": 172, |
|
|
"`": 173, |
|
|
"{": 174, |
|
|
"|": 175, |
|
|
"}": 176, |
|
|
"~": 177, |
|
|
"\u007f": 178, |
|
|
"À": 179, |
|
|
"Á": 180, |
|
|
"Ã": 181, |
|
|
"Ä": 182, |
|
|
"Æ": 183, |
|
|
"Ç": 184, |
|
|
"È": 185, |
|
|
"É": 186, |
|
|
"Ê": 187, |
|
|
"Ë": 188, |
|
|
"Í": 189, |
|
|
"Ó": 190, |
|
|
"Õ": 191, |
|
|
"Ö": 192, |
|
|
"Ø": 193, |
|
|
"Ü": 194, |
|
|
"ß": 195, |
|
|
"à": 196, |
|
|
"á": 197, |
|
|
"ã": 198, |
|
|
"ä": 199, |
|
|
"æ": 200, |
|
|
"ç": 201, |
|
|
"è": 202, |
|
|
"é": 203, |
|
|
"ê": 204, |
|
|
"ë": 205, |
|
|
"í": 206, |
|
|
"ó": 207, |
|
|
"õ": 208, |
|
|
"ö": 209, |
|
|
"ø": 210, |
|
|
"ü": 211, |
|
|
"Ġ": 212, |
|
|
"ġ": 213, |
|
|
"Ŋ": 214, |
|
|
"ŋ": 215, |
|
|
"Œ": 216, |
|
|
"œ": 217, |
|
|
"Ɔ": 218, |
|
|
"Ə": 219, |
|
|
"Ɛ": 220, |
|
|
"Ɲ": 221, |
|
|
"Ʀ": 222, |
|
|
"Ʃ": 223, |
|
|
"Ʊ": 224, |
|
|
"Ʋ": 225, |
|
|
"Ʒ": 226, |
|
|
"Ʌ": 227, |
|
|
"ɓ": 228, |
|
|
"ɖ": 229, |
|
|
"ɗ": 230, |
|
|
"ɚ": 231, |
|
|
"ɞ": 232, |
|
|
"ɟ": 233, |
|
|
"ɠ": 234, |
|
|
"ɢ": 235, |
|
|
"ɣ": 236, |
|
|
"ɤ": 237, |
|
|
"ɥ": 238, |
|
|
"ɧ": 239, |
|
|
"ɨ": 240, |
|
|
"ɩ": 241, |
|
|
"ɫ": 242, |
|
|
"ɬ": 243, |
|
|
"ɭ": 244, |
|
|
"ɮ": 245, |
|
|
"ɯ": 246, |
|
|
"ɰ": 247, |
|
|
"ɳ": 248, |
|
|
"ɴ": 249, |
|
|
"ɵ": 250, |
|
|
"ɶ": 251, |
|
|
"ɷ": 252, |
|
|
"ɺ": 253, |
|
|
"ɻ": 254, |
|
|
"ɼ": 255, |
|
|
"ɽ": 256, |
|
|
"ɿ": 257, |
|
|
"ʂ": 258, |
|
|
"ʄ": 259, |
|
|
"ʅ": 260, |
|
|
"ʆ": 261, |
|
|
"ʇ": 262, |
|
|
"ʈ": 263, |
|
|
"ʉ": 264, |
|
|
"ʍ": 265, |
|
|
"ʐ": 266, |
|
|
"ʑ": 267, |
|
|
"ʓ": 268, |
|
|
"ʕ": 269, |
|
|
"ʖ": 270, |
|
|
"ʗ": 271, |
|
|
"ʘ": 272, |
|
|
"ʙ": 273, |
|
|
"ʚ": 274, |
|
|
"ʛ": 275, |
|
|
"ʜ": 276, |
|
|
"ʝ": 277, |
|
|
"ʞ": 278, |
|
|
"ʟ": 279, |
|
|
"ʠ": 280, |
|
|
"ʡ": 281, |
|
|
"ʢ": 282, |
|
|
"ʣ": 283, |
|
|
"ʤ": 284, |
|
|
"ʥ": 285, |
|
|
"ʦ": 286, |
|
|
"ʧ": 287, |
|
|
"ʨ": 288, |
|
|
"ʩ": 289, |
|
|
"ʪ": 290, |
|
|
"ʫ": 291, |
|
|
"ʬ": 292, |
|
|
"ʭ": 293, |
|
|
"ʮ": 294, |
|
|
"ʯ": 295, |
|
|
"Θ": 296, |
|
|
"Χ": 297, |
|
|
"θ": 298, |
|
|
"χ": 299, |
|
|
"Ɑ": 300, |
|
|
"Ɱ": 301, |
|
|
"Ɐ": 302, |
|
|
"Ɒ": 303, |
|
|
"Ɦ": 304, |
|
|
"Ɜ": 305, |
|
|
"Ɡ": 306, |
|
|
"Ɪ": 307 |
|
|
}, |
|
|
"unk_token": "[UNK]" |
|
|
} |
|
|
} |