Neos-0-Preview / tokenizer.json
bgg1996's picture
Upload folder using huggingface_hub
5aef489 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 256,
"content": "<|pad|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 257,
"content": "<|bos|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 258,
"content": "<|eos|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 259,
"content": "<|unk|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 260,
"content": "<|mask|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 261,
"content": "<|start_of_message|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 262,
"content": "<|end_of_message|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 263,
"content": "<|start_of_thinking|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 264,
"content": "<|end_of_thinking|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 265,
"content": "<|reserved_265|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 266,
"content": "<|reserved_266|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 267,
"content": "<|reserved_267|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 268,
"content": "<|reserved_268|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 269,
"content": "<|reserved_269|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 270,
"content": "<|reserved_270|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 271,
"content": "<|reserved_271|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 272,
"content": "<|reserved_272|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 273,
"content": "<|reserved_273|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 274,
"content": "<|reserved_274|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 275,
"content": "<|reserved_275|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 276,
"content": "<|reserved_276|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 277,
"content": "<|reserved_277|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 278,
"content": "<|reserved_278|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 279,
"content": "<|reserved_279|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 280,
"content": "<|reserved_280|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 281,
"content": "<|reserved_281|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 282,
"content": "<|reserved_282|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 283,
"content": "<|reserved_283|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 284,
"content": "<|reserved_284|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 285,
"content": "<|reserved_285|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 286,
"content": "<|reserved_286|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 287,
"content": "<|reserved_287|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": null,
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<|unk|>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"Ā": 0,
"ā": 1,
"Ă": 2,
"ă": 3,
"Ą": 4,
"ą": 5,
"Ć": 6,
"ć": 7,
"Ĉ": 8,
"ĉ": 9,
"Ċ": 10,
"ċ": 11,
"Č": 12,
"č": 13,
"Ď": 14,
"ď": 15,
"Đ": 16,
"đ": 17,
"Ē": 18,
"ē": 19,
"Ĕ": 20,
"ĕ": 21,
"Ė": 22,
"ė": 23,
"Ę": 24,
"ę": 25,
"Ě": 26,
"ě": 27,
"Ĝ": 28,
"ĝ": 29,
"Ğ": 30,
"ğ": 31,
"Ġ": 32,
"!": 33,
"\"": 34,
"#": 35,
"$": 36,
"%": 37,
"&": 38,
"'": 39,
"(": 40,
")": 41,
"*": 42,
"+": 43,
",": 44,
"-": 45,
".": 46,
"/": 47,
"0": 48,
"1": 49,
"2": 50,
"3": 51,
"4": 52,
"5": 53,
"6": 54,
"7": 55,
"8": 56,
"9": 57,
":": 58,
";": 59,
"<": 60,
"=": 61,
">": 62,
"?": 63,
"@": 64,
"A": 65,
"B": 66,
"C": 67,
"D": 68,
"E": 69,
"F": 70,
"G": 71,
"H": 72,
"I": 73,
"J": 74,
"K": 75,
"L": 76,
"M": 77,
"N": 78,
"O": 79,
"P": 80,
"Q": 81,
"R": 82,
"S": 83,
"T": 84,
"U": 85,
"V": 86,
"W": 87,
"X": 88,
"Y": 89,
"Z": 90,
"[": 91,
"\\": 92,
"]": 93,
"^": 94,
"_": 95,
"`": 96,
"a": 97,
"b": 98,
"c": 99,
"d": 100,
"e": 101,
"f": 102,
"g": 103,
"h": 104,
"i": 105,
"j": 106,
"k": 107,
"l": 108,
"m": 109,
"n": 110,
"o": 111,
"p": 112,
"q": 113,
"r": 114,
"s": 115,
"t": 116,
"u": 117,
"v": 118,
"w": 119,
"x": 120,
"y": 121,
"z": 122,
"{": 123,
"|": 124,
"}": 125,
"~": 126,
"ġ": 127,
"Ģ": 128,
"ģ": 129,
"Ĥ": 130,
"ĥ": 131,
"Ħ": 132,
"ħ": 133,
"Ĩ": 134,
"ĩ": 135,
"Ī": 136,
"ī": 137,
"Ĭ": 138,
"ĭ": 139,
"Į": 140,
"į": 141,
"İ": 142,
"ı": 143,
"Ĳ": 144,
"ĳ": 145,
"Ĵ": 146,
"ĵ": 147,
"Ķ": 148,
"ķ": 149,
"ĸ": 150,
"Ĺ": 151,
"ĺ": 152,
"Ļ": 153,
"ļ": 154,
"Ľ": 155,
"ľ": 156,
"Ŀ": 157,
"ŀ": 158,
"Ł": 159,
"ł": 160,
"¡": 161,
"¢": 162,
"£": 163,
"¤": 164,
"¥": 165,
"¦": 166,
"§": 167,
"¨": 168,
"©": 169,
"ª": 170,
"«": 171,
"¬": 172,
"Ń": 173,
"®": 174,
"¯": 175,
"°": 176,
"±": 177,
"²": 178,
"³": 179,
"´": 180,
"µ": 181,
"¶": 182,
"·": 183,
"¸": 184,
"¹": 185,
"º": 186,
"»": 187,
"¼": 188,
"½": 189,
"¾": 190,
"¿": 191,
"À": 192,
"Á": 193,
"Â": 194,
"Ã": 195,
"Ä": 196,
"Å": 197,
"Æ": 198,
"Ç": 199,
"È": 200,
"É": 201,
"Ê": 202,
"Ë": 203,
"Ì": 204,
"Í": 205,
"Î": 206,
"Ï": 207,
"Ð": 208,
"Ñ": 209,
"Ò": 210,
"Ó": 211,
"Ô": 212,
"Õ": 213,
"Ö": 214,
"×": 215,
"Ø": 216,
"Ù": 217,
"Ú": 218,
"Û": 219,
"Ü": 220,
"Ý": 221,
"Þ": 222,
"ß": 223,
"à": 224,
"á": 225,
"â": 226,
"ã": 227,
"ä": 228,
"å": 229,
"æ": 230,
"ç": 231,
"è": 232,
"é": 233,
"ê": 234,
"ë": 235,
"ì": 236,
"í": 237,
"î": 238,
"ï": 239,
"ð": 240,
"ñ": 241,
"ò": 242,
"ó": 243,
"ô": 244,
"õ": 245,
"ö": 246,
"÷": 247,
"ø": 248,
"ù": 249,
"ú": 250,
"û": 251,
"ü": 252,
"ý": 253,
"þ": 254,
"ÿ": 255,
"<|pad|>": 256,
"<|bos|>": 257,
"<|eos|>": 258,
"<|unk|>": 259,
"<|mask|>": 260,
"<|start_of_message|>": 261,
"<|end_of_message|>": 262,
"<|start_of_thinking|>": 263,
"<|end_of_thinking|>": 264,
"<|reserved_265|>": 265,
"<|reserved_266|>": 266,
"<|reserved_267|>": 267,
"<|reserved_268|>": 268,
"<|reserved_269|>": 269,
"<|reserved_270|>": 270,
"<|reserved_271|>": 271,
"<|reserved_272|>": 272,
"<|reserved_273|>": 273,
"<|reserved_274|>": 274,
"<|reserved_275|>": 275,
"<|reserved_276|>": 276,
"<|reserved_277|>": 277,
"<|reserved_278|>": 278,
"<|reserved_279|>": 279,
"<|reserved_280|>": 280,
"<|reserved_281|>": 281,
"<|reserved_282|>": 282,
"<|reserved_283|>": 283,
"<|reserved_284|>": 284,
"<|reserved_285|>": 285,
"<|reserved_286|>": 286,
"<|reserved_287|>": 287
},
"merges": []
}
}