Light-100M-untrainedv0.01 / tokenizer.json
Levelfive's picture
Upload LIGHTBRAIN model
ddd7c97 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<bos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<eos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": []
},
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<bos>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<bos>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<eos>",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<bos>": {
"id": "<bos>",
"ids": [
2
],
"tokens": [
"<bos>"
]
},
"<eos>": {
"id": "<eos>",
"ids": [
3
],
"tokens": [
"<eos>"
]
}
}
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": true,
"vocab": {
"<0x00>": 256,
"<0x01>": 257,
"<0x02>": 258,
"<0x03>": 259,
"<0x04>": 260,
"<0x05>": 5,
"<0x06>": 6,
"<0x07>": 7,
"<0x08>": 8,
"<0x09>": 9,
"<0x0A>": 10,
"<0x0B>": 11,
"<0x0C>": 12,
"<0x0D>": 13,
"<0x0E>": 14,
"<0x0F>": 15,
"<0x10>": 16,
"<0x11>": 17,
"<0x12>": 18,
"<0x13>": 19,
"<0x14>": 20,
"<0x15>": 21,
"<0x16>": 22,
"<0x17>": 23,
"<0x18>": 24,
"<0x19>": 25,
"<0x1A>": 26,
"<0x1B>": 27,
"<0x1C>": 28,
"<0x1D>": 29,
"<0x1E>": 30,
"<0x1F>": 31,
" ": 32,
"!": 33,
"\"": 34,
"#": 35,
"$": 36,
"%": 37,
"&": 38,
"'": 39,
"(": 40,
")": 41,
"*": 42,
"+": 43,
",": 44,
"-": 45,
".": 46,
"/": 47,
"0": 48,
"1": 49,
"2": 50,
"3": 51,
"4": 52,
"5": 53,
"6": 54,
"7": 55,
"8": 56,
"9": 57,
":": 58,
";": 59,
"<": 60,
"=": 61,
">": 62,
"?": 63,
"@": 64,
"A": 65,
"B": 66,
"C": 67,
"D": 68,
"E": 69,
"F": 70,
"G": 71,
"H": 72,
"I": 73,
"J": 74,
"K": 75,
"L": 76,
"M": 77,
"N": 78,
"O": 79,
"P": 80,
"Q": 81,
"R": 82,
"S": 83,
"T": 84,
"U": 85,
"V": 86,
"W": 87,
"X": 88,
"Y": 89,
"Z": 90,
"[": 91,
"\\": 92,
"]": 93,
"^": 94,
"_": 95,
"`": 96,
"a": 97,
"b": 98,
"c": 99,
"d": 100,
"e": 101,
"f": 102,
"g": 103,
"h": 104,
"i": 105,
"j": 106,
"k": 107,
"l": 108,
"m": 109,
"n": 110,
"o": 111,
"p": 112,
"q": 113,
"r": 114,
"s": 115,
"t": 116,
"u": 117,
"v": 118,
"w": 119,
"x": 120,
"y": 121,
"z": 122,
"{": 123,
"|": 124,
"}": 125,
"~": 126,
"<0x7F>": 127,
"<0x80>": 128,
"<0x81>": 129,
"<0x82>": 130,
"<0x83>": 131,
"<0x84>": 132,
"<0x85>": 133,
"<0x86>": 134,
"<0x87>": 135,
"<0x88>": 136,
"<0x89>": 137,
"<0x8A>": 138,
"<0x8B>": 139,
"<0x8C>": 140,
"<0x8D>": 141,
"<0x8E>": 142,
"<0x8F>": 143,
"<0x90>": 144,
"<0x91>": 145,
"<0x92>": 146,
"<0x93>": 147,
"<0x94>": 148,
"<0x95>": 149,
"<0x96>": 150,
"<0x97>": 151,
"<0x98>": 152,
"<0x99>": 153,
"<0x9A>": 154,
"<0x9B>": 155,
"<0x9C>": 156,
"<0x9D>": 157,
"<0x9E>": 158,
"<0x9F>": 159,
"<0xA0>": 160,
"<0xA1>": 161,
"<0xA2>": 162,
"<0xA3>": 163,
"<0xA4>": 164,
"<0xA5>": 165,
"<0xA6>": 166,
"<0xA7>": 167,
"<0xA8>": 168,
"<0xA9>": 169,
"<0xAA>": 170,
"<0xAB>": 171,
"<0xAC>": 172,
"<0xAD>": 173,
"<0xAE>": 174,
"<0xAF>": 175,
"<0xB0>": 176,
"<0xB1>": 177,
"<0xB2>": 178,
"<0xB3>": 179,
"<0xB4>": 180,
"<0xB5>": 181,
"<0xB6>": 182,
"<0xB7>": 183,
"<0xB8>": 184,
"<0xB9>": 185,
"<0xBA>": 186,
"<0xBB>": 187,
"<0xBC>": 188,
"<0xBD>": 189,
"<0xBE>": 190,
"<0xBF>": 191,
"<0xC0>": 192,
"<0xC1>": 193,
"<0xC2>": 194,
"<0xC3>": 195,
"<0xC4>": 196,
"<0xC5>": 197,
"<0xC6>": 198,
"<0xC7>": 199,
"<0xC8>": 200,
"<0xC9>": 201,
"<0xCA>": 202,
"<0xCB>": 203,
"<0xCC>": 204,
"<0xCD>": 205,
"<0xCE>": 206,
"<0xCF>": 207,
"<0xD0>": 208,
"<0xD1>": 209,
"<0xD2>": 210,
"<0xD3>": 211,
"<0xD4>": 212,
"<0xD5>": 213,
"<0xD6>": 214,
"<0xD7>": 215,
"<0xD8>": 216,
"<0xD9>": 217,
"<0xDA>": 218,
"<0xDB>": 219,
"<0xDC>": 220,
"<0xDD>": 221,
"<0xDE>": 222,
"<0xDF>": 223,
"<0xE0>": 224,
"<0xE1>": 225,
"<0xE2>": 226,
"<0xE3>": 227,
"<0xE4>": 228,
"<0xE5>": 229,
"<0xE6>": 230,
"<0xE7>": 231,
"<0xE8>": 232,
"<0xE9>": 233,
"<0xEA>": 234,
"<0xEB>": 235,
"<0xEC>": 236,
"<0xED>": 237,
"<0xEE>": 238,
"<0xEF>": 239,
"<0xF0>": 240,
"<0xF1>": 241,
"<0xF2>": 242,
"<0xF3>": 243,
"<0xF4>": 244,
"<0xF5>": 245,
"<0xF6>": 246,
"<0xF7>": 247,
"<0xF8>": 248,
"<0xF9>": 249,
"<0xFA>": 250,
"<0xFB>": 251,
"<0xFC>": 252,
"<0xFD>": 253,
"<0xFE>": 254,
"<0xFF>": 255,
"<pad>": 0,
"<unk>": 1,
"<bos>": 2,
"<eos>": 3,
"<sep>": 4
},
"merges": []
}
}