blt-7b-hf / tokenizer.json
itazap's picture
itazap HF Staff
Upload Blt model converted
8f60e52 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 1,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 260,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<s>": {
"id": "<s>",
"ids": [
1
],
"tokens": [
"<s>"
]
}
}
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": "",
"end_of_word_suffix": "",
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<boe>": 0,
"<s>": 1,
"</s>": 2,
"<pad>": 3,
"Ā": 4,
"ā": 5,
"Ă": 6,
"ă": 7,
"Ą": 8,
"ą": 9,
"Ć": 10,
"ć": 11,
"Ĉ": 12,
"ĉ": 13,
"Ċ": 14,
"ċ": 15,
"Č": 16,
"č": 17,
"Ď": 18,
"ď": 19,
"Đ": 20,
"đ": 21,
"Ē": 22,
"ē": 23,
"Ĕ": 24,
"ĕ": 25,
"Ė": 26,
"ė": 27,
"Ę": 28,
"ę": 29,
"Ě": 30,
"ě": 31,
"Ĝ": 32,
"ĝ": 33,
"Ğ": 34,
"ğ": 35,
"Ġ": 36,
"!": 37,
"\"": 38,
"#": 39,
"$": 40,
"%": 41,
"&": 42,
"'": 43,
"(": 44,
")": 45,
"*": 46,
"+": 47,
",": 48,
"-": 49,
".": 50,
"/": 51,
"0": 52,
"1": 53,
"2": 54,
"3": 55,
"4": 56,
"5": 57,
"6": 58,
"7": 59,
"8": 60,
"9": 61,
":": 62,
";": 63,
"<": 64,
"=": 65,
">": 66,
"?": 67,
"@": 68,
"A": 69,
"B": 70,
"C": 71,
"D": 72,
"E": 73,
"F": 74,
"G": 75,
"H": 76,
"I": 77,
"J": 78,
"K": 79,
"L": 80,
"M": 81,
"N": 82,
"O": 83,
"P": 84,
"Q": 85,
"R": 86,
"S": 87,
"T": 88,
"U": 89,
"V": 90,
"W": 91,
"X": 92,
"Y": 93,
"Z": 94,
"[": 95,
"\\": 96,
"]": 97,
"^": 98,
"_": 99,
"`": 100,
"a": 101,
"b": 102,
"c": 103,
"d": 104,
"e": 105,
"f": 106,
"g": 107,
"h": 108,
"i": 109,
"j": 110,
"k": 111,
"l": 112,
"m": 113,
"n": 114,
"o": 115,
"p": 116,
"q": 117,
"r": 118,
"s": 119,
"t": 120,
"u": 121,
"v": 122,
"w": 123,
"x": 124,
"y": 125,
"z": 126,
"{": 127,
"|": 128,
"}": 129,
"~": 130,
"ġ": 131,
"Ģ": 132,
"ģ": 133,
"Ĥ": 134,
"ĥ": 135,
"Ħ": 136,
"ħ": 137,
"Ĩ": 138,
"ĩ": 139,
"Ī": 140,
"ī": 141,
"Ĭ": 142,
"ĭ": 143,
"Į": 144,
"į": 145,
"İ": 146,
"ı": 147,
"IJ": 148,
"ij": 149,
"Ĵ": 150,
"ĵ": 151,
"Ķ": 152,
"ķ": 153,
"ĸ": 154,
"Ĺ": 155,
"ĺ": 156,
"Ļ": 157,
"ļ": 158,
"Ľ": 159,
"ľ": 160,
"Ŀ": 161,
"ŀ": 162,
"Ł": 163,
"ł": 164,
"¡": 165,
"¢": 166,
"£": 167,
"¤": 168,
"¥": 169,
"¦": 170,
"§": 171,
"¨": 172,
"©": 173,
"ª": 174,
"«": 175,
"¬": 176,
"Ń": 177,
"®": 178,
"¯": 179,
"°": 180,
"±": 181,
"²": 182,
"³": 183,
"´": 184,
"µ": 185,
"¶": 186,
"·": 187,
"¸": 188,
"¹": 189,
"º": 190,
"»": 191,
"¼": 192,
"½": 193,
"¾": 194,
"¿": 195,
"À": 196,
"Á": 197,
"Â": 198,
"Ã": 199,
"Ä": 200,
"Å": 201,
"Æ": 202,
"Ç": 203,
"È": 204,
"É": 205,
"Ê": 206,
"Ë": 207,
"Ì": 208,
"Í": 209,
"Î": 210,
"Ï": 211,
"Ð": 212,
"Ñ": 213,
"Ò": 214,
"Ó": 215,
"Ô": 216,
"Õ": 217,
"Ö": 218,
"×": 219,
"Ø": 220,
"Ù": 221,
"Ú": 222,
"Û": 223,
"Ü": 224,
"Ý": 225,
"Þ": 226,
"ß": 227,
"à": 228,
"á": 229,
"â": 230,
"ã": 231,
"ä": 232,
"å": 233,
"æ": 234,
"ç": 235,
"è": 236,
"é": 237,
"ê": 238,
"ë": 239,
"ì": 240,
"í": 241,
"î": 242,
"ï": 243,
"ð": 244,
"ñ": 245,
"ò": 246,
"ó": 247,
"ô": 248,
"õ": 249,
"ö": 250,
"÷": 251,
"ø": 252,
"ù": 253,
"ú": 254,
"û": 255,
"ü": 256,
"ý": 257,
"þ": 258,
"ÿ": 259
},
"merges": []
}
}