moe100m-physics-tinybpe / tokenizer.json
AlexWortega's picture
Upload tokenizer.json with huggingface_hub
58e7b04 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<bos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<eos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<bos>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<eos>",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<bos>": {
"id": "<bos>",
"ids": [
1
],
"tokens": [
"<bos>"
]
},
"<eos>": {
"id": "<eos>",
"ids": [
2
],
"tokens": [
"<eos>"
]
}
}
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<pad>": 0,
"<bos>": 1,
"<eos>": 2,
"!": 3,
"\"": 4,
"#": 5,
"$": 6,
"%": 7,
"&": 8,
"'": 9,
"(": 10,
")": 11,
"*": 12,
"+": 13,
",": 14,
"-": 15,
".": 16,
"/": 17,
"0": 18,
"1": 19,
"2": 20,
"3": 21,
"4": 22,
"5": 23,
"6": 24,
"7": 25,
"8": 26,
"9": 27,
":": 28,
";": 29,
"<": 30,
"=": 31,
">": 32,
"?": 33,
"@": 34,
"A": 35,
"B": 36,
"C": 37,
"D": 38,
"E": 39,
"F": 40,
"G": 41,
"H": 42,
"I": 43,
"J": 44,
"K": 45,
"L": 46,
"M": 47,
"N": 48,
"O": 49,
"P": 50,
"Q": 51,
"R": 52,
"S": 53,
"T": 54,
"U": 55,
"V": 56,
"W": 57,
"X": 58,
"Y": 59,
"Z": 60,
"[": 61,
"\\": 62,
"]": 63,
"^": 64,
"_": 65,
"`": 66,
"a": 67,
"b": 68,
"c": 69,
"d": 70,
"e": 71,
"f": 72,
"g": 73,
"h": 74,
"i": 75,
"j": 76,
"k": 77,
"l": 78,
"m": 79,
"n": 80,
"o": 81,
"p": 82,
"q": 83,
"r": 84,
"s": 85,
"t": 86,
"u": 87,
"v": 88,
"w": 89,
"x": 90,
"y": 91,
"z": 92,
"{": 93,
"|": 94,
"}": 95,
"~": 96,
"¡": 97,
"¢": 98,
"£": 99,
"¤": 100,
"¥": 101,
"¦": 102,
"§": 103,
"¨": 104,
"©": 105,
"ª": 106,
"«": 107,
"¬": 108,
"®": 109,
"¯": 110,
"°": 111,
"±": 112,
"²": 113,
"³": 114,
"´": 115,
"µ": 116,
"¶": 117,
"·": 118,
"¸": 119,
"¹": 120,
"º": 121,
"»": 122,
"¼": 123,
"½": 124,
"¾": 125,
"¿": 126,
"À": 127,
"Á": 128,
"Â": 129,
"Ã": 130,
"Ä": 131,
"Å": 132,
"Æ": 133,
"Ç": 134,
"È": 135,
"É": 136,
"Ê": 137,
"Ë": 138,
"Ì": 139,
"Í": 140,
"Î": 141,
"Ï": 142,
"Ð": 143,
"Ñ": 144,
"Ò": 145,
"Ó": 146,
"Ô": 147,
"Õ": 148,
"Ö": 149,
"×": 150,
"Ø": 151,
"Ù": 152,
"Ú": 153,
"Û": 154,
"Ü": 155,
"Ý": 156,
"Þ": 157,
"ß": 158,
"à": 159,
"á": 160,
"â": 161,
"ã": 162,
"ä": 163,
"å": 164,
"æ": 165,
"ç": 166,
"è": 167,
"é": 168,
"ê": 169,
"ë": 170,
"ì": 171,
"í": 172,
"î": 173,
"ï": 174,
"ð": 175,
"ñ": 176,
"ò": 177,
"ó": 178,
"ô": 179,
"õ": 180,
"ö": 181,
"÷": 182,
"ø": 183,
"ù": 184,
"ú": 185,
"û": 186,
"ü": 187,
"ý": 188,
"þ": 189,
"ÿ": 190,
"Ā": 191,
"ā": 192,
"Ă": 193,
"ă": 194,
"Ą": 195,
"ą": 196,
"Ć": 197,
"ć": 198,
"Ĉ": 199,
"ĉ": 200,
"Ċ": 201,
"ċ": 202,
"Č": 203,
"č": 204,
"Ď": 205,
"ď": 206,
"Đ": 207,
"đ": 208,
"Ē": 209,
"ē": 210,
"Ĕ": 211,
"ĕ": 212,
"Ė": 213,
"ė": 214,
"Ę": 215,
"ę": 216,
"Ě": 217,
"ě": 218,
"Ĝ": 219,
"ĝ": 220,
"Ğ": 221,
"ğ": 222,
"Ġ": 223,
"ġ": 224,
"Ģ": 225,
"ģ": 226,
"Ĥ": 227,
"ĥ": 228,
"Ħ": 229,
"ħ": 230,
"Ĩ": 231,
"ĩ": 232,
"Ī": 233,
"ī": 234,
"Ĭ": 235,
"ĭ": 236,
"Į": 237,
"į": 238,
"İ": 239,
"ı": 240,
"IJ": 241,
"ij": 242,
"Ĵ": 243,
"ĵ": 244,
"Ķ": 245,
"ķ": 246,
"ĸ": 247,
"Ĺ": 248,
"ĺ": 249,
"Ļ": 250,
"ļ": 251,
"Ľ": 252,
"ľ": 253,
"Ŀ": 254,
"ŀ": 255,
"Ł": 256,
"ł": 257,
"Ń": 258,
"00": 259,
"=(": 260,
"),": 261,
"Ġa": 262,
"Ġp": 263,
"os": 264,
"el": 265,
"Ġo": 266,
"bj": 267,
"vel": 268,
"ĊĠ": 269,
"Ġvel": 270,
"Ġpos": 271,
"Ġobj": 272,
"=-": 273,
"Ġav": 274,
"Ġ-": 275,
"0000": 276,
"=(-": 277,
"Ġ1": 278,
"01": 279,
"Ġ0": 280,
"12": 281,
"14": 282,
"39": 283,
"02": 284,
"13": 285,
"15": 286,
"10": 287,
"18": 288,
"16": 289,
"17": 290,
"Ġ6": 291,
"19": 292,
"04": 293,
"38": 294,
"28": 295,
"25": 296,
"37": 297,
"35": 298,
"26": 299,
"27": 300,
"24": 301,
"34": 302,
"29": 303,
"36": 304,
"05": 305,
"06": 306,
"07": 307,
"89": 308,
"09": 309,
"59": 310,
"11": 311,
"33": 312,
"57": 313,
"08": 314,
"49": 315,
"47": 316,
"66": 317,
"22": 318,
"58": 319,
"48": 320,
"46": 321,
"56": 322,
"03": 323,
"23": 324,
"98": 325,
"55": 326,
"77": 327,
"45": 328,
"44": 329,
"67": 330,
"88": 331,
"21": 332,
"20": 333,
"69": 334,
"Ġ7": 335,
"99": 336,
"68": 337,
"ra": 338,
"me": 339,
"Fra": 340,
"Frame": 341,
"54": 342,
"32": 343,
"87": 344,
"in": 345,
"97": 346,
"65": 347,
"64": 348,
"31": 349,
"000": 350,
"30": 351,
"75": 352,
"85": 353,
"86": 354,
"79": 355,
"84": 356,
"42": 357,
"43": 358,
"78": 359,
"95": 360,
"Ġ3": 361,
"96": 362,
"76": 363,
"94": 364,
"63": 365,
"74": 366,
"81": 367,
"53": 368,
"52": 369,
"41": 370,
"62": 371,
"82": 372,
"50": 373,
"83": 374,
"0001": 375,
"40": 376,
"Ġ2": 377,
"93": 378,
"92": 379,
"73": 380,
"Ġ66": 381,
"72": 382,
"61": 383,
"Ġ4": 384,
"51": 385,
"90": 386,
"80": 387,
"60": 388,
"Ġs": 389,
"Ġse": 390,
"lin": 391,
"tt": 392,
"Ġsett": 393,
"ling": 394,
"Ġsettling": 395,
"91": 396,
"ti": 397,
"Ġ64": 398,
"on": 399,
"Ġm": 400,
"oti": 401,
"Ġin": 402,
"Ġmoti": 403,
"Ġmotion": 404,
"71": 405,
"70": 406,
"Ġ59": 407,
"400": 408,
"Ġ89": 409,
"Ġ61": 410,
"9804": 411,
"Ġ57": 412,
"0002": 413,
"399": 414,
"Ġ63": 415,
"Ġ5": 416,
"Ġ58": 417,
"0003": 418,
"398": 419,
"Ġ114": 420,
"Ġ28": 421,
"Ġ29": 422,
"Ġ62": 423,
"Ġ69": 424,
"Ġ27": 425,
"9000": 426,
"Ġ60": 427,
"385": 428,
"0004": 429,
"Ġ65": 430,
"Ġ26": 431,
"Ġ73": 432,
"412": 433,
"380": 434,
"401": 435,
"Ġ67": 436,
"Ġ139": 437,
"0005": 438,
"154": 439,
"Ġ74": 440,
"218": 441,
"425": 442,
"Ġ68": 443,
"Ġ88": 444,
"372": 445,
"Ġ70": 446,
"Ġ25": 447,
"Ġ49": 448,
"Ġ71": 449,
"4000": 450,
"0006": 451,
"358": 452,
"3333": 453,
"6667": 454,
"382": 455,
"549": 456,
"Ġ24": 457,
"204": 458,
"548": 459,
"102": 460,
"0007": 461,
"0011": 462,
"397": 463,
"0012": 464,
"395": 465,
"282": 466,
"346": 467,
"114": 468,
"Ġ81": 469,
"550": 470,
"0010": 471,
"416": 472,
"383": 473,
"180": 474,
"Ġ72": 475,
"371": 476,
"410": 477,
"402": 478,
"375": 479,
"225": 480,
"0008": 481,
"419": 482,
"Ġ164": 483,
"394": 484,
"Ġ47": 485,
"393": 486,
"219": 487,
"0013": 488,
"Ġ48": 489,
"101": 490,
"392": 491,
"365": 492,
"379": 493,
"174": 494,
"426": 495,
"396": 496,
"345": 497,
"409": 498,
"201": 499,
"381": 500,
"376": 501,
"Ġ46": 502,
"035": 503,
"0014": 504,
"0009": 505,
"Ġ35": 506,
"Ġr": 507,
"424": 508,
"Ġ36": 509,
"212": 510,
"137": 511
},
"merges": [
[
"0",
"0"
],
[
"=",
"("
],
[
")",
","
],
[
"Ġ",
"a"
],
[
"Ġ",
"p"
],
[
"o",
"s"
],
[
"e",
"l"
],
[
"Ġ",
"o"
],
[
"b",
"j"
],
[
"v",
"el"
],
[
"Ċ",
"Ġ"
],
[
"Ġ",
"vel"
],
[
"Ġp",
"os"
],
[
"Ġo",
"bj"
],
[
"=",
"-"
],
[
"Ġa",
"v"
],
[
"Ġ",
"-"
],
[
"00",
"00"
],
[
"=(",
"-"
],
[
"Ġ",
"1"
],
[
"0",
"1"
],
[
"Ġ",
"0"
],
[
"1",
"2"
],
[
"1",
"4"
],
[
"3",
"9"
],
[
"0",
"2"
],
[
"1",
"3"
],
[
"1",
"5"
],
[
"1",
"0"
],
[
"1",
"8"
],
[
"1",
"6"
],
[
"1",
"7"
],
[
"Ġ",
"6"
],
[
"1",
"9"
],
[
"0",
"4"
],
[
"3",
"8"
],
[
"2",
"8"
],
[
"2",
"5"
],
[
"3",
"7"
],
[
"3",
"5"
],
[
"2",
"6"
],
[
"2",
"7"
],
[
"2",
"4"
],
[
"3",
"4"
],
[
"2",
"9"
],
[
"3",
"6"
],
[
"0",
"5"
],
[
"0",
"6"
],
[
"0",
"7"
],
[
"8",
"9"
],
[
"0",
"9"
],
[
"5",
"9"
],
[
"1",
"1"
],
[
"3",
"3"
],
[
"5",
"7"
],
[
"0",
"8"
],
[
"4",
"9"
],
[
"4",
"7"
],
[
"6",
"6"
],
[
"2",
"2"
],
[
"5",
"8"
],
[
"4",
"8"
],
[
"4",
"6"
],
[
"5",
"6"
],
[
"0",
"3"
],
[
"2",
"3"
],
[
"9",
"8"
],
[
"5",
"5"
],
[
"7",
"7"
],
[
"4",
"5"
],
[
"4",
"4"
],
[
"6",
"7"
],
[
"8",
"8"
],
[
"2",
"1"
],
[
"2",
"0"
],
[
"6",
"9"
],
[
"Ġ",
"7"
],
[
"9",
"9"
],
[
"6",
"8"
],
[
"r",
"a"
],
[
"m",
"e"
],
[
"F",
"ra"
],
[
"Fra",
"me"
],
[
"5",
"4"
],
[
"3",
"2"
],
[
"8",
"7"
],
[
"i",
"n"
],
[
"9",
"7"
],
[
"6",
"5"
],
[
"6",
"4"
],
[
"3",
"1"
],
[
"00",
"0"
],
[
"3",
"0"
],
[
"7",
"5"
],
[
"8",
"5"
],
[
"8",
"6"
],
[
"7",
"9"
],
[
"8",
"4"
],
[
"4",
"2"
],
[
"4",
"3"
],
[
"7",
"8"
],
[
"9",
"5"
],
[
"Ġ",
"3"
],
[
"9",
"6"
],
[
"7",
"6"
],
[
"9",
"4"
],
[
"6",
"3"
],
[
"7",
"4"
],
[
"8",
"1"
],
[
"5",
"3"
],
[
"5",
"2"
],
[
"4",
"1"
],
[
"6",
"2"
],
[
"8",
"2"
],
[
"5",
"0"
],
[
"8",
"3"
],
[
"00",
"01"
],
[
"4",
"0"
],
[
"Ġ",
"2"
],
[
"9",
"3"
],
[
"9",
"2"
],
[
"7",
"3"
],
[
"Ġ6",
"6"
],
[
"7",
"2"
],
[
"6",
"1"
],
[
"Ġ",
"4"
],
[
"5",
"1"
],
[
"9",
"0"
],
[
"8",
"0"
],
[
"6",
"0"
],
[
"Ġ",
"s"
],
[
"Ġs",
"e"
],
[
"l",
"in"
],
[
"t",
"t"
],
[
"Ġse",
"tt"
],
[
"lin",
"g"
],
[
"Ġsett",
"ling"
],
[
"9",
"1"
],
[
"t",
"i"
],
[
"Ġ6",
"4"
],
[
"o",
"n"
],
[
"Ġ",
"m"
],
[
"o",
"ti"
],
[
"Ġ",
"in"
],
[
"Ġm",
"oti"
],
[
"Ġmoti",
"on"
],
[
"7",
"1"
],
[
"7",
"0"
],
[
"Ġ",
"59"
],
[
"4",
"00"
],
[
"Ġ",
"89"
],
[
"Ġ6",
"1"
],
[
"98",
"04"
],
[
"Ġ",
"57"
],
[
"00",
"02"
],
[
"39",
"9"
],
[
"Ġ6",
"3"
],
[
"Ġ",
"5"
],
[
"Ġ",
"58"
],
[
"00",
"03"
],
[
"39",
"8"
],
[
"Ġ1",
"14"
],
[
"Ġ",
"28"
],
[
"Ġ",
"29"
],
[
"Ġ6",
"2"
],
[
"Ġ6",
"9"
],
[
"Ġ",
"27"
],
[
"9",
"000"
],
[
"Ġ6",
"0"
],
[
"38",
"5"
],
[
"00",
"04"
],
[
"Ġ6",
"5"
],
[
"Ġ",
"26"
],
[
"Ġ7",
"3"
],
[
"4",
"12"
],
[
"38",
"0"
],
[
"4",
"01"
],
[
"Ġ6",
"7"
],
[
"Ġ1",
"39"
],
[
"00",
"05"
],
[
"15",
"4"
],
[
"Ġ7",
"4"
],
[
"2",
"18"
],
[
"4",
"25"
],
[
"Ġ6",
"8"
],
[
"Ġ",
"88"
],
[
"37",
"2"
],
[
"Ġ7",
"0"
],
[
"Ġ",
"25"
],
[
"Ġ",
"49"
],
[
"Ġ7",
"1"
],
[
"4",
"000"
],
[
"00",
"06"
],
[
"35",
"8"
],
[
"33",
"33"
],
[
"66",
"67"
],
[
"38",
"2"
],
[
"5",
"49"
],
[
"Ġ",
"24"
],
[
"2",
"04"
],
[
"5",
"48"
],
[
"1",
"02"
],
[
"00",
"07"
],
[
"00",
"11"
],
[
"39",
"7"
],
[
"00",
"12"
],
[
"39",
"5"
],
[
"28",
"2"
],
[
"34",
"6"
],
[
"1",
"14"
],
[
"Ġ",
"81"
],
[
"55",
"0"
],
[
"00",
"10"
],
[
"4",
"16"
],
[
"38",
"3"
],
[
"18",
"0"
],
[
"Ġ7",
"2"
],
[
"37",
"1"
],
[
"4",
"10"
],
[
"4",
"02"
],
[
"37",
"5"
],
[
"2",
"25"
],
[
"00",
"08"
],
[
"4",
"19"
],
[
"Ġ1",
"64"
],
[
"39",
"4"
],
[
"Ġ",
"47"
],
[
"39",
"3"
],
[
"2",
"19"
],
[
"00",
"13"
],
[
"Ġ",
"48"
],
[
"1",
"01"
],
[
"39",
"2"
],
[
"36",
"5"
],
[
"37",
"9"
],
[
"17",
"4"
],
[
"4",
"26"
],
[
"39",
"6"
],
[
"34",
"5"
],
[
"4",
"09"
],
[
"2",
"01"
],
[
"38",
"1"
],
[
"37",
"6"
],
[
"Ġ",
"46"
],
[
"0",
"35"
],
[
"00",
"14"
],
[
"00",
"09"
],
[
"Ġ",
"35"
],
[
"Ġ",
"r"
],
[
"4",
"24"
],
[
"Ġ",
"36"
],
[
"2",
"12"
],
[
"13",
"7"
]
]
}
}