logic_bench_tokenizer / tokenizer.json
BY7O0un8yig8O's picture
Upload tokenizer
db16875 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<BOSOL>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<EOSTEP>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<EOSOL>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "[UNK]",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<BOSOL>": 0,
"<EOSTEP>": 1,
"<EOSOL>": 2,
"!": 3,
"'": 4,
"(": 5,
")": 6,
"+": 7,
",": 8,
"-": 9,
".": 10,
"/": 11,
"0": 12,
"1": 13,
"2": 14,
"3": 15,
"4": 16,
"5": 17,
"6": 18,
"7": 19,
"8": 20,
"9": 21,
":": 22,
"<": 23,
"=": 24,
">": 25,
"?": 26,
"@": 27,
"B": 28,
"E": 29,
"F": 30,
"L": 31,
"N": 32,
"O": 33,
"P": 34,
"S": 35,
"T": 36,
"U": 37,
"a": 38,
"b": 39,
"c": 40,
"d": 41,
"e": 42,
"f": 43,
"g": 44,
"h": 45,
"i": 46,
"k": 47,
"l": 48,
"m": 49,
"n": 50,
"o": 51,
"p": 52,
"q": 53,
"r": 54,
"s": 55,
"t": 56,
"u": 57,
"v": 58,
"w": 59,
"x": 60,
"y": 61,
"z": 62,
"ti": 63,
"on": 64,
"al": 65,
"le": 66,
"en": 67,
"or": 68,
"mp": 69,
"si": 70,
"ar": 71,
"an": 72,
"<<": 73,
">>": 74,
"ati": 75,
"ation": 76,
"st": 77,
"co": 78,
"mple": 79,
"in": 80,
"th": 81,
"ge": 82,
"as": 83,
"tu": 84,
"v1": 85,
"gn": 86,
"sign": 87,
"assign": 88,
"de": 89,
"enc": 90,
"pr": 91,
"gu": 92,
"age": 93,
"ru": 94,
"ac": 95,
"tim": 96,
"ce": 97,
"ori": 98,
"pe": 99,
".<": 100,
"OS": 101,
"we": 102,
"EOS": 103,
"<<<": 104,
">>>": 105,
"the": 106,
"@<<<": 107,
"EP": 108,
"TEP": 109,
"EOSTEP": 110,
">>>@": 111,
"re": 112,
"se": 113,
"is": 114,
"qu": 115,
"ing": 116,
"ent": 117,
"ex": 118,
"ip": 119,
"equ": 120,
"be": 121,
"at": 122,
"v2": 123,
"lan": 124,
"guage": 125,
"language": 126,
"am": 127,
"gr": 128,
"ming": 129,
"ogr": 130,
"progr": 131,
"amming": 132,
"programming": 133,
"ment": 134,
"us": 135,
"for": 136,
"to": 137,
"equation": 138,
"ed": 139,
"tice": 140,
"prac": 141,
"practice": 142,
"eed": 143,
"need": 144,
"needle": 145,
"ci": 146,
"fi": 147,
"iz": 148,
"op": 149,
"timiz": 150,
"optimiz": 151,
"optimization": 152,
"ntim": 153,
"runtim": 154,
"runtime": 155,
"mpar": 156,
"compar": 157,
"ison": 158,
"comparison": 159,
"ay": 160,
"hay": 161,
"stac": 162,
"haystac": 163,
"haystack": 164,
"ide": 165,
"guide": 166,
"benc": 167,
"hm": 168,
"ark": 169,
"benchm": 170,
"benchmark": 171,
"imple": 172,
"mentation": 173,
"implementation": 174,
"em": 175,
"mem": 176,
"ory": 177,
"memory": 178,
"usage": 179,
"tori": 180,
"tutori": 181,
"tutorial": 182,
"ef": 183,
"ency": 184,
"ciency": 185,
"ficiency": 186,
"efficiency": 187,
"it": 188,
"xit": 189,
"comple": 190,
"xity": 191,
"complexity": 192,
"ample": 193,
"example": 194,
"gori": 195,
"algori": 196,
"thm": 197,
"algorithm": 198,
"code": 199,
"ysi": 200,
"alysi": 201,
"analysi": 202,
"analysis": 203,
"ch": 204,
"arch": 205,
"search": 206,
"dat": 207,
"data": 208,
"man": 209,
"rfor": 210,
"perfor": 211,
"mance": 212,
"performance": 213,
"ctu": 214,
"stru": 215,
"cture": 216,
"structure": 217,
"nip": 218,
"snip": 219,
"pet": 220,
"snippet": 221,
"best": 222,
"ng": 223,
"no": 224,
"Fr": 225,
"om": 226,
"From": 227,
"es": 228,
"get": 229,
"Su": 230,
"bs": 231,
"kno": 232,
"ues": 233,
"val": 234,
"wn": 235,
"titu": 236,
"ting": 237,
"into": 238,
"Subs": 239,
"known": 240,
"values": 241,
"tituting": 242,
"Substituting": 243,
">>>@@<<<": 244,
"Th": 245,
"are": 246,
"v0": 247,
"lation": 248,
"lations": 249,
"The": 250,
"v3": 251,
"v4": 252,
"v5": 253,
"10": 254,
"v6": 255,
"v7": 256,
"hip": 257,
"tex": 258,
"and": 259,
"relations": 260,
"hips": 261,
"text": 262,
"relationships": 263,
"ab": 264,
"iab": 265,
"var": 266,
"iable": 267,
"variable": 268,
"v8": 269,
"v9": 270,
"v10": 271,
".@<<<": 272,
"?@<<<": 273,
"!@<<<": 274,
"v11": 275,
"v12": 276,
"v13": 277,
"of": 278,
"v14": 279,
"OL": 280,
"by": 281,
"con": 282,
"cal": 283,
"ep": 284,
"ly": 285,
"ou": 286,
"you": 287,
"end": 288,
"step": 289,
"This": 290,
"your": 291,
"v15": 292,
"equal": 293,
"v16": 294,
"v17": 295,
"v18": 296,
"v19": 297,
"v20": 298,
"variables": 299,
"v21": 300,
"v22": 301,
"'.": 302,
"'<<<": 303,
"'>>>": 304,
"),": 305,
"BOS": 306,
"Sh": 307,
"Usi": 308,
"ain": 309,
"athe": 310,
"cl": 311,
"cu": 312,
"eo": 313,
"er": 314,
"fin": 315,
"gin": 316,
"hat": 317,
"if": 318,
"lo": 319,
"lt": 320,
"mu": 321,
"mati": 322,
"min": 323,
"mathe": 324,
"ning": 325,
"ow": 326,
"pend": 327,
"swe": 328,
"sent": 329,
"tru": 330,
"twe": 331,
"tain": 332,
"ter": 333,
"ude": 334,
"what": 335,
"tial": 336,
"oning": 337,
"only": 338,
"all": 339,
"ential": 340,
"simu": 341,
"any": 342,
"aneo": 343,
"answe": 344,
"inde": 345,
"then": 346,
"that": 347,
"asoning": 348,
"assignment": 349,
"deter": 350,
"ence": 351,
"enclo": 352,
"EOSOL": 353,
"these": 354,
"reasoning": 355,
"sed": 356,
"sequ": 357,
"begin": 358,
"betwe": 359,
"usly": 360,
"equations": 361,
"not": 362,
"They": 363,
"These": 364,
"concl": 365,
"contain": 366,
"calcu": 367,
"'<<<'": 368,
"'>>>'.": 369,
"BOSOL": 370,
"Show": 371,
"Using": 372,
"final": 373,
"ltaneo": 374,
"matical": 375,
"mine": 376,
"mathematical": 377,
"pendent": 378,
"sentence": 379,
"true": 380,
"simultaneo": 381,
"answer": 382,
"independent": 383,
"assignments": 384,
"determine": 385,
"enclosed": 386,
"sequential": 387,
"beginning": 388,
"between": 389,
"conclude": 390,
"contains": 391,
"calculations": 392,
"simultaneously": 393,
"v23": 394,
"refor": 395,
"Therefor": 396,
"Therefore": 397,
"v24": 398,
"v25": 399,
"v26": 400,
"11": 401,
"v27": 402,
"v28": 403,
"Non": 404,
"None": 405,
"12": 406,
"No": 407,
"mbe": 408,
"nu": 409,
"spe": 410,
"tar": 411,
"tion": 412,
"ques": 413,
"cifi": 414,
"mber": 415,
"number": 416,
"specifi": 417,
"target": 418,
"question": 419,
"specified": 420,
",@<<<": 421,
"13": 422,
"14": 423,
"15": 424
},
"merges": [
"t i",
"o n",
"a l",
"l e",
"e n",
"o r",
"m p",
"s i",
"a r",
"a n",
"< <",
"> >",
"a ti",
"ati on",
"s t",
"c o",
"mp le",
"i n",
"t h",
"g e",
"a s",
"t u",
"v 1",
"g n",
"si gn",
"as sign",
"d e",
"en c",
"p r",
"g u",
"a ge",
"r u",
"a c",
"ti m",
"c e",
"or i",
"p e",
". <",
"O S",
"w e",
"E OS",
"<< <",
">> >",
"th e",
"@ <<<",
"E P",
"T EP",
"EOS TEP",
">>> @",
"r e",
"s e",
"i s",
"q u",
"in g",
"en t",
"e x",
"i p",
"e qu",
"b e",
"a t",
"v 2",
"l an",
"gu age",
"lan guage",
"a m",
"g r",
"m ing",
"o gr",
"pr ogr",
"am ming",
"progr amming",
"m ent",
"u s",
"f or",
"t o",
"equ ation",
"e d",
"ti ce",
"pr ac",
"prac tice",
"e ed",
"n eed",
"need le",
"c i",
"f i",
"i z",
"o p",
"tim iz",
"op timiz",
"optimiz ation",
"n tim",
"ru ntim",
"runtim e",
"mp ar",
"co mpar",
"is on",
"compar ison",
"a y",
"h ay",
"st ac",
"hay stac",
"haystac k",
"i de",
"gu ide",
"b enc",
"h m",
"ar k",
"benc hm",
"benchm ark",
"i mple",
"ment ation",
"imple mentation",
"e m",
"m em",
"or y",
"mem ory",
"us age",
"t ori",
"tu tori",
"tutori al",
"e f",
"enc y",
"ci ency",
"fi ciency",
"ef ficiency",
"i t",
"x it",
"co mple",
"xit y",
"comple xity",
"a mple",
"ex ample",
"g ori",
"al gori",
"th m",
"algori thm",
"co de",
"y si",
"al ysi",
"an alysi",
"analysi s",
"c h",
"ar ch",
"se arch",
"d at",
"dat a",
"m an",
"r for",
"pe rfor",
"man ce",
"perfor mance",
"c tu",
"st ru",
"ctu re",
"stru cture",
"n ip",
"s nip",
"pe t",
"snip pet",
"be st",
"n g",
"n o",
"F r",
"o m",
"Fr om",
"e s",
"ge t",
"S u",
"b s",
"k no",
"u es",
"v al",
"w n",
"ti tu",
"ti ng",
"in to",
"Su bs",
"kno wn",
"val ues",
"titu ting",
"Subs tituting",
">>>@ @<<<",
"T h",
"ar e",
"v 0",
"l ation",
"lation s",
"Th e",
"v 3",
"v 4",
"v 5",
"1 0",
"v 6",
"v 7",
"h ip",
"t ex",
"an d",
"re lations",
"hip s",
"tex t",
"relations hips",
"a b",
"i ab",
"v ar",
"iab le",
"var iable",
"v 8",
"v 9",
"v1 0",
". @<<<",
"? @<<<",
"! @<<<",
"v1 1",
"v1 2",
"v1 3",
"o f",
"v1 4",
"O L",
"b y",
"c on",
"c al",
"e p",
"l y",
"o u",
"y ou",
"en d",
"st ep",
"Th is",
"you r",
"v1 5",
"equ al",
"v1 6",
"v1 7",
"v1 8",
"v1 9",
"v2 0",
"variable s",
"v2 1",
"v2 2",
"' .",
"' <<<",
"' >>>",
") ,",
"B OS",
"S h",
"U si",
"a in",
"a the",
"c l",
"c u",
"e o",
"e r",
"f in",
"g in",
"h at",
"i f",
"l o",
"l t",
"m u",
"m ati",
"m in",
"m athe",
"n ing",
"o w",
"p end",
"s we",
"s ent",
"t ru",
"t we",
"t ain",
"t er",
"u de",
"w hat",
"ti al",
"on ing",
"on ly",
"al l",
"en tial",
"si mu",
"an y",
"an eo",
"an swe",
"in de",
"th en",
"th at",
"as oning",
"assign ment",
"de ter",
"enc e",
"enc lo",
"EOS OL",
"the se",
"re asoning",
"se d",
"se qu",
"be gin",
"be twe",
"us ly",
"equation s",
"no t",
"The y",
"The se",
"con cl",
"con tain",
"cal cu",
"'<<< '",
"'>>> '.",
"BOS OL",
"Sh ow",
"Usi ng",
"fin al",
"lt aneo",
"mati cal",
"min e",
"mathe matical",
"pend ent",
"sent ence",
"tru e",
"simu ltaneo",
"answe r",
"inde pendent",
"assignment s",
"deter mine",
"enclo sed",
"sequ ential",
"begin ning",
"betwe en",
"concl ude",
"contain s",
"calcu lations",
"simultaneo usly",
"v2 3",
"re for",
"The refor",
"Therefor e",
"v2 4",
"v2 5",
"v2 6",
"1 1",
"v2 7",
"v2 8",
"N on",
"Non e",
"1 2",
"N o",
"m be",
"n u",
"s pe",
"t ar",
"ti on",
"qu es",
"ci fi",
"mbe r",
"nu mber",
"spe cifi",
"tar get",
"ques tion",
"specifi ed",
", @<<<",
"1 3",
"1 4",
"1 5"
]
}
}