model2 / tokenizer.json
pradeep4321's picture
Training in progress epoch 0
b90c730
{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 40,
"strategy": "LongestFirst",
"stride": 0
},
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": false,
"use_regex": true
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": "",
"end_of_word_suffix": "",
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<|endoftext|>": 0,
"!": 1,
"\"": 2,
"#": 3,
"$": 4,
"%": 5,
"&": 6,
"'": 7,
"(": 8,
")": 9,
"*": 10,
"+": 11,
",": 12,
"-": 13,
".": 14,
"/": 15,
"0": 16,
"1": 17,
"2": 18,
"3": 19,
"4": 20,
"5": 21,
"6": 22,
"7": 23,
"8": 24,
"9": 25,
":": 26,
";": 27,
"<": 28,
"=": 29,
">": 30,
"?": 31,
"@": 32,
"A": 33,
"B": 34,
"C": 35,
"D": 36,
"E": 37,
"F": 38,
"G": 39,
"H": 40,
"I": 41,
"J": 42,
"K": 43,
"L": 44,
"M": 45,
"N": 46,
"O": 47,
"P": 48,
"Q": 49,
"R": 50,
"S": 51,
"T": 52,
"U": 53,
"V": 54,
"W": 55,
"X": 56,
"Y": 57,
"Z": 58,
"[": 59,
"\\": 60,
"]": 61,
"^": 62,
"_": 63,
"`": 64,
"a": 65,
"b": 66,
"c": 67,
"d": 68,
"e": 69,
"f": 70,
"g": 71,
"h": 72,
"i": 73,
"j": 74,
"k": 75,
"l": 76,
"m": 77,
"n": 78,
"o": 79,
"p": 80,
"q": 81,
"r": 82,
"s": 83,
"t": 84,
"u": 85,
"v": 86,
"w": 87,
"x": 88,
"y": 89,
"z": 90,
"{": 91,
"|": 92,
"}": 93,
"~": 94,
"¡": 95,
"¢": 96,
"£": 97,
"¤": 98,
"¥": 99,
"¦": 100,
"§": 101,
"¨": 102,
"©": 103,
"ª": 104,
"«": 105,
"¬": 106,
"®": 107,
"¯": 108,
"°": 109,
"±": 110,
"²": 111,
"³": 112,
"´": 113,
"µ": 114,
"¶": 115,
"·": 116,
"¸": 117,
"¹": 118,
"º": 119,
"»": 120,
"¼": 121,
"½": 122,
"¾": 123,
"¿": 124,
"À": 125,
"Á": 126,
"Â": 127,
"Ã": 128,
"Ä": 129,
"Å": 130,
"Æ": 131,
"Ç": 132,
"È": 133,
"É": 134,
"Ê": 135,
"Ë": 136,
"Ì": 137,
"Í": 138,
"Î": 139,
"Ï": 140,
"Ð": 141,
"Ñ": 142,
"Ò": 143,
"Ó": 144,
"Ô": 145,
"Õ": 146,
"Ö": 147,
"×": 148,
"Ø": 149,
"Ù": 150,
"Ú": 151,
"Û": 152,
"Ü": 153,
"Ý": 154,
"Þ": 155,
"ß": 156,
"à": 157,
"á": 158,
"â": 159,
"ã": 160,
"ä": 161,
"å": 162,
"æ": 163,
"ç": 164,
"è": 165,
"é": 166,
"ê": 167,
"ë": 168,
"ì": 169,
"í": 170,
"î": 171,
"ï": 172,
"ð": 173,
"ñ": 174,
"ò": 175,
"ó": 176,
"ô": 177,
"õ": 178,
"ö": 179,
"÷": 180,
"ø": 181,
"ù": 182,
"ú": 183,
"û": 184,
"ü": 185,
"ý": 186,
"þ": 187,
"ÿ": 188,
"Ā": 189,
"ā": 190,
"Ă": 191,
"ă": 192,
"Ą": 193,
"ą": 194,
"Ć": 195,
"ć": 196,
"Ĉ": 197,
"ĉ": 198,
"Ċ": 199,
"ċ": 200,
"Č": 201,
"č": 202,
"Ď": 203,
"ď": 204,
"Đ": 205,
"đ": 206,
"Ē": 207,
"ē": 208,
"Ĕ": 209,
"ĕ": 210,
"Ė": 211,
"ė": 212,
"Ę": 213,
"ę": 214,
"Ě": 215,
"ě": 216,
"Ĝ": 217,
"ĝ": 218,
"Ğ": 219,
"ğ": 220,
"Ġ": 221,
"ġ": 222,
"Ģ": 223,
"ģ": 224,
"Ĥ": 225,
"ĥ": 226,
"Ħ": 227,
"ħ": 228,
"Ĩ": 229,
"ĩ": 230,
"Ī": 231,
"ī": 232,
"Ĭ": 233,
"ĭ": 234,
"Į": 235,
"į": 236,
"İ": 237,
"ı": 238,
"IJ": 239,
"ij": 240,
"Ĵ": 241,
"ĵ": 242,
"Ķ": 243,
"ķ": 244,
"ĸ": 245,
"Ĺ": 246,
"ĺ": 247,
"Ļ": 248,
"ļ": 249,
"Ľ": 250,
"ľ": 251,
"Ŀ": 252,
"ŀ": 253,
"Ł": 254,
"ł": 255,
"Ń": 256,
"Ġt": 257,
"Ġa": 258,
"Ġo": 259,
"at": 260,
"re": 261,
"Ġf": 262,
"in": 263,
"Ġth": 264,
"lo": 265,
"Ġc": 266,
"on": 267,
"Ġof": 268,
"al": 269,
"ve": 270,
"se": 271,
"low": 272,
"Ġu": 273,
"pe": 274,
"Ġuse": 275,
"Ġto": 276,
"is": 277,
"val": 278,
"Ġval": 279,
"Ġvalve": 280,
"ic": 281,
"ro": 282,
"Ġflow": 283,
"Ġthat": 284,
"he": 285,
"te": 286,
"Ġin": 287,
"ion": 288,
"ype": 289,
"Ġd": 290,
"Ġp": 291,
"Ġtype": 292,
"Ġan": 293,
"Ġcon": 294,
"It": 295,
"id": 296,
"Ġs": 297,
"Ġis": 298,
"ĠIt": 299,
"Ġand": 300,
"tro": 301,
"nt": 302,
"Ġb": 303,
"Ġused": 304,
"Ġcontro": 305,
"Ġcontrol": 306,
"lic": 307,
"Ġre": 308,
"ing": 309,
"Ġthe": 310,
"pp": 311,
"Ġw": 312,
"Ġor": 313,
"ation": 314,
"Ġuses": 315,
"Ġpre": 316,
"le": 317,
"lu": 318,
"or": 319,
"Ġapp": 320,
"lication": 321,
"Ġapplication": 322,
"Ġapplications": 323,
"ha": 324,
"ire": 325,
"Ġon": 326,
"Ġflu": 327,
"Ġfor": 328,
"Ġofte": 329,
"Ġbe": 330,
"Ġwhe": 331,
"Ġfluid": 332,
"Ġoften": 333,
"ck": 334,
"ff": 335,
"ta": 336,
"ate": 337,
"ical": 338,
"an": 339,
"ct": 340,
"ed": 341,
"ig": 342,
"off": 343,
"su": 344,
"ste": 345,
"yste": 346,
"Ġclo": 347,
"vent": 348,
"isc": 349,
"Ġdisc": 350,
"Ġsyste": 351,
"Ġprevent": 352,
"sure": 353,
"Ġsystem": 354,
"es": 355,
"gu": 356,
"ling": 357,
"qu": 358,
"ra": 359,
"ssure": 360,
"ts": 361,
"ut": 362,
"Ġh": 363,
"Ġti": 364,
"Ġas": 365,
"Ġcan": 366,
"rot": 367,
"ide": 368,
"Ġsu": 369,
"Ġregu": 370,
"Ġwhere": 371,
"ction": 372,
"Ġsystems": 373,
"Ġregul": 374,
"ack": 375,
"ch": 376,
"eed": 377,
"flow": 378,
"ly": 379,
"lin": 380,
"llow": 381,
"me": 382,
"mat": 383,
"sta": 384,
"tling": 385,
"Ġe": 386,
"Ġm": 387,
"Ġr": 388,
"Ġha": 389,
"Ġallow": 390,
"Ġope": 391,
"ating": 392,
"Ġthrot": 393,
"Ġche": 394,
"Ġdi": 395,
"Ġdire": 396,
"Ġconsta": 397,
"Ġback": 398,
"Ġrequ": 399,
"Ġpressure": 400,
"Ġwhen": 401,
"igh": 402,
"Ġclose": 403,
"Ġtil": 404,
"Ġsuch": 405,
"Ġregulate": 406,
"eeds": 407,
"Ġhas": 408,
"Ġopen": 409,
"Ġthrottling": 410,
"Ġcheck": 411,
"Ġdirection": 412,
"Ġconstant": 413,
"Ġbackflow": 414,
"Ġrequire": 415,
"ble": 416,
"dra": 417,
"it": 418,
"needs": 419,
"oo": 420,
"omat": 421,
"ps": 422,
"rical": 423,
"sha": 424,
"us": 425,
"ulic": 426,
"ydra": 427,
"Ġg": 428,
"Ġhe": 429,
"Ġle": 430,
"Ġide": 431,
"Ġlin": 432,
"Ġneeds": 433,
"Ġaut": 434,
"vel": 435,
"ped": 436,
"Ġone": 437,
"tain": 438,
"ically": 439,
"Ġhigh": 440,
"Ġhydra": 441,
"Ġmo": 442,
"Ġrate": 443,
"Ġallows": 444,
"Ġclosed": 445,
"Ġtilts": 446,
"Ġrequired": 447,
"omatically": 448,
"shaped": 449,
"Ġlevel": 450,
"Ġideal": 451,
"Ġautomatically": 452,
"Ġhydraulic": 453,
"The": 454,
"ac": 455,
"ap": 456,
"ar": 457,
"ain": 458,
"ase": 459,
"ape": 460,
"be": 461,
"bin": 462,
"cc": 463,
"ci": 464,
"cte": 465,
"ctro": 466,
"cha": 467,
"ceeds": 468,
"dj": 469,
"dire": 470,
"drical": 471,
"dus": 472,
"ee": 473,
"er": 474,
"eu": 475,
"ere": 476,
"eut": 477,
"ear": 478,
"ge": 479,
"gm": 480,
"hra": 481,
"hut": 482,
"ip": 483,
"ir": 484,
"ite": 485,
"ita": 486,
"ies": 487,
"ible": 488,
"lps": 489,
"mac": 490,
"mbin": 491,
"no": 492,
"nical": 493,
"ned": 494,
"nction": 495,
"nit": 496,
"neu": 497,
"os": 498,
"ow": 499,
"ole": 500,
"ops": 501,
"ombin": 502,
"pre": 503,
"pon": 504,
"phe": 505,
"pid": 506,
"rt": 507,
"ru": 508,
"rig": 509,
"ries": 510,
"rmac": 511,
"sing": 512,
"side": 513,
"spon": 514,
"tw": 515,
"tion": 516,
"ting": 517,
"tops": 518,
"tries": 519,
"unction": 520,
"wo": 521,
"xceeds": 522,
"xible": 523,
"ylin": 524,
"Ġra": 525,
"Ġrot": 526,
"ĠThe": 527,
"Ġir": 528,
"Ġtigh": 529,
"Ġtape": 530,
"Ġtwo": 531,
"Ġacc": 532,
"Ġadj": 533,
"Ġopp": 534,
"athe": 535,
"red": 536,
"ree": 537,
"Ġflo": 538,
"Ġfle": 539,
"Ġfoo": 540,
"Ġfunction": 541,
"Ġthan": 542,
"Ġthree": 543,
"lobe": 544,
"Ġcoo": 545,
"Ġcer": 546,
"Ġcombin": 547,
"Ġcylin": 548,
"all": 549,
"ves": 550,
"vert": 551,
"Ġup": 552,
"Ġunit": 553,
"rote": 554,
"Ġindus": 555,
"Ġinside": 556,
"Ġdes": 557,
"Ġdow": 558,
"Ġdru": 559,
"Ġplu": 560,
"Ġpor": 561,
"Ġpha": 562,
"Ġpneu": 563,
"Ġprote": 564,
"Ġshut": 565,
"Ġsole": 566,
"Ġsphe": 567,
"Ġstops": 568,
"nts": 569,
"Ġball": 570,
"Ġrele": 571,
"Ġredire": 572,
"Ġrespon": 573,
"Ġwed": 574,
"Ġwit": 575,
"Ġpreci": 576,
"lectro": 577,
"Ġonly": 578,
"Ġbetw": 579,
"ffere": 580,
"igned": 581,
"Ġclosure": 582,
"Ġclosing": 583,
"Ġprevents": 584,
"quip": 585,
"Ġtime": 586,
"idents": 587,
"Ġsuita": 588,
"Ġregulating": 589,
"ment": 590,
"mecha": 591,
"matic": 592,
"Ġexceeds": 593,
"Ġelectro": 594,
"Ġequip": 595,
"Ġmain": 596,
"Ġrathe": 597,
"Ġdiap": 598,
"Ġdivert": 599,
"Ġdiffere": 600,
"Ġcloses": 601,
"Ġtilting": 602,
"Ġopening": 603,
"usts": 604,
"Ġgate": 605,
"Ġglobe": 606,
"Ġheating": 607,
"Ġhelps": 608,
"Ġlines": 609,
"Ġlinear": 610,
"Ġmotion": 611,
"Ġmoves": 612,
"Ġrates": 613,
"cted": 614,
"een": 615,
"eutical": 616,
"hragm": 617,
"noid": 618,
"osite": 619,
"pressure": 620,
"rigation": 621,
"rmaceutical": 622,
"Ġrapid": 623,
"Ġrotating": 624,
"Ġirrigation": 625,
"Ġtight": 626,
"Ġtapered": 627,
"Ġaccidents": 628,
"Ġadjusts": 629,
"Ġopposite": 630,
"Ġfloat": 631,
"Ġflexible": 632,
"Ġfood": 633,
"Ġfunctions": 634,
"Ġcooling": 635,
"Ġcertain": 636,
"Ġcombines": 637,
"Ġcylindrical": 638,
"Ġindustries": 639,
"Ġdesigned": 640,
"Ġdown": 641,
"Ġdrum": 642,
"Ġplug": 643,
"Ġports": 644,
"Ġpharmaceutical": 645,
"Ġpneumatic": 646,
"Ġprotect": 647,
"Ġsolenoid": 648,
"Ġspherical": 649,
"Ġrelease": 650,
"Ġredirected": 651,
"Ġresponse": 652,
"Ġwedge": 653,
"Ġwith": 654,
"Ġprecise": 655,
"Ġbetween": 656,
"Ġsuitable": 657,
"mechanical": 658,
"Ġelectromechanical": 659,
"Ġequipment": 660,
"Ġmaintain": 661,
"Ġrather": 662,
"Ġdiaphragm": 663,
"Ġdifferent": 664
},
"merges": [
"Ġ t",
"Ġ a",
"Ġ o",
"a t",
"r e",
"Ġ f",
"i n",
"Ġt h",
"l o",
"Ġ c",
"o n",
"Ġo f",
"a l",
"v e",
"s e",
"lo w",
"Ġ u",
"p e",
"Ġu se",
"Ġt o",
"i s",
"v al",
"Ġ val",
"Ġval ve",
"i c",
"r o",
"Ġf low",
"Ġth at",
"h e",
"t e",
"Ġ in",
"i on",
"y pe",
"Ġ d",
"Ġ p",
"Ġt ype",
"Ġa n",
"Ġc on",
"I t",
"i d",
"Ġ s",
"Ġ is",
"Ġ It",
"Ġan d",
"t ro",
"n t",
"Ġ b",
"Ġuse d",
"Ġcon tro",
"Ġcontro l",
"l ic",
"Ġ re",
"in g",
"Ġth e",
"p p",
"Ġ w",
"Ġo r",
"at ion",
"Ġuse s",
"Ġp re",
"l e",
"l u",
"o r",
"Ġa pp",
"lic ation",
"Ġapp lication",
"Ġapplication s",
"h a",
"i re",
"Ġo n",
"Ġf lu",
"Ġf or",
"Ġof te",
"Ġb e",
"Ġw he",
"Ġflu id",
"Ġofte n",
"c k",
"f f",
"t a",
"at e",
"ic al",
"a n",
"c t",
"e d",
"i g",
"o ff",
"s u",
"s te",
"y ste",
"Ġc lo",
"ve nt",
"is c",
"Ġd isc",
"Ġs yste",
"Ġpre vent",
"su re",
"Ġsyste m",
"e s",
"g u",
"l ing",
"q u",
"r a",
"s sure",
"t s",
"u t",
"Ġ h",
"Ġt i",
"Ġa s",
"Ġc an",
"ro t",
"id e",
"Ġs u",
"Ġre gu",
"Ġwhe re",
"ct ion",
"Ġsystem s",
"Ġregu l",
"a ck",
"c h",
"e ed",
"f low",
"l y",
"l in",
"l low",
"m e",
"m at",
"s ta",
"t ling",
"Ġ e",
"Ġ m",
"Ġ r",
"Ġ ha",
"Ġa llow",
"Ġo pe",
"at ing",
"Ġth rot",
"Ġc he",
"Ġd i",
"Ġd ire",
"Ġcon sta",
"Ġb ack",
"Ġre qu",
"Ġpre ssure",
"Ġwhe n",
"ig h",
"Ġclo se",
"Ġti l",
"Ġsu ch",
"Ġregul ate",
"eed s",
"Ġha s",
"Ġope n",
"Ġthrot tling",
"Ġche ck",
"Ġdire ction",
"Ġconsta nt",
"Ġback flow",
"Ġrequ ire",
"b le",
"d ra",
"i t",
"n eeds",
"o o",
"o mat",
"p s",
"r ical",
"s ha",
"u s",
"u lic",
"y dra",
"Ġ g",
"Ġ he",
"Ġ le",
"Ġ ide",
"Ġ lin",
"Ġ needs",
"Ġa ut",
"ve l",
"pe d",
"Ġon e",
"ta in",
"ical ly",
"Ġh igh",
"Ġh ydra",
"Ġm o",
"Ġr ate",
"Ġallow s",
"Ġclose d",
"Ġtil ts",
"Ġrequire d",
"omat ically",
"sha ped",
"Ġle vel",
"Ġide al",
"Ġaut omatically",
"Ġhydra ulic",
"T he",
"a c",
"a p",
"a r",
"a in",
"a se",
"a pe",
"b e",
"b in",
"c c",
"c i",
"c te",
"c tro",
"c ha",
"c eeds",
"d j",
"d ire",
"d rical",
"d us",
"e e",
"e r",
"e u",
"e re",
"e ut",
"e ar",
"g e",
"g m",
"h ra",
"h ut",
"i p",
"i r",
"i te",
"i ta",
"i es",
"i ble",
"l ps",
"m ac",
"m bin",
"n o",
"n ical",
"n ed",
"n ction",
"n it",
"n eu",
"o s",
"o w",
"o le",
"o ps",
"o mbin",
"p re",
"p on",
"p he",
"p id",
"r t",
"r u",
"r ig",
"r ies",
"r mac",
"s ing",
"s ide",
"s pon",
"t w",
"t ion",
"t ing",
"t ops",
"t ries",
"u nction",
"w o",
"x ceeds",
"x ible",
"y lin",
"Ġ ra",
"Ġ rot",
"Ġ The",
"Ġ ir",
"Ġt igh",
"Ġt ape",
"Ġt wo",
"Ġa cc",
"Ġa dj",
"Ġo pp",
"at he",
"re d",
"re e",
"Ġf lo",
"Ġf le",
"Ġf oo",
"Ġf unction",
"Ġth an",
"Ġth ree",
"lo be",
"Ġc oo",
"Ġc er",
"Ġc ombin",
"Ġc ylin",
"al l",
"ve s",
"ve rt",
"Ġu p",
"Ġu nit",
"ro te",
"Ġin dus",
"Ġin side",
"Ġd es",
"Ġd ow",
"Ġd ru",
"Ġp lu",
"Ġp or",
"Ġp ha",
"Ġp neu",
"Ġp rote",
"Ġs hut",
"Ġs ole",
"Ġs phe",
"Ġs tops",
"nt s",
"Ġb all",
"Ġre le",
"Ġre dire",
"Ġre spon",
"Ġw ed",
"Ġw it",
"Ġpre ci",
"le ctro",
"Ġon ly",
"Ġbe tw",
"ff ere",
"ig ned",
"Ġclo sure",
"Ġclo sing",
"Ġprevent s",
"qu ip",
"Ġti me",
"ide nts",
"Ġsu ita",
"Ġregul ating",
"me nt",
"me cha",
"mat ic",
"Ġe xceeds",
"Ġe lectro",
"Ġe quip",
"Ġm ain",
"Ġr athe",
"Ġdi ap",
"Ġdi vert",
"Ġdi ffere",
"Ġclose s",
"Ġtil ting",
"Ġopen ing",
"us ts",
"Ġg ate",
"Ġg lobe",
"Ġhe ating",
"Ġhe lps",
"Ġlin es",
"Ġlin ear",
"Ġmo tion",
"Ġmo ves",
"Ġrate s",
"cte d",
"ee n",
"eut ical",
"hra gm",
"no id",
"os ite",
"pre ssure",
"rig ation",
"rmac eutical",
"Ġra pid",
"Ġrot ating",
"Ġir rigation",
"Ġtigh t",
"Ġtape red",
"Ġacc idents",
"Ġadj usts",
"Ġopp osite",
"Ġflo at",
"Ġfle xible",
"Ġfoo d",
"Ġfunction s",
"Ġcoo ling",
"Ġcer tain",
"Ġcombin es",
"Ġcylin drical",
"Ġindus tries",
"Ġdes igned",
"Ġdow n",
"Ġdru m",
"Ġplu g",
"Ġpor ts",
"Ġpha rmaceutical",
"Ġpneu matic",
"Ġprote ct",
"Ġsole noid",
"Ġsphe rical",
"Ġrele ase",
"Ġredire cted",
"Ġrespon se",
"Ġwed ge",
"Ġwit h",
"Ġpreci se",
"Ġbetw een",
"Ġsuita ble",
"mecha nical",
"Ġelectro mechanical",
"Ġequip ment",
"Ġmain tain",
"Ġrathe r",
"Ġdiap hragm",
"Ġdiffere nt"
]
}
}