backend-bpe-tokenizer / tokenizer.json
nursimakgul's picture
Upload tokenizer.json with huggingface_hub
a1a1354 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": []
},
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 1
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 1
}
}
],
"special_tokens": {
"<s>": {
"id": "<s>",
"ids": [
2
],
"tokens": [
"<s>"
]
},
"</s>": {
"id": "</s>",
"ids": [
3
],
"tokens": [
"</s>"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"vocab": {
"<mask>": 4,
"er": 5,
"in": 6,
"on": 7,
"es": 8,
"an": 9,
"at": 10,
"en": 11,
"al": 12,
"ed": 13,
"▁s": 14,
"is": 15,
"ic": 16,
"ar": 17,
"or": 18,
"▁p": 19,
"▁c": 20,
"ing": 21,
"un": 22,
"it": 23,
"el": 24,
"ro": 25,
"re": 26,
"le": 27,
"us": 28,
"▁t": 29,
"as": 30,
"ac": 31,
"▁un": 32,
"il": 33,
"▁d": 34,
"▁m": 35,
"ol": 36,
"▁b": 37,
"et": 38,
"ess": 39,
"om": 40,
"id": 41,
"ly": 42,
"ion": 43,
"ul": 44,
"os": 45,
"ra": 46,
"▁h": 47,
"ph": 48,
"ur": 49,
"ch": 50,
"▁n": 51,
"▁f": 52,
"ab": 53,
"ot": 54,
"ent": 55,
"em": 56,
"am": 57,
"ut": 58,
"▁g": 59,
"ter": 60,
"ad": 61,
"ous": 62,
"▁o": 63,
"og": 64,
"um": 65,
"ri": 66,
"ec": 67,
"▁w": 68,
"ist": 69,
"iv": 70,
"im": 71,
"ag": 72,
"iz": 73,
"ant": 74,
"op": 75,
"▁l": 76,
"▁re": 77,
"oc": 78,
"ation": 79,
"▁in": 80,
"od": 81,
"ig": 82,
"ers": 83,
"ap": 84,
"▁non": 85,
"qu": 86,
"ir": 87,
"ia": 88,
"▁C": 89,
"▁S": 90,
"ness": 91,
"ver": 92,
"ical": 93,
"▁e": 94,
"ip": 95,
"▁A": 96,
"ate": 97,
"if": 98,
"est": 99,
"st": 100,
"ub": 101,
"▁r": 102,
"ell": 103,
"▁M": 104,
"up": 105,
"ow": 106,
"▁P": 107,
"th": 108,
"and": 109,
"▁B": 110,
"▁st": 111,
"ism": 112,
"ian": 113,
"he": 114,
"▁v": 115,
"able": 116,
"ine": 117,
"uc": 118,
"ill": 119,
"ast": 120,
"ud": 121,
"ish": 122,
"ep": 123,
"all": 124,
"ib": 125,
"▁ch": 126,
"ies": 127,
"ity": 128,
"▁a": 129,
"ae": 130,
"ite": 131,
"▁T": 132,
"ob": 133,
"ard": 134,
"▁pre": 135,
"yl": 136,
"ak": 137,
"ect": 138,
"ial": 139,
"▁H": 140,
"▁L": 141,
"▁sp": 142,
"▁k": 143,
"▁G": 144,
"▁over": 145,
"▁D": 146,
"end": 147,
"ff": 148,
"ug": 149,
"oph": 150,
"av": 151,
"ce": 152,
"ry": 153,
"ran": 154,
"ive": 155,
"ay": 156,
"▁pro": 157,
"▁con": 158,
"sh": 159,
"se": 160,
"ated": 161,
"per": 162,
"ost": 163,
"ke": 164,
"ath": 165,
"olog": 166,
"▁th": 167,
"eness": 168,
"▁co": 169,
"ass": 170,
"ack": 171,
"▁qu": 172,
"ang": 173,
"ie": 174,
"▁R": 175,
"▁sub": 176,
"ted": 177,
"der": 178,
"▁E": 179,
"con": 180,
"▁ph": 181,
"ically": 182,
"▁hy": 183,
"ric": 184,
"pl": 185,
"ach": 186,
"ize": 187,
"man": 188,
"▁K": 189,
"▁ex": 190,
"eg": 191,
"▁ant": 192,
"▁dis": 193,
"yn": 194,
"ort": 195,
"ex": 196,
"act": 197,
"▁j": 198,
"ens": 199,
"ht": 200,
"▁sc": 201,
"orm": 202,
"▁sh": 203,
"art": 204,
"ator": 205,
"▁N": 206,
"oid": 207,
"ier": 208,
"ely": 209,
"eb": 210,
"▁F": 211,
"ace": 212,
"lo": 213,
"ron": 214,
"ind": 215,
"ct": 216,
"▁sup": 217,
"ious": 218,
"pro": 219,
"ok": 220,
"raph": 221,
"▁ar": 222,
"ef": 223,
"▁pr": 224,
"ating": 225,
"ment": 226,
"ick": 227,
"li": 228,
"ys": 229,
"aw": 230,
"und": 231,
"cl": 232,
"ym": 233,
"▁out": 234,
"one": 235,
"oth": 236,
"ain": 237,
"▁W": 238,
"rom": 239,
"elf": 240,
"▁an": 241,
"▁inter": 242,
"te": 243,
"ide": 244,
"erm": 245,
"rop": 246,
"▁O": 247,
"▁uns": 248,
"ne": 249,
"co": 250,
"▁pl": 251,
"ev": 252,
"ith": 253,
"ock": 254,
"ram": 255,
"▁im": 256,
"ure": 257,
"less": 258,
"▁al": 259,
"ary": 260,
"▁fl": 261,
"ust": 262,
"ult": 263,
"ingly": 264,
"ium": 265,
"ros": 266,
"pt": 267,
"bo": 268,
"ful": 269,
"ah": 270,
"ov": 271,
"▁self": 272,
"of": 273,
"ood": 274,
"▁mis": 275,
"com": 276,
"▁sem": 277,
"ber": 278,
"▁J": 279,
"▁ac": 280,
"ility": 281,
"ew": 282,
"▁le": 283,
"▁en": 284,
"▁I": 285,
"▁ro": 286,
"ear": 287,
"ille": 288,
"▁par": 289,
"ong": 290,
"ograph": 291,
"bl": 292,
"ise": 293,
"orn": 294,
"▁am": 295,
"▁per": 296,
"ord": 297,
"ized": 298,
"our": 299,
"eter": 300,
"iness": 301,
"▁cl": 302,
"▁com": 303,
"ular": 304,
"▁und": 305,
"sp": 306,
"idae": 307,
"ally": 308,
"cy": 309,
"age": 310,
"▁bl": 311,
"ight": 312,
"tr": 313,
"rad": 314,
"▁super": 315,
"ox": 316,
"ge": 317,
"row": 318,
"pr": 319,
"▁V": 320,
"ash": 321,
"let": 322,
"int": 323,
"led": 324,
"eth": 325,
"ative": 326,
"▁tw": 327,
"ont": 328,
"att": 329,
"ore": 330,
"▁ad": 331,
"ling": 332,
"ization": 333,
"▁well": 334,
"▁ap": 335,
"iss": 336,
"ins": 337,
"out": 338,
"▁wh": 339,
"ail": 340,
"▁pol": 341,
"▁y": 342,
"fl": 343,
"▁Ch": 344,
"ax": 345,
"az": 346,
"ice": 347,
"ater": 348,
"▁tr": 349,
"old": 350,
"ence": 351,
"ee": 352,
"ied": 353,
"abl": 354,
"▁z": 355,
"esc": 356,
"asi": 357,
"ville": 358,
"och": 359,
"ole": 360,
"ich": 361,
"ose": 362,
"ire": 363,
"izing": 364,
"ond": 365,
"ogen": 366,
"like": 367,
"▁under": 368,
"yt": 369,
"rig": 370,
"ik": 371,
"ates": 372,
"▁sl": 373,
"ann": 374,
"ept": 375,
"yp": 376,
"ile": 377,
"▁for": 378,
"vers": 379,
"▁de": 380,
"▁em": 381,
"own": 382,
"▁ab": 383,
"ively": 384,
"▁el": 385,
"atic": 386,
"ign": 387,
"oll": 388,
"▁An": 389,
"iest": 390,
"▁car": 391,
"we": 392,
"▁mon": 393,
"ability": 394,
"▁or": 395,
"omy": 396,
"istic": 397,
"ting": 398,
"ey": 399,
"▁quasi": 400,
"ors": 401,
"ull": 402,
"ily": 403,
"rans": 404,
"ship": 405,
"▁ne": 406,
"ea": 407,
"rim": 408,
"▁res": 409,
"▁ep": 410,
"ably": 411,
"oss": 412,
"rit": 413,
"uck": 414,
"oot": 415,
"ark": 416,
"erv": 417,
"ush": 418,
"ton": 419,
"ou": 420,
"ork": 421,
"▁gl": 422,
"arm": 423,
"▁dec": 424,
"cop": 425,
"rag": 426,
"head": 427,
"seud": 428,
"ery": 429,
"anc": 430,
"ree": 431,
"▁unp": 432,
"red": 433,
"ene": 434,
"urg": 435,
"etic": 436,
"ink": 437,
"ations": 438,
"ons": 439,
"ings": 440,
"edness": 441,
"ale": 442,
"ue": 443,
"ert": 444,
"▁red": 445,
"ped": 446,
"unter": 447,
"igh": 448,
"ableness": 449,
"ans": 450,
"osis": 451,
"ology": 452,
"ett": 453,
"ered": 454,
"rog": 455,
"▁U": 456,
"▁met": 457,
"▁sn": 458,
"ane": 459,
"air": 460,
"ific": 461,
"ised": 462,
"ities": 463,
"sc": 464,
"ement": 465,
"inn": 466,
"▁di": 467,
"rib": 468,
"ance": 469,
"yr": 470,
"uff": 471,
"▁he": 472,
"▁hal": 473,
"▁pseud": 474,
"▁unf": 475,
"ugh": 476,
"aph": 477,
"rot": 478,
"uct": 479,
"af": 480,
"enn": 481,
"roph": 482,
"ler": 483,
"▁hyp": 484,
"oh": 485,
"erous": 486,
"urn": 487,
"anth": 488,
"▁unc": 489,
"yd": 490,
"ome": 491,
"iform": 492,
"▁ind": 493,
"umb": 494,
"str": 495,
"ix": 496,
"▁sw": 497,
"▁up": 498,
"sy": 499,
"ret": 500,
"ung": 501,
"▁all": 502,
"▁ra": 503,
"rol": 504,
"▁mic": 505,
"▁aut": 506,
"▁unm": 507,
"phal": 508,
"▁br": 509,
"orph": 510,
"ition": 511,
"▁counter": 512,
"▁is": 513,
"ict": 514,
"itis": 515,
"ari": 516,
"▁Al": 517,
"eli": 518,
"orr": 519,
"ual": 520,
"▁int": 521,
"yg": 522,
"ress": 523,
"iveness": 524,
"au": 525,
"ve": 526,
"▁sm": 527,
"itt": 528,
"ple": 529,
"agg": 530,
"ank": 531,
"les": 532,
"ern": 533,
"ify": 534,
"ible": 535,
"rat": 536,
"▁hyper": 537,
"ising": 538,
"eck": 539,
"▁sk": 540,
"are": 541,
"ott": 542,
"roc": 543,
"omet": 544,
"ously": 545,
"res": 546,
"oy": 547,
"ists": 548,
"atory": 549,
"enc": 550,
"io": 551,
"edly": 552,
"oman": 553,
"▁unb": 554,
"aceae": 555,
"unt": 556,
"pos": 557,
"arch": 558,
"ral": 559,
"amb": 560,
"aceous": 561,
"▁ret": 562,
"▁post": 563,
"arg": 564,
"cul": 565,
"ina": 566,
"amp": 567,
"ars": 568,
"ase": 569,
"men": 570,
"mer": 571,
"iff": 572,
"rac": 573,
"app": 574,
"rab": 575,
"▁my": 576,
"onic": 577,
"eld": 578,
"lor": 579,
"▁be": 580,
"▁trans": 581,
"▁ir": 582,
"ias": 583,
"▁tri": 584,
"▁unt": 585,
"▁rec": 586,
"▁hyd": 587,
"aff": 588,
"ere": 589,
"▁hem": 590,
"oz": 591,
"isc": 592,
"itic": 593,
"arb": 594,
"ions": 595,
"arp": 596,
"▁as": 597,
"ound": 598,
"ters": 599,
"▁half": 600,
"ory": 601,
"atch": 602,
"inc": 603,
"rap": 604,
"▁pal": 605,
"ool": 606,
"ined": 607,
"▁dem": 608,
"uss": 609,
"▁imp": 610,
"▁Z": 611,
"yll": 612,
"▁cal": 613,
"ren": 614,
"ines": 615,
"▁nond": 616,
"ring": 617,
"oon": 618,
"▁col": 619,
"rav": 620,
"ient": 621,
"ional": 622,
"ousness": 623,
"ele": 624,
"rid": 625,
"yst": 626,
"por": 627,
"eless": 628,
"arn": 629,
"▁Ar": 630,
"opl": 631,
"▁syn": 632,
"▁squ": 633,
"▁inf": 634,
"▁se": 635,
"▁ent": 636,
"ged": 637,
"inal": 638,
"▁ob": 639,
"▁ag": 640,
"▁overs": 641,
"ational": 642,
"arian": 643,
"▁cor": 644,
"ata": 645,
"▁bo": 646,
"work": 647,
"be": 648,
"col": 649,
"ven": 650,
"▁Sh": 651,
"ics": 652,
"▁unre": 653,
"ipp": 654,
"▁nons": 655,
"ird": 656,
"▁St": 657,
"land": 658,
"als": 659,
"▁man": 660,
"▁inc": 661,
"ft": 662,
"els": 663,
"▁cont": 664,
"ru": 665,
"▁pred": 666,
"form": 667,
"▁end": 668,
"amm": 669,
"ild": 670,
"ower": 671,
"erg": 672,
"ored": 673,
"fish": 674,
"ward": 675,
"off": 676,
"▁Ph": 677,
"▁ung": 678,
"▁ver": 679,
"▁mult": 680,
"aster": 681,
"iously": 682,
"agn": 683,
"iph": 684,
"ological": 685,
"rous": 686,
"aur": 687,
"uk": 688,
"arc": 689,
"acc": 690,
"aut": 691,
"▁phot": 692,
"rod": 693,
"▁des": 694,
"wood": 695,
"add": 696,
"itch": 697,
"ross": 698,
"▁twice": 699,
"▁rep": 700,
"▁arch": 701,
"▁gr": 702,
"aker": 703,
"ural": 704,
"aking": 705,
"raw": 706,
"esis": 707,
"iousness": 708,
"esh": 709,
"icul": 710,
"▁sch": 711,
"ets": 712,
"▁hom": 713,
"orth": 714,
"▁ref": 715,
"eral": 716,
"ency": 717,
"▁op": 718,
"que": 719,
"▁unh": 720,
"▁uncon": 721,
"ometer": 722,
"▁elect": 723,
"▁do": 724,
"▁ins": 725,
"ger": 726,
"mat": 727,
"irc": 728,
"oscop": 729,
"ty": 730,
"▁Y": 731,
"ade": 732,
"oe": 733,
"tern": 734,
"▁cr": 735,
"oul": 736,
"urs": 737,
"▁rh": 738,
"▁sul": 739,
"yc": 740,
"leg": 741,
"iferous": 742,
"▁def": 743,
"press": 744,
"eal": 745,
"▁sal": 746,
"▁mal": 747,
"astic": 748,
"proof": 749,
"rip": 750,
"ump": 751,
"▁wor": 752,
"iate": 753,
"▁dep": 754,
"ured": 755,
"ean": 756,
"rec": 757,
"ech": 758,
"sych": 759,
"ently": 760,
"▁Th": 761,
"oma": 762,
"min": 763,
"ught": 764,
"idd": 765,
"uch": 766,
"ld": 767,
"gl": 768,
"read": 769,
"ologist": 770,
"ering": 771,
"vent": 772,
"mon": 773,
"ject": 774,
"ially": 775,
"arr": 776,
"▁py": 777,
"▁the": 778,
"alk": 779,
"▁pres": 780,
"son": 781,
"sk": 782,
"ley": 783,
"erc": 784,
"▁ill": 785,
"asm": 786,
"val": 787,
"ai": 788,
"reg": 789,
"▁we": 790,
"urb": 791,
"asc": 792,
"ake": 793,
"org": 794,
"unc": 795,
"▁cap": 796,
"ili": 797,
"▁at": 798,
"outh": 799,
"▁back": 800,
"▁prot": 801,
"rown": 802,
"▁x": 803,
"ranch": 804,
"izes": 805,
"comp": 806,
"ital": 807,
"aus": 808,
"ette": 809,
"way": 810,
"▁gu": 811,
"▁comp": 812,
"alm": 813,
"rain": 814,
"▁ext": 815,
"iol": 816,
"ray": 817,
"umm": 818,
"ouse": 819,
"otic": 820,
"▁unw": 821,
"right": 822,
"omat": 823,
"rist": 824,
"▁ser": 825,
"omb": 826,
"ites": 827,
"▁reg": 828,
"nesses": 829,
"fe": 830,
"▁po": 831,
"rob": 832,
"▁cat": 833,
"inter": 834,
"back": 835,
"ified": 836,
"lect": 837,
"▁tet": 838,
"ography": 839,
"▁mes": 840,
"unn": 841,
"wise": 842,
"unk": 843,
"▁ass": 844,
"aped": 845,
"ras": 846,
"ened": 847,
"▁emb": 848,
"imb": 849,
"vol": 850,
"osph": 851,
"ries": 852,
"aced": 853,
"hear": 854,
"rem": 855,
"▁bi": 856,
"ana": 857,
"▁kn": 858,
"▁Am": 859,
"▁min": 860,
"oo": 861,
"erat": 862,
"ux": 863,
"▁ox": 864,
"ield": 865,
"▁sol": 866,
"▁bar": 867,
"ald": 868,
"lin": 869,
"rin": 870,
"esp": 871,
"rick": 872,
"ule": 873,
"emb": 874,
"other": 875,
"comm": 876,
"▁rem": 877,
"▁sil": 878,
"▁cur": 879,
"dom": 880,
"omorph": 881,
"her": 882,
"ited": 883,
"aud": 884,
"rif": 885,
"▁anti": 886,
"▁poly": 887,
"ather": 888,
"port": 889,
"ats": 890,
"ification": 891,
"aul": 892,
"▁rad": 893,
"ental": 894,
"rich": 895,
"▁cop": 896,
"eng": 897,
"iac": 898,
"udd": 899,
"ek": 900,
"over": 901,
"▁hand": 902,
"bor": 903,
"elt": 904,
"ingness": 905,
"umin": 906,
"ooth": 907,
"▁world": 908,
"bre": 909,
"▁fe": 910,
"izer": 911,
"by": 912,
"ook": 913,
"▁cy": 914,
"▁rel": 915,
"enz": 916,
"ara": 917,
"ext": 918,
"ops": 919,
"ella": 920,
"▁comm": 921,
"zz": 922,
"lic": 923,
"arth": 924,
"unct": 925,
"cont": 926,
"ener": 927,
"▁prec": 928,
"burg": 929,
"apt": 930,
"eh": 931,
"aem": 932,
"ogn": 933,
"hand": 934,
"▁conc": 935,
"err": 936,
"▁Pro": 937,
"ister": 938,
"riv": 939,
"aid": 940,
"▁mac": 941,
"ime": 942,
"▁cons": 943,
"its": 944,
"ote": 945,
"enic": 946,
"headed": 947,
"usc": 948,
"iter": 949,
"po": 950,
"ibility": 951,
"occ": 952,
"▁gre": 953,
"ellow": 954,
"▁ge": 955,
"▁pe": 956,
"▁te": 957,
"aria": 958,
"iat": 959,
"eric": 960,
"ides": 961,
"▁ste": 962,
"ephal": 963,
"odont": 964,
"ula": 965,
"roch": 966,
"▁acc": 967,
"▁sur": 968,
"ugg": 969,
"ench": 970,
"weed": 971,
"▁heter": 972,
"ius": 973,
"onn": 974,
"▁El": 975,
"amin": 976,
"▁ple": 977,
"rep": 978,
"foot": 979,
"ping": 980,
"ening": 981,
"ulate": 982,
"vis": 983,
"hood": 984,
"▁long": 985,
"isms": 986,
"▁she": 987,
"▁circ": 988,
"aun": 989,
"urt": 990,
"ute": 991,
"▁spl": 992,
"ishness": 993,
"osp": 994,
"▁ly": 995,
"rate": 996,
"aint": 997,
"eled": 998,
"▁ang": 999,
"elect": 1000,
"ages": 1001,
"▁cross": 1002,
"aps": 1003,
"rum": 1004,
"▁imm": 1005,
"shaped": 1006,
"▁equ": 1007,
"atter": 1008,
"▁anth": 1009,
"▁nonc": 1010,
"▁nonp": 1011,
"isp": 1012,
"ony": 1013,
"▁bed": 1014,
"orb": 1015,
"▁lo": 1016,
"▁tel": 1017,
"ura": 1018,
"ster": 1019,
"aj": 1020,
"mind": 1021,
"uv": 1022,
"ese": 1023,
"ued": 1024,
"iser": 1025,
"▁unl": 1026,
"▁bro": 1027,
"hearted": 1028,
"▁che": 1029,
"lood": 1030,
"obia": 1031,
"isation": 1032,
"▁dev": 1033,
"sw": 1034,
"fer": 1035,
"uses": 1036,
"fully": 1037,
"▁semi": 1038,
"hip": 1039,
"med": 1040,
"rel": 1041,
"rew": 1042,
"oral": 1043,
"▁med": 1044,
"me": 1045,
"obl": 1046,
"▁unst": 1047,
"osm": 1048,
"atin": 1049,
"▁can": 1050,
"oic": 1051,
"elic": 1052,
"▁mar": 1053,
"▁exc": 1054,
"utter": 1055,
"atively": 1056,
"ophil": 1057,
"elling": 1058,
"abb": 1059,
"opt": 1060,
"uth": 1061,
"ators": 1062,
"▁psych": 1063,
"ze": 1064,
"ants": 1065,
"itor": 1066,
"vert": 1067,
"ike": 1068,
"iot": 1069,
"elle": 1070,
"ying": 1071,
"antly": 1072,
"▁af": 1073,
"▁cry": 1074,
"▁pan": 1075,
"ones": 1076,
"ios": 1077,
"ks": 1078,
"▁Q": 1079,
"ame": 1080,
"che": 1081,
"▁id": 1082,
"ophor": 1083,
"▁disc": 1084,
"gu": 1085,
"ibl": 1086,
"esth": 1087,
"wort": 1088,
"▁supers": 1089,
"ave": 1090,
"utt": 1091,
"opod": 1092,
"▁mel": 1093,
"▁ult": 1094,
"▁eth": 1095,
"ete": 1096,
"▁ev": 1097,
"uls": 1098,
"plic": 1099,
"▁del": 1100,
"ophag": 1101,
"▁dist": 1102,
"▁ped": 1103,
"ution": 1104,
"alt": 1105,
"▁ec": 1106,
"▁er": 1107,
"aved": 1108,
"▁app": 1109,
"elled": 1110,
"▁prop": 1111,
"yph": 1112,
"ena": 1113,
"▁her": 1114,
"elike": 1115,
"ilt": 1116,
"uble": 1117,
"▁pat": 1118,
"aries": 1119,
"bed": 1120,
"▁sulph": 1121,
"ises": 1122,
"ately": 1123,
"board": 1124,
"for": 1125,
"▁to": 1126,
"ancy": 1127,
"eful": 1128,
"ript": 1129,
"ishly": 1130,
"ologic": 1131,
"▁conf": 1132,
"iod": 1133,
"under": 1134,
"▁cent": 1135,
"▁high": 1136,
"▁unex": 1137,
"iet": 1138,
"onal": 1139,
"rest": 1140,
"▁aer": 1141,
"▁ast": 1142,
"▁tub": 1143,
"istically": 1144,
"ask": 1145,
"▁quad": 1146,
"ifying": 1147,
"ays": 1148,
"ipl": 1149,
"irt": 1150,
"yte": 1151,
"▁In": 1152,
"▁Mar": 1153,
"▁gal": 1154,
"▁overd": 1155,
"▁fr": 1156,
"ider": 1157,
"olic": 1158,
"tail": 1159,
"▁ost": 1160,
"ogg": 1161,
"pect": 1162,
"▁sym": 1163,
"opath": 1164,
"yth": 1165,
"▁cle": 1166,
"opter": 1167,
"▁water": 1168,
"ma": 1169,
"lex": 1170,
"etal": 1171,
"ison": 1172,
"▁wind": 1173,
"action": 1174,
"otyp": 1175,
"▁reb": 1176,
"ident": 1177,
"inate": 1178,
"▁pur": 1179,
"kn": 1180,
"ino": 1181,
"▁go": 1182,
"cons": 1183,
"rost": 1184,
"▁neu": 1185,
"▁ten": 1186,
"ender": 1187,
"wh": 1188,
"▁Sp": 1189,
"▁om": 1190,
"aryn": 1191,
"etr": 1192,
"uit": 1193,
"▁Ad": 1194,
"emat": 1195,
"▁div": 1196,
"ament": 1197,
"▁orth": 1198,
"icular": 1199,
"igg": 1200,
"ums": 1201,
"eyed": 1202,
"▁Sch": 1203,
"faced": 1204,
"ps": 1205,
"ographic": 1206,
"▁Le": 1207,
"oes": 1208,
"ysis": 1209,
"▁vol": 1210,
"ination": 1211,
"oin": 1212,
"ucc": 1213,
"raft": 1214,
"acter": 1215,
"fulness": 1216,
"ck": 1217,
"iqu": 1218,
"▁mo": 1219,
"▁three": 1220,
"▁Ant": 1221,
"▁pap": 1222,
"otrop": 1223,
"icalness": 1224,
"anic": 1225,
"neum": 1226,
"▁unpro": 1227,
"ude": 1228,
"oper": 1229,
"ures": 1230,
"▁aff": 1231,
"ogram": 1232,
"▁black": 1233,
"▁me": 1234,
"orrh": 1235,
"▁diss": 1236,
"kin": 1237,
"omer": 1238,
"illed": 1239,
"otomy": 1240,
"de": 1241,
"ioc": 1242,
"ians": 1243,
"eling": 1244,
"▁phil": 1245,
"▁rest": 1246,
"▁wood": 1247,
"▁att": 1248,
"iable": 1249,
"itter": 1250,
"ments": 1251,
"▁unders": 1252,
"▁u": 1253,
"inch": 1254,
"aged": 1255,
"anch": 1256,
"ester": 1257,
"▁Sc": 1258,
"▁av": 1259,
"ream": 1260,
"▁rev": 1261,
"ioned": 1262,
"▁amph": 1263,
"escent": 1264,
"sm": 1265,
"▁Mc": 1266,
"inae": 1267,
"onia": 1268,
"▁off": 1269,
"iment": 1270,
"▁ster": 1271,
"gen": 1272,
"▁pent": 1273,
"the": 1274,
"ibly": 1275,
"▁bre": 1276,
"aver": 1277,
"▁mid": 1278,
"▁top": 1279,
"house": 1280,
"lessness": 1281,
"emia": 1282,
"illa": 1283,
"oric": 1284,
"▁therm": 1285,
"odd": 1286,
"ola": 1287,
"▁Cl": 1288,
"eman": 1289,
"line": 1290,
"▁air": 1291,
"▁mus": 1292,
"ner": 1293,
"ots": 1294,
"omic": 1295,
"▁cer": 1296,
"▁mer": 1297,
"oking": 1298,
"▁gast": 1299,
"equ": 1300,
"▁antic": 1301,
"ye": 1302,
"oft": 1303,
"▁hex": 1304,
"▁pot": 1305,
"▁sac": 1306,
"iation": 1307,
"ih": 1308,
"▁Un": 1309,
"orial": 1310,
"▁micro": 1311,
"alg": 1312,
"oil": 1313,
"▁gen": 1314,
"▁two": 1315,
"▁disp": 1316,
"arl": 1317,
"isk": 1318,
"ouch": 1319,
"▁nonf": 1320,
"▁circum": 1321,
"imp": 1322,
"▁exp": 1323,
"maker": 1324,
"stone": 1325,
"▁full": 1326,
"bu": 1327,
"bird": 1328,
"▁white": 1329,
"sl": 1330,
"uf": 1331,
"aim": 1332,
"era": 1333,
"▁ho": 1334,
"▁zo": 1335,
"▁oct": 1336,
"allow": 1337,
"itted": 1338,
"▁nont": 1339,
"ophobia": 1340,
"ior": 1341,
"ode": 1342,
"ved": 1343,
"ront": 1344,
"ulation": 1345,
"alc": 1346,
"ubb": 1347,
"conc": 1348,
"reat": 1349,
"▁unin": 1350,
"raz": 1351,
"upt": 1352,
"ilit": 1353,
"orse": 1354,
"▁sec": 1355,
"▁tem": 1356,
"colored": 1357,
"roid": 1358,
"ving": 1359,
"▁sph": 1360,
"▁sun": 1361,
"ham": 1362,
"char": 1363,
"ander": 1364,
"▁nonm": 1365,
"▁fil": 1366,
"orous": 1367,
"▁coll": 1368,
"ita": 1369,
"▁ur": 1370,
"anal": 1371,
"bear": 1372,
"▁det": 1373,
"inous": 1374,
"uring": 1375,
"ential": 1376,
"inic": 1377,
"wing": 1378,
"▁hum": 1379,
"▁card": 1380,
"hyd": 1381,
"ked": 1382,
"oses": 1383,
"▁unn": 1384,
"ality": 1385,
"making": 1386,
"▁hol": 1387,
"ingle": 1388,
"▁head": 1389,
"ianism": 1390,
"go": 1391,
"to": 1392,
"omen": 1393,
"▁act": 1394,
"▁mat": 1395,
"▁chrom": 1396,
"yb": 1397,
"awn": 1398,
"urr": 1399,
"ired": 1400,
"▁cam": 1401,
"▁meg": 1402,
"▁ter": 1403,
"berry": 1404,
"ished": 1405,
"▁Pl": 1406,
"▁part": 1407,
"ogenic": 1408,
"▁double": 1409,
"imm": 1410,
"ulg": 1411,
"▁ed": 1412,
"▁den": 1413,
"ibleness": 1414,
"ka": 1415,
"osc": 1416,
"▁Or": 1417,
"aven": 1418,
"emic": 1419,
"ences": 1420,
"▁hard": 1421,
"oscope": 1422,
"pe": 1423,
"den": 1424,
"ytic": 1425,
"▁amb": 1426,
"▁unv": 1427,
"▁mill": 1428,
"auc": 1429,
"▁ol": 1430,
"ynam": 1431,
"▁ren": 1432,
"oidal": 1433,
"▁fire": 1434,
"ape": 1435,
"key": 1436,
"▁En": 1437,
"rong": 1438,
"▁bel": 1439,
"▁inv": 1440,
"▁pet": 1441,
"▁pers": 1442,
"▁pros": 1443,
"ration": 1444,
"ini": 1445,
"osh": 1446,
"ove": 1447,
"▁gro": 1448,
"▁leg": 1449,
"ection": 1450,
"gr": 1451,
"ece": 1452,
"ero": 1453,
"▁dog": 1454,
"rable": 1455,
"ips": 1456,
"orc": 1457,
"▁Ne": 1458,
"ache": 1459,
"iver": 1460,
"ives": 1461,
"opol": 1462,
"▁cond": 1463,
"▁down": 1464,
"▁chlor": 1465,
"oned": 1466,
"▁cock": 1467,
"jud": 1468,
"▁fer": 1469,
"▁sim": 1470,
"hol": 1471,
"ney": 1472,
"ora": 1473,
"ards": 1474,
"iers": 1475,
"▁deb": 1476,
"▁pul": 1477,
"▁pale": 1478,
"▁prep": 1479,
"ria": 1480,
"▁so": 1481,
"cher": 1482,
"ival": 1483,
"perm": 1484,
"pres": 1485,
"▁bas": 1486,
"amine": 1487,
"iting": 1488,
"▁on": 1489,
"weet": 1490,
"ala": 1491,
"ift": 1492,
"irm": 1493,
"aded": 1494,
"rang": 1495,
"▁pen": 1496,
"bur": 1497,
"hum": 1498,
"ien": 1499,
"▁od": 1500,
"enth": 1501,
"▁form": 1502,
"▁subs": 1503,
"▁short": 1504,
"eff": 1505,
"▁beg": 1506,
"▁flu": 1507,
"▁after": 1508,
"ogl": 1509,
"oke": 1510,
"onian": 1511,
"▁lith": 1512,
"use": 1513,
"roth": 1514,
"field": 1515,
"▁pret": 1516,
"ocephal": 1517,
"cut": 1518,
"ife": 1519,
"▁Bl": 1520,
"▁az": 1521,
"ucle": 1522,
"▁val": 1523,
"actyl": 1524,
"eed": 1525,
"king": 1526,
"lers": 1527,
"uter": 1528,
"▁ann": 1529,
"▁eff": 1530,
"rh": 1531,
"ads": 1532,
"ids": 1533,
"aden": 1534,
"emon": 1535,
"omed": 1536,
"root": 1537,
"▁Car": 1538,
"itive": 1539,
"do": 1540,
"ble": 1541,
"▁Gl": 1542,
"▁lab": 1543,
"▁land": 1544,
"ometry": 1545,
"ulated": 1546,
"olith": 1547,
"▁fore": 1548,
"ilities": 1549,
"▁pseudo": 1550,
"ker": 1551,
"rix": 1552,
"▁Br": 1553,
"ales": 1554,
"▁clo": 1555,
"▁fib": 1556,
"oderm": 1557,
"▁unsh": 1558,
"▁noncon": 1559,
"ani": 1560,
"ela": 1561,
"ched": 1562,
"irit": 1563,
"stit": 1564,
"▁antip": 1565,
"oqu": 1566,
"tract": 1567,
"▁gold": 1568,
"▁ov": 1569,
"rian": 1570,
"ocarp": 1571,
"▁phys": 1572,
"▁proc": 1573,
"▁rein": 1574,
"leaved": 1575,
"inesses": 1576,
"duc": 1577,
"inu": 1578,
"opp": 1579,
"ows": 1580,
"reh": 1581,
"elet": 1582,
"rect": 1583,
"▁tra": 1584,
"arter": 1585,
"iated": 1586,
"ivity": 1587,
"na": 1588,
"asp": 1589,
"bol": 1590,
"ils": 1591,
"ocal": 1592,
"omin": 1593,
"osed": 1594,
"▁pin": 1595,
"bound": 1596,
"ten": 1597,
"onym": 1598,
"sman": 1599,
"▁har": 1600,
"▁ram": 1601,
"▁acet": 1602,
"minded": 1603,
"▁semic": 1604,
"ato": 1605,
"▁sle": 1606,
"ended": 1607,
"mouth": 1608,
"omies": 1609,
"▁quin": 1610,
"oplast": 1611,
"▁yellow": 1612,
"ologically": 1613,
"ano": 1614,
"itz": 1615,
"▁Pol": 1616,
"▁bes": 1617,
"enter": 1618,
"erman": 1619,
"inger": 1620,
"rolog": 1621,
"fishes": 1622,
"flower": 1623,
"ha": 1624,
"box": 1625,
"reen": 1626,
"▁foot": 1627,
"ocl": 1628,
"▁Gu": 1629,
"ough": 1630,
"▁cro": 1631,
"▁enc": 1632,
"▁rub": 1633,
"▁four": 1634,
"▁tran": 1635,
"ico": 1636,
"▁Qu": 1637,
"angu": 1638,
"▁org": 1639,
"ought": 1640,
"stant": 1641,
"▁pant": 1642,
"▁pass": 1643,
"ighted": 1644,
"empt": 1645,
"iled": 1646,
"▁corn": 1647,
"ectomy": 1648,
"illing": 1649,
"br": 1650,
"▁X": 1651,
"oni": 1652,
"ents": 1653,
"mark": 1654,
"uria": 1655,
"▁fur": 1656,
"▁tur": 1657,
"iling": 1658,
"rical": 1659,
"▁bull": 1660,
"▁magn": 1661,
"itation": 1662,
"etically": 1663,
"ida": 1664,
"down": 1665,
"▁bet": 1666,
"▁far": 1667,
"▁ing": 1668,
"▁whe": 1669,
"stall": 1670,
"ometric": 1671,
"erd": 1672,
"yle": 1673,
"▁Ab": 1674,
"emin": 1675,
"ilic": 1676,
"▁bal": 1677,
"▁lim": 1678,
"▁new": 1679,
"actic": 1680,
"▁ball": 1681,
"ership": 1682,
"▁rough": 1683,
"acy": 1684,
"iums": 1685,
"▁eng": 1686,
"▁sea": 1687,
"eries": 1688,
"osity": 1689,
"▁blood": 1690,
"▁dim": 1691,
"▁snow": 1692,
"izz": 1693,
"leb": 1694,
"met": 1695,
"oct": 1696,
"rah": 1697,
"▁du": 1698,
"asis": 1699,
"eler": 1700,
"esia": 1701,
"▁bur": 1702,
"▁nit": 1703,
"▁not": 1704,
"ionist": 1705,
"▁heart": 1706,
"idi": 1707,
"oms": 1708,
"ulc": 1709,
"ared": 1710,
"ases": 1711,
"worm": 1712,
"▁log": 1713,
"ining": 1714,
"onder": 1715,
"oster": 1716,
"▁pref": 1717,
"▁overp": 1718,
"pi": 1719,
"ols": 1720,
"ford": 1721,
"itan": 1722,
"ogam": 1723,
"▁est": 1724,
"▁mod": 1725,
"antic": 1726,
"▁sand": 1727,
"ez": 1728,
"eps": 1729,
"▁es": 1730,
"olar": 1731,
"▁ven": 1732,
"▁subt": 1733,
"ref": 1734,
"▁cre": 1735,
"▁cycl": 1736,
"▁phen": 1737,
"▁port": 1738,
"▁overc": 1739,
"usp": 1740,
"utin": 1741,
"▁Cal": 1742,
"▁six": 1743,
"▁tar": 1744,
"ances": 1745,
"cover": 1746,
"owing": 1747,
"struct": 1748,
"istical": 1749,
"▁nonpro": 1750,
"ado": 1751,
"ball": 1752,
"ulph": 1753,
"▁vit": 1754,
"▁phyt": 1755,
"▁stra": 1756,
"ending": 1757,
"ophyll": 1758,
"sville": 1759,
"ady": 1760,
"ein": 1761,
"thy": 1762,
"ores": 1763,
"rank": 1764,
"ivers": 1765,
"umber": 1766,
"▁mist": 1767,
"▁hydro": 1768,
"▁pneum": 1769,
"ope": 1770,
"riz": 1771,
"▁Ag": 1772,
"▁Rh": 1773,
"berg": 1774,
"ried": 1775,
"▁Mon": 1776,
"aryng": 1777,
"asted": 1778,
"thalm": 1779,
"▁light": 1780,
"aed": 1781,
"obi": 1782,
"ris": 1783,
"ason": 1784,
"ided": 1785,
"unic": 1786,
"▁mot": 1787,
"▁plat": 1788,
"▁night": 1789,
"▁palae": 1790,
"looking": 1791,
"la": 1792,
"ety": 1793,
"oco": 1794,
"▁oc": 1795,
"inct": 1796,
"uous": 1797,
"▁Col": 1798,
"▁gly": 1799,
"▁lib": 1800,
"▁derm": 1801,
"oi": 1802,
"ois": 1803,
"rys": 1804,
"aked": 1805,
"atal": 1806,
"ging": 1807,
"owed": 1808,
"▁art": 1809,
"▁der": 1810,
"▁sat": 1811,
"▁soft": 1812,
"liness": 1813,
"agu": 1814,
"uper": 1815,
"▁dys": 1816,
"▁prer": 1817,
"▁prof": 1818,
"▁antim": 1819,
"▁nonex": 1820,
"bar": 1821,
"pet": 1822,
"▁dr": 1823,
"mann": 1824,
"rated": 1825,
"▁prem": 1826,
"▁sulf": 1827,
"itting": 1828,
"ky": 1829,
"aer": 1830,
"lem": 1831,
"oney": 1832,
"▁unch": 1833,
"▁recon": 1834,
"ada": 1835,
"ama": 1836,
"orh": 1837,
"uzz": 1838,
"van": 1839,
"▁Ac": 1840,
"aric": 1841,
"idal": 1842,
"▁cow": 1843,
"▁tro": 1844,
"onger": 1845,
"usion": 1846,
"▁prim": 1847,
"lessly": 1848,
"escence": 1849,
"yz": 1850,
"irl": 1851,
"aled": 1852,
"eted": 1853,
"iner": 1854,
"▁cyt": 1855,
"ethyl": 1856,
"woman": 1857,
"▁nonv": 1858,
"▁oste": 1859,
"▁subd": 1860,
"▁overf": 1861,
"▁semip": 1862,
"obb": 1863,
"ona": 1864,
"ulf": 1865,
"yan": 1866,
"▁benz": 1867,
"ochrom": 1868,
"vel": 1869,
"agon": 1870,
"unch": 1871,
"urch": 1872,
"uted": 1873,
"▁aph": 1874,
"▁cot": 1875,
"▁ens": 1876,
"alism": 1877,
"▁best": 1878,
"▁single": 1879,
"ta": 1880,
"uph": 1881,
"lock": 1882,
"▁but": 1883,
"▁gon": 1884,
"▁low": 1885,
"alize": 1886,
"light": 1887,
"otted": 1888,
"izable": 1889,
"ativeness": 1890,
"eas": 1891,
"eph": 1892,
"une": 1893,
"icol": 1894,
"ingu": 1895,
"orac": 1896,
"quis": 1897,
"▁dig": 1898,
"rogen": 1899,
"orship": 1900,
"e": 1901,
"▁": 1902,
"i": 1903,
"a": 1904,
"n": 1905,
"o": 1906,
"r": 1907,
"s": 1908,
"t": 1909,
"l": 1910,
"c": 1911,
"u": 1912,
"d": 1913,
"p": 1914,
"m": 1915,
"h": 1916,
"g": 1917,
"y": 1918,
"b": 1919,
"f": 1920,
"-": 1921,
"v": 1922,
"k": 1923,
"w": 1924,
"z": 1925,
"x": 1926,
"S": 1927,
"C": 1928,
"A": 1929,
"q": 1930,
"M": 1931,
"P": 1932,
"j": 1933,
"B": 1934,
"T": 1935,
"L": 1936,
"'": 1937,
"D": 1938,
"H": 1939,
"G": 1940,
"E": 1941,
"R": 1942,
"N": 1943,
"F": 1944,
"K": 1945,
"O": 1946,
"I": 1947,
"W": 1948,
".": 1949,
"J": 1950,
"V": 1951,
"U": 1952,
"Z": 1953,
"Y": 1954,
"/": 1955,
"Q": 1956,
"X": 1957,
":": 1958,
"[": 1959,
"]": 1960,
"(": 1961,
")": 1962,
"2": 1963,
"1": 1964,
"0": 1965,
"=": 1966,
",": 1967,
"ı": 1968,
"#": 1969,
"3": 1970,
"4": 1971,
"?": 1972,
"\"": 1973,
"ü": 1974,
"!": 1975,
"&": 1976,
"5": 1977,
"ş": 1978,
"6": 1979,
"8": 1980,
"`": 1981,
"ö": 1982,
"9": 1983,
"ğ": 1984,
"%": 1985,
"@": 1986,
"7": 1987,
"<": 1988,
">": 1989,
"Ö": 1990,
"ç": 1991,
"$": 1992,
"+": 1993,
";": 1994,
"Ç": 1995,
"Ü": 1996,
"Ğ": 1997,
"İ": 1998,
"Ş": 1999,
"<pad>": 0,
"<unk>": 1,
"<s>": 2,
"</s>": 3
},
"merges": []
}
}