nursimakgul commited on
Commit
a1a1354
·
verified ·
1 Parent(s): 7068313

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +325 -325
tokenizer.json CHANGED
@@ -137,10 +137,6 @@
137
  "end_of_word_suffix": null,
138
  "fuse_unk": false,
139
  "vocab": {
140
- "<pad>": 0,
141
- "<unk>": 1,
142
- "<s>": 2,
143
- "</s>": 3,
144
  "<mask>": 4,
145
  "er": 5,
146
  "in": 6,
@@ -363,8 +359,8 @@
363
  "ef": 223,
364
  "▁pr": 224,
365
  "ating": 225,
366
- "ick": 226,
367
- "ment": 227,
368
  "li": 228,
369
  "ys": 229,
370
  "aw": 230,
@@ -422,8 +418,8 @@
422
  "ew": 282,
423
  "▁le": 283,
424
  "▁en": 284,
425
- "▁ro": 285,
426
- "▁I": 286,
427
  "ear": 287,
428
  "ille": 288,
429
  "▁par": 289,
@@ -456,17 +452,17 @@
456
  "ox": 316,
457
  "ge": 317,
458
  "row": 318,
459
- "▁V": 319,
460
- "pr": 320,
461
  "ash": 321,
462
  "let": 322,
463
- "led": 323,
464
- "int": 324,
465
  "eth": 325,
466
  "ative": 326,
467
  "▁tw": 327,
468
- "att": 328,
469
- "ont": 329,
470
  "ore": 330,
471
  "▁ad": 331,
472
  "ling": 332,
@@ -479,13 +475,13 @@
479
  "▁wh": 339,
480
  "ail": 340,
481
  "▁pol": 341,
482
- "fl": 342,
483
- "▁Ch": 343,
484
- "▁y": 344,
485
  "ax": 345,
486
  "az": 346,
487
- "ater": 347,
488
- "ice": 348,
489
  "▁tr": 349,
490
  "old": 350,
491
  "ence": 351,
@@ -508,12 +504,12 @@
508
  "▁under": 368,
509
  "yt": 369,
510
  "rig": 370,
511
- "ates": 371,
512
- "ik": 372,
513
  "▁sl": 373,
514
  "ann": 374,
515
- "yp": 375,
516
- "ept": 376,
517
  "ile": 377,
518
  "▁for": 378,
519
  "vers": 379,
@@ -539,9 +535,9 @@
539
  "ey": 399,
540
  "▁quasi": 400,
541
  "ors": 401,
542
- "ily": 402,
543
- "rans": 403,
544
- "ull": 404,
545
  "ship": 405,
546
  "▁ne": 406,
547
  "ea": 407,
@@ -554,8 +550,8 @@
554
  "uck": 414,
555
  "oot": 415,
556
  "ark": 416,
557
- "ush": 417,
558
- "erv": 418,
559
  "ton": 419,
560
  "ou": 420,
561
  "ork": 421,
@@ -655,11 +651,11 @@
655
  "itis": 515,
656
  "ari": 516,
657
  "▁Al": 517,
658
- "orr": 518,
659
- "ual": 519,
660
- "▁int": 520,
661
- "yg": 521,
662
- "eli": 522,
663
  "ress": 523,
664
  "iveness": 524,
665
  "au": 525,
@@ -670,8 +666,8 @@
670
  "agg": 530,
671
  "ank": 531,
672
  "les": 532,
673
- "ify": 533,
674
- "ern": 534,
675
  "ible": 535,
676
  "rat": 536,
677
  "▁hyper": 537,
@@ -751,9 +747,9 @@
751
  "▁Z": 611,
752
  "yll": 612,
753
  "▁cal": 613,
754
- "ines": 614,
755
- "▁nond": 615,
756
- "ren": 616,
757
  "ring": 617,
758
  "oon": 618,
759
  "▁col": 619,
@@ -768,8 +764,8 @@
768
  "eless": 628,
769
  "arn": 629,
770
  "▁Ar": 630,
771
- "▁syn": 631,
772
- "opl": 632,
773
  "▁squ": 633,
774
  "▁inf": 634,
775
  "▁se": 635,
@@ -787,9 +783,9 @@
787
  "work": 647,
788
  "be": 648,
789
  "col": 649,
790
- "▁Sh": 650,
791
- "ics": 651,
792
- "ven": 652,
793
  "▁unre": 653,
794
  "ipp": 654,
795
  "▁nons": 655,
@@ -805,8 +801,8 @@
805
  "ru": 665,
806
  "▁pred": 666,
807
  "form": 667,
808
- "amm": 668,
809
- "▁end": 669,
810
  "ild": 670,
811
  "ower": 671,
812
  "erg": 672,
@@ -816,12 +812,12 @@
816
  "off": 676,
817
  "▁Ph": 677,
818
  "▁ung": 678,
819
- "▁mult": 679,
820
- "aster": 680,
821
- "iously": 681,
822
- "agn": 682,
823
- "iph": 683,
824
- "▁ver": 684,
825
  "ological": 685,
826
  "rous": 686,
827
  "aur": 687,
@@ -854,8 +850,8 @@
854
  "orth": 714,
855
  "▁ref": 715,
856
  "eral": 716,
857
- "▁op": 717,
858
- "ency": 718,
859
  "que": 719,
860
  "▁unh": 720,
861
  "▁uncon": 721,
@@ -877,9 +873,9 @@
877
  "urs": 737,
878
  "▁rh": 738,
879
  "▁sul": 739,
880
- "leg": 740,
881
- "iferous": 741,
882
- "yc": 742,
883
  "▁def": 743,
884
  "press": 744,
885
  "eal": 745,
@@ -891,10 +887,10 @@
891
  "ump": 751,
892
  "▁wor": 752,
893
  "iate": 753,
894
- "ured": 754,
895
- "▁dep": 755,
896
- "rec": 756,
897
- "ean": 757,
898
  "ech": 758,
899
  "sych": 759,
900
  "ently": 760,
@@ -911,9 +907,9 @@
911
  "ering": 771,
912
  "vent": 772,
913
  "mon": 773,
914
- "ially": 774,
915
- "arr": 775,
916
- "ject": 776,
917
  "▁py": 777,
918
  "▁the": 778,
919
  "alk": 779,
@@ -934,11 +930,11 @@
934
  "org": 794,
935
  "unc": 795,
936
  "▁cap": 796,
937
- "▁at": 797,
938
- "outh": 798,
939
- "▁back": 799,
940
- "▁prot": 800,
941
- "ili": 801,
942
  "rown": 802,
943
  "▁x": 803,
944
  "ranch": 804,
@@ -962,8 +958,8 @@
962
  "right": 822,
963
  "omat": 823,
964
  "rist": 824,
965
- "omb": 825,
966
- "▁ser": 826,
967
  "ites": 827,
968
  "▁reg": 828,
969
  "nesses": 829,
@@ -983,13 +979,13 @@
983
  "unk": 843,
984
  "▁ass": 844,
985
  "aped": 845,
986
- "ened": 846,
987
- "▁emb": 847,
988
- "imb": 848,
989
- "vol": 849,
990
- "osph": 850,
991
- "ries": 851,
992
- "ras": 852,
993
  "aced": 853,
994
  "hear": 854,
995
  "rem": 855,
@@ -1037,12 +1033,12 @@
1037
  "eng": 897,
1038
  "iac": 898,
1039
  "udd": 899,
1040
- "over": 900,
1041
- "bor": 901,
1042
- "elt": 902,
1043
- "▁hand": 903,
1044
- "ingness": 904,
1045
- "ek": 905,
1046
  "umin": 906,
1047
  "ooth": 907,
1048
  "▁world": 908,
@@ -1055,11 +1051,11 @@
1055
  "▁rel": 915,
1056
  "enz": 916,
1057
  "ara": 917,
1058
- "ops": 918,
1059
- "ella": 919,
1060
- "▁comm": 920,
1061
- "zz": 921,
1062
- "ext": 922,
1063
  "lic": 923,
1064
  "arth": 924,
1065
  "unct": 925,
@@ -1087,8 +1083,8 @@
1087
  "headed": 947,
1088
  "usc": 948,
1089
  "iter": 949,
1090
- "ibility": 950,
1091
- "po": 951,
1092
  "occ": 952,
1093
  "▁gre": 953,
1094
  "ellow": 954,
@@ -1133,9 +1129,9 @@
1133
  "ishness": 993,
1134
  "osp": 994,
1135
  "▁ly": 995,
1136
- "aint": 996,
1137
- "eled": 997,
1138
- "rate": 998,
1139
  "▁ang": 999,
1140
  "elect": 1000,
1141
  "ages": 1001,
@@ -1182,26 +1178,26 @@
1182
  "rew": 1042,
1183
  "oral": 1043,
1184
  "▁med": 1044,
1185
- "obl": 1045,
1186
- "osm": 1046,
1187
  "▁unst": 1047,
1188
- "atin": 1048,
1189
- "▁can": 1049,
1190
- "oic": 1050,
1191
- "elic": 1051,
1192
- "▁mar": 1052,
1193
- "▁exc": 1053,
1194
- "utter": 1054,
1195
- "atively": 1055,
1196
- "ophil": 1056,
1197
- "elling": 1057,
1198
- "abb": 1058,
1199
- "opt": 1059,
1200
- "uth": 1060,
1201
- "ators": 1061,
1202
- "▁psych": 1062,
1203
- "ze": 1063,
1204
- "me": 1064,
1205
  "ants": 1065,
1206
  "itor": 1066,
1207
  "vert": 1067,
@@ -1236,10 +1232,10 @@
1236
  "ete": 1096,
1237
  "▁ev": 1097,
1238
  "uls": 1098,
1239
- "▁del": 1099,
1240
- "ophag": 1100,
1241
- "▁dist": 1101,
1242
- "plic": 1102,
1243
  "▁ped": 1103,
1244
  "ution": 1104,
1245
  "alt": 1105,
@@ -1255,9 +1251,9 @@
1255
  "elike": 1115,
1256
  "ilt": 1116,
1257
  "uble": 1117,
1258
- "aries": 1118,
1259
- "bed": 1119,
1260
- "▁pat": 1120,
1261
  "▁sulph": 1121,
1262
  "ises": 1122,
1263
  "ately": 1123,
@@ -1289,35 +1285,35 @@
1289
  "ipl": 1149,
1290
  "irt": 1150,
1291
  "yte": 1151,
1292
- "▁Mar": 1152,
1293
- "▁gal": 1153,
1294
- "▁overd": 1154,
1295
- "▁In": 1155,
1296
  "▁fr": 1156,
1297
  "ider": 1157,
1298
  "olic": 1158,
1299
  "tail": 1159,
1300
  "▁ost": 1160,
1301
- "pect": 1161,
1302
- "▁sym": 1162,
1303
- "opath": 1163,
1304
- "ogg": 1164,
1305
  "yth": 1165,
1306
  "▁cle": 1166,
1307
  "opter": 1167,
1308
  "▁water": 1168,
1309
- "lex": 1169,
1310
- "etal": 1170,
1311
- "ison": 1171,
1312
- "▁wind": 1172,
1313
- "action": 1173,
1314
- "otyp": 1174,
1315
- "▁reb": 1175,
1316
- "ident": 1176,
1317
- "inate": 1177,
1318
- "▁pur": 1178,
1319
- "kn": 1179,
1320
- "ma": 1180,
1321
  "ino": 1181,
1322
  "▁go": 1182,
1323
  "cons": 1183,
@@ -1446,11 +1442,11 @@
1446
  "▁sac": 1306,
1447
  "iation": 1307,
1448
  "ih": 1308,
1449
- "orial": 1309,
1450
- "▁micro": 1310,
1451
- "alg": 1311,
1452
- "oil": 1312,
1453
- "▁Un": 1313,
1454
  "▁gen": 1314,
1455
  "▁two": 1315,
1456
  "▁disp": 1316,
@@ -1531,13 +1527,13 @@
1531
  "go": 1391,
1532
  "to": 1392,
1533
  "omen": 1393,
1534
- "▁mat": 1394,
1535
- "▁chrom": 1395,
1536
- "yb": 1396,
1537
- "awn": 1397,
1538
- "urr": 1398,
1539
- "ired": 1399,
1540
- "▁act": 1400,
1541
  "▁cam": 1401,
1542
  "▁meg": 1402,
1543
  "▁ter": 1403,
@@ -1555,36 +1551,36 @@
1555
  "ka": 1415,
1556
  "osc": 1416,
1557
  "▁Or": 1417,
1558
- "emic": 1418,
1559
- "ences": 1419,
1560
- "▁hard": 1420,
1561
- "oscope": 1421,
1562
- "pe": 1422,
1563
- "den": 1423,
1564
- "aven": 1424,
1565
  "ytic": 1425,
1566
  "▁amb": 1426,
1567
  "▁unv": 1427,
1568
  "▁mill": 1428,
1569
  "auc": 1429,
1570
- "ynam": 1430,
1571
- "▁ren": 1431,
1572
- "oidal": 1432,
1573
- "▁fire": 1433,
1574
- "ape": 1434,
1575
- "key": 1435,
1576
- "▁En": 1436,
1577
- "rong": 1437,
1578
- "▁bel": 1438,
1579
- "▁inv": 1439,
1580
- "▁pet": 1440,
1581
- "▁pers": 1441,
1582
- "▁pros": 1442,
1583
- "ration": 1443,
1584
- "ini": 1444,
1585
- "osh": 1445,
1586
- "ove": 1446,
1587
- "▁ol": 1447,
1588
  "▁gro": 1448,
1589
  "▁leg": 1449,
1590
  "ection": 1450,
@@ -1837,18 +1833,18 @@
1837
  "rah": 1697,
1838
  "▁du": 1698,
1839
  "asis": 1699,
1840
- "esia": 1700,
1841
- "▁bur": 1701,
1842
- "▁nit": 1702,
1843
- "▁not": 1703,
1844
- "ionist": 1704,
1845
- "▁heart": 1705,
1846
- "idi": 1706,
1847
- "oms": 1707,
1848
- "ulc": 1708,
1849
- "ared": 1709,
1850
- "ases": 1710,
1851
- "eler": 1711,
1852
  "worm": 1712,
1853
  "▁log": 1713,
1854
  "ining": 1714,
@@ -1968,44 +1964,44 @@
1968
  "itting": 1828,
1969
  "ky": 1829,
1970
  "aer": 1830,
1971
- "oney": 1831,
1972
- "▁unch": 1832,
1973
- "▁recon": 1833,
1974
- "ada": 1834,
1975
- "orh": 1835,
1976
- "uzz": 1836,
1977
- "van": 1837,
1978
- "▁Ac": 1838,
1979
- "aric": 1839,
1980
- "idal": 1840,
1981
- "▁cow": 1841,
1982
- "▁tro": 1842,
1983
- "onger": 1843,
1984
- "usion": 1844,
1985
- "▁prim": 1845,
1986
- "lessly": 1846,
1987
- "escence": 1847,
1988
- "yz": 1848,
1989
- "irl": 1849,
1990
- "aled": 1850,
1991
- "eted": 1851,
1992
- "iner": 1852,
1993
- "▁cyt": 1853,
1994
- "ethyl": 1854,
1995
- "woman": 1855,
1996
- "▁nonv": 1856,
1997
- "▁oste": 1857,
1998
- "▁subd": 1858,
1999
- "▁overf": 1859,
2000
- "▁semip": 1860,
2001
- "lem": 1861,
2002
- "obb": 1862,
2003
- "ona": 1863,
2004
- "ulf": 1864,
2005
- "yan": 1865,
2006
- "▁benz": 1866,
2007
- "ochrom": 1867,
2008
- "ama": 1868,
2009
  "vel": 1869,
2010
  "agon": 1870,
2011
  "unch": 1871,
@@ -2038,105 +2034,109 @@
2038
  "▁dig": 1898,
2039
  "rogen": 1899,
2040
  "orship": 1900,
2041
- "rained": 1901,
2042
- "▁trich": 1902,
2043
- "erb": 1903,
2044
- "ken": 1904,
2045
- "pan": 1905,
2046
- "▁ax": 1906,
2047
- "▁ri": 1907,
2048
- "ested": 1908,
2049
- "osoph": 1909,
2050
- "▁hist": 1910,
2051
- "▁sept": 1911,
2052
- "cephal": 1912,
2053
- "e": 1913,
2054
- "": 1914,
2055
- "i": 1915,
2056
- "a": 1916,
2057
- "n": 1917,
2058
- "o": 1918,
2059
- "r": 1919,
2060
- "s": 1920,
2061
- "t": 1921,
2062
- "l": 1922,
2063
- "c": 1923,
2064
- "u": 1924,
2065
- "d": 1925,
2066
- "p": 1926,
2067
- "m": 1927,
2068
- "h": 1928,
2069
- "g": 1929,
2070
- "y": 1930,
2071
- "b": 1931,
2072
- "f": 1932,
2073
- "-": 1933,
2074
- "v": 1934,
2075
- "k": 1935,
2076
- "w": 1936,
2077
- "z": 1937,
2078
- "x": 1938,
2079
- "S": 1939,
2080
- "C": 1940,
2081
- "A": 1941,
2082
- "q": 1942,
2083
- "M": 1943,
2084
- "P": 1944,
2085
- "j": 1945,
2086
- "B": 1946,
2087
- "T": 1947,
2088
- "L": 1948,
2089
- "'": 1949,
2090
- "D": 1950,
2091
- "H": 1951,
2092
- "G": 1952,
2093
- "E": 1953,
2094
- "R": 1954,
2095
- "N": 1955,
2096
- "F": 1956,
2097
- "K": 1957,
2098
- "O": 1958,
2099
- "I": 1959,
2100
- "W": 1960,
2101
- ".": 1961,
2102
- "J": 1962,
2103
- "V": 1963,
2104
- "U": 1964,
2105
- "Z": 1965,
2106
- "Y": 1966,
2107
- "/": 1967,
2108
- "Q": 1968,
2109
- "X": 1969,
2110
- ":": 1970,
2111
- "[": 1971,
2112
- "]": 1972,
2113
- "(": 1973,
2114
- ")": 1974,
2115
- "2": 1975,
2116
- "1": 1976,
2117
- "0": 1977,
2118
- "=": 1978,
2119
- ",": 1979,
2120
- "#": 1980,
2121
- "3": 1981,
2122
- "4": 1982,
2123
- "?": 1983,
2124
- "\"": 1984,
2125
- "!": 1985,
2126
- "&": 1986,
2127
- "5": 1987,
2128
- "6": 1988,
2129
- "8": 1989,
2130
- "`": 1990,
2131
- "9": 1991,
2132
- "%": 1992,
2133
- "@": 1993,
2134
- "7": 1994,
2135
- "<": 1995,
2136
- ">": 1996,
2137
- "$": 1997,
2138
- "+": 1998,
2139
- ";": 1999
 
 
 
 
2140
  },
2141
  "merges": []
2142
  }
 
137
  "end_of_word_suffix": null,
138
  "fuse_unk": false,
139
  "vocab": {
 
 
 
 
140
  "<mask>": 4,
141
  "er": 5,
142
  "in": 6,
 
359
  "ef": 223,
360
  "▁pr": 224,
361
  "ating": 225,
362
+ "ment": 226,
363
+ "ick": 227,
364
  "li": 228,
365
  "ys": 229,
366
  "aw": 230,
 
418
  "ew": 282,
419
  "▁le": 283,
420
  "▁en": 284,
421
+ "▁I": 285,
422
+ "▁ro": 286,
423
  "ear": 287,
424
  "ille": 288,
425
  "▁par": 289,
 
452
  "ox": 316,
453
  "ge": 317,
454
  "row": 318,
455
+ "pr": 319,
456
+ "▁V": 320,
457
  "ash": 321,
458
  "let": 322,
459
+ "int": 323,
460
+ "led": 324,
461
  "eth": 325,
462
  "ative": 326,
463
  "▁tw": 327,
464
+ "ont": 328,
465
+ "att": 329,
466
  "ore": 330,
467
  "▁ad": 331,
468
  "ling": 332,
 
475
  "▁wh": 339,
476
  "ail": 340,
477
  "▁pol": 341,
478
+ "▁y": 342,
479
+ "fl": 343,
480
+ "▁Ch": 344,
481
  "ax": 345,
482
  "az": 346,
483
+ "ice": 347,
484
+ "ater": 348,
485
  "▁tr": 349,
486
  "old": 350,
487
  "ence": 351,
 
504
  "▁under": 368,
505
  "yt": 369,
506
  "rig": 370,
507
+ "ik": 371,
508
+ "ates": 372,
509
  "▁sl": 373,
510
  "ann": 374,
511
+ "ept": 375,
512
+ "yp": 376,
513
  "ile": 377,
514
  "▁for": 378,
515
  "vers": 379,
 
535
  "ey": 399,
536
  "▁quasi": 400,
537
  "ors": 401,
538
+ "ull": 402,
539
+ "ily": 403,
540
+ "rans": 404,
541
  "ship": 405,
542
  "▁ne": 406,
543
  "ea": 407,
 
550
  "uck": 414,
551
  "oot": 415,
552
  "ark": 416,
553
+ "erv": 417,
554
+ "ush": 418,
555
  "ton": 419,
556
  "ou": 420,
557
  "ork": 421,
 
651
  "itis": 515,
652
  "ari": 516,
653
  "▁Al": 517,
654
+ "eli": 518,
655
+ "orr": 519,
656
+ "ual": 520,
657
+ "▁int": 521,
658
+ "yg": 522,
659
  "ress": 523,
660
  "iveness": 524,
661
  "au": 525,
 
666
  "agg": 530,
667
  "ank": 531,
668
  "les": 532,
669
+ "ern": 533,
670
+ "ify": 534,
671
  "ible": 535,
672
  "rat": 536,
673
  "▁hyper": 537,
 
747
  "▁Z": 611,
748
  "yll": 612,
749
  "▁cal": 613,
750
+ "ren": 614,
751
+ "ines": 615,
752
+ "▁nond": 616,
753
  "ring": 617,
754
  "oon": 618,
755
  "▁col": 619,
 
764
  "eless": 628,
765
  "arn": 629,
766
  "▁Ar": 630,
767
+ "opl": 631,
768
+ "▁syn": 632,
769
  "▁squ": 633,
770
  "▁inf": 634,
771
  "▁se": 635,
 
783
  "work": 647,
784
  "be": 648,
785
  "col": 649,
786
+ "ven": 650,
787
+ "▁Sh": 651,
788
+ "ics": 652,
789
  "▁unre": 653,
790
  "ipp": 654,
791
  "▁nons": 655,
 
801
  "ru": 665,
802
  "▁pred": 666,
803
  "form": 667,
804
+ "▁end": 668,
805
+ "amm": 669,
806
  "ild": 670,
807
  "ower": 671,
808
  "erg": 672,
 
812
  "off": 676,
813
  "▁Ph": 677,
814
  "▁ung": 678,
815
+ "▁ver": 679,
816
+ "▁mult": 680,
817
+ "aster": 681,
818
+ "iously": 682,
819
+ "agn": 683,
820
+ "iph": 684,
821
  "ological": 685,
822
  "rous": 686,
823
  "aur": 687,
 
850
  "orth": 714,
851
  "▁ref": 715,
852
  "eral": 716,
853
+ "ency": 717,
854
+ "▁op": 718,
855
  "que": 719,
856
  "▁unh": 720,
857
  "▁uncon": 721,
 
873
  "urs": 737,
874
  "▁rh": 738,
875
  "▁sul": 739,
876
+ "yc": 740,
877
+ "leg": 741,
878
+ "iferous": 742,
879
  "▁def": 743,
880
  "press": 744,
881
  "eal": 745,
 
887
  "ump": 751,
888
  "▁wor": 752,
889
  "iate": 753,
890
+ "▁dep": 754,
891
+ "ured": 755,
892
+ "ean": 756,
893
+ "rec": 757,
894
  "ech": 758,
895
  "sych": 759,
896
  "ently": 760,
 
907
  "ering": 771,
908
  "vent": 772,
909
  "mon": 773,
910
+ "ject": 774,
911
+ "ially": 775,
912
+ "arr": 776,
913
  "▁py": 777,
914
  "▁the": 778,
915
  "alk": 779,
 
930
  "org": 794,
931
  "unc": 795,
932
  "▁cap": 796,
933
+ "ili": 797,
934
+ "▁at": 798,
935
+ "outh": 799,
936
+ "▁back": 800,
937
+ "▁prot": 801,
938
  "rown": 802,
939
  "▁x": 803,
940
  "ranch": 804,
 
958
  "right": 822,
959
  "omat": 823,
960
  "rist": 824,
961
+ "▁ser": 825,
962
+ "omb": 826,
963
  "ites": 827,
964
  "▁reg": 828,
965
  "nesses": 829,
 
979
  "unk": 843,
980
  "▁ass": 844,
981
  "aped": 845,
982
+ "ras": 846,
983
+ "ened": 847,
984
+ "▁emb": 848,
985
+ "imb": 849,
986
+ "vol": 850,
987
+ "osph": 851,
988
+ "ries": 852,
989
  "aced": 853,
990
  "hear": 854,
991
  "rem": 855,
 
1033
  "eng": 897,
1034
  "iac": 898,
1035
  "udd": 899,
1036
+ "ek": 900,
1037
+ "over": 901,
1038
+ "▁hand": 902,
1039
+ "bor": 903,
1040
+ "elt": 904,
1041
+ "ingness": 905,
1042
  "umin": 906,
1043
  "ooth": 907,
1044
  "▁world": 908,
 
1051
  "▁rel": 915,
1052
  "enz": 916,
1053
  "ara": 917,
1054
+ "ext": 918,
1055
+ "ops": 919,
1056
+ "ella": 920,
1057
+ "▁comm": 921,
1058
+ "zz": 922,
1059
  "lic": 923,
1060
  "arth": 924,
1061
  "unct": 925,
 
1083
  "headed": 947,
1084
  "usc": 948,
1085
  "iter": 949,
1086
+ "po": 950,
1087
+ "ibility": 951,
1088
  "occ": 952,
1089
  "▁gre": 953,
1090
  "ellow": 954,
 
1129
  "ishness": 993,
1130
  "osp": 994,
1131
  "▁ly": 995,
1132
+ "rate": 996,
1133
+ "aint": 997,
1134
+ "eled": 998,
1135
  "▁ang": 999,
1136
  "elect": 1000,
1137
  "ages": 1001,
 
1178
  "rew": 1042,
1179
  "oral": 1043,
1180
  "▁med": 1044,
1181
+ "me": 1045,
1182
+ "obl": 1046,
1183
  "▁unst": 1047,
1184
+ "osm": 1048,
1185
+ "atin": 1049,
1186
+ "▁can": 1050,
1187
+ "oic": 1051,
1188
+ "elic": 1052,
1189
+ "▁mar": 1053,
1190
+ "▁exc": 1054,
1191
+ "utter": 1055,
1192
+ "atively": 1056,
1193
+ "ophil": 1057,
1194
+ "elling": 1058,
1195
+ "abb": 1059,
1196
+ "opt": 1060,
1197
+ "uth": 1061,
1198
+ "ators": 1062,
1199
+ "▁psych": 1063,
1200
+ "ze": 1064,
1201
  "ants": 1065,
1202
  "itor": 1066,
1203
  "vert": 1067,
 
1232
  "ete": 1096,
1233
  "▁ev": 1097,
1234
  "uls": 1098,
1235
+ "plic": 1099,
1236
+ "▁del": 1100,
1237
+ "ophag": 1101,
1238
+ "▁dist": 1102,
1239
  "▁ped": 1103,
1240
  "ution": 1104,
1241
  "alt": 1105,
 
1251
  "elike": 1115,
1252
  "ilt": 1116,
1253
  "uble": 1117,
1254
+ "▁pat": 1118,
1255
+ "aries": 1119,
1256
+ "bed": 1120,
1257
  "▁sulph": 1121,
1258
  "ises": 1122,
1259
  "ately": 1123,
 
1285
  "ipl": 1149,
1286
  "irt": 1150,
1287
  "yte": 1151,
1288
+ "▁In": 1152,
1289
+ "▁Mar": 1153,
1290
+ "▁gal": 1154,
1291
+ "▁overd": 1155,
1292
  "▁fr": 1156,
1293
  "ider": 1157,
1294
  "olic": 1158,
1295
  "tail": 1159,
1296
  "▁ost": 1160,
1297
+ "ogg": 1161,
1298
+ "pect": 1162,
1299
+ "▁sym": 1163,
1300
+ "opath": 1164,
1301
  "yth": 1165,
1302
  "▁cle": 1166,
1303
  "opter": 1167,
1304
  "▁water": 1168,
1305
+ "ma": 1169,
1306
+ "lex": 1170,
1307
+ "etal": 1171,
1308
+ "ison": 1172,
1309
+ "▁wind": 1173,
1310
+ "action": 1174,
1311
+ "otyp": 1175,
1312
+ "▁reb": 1176,
1313
+ "ident": 1177,
1314
+ "inate": 1178,
1315
+ "▁pur": 1179,
1316
+ "kn": 1180,
1317
  "ino": 1181,
1318
  "▁go": 1182,
1319
  "cons": 1183,
 
1442
  "▁sac": 1306,
1443
  "iation": 1307,
1444
  "ih": 1308,
1445
+ "▁Un": 1309,
1446
+ "orial": 1310,
1447
+ "▁micro": 1311,
1448
+ "alg": 1312,
1449
+ "oil": 1313,
1450
  "▁gen": 1314,
1451
  "▁two": 1315,
1452
  "▁disp": 1316,
 
1527
  "go": 1391,
1528
  "to": 1392,
1529
  "omen": 1393,
1530
+ "▁act": 1394,
1531
+ "▁mat": 1395,
1532
+ "▁chrom": 1396,
1533
+ "yb": 1397,
1534
+ "awn": 1398,
1535
+ "urr": 1399,
1536
+ "ired": 1400,
1537
  "▁cam": 1401,
1538
  "▁meg": 1402,
1539
  "▁ter": 1403,
 
1551
  "ka": 1415,
1552
  "osc": 1416,
1553
  "▁Or": 1417,
1554
+ "aven": 1418,
1555
+ "emic": 1419,
1556
+ "ences": 1420,
1557
+ "▁hard": 1421,
1558
+ "oscope": 1422,
1559
+ "pe": 1423,
1560
+ "den": 1424,
1561
  "ytic": 1425,
1562
  "▁amb": 1426,
1563
  "▁unv": 1427,
1564
  "▁mill": 1428,
1565
  "auc": 1429,
1566
+ "▁ol": 1430,
1567
+ "ynam": 1431,
1568
+ "▁ren": 1432,
1569
+ "oidal": 1433,
1570
+ "▁fire": 1434,
1571
+ "ape": 1435,
1572
+ "key": 1436,
1573
+ "▁En": 1437,
1574
+ "rong": 1438,
1575
+ "▁bel": 1439,
1576
+ "▁inv": 1440,
1577
+ "▁pet": 1441,
1578
+ "▁pers": 1442,
1579
+ "▁pros": 1443,
1580
+ "ration": 1444,
1581
+ "ini": 1445,
1582
+ "osh": 1446,
1583
+ "ove": 1447,
1584
  "▁gro": 1448,
1585
  "▁leg": 1449,
1586
  "ection": 1450,
 
1833
  "rah": 1697,
1834
  "▁du": 1698,
1835
  "asis": 1699,
1836
+ "eler": 1700,
1837
+ "esia": 1701,
1838
+ "▁bur": 1702,
1839
+ "▁nit": 1703,
1840
+ "▁not": 1704,
1841
+ "ionist": 1705,
1842
+ "▁heart": 1706,
1843
+ "idi": 1707,
1844
+ "oms": 1708,
1845
+ "ulc": 1709,
1846
+ "ared": 1710,
1847
+ "ases": 1711,
1848
  "worm": 1712,
1849
  "▁log": 1713,
1850
  "ining": 1714,
 
1964
  "itting": 1828,
1965
  "ky": 1829,
1966
  "aer": 1830,
1967
+ "lem": 1831,
1968
+ "oney": 1832,
1969
+ "▁unch": 1833,
1970
+ "▁recon": 1834,
1971
+ "ada": 1835,
1972
+ "ama": 1836,
1973
+ "orh": 1837,
1974
+ "uzz": 1838,
1975
+ "van": 1839,
1976
+ "▁Ac": 1840,
1977
+ "aric": 1841,
1978
+ "idal": 1842,
1979
+ "▁cow": 1843,
1980
+ "▁tro": 1844,
1981
+ "onger": 1845,
1982
+ "usion": 1846,
1983
+ "▁prim": 1847,
1984
+ "lessly": 1848,
1985
+ "escence": 1849,
1986
+ "yz": 1850,
1987
+ "irl": 1851,
1988
+ "aled": 1852,
1989
+ "eted": 1853,
1990
+ "iner": 1854,
1991
+ "▁cyt": 1855,
1992
+ "ethyl": 1856,
1993
+ "woman": 1857,
1994
+ "▁nonv": 1858,
1995
+ "▁oste": 1859,
1996
+ "▁subd": 1860,
1997
+ "▁overf": 1861,
1998
+ "▁semip": 1862,
1999
+ "obb": 1863,
2000
+ "ona": 1864,
2001
+ "ulf": 1865,
2002
+ "yan": 1866,
2003
+ "▁benz": 1867,
2004
+ "ochrom": 1868,
2005
  "vel": 1869,
2006
  "agon": 1870,
2007
  "unch": 1871,
 
2034
  "▁dig": 1898,
2035
  "rogen": 1899,
2036
  "orship": 1900,
2037
+ "e": 1901,
2038
+ "▁": 1902,
2039
+ "i": 1903,
2040
+ "a": 1904,
2041
+ "n": 1905,
2042
+ "o": 1906,
2043
+ "r": 1907,
2044
+ "s": 1908,
2045
+ "t": 1909,
2046
+ "l": 1910,
2047
+ "c": 1911,
2048
+ "u": 1912,
2049
+ "d": 1913,
2050
+ "p": 1914,
2051
+ "m": 1915,
2052
+ "h": 1916,
2053
+ "g": 1917,
2054
+ "y": 1918,
2055
+ "b": 1919,
2056
+ "f": 1920,
2057
+ "-": 1921,
2058
+ "v": 1922,
2059
+ "k": 1923,
2060
+ "w": 1924,
2061
+ "z": 1925,
2062
+ "x": 1926,
2063
+ "S": 1927,
2064
+ "C": 1928,
2065
+ "A": 1929,
2066
+ "q": 1930,
2067
+ "M": 1931,
2068
+ "P": 1932,
2069
+ "j": 1933,
2070
+ "B": 1934,
2071
+ "T": 1935,
2072
+ "L": 1936,
2073
+ "'": 1937,
2074
+ "D": 1938,
2075
+ "H": 1939,
2076
+ "G": 1940,
2077
+ "E": 1941,
2078
+ "R": 1942,
2079
+ "N": 1943,
2080
+ "F": 1944,
2081
+ "K": 1945,
2082
+ "O": 1946,
2083
+ "I": 1947,
2084
+ "W": 1948,
2085
+ ".": 1949,
2086
+ "J": 1950,
2087
+ "V": 1951,
2088
+ "U": 1952,
2089
+ "Z": 1953,
2090
+ "Y": 1954,
2091
+ "/": 1955,
2092
+ "Q": 1956,
2093
+ "X": 1957,
2094
+ ":": 1958,
2095
+ "[": 1959,
2096
+ "]": 1960,
2097
+ "(": 1961,
2098
+ ")": 1962,
2099
+ "2": 1963,
2100
+ "1": 1964,
2101
+ "0": 1965,
2102
+ "=": 1966,
2103
+ ",": 1967,
2104
+ "ı": 1968,
2105
+ "#": 1969,
2106
+ "3": 1970,
2107
+ "4": 1971,
2108
+ "?": 1972,
2109
+ "\"": 1973,
2110
+ "ü": 1974,
2111
+ "!": 1975,
2112
+ "&": 1976,
2113
+ "5": 1977,
2114
+ "ş": 1978,
2115
+ "6": 1979,
2116
+ "8": 1980,
2117
+ "`": 1981,
2118
+ "ö": 1982,
2119
+ "9": 1983,
2120
+ "ğ": 1984,
2121
+ "%": 1985,
2122
+ "@": 1986,
2123
+ "7": 1987,
2124
+ "<": 1988,
2125
+ ">": 1989,
2126
+ "Ö": 1990,
2127
+ "ç": 1991,
2128
+ "$": 1992,
2129
+ "+": 1993,
2130
+ ";": 1994,
2131
+ "Ç": 1995,
2132
+ "Ü": 1996,
2133
+ "Ğ": 1997,
2134
+ "İ": 1998,
2135
+ "Ş": 1999,
2136
+ "<pad>": 0,
2137
+ "<unk>": 1,
2138
+ "<s>": 2,
2139
+ "</s>": 3
2140
  },
2141
  "merges": []
2142
  }