Upload tokenizer.json with huggingface_hub
Browse files- tokenizer.json +325 -325
tokenizer.json
CHANGED
|
@@ -137,10 +137,6 @@
|
|
| 137 |
"end_of_word_suffix": null,
|
| 138 |
"fuse_unk": false,
|
| 139 |
"vocab": {
|
| 140 |
-
"<pad>": 0,
|
| 141 |
-
"<unk>": 1,
|
| 142 |
-
"<s>": 2,
|
| 143 |
-
"</s>": 3,
|
| 144 |
"<mask>": 4,
|
| 145 |
"er": 5,
|
| 146 |
"in": 6,
|
|
@@ -363,8 +359,8 @@
|
|
| 363 |
"ef": 223,
|
| 364 |
"▁pr": 224,
|
| 365 |
"ating": 225,
|
| 366 |
-
"
|
| 367 |
-
"
|
| 368 |
"li": 228,
|
| 369 |
"ys": 229,
|
| 370 |
"aw": 230,
|
|
@@ -422,8 +418,8 @@
|
|
| 422 |
"ew": 282,
|
| 423 |
"▁le": 283,
|
| 424 |
"▁en": 284,
|
| 425 |
-
"▁
|
| 426 |
-
"▁
|
| 427 |
"ear": 287,
|
| 428 |
"ille": 288,
|
| 429 |
"▁par": 289,
|
|
@@ -456,17 +452,17 @@
|
|
| 456 |
"ox": 316,
|
| 457 |
"ge": 317,
|
| 458 |
"row": 318,
|
| 459 |
-
"
|
| 460 |
-
"
|
| 461 |
"ash": 321,
|
| 462 |
"let": 322,
|
| 463 |
-
"
|
| 464 |
-
"
|
| 465 |
"eth": 325,
|
| 466 |
"ative": 326,
|
| 467 |
"▁tw": 327,
|
| 468 |
-
"
|
| 469 |
-
"
|
| 470 |
"ore": 330,
|
| 471 |
"▁ad": 331,
|
| 472 |
"ling": 332,
|
|
@@ -479,13 +475,13 @@
|
|
| 479 |
"▁wh": 339,
|
| 480 |
"ail": 340,
|
| 481 |
"▁pol": 341,
|
| 482 |
-
"
|
| 483 |
-
"
|
| 484 |
-
"▁
|
| 485 |
"ax": 345,
|
| 486 |
"az": 346,
|
| 487 |
-
"
|
| 488 |
-
"
|
| 489 |
"▁tr": 349,
|
| 490 |
"old": 350,
|
| 491 |
"ence": 351,
|
|
@@ -508,12 +504,12 @@
|
|
| 508 |
"▁under": 368,
|
| 509 |
"yt": 369,
|
| 510 |
"rig": 370,
|
| 511 |
-
"
|
| 512 |
-
"
|
| 513 |
"▁sl": 373,
|
| 514 |
"ann": 374,
|
| 515 |
-
"
|
| 516 |
-
"
|
| 517 |
"ile": 377,
|
| 518 |
"▁for": 378,
|
| 519 |
"vers": 379,
|
|
@@ -539,9 +535,9 @@
|
|
| 539 |
"ey": 399,
|
| 540 |
"▁quasi": 400,
|
| 541 |
"ors": 401,
|
| 542 |
-
"
|
| 543 |
-
"
|
| 544 |
-
"
|
| 545 |
"ship": 405,
|
| 546 |
"▁ne": 406,
|
| 547 |
"ea": 407,
|
|
@@ -554,8 +550,8 @@
|
|
| 554 |
"uck": 414,
|
| 555 |
"oot": 415,
|
| 556 |
"ark": 416,
|
| 557 |
-
"
|
| 558 |
-
"
|
| 559 |
"ton": 419,
|
| 560 |
"ou": 420,
|
| 561 |
"ork": 421,
|
|
@@ -655,11 +651,11 @@
|
|
| 655 |
"itis": 515,
|
| 656 |
"ari": 516,
|
| 657 |
"▁Al": 517,
|
| 658 |
-
"
|
| 659 |
-
"
|
| 660 |
-
"
|
| 661 |
-
"
|
| 662 |
-
"
|
| 663 |
"ress": 523,
|
| 664 |
"iveness": 524,
|
| 665 |
"au": 525,
|
|
@@ -670,8 +666,8 @@
|
|
| 670 |
"agg": 530,
|
| 671 |
"ank": 531,
|
| 672 |
"les": 532,
|
| 673 |
-
"
|
| 674 |
-
"
|
| 675 |
"ible": 535,
|
| 676 |
"rat": 536,
|
| 677 |
"▁hyper": 537,
|
|
@@ -751,9 +747,9 @@
|
|
| 751 |
"▁Z": 611,
|
| 752 |
"yll": 612,
|
| 753 |
"▁cal": 613,
|
| 754 |
-
"
|
| 755 |
-
"
|
| 756 |
-
"
|
| 757 |
"ring": 617,
|
| 758 |
"oon": 618,
|
| 759 |
"▁col": 619,
|
|
@@ -768,8 +764,8 @@
|
|
| 768 |
"eless": 628,
|
| 769 |
"arn": 629,
|
| 770 |
"▁Ar": 630,
|
| 771 |
-
"
|
| 772 |
-
"
|
| 773 |
"▁squ": 633,
|
| 774 |
"▁inf": 634,
|
| 775 |
"▁se": 635,
|
|
@@ -787,9 +783,9 @@
|
|
| 787 |
"work": 647,
|
| 788 |
"be": 648,
|
| 789 |
"col": 649,
|
| 790 |
-
"
|
| 791 |
-
"
|
| 792 |
-
"
|
| 793 |
"▁unre": 653,
|
| 794 |
"ipp": 654,
|
| 795 |
"▁nons": 655,
|
|
@@ -805,8 +801,8 @@
|
|
| 805 |
"ru": 665,
|
| 806 |
"▁pred": 666,
|
| 807 |
"form": 667,
|
| 808 |
-
"
|
| 809 |
-
"
|
| 810 |
"ild": 670,
|
| 811 |
"ower": 671,
|
| 812 |
"erg": 672,
|
|
@@ -816,12 +812,12 @@
|
|
| 816 |
"off": 676,
|
| 817 |
"▁Ph": 677,
|
| 818 |
"▁ung": 678,
|
| 819 |
-
"▁
|
| 820 |
-
"
|
| 821 |
-
"
|
| 822 |
-
"
|
| 823 |
-
"
|
| 824 |
-
"
|
| 825 |
"ological": 685,
|
| 826 |
"rous": 686,
|
| 827 |
"aur": 687,
|
|
@@ -854,8 +850,8 @@
|
|
| 854 |
"orth": 714,
|
| 855 |
"▁ref": 715,
|
| 856 |
"eral": 716,
|
| 857 |
-
"
|
| 858 |
-
"
|
| 859 |
"que": 719,
|
| 860 |
"▁unh": 720,
|
| 861 |
"▁uncon": 721,
|
|
@@ -877,9 +873,9 @@
|
|
| 877 |
"urs": 737,
|
| 878 |
"▁rh": 738,
|
| 879 |
"▁sul": 739,
|
| 880 |
-
"
|
| 881 |
-
"
|
| 882 |
-
"
|
| 883 |
"▁def": 743,
|
| 884 |
"press": 744,
|
| 885 |
"eal": 745,
|
|
@@ -891,10 +887,10 @@
|
|
| 891 |
"ump": 751,
|
| 892 |
"▁wor": 752,
|
| 893 |
"iate": 753,
|
| 894 |
-
"
|
| 895 |
-
"
|
| 896 |
-
"
|
| 897 |
-
"
|
| 898 |
"ech": 758,
|
| 899 |
"sych": 759,
|
| 900 |
"ently": 760,
|
|
@@ -911,9 +907,9 @@
|
|
| 911 |
"ering": 771,
|
| 912 |
"vent": 772,
|
| 913 |
"mon": 773,
|
| 914 |
-
"
|
| 915 |
-
"
|
| 916 |
-
"
|
| 917 |
"▁py": 777,
|
| 918 |
"▁the": 778,
|
| 919 |
"alk": 779,
|
|
@@ -934,11 +930,11 @@
|
|
| 934 |
"org": 794,
|
| 935 |
"unc": 795,
|
| 936 |
"▁cap": 796,
|
| 937 |
-
"
|
| 938 |
-
"
|
| 939 |
-
"
|
| 940 |
-
"▁
|
| 941 |
-
"
|
| 942 |
"rown": 802,
|
| 943 |
"▁x": 803,
|
| 944 |
"ranch": 804,
|
|
@@ -962,8 +958,8 @@
|
|
| 962 |
"right": 822,
|
| 963 |
"omat": 823,
|
| 964 |
"rist": 824,
|
| 965 |
-
"
|
| 966 |
-
"
|
| 967 |
"ites": 827,
|
| 968 |
"▁reg": 828,
|
| 969 |
"nesses": 829,
|
|
@@ -983,13 +979,13 @@
|
|
| 983 |
"unk": 843,
|
| 984 |
"▁ass": 844,
|
| 985 |
"aped": 845,
|
| 986 |
-
"
|
| 987 |
-
"
|
| 988 |
-
"
|
| 989 |
-
"
|
| 990 |
-
"
|
| 991 |
-
"
|
| 992 |
-
"
|
| 993 |
"aced": 853,
|
| 994 |
"hear": 854,
|
| 995 |
"rem": 855,
|
|
@@ -1037,12 +1033,12 @@
|
|
| 1037 |
"eng": 897,
|
| 1038 |
"iac": 898,
|
| 1039 |
"udd": 899,
|
| 1040 |
-
"
|
| 1041 |
-
"
|
| 1042 |
-
"
|
| 1043 |
-
"
|
| 1044 |
-
"
|
| 1045 |
-
"
|
| 1046 |
"umin": 906,
|
| 1047 |
"ooth": 907,
|
| 1048 |
"▁world": 908,
|
|
@@ -1055,11 +1051,11 @@
|
|
| 1055 |
"▁rel": 915,
|
| 1056 |
"enz": 916,
|
| 1057 |
"ara": 917,
|
| 1058 |
-
"
|
| 1059 |
-
"
|
| 1060 |
-
"
|
| 1061 |
-
"
|
| 1062 |
-
"
|
| 1063 |
"lic": 923,
|
| 1064 |
"arth": 924,
|
| 1065 |
"unct": 925,
|
|
@@ -1087,8 +1083,8 @@
|
|
| 1087 |
"headed": 947,
|
| 1088 |
"usc": 948,
|
| 1089 |
"iter": 949,
|
| 1090 |
-
"
|
| 1091 |
-
"
|
| 1092 |
"occ": 952,
|
| 1093 |
"▁gre": 953,
|
| 1094 |
"ellow": 954,
|
|
@@ -1133,9 +1129,9 @@
|
|
| 1133 |
"ishness": 993,
|
| 1134 |
"osp": 994,
|
| 1135 |
"▁ly": 995,
|
| 1136 |
-
"
|
| 1137 |
-
"
|
| 1138 |
-
"
|
| 1139 |
"▁ang": 999,
|
| 1140 |
"elect": 1000,
|
| 1141 |
"ages": 1001,
|
|
@@ -1182,26 +1178,26 @@
|
|
| 1182 |
"rew": 1042,
|
| 1183 |
"oral": 1043,
|
| 1184 |
"▁med": 1044,
|
| 1185 |
-
"
|
| 1186 |
-
"
|
| 1187 |
"▁unst": 1047,
|
| 1188 |
-
"
|
| 1189 |
-
"
|
| 1190 |
-
"
|
| 1191 |
-
"
|
| 1192 |
-
"
|
| 1193 |
-
"▁
|
| 1194 |
-
"
|
| 1195 |
-
"
|
| 1196 |
-
"
|
| 1197 |
-
"
|
| 1198 |
-
"
|
| 1199 |
-
"
|
| 1200 |
-
"
|
| 1201 |
-
"
|
| 1202 |
-
"
|
| 1203 |
-
"
|
| 1204 |
-
"
|
| 1205 |
"ants": 1065,
|
| 1206 |
"itor": 1066,
|
| 1207 |
"vert": 1067,
|
|
@@ -1236,10 +1232,10 @@
|
|
| 1236 |
"ete": 1096,
|
| 1237 |
"▁ev": 1097,
|
| 1238 |
"uls": 1098,
|
| 1239 |
-
"
|
| 1240 |
-
"
|
| 1241 |
-
"
|
| 1242 |
-
"
|
| 1243 |
"▁ped": 1103,
|
| 1244 |
"ution": 1104,
|
| 1245 |
"alt": 1105,
|
|
@@ -1255,9 +1251,9 @@
|
|
| 1255 |
"elike": 1115,
|
| 1256 |
"ilt": 1116,
|
| 1257 |
"uble": 1117,
|
| 1258 |
-
"
|
| 1259 |
-
"
|
| 1260 |
-
"
|
| 1261 |
"▁sulph": 1121,
|
| 1262 |
"ises": 1122,
|
| 1263 |
"ately": 1123,
|
|
@@ -1289,35 +1285,35 @@
|
|
| 1289 |
"ipl": 1149,
|
| 1290 |
"irt": 1150,
|
| 1291 |
"yte": 1151,
|
| 1292 |
-
"▁
|
| 1293 |
-
"▁
|
| 1294 |
-
"▁
|
| 1295 |
-
"▁
|
| 1296 |
"▁fr": 1156,
|
| 1297 |
"ider": 1157,
|
| 1298 |
"olic": 1158,
|
| 1299 |
"tail": 1159,
|
| 1300 |
"▁ost": 1160,
|
| 1301 |
-
"
|
| 1302 |
-
"
|
| 1303 |
-
"
|
| 1304 |
-
"
|
| 1305 |
"yth": 1165,
|
| 1306 |
"▁cle": 1166,
|
| 1307 |
"opter": 1167,
|
| 1308 |
"▁water": 1168,
|
| 1309 |
-
"
|
| 1310 |
-
"
|
| 1311 |
-
"
|
| 1312 |
-
"
|
| 1313 |
-
"
|
| 1314 |
-
"
|
| 1315 |
-
"
|
| 1316 |
-
"
|
| 1317 |
-
"
|
| 1318 |
-
"
|
| 1319 |
-
"
|
| 1320 |
-
"
|
| 1321 |
"ino": 1181,
|
| 1322 |
"▁go": 1182,
|
| 1323 |
"cons": 1183,
|
|
@@ -1446,11 +1442,11 @@
|
|
| 1446 |
"▁sac": 1306,
|
| 1447 |
"iation": 1307,
|
| 1448 |
"ih": 1308,
|
| 1449 |
-
"
|
| 1450 |
-
"
|
| 1451 |
-
"
|
| 1452 |
-
"
|
| 1453 |
-
"
|
| 1454 |
"▁gen": 1314,
|
| 1455 |
"▁two": 1315,
|
| 1456 |
"▁disp": 1316,
|
|
@@ -1531,13 +1527,13 @@
|
|
| 1531 |
"go": 1391,
|
| 1532 |
"to": 1392,
|
| 1533 |
"omen": 1393,
|
| 1534 |
-
"▁
|
| 1535 |
-
"▁
|
| 1536 |
-
"
|
| 1537 |
-
"
|
| 1538 |
-
"
|
| 1539 |
-
"
|
| 1540 |
-
"
|
| 1541 |
"▁cam": 1401,
|
| 1542 |
"▁meg": 1402,
|
| 1543 |
"▁ter": 1403,
|
|
@@ -1555,36 +1551,36 @@
|
|
| 1555 |
"ka": 1415,
|
| 1556 |
"osc": 1416,
|
| 1557 |
"▁Or": 1417,
|
| 1558 |
-
"
|
| 1559 |
-
"
|
| 1560 |
-
"
|
| 1561 |
-
"
|
| 1562 |
-
"
|
| 1563 |
-
"
|
| 1564 |
-
"
|
| 1565 |
"ytic": 1425,
|
| 1566 |
"▁amb": 1426,
|
| 1567 |
"▁unv": 1427,
|
| 1568 |
"▁mill": 1428,
|
| 1569 |
"auc": 1429,
|
| 1570 |
-
"
|
| 1571 |
-
"
|
| 1572 |
-
"
|
| 1573 |
-
"
|
| 1574 |
-
"
|
| 1575 |
-
"
|
| 1576 |
-
"
|
| 1577 |
-
"
|
| 1578 |
-
"
|
| 1579 |
-
"▁
|
| 1580 |
-
"▁
|
| 1581 |
-
"▁
|
| 1582 |
-
"▁
|
| 1583 |
-
"
|
| 1584 |
-
"
|
| 1585 |
-
"
|
| 1586 |
-
"
|
| 1587 |
-
"
|
| 1588 |
"▁gro": 1448,
|
| 1589 |
"▁leg": 1449,
|
| 1590 |
"ection": 1450,
|
|
@@ -1837,18 +1833,18 @@
|
|
| 1837 |
"rah": 1697,
|
| 1838 |
"▁du": 1698,
|
| 1839 |
"asis": 1699,
|
| 1840 |
-
"
|
| 1841 |
-
"
|
| 1842 |
-
"▁
|
| 1843 |
-
"▁
|
| 1844 |
-
"
|
| 1845 |
-
"
|
| 1846 |
-
"
|
| 1847 |
-
"
|
| 1848 |
-
"
|
| 1849 |
-
"
|
| 1850 |
-
"
|
| 1851 |
-
"
|
| 1852 |
"worm": 1712,
|
| 1853 |
"▁log": 1713,
|
| 1854 |
"ining": 1714,
|
|
@@ -1968,44 +1964,44 @@
|
|
| 1968 |
"itting": 1828,
|
| 1969 |
"ky": 1829,
|
| 1970 |
"aer": 1830,
|
| 1971 |
-
"
|
| 1972 |
-
"
|
| 1973 |
-
"▁
|
| 1974 |
-
"
|
| 1975 |
-
"
|
| 1976 |
-
"
|
| 1977 |
-
"
|
| 1978 |
-
"
|
| 1979 |
-
"
|
| 1980 |
-
"
|
| 1981 |
-
"
|
| 1982 |
-
"
|
| 1983 |
-
"
|
| 1984 |
-
"
|
| 1985 |
-
"
|
| 1986 |
-
"
|
| 1987 |
-
"
|
| 1988 |
-
"
|
| 1989 |
-
"
|
| 1990 |
-
"
|
| 1991 |
-
"
|
| 1992 |
-
"
|
| 1993 |
-
"
|
| 1994 |
-
"
|
| 1995 |
-
"
|
| 1996 |
-
"
|
| 1997 |
-
"
|
| 1998 |
-
"▁
|
| 1999 |
-
"▁
|
| 2000 |
-
"▁
|
| 2001 |
-
"
|
| 2002 |
-
"
|
| 2003 |
-
"
|
| 2004 |
-
"
|
| 2005 |
-
"
|
| 2006 |
-
"
|
| 2007 |
-
"
|
| 2008 |
-
"
|
| 2009 |
"vel": 1869,
|
| 2010 |
"agon": 1870,
|
| 2011 |
"unch": 1871,
|
|
@@ -2038,105 +2034,109 @@
|
|
| 2038 |
"▁dig": 1898,
|
| 2039 |
"rogen": 1899,
|
| 2040 |
"orship": 1900,
|
| 2041 |
-
"
|
| 2042 |
-
"▁
|
| 2043 |
-
"
|
| 2044 |
-
"
|
| 2045 |
-
"
|
| 2046 |
-
"
|
| 2047 |
-
"
|
| 2048 |
-
"
|
| 2049 |
-
"
|
| 2050 |
-
"
|
| 2051 |
-
"
|
| 2052 |
-
"
|
| 2053 |
-
"
|
| 2054 |
-
"
|
| 2055 |
-
"
|
| 2056 |
-
"
|
| 2057 |
-
"
|
| 2058 |
-
"
|
| 2059 |
-
"
|
| 2060 |
-
"
|
| 2061 |
-
"
|
| 2062 |
-
"
|
| 2063 |
-
"
|
| 2064 |
-
"
|
| 2065 |
-
"
|
| 2066 |
-
"
|
| 2067 |
-
"
|
| 2068 |
-
"
|
| 2069 |
-
"
|
| 2070 |
-
"
|
| 2071 |
-
"
|
| 2072 |
-
"
|
| 2073 |
-
"
|
| 2074 |
-
"
|
| 2075 |
-
"
|
| 2076 |
-
"
|
| 2077 |
-
"
|
| 2078 |
-
"
|
| 2079 |
-
"
|
| 2080 |
-
"
|
| 2081 |
-
"
|
| 2082 |
-
"
|
| 2083 |
-
"
|
| 2084 |
-
"
|
| 2085 |
-
"
|
| 2086 |
-
"
|
| 2087 |
-
"
|
| 2088 |
-
"
|
| 2089 |
-
"
|
| 2090 |
-
"
|
| 2091 |
-
"
|
| 2092 |
-
"
|
| 2093 |
-
"
|
| 2094 |
-
"
|
| 2095 |
-
"
|
| 2096 |
-
"
|
| 2097 |
-
"
|
| 2098 |
-
"
|
| 2099 |
-
"
|
| 2100 |
-
"
|
| 2101 |
-
"
|
| 2102 |
-
"
|
| 2103 |
-
"
|
| 2104 |
-
"
|
| 2105 |
-
"
|
| 2106 |
-
"
|
| 2107 |
-
"
|
| 2108 |
-
"
|
| 2109 |
-
"
|
| 2110 |
-
"
|
| 2111 |
-
"
|
| 2112 |
-
"
|
| 2113 |
-
"
|
| 2114 |
-
"
|
| 2115 |
-
"
|
| 2116 |
-
"
|
| 2117 |
-
"
|
| 2118 |
-
"
|
| 2119 |
-
"
|
| 2120 |
-
"
|
| 2121 |
-
"
|
| 2122 |
-
"
|
| 2123 |
-
"
|
| 2124 |
-
"
|
| 2125 |
-
"
|
| 2126 |
-
"
|
| 2127 |
-
"
|
| 2128 |
-
"
|
| 2129 |
-
"
|
| 2130 |
-
"
|
| 2131 |
-
"
|
| 2132 |
-
"
|
| 2133 |
-
"
|
| 2134 |
-
"
|
| 2135 |
-
"
|
| 2136 |
-
"
|
| 2137 |
-
"
|
| 2138 |
-
"
|
| 2139 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2140 |
},
|
| 2141 |
"merges": []
|
| 2142 |
}
|
|
|
|
| 137 |
"end_of_word_suffix": null,
|
| 138 |
"fuse_unk": false,
|
| 139 |
"vocab": {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
"<mask>": 4,
|
| 141 |
"er": 5,
|
| 142 |
"in": 6,
|
|
|
|
| 359 |
"ef": 223,
|
| 360 |
"▁pr": 224,
|
| 361 |
"ating": 225,
|
| 362 |
+
"ment": 226,
|
| 363 |
+
"ick": 227,
|
| 364 |
"li": 228,
|
| 365 |
"ys": 229,
|
| 366 |
"aw": 230,
|
|
|
|
| 418 |
"ew": 282,
|
| 419 |
"▁le": 283,
|
| 420 |
"▁en": 284,
|
| 421 |
+
"▁I": 285,
|
| 422 |
+
"▁ro": 286,
|
| 423 |
"ear": 287,
|
| 424 |
"ille": 288,
|
| 425 |
"▁par": 289,
|
|
|
|
| 452 |
"ox": 316,
|
| 453 |
"ge": 317,
|
| 454 |
"row": 318,
|
| 455 |
+
"pr": 319,
|
| 456 |
+
"▁V": 320,
|
| 457 |
"ash": 321,
|
| 458 |
"let": 322,
|
| 459 |
+
"int": 323,
|
| 460 |
+
"led": 324,
|
| 461 |
"eth": 325,
|
| 462 |
"ative": 326,
|
| 463 |
"▁tw": 327,
|
| 464 |
+
"ont": 328,
|
| 465 |
+
"att": 329,
|
| 466 |
"ore": 330,
|
| 467 |
"▁ad": 331,
|
| 468 |
"ling": 332,
|
|
|
|
| 475 |
"▁wh": 339,
|
| 476 |
"ail": 340,
|
| 477 |
"▁pol": 341,
|
| 478 |
+
"▁y": 342,
|
| 479 |
+
"fl": 343,
|
| 480 |
+
"▁Ch": 344,
|
| 481 |
"ax": 345,
|
| 482 |
"az": 346,
|
| 483 |
+
"ice": 347,
|
| 484 |
+
"ater": 348,
|
| 485 |
"▁tr": 349,
|
| 486 |
"old": 350,
|
| 487 |
"ence": 351,
|
|
|
|
| 504 |
"▁under": 368,
|
| 505 |
"yt": 369,
|
| 506 |
"rig": 370,
|
| 507 |
+
"ik": 371,
|
| 508 |
+
"ates": 372,
|
| 509 |
"▁sl": 373,
|
| 510 |
"ann": 374,
|
| 511 |
+
"ept": 375,
|
| 512 |
+
"yp": 376,
|
| 513 |
"ile": 377,
|
| 514 |
"▁for": 378,
|
| 515 |
"vers": 379,
|
|
|
|
| 535 |
"ey": 399,
|
| 536 |
"▁quasi": 400,
|
| 537 |
"ors": 401,
|
| 538 |
+
"ull": 402,
|
| 539 |
+
"ily": 403,
|
| 540 |
+
"rans": 404,
|
| 541 |
"ship": 405,
|
| 542 |
"▁ne": 406,
|
| 543 |
"ea": 407,
|
|
|
|
| 550 |
"uck": 414,
|
| 551 |
"oot": 415,
|
| 552 |
"ark": 416,
|
| 553 |
+
"erv": 417,
|
| 554 |
+
"ush": 418,
|
| 555 |
"ton": 419,
|
| 556 |
"ou": 420,
|
| 557 |
"ork": 421,
|
|
|
|
| 651 |
"itis": 515,
|
| 652 |
"ari": 516,
|
| 653 |
"▁Al": 517,
|
| 654 |
+
"eli": 518,
|
| 655 |
+
"orr": 519,
|
| 656 |
+
"ual": 520,
|
| 657 |
+
"▁int": 521,
|
| 658 |
+
"yg": 522,
|
| 659 |
"ress": 523,
|
| 660 |
"iveness": 524,
|
| 661 |
"au": 525,
|
|
|
|
| 666 |
"agg": 530,
|
| 667 |
"ank": 531,
|
| 668 |
"les": 532,
|
| 669 |
+
"ern": 533,
|
| 670 |
+
"ify": 534,
|
| 671 |
"ible": 535,
|
| 672 |
"rat": 536,
|
| 673 |
"▁hyper": 537,
|
|
|
|
| 747 |
"▁Z": 611,
|
| 748 |
"yll": 612,
|
| 749 |
"▁cal": 613,
|
| 750 |
+
"ren": 614,
|
| 751 |
+
"ines": 615,
|
| 752 |
+
"▁nond": 616,
|
| 753 |
"ring": 617,
|
| 754 |
"oon": 618,
|
| 755 |
"▁col": 619,
|
|
|
|
| 764 |
"eless": 628,
|
| 765 |
"arn": 629,
|
| 766 |
"▁Ar": 630,
|
| 767 |
+
"opl": 631,
|
| 768 |
+
"▁syn": 632,
|
| 769 |
"▁squ": 633,
|
| 770 |
"▁inf": 634,
|
| 771 |
"▁se": 635,
|
|
|
|
| 783 |
"work": 647,
|
| 784 |
"be": 648,
|
| 785 |
"col": 649,
|
| 786 |
+
"ven": 650,
|
| 787 |
+
"▁Sh": 651,
|
| 788 |
+
"ics": 652,
|
| 789 |
"▁unre": 653,
|
| 790 |
"ipp": 654,
|
| 791 |
"▁nons": 655,
|
|
|
|
| 801 |
"ru": 665,
|
| 802 |
"▁pred": 666,
|
| 803 |
"form": 667,
|
| 804 |
+
"▁end": 668,
|
| 805 |
+
"amm": 669,
|
| 806 |
"ild": 670,
|
| 807 |
"ower": 671,
|
| 808 |
"erg": 672,
|
|
|
|
| 812 |
"off": 676,
|
| 813 |
"▁Ph": 677,
|
| 814 |
"▁ung": 678,
|
| 815 |
+
"▁ver": 679,
|
| 816 |
+
"▁mult": 680,
|
| 817 |
+
"aster": 681,
|
| 818 |
+
"iously": 682,
|
| 819 |
+
"agn": 683,
|
| 820 |
+
"iph": 684,
|
| 821 |
"ological": 685,
|
| 822 |
"rous": 686,
|
| 823 |
"aur": 687,
|
|
|
|
| 850 |
"orth": 714,
|
| 851 |
"▁ref": 715,
|
| 852 |
"eral": 716,
|
| 853 |
+
"ency": 717,
|
| 854 |
+
"▁op": 718,
|
| 855 |
"que": 719,
|
| 856 |
"▁unh": 720,
|
| 857 |
"▁uncon": 721,
|
|
|
|
| 873 |
"urs": 737,
|
| 874 |
"▁rh": 738,
|
| 875 |
"▁sul": 739,
|
| 876 |
+
"yc": 740,
|
| 877 |
+
"leg": 741,
|
| 878 |
+
"iferous": 742,
|
| 879 |
"▁def": 743,
|
| 880 |
"press": 744,
|
| 881 |
"eal": 745,
|
|
|
|
| 887 |
"ump": 751,
|
| 888 |
"▁wor": 752,
|
| 889 |
"iate": 753,
|
| 890 |
+
"▁dep": 754,
|
| 891 |
+
"ured": 755,
|
| 892 |
+
"ean": 756,
|
| 893 |
+
"rec": 757,
|
| 894 |
"ech": 758,
|
| 895 |
"sych": 759,
|
| 896 |
"ently": 760,
|
|
|
|
| 907 |
"ering": 771,
|
| 908 |
"vent": 772,
|
| 909 |
"mon": 773,
|
| 910 |
+
"ject": 774,
|
| 911 |
+
"ially": 775,
|
| 912 |
+
"arr": 776,
|
| 913 |
"▁py": 777,
|
| 914 |
"▁the": 778,
|
| 915 |
"alk": 779,
|
|
|
|
| 930 |
"org": 794,
|
| 931 |
"unc": 795,
|
| 932 |
"▁cap": 796,
|
| 933 |
+
"ili": 797,
|
| 934 |
+
"▁at": 798,
|
| 935 |
+
"outh": 799,
|
| 936 |
+
"▁back": 800,
|
| 937 |
+
"▁prot": 801,
|
| 938 |
"rown": 802,
|
| 939 |
"▁x": 803,
|
| 940 |
"ranch": 804,
|
|
|
|
| 958 |
"right": 822,
|
| 959 |
"omat": 823,
|
| 960 |
"rist": 824,
|
| 961 |
+
"▁ser": 825,
|
| 962 |
+
"omb": 826,
|
| 963 |
"ites": 827,
|
| 964 |
"▁reg": 828,
|
| 965 |
"nesses": 829,
|
|
|
|
| 979 |
"unk": 843,
|
| 980 |
"▁ass": 844,
|
| 981 |
"aped": 845,
|
| 982 |
+
"ras": 846,
|
| 983 |
+
"ened": 847,
|
| 984 |
+
"▁emb": 848,
|
| 985 |
+
"imb": 849,
|
| 986 |
+
"vol": 850,
|
| 987 |
+
"osph": 851,
|
| 988 |
+
"ries": 852,
|
| 989 |
"aced": 853,
|
| 990 |
"hear": 854,
|
| 991 |
"rem": 855,
|
|
|
|
| 1033 |
"eng": 897,
|
| 1034 |
"iac": 898,
|
| 1035 |
"udd": 899,
|
| 1036 |
+
"ek": 900,
|
| 1037 |
+
"over": 901,
|
| 1038 |
+
"▁hand": 902,
|
| 1039 |
+
"bor": 903,
|
| 1040 |
+
"elt": 904,
|
| 1041 |
+
"ingness": 905,
|
| 1042 |
"umin": 906,
|
| 1043 |
"ooth": 907,
|
| 1044 |
"▁world": 908,
|
|
|
|
| 1051 |
"▁rel": 915,
|
| 1052 |
"enz": 916,
|
| 1053 |
"ara": 917,
|
| 1054 |
+
"ext": 918,
|
| 1055 |
+
"ops": 919,
|
| 1056 |
+
"ella": 920,
|
| 1057 |
+
"▁comm": 921,
|
| 1058 |
+
"zz": 922,
|
| 1059 |
"lic": 923,
|
| 1060 |
"arth": 924,
|
| 1061 |
"unct": 925,
|
|
|
|
| 1083 |
"headed": 947,
|
| 1084 |
"usc": 948,
|
| 1085 |
"iter": 949,
|
| 1086 |
+
"po": 950,
|
| 1087 |
+
"ibility": 951,
|
| 1088 |
"occ": 952,
|
| 1089 |
"▁gre": 953,
|
| 1090 |
"ellow": 954,
|
|
|
|
| 1129 |
"ishness": 993,
|
| 1130 |
"osp": 994,
|
| 1131 |
"▁ly": 995,
|
| 1132 |
+
"rate": 996,
|
| 1133 |
+
"aint": 997,
|
| 1134 |
+
"eled": 998,
|
| 1135 |
"▁ang": 999,
|
| 1136 |
"elect": 1000,
|
| 1137 |
"ages": 1001,
|
|
|
|
| 1178 |
"rew": 1042,
|
| 1179 |
"oral": 1043,
|
| 1180 |
"▁med": 1044,
|
| 1181 |
+
"me": 1045,
|
| 1182 |
+
"obl": 1046,
|
| 1183 |
"▁unst": 1047,
|
| 1184 |
+
"osm": 1048,
|
| 1185 |
+
"atin": 1049,
|
| 1186 |
+
"▁can": 1050,
|
| 1187 |
+
"oic": 1051,
|
| 1188 |
+
"elic": 1052,
|
| 1189 |
+
"▁mar": 1053,
|
| 1190 |
+
"▁exc": 1054,
|
| 1191 |
+
"utter": 1055,
|
| 1192 |
+
"atively": 1056,
|
| 1193 |
+
"ophil": 1057,
|
| 1194 |
+
"elling": 1058,
|
| 1195 |
+
"abb": 1059,
|
| 1196 |
+
"opt": 1060,
|
| 1197 |
+
"uth": 1061,
|
| 1198 |
+
"ators": 1062,
|
| 1199 |
+
"▁psych": 1063,
|
| 1200 |
+
"ze": 1064,
|
| 1201 |
"ants": 1065,
|
| 1202 |
"itor": 1066,
|
| 1203 |
"vert": 1067,
|
|
|
|
| 1232 |
"ete": 1096,
|
| 1233 |
"▁ev": 1097,
|
| 1234 |
"uls": 1098,
|
| 1235 |
+
"plic": 1099,
|
| 1236 |
+
"▁del": 1100,
|
| 1237 |
+
"ophag": 1101,
|
| 1238 |
+
"▁dist": 1102,
|
| 1239 |
"▁ped": 1103,
|
| 1240 |
"ution": 1104,
|
| 1241 |
"alt": 1105,
|
|
|
|
| 1251 |
"elike": 1115,
|
| 1252 |
"ilt": 1116,
|
| 1253 |
"uble": 1117,
|
| 1254 |
+
"▁pat": 1118,
|
| 1255 |
+
"aries": 1119,
|
| 1256 |
+
"bed": 1120,
|
| 1257 |
"▁sulph": 1121,
|
| 1258 |
"ises": 1122,
|
| 1259 |
"ately": 1123,
|
|
|
|
| 1285 |
"ipl": 1149,
|
| 1286 |
"irt": 1150,
|
| 1287 |
"yte": 1151,
|
| 1288 |
+
"▁In": 1152,
|
| 1289 |
+
"▁Mar": 1153,
|
| 1290 |
+
"▁gal": 1154,
|
| 1291 |
+
"▁overd": 1155,
|
| 1292 |
"▁fr": 1156,
|
| 1293 |
"ider": 1157,
|
| 1294 |
"olic": 1158,
|
| 1295 |
"tail": 1159,
|
| 1296 |
"▁ost": 1160,
|
| 1297 |
+
"ogg": 1161,
|
| 1298 |
+
"pect": 1162,
|
| 1299 |
+
"▁sym": 1163,
|
| 1300 |
+
"opath": 1164,
|
| 1301 |
"yth": 1165,
|
| 1302 |
"▁cle": 1166,
|
| 1303 |
"opter": 1167,
|
| 1304 |
"▁water": 1168,
|
| 1305 |
+
"ma": 1169,
|
| 1306 |
+
"lex": 1170,
|
| 1307 |
+
"etal": 1171,
|
| 1308 |
+
"ison": 1172,
|
| 1309 |
+
"▁wind": 1173,
|
| 1310 |
+
"action": 1174,
|
| 1311 |
+
"otyp": 1175,
|
| 1312 |
+
"▁reb": 1176,
|
| 1313 |
+
"ident": 1177,
|
| 1314 |
+
"inate": 1178,
|
| 1315 |
+
"▁pur": 1179,
|
| 1316 |
+
"kn": 1180,
|
| 1317 |
"ino": 1181,
|
| 1318 |
"▁go": 1182,
|
| 1319 |
"cons": 1183,
|
|
|
|
| 1442 |
"▁sac": 1306,
|
| 1443 |
"iation": 1307,
|
| 1444 |
"ih": 1308,
|
| 1445 |
+
"▁Un": 1309,
|
| 1446 |
+
"orial": 1310,
|
| 1447 |
+
"▁micro": 1311,
|
| 1448 |
+
"alg": 1312,
|
| 1449 |
+
"oil": 1313,
|
| 1450 |
"▁gen": 1314,
|
| 1451 |
"▁two": 1315,
|
| 1452 |
"▁disp": 1316,
|
|
|
|
| 1527 |
"go": 1391,
|
| 1528 |
"to": 1392,
|
| 1529 |
"omen": 1393,
|
| 1530 |
+
"▁act": 1394,
|
| 1531 |
+
"▁mat": 1395,
|
| 1532 |
+
"▁chrom": 1396,
|
| 1533 |
+
"yb": 1397,
|
| 1534 |
+
"awn": 1398,
|
| 1535 |
+
"urr": 1399,
|
| 1536 |
+
"ired": 1400,
|
| 1537 |
"▁cam": 1401,
|
| 1538 |
"▁meg": 1402,
|
| 1539 |
"▁ter": 1403,
|
|
|
|
| 1551 |
"ka": 1415,
|
| 1552 |
"osc": 1416,
|
| 1553 |
"▁Or": 1417,
|
| 1554 |
+
"aven": 1418,
|
| 1555 |
+
"emic": 1419,
|
| 1556 |
+
"ences": 1420,
|
| 1557 |
+
"▁hard": 1421,
|
| 1558 |
+
"oscope": 1422,
|
| 1559 |
+
"pe": 1423,
|
| 1560 |
+
"den": 1424,
|
| 1561 |
"ytic": 1425,
|
| 1562 |
"▁amb": 1426,
|
| 1563 |
"▁unv": 1427,
|
| 1564 |
"▁mill": 1428,
|
| 1565 |
"auc": 1429,
|
| 1566 |
+
"▁ol": 1430,
|
| 1567 |
+
"ynam": 1431,
|
| 1568 |
+
"▁ren": 1432,
|
| 1569 |
+
"oidal": 1433,
|
| 1570 |
+
"▁fire": 1434,
|
| 1571 |
+
"ape": 1435,
|
| 1572 |
+
"key": 1436,
|
| 1573 |
+
"▁En": 1437,
|
| 1574 |
+
"rong": 1438,
|
| 1575 |
+
"▁bel": 1439,
|
| 1576 |
+
"▁inv": 1440,
|
| 1577 |
+
"▁pet": 1441,
|
| 1578 |
+
"▁pers": 1442,
|
| 1579 |
+
"▁pros": 1443,
|
| 1580 |
+
"ration": 1444,
|
| 1581 |
+
"ini": 1445,
|
| 1582 |
+
"osh": 1446,
|
| 1583 |
+
"ove": 1447,
|
| 1584 |
"▁gro": 1448,
|
| 1585 |
"▁leg": 1449,
|
| 1586 |
"ection": 1450,
|
|
|
|
| 1833 |
"rah": 1697,
|
| 1834 |
"▁du": 1698,
|
| 1835 |
"asis": 1699,
|
| 1836 |
+
"eler": 1700,
|
| 1837 |
+
"esia": 1701,
|
| 1838 |
+
"▁bur": 1702,
|
| 1839 |
+
"▁nit": 1703,
|
| 1840 |
+
"▁not": 1704,
|
| 1841 |
+
"ionist": 1705,
|
| 1842 |
+
"▁heart": 1706,
|
| 1843 |
+
"idi": 1707,
|
| 1844 |
+
"oms": 1708,
|
| 1845 |
+
"ulc": 1709,
|
| 1846 |
+
"ared": 1710,
|
| 1847 |
+
"ases": 1711,
|
| 1848 |
"worm": 1712,
|
| 1849 |
"▁log": 1713,
|
| 1850 |
"ining": 1714,
|
|
|
|
| 1964 |
"itting": 1828,
|
| 1965 |
"ky": 1829,
|
| 1966 |
"aer": 1830,
|
| 1967 |
+
"lem": 1831,
|
| 1968 |
+
"oney": 1832,
|
| 1969 |
+
"▁unch": 1833,
|
| 1970 |
+
"▁recon": 1834,
|
| 1971 |
+
"ada": 1835,
|
| 1972 |
+
"ama": 1836,
|
| 1973 |
+
"orh": 1837,
|
| 1974 |
+
"uzz": 1838,
|
| 1975 |
+
"van": 1839,
|
| 1976 |
+
"▁Ac": 1840,
|
| 1977 |
+
"aric": 1841,
|
| 1978 |
+
"idal": 1842,
|
| 1979 |
+
"▁cow": 1843,
|
| 1980 |
+
"▁tro": 1844,
|
| 1981 |
+
"onger": 1845,
|
| 1982 |
+
"usion": 1846,
|
| 1983 |
+
"▁prim": 1847,
|
| 1984 |
+
"lessly": 1848,
|
| 1985 |
+
"escence": 1849,
|
| 1986 |
+
"yz": 1850,
|
| 1987 |
+
"irl": 1851,
|
| 1988 |
+
"aled": 1852,
|
| 1989 |
+
"eted": 1853,
|
| 1990 |
+
"iner": 1854,
|
| 1991 |
+
"▁cyt": 1855,
|
| 1992 |
+
"ethyl": 1856,
|
| 1993 |
+
"woman": 1857,
|
| 1994 |
+
"▁nonv": 1858,
|
| 1995 |
+
"▁oste": 1859,
|
| 1996 |
+
"▁subd": 1860,
|
| 1997 |
+
"▁overf": 1861,
|
| 1998 |
+
"▁semip": 1862,
|
| 1999 |
+
"obb": 1863,
|
| 2000 |
+
"ona": 1864,
|
| 2001 |
+
"ulf": 1865,
|
| 2002 |
+
"yan": 1866,
|
| 2003 |
+
"▁benz": 1867,
|
| 2004 |
+
"ochrom": 1868,
|
| 2005 |
"vel": 1869,
|
| 2006 |
"agon": 1870,
|
| 2007 |
"unch": 1871,
|
|
|
|
| 2034 |
"▁dig": 1898,
|
| 2035 |
"rogen": 1899,
|
| 2036 |
"orship": 1900,
|
| 2037 |
+
"e": 1901,
|
| 2038 |
+
"▁": 1902,
|
| 2039 |
+
"i": 1903,
|
| 2040 |
+
"a": 1904,
|
| 2041 |
+
"n": 1905,
|
| 2042 |
+
"o": 1906,
|
| 2043 |
+
"r": 1907,
|
| 2044 |
+
"s": 1908,
|
| 2045 |
+
"t": 1909,
|
| 2046 |
+
"l": 1910,
|
| 2047 |
+
"c": 1911,
|
| 2048 |
+
"u": 1912,
|
| 2049 |
+
"d": 1913,
|
| 2050 |
+
"p": 1914,
|
| 2051 |
+
"m": 1915,
|
| 2052 |
+
"h": 1916,
|
| 2053 |
+
"g": 1917,
|
| 2054 |
+
"y": 1918,
|
| 2055 |
+
"b": 1919,
|
| 2056 |
+
"f": 1920,
|
| 2057 |
+
"-": 1921,
|
| 2058 |
+
"v": 1922,
|
| 2059 |
+
"k": 1923,
|
| 2060 |
+
"w": 1924,
|
| 2061 |
+
"z": 1925,
|
| 2062 |
+
"x": 1926,
|
| 2063 |
+
"S": 1927,
|
| 2064 |
+
"C": 1928,
|
| 2065 |
+
"A": 1929,
|
| 2066 |
+
"q": 1930,
|
| 2067 |
+
"M": 1931,
|
| 2068 |
+
"P": 1932,
|
| 2069 |
+
"j": 1933,
|
| 2070 |
+
"B": 1934,
|
| 2071 |
+
"T": 1935,
|
| 2072 |
+
"L": 1936,
|
| 2073 |
+
"'": 1937,
|
| 2074 |
+
"D": 1938,
|
| 2075 |
+
"H": 1939,
|
| 2076 |
+
"G": 1940,
|
| 2077 |
+
"E": 1941,
|
| 2078 |
+
"R": 1942,
|
| 2079 |
+
"N": 1943,
|
| 2080 |
+
"F": 1944,
|
| 2081 |
+
"K": 1945,
|
| 2082 |
+
"O": 1946,
|
| 2083 |
+
"I": 1947,
|
| 2084 |
+
"W": 1948,
|
| 2085 |
+
".": 1949,
|
| 2086 |
+
"J": 1950,
|
| 2087 |
+
"V": 1951,
|
| 2088 |
+
"U": 1952,
|
| 2089 |
+
"Z": 1953,
|
| 2090 |
+
"Y": 1954,
|
| 2091 |
+
"/": 1955,
|
| 2092 |
+
"Q": 1956,
|
| 2093 |
+
"X": 1957,
|
| 2094 |
+
":": 1958,
|
| 2095 |
+
"[": 1959,
|
| 2096 |
+
"]": 1960,
|
| 2097 |
+
"(": 1961,
|
| 2098 |
+
")": 1962,
|
| 2099 |
+
"2": 1963,
|
| 2100 |
+
"1": 1964,
|
| 2101 |
+
"0": 1965,
|
| 2102 |
+
"=": 1966,
|
| 2103 |
+
",": 1967,
|
| 2104 |
+
"ı": 1968,
|
| 2105 |
+
"#": 1969,
|
| 2106 |
+
"3": 1970,
|
| 2107 |
+
"4": 1971,
|
| 2108 |
+
"?": 1972,
|
| 2109 |
+
"\"": 1973,
|
| 2110 |
+
"ü": 1974,
|
| 2111 |
+
"!": 1975,
|
| 2112 |
+
"&": 1976,
|
| 2113 |
+
"5": 1977,
|
| 2114 |
+
"ş": 1978,
|
| 2115 |
+
"6": 1979,
|
| 2116 |
+
"8": 1980,
|
| 2117 |
+
"`": 1981,
|
| 2118 |
+
"ö": 1982,
|
| 2119 |
+
"9": 1983,
|
| 2120 |
+
"ğ": 1984,
|
| 2121 |
+
"%": 1985,
|
| 2122 |
+
"@": 1986,
|
| 2123 |
+
"7": 1987,
|
| 2124 |
+
"<": 1988,
|
| 2125 |
+
">": 1989,
|
| 2126 |
+
"Ö": 1990,
|
| 2127 |
+
"ç": 1991,
|
| 2128 |
+
"$": 1992,
|
| 2129 |
+
"+": 1993,
|
| 2130 |
+
";": 1994,
|
| 2131 |
+
"Ç": 1995,
|
| 2132 |
+
"Ü": 1996,
|
| 2133 |
+
"Ğ": 1997,
|
| 2134 |
+
"İ": 1998,
|
| 2135 |
+
"Ş": 1999,
|
| 2136 |
+
"<pad>": 0,
|
| 2137 |
+
"<unk>": 1,
|
| 2138 |
+
"<s>": 2,
|
| 2139 |
+
"</s>": 3
|
| 2140 |
},
|
| 2141 |
"merges": []
|
| 2142 |
}
|