Commit
·
38d7771
1
Parent(s):
705eb2e
Upload tokenizer
Browse files- tokenizer.json +150 -2
tokenizer.json
CHANGED
|
@@ -386,7 +386,81 @@
|
|
| 386 |
"Ġdaayim": 324,
|
| 387 |
"Ġyuxw": 325,
|
| 388 |
"Ġaloohl": 326,
|
| 389 |
-
"Ġbax": 327
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
},
|
| 391 |
"merges": [
|
| 392 |
"s t",
|
|
@@ -669,7 +743,81 @@
|
|
| 669 |
"Ġd aayim",
|
| 670 |
"Ġyu xw",
|
| 671 |
"Ġa loohl",
|
| 672 |
-
"Ġb ax"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
]
|
| 674 |
}
|
| 675 |
}
|
|
|
|
| 386 |
"Ġdaayim": 324,
|
| 387 |
"Ġyuxw": 325,
|
| 388 |
"Ġaloohl": 326,
|
| 389 |
+
"Ġbax": 327,
|
| 390 |
+
"Ġbaasx": 328,
|
| 391 |
+
"Ġligit": 329,
|
| 392 |
+
"Ġjok": 330,
|
| 393 |
+
"Ġsg": 331,
|
| 394 |
+
"Ġsi": 332,
|
| 395 |
+
"ĠSpain": 333,
|
| 396 |
+
"nakwhl": 334,
|
| 397 |
+
"Ġhehl": 335,
|
| 398 |
+
"Ġhediit": 336,
|
| 399 |
+
"diithl": 337,
|
| 400 |
+
"witxwit": 338,
|
| 401 |
+
"Ġjaphl": 339,
|
| 402 |
+
"nithl": 340,
|
| 403 |
+
"ytxwhl": 341,
|
| 404 |
+
"Ġxhlii": 342,
|
| 405 |
+
"Ġdaayimaahl": 343,
|
| 406 |
+
"Ġyuxwdiithl": 344,
|
| 407 |
+
"Ġbaasxi": 345,
|
| 408 |
+
"Nakwhl": 346,
|
| 409 |
+
"gwi": 347,
|
| 410 |
+
"ukwhl": 348,
|
| 411 |
+
"yukwhl": 349,
|
| 412 |
+
"ĠAk": 350,
|
| 413 |
+
"ĠAgwi": 351,
|
| 414 |
+
"ĠAgwiyukwhl": 352,
|
| 415 |
+
"BM": 353,
|
| 416 |
+
"De": 354,
|
| 417 |
+
"Gi": 355,
|
| 418 |
+
"IBM": 356,
|
| 419 |
+
"aw": 357,
|
| 420 |
+
"ail": 358,
|
| 421 |
+
"ce": 359,
|
| 422 |
+
"ff": 360,
|
| 423 |
+
"gee": 361,
|
| 424 |
+
"it": 362,
|
| 425 |
+
"iwaa": 363,
|
| 426 |
+
"ice": 364,
|
| 427 |
+
"jit": 365,
|
| 428 |
+
"ljit": 366,
|
| 429 |
+
"mar": 367,
|
| 430 |
+
"mail": 368,
|
| 431 |
+
"nmar": 369,
|
| 432 |
+
"oxs": 370,
|
| 433 |
+
"off": 371,
|
| 434 |
+
"si": 372,
|
| 435 |
+
"wan": 373,
|
| 436 |
+
"way": 374,
|
| 437 |
+
"yo": 375,
|
| 438 |
+
"ĠDe": 376,
|
| 439 |
+
"ĠGi": 377,
|
| 440 |
+
"ĠIBM": 378,
|
| 441 |
+
"Ġmail": 379,
|
| 442 |
+
"Ġoff": 380,
|
| 443 |
+
"niiwan": 381,
|
| 444 |
+
"niiyo": 382,
|
| 445 |
+
"xsiwaa": 383,
|
| 446 |
+
"Ġsaw": 384,
|
| 447 |
+
"nix": 385,
|
| 448 |
+
"Ġwok": 386,
|
| 449 |
+
"atdiit": 387,
|
| 450 |
+
"̲.\"": 388,
|
| 451 |
+
"oosun": 389,
|
| 452 |
+
"ĠAp": 390,
|
| 453 |
+
"Ġamxsiwaa": 391,
|
| 454 |
+
"Ġaks": 392,
|
| 455 |
+
"geenix": 393,
|
| 456 |
+
"nmark": 394,
|
| 457 |
+
"oxsxw": 395,
|
| 458 |
+
"wayi": 396,
|
| 459 |
+
"ĠDenmark": 397,
|
| 460 |
+
"ĠGigeenix": 398,
|
| 461 |
+
"Ġoffice": 399,
|
| 462 |
+
"Ġsawatdiit": 400,
|
| 463 |
+
"ytxw": 401
|
| 464 |
},
|
| 465 |
"merges": [
|
| 466 |
"s t",
|
|
|
|
| 743 |
"Ġd aayim",
|
| 744 |
"Ġyu xw",
|
| 745 |
"Ġa loohl",
|
| 746 |
+
"Ġb ax",
|
| 747 |
+
"Ġb aasx",
|
| 748 |
+
"Ġligi t",
|
| 749 |
+
"Ġj ok",
|
| 750 |
+
"Ġs g",
|
| 751 |
+
"Ġs i",
|
| 752 |
+
"ĠS pain",
|
| 753 |
+
"na kwhl",
|
| 754 |
+
"Ġhe hl",
|
| 755 |
+
"Ġhe diit",
|
| 756 |
+
"diit hl",
|
| 757 |
+
"wit xwit",
|
| 758 |
+
"Ġja phl",
|
| 759 |
+
"nit hl",
|
| 760 |
+
"yt xwhl",
|
| 761 |
+
"Ġxhl ii",
|
| 762 |
+
"Ġdaayim aahl",
|
| 763 |
+
"Ġyuxw diithl",
|
| 764 |
+
"Ġbaasx i",
|
| 765 |
+
"N akwhl",
|
| 766 |
+
"g wi",
|
| 767 |
+
"u kwhl",
|
| 768 |
+
"y ukwhl",
|
| 769 |
+
"ĠA k",
|
| 770 |
+
"ĠA gwi",
|
| 771 |
+
"ĠAgwi yukwhl",
|
| 772 |
+
"B M",
|
| 773 |
+
"D e",
|
| 774 |
+
"G i",
|
| 775 |
+
"I BM",
|
| 776 |
+
"a w",
|
| 777 |
+
"a il",
|
| 778 |
+
"c e",
|
| 779 |
+
"f f",
|
| 780 |
+
"g ee",
|
| 781 |
+
"i t",
|
| 782 |
+
"i waa",
|
| 783 |
+
"i ce",
|
| 784 |
+
"j it",
|
| 785 |
+
"l jit",
|
| 786 |
+
"m ar",
|
| 787 |
+
"m ail",
|
| 788 |
+
"n mar",
|
| 789 |
+
"o xs",
|
| 790 |
+
"o ff",
|
| 791 |
+
"s i",
|
| 792 |
+
"w an",
|
| 793 |
+
"w ay",
|
| 794 |
+
"y o",
|
| 795 |
+
"Ġ De",
|
| 796 |
+
"Ġ Gi",
|
| 797 |
+
"Ġ IBM",
|
| 798 |
+
"Ġ mail",
|
| 799 |
+
"Ġ off",
|
| 800 |
+
"nii wan",
|
| 801 |
+
"nii yo",
|
| 802 |
+
"xs iwaa",
|
| 803 |
+
"Ġs aw",
|
| 804 |
+
"ni x",
|
| 805 |
+
"Ġw ok",
|
| 806 |
+
"at diit",
|
| 807 |
+
"̲. \"",
|
| 808 |
+
"oos un",
|
| 809 |
+
"ĠA p",
|
| 810 |
+
"Ġam xsiwaa",
|
| 811 |
+
"Ġak s",
|
| 812 |
+
"gee nix",
|
| 813 |
+
"nmar k",
|
| 814 |
+
"oxs xw",
|
| 815 |
+
"way i",
|
| 816 |
+
"ĠDe nmark",
|
| 817 |
+
"ĠGi geenix",
|
| 818 |
+
"Ġoff ice",
|
| 819 |
+
"Ġsaw atdiit",
|
| 820 |
+
"yt xw"
|
| 821 |
]
|
| 822 |
}
|
| 823 |
}
|