{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "[SOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "[SOS]": 5, "[EOS]": 6, "[SPACE]": 7, "’": 8, "Ng": 9, "a": 10, "e": 11, "ng": 12, "o": 13, "\u0000": 14, "\u0001": 15, "\u0002": 16, "\u0003": 17, "\u0004": 18, "\u0005": 19, "\u0006": 20, "\u0007": 21, "\b": 22, "\u000e": 23, "\u000f": 24, "\u0010": 25, "\u0011": 26, "\u0012": 27, "\u0013": 28, "\u0014": 29, "\u0015": 30, "\u0016": 31, "\u0017": 32, "\u0018": 33, "\u0019": 34, "\u001a": 35, "\u001b": 36, "\u001c": 37, "\u001d": 38, "\u001e": 39, "\u001f": 40, "!": 41, "\"": 42, "#": 43, "$": 44, "%": 45, "&": 46, "'": 47, "(": 48, ")": 49, "*": 50, "+": 51, ",": 52, "-": 53, ".": 54, "/": 55, "0": 56, "1": 57, "2": 58, "3": 59, "4": 60, "5": 61, "6": 62, "7": 63, "8": 64, "9": 65, ":": 66, ";": 67, "<": 68, "=": 69, ">": 70, "?": 71, "@": 72, "A": 73, "B": 74, "Ba": 75, "Be": 76, "Bi": 77, "Bo": 78, "Bu": 79, "Bwa": 80, "Bwe": 81, "Bwi": 82, "C": 83, "Cha": 84, "Che": 85, "Chi": 86, "Cho": 87, "Chu": 88, "Chwa": 89, "Chwe": 90, "Chwi": 91, "D": 92, "Da": 93, "De": 94, "Dha": 95, "Dhe": 96, "Dhi": 97, "Dho": 98, "Dhu": 99, "Di": 100, "Do": 101, "Du": 102, "E": 103, "F": 104, "Fa": 105, "Fe": 106, "Fi": 107, "Fo": 108, "Fu": 109, "G": 110, "Ga": 111, "Ge": 112, "Gha": 113, "Ghe": 114, "Ghi": 115, "Gho": 116, "Ghu": 117, "Gi": 118, "Go": 119, "Gu": 120, "Gwa": 121, "Gwe": 122, "Gwi": 123, "H": 124, "Ha": 125, "He": 126, "Hi": 127, "Ho": 128, "Hu": 129, "I": 130, "J": 131, "Ja": 132, "Je": 133, "Ji": 134, "Jo": 135, "Ju": 136, "Jwa": 137, "Jwe": 138, "Jwi": 139, "K": 140, "Ka": 141, "Ke": 142, "Kha": 143, "Khe": 144, "Kho": 145, "Khu": 146, "Ki": 147, "Ko": 148, "Ku": 149, "Kwa": 150, "Kwe": 151, "Kwi": 152, "L": 153, "La": 154, "Le": 155, "Li": 156, "Lo": 157, "Lu": 158, "Lwa": 159, "Lwe": 160, "Lwi": 161, "M": 162, "Ma": 163, "Mba": 164, "Mbe": 165, "Mbi": 166, "Mbo": 167, "Mbu": 168, "Mbwa": 169, "Mbwe": 170, "Mbwi": 171, "Me": 172, "Mi": 173, "Mo": 174, "Mu": 175, "Mwa": 176, "Mwe": 177, "Mwi": 178, "N": 179, "Na": 180, "Nda": 181, "Nde": 182, "Ndi": 183, "Ndo": 184, "Ndu": 185, "Ndwa": 186, "Ndwe": 187, "Ndwi": 188, "Ne": 189, "Nga": 190, "Nge": 191, "Ngi": 192, "Ngo": 193, "Ngu": 194, "Ngwa": 195, "Ngwe": 196, "Ngwi": 197, "Ni": 198, "Nja": 199, "Nje": 200, "Nji": 201, "Njo": 202, "Nju": 203, "Njwa": 204, "Njwe": 205, "Njwi": 206, "No": 207, "Nu": 208, "Nya": 209, "Nye": 210, "Nyi": 211, "Nyo": 212, "Nyu": 213, "Nywa": 214, "Nywe": 215, "Nza": 216, "Nze": 217, "Nzi": 218, "Nzo": 219, "Nzu": 220, "O": 221, "P": 222, "Pa": 223, "Pe": 224, "Pi": 225, "Po": 226, "Pu": 227, "Pwa": 228, "Pwe": 229, "Pwi": 230, "Pwo": 231, "Q": 232, "R": 233, "Ra": 234, "Re": 235, "Ri": 236, "Ro": 237, "Ru": 238, "S": 239, "Sa": 240, "Se": 241, "Sha": 242, "She": 243, "Shi": 244, "Sho": 245, "Shu": 246, "Shwa": 247, "Shwe": 248, "Shwi": 249, "Si": 250, "So": 251, "Su": 252, "Swa": 253, "Swe": 254, "Swi": 255, "T": 256, "Ta": 257, "Te": 258, "Tha": 259, "The": 260, "Thi": 261, "Tho": 262, "Thu": 263, "Ti": 264, "To": 265, "Twa": 266, "Twe": 267, "Twi": 268, "U": 269, "V": 270, "Va": 271, "Ve": 272, "Vi": 273, "Vo": 274, "Vu": 275, "Vya": 276, "Vye": 277, "Vyo": 278, "W": 279, "Wa": 280, "We": 281, "Wi": 282, "Wo": 283, "Wu": 284, "X": 285, "Y": 286, "Ya": 287, "Ye": 288, "Yi": 289, "Yo": 290, "Yu": 291, "Z": 292, "Za": 293, "Ze": 294, "Zi": 295, "Zo": 296, "Zu": 297, "Zwa": 298, "Zwe": 299, "Zwi": 300, "[": 301, "\\": 302, "]": 303, "^": 304, "_": 305, "`": 306, "b": 307, "ba": 308, "be": 309, "bi": 310, "bo": 311, "bu": 312, "bwa": 313, "bwe": 314, "bwi": 315, "c": 316, "cha": 317, "che": 318, "chi": 319, "cho": 320, "chu": 321, "chwa": 322, "chwe": 323, "chwi": 324, "d": 325, "da": 326, "de": 327, "dha": 328, "dhe": 329, "dhi": 330, "dho": 331, "dhu": 332, "di": 333, "do": 334, "du": 335, "f": 336, "fa": 337, "fe": 338, "fi": 339, "fo": 340, "fu": 341, "g": 342, "ga": 343, "ge": 344, "gha": 345, "ghe": 346, "ghi": 347, "gho": 348, "ghu": 349, "gi": 350, "go": 351, "gu": 352, "gwa": 353, "gwe": 354, "gwi": 355, "h": 356, "ha": 357, "he": 358, "hi": 359, "ho": 360, "hu": 361, "i": 362, "j": 363, "ja": 364, "je": 365, "ji": 366, "jo": 367, "ju": 368, "jwa": 369, "jwe": 370, "jwi": 371, "k": 372, "ka": 373, "ke": 374, "kha": 375, "khe": 376, "kho": 377, "khu": 378, "ki": 379, "ko": 380, "ku": 381, "kwa": 382, "kwe": 383, "kwi": 384, "l": 385, "la": 386, "le": 387, "li": 388, "lo": 389, "lu": 390, "lwa": 391, "lwe": 392, "lwi": 393, "m": 394, "ma": 395, "mba": 396, "mbe": 397, "mbi": 398, "mbo": 399, "mbu": 400, "mbwa": 401, "mbwe": 402, "mbwi": 403, "me": 404, "mi": 405, "mo": 406, "mu": 407, "mwa": 408, "mwe": 409, "mwi": 410, "n": 411, "na": 412, "nda": 413, "nde": 414, "ndi": 415, "ndo": 416, "ndu": 417, "ndwa": 418, "ndwe": 419, "ndwi": 420, "ne": 421, "nga": 422, "nge": 423, "ngi": 424, "ngo": 425, "ngu": 426, "ngwa": 427, "ngwe": 428, "ngwi": 429, "ni": 430, "nja": 431, "nje": 432, "nji": 433, "njo": 434, "nju": 435, "njwa": 436, "njwe": 437, "njwi": 438, "no": 439, "nu": 440, "nya": 441, "nye": 442, "nyi": 443, "nyo": 444, "nyu": 445, "nywa": 446, "nywe": 447, "nza": 448, "nze": 449, "nzi": 450, "nzo": 451, "nzu": 452, "p": 453, "pa": 454, "pe": 455, "pi": 456, "po": 457, "pu": 458, "pwa": 459, "pwe": 460, "pwi": 461, "pwo": 462, "q": 463, "r": 464, "ra": 465, "re": 466, "ri": 467, "ro": 468, "ru": 469, "s": 470, "sa": 471, "se": 472, "sha": 473, "she": 474, "shi": 475, "sho": 476, "shu": 477, "shwa": 478, "shwe": 479, "shwi": 480, "si": 481, "so": 482, "su": 483, "swa": 484, "swe": 485, "swi": 486, "t": 487, "ta": 488, "te": 489, "tha": 490, "the": 491, "thi": 492, "tho": 493, "thu": 494, "ti": 495, "to": 496, "twa": 497, "twe": 498, "twi": 499, "u": 500, "v": 501, "va": 502, "ve": 503, "vi": 504, "vo": 505, "vu": 506, "vya": 507, "vye": 508, "vyo": 509, "w": 510, "wa": 511, "we": 512, "wi": 513, "wo": 514, "wu": 515, "x": 516, "y": 517, "ya": 518, "ye": 519, "yi": 520, "yo": 521, "yu": 522, "z": 523, "za": 524, "ze": 525, "zi": 526, "zo": 527, "zu": 528, "zwa": 529, "zwe": 530, "zwi": 531, "{": 532, "|": 533, "}": 534, "~": 535, "": 536, "€": 537, "": 538, "‚": 539, "ƒ": 540, "„": 541, "†": 542, "‡": 543, "ˆ": 544, "‰": 545, "Š": 546, "‹": 547, "Œ": 548, "": 549, "Ž": 550, "": 551, "": 552, "‘": 553, "’": 554, "“": 555, "”": 556, "•": 557, "–": 558, "—": 559, "˜": 560, "™": 561, "š": 562, "›": 563, "œ": 564, "": 565, "ž": 566, "Ÿ": 567, "¡": 568, "¢": 569, "£": 570, "¤": 571, "¥": 572, "¦": 573, "§": 574, "¨": 575, "©": 576, "ª": 577, "«": 578, "¬": 579, "­": 580, "®": 581, "¯": 582, "°": 583, "±": 584, "²": 585, "³": 586, "´": 587, "µ": 588, "¶": 589, "·": 590, "¸": 591, "¹": 592, "º": 593, "»": 594, "¼": 595, "½": 596, "¾": 597, "¿": 598, "À": 599, "Á": 600, "Â": 601, "Ã": 602, "Ä": 603, "Å": 604, "Æ": 605, "Ç": 606, "È": 607, "É": 608, "Ê": 609, "Ë": 610, "Ì": 611, "Í": 612, "Î": 613, "Ï": 614, "Ð": 615, "Ñ": 616, "Ò": 617, "Ó": 618, "Ô": 619, "Õ": 620, "Ö": 621, "×": 622, "Ø": 623, "Ù": 624, "Ú": 625, "Û": 626, "Ü": 627, "Ý": 628, "Þ": 629, "ß": 630, "à": 631, "á": 632, "â": 633, "ã": 634, "ä": 635, "å": 636, "æ": 637, "ç": 638, "è": 639, "é": 640, "ê": 641, "ë": 642, "ì": 643, "í": 644, "î": 645, "ï": 646, "ð": 647, "ñ": 648, "ò": 649, "ó": 650, "ô": 651, "õ": 652, "ö": 653, "÷": 654, "ø": 655, "ù": 656, "ú": 657, "û": 658, "ü": 659, "ý": 660, "þ": 661 }, "unk_token": "[UNK]" } }