{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 255, "content": "[START]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "[STOP]": 0, "[UNK]": 1, "[SPACE]": 2, " ": 3, "!": 4, "%": 5, "&": 6, "'": 7, ",": 8, "-": 9, ".": 10, "0": 11, "1": 12, "2": 13, "3": 14, "4": 15, "5": 16, "6": 17, "7": 18, "8": 19, "9": 20, "?": 21, "a": 22, "b": 23, "c": 24, "d": 25, "e": 26, "f": 27, "g": 28, "h": 29, "i": 30, "j": 31, "k": 32, "l": 33, "m": 34, "n": 35, "o": 36, "p": 37, "q": 38, "r": 39, "s": 40, "t": 41, "u": 42, "v": 43, "w": 44, "x": 45, "y": 46, "z": 47, "à": 48, "á": 49, "ả": 50, "ã": 51, "ạ": 52, "ă": 53, "ắ": 54, "ằ": 55, "ẳ": 56, "ẵ": 57, "ặ": 58, "â": 59, "ấ": 60, "ầ": 61, "ẩ": 62, "ẫ": 63, "ậ": 64, "è": 65, "é": 66, "ẻ": 67, "ẽ": 68, "ẹ": 69, "ê": 70, "ế": 71, "ề": 72, "ể": 73, "ễ": 74, "ệ": 75, "ì": 76, "í": 77, "ỉ": 78, "ĩ": 79, "ị": 80, "ò": 81, "ó": 82, "ỏ": 83, "õ": 84, "ọ": 85, "ô": 86, "ố": 87, "ồ": 88, "ổ": 89, "ỗ": 90, "ộ": 91, "ơ": 92, "ớ": 93, "ờ": 94, "ở": 95, "ỡ": 96, "ợ": 97, "ù": 98, "ú": 99, "ủ": 100, "ũ": 101, "ụ": 102, "ư": 103, "ứ": 104, "ừ": 105, "ử": 106, "ữ": 107, "ự": 108, "ỳ": 109, "ý": 110, "ỷ": 111, "ỹ": 112, "ỵ": 113, "đ": 114, "A": 115, "B": 116, "C": 117, "D": 118, "E": 119, "F": 120, "G": 
121, "H": 122, "I": 123, "J": 124, "K": 125, "L": 126, "M": 127, "N": 128, "O": 129, "P": 130, "Q": 131, "R": 132, "S": 133, "T": 134, "U": 135, "V": 136, "W": 137, "X": 138, "Y": 139, "Z": 140, "À": 141, "Á": 142, "Ả": 143, "Ã": 144, "Ạ": 145, "Ă": 146, "Ắ": 147, "Ằ": 148, "Ẳ": 149, "Ẵ": 150, "Ặ": 151, "Â": 152, "Ấ": 153, "Ầ": 154, "Ẩ": 155, "Ẫ": 156, "Ậ": 157, "È": 158, "É": 159, "Ẻ": 160, "Ẽ": 161, "Ẹ": 162, "Ê": 163, "Ế": 164, "Ề": 165, "Ể": 166, "Ễ": 167, "Ệ": 168, "Ì": 169, "Í": 170, "Ỉ": 171, "Ĩ": 172, "Ị": 173, "Ò": 174, "Ó": 175, "Ỏ": 176, "Õ": 177, "Ọ": 178, "Ô": 179, "Ố": 180, "Ồ": 181, "Ổ": 182, "Ỗ": 183, "Ộ": 184, "Ơ": 185, "Ớ": 186, "Ờ": 187, "Ở": 188, "Ỡ": 189, "Ợ": 190, "Ù": 191, "Ú": 192, "Ủ": 193, "Ũ": 194, "Ụ": 195, "Ư": 196, "Ứ": 197, "Ừ": 198, "Ử": 199, "Ữ": 200, "Ự": 201, "Ỳ": 202, "Ý": 203, "Ỷ": 204, "Ỹ": 205, "Ỵ": 206, "Đ": 207, ":": 208, ";": 209, "(": 210, ")": 211, "[": 212, "]": 213, "{": 214, "}": 215, "/": 216, "\\": 217, "@": 218, "#": 219, "$": 220, "*": 221, "+": 222, "=": 223, "<": 224, ">": 225, "~": 226, "`": 227, "^": 228, "_": 229, "|": 230, "\"": 231, ", ": 232, "…": 233, "—": 234, "–": 235, "\uFF0C": 236, "、": 237, "。": 238, "\uFF01": 239, "\uFF1F": 240, "°": 241, "±": 242, "×": 243, "÷": 244, "€": 245, "£": 246, "¥": 247, "ƀ": 248, "Ɓ": 249, "Ƃ": 250, "ƃ": 251, "Ƅ": 252, "ƅ": 253, "Ɔ": 254, "[START]": 255, "ng": 256, "nh": 257, "th": 258, "ch": 259, "tr": 260, "kh": 261, "ph": 262, "gh": 263, "gi": 264, "qu": 265, "có": 266, "là": 267, "và": 268, "một": 269, "của": 270, "không": 271, "thể": 272, "người": 273, "các": 274, "trong": 275, "những": 276, "cho": 277, "để": 278, "được": 279, "tôi": 280, "bạn": 281, "với": 282, "đã": 283, "sự": 284, "ta": 285, "việc": 286, "sẽ": 287, "chúng": 288, "khi": 289, "cũng": 290, "như": 291, "mà": 292, "đến": 293, "ra": 294, "này": 295, "từ": 296, "về": 297, "nên": 298, "sau": 299, "thì": 300, "năm": 301, "ngày": 302, "họ": 303, "mình": 304, "rất": 305, "đang": 306, "còn": 307, "vẫn": 308, "đều": 309, 
"cả": 310, "nhiều": 311, "nào": 312, "hay": 313, "đó": 314, "nó": 315, "ai": 316, "gì": 317, "đây": 318, "đấy": 319, "ấy": 320, "kia": 321, "nọ": 322, "bao": 323, "bất": 324, "cứ": 325, "mỗi": 326, "mọi": 327, "tất": 328, "toàn": 329, "cùng": 330, "nhau": 331, "nhất": 332, "hơn": 333, "lại": 334, "nữa": 335, "thêm": 336, "luôn": 337, "vừa": 338, "mới": 339, "sắp": 340, "rồi": 341, "xong": 342, "hết": 343, "bị": 344, "phải": 345, "muốn": 346, "thích": 347, "yêu": 348, "ghét": 349, "biết": 350, "hiểu": 351, "nghĩ": 352, "tin": 353, "làm": 354, "nói": 355, "hỏi": 356, "trả": 357, "lời": 358, "kể": 359, "bảo": 360, "gọi": 361, "đọc": 362, "viết": 363, "nghe": 364, "nhìn": 365, "thấy": 366, "Ā": 367, "ā": 368, "Ą": 369, "ą": 370, "Ć": 371, "ć": 372, "Ĉ": 373, "ĉ": 374, "Ċ": 375, "ċ": 376, "Č": 377, "č": 378, "Ď": 379, "ď": 380, "Ē": 381, "ē": 382, "Ĕ": 383, "ĕ": 384, "Ė": 385, "ė": 386, "Ę": 387, "ę": 388, "Ě": 389, "ě": 390, "Ĝ": 391, "ĝ": 392, "Ğ": 393, "ğ": 394, "Ġ": 395, "ġ": 396, "Ģ": 397, "ģ": 398, "Ĥ": 399, "ĥ": 400, "Ħ": 401, "ħ": 402, "Ī": 403, "ī": 404, "Ĭ": 405, "ĭ": 406, "Į": 407, "į": 408, "İ": 409, "ı": 410, "IJ": 411, "ij": 412, "Ĵ": 413, "ĵ": 414, "Ķ": 415, "ķ": 416, "ĸ": 417, "Ĺ": 418, "ĺ": 419, "Ļ": 420, "ļ": 421, "Ľ": 422, "ľ": 423, "Ŀ": 424, "ŀ": 425, "Ł": 426, "ł": 427, "Ń": 428, "ń": 429, "Ņ": 430, "ņ": 431, "Ň": 432, "ň": 433, "ʼn": 434, "Ŋ": 435, "ŋ": 436, "Ō": 437, "ō": 438, "Ŏ": 439, "ŏ": 440, "Ő": 441, "ő": 442, "Œ": 443, "œ": 444, "Ŕ": 445, "ŕ": 446, "Ŗ": 447, "ŗ": 448, "Ř": 449, "ř": 450, "Ś": 451, "ś": 452, "Ŝ": 453, "ŝ": 454, "Ş": 455, "ş": 456, "Š": 457, "š": 458, "Ţ": 459, "ţ": 460, "Ť": 461, "ť": 462, "Ŧ": 463, "ŧ": 464, "Ū": 465, "ū": 466, "Ŭ": 467, "ŭ": 468, "Ů": 469, "ů": 470, "Ű": 471, "ű": 472, "Ų": 473, "ų": 474, "Ŵ": 475, "ŵ": 476, "Ŷ": 477, "ŷ": 478, "Ÿ": 479, "Ź": 480, "ź": 481, "Ż": 482, "ż": 483, "Ž": 484, "ž": 485, "ſ": 486, "Ƈ": 487, "ƈ": 488, "Ɖ": 489, "Ɗ": 490, "Ƌ": 491, "ƌ": 492, "ƍ": 493, "Ǝ": 494, "Ə": 495, "Ɛ": 496, 
"Ƒ": 497, "ƒ": 498, "Ɠ": 499, "Ɣ": 500, "ƕ": 501, "Ɩ": 502, "Ɨ": 503, "Ƙ": 504, "ƙ": 505, "ƚ": 506, "ƛ": 507, "Ɯ": 508, "Ɲ": 509, "ƞ": 510, "Ɵ": 511, "Ƣ": 512, "ƣ": 513, "Ƥ": 514, "ƥ": 515, "Ʀ": 516, "Ƨ": 517, "ƨ": 518, "Ʃ": 519, "ƪ": 520, "ƫ": 521, "Ƭ": 522, "ƭ": 523, "Ʈ": 524, "Ʊ": 525, "Ʋ": 526, "Ƴ": 527, "ƴ": 528, "Ƶ": 529, "ƶ": 530, "Ʒ": 531, "Ƹ": 532, "ƹ": 533, "ƺ": 534, "ƻ": 535, "Ƽ": 536, "ƽ": 537, "ƾ": 538, "ƿ": 539, "ǀ": 540, "ǁ": 541, "ǂ": 542, "ǃ": 543, "DŽ": 544, "Dž": 545, "dž": 546, "LJ": 547, "Lj": 548, "lj": 549, "NJ": 550, "Nj": 551, "nj": 552, "Ǎ": 553, "ǎ": 554, "Ǐ": 555, "ǐ": 556, "Ǒ": 557, "ǒ": 558, "Ǔ": 559, "ǔ": 560, "Ǖ": 561, "ǖ": 562, "Ǘ": 563, "ǘ": 564, "Ǚ": 565, "ǚ": 566, "Ǜ": 567, "ǜ": 568, "ǝ": 569, "Ǟ": 570, "ǟ": 571, "Ǡ": 572, "ǡ": 573, "Ǣ": 574, "ǣ": 575, "Ǥ": 576, "ǥ": 577, "Ǧ": 578, "ǧ": 579, "Ǩ": 580, "ǩ": 581, "Ǫ": 582, "ǫ": 583, "Ǭ": 584, "ǭ": 585, "Ǯ": 586, "ǯ": 587, "ǰ": 588, "DZ": 589, "Dz": 590, "dz": 591, "Ǵ": 592, "ǵ": 593, "Ƕ": 594, "Ƿ": 595, "Ǹ": 596, "ǹ": 597, "Ǻ": 598, "ǻ": 599, "Ǽ": 600, "ǽ": 601, "Ǿ": 602, "ǿ": 603, "[UH]": 604, "[UM]": 605, "[giggle]": 606, "[laughter]": 607, "[guffaw]": 608, "[inhale]": 609, "[exhale]": 610, "[sigh]": 611, "[cry]": 612, "[bark]": 613, "[howl]": 614, "[meow]": 615, "[singing]": 616, "[music]": 617, "[whistle]": 618, "[humming]": 619, "[gasp]": 620, "[groan]": 621, "[whisper]": 622, "[mumble]": 623, "[sniff]": 624, "[sneeze]": 625, "[cough]": 626, "[snore]": 627, "[chew]": 628, "[sip]": 629, "[clear_throat]": 630, "[kiss]": 631, "[shhh]": 632, "[gibberish]": 633, "[fr]": 634, "[es]": 635, "[de]": 636, "[it]": 637, "[ipa]": 638, "[end_of_label]": 639, "θ": 640, "ð": 641, "ʃ": 642, "ʒ": 643, "tʃ": 644, "dʒ": 645, "ʔ": 646, "ɑː": 647, "æ": 648, "ʌ": 649, "ɒ": 650, "ɔː": 651, "ɜː": 652, "ə": 653, "ɪ": 654, "iː": 655, "ʊ": 656, "uː": 657, "eɪ": 658, "aɪ": 659, "ɔɪ": 660, "aʊ": 661, "əʊ": 662, "ɯ": 663, "ɤ": 664, "ɨ": 665, "ʉ": 666, "ɘ": 667, "ɵ": 668, "ɜ": 669, "ɞ": 670, "ɐ": 671, "ɶ": 
672, "ɑ": 673, "ɔ": 674, "˧": 675, "˥": 676, "˩˧": 677, "˧˥": 678, "˧˩˧": 679, "˧˩": 680, "ɓ": 681, "ɗ": 682, "ɠ": 683, "ʄ": 684, "ʛ": 685, "ɲ": 686, "ɳ": 687, "ɱ": 688, "ʈ": 689, "ɖ": 690, "ɟ": 691, "ɡ": 692, "ɢ": 693, "ʡ": 694, "[PLACEHOLDER55]": 695, "[PLACEHOLDER56]": 696, "[PLACEHOLDER57]": 697, "[PLACEHOLDER58]": 698, "[PLACEHOLDER59]": 699, "[PLACEHOLDER60]": 700, "[PLACEHOLDER61]": 701, "[PLACEHOLDER62]": 702, "[PLACEHOLDER63]": 703 }, "merges": [ "n g", "n h", "t h", "c h", "t r", "k h", "p h", "g h", "g i", "q u" ], "language": "vi" } }