{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "String": "" }, "behavior": "Isolated", "invert": false }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 3 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 2 ], "tokens": [ "" ] } } }, "decoder": { "type": "Sequence", "decoders": [] }, "model": { "type": "WordLevel", "vocab": { "": 0, "": 1, "": 2, "": 3, "8": 4, ".": 5, "1": 6, "3": 7, " ": 8, "D": 9, "u": 10, "n": 11, "c": 12, "a": 13, "h": 14, "t": 15, "y": 16, "d": 17, "g": 18, "i": 19, "m": 20, "e": 21, "o": 22, "k": 23, "5": 24, "0": 25, "C": 26, ",": 27, "s": 28, "l": 29, "f": 30, "(": 31, "N": 32, "H": 33, "4": 34, ")": 35, "2": 36, "S": 37, "v": 38, "L": 39, "q": 40, "-": 41, "μ": 42, "6": 43, "b": 44, "p": 45, "r": 46, "Đ": 47, "á": 48, "ủ": 49, "ị": 50, "ẫ": 51, "đ": 52, "ế": 53, "ệ": 54, "ộ": 55, "ả": 56, "ỏ": 57, "ừ": 58, "ọ": 59, "à": 60, "ù": 61, "ũ": 62, "ấ": 63, "ề": 64, "ạ": 65, "ê": 66, "ó": 67, "ố": 68, "ư": 69, "ỡ": 70, "ỗ": 71, "ì": 72, "â": 73, "ử": 74, "ằ": 75, "ớ": 76, "/": 77, "V": 78, "ă": 79, "ò": 80, "í": 81, "ý": 82, "ở": 83, "ơ": 84, "B": 85, "ự": 86, "T": 87, "ầ": 88, "ậ": 89, "ô": 90, "U": 91, "ỉ": 92, "ú": 93, "ẵ": 94, "ồ": 95, "ặ": 96, "ổ": 97, "ẹ": 98, "P": 99, "ẽ": 100, "ỷ": 101, "ể": 102, "ẻ": 103, "ữ": 104, "ụ": 105, "Q": 106, "ờ": 107, "Y": 108, "ợ": 109, "=": 110, "x": 111, "@": 112, "+": 113, "^": 114, "Ủ": 115, "&": 116, "$": 117, ";": 118, "!": 119, "*": 120, "#": 121, "%": 122, "~": 123, "õ": 124, "ứ": 125, "X": 126, ":": 127, "Ề": 128, "Ạ": 129, "Ứ": 130, "Ụ": 131, "A": 132, "G": 133, "Ư": 134, "Ờ": 135, "I": 136, "K": 137, "Ý": 138, "ẩ": 139, "ã": 140, "é": 141, "F": 142, "W": 143, "ĩ": 144, "è": 145, "M": 146, "ễ": 147, "ắ": 148, "\"": 149, "7": 150, "ỳ": 151, "ẳ": 152, "ỵ": 153, "?": 154, "z": 155, "•": 156, "9": 157, "j": 158, "Ẩ": 159, "\n": 160, "O": 161, "ỹ": 162, "E": 163, "R": 164, "Ế": 165, "Ị": 166, "Ộ": 167, "Ở": 168, "Ọ": 169, "À": 170, "Ô": 171, "Ệ": 172, "Ê": 173, "Ặ": 174, "Ó": 175, "’": 176, "w": 177, "Ể": 178, "Ú": 179, "Í": 180, "É": 181, "Ễ": 182, "Ỡ": 183, "Â": 184, "Á": 185, "Ỹ": 186, "Ậ": 187, "Ã": 188, "Ố": 189, "Ầ": 190, "“": 191, "”": 192, "Ừ": 193, "Ả": 194, "Ằ": 195, "Ớ": 196, "Ự": 197, "Õ": 198, "Ĩ": 199, "Ữ": 200, "Ò": 201, "Ử": 202, "≥": 203, "Ì": 204, "Ổ": 205, "°": 206, "'": 207, "Ơ": 208, "Ă": 209, "Ỉ": 210, "Ẹ": 211, "[": 212, "]": 213, "<": 214, ">": 215, "…": 216, "Z": 217, "J": 218, "Ấ": 219, "Ợ": 220, "🥺": 221, "–": 222, "Ẫ": 223, "ē": 224, "ō": 225, "ā": 226, ",": 227, "П": 228, "Ù": 229, "Ũ": 230, "Ẵ": 231, "|": 232, "±": 233, "ę": 234, "³": 235, "_": 236, "Ỗ": 237, "È": 238, "Ỏ": 239, "Ắ": 240, "Ẳ": 241, "∆": 242, "Ẽ": 243, "Ồ": 244, "Ỳ": 245, "Ỵ": 246, "—": 247, "ǎ": 248, "Ỷ": 249, "ϕ": 250, "≤": 251, "α": 252, "θ": 253, "Σ": 254, "­": 255, "÷": 256, "Ω": 257, "β": 258, "ç": 259, "{": 260, "}": 261, "ü": 262, "̀": 263, "Ẻ": 264, "□": 265, "ϑ": 266, "ʃ": 267, "£": 268, "√": 269, "∑": 270, "‰": 271, "ǐ": 272, "►": 273, "́": 274, "̉": 275, "̣": 276, "·": 277, "‘": 278, "ǔ": 279, "Î": 280, "ï": 281, "ǒ": 282, "̃": 283, "å": 284, "½": 285, "Μ": 286, "ύ": 287, "ς": 288, "ū": 289, "●": 290, "δ": 291, "ž": 292, "Å": 293, "ö": 294, "ß": 295, "λ": 296, "²": 297, "↔": 298, "║": 299, "ɸ": 300, "≠": 301, "γ": 302, "ě": 303, "¹": 304, "µ": 305, "≈": 306, "์": 307, "¾": 308, "ą": 309, "异": 310, "物": 311, "ī": 312, "¨": 313, "®": 314, "þ": 315, "♦": 316, "§": 317, "▪": 318, "σ": 319, "Ä": 320, "ε": 321, "∙": 322, "û": 323, "ρ": 324, "令": 325, "ద": 326, "ి": 327, "ం": 328, "Δ": 329, "¬": 330, "?": 331, "φ": 332, "": 333, "‚": 334, "ƒ": 335, "„": 336, "π": 337, "🗿": 338, "Ø": 339, "′": 340, "�": 341, "业": 342, "主": 343, "下": 344, "一": 345, "ǰ": 346, "ξ": 347, "■": 348, "→": 349, "ӧ": 350, "린": 351, "च": 352, "्": 353, "य": 354, "ं": 355, "ु": 356, "ग": 357, "म": 358, "ू": 359, "स": 360, "ा": 361, "क": 362, "श": 363, "े": 364, "त": 365, "ै": 366, "प": 367, "ृ": 368, "थ": 369, "ि": 370, "भ": 371, "ह": 372, "॑": 373, "व": 374, "灌": 375, "ठ": 376, "ी": 377, "工": 378, "商": 379, "ข": 380, "ึ": 381, "要": 382, "坚": 383, "持": 384, "‎": 385, "\\": 386, "×": 387, "": 388, "ä": 389, "Ö": 390, ";": 391, "Η": 392, "Ο": 393, "Α": 394, "Τ": 395, "Θ": 396, "Ν": 397, "Γ": 398, "Β": 399, "Ψ": 400, "Ι": 401, "Д": 402, "ø": 403, "辜": 404, "¼": 405, "培": 406, "标": 407, "త": 408, "等": 409, "》": 410, "、": 411, "항": 412, "„": 413, "ラ": 414, "ン": 415, "讓": 416, "嘻": 417, "在": 418, "人": 419, "ँ": 420, "景": 421, "点": 422, "ప": 423, "ు": 424, "却": 425, "不": 426, "平": 427, "均": 428, "🫠": 429, "`": 430, "😭": 431, "ล": 432, "้": 433, "า": 434, "น": 435, "章": 436, "¥": 437, "η": 438, "中": 439, "心": 440, "的": 441, "陛": 442, "朝": 443, "群": 444, "众": 445, "ใ": 446, "缺": 447, "陷": 448, "以": 449, "防": 450, "本": 451, "Ð": 452, "👀": 453, "永": 454, "久": 455, "可": 456, "某": 457, "种": 458, "方": 459, "差": 460, "↑": 461, "和": 462, "外": 463, "危": 464, "出": 465, "版": 466, "社": 467, "ë": 468, "є": 469, "Æ": 470, "받": 471, "️": 472, "有": 473, "效": 474, "期": 475, "现": 476, "我": 477, "τ": 478, "開": 479, "会": 480, "": 481, "न": 482, "ण": 483, "ो": 484, "र": 485, "్": 486, "ุ": 487, "ḏ": 488, "路": 489, "口": 490, "↓": 491, "│": 492, "框": 493, "о": 494, "к": 495, "教": 496, "育": 497, "活": 498, "动": 499, "≡": 500, "с": 501, "ல": 502, "்": 503, "服": 504, "务": 505, "氛": 506, "阶": 507, "级": 508, "🇻": 509, "🇳": 510, "☆": 511, "😒": 512, "遮": 513, "挡": 514, "体": 515, "检": 516, "数": 517, "据": 518, "显": 519, "示": 520, "ก": 521, "ั": 522, "บ": 523, "실": 524, "😻": 525, "涵": 526, "盖": 527, "©": 528, "墩": 529, "❤": 530, "宣": 531, "言": 532, "ὅ": 533, "ι": 534, "ὀ": 535, "ὑ": 536, "ὁ": 537, "ὄ": 538, "∞": 539, "ɒ": 540, "ɛ": 541, "ɑ": 542, "ᾶ": 543, "υ": 544, "Κ": 545, "Ε": 546, "Π": 547, "Χ": 548, "Λ": 549, "¸": 550, "値": 551, "º": 552, "Ü": 553, "😏": 554, "第": 555, "三": 556, "步": 557, "肖": 558, "š": 559, "😂": 560, "ళ": 561, "乔": 562, "木": 563, "线": 564, "​": 565, "作": 566, "ı": 567, "😔": 568, "😃": 569, "💔": 570, "걸": 571, "😌": 572, "ఉ": 573, "エ": 574, "无": 575, "总": 576, "竞": 577, "赛": 578, "Φ": 579, "»": 580, "语": 581, "表": 582, "达": 583, "🙃": 584, "顯": 585, "ో": 586, "Ἐ": 587, "ἀ": 588, "ὶ": 589, "ν": 590, "ἄ": 591, "ο": 592, ")": 593, "💢": 594, "哉": 595, "开": 596, "实": 597, "็": 598, "🥰": 599, "ˉ": 600, "学": 601, "报": 602, "ณ": 603, "ె": 604, "🙂": 605, "ř": 606, "": 607, "ð": 608, "右": 609, "上": 610, "音": 611, "调": 612, "奎": 613, "": 614, "": 615, "兑": 616, ":": 617, "ట": 618, "ῆ": 619, "ᶻ": 620, "𝗓": 621, "𐰁": 622, "暴": 623, "د": 624, "َ": 625, "ا": 626, "ت": 627, "ُ": 628, "و": 629, "ْ": 630, "م": 631, "غ": 632, "ق": 633, "ن": 634, "ل": 635, "ِ": 636, "ي": 637, "ح": 638, "ك": 639, "足": 640, "™": 641, "与": 642, "应": 643, "用": 644, "毛": 645, "纠": 646, "息": 647, "번": 648, "授": 649, "课": 650, "ధ": 651, "ฺ": 652, "剩": 653, "☺": 654, "声": 655, "మ": 656, "ా": 657, "డ": 658, "信": 659, "ć": 660, "č": 661, "Ï": 662, "ʊ": 663, "\t": 664, "挂": 665, "ซ": 666, "😊": 667, "岗": 668, "终": 669, "身": 670, "Ӏ": 671, "成": 672, "り": 673, "市": 674, "民": 675, "讀": 676, "🤭": 677, "🙄": 678, "罕": 679, "‹": 680, "涡": 681, "ื": 682, "": 683, "🫰": 684, "🏻": 685, "ń": 686, "欲": 687, "这": 688, "组": 689, "之": 690, "那": 691, "樓": 692, "贯": 693, "穿": 694, "阙": 695, "Ÿ": 696, "咬": 697, "ర": 698, "确": 699, "定": 700, "项": 701, "葬": 702, "相": 703, "匈": 704, "奴": 705, "ẍ": 706, "个": 707, "词": 708, "刑": 709, "鼻": 710, "钢": 711, "板": 712, "驰": 713, "串": 714, "ม": 715, "🤬": 716, "อ": 717, "ง": 718, "拉": 719, "목": 720, "̂": 721, "̛": 722, "ख": 723, "€": 724, "😁": 725, "捐": 726, "赠": 727, "其": 728, "所": 729, "必": 730, "须": 731, "盛": 732, "ẏ": 733, "檀": 734, "闲": 735, "从": 736, "头": 737, "被": 738, "打": 739, "四": 740, "旅": 741, "游": 742, "œ": 743, "ด": 744, "粹": 745, "합": 746, "こ": 747, "と": 748, "󠇄": 749, "హ": 750, "చ": 751, "ϱ": 752, "న": 753, "గ": 754, "द": 755, "ध": 756, "ौ": 757, "文": 758, "句": 759, "首": 760, "恰": 761, "当": 762, "格": 763, "山": 764, "区": 765, "🥲": 766, "将": 767, "ῦ": 768, "ὸ": 769, "制": 770, "Υ": 771, "Ώ": 772, "Ξ": 773, "Ρ": 774, "😇": 775, "з": 776, "м": 777, "專": 778, "经": 779, "济": 780, "技": 781, "术": 782, "世": 783, "界": 784, "最": 785, "😍": 786, "술": 787, "克": 788, "н": 789, "а": 790, "р": 791, "ω": 792, "覆": 793, "率": 794, "士": 795, "虞": 796, "": 797, "을": 798, "指": 799, "接": 800, "收": 801, "в": 802, "星": 803, "空": 804, "ǚ": 805, "함": 806, "并": 807, "提": 808, "實": 809, "斗": 810, "遁": 811, "稿": 812, "素": 813, "限": 814, "时": 815, "ˆ": 816, "验": 817, "客": 818, "龙": 819, "企": 820, "它": 821, "堡": 822, "垒": 823, "集": 824, "任": 825, "何": 826, "他": 827, "😞": 828, "집": 829, "冬": 830, "奏": 831, "土": 832, "性": 833, "地": 834, "Ñ": 835, "ท": 836, "ี": 837, "่": 838, "⁴": 839, "Ⅱ": 840, "派": 841, "薪": 842, "ే": 843, "洛": 844, "基": 845, "准": 846, "函": 847, "委": 848, "托": 849, "牵": 850, "歇": 851, "抽": 852, "晖": 853, "媒": 854, "医": 855, "ẋ": 856, "明": 857, "있": 858, "´": 859, "十": 860, "条": 861, "ş": 862, "黒": 863, "劲": 864, "🤗": 865, "始": 866, "皇": 867, "气": 868, "❌": 869, "✅": 870, "慎": 871, "😋": 872, "ต": 873, "ู": 874, "ย": 875, "ธ": 876, "ช": 877, "ิ": 878, "ศ": 879, "😮": 880, "‍": 881, "💨": 882, "绩": 883, "勒": 884, "斯": 885, "Ṣ": 886, "ṯ": 887, "华": 888, "南": 889, "字": 890, "过": 891, "М": 892, "у": 893, "风": 894, "电": 895, "🥵": 896, "도": 897, "線": 898, "逼": 899, "行": 900, "争": 901, "设": 902, "备": 903, "ट": 904, "末": 905, "ల": 906, "щ": 907, "и": 908, "族": 909, "自": 910, "治": 911, "县": 912, "😘": 913, "窗": 914, "饮": 915, "占": 916, "比": 917, "产": 918, "化": 919, "百": 920, "度": 921, "़": 922, "对": 923, "完": 924, "为": 925, "核": 926, "靠": 927, "д": 928, "话": 929, "你": 930, "联": 931, "系": 932, "是": 933, "。": 934, "尚": 935, "知": 936, "样": 937, "涅": 938, "ś": 939, "畔": 940, "할": 941, "都": 942, "很": 943, "习": 944, "近": 945, "遗": 946, "憾": 947, " ": 948, "✓": 949, "เ": 950, "많": 951, "脑": 952, "续": 953, "增": 954, "长": 955, "公": 956, "司": 957, "😳": 958, "视": 959, "🤨": 960, "需": 961, "簿": 962, "犁": 963, "存": 964, "货": 965, "芙": 966, "份": 967, "典": 968, "们": 969, "来": 970, "写": 971, "强": 972, "帆": 973, "資": 974, "🤣": 975, "橋": 976, "⁠": 977, "🫨": 978, "╭": 979, " ̄": 980, "♡": 981, "广": 982, "₫": 983, "ꈨ": 984, "ຶ": 985, "˙": 986, "̫": 987, "̮": 988, "`": 989, "ノ": 990, "𝐁": 991, "𝐂": 992, "𝐀": 993, "𝐬": 994, "𝐢": 995, "𝐠": 996, "𝐧": 997, "𝐦": 998, "𝐞": 999, "𝐭": 1000, "・": 1001, "✈": 1002, "̆": 1003, "᷄": 1004, "⌓": 1005, "᷅": 1006 }, "unk_token": "" } }