{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "[PAD]" }, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "WhitespaceSplit" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "n": 4, "h": 5, "t": 6, "c": 7, "i": 8, "g": 9, "a": 10, "u": 11, "đ": 12, "m": 13, "o": 14, "r": 15, "v": 16, "à": 17, "l": 18, "p": 19, ",": 20, "ư": 21, "y": 22, "á": 23, "s": 24, "b": 25, "k": 26, ".": 27, "d": 28, "ế": 29, "ệ": 30, "ạ": 31, "ô": 32, "e": 33, "ả": 34, "ê": 35, "ộ": 36, "ó": 37, "ố": 38, "ớ": 39, "ấ": 40, "ờ": 41, "ị": 42, "â": 43, "q": 44, "ề": 45, "ủ": 46, "1": 47, "ể": 48, "2": 49, "ợ": 50, "0": 51, "ơ": 52, "ậ": 53, "x": 54, "ì": 55, "ầ": 56, "ự": 57, "ă": 58, "ứ": 59, "ở": 60, "í": 61, "ã": 62, "ụ": 63, "ọ": 64, "ồ": 65, "ữ": 66, "-": 67, "ắ": 68, "ú": 69, "ổ": 70, "ừ": 71, "ò": 72, "ù": 73, "ặ": 74, "3": 75, "ỉ": 76, "9": 77, ")": 78, "(": 79, "ũ": 80, "5": 81, "\"": 82, "ễ": 83, ":": 84, "ử": 85, "4": 86, "/": 87, "f": 88, "ẽ": 89, "ý": 90, "ỏ": 91, "6": 92, "ẩ": 93, "é": 94, "8": 95, "7": 96, "ẫ": 97, "ằ": 98, "ỗ": 99, "ĩ": 100, "w": 101, "ẻ": 102, ";": 103, "'": 104, "ỹ": 105, "ẹ": 106, "ỷ": 107, "%": 108, "ỳ": 109, "z": 110, "j": 111, "ỡ": 112, "õ": 113, "è": 114, "ẳ": 115, "?": 116, "ẵ": 117, "–": 118, "&": 119, "!": 120, "*": 121, "’": 122, "+": 123, "‘": 124, ">": 125, "|": 126, "_": 127, "ỵ": 128, "=": 129, "@": 130, "[": 131, "]": 132, "•": 133, "#": 134, "●": 135, "·": 136, "ð": 137, "°": 138, "<": 139, "ö": 140, "≥": 141, "$": 142, "ü": 143, "о": 144, "⁄": 145, "а": 146, "\\": 147, "т": 148, "е": 149, "и": 150, "~": 151, "с": 152, "н": 153, "ä": 154, "—": 155, "ç": 156, "р": 157, "ø": 158, "≤": 159, "ë": 160, "к": 161, "п": 162, "ć": 163, "л": 164, "‐": 165, "μ": 166, "«": 167, "š": 168, "ь": 169, "×": 170, "ā": 171, "м": 172, "у": 173, "å": 174, "β": 175, "ч": 176, "я": 177, "в": 178, "`": 179, "ō": 180, "ï": 181, "č": 182, "ŋ": 183, "̣": 184, "−": 185, "α": 186, "ы": 187, "�": 188, "ß": 189, "д": 190, "′": 191, "ń": 192, "́": 193, "б": 194, "̀": 195, "з": 196, "ş": 197, "̉": 198, "‰": 199, "»": 200, "æ": 201, "δ": 202, "ν": 203, "ж": 204, "ğ": 205, "ı": 206, "ɛ": 207, "虎": 208, "ǎ": 209, "г": 210, "ᅲ": 211, "人": 212, "家": 213, "空": 214, "ġ": 215, "ī": 216, "ł": 217, "ū": 218, "ɪ": 219, "ς": 220, "ц": 221, "子": 222, "礼": 223, "精": 224, "自": 225, "花": 226 }, "unk_token": "[UNK]" } }