{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 512, "strategy": "LongestFirst", "stride": 0 }, "padding": { "strategy": { "Fixed": 512 }, "direction": "Right", "pad_to_multiple_of": null, "pad_id": 1, "pad_type_id": 0, "pad_token": "" }, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 8, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 9, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 10, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 11, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 12, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 13, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 14, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 15, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 16, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 17, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 18, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 19, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 20, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 21, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 22, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 23, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 24, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 25, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 26, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 27, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 28, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 29, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 30, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 31, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 32, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 33, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 34, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 35, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 36, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 37, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 38, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 39, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 40, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 41, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 42, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 43, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 44, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 45, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 46, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 47, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 48, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 49, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 50, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 51, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 52, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 53, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 54, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 55, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 56, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 57, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 58, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 59, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 60, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 61, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 62, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 63, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 64, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 65, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 66, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 67, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 68, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 69, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 70, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 71, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 72, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 73, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 74, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 75, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 76, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 77, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 78, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 79, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 80, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 81, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 82, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 83, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 84, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 85, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 86, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 87, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 88, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 89, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 90, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 91, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 92, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 93, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 94, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 95, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 96, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 97, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 98, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 99, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 100, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 101, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 102, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 103, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 104, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "": 5, "": 6, "": 7, "": 8, "": 9, "": 10, "": 11, "": 12, "": 13, "": 14, "": 15, "": 16, "": 17, "": 18, "": 19, "": 20, "": 21, "": 22, "": 23, "": 24, "": 25, "": 26, "": 27, "": 28, "": 29, "": 30, "": 31, "": 32, "": 33, "": 34, "": 35, "": 36, "": 37, "": 38, "": 39, "": 40, "": 41, "": 42, "": 43, "": 44, "": 45, "": 46, "": 47, "": 48, "": 49, "": 50, "": 51, "": 52, "": 53, "": 54, "": 55, "": 56, "": 57, "": 58, "": 59, "": 60, "": 61, "": 62, "": 63, "": 64, "": 65, "": 66, "": 67, "": 68, "": 69, "": 70, "": 71, "": 72, "": 73, "": 74, "": 75, "": 76, "": 77, "": 78, "": 79, "": 80, "": 81, "": 82, "": 83, "": 84, "": 85, "": 86, "": 87, "": 88, "": 89, "": 90, "": 91, "": 92, "": 93, "": 94, "": 95, "": 96, "": 97, "": 98, "": 99, "": 100, "": 101, "": 102, "": 103, "": 104, "!": 105, "\"": 106, "#": 107, "$": 108, "%": 109, "&": 110, "'": 111, "(": 112, ")": 113, "*": 114, "+": 115, ",": 116, "-": 117, ".": 118, "/": 119, "0": 120, "1": 121, "2": 122, "3": 123, "4": 124, "5": 125, "6": 126, "7": 127, "8": 128, "9": 129, ":": 130, ";": 131, "<": 132, "=": 133, ">": 134, "?": 135, "@": 136, "A": 137, "B": 138, "C": 139, "D": 140, "E": 141, "F": 142, "G": 143, "H": 144, "I": 145, "J": 146, "K": 147, "L": 148, "M": 149, "N": 150, "O": 151, "P": 152, "Q": 153, "R": 154, "S": 155, "T": 156, "U": 157, "V": 158, "W": 159, "X": 160, "Y": 161, "Z": 162, "[": 163, "\\": 164, "]": 165, "^": 166, "_": 167, "`": 168, "a": 169, "b": 170, "c": 171, "d": 172, "e": 173, "f": 174, "g": 175, "h": 176, "i": 177, "j": 178, "k": 179, "l": 180, "m": 181, "n": 182, "o": 183, "p": 184, "q": 185, "r": 186, "s": 187, "t": 188, "u": 189, "v": 190, "w": 191, "x": 192, "y": 193, "z": 194, "{": 195, "|": 196, "}": 197, "~": 198, "¡": 199, "¢": 200, "£": 201, "¤": 202, "¥": 203, "¦": 204, "§": 205, "¨": 206, "©": 207, "ª": 208, "«": 209, "¬": 210, "®": 211, "¯": 212, "°": 213, "±": 214, "²": 215, "³": 216, "´": 217, "µ": 218, "¶": 219, "·": 220, "¸": 221, "¹": 222, "º": 223, "»": 224, "¼": 225, "½": 226, "¾": 227, "¿": 228, "À": 229, "Á": 230, "Â": 231, "Ã": 232, "Ä": 233, "Å": 234, "Æ": 235, "Ç": 236, "È": 237, "É": 238, "Ê": 239, "Ë": 240, "Ì": 241, "Í": 242, "Î": 243, "Ï": 244, "Ð": 245, "Ñ": 246, "Ò": 247, "Ó": 248, "Ô": 249, "Õ": 250, "Ö": 251, "×": 252, "Ø": 253, "Ù": 254, "Ú": 255, "Û": 256, "Ü": 257, "Ý": 258, "Þ": 259, "ß": 260, "à": 261, "á": 262, "â": 263, "ã": 264, "ä": 265, "å": 266, "æ": 267, "ç": 268, "è": 269, "é": 270, "ê": 271, "ë": 272, "ì": 273, "í": 274, "î": 275, "ï": 276, "ð": 277, "ñ": 278, "ò": 279, "ó": 280, "ô": 281, "õ": 282, "ö": 283, "÷": 284, "ø": 285, "ù": 286, "ú": 287, "û": 288, "ü": 289, "ý": 290, "þ": 291, "ÿ": 292, "Ā": 293, "ā": 294, "Ă": 295, "ă": 296, "Ą": 297, "ą": 298, "Ć": 299, "ć": 300, "Ĉ": 301, "ĉ": 302, "Ċ": 303, "ċ": 304, "Č": 305, "č": 306, "Ď": 307, "ď": 308, "Đ": 309, "đ": 310, "Ē": 311, "ē": 312, "Ĕ": 313, "ĕ": 314, "Ė": 315, "ė": 316, "Ę": 317, "ę": 318, "Ě": 319, "ě": 320, "Ĝ": 321, "ĝ": 322, "Ğ": 323, "ğ": 324, "Ġ": 325, "ġ": 326, "Ģ": 327, "ģ": 328, "Ĥ": 329, "ĥ": 330, "Ħ": 331, "ħ": 332, "Ĩ": 333, "ĩ": 334, "Ī": 335, "ī": 336, "Ĭ": 337, "ĭ": 338, "Į": 339, "į": 340, "İ": 341, "ı": 342, "IJ": 343, "ij": 344, "Ĵ": 345, "ĵ": 346, "Ķ": 347, "ķ": 348, "ĸ": 349, "Ĺ": 350, "ĺ": 351, "Ļ": 352, "ļ": 353, "Ľ": 354, "ľ": 355, "Ŀ": 356, "ŀ": 357, "Ł": 358, "ł": 359, "Ń": 360, "à§": 361, "Ġà": 362, "Ġà¦": 363, "¦¾": 364, "া": 365, "à§ĩ": 366, "¦°": 367, "র": 368, "à§į": 369, "¦¿": 370, "ি": 371, "¦¨": 372, "ন": 373, "¦¯": 374, "য": 375, "¦¤": 376, "ত": 377, "¦ķ": 378, "à¦ķ": 379, "¦²": 380, "ল": 381, "¦¼": 382, "়": 383 }, "merges": [ [ "à", "§" ], [ "Ġ", "à" ], [ "Ġà", "¦" ], [ "¦", "¾" ], [ "à", "¦¾" ], [ "à§", "ĩ" ], [ "¦", "°" ], [ "à", "¦°" ], [ "à§", "į" ], [ "¦", "¿" ], [ "à", "¦¿" ], [ "¦", "¨" ], [ "à", "¦¨" ], [ "¦", "¯" ], [ "à", "¦¯" ], [ "¦", "¤" ], [ "à", "¦¤" ], [ "¦", "ķ" ], [ "à", "¦ķ" ], [ "¦", "²" ], [ "à", "¦²" ], [ "¦", "¼" ], [ "à", "¦¼" ] ] } }