{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Lowercase" }, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "<|endoftext|>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "<|endoftext|>": { "id": "<|endoftext|>", "ids": [ 1 ], "tokens": [ "<|endoftext|>" ] } } }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "<|endoftext|>": 1, "": 2, "": 3, "!": 4, "\"": 5, "#": 6, "$": 7, "%": 8, "&": 9, "'": 10, "(": 11, ")": 12, "*": 13, "+": 14, ",": 15, "-": 16, ".": 17, "/": 18, "0": 19, "1": 20, "2": 21, "3": 22, "4": 23, "5": 24, "6": 25, "7": 26, "8": 27, "9": 28, ":": 29, ";": 30, "<": 31, "=": 32, ">": 33, "?": 34, "@": 35, "[": 36, "\\": 37, "]": 38, "^": 39, "_": 40, "`": 41, "a": 42, "b": 43, "c": 44, "d": 45, "e": 46, "f": 47, "g": 48, "h": 49, "i": 50, "j": 51, "k": 52, "l": 53, "m": 54, "n": 55, "o": 56, "p": 57, "q": 58, "r": 59, "s": 60, "t": 61, "u": 62, "v": 63, "w": 64, "x": 65, "y": 66, "z": 67, "{": 68, "}": 69, "~": 70, "¡": 71, "¢": 72, "£": 73, "¤": 74, "¥": 75, "¦": 76, "§": 77, "¨": 78, "©": 79, "ª": 80, "«": 81, "¬": 82, "®": 83, "¯": 84, "°": 85, "±": 86, "²": 87, "³": 88, "´": 89, "µ": 90, "¶": 91, "·": 92, "¸": 93, "¹": 94, "º": 95, "»": 96, "¼": 97, "½": 98, "¾": 99, "¿": 100, "Â": 101, "Ã": 102, "Ä": 103, "Å": 104, "Æ": 105, "Ç": 106, "È": 107, "É": 108, "Ê": 109, "Ë": 110, "Ì": 111, "Î": 112, "Ï": 113, "Ð": 114, "Ñ": 115, "Ò": 116, "Ó": 117, "Õ": 118, "Ö": 119, "×": 120, "Ø": 121, "Ù": 122, "Ú": 123, "Û": 124, "à": 125, "á": 126, "â": 127, "ã": 128, "ä": 129, "å": 130, "æ": 131, "ç": 132, "è": 133, "é": 134, "ê": 135, "ë": 136, "ì": 137, "í": 138, "î": 139, "ï": 140, "ð": 141, "Ċ": 142, "Ġ": 143, "ġ": 144, "Ģ": 145, "ģ": 146, "Ĥ": 147, "ĥ": 148, "Ħ": 149, "ħ": 150, "Ĩ": 151, "ĩ": 152, "Ī": 153, "ī": 154, "Ĭ": 155, "ĭ": 156, "Į": 157, "į": 158, "İ": 159, "ı": 160, "IJ": 161, "ij": 162, "Ĵ": 163, "ĵ": 164, "Ķ": 165, "ķ": 166, "ĸ": 167, "Ĺ": 168, "ĺ": 169, "Ļ": 170, "ļ": 171, "Ľ": 172, "ľ": 173, "Ŀ": 174, "ŀ": 175, "Ł": 176, "ł": 177, "Ń": 178, "en": 179, "er": 180, "ch": 181, "Ġd": 182, "Ġs": 183, "in": 184, "ie": 185, "Ġw": 186, "ich": 187, "un": 188, "Ġa": 189, "st": 190, "Ġm": 191, "ein": 192, "Ġh": 193, "Ġg": 194, "es": 195, "as": 196, "Ġb": 197, "Ġn": 198, "Ġi": 199 }, "merges": [ [ "e", "n" ], [ "e", "r" ], [ "c", "h" ], [ "Ġ", "d" ], [ "Ġ", "s" ], [ "i", "n" ], [ "i", "e" ], [ "Ġ", "w" ], [ "i", "ch" ], [ "u", "n" ], [ "Ġ", "a" ], [ "s", "t" ], [ "Ġ", "m" ], [ "e", "in" ], [ "Ġ", "h" ], [ "Ġ", "g" ], [ "e", "s" ], [ "a", "s" ], [ "Ġ", "b" ], [ "Ġ", "n" ], [ "Ġ", "i" ] ] } }