{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Lowercase" }, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "<|endoftext|>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "<|endoftext|>": { "id": "<|endoftext|>", "ids": [ 1 ], "tokens": [ "<|endoftext|>" ] } } }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "<|endoftext|>": 1, "": 2, "": 3, "!": 4, "\"": 5, "#": 6, "$": 7, "%": 8, "&": 9, "'": 10, "(": 11, ")": 12, "*": 13, "+": 14, ",": 15, "-": 16, ".": 17, "/": 18, "0": 19, "1": 20, "2": 21, "3": 22, "4": 23, "5": 24, "6": 25, "7": 26, "8": 27, "9": 28, ":": 29, ";": 30, "<": 31, "=": 32, ">": 33, "?": 34, "@": 35, "[": 36, "\\": 37, "]": 38, "^": 39, "_": 40, "`": 41, "a": 42, "b": 43, "c": 44, "d": 45, "e": 46, "f": 47, "g": 48, "h": 49, "i": 50, "j": 51, "k": 52, "l": 53, "m": 54, "n": 55, "o": 56, "p": 57, "q": 58, "r": 59, "s": 60, "t": 61, "u": 62, "v": 63, "w": 
64, "x": 65, "y": 66, "z": 67, "{": 68, "|": 69, "}": 70, "~": 71, "¡": 72, "¢": 73, "£": 74, "¤": 75, "¥": 76, "¦": 77, "§": 78, "¨": 79, "©": 80, "ª": 81, "«": 82, "¬": 83, "®": 84, "¯": 85, "°": 86, "±": 87, "²": 88, "³": 89, "´": 90, "µ": 91, "¶": 92, "·": 93, "¸": 94, "¹": 95, "º": 96, "»": 97, "¼": 98, "½": 99, "¾": 100, "¿": 101, "Â": 102, "Ã": 103, "Ä": 104, "Å": 105, "Æ": 106, "Ç": 107, "È": 108, "É": 109, "Ê": 110, "Ì": 111, "Î": 112, "Ï": 113, "Ð": 114, "Ñ": 115, "Ò": 116, "Ó": 117, "Ö": 118, "×": 119, "Ø": 120, "Ù": 121, "Ú": 122, "Û": 123, "Þ": 124, "à": 125, "á": 126, "â": 127, "ã": 128, "ä": 129, "å": 130, "æ": 131, "ç": 132, "è": 133, "é": 134, "ê": 135, "ë": 136, "ì": 137, "í": 138, "î": 139, "ï": 140, "ð": 141, "Ċ": 142, "Ġ": 143, "Ģ": 144, "ģ": 145, "Ĥ": 146, "ĥ": 147, "Ħ": 148, "ħ": 149, "Ĩ": 150, "ĩ": 151, "Ī": 152, "ī": 153, "Ĭ": 154, "ĭ": 155, "Į": 156, "į": 157, "İ": 158, "ı": 159, "Ĳ": 160, "ĳ": 161, "Ĵ": 162, "ĵ": 163, "Ķ": 164, "ķ": 165, "ĸ": 166, "Ĺ": 167, "ĺ": 168, "Ļ": 169, "ļ": 170, "Ľ": 171, "ľ": 172, "Ŀ": 173, "ŀ": 174, "Ł": 175, "ł": 176, "Ń": 177, "Ġt": 178, "Ġa": 179, "Ġth": 180, "Ġi": 181, "Ġw": 182, "ou": 183, "Ġs": 184, "er": 185, "Ġthe": 186, "in": 187, "Ġh": 188, "Ġo": 189, "re": 190, "at": 191, "Ġy": 192, "Ġm": 193, "Ġc": 194, "Ġb": 195, "nd": 196, "Ġyou": 197, "Ġd": 198, "Ġf": 199 }, "merges": [ [ "Ġ", "t" ], [ "Ġ", "a" ], [ "Ġt", "h" ], [ "Ġ", "i" ], [ "Ġ", "w" ], [ "o", "u" ], [ "Ġ", "s" ], [ "e", "r" ], [ "Ġth", "e" ], [ "i", "n" ], [ "Ġ", "h" ], [ "Ġ", "o" ], [ "r", "e" ], [ "a", "t" ], [ "Ġ", "y" ], [ "Ġ", "m" ], [ "Ġ", "c" ], [ "Ġ", "b" ], [ "n", "d" ], [ "Ġy", "ou" ], [ "Ġ", "d" ], [ "Ġ", "f" ] ] } }