{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Lowercase" }, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "<|endoftext|>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "<|endoftext|>": { "id": "<|endoftext|>", "ids": [ 1 ], "tokens": [ "<|endoftext|>" ] } } }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "<|endoftext|>": 1, "": 2, "": 3, "!": 4, "\"": 5, "#": 6, "$": 7, "%": 8, "&": 9, "'": 10, "(": 11, ")": 12, "*": 13, "+": 14, ",": 15, "-": 16, ".": 17, "/": 18, "0": 19, "1": 20, "2": 21, "3": 22, "4": 23, "5": 24, "6": 25, "7": 26, "8": 27, "9": 28, ":": 29, ";": 30, "<": 31, "=": 32, ">": 33, "?": 34, "@": 35, "[": 36, "\\": 37, "]": 38, "^": 39, "_": 40, "`": 41, "a": 42, "b": 43, "c": 44, "d": 45, "e": 46, "f": 47, "g": 48, "h": 49, "i": 50, "j": 51, "k": 52, "l": 53, "m": 54, "n": 55, "o": 56, "p": 57, "q": 58, "r": 59, "s": 60, "t": 61, "u": 62, "v": 63, "w": 64, "x": 65, "y": 66, "z": 67, "{": 68, "|": 69, "}": 70, "~": 71, "¡": 72, "¢": 73, "£": 74, "¤": 75, "¥": 76, "¦": 77, "§": 78, "¨": 79, "©": 80, "ª": 81, "«": 82, "¬": 83, "®": 84, "¯": 85, "°": 86, "±": 87, "²": 88, "³": 89, "´": 90, "µ": 91, "¶": 92, "·": 93, "¸": 94, "¹": 95, "º": 96, "»": 97, "¼": 98, "½": 99, "¾": 100, "¿": 101, "Â": 102, "Ã": 103, "Ä": 104, "Å": 105, "Æ": 106, "Ç": 107, "È": 108, "É": 109, "Ê": 110, "Ì": 111, "Î": 112, "Ï": 113, "Ð": 114, "Ñ": 115, "Ò": 116, "×": 117, "Ø": 118, "Ù": 119, "Ú": 120, "Û": 121, "Ü": 122, "à": 123, "á": 124, "â": 125, "ã": 126, "ä": 127, "å": 128, "æ": 129, "ç": 130, "è": 131, "é": 132, "ê": 133, "ë": 134, "ì": 135, "í": 136, "î": 137, "ï": 138, "ð": 139, "Ċ": 140, "Ġ": 141, "Ģ": 142, "ģ": 143, "Ĥ": 144, "ĥ": 145, "Ħ": 146, "ħ": 147, "Ĩ": 148, "ĩ": 149, "Ī": 150, "ī": 151, "Ĭ": 152, "ĭ": 153, "Į": 154, "į": 155, "İ": 156, "ı": 157, "IJ": 158, "ij": 159, "Ĵ": 160, "ĵ": 161, "Ķ": 162, "ķ": 163, "ĸ": 164, "Ĺ": 165, "ĺ": 166, "Ļ": 167, "ļ": 168, "Ľ": 169, "ľ": 170, "Ŀ": 171, "ŀ": 172, "Ł": 173, "ł": 174, "Ń": 175, "ا": 176, "ÙĦ": 177, "ĠØ": 178, "ĠÙ": 179, "ÙĬ": 180, "ÙĨ": 181, "اÙĦ": 182, "Ùħ": 183, "ĠاÙĦ": 184, "ر": 185, "ت": 186, "ÙĪ": 187, "د": 188, "Ùĥ": 189, "ع": 190, "Ø©": 191, "ب": 192, "Ùĩ": 193, "ĠÙħ": 194, "ĠØ£": 195, "س": 196, "ÙĤ": 197, "ĠÙĦ": 198, "Ùģ": 199 }, "merges": [ [ "Ø", "§" ], [ "Ù", "Ħ" ], [ "Ġ", "Ø" ], [ "Ġ", "Ù" ], [ "Ù", "Ĭ" ], [ "Ù", "Ĩ" ], [ "ا", "ÙĦ" ], [ "Ù", "ħ" ], [ "Ġ", "اÙĦ" ], [ "Ø", "±" ], [ "Ø", "ª" ], [ "Ù", "Ī" ], [ "Ø", "¯" ], [ "Ù", "ĥ" ], [ "Ø", "¹" ], [ "Ø", "©" ], [ "Ø", "¨" ], [ "Ù", "ĩ" ], [ "ĠÙ", "ħ" ], [ "ĠØ", "£" ], [ "Ø", "³" ], [ "Ù", "Ĥ" ], [ "Ġ", "ÙĦ" ], [ "Ù", "ģ" ] ] } }