{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "aha", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "wait", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 3, "content": "BoS", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "EoS", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "UNK", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "PAD", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "EoT", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 8, "content": "BoT", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "aha": 0, "wait": 1, "<|endoftext|>": 2, "BoS": 3, "EoS": 4, "UNK": 5, "PAD": 6, "EoT": 7, "BoT": 8, "!": 9, "\"": 10, "#": 11, "$": 12, "%": 13, "&": 14, "'": 15, "(": 16, ")": 17, "*": 18, "+": 19, ",": 20, "-": 21, ".": 22, "/": 23, "0": 24, "1": 25, "2": 26, "3": 27, "4": 28, "5": 29, "6": 30, "7": 31, "8": 32, "9": 33, ":": 34, ";": 35, "<": 36, "=": 37, ">": 38, "?": 39, "@": 40, "A": 41, "B": 42, "C": 43, "D": 44, "E": 45, "F": 46, "G": 47, "H": 48, "I": 49, "J": 50, "K": 51, "L": 52, "M": 53, "N": 54, "O": 55, "P": 56, "Q": 57, "R": 58, "S": 59, "T": 60, "U": 61, "V": 62, "W": 63, "X": 64, "Y": 65, "Z": 66, "[": 67, "\\": 68, "]": 69, "^": 70, "_": 71, "`": 72, "a": 73, "b": 74, "c": 75, "d": 76, "e": 77, "f": 78, "g": 79, "h": 80, "i": 81, "j": 82, "k": 83, "l": 84, "m": 85, "n": 86, "o": 87, "p": 88, "q": 89, "r": 90, "s": 91, "t": 92, "u": 93, "v": 94, "w": 95, "x": 96, "y": 97, "z": 98, "{": 99, "|": 100, "}": 101, "~": 102, "¡": 103, "¢": 104, "£": 105, "¤": 106, "¥": 107, "¦": 108, "§": 109, "¨": 110, "©": 111, "ª": 112, "«": 113, "¬": 114, "®": 115, "¯": 116, "°": 117, "±": 118, "²": 119, "³": 120, "´": 121, "µ": 122, "¶": 123, "·": 124, "¸": 125, "¹": 126, "º": 127, "»": 128, "¼": 129, "½": 130, "¾": 131, "¿": 132, "À": 133, "Á": 134, "Â": 135, "Ã": 136, "Ä": 137, "Å": 138, "Æ": 139, "Ç": 140, "È": 141, "É": 142, "Ê": 143, "Ë": 144, "Ì": 145, "Í": 146, "Î": 147, "Ï": 148, "Ð": 149, "Ñ": 150, "Ò": 151, "Ó": 152, "Ô": 153, "Õ": 154, "Ö": 155, "×": 156, "Ø": 157, "Ù": 158, "Ú": 159, "Û": 160, "Ü": 161, "Ý": 162, "Þ": 163, "ß": 164, "à": 165, "á": 166, "â": 167, "ã": 168, "ä": 169, "å": 170, "æ": 171, "ç": 172, "è": 173, "é": 174, "ê": 175, "ë": 176, "ì": 177, "í": 178, "î": 179, "ï": 180, "ð": 181, "ñ": 182, "ò": 183, "ó": 184, "ô": 185, "õ": 186, "ö": 187, "÷": 188, "ø": 189, "ù": 190, "ú": 191, "û": 192, "ü": 193, "ý": 194, "þ": 195, "ÿ": 196, "Ā": 197, "ā": 198, "Ă": 199, "ă": 200, "Ą": 201, "ą": 202, "Ć": 203, "ć": 204, "Ĉ": 205, "ĉ": 206, "Ċ": 207, "ċ": 208, "Č": 209, "č": 210, "Ď": 211, "ď": 212, "Đ": 213, "đ": 214, "Ē": 215, "ē": 216, "Ĕ": 217, "ĕ": 218, "Ė": 219, "ė": 220, "Ę": 221, "ę": 222, "Ě": 223, "ě": 224, "Ĝ": 225, "ĝ": 226, "Ğ": 227, "ğ": 228, "Ġ": 229, "ġ": 230, "Ģ": 231, "ģ": 232, "Ĥ": 233, "ĥ": 234, "Ħ": 235, "ħ": 236, "Ĩ": 237, "ĩ": 238, "Ī": 239, "ī": 240, "Ĭ": 241, "ĭ": 242, "Į": 243, "į": 244, "İ": 245, "ı": 246, "IJ": 247, "ij": 248, "Ĵ": 249, "ĵ": 250, "Ķ": 251, "ķ": 252, "ĸ": 253, "Ĺ": 254, "ĺ": 255, "Ļ": 256, "ļ": 257, "Ľ": 258, "ľ": 259, "Ŀ": 260, "ŀ": 261, "Ł": 262, "ł": 263, "Ń": 264, "Ġn": 265, "Ġ|": 266, "ah": 267, "Ġah": 268, "Ġaha": 269, "Ġ1": 270, "Ġ4": 271, "Ġ3": 272, "Ġ2": 273, "ai": 274, "wai": 275, "Ġwai": 276, "Ġwait": 277, "10": 278, "Ġ[": 279, "Ġ]": 280, "Ġl": 281, "11": 282, "12": 283, "Ġ6": 284, "Ġ7": 285, "Bo": 286, "Eo": 287, "ĠEo": 288, "Ġ5": 289, "Ġ8": 290, "13": 291, "Ġ9": 292, "Ġ10": 293, "ĠBo": 294, "ĠEoS": 295, "ĠEoT": 296, "ĠBoT": 297, "14": 298, "Ġ11": 299, "Ġ12": 300, "15": 301, "Ġ13": 302, "Ġ14": 303, "16": 304, "Ġ15": 305, "Ġ16": 306, "17": 307, "Ġ17": 308, "Ġ18": 309, "Ġ19": 310, "Ġ20": 311 }, "merges": [ [ "Ġ", "n" ], [ "Ġ", "|" ], [ "a", "h" ], [ "Ġ", "ah" ], [ "Ġah", "a" ], [ "Ġ", "1" ], [ "Ġ", "4" ], [ "Ġ", "3" ], [ "Ġ", "2" ], [ "a", "i" ], [ "w", "ai" ], [ "Ġ", "wai" ], [ "Ġwai", "t" ], [ "1", "0" ], [ "Ġ", "[" ], [ "Ġ", "]" ], [ "Ġ", "l" ], [ "1", "1" ], [ "1", "2" ], [ "Ġ", "6" ], [ "Ġ", "7" ], [ "B", "o" ], [ "E", "o" ], [ "Ġ", "Eo" ], [ "Ġ", "5" ], [ "Ġ", "8" ], [ "1", "3" ], [ "Ġ", "9" ], [ "Ġ1", "0" ], [ "Ġ", "Bo" ], [ "Bo", "S" ], [ "ĠEo", "S" ], [ "ĠEo", "T" ], [ "ĠBo", "T" ], [ "1", "4" ], [ "Ġ1", "1" ], [ "Ġ1", "2" ], [ "1", "5" ], [ "Ġ1", "3" ], [ "Ġ1", "4" ], [ "1", "6" ], [ "Ġ1", "5" ], [ "Ġ1", "6" ], [ "1", "7" ], [ "Ġ1", "7" ], [ "Ġ1", "8" ], [ "Ġ1", "9" ], [ "Ġ2", "0" ] ] } }