{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "!": 5, "\"": 6, "#": 7, "$": 8, "%": 9, "&": 10, "'": 11, "(": 12, ")": 13, "*": 14, "+": 15, ",": 16, "-": 17, ".": 18, "/": 19, "0": 20, "1": 21, "2": 22, "3": 23, "4": 24, "5": 25, "6": 26, "7": 27, "8": 28, "9": 29, ":": 30, ";": 31, "<": 32, "=": 33, ">": 34, "?": 35, "@": 36, "A": 37, "B": 38, "C": 39, "D": 40, "E": 41, "F": 42, "G": 43, "H": 44, "I": 45, "J": 46, "K": 47, "L": 48, "M": 49, "N": 50, "O": 51, "P": 52, "Q": 53, "R": 54, "S": 55, "T": 56, "U": 57, "V": 58, "W": 59, "X": 60, "Y": 61, "Z": 62, "[": 63, "\\": 64, "]": 65, "^": 66, "_": 67, "`": 68, "a": 69, "b": 70, "c": 71, "d": 72, "e": 73, "f": 74, "g": 75, "h": 76, "i": 77, "j": 78, "k": 79, "l": 80, "m": 81, "n": 82, "o": 83, "p": 84, "q": 85, "r": 86, "s": 87, "t": 88, "u": 89, "v": 90, "w": 91, "x": 92, "y": 93, "z": 94, "{": 95, "|": 96, "}": 97, "~": 98, "¡": 99, "¢": 100, "£": 101, "¤": 102, "¥": 103, "¦": 104, "§": 105, "¨": 106, "©": 107, "ª": 108, "«": 109, "¬": 110, "®": 111, "¯": 112, "°": 113, "±": 114, "²": 115, "³": 116, "´": 117, "µ": 118, "¶": 119, "·": 120, "¸": 121, "¹": 122, "º": 123, "»": 124, "¼": 125, "½": 126, "¾": 127, "¿": 128, "À": 129, "Á": 130, "Â": 131, "Ã": 132, "Ä": 133, "Å": 134, "Æ": 135, "Ç": 136, "È": 137, "É": 138, "Ê": 139, "Ë": 140, "Ì": 141, "Í": 142, "Î": 143, "Ï": 144, "Ð": 145, "Ñ": 146, "Ò": 147, "Ó": 148, "Ô": 149, "Õ": 150, "Ö": 151, "×": 152, "Ø": 153, "Ù": 154, "Ú": 155, "Û": 156, "Ü": 157, "Ý": 158, "Þ": 159, "ß": 160, "à": 161, "á": 162, "â": 163, "ã": 164, "ä": 165, "å": 166, "æ": 167, "ç": 168, "è": 169, "é": 170, "ê": 171, "ë": 172, "ì": 173, "í": 174, "î": 175, "ï": 176, "ð": 177, "ñ": 178, "ò": 179, "ó": 180, "ô": 181, "õ": 182, "ö": 183, "÷": 184, "ø": 185, "ù": 186, "ú": 187, "û": 188, "ü": 189, "ý": 190, "þ": 191, "ÿ": 192, "Ā": 193, "ā": 194, "Ă": 195, "ă": 196, "Ą": 197, "ą": 198, "Ć": 199, "ć": 200, "Ĉ": 201, "ĉ": 202, "Ċ": 203, "ċ": 204, "Č": 205, "č": 206, "Ď": 207, "ď": 208, "Đ": 209, "đ": 210, "Ē": 211, "ē": 212, "Ĕ": 213, "ĕ": 214, "Ė": 215, "ė": 216, "Ę": 217, "ę": 218, "Ě": 219, "ě": 220, "Ĝ": 221, "ĝ": 222, "Ğ": 223, "ğ": 224, "Ġ": 225, "ġ": 226, "Ģ": 227, "ģ": 228, "Ĥ": 229, "ĥ": 230, "Ħ": 231, "ħ": 232, "Ĩ": 233, "ĩ": 234, "Ī": 235, "ī": 236, "Ĭ": 237, "ĭ": 238, "Į": 239, "į": 240, "İ": 241, "ı": 242, "IJ": 243, "ij": 244, "Ĵ": 245, "ĵ": 246, "Ķ": 247, "ķ": 248, "ĸ": 249, "Ĺ": 250, "ĺ": 251, "Ļ": 252, "ļ": 253, "Ľ": 254, "ľ": 255, "Ŀ": 256, "ŀ": 257, "Ł": 258, "ł": 259, "Ń": 260, "Ġt": 261, "Ġa": 262, "he": 263, "in": 264, "re": 265, "Ġthe": 266, "on": 267, "er": 268, "Ġo": 269, "at": 270, "Ġs": 271, "en": 272, "Ġc": 273, "Ġw": 274, "es": 275, "is": 276, "nd": 277, "it": 278, "or": 279, "Ġp": 280, "al": 281, "ed": 282, "Ġb": 283, "an": 284, "Ġof": 285, "Ġf": 286, "ing": 287, "Ġin": 288, "ar": 289, "ou": 290, "Ġm": 291, "Ġand": 292, "Ġto": 293, "ic": 294, "Ġd": 295, "ion": 296, "le": 297, "ro": 298, "as": 299, "Ġh": 300, "Ġth": 301, "ent": 302, "ct": 303, "Ġe": 304, "il": 305, "Ġre": 306, "om": 307, "ve": 308, "Ġl": 309, "st": 310, "Ġn": 311, "ly": 312, "âĢ": 313, "Ġbe": 314, "Ġis": 315, "ĠT": 316, "se": 317, "ol": 318, "Ġg": 319, "ation": 320, "im": 321, "id": 322, "et": 323, "ut": 324, "ur": 325, "ce": 326, "ot": 327, "ra": 328, "ow": 329, "ch": 330, "ĠA": 331, "Ġfor": 332, "ig": 333, "ĠS": 334, "Ġthat": 335, "Ġu": 336, "Ġon": 337, "Ġst": 338, "ver": 339, "ĠC": 340, "ĠI": 341, "ir": 342, "Ġy": 343, "ul": 344, "ay": 345, "am": 346, "Ġas": 347, "ith": 348, "el": 349, "ad": 350, "her": 351, "Ġpro": 352, "Ġare": 353, "Ġcon": 354, "Ġan": 355, "Ġwith": 356, "Ġwh": 357, "if": 358, "Ġit": 359, "Ġor": 360, "Ġal": 361, "Ġyou": 362, "ter": 363, "od": 364, "ĠM": 365, "Ġ(": 366, "ĠThe": 367, "ment": 368, "ge": 369, "ĠP": 370, "Ġ1": 371, "th": 372, "ate": 373, "op": 374, "ist": 375, "ers": 376, "Ġde": 377, "ies": 378, "ab": 379, "ill": 380, "Ġhe": 381, "ess": 382, "ĠB": 383, "Ġex": 384, "us": 385, "Ġwe": 386, "ect": 387, "ore": 388, "Ġsu": 389, "Ġcom": 390, "âĢĻ": 391, "ity": 392, "est": 393, "ive": 394, "res": 395, "Ġha": 396, "ac": 397, "rom": 398, "ld": 399, "ĠH": 400, "os": 401, "um": 402, "Ġv": 403, "ke": 404, "Ġby": 405, "em": 406, "ant": 407, "iv": 408, "qu": 409, "ain": 410, "Ġat": 411, "nt": 412, "ĠW": 413, "pp": 414, "Ġwas": 415, "ĠD": 416, "igh": 417, "ud": 418, "ĠE": 419, "Ġfrom": 420, "Ġcan": 421, "and": 422, "oc": 423, "Ġch": 424, "Ġr": 425, "ĠR": 426, "ort": 427, "Ġne": 428, "ĠF": 429, "Ġse": 430, "ial": 431, "Ġ2": 432, "ure": 433, "ĠâĢ": 434, "Ġnot": 435, "un": 436, "art": 437, "ĠN": 438, "Ġle": 439, "ical": 440, "ĠG": 441, "Ġhave": 442, "ri": 443, "ĠL": 444, "Ġsh": 445, "og": 446, "Ġen": 447, "Ġthis": 448, "Ġsp": 449, "00": 450, "pt": 451, "ight": 452, "rou": 453, "Ġab": 454, "red": 455, "ich": 456, "all": 457, "Ġwor": 458, "ther": 459, "ould": 460, "The": 461, "gh": 462, "ions": 463, "our": 464, "ome": 465, "ost": 466, "du": 467, "ated": 468, "Ġim": 469, "Ġus": 470, "ine": 471, "per": 472, "out": 473, "ard": 474, "Ġpl": 475, "ag": 476, "Ġtheir": 477, "ide": 478, "ell": 479, "ff": 480, "act": 481, "ans": 482, "ĠO": 483, "Ġint": 484, "ast": 485, "pl": 486, "ear": 487, "age": 488, "ust": 489, "ĠJ": 490, "Ġwhich": 491, "ak": 492, "Ġk": 493, "ous": 494, "Ġwhe": 495, "ec": 496, "Ġthey": 497, "ĠU": 498, "iz": 499 }, "merges": [ [ "Ġ", "t" ], [ "Ġ", "a" ], [ "h", "e" ], [ "i", "n" ], [ "r", "e" ], [ "Ġt", "he" ], [ "o", "n" ], [ "e", "r" ], [ "Ġ", "o" ], [ "a", "t" ], [ "Ġ", "s" ], [ "e", "n" ], [ "Ġ", "c" ], [ "Ġ", "w" ], [ "e", "s" ], [ "i", "s" ], [ "n", "d" ], [ "i", "t" ], [ "o", "r" ], [ "Ġ", "p" ], [ "a", "l" ], [ "e", "d" ], [ "Ġ", "b" ], [ "a", "n" ], [ "Ġo", "f" ], [ "Ġ", "f" ], [ "in", "g" ], [ "Ġ", "in" ], [ "a", "r" ], [ "o", "u" ], [ "Ġ", "m" ], [ "Ġa", "nd" ], [ "Ġt", "o" ], [ "i", "c" ], [ "Ġ", "d" ], [ "i", "on" ], [ "l", "e" ], [ "r", "o" ], [ "a", "s" ], [ "Ġ", "h" ], [ "Ġt", "h" ], [ "en", "t" ], [ "c", "t" ], [ "Ġ", "e" ], [ "i", "l" ], [ "Ġ", "re" ], [ "o", "m" ], [ "v", "e" ], [ "Ġ", "l" ], [ "s", "t" ], [ "Ġ", "n" ], [ "l", "y" ], [ "â", "Ģ" ], [ "Ġb", "e" ], [ "Ġ", "is" ], [ "Ġ", "T" ], [ "s", "e" ], [ "o", "l" ], [ "Ġ", "g" ], [ "at", "ion" ], [ "i", "m" ], [ "i", "d" ], [ "e", "t" ], [ "u", "t" ], [ "u", "r" ], [ "c", "e" ], [ "o", "t" ], [ "r", "a" ], [ "o", "w" ], [ "c", "h" ], [ "Ġ", "A" ], [ "Ġf", "or" ], [ "i", "g" ], [ "Ġ", "S" ], [ "Ġth", "at" ], [ "Ġ", "u" ], [ "Ġ", "on" ], [ "Ġs", "t" ], [ "v", "er" ], [ "Ġ", "C" ], [ "Ġ", "I" ], [ "i", "r" ], [ "Ġ", "y" ], [ "u", "l" ], [ "a", "y" ], [ "a", "m" ], [ "Ġa", "s" ], [ "it", "h" ], [ "e", "l" ], [ "a", "d" ], [ "he", "r" ], [ "Ġp", "ro" ], [ "Ġa", "re" ], [ "Ġc", "on" ], [ "Ġa", "n" ], [ "Ġw", "ith" ], [ "Ġw", "h" ], [ "i", "f" ], [ "Ġ", "it" ], [ "Ġo", "r" ], [ "Ġa", "l" ], [ "Ġy", "ou" ], [ "t", "er" ], [ "o", "d" ], [ "Ġ", "M" ], [ "Ġ", "(" ], [ "ĠT", "he" ], [ "m", "ent" ], [ "g", "e" ], [ "Ġ", "P" ], [ "Ġ", "1" ], [ "t", "h" ], [ "at", "e" ], [ "o", "p" ], [ "is", "t" ], [ "er", "s" ], [ "Ġd", "e" ], [ "i", "es" ], [ "a", "b" ], [ "il", "l" ], [ "Ġ", "he" ], [ "es", "s" ], [ "Ġ", "B" ], [ "Ġe", "x" ], [ "u", "s" ], [ "Ġw", "e" ], [ "e", "ct" ], [ "o", "re" ], [ "Ġs", "u" ], [ "Ġc", "om" ], [ "âĢ", "Ļ" ], [ "it", "y" ], [ "es", "t" ], [ "i", "ve" ], [ "re", "s" ], [ "Ġh", "a" ], [ "a", "c" ], [ "ro", "m" ], [ "l", "d" ], [ "Ġ", "H" ], [ "o", "s" ], [ "u", "m" ], [ "Ġ", "v" ], [ "k", "e" ], [ "Ġb", "y" ], [ "e", "m" ], [ "an", "t" ], [ "i", "v" ], [ "q", "u" ], [ "a", "in" ], [ "Ġa", "t" ], [ "n", "t" ], [ "Ġ", "W" ], [ "p", "p" ], [ "Ġw", "as" ], [ "Ġ", "D" ], [ "ig", "h" ], [ "u", "d" ], [ "Ġ", "E" ], [ "Ġf", "rom" ], [ "Ġc", "an" ], [ "a", "nd" ], [ "o", "c" ], [ "Ġc", "h" ], [ "Ġ", "r" ], [ "Ġ", "R" ], [ "or", "t" ], [ "Ġn", "e" ], [ "Ġ", "F" ], [ "Ġs", "e" ], [ "i", "al" ], [ "Ġ", "2" ], [ "u", "re" ], [ "Ġ", "âĢ" ], [ "Ġn", "ot" ], [ "u", "n" ], [ "ar", "t" ], [ "Ġ", "N" ], [ "Ġ", "le" ], [ "ic", "al" ], [ "Ġ", "G" ], [ "Ġha", "ve" ], [ "r", "i" ], [ "Ġ", "L" ], [ "Ġs", "h" ], [ "o", "g" ], [ "Ġ", "en" ], [ "Ġth", "is" ], [ "Ġs", "p" ], [ "0", "0" ], [ "p", "t" ], [ "igh", "t" ], [ "r", "ou" ], [ "Ġa", "b" ], [ "re", "d" ], [ "ic", "h" ], [ "al", "l" ], [ "Ġw", "or" ], [ "t", "her" ], [ "ou", "ld" ], [ "T", "he" ], [ "g", "h" ], [ "ion", "s" ], [ "ou", "r" ], [ "om", "e" ], [ "o", "st" ], [ "d", "u" ], [ "at", "ed" ], [ "Ġ", "im" ], [ "Ġu", "s" ], [ "in", "e" ], [ "p", "er" ], [ "ou", "t" ], [ "ar", "d" ], [ "Ġp", "l" ], [ "a", "g" ], [ "Ġthe", "ir" ], [ "id", "e" ], [ "el", "l" ], [ "f", "f" ], [ "a", "ct" ], [ "an", "s" ], [ "Ġ", "O" ], [ "Ġin", "t" ], [ "as", "t" ], [ "p", "l" ], [ "e", "ar" ], [ "a", "ge" ], [ "u", "st" ], [ "Ġ", "J" ], [ "Ġwh", "ich" ], [ "a", "k" ], [ "Ġ", "k" ], [ "ou", "s" ], [ "Ġw", "he" ], [ "e", "c" ], [ "Ġthe", "y" ], [ "Ġ", "U" ], [ "i", "z" ] ] } }