{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFKC" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "Metaspace", "replacement": "▁", "prepend_scheme": "always", "split": true }, "post_processor": null, "decoder": { "type": "Metaspace", "replacement": "▁", "prepend_scheme": "always", "split": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "!": 4, "\"": 5, "$": 6, "%": 7, "&": 8, "'": 9, "(": 10, ")": 11, "*": 12, "+": 13, ",": 14, "-": 15, ".": 16, "/": 17, "0": 18, "1": 19, "2": 20, "3": 21, "4": 22, "5": 23, "6": 24, "7": 25, "8": 26, "9": 27, ":": 28, ";": 29, "<": 30, "=": 31, ">": 32, "?": 33, "@": 34, "[": 35, "\\": 36, "]": 37, "^": 38, "_": 39, "`": 40, "a": 41, "b": 42, "c": 43, "d": 44, "e": 45, "f": 46, "g": 47, "h": 48, "i": 49, "j": 50, "k": 51, "l": 52, "m": 53, "n": 54, "o": 55, "p": 56, "q": 57, "r": 58, "s": 59, "t": 60, "u": 61, "v": 62, "w": 63, "x": 64, "y": 65, "z": 66, "{": 67, "|": 68, "}": 69, "~": 70, "▁": 71, "▁t": 72, "he": 73, "▁a": 74, "in": 75, "▁the": 76, "er": 77, "▁s": 78, "▁o": 79, "▁w": 80, "re": 81, "▁c": 82, "an": 83, "is": 84, "on": 85, "▁b": 86, "ed": 87, "▁f": 88, "▁m": 89, "or": 90, "▁p": 91, "▁in": 92, "it": 93, "en": 94, "at": 95, "ar": 96, "es": 97, "▁an": 98, "▁of": 99, "al": 100, "as": 101, "▁and": 102, "▁d": 103, "▁h": 104, "ic": 105, "ro": 106, "▁l": 107, "ing": 108, "▁to": 109, "▁n": 110, "ou": 111, "▁is": 112, "le": 113, "ion": 114, "▁g": 115, "▁1": 116, "▁was": 117, "am": 118, "il": 119, "▁he": 120, "▁th": 121, "us": 122, "▁e": 123, "ent": 124, "om": 125, "▁be": 126, "▁re": 127, "un": 128, "st": 129, "▁on": 130, "ad": 131, "ol": 132, "▁st": 133, "▁\"": 134, "▁it": 135, "el": 136, "ct": 137, "▁2": 138, "▁for": 139, "ir": 140, "ot": 141, "id": 142, "ur": 143, "ay": 144, "▁(": 145, "et": 146, "▁19": 147, "ver": 148, "ch": 149, "im": 150, "ow": 151, "ly": 152, "ve": 153, "▁al": 154, "ce": 155, "▁j": 156, "▁as": 157, "ig": 158, "▁k": 159, "s.": 160, "ut": 161, "▁r": 162, "▁20": 163, "ter": 164, "ation": 165, "▁mo": 166, "rom": 167, "▁ch": 168, "and": 169, "her": 170, "th": 171, "▁are": 172, "ith": 173, "op": 174, "▁by": 175, "ul": 176, "▁wh": 177, "em": 178, "▁that": 179, "ist": 180, "▁with": 181, "▁at": 182, "ber": 183, "ag": 184, "▁se": 185, "▁from": 186, "s,": 187, "▁v": 188, "▁or": 189, "um": 190, "▁de": 191, "▁un": 192, "ri": 193, "ac": 194, "ain": 195, "all": 196, "est": 197, "os": 198, "art": 199, "▁pl": 200, "▁com": 201, "oun": 202, "▁con": 203, "ap": 204, "if": 205, "ak": 206, "ian": 207, "'s": 208, "ill": 209, "▁his": 210, "e.": 211, "ld": 212, "▁pro": 213, "ies": 214, "▁ne": 215, "iv": 216, "ia": 217, "up": 218, "od": 219, "▁y": 220, "ers": 221, "res": 222, "so": 223, "▁us": 224, "▁they": 225, "ity": 226, "ame": 227, "▁sp": 228, "▁wor": 229, "ant": 230, "▁sh": 231, "▁le": 232, "ud": 233, "ich": 234, "▁201": 235, "ab": 236, "se": 237, "ard": 238, "igh": 239, "ie": 240, "ort": 241, "▁this": 242, "ere": 243, "▁also": 244, "ate": 245, "▁ro": 246, "mer": 247, "ug": 248, "qu": 249, "ary": 250, "ra": 251, "oc": 252, "orn": 253, "ong": 254, "▁were": 255, "▁she": 256, "▁has": 257, "▁ha": 258, "ip": 259, "▁ar": 260, "▁en": 261, "▁man": 262, "▁not": 263, "ub": 264, "▁ex": 265, "ish": 266, "ther": 267, "▁200": 268, "▁whe": 269, "▁bec": 270, "▁pe": 271, "ast": 272, "▁can": 273, "ive": 274, "e,": 275, "ang": 276, "ust": 277, "▁af": 278, "ment": 279, "▁ab": 280, "ess": 281, "ell": 282, "own": 283, "▁play": 284, "der": 285, "▁te": 286, "ated": 287, "ire": 288, "es.": 289, "00": 290, "ore": 291, "ear": 292, "▁which": 293, "rit": 294, "▁fir": 295, "ome": 296, "our": 297, "ure": 298, "▁mar": 299, "ight": 300, "▁first": 301, "y.": 302, "ally": 303, "ican": 304, "▁part": 305, "▁have": 306, "▁one": 307, "▁cl": 308, "\".": 309, "▁peop": 310, "out": 311, "▁people": 312, "ok": 313, "▁tr": 314, "▁go": 315, "ack": 316, "uc": 317, "▁comp": 318, "▁other": 319, "▁kn": 320, "▁had": 321, "ect": 322, "ren": 323, "▁new": 324, "ount": 325, "▁ag": 326, "▁their": 327, "ik": 328, "▁her": 329, "og": 330, "▁but": 331, "cl": 332, "land": 333, "ial": 334, "pt": 335, "ical": 336, "▁after": 337, "▁ap": 338, "av": 339, "▁all": 340, "ater": 341, ").": 342, "man": 343, "▁amer": 344, "ine": 345, "▁there": 346, "▁tw": 347, "end": 348, "\",": 349, "▁mov": 350, "ous": 351, "▁sc": 352, "▁ser": 353, "ide": 354, "▁who": 355, "iver": 356, "ade": 357, "▁tim": 358, "▁year": 359, "▁many": 360, "▁call": 361, "es,": 362, "aus": 363, "a.": 364, "ry": 365, "▁me": 366, "per": 367, "to": 368, "▁when": 369, "ond": 370, "▁ad": 371, "▁most": 372, "are": 373, "ans": 374, "▁199": 375, "▁no": 376, "▁18": 377, "ction": 378, "▁its": 379, "ember": 380, "ord": 381, "),": 382, "amp": 383, "ass": 384, "▁co": 385, "ace": 386, "ould": 387, "▁known": 388, "ree": 389, "▁3": 390, "ited": 391, "▁col": 392, "iz": 393, "▁some": 394, "▁bo": 395, "▁called": 396, "ach": 397, "▁fe": 398, "ge": 399, "▁used": 400, "y,": 401, "fer": 402, "a,": 403, "▁about": 404, "▁american": 405, "the": 406, "ound": 407, "und": 408, "▁two": 409, "ec": 410, "▁stat": 411, "ational": 412, "▁ind": 413, "ied": 414, "▁pol": 415, "▁ver": 416, "▁so": 417, "uch": 418, "▁work": 419, "▁act": 420, "wn": 421, "port": 422, "▁ac": 423, "ice": 424, "▁cont": 425, "▁may": 426, "▁gro": 427, "gh": 428, "ict": 429, "▁mus": 430, "▁made": 431, "▁comm": 432, "▁world": 433, "age": 434, "ign": 435, "ue": 436, "ind": 437, "one": 438, "er.": 439, "▁per": 440, "ild": 441, "ugh": 442, "▁count": 443, "▁pres": 444, "ib": 445, "we": 446, "ten": 447, "uary": 448, "▁198": 449, "sh": 450, "▁jo": 451, "▁dis": 452, "pl": 453, "tern": 454, "ake": 455, "▁do": 456, "ult": 457, "▁pr": 458, "▁more": 459, "▁cent": 460, "▁fl": 461, "te": 462, "▁been": 463, "hed": 464, "clud": 465, "ased": 466, "ause": 467, "▁bl": 468, "▁united": 469, "vel": 470, "ition": 471, "aw": 472, "▁im": 473, "rop": 474, "▁up": 475, "orm": 476, "▁born": 477, "▁ele": 478, "mber": 479, "▁includ": 480, "ran": 481, "uring": 482, "ough": 483, "act": 484, "▁197": 485, "old": 486, "ence": 487, "▁par": 488, "▁ev": 489, "▁into": 490, "▁eng": 491, "ath": 492, "ick": 493, "▁car": 494, "ward": 495, "▁sy": 496, "▁we": 497, "▁bet": 498, "▁war": 499, "▁ph": 500, "les": 501, "▁rec": 502, "▁sou": 503, "ubl": 504, "ike": 505, "ident": 506, "▁out": 507, "▁over": 508, "ost": 509, "▁city": 510, "▁qu": 511 }, "merges": [ [ "▁", "t" ], [ "h", "e" ], [ "▁", "a" ], [ "i", "n" ], [ "▁t", "he" ], [ "e", "r" ], [ "▁", "s" ], [ "▁", "o" ], [ "▁", "w" ], [ "r", "e" ], [ "▁", "c" ], [ "a", "n" ], [ "i", "s" ], [ "o", "n" ], [ "▁", "b" ], [ "e", "d" ], [ "▁", "f" ], [ "▁", "m" ], [ "o", "r" ], [ "▁", "p" ], [ "▁", "in" ], [ "i", "t" ], [ "e", "n" ], [ "a", "t" ], [ "a", "r" ], [ "e", "s" ], [ "▁a", "n" ], [ "▁o", "f" ], [ "a", "l" ], [ "a", "s" ], [ "▁an", "d" ], [ "▁", "d" ], [ "▁", "h" ], [ "i", "c" ], [ "r", "o" ], [ "▁", "l" ], [ "in", "g" ], [ "▁t", "o" ], [ "▁", "n" ], [ "o", "u" ], [ "▁", "is" ], [ "l", "e" ], [ "i", "on" ], [ "▁", "g" ], [ "▁", "1" ], [ "▁w", "as" ], [ "a", "m" ], [ "i", "l" ], [ "▁", "he" ], [ "▁t", "h" ], [ "u", "s" ], [ "▁", "e" ], [ "en", "t" ], [ "o", "m" ], [ "▁b", "e" ], [ "▁", "re" ], [ "u", "n" ], [ "s", "t" ], [ "▁o", "n" ], [ "a", "d" ], [ "o", "l" ], [ "▁s", "t" ], [ "▁", "\"" ], [ "▁", "it" ], [ "e", "l" ], [ "c", "t" ], [ "▁", "2" ], [ "▁f", "or" ], [ "i", "r" ], [ "o", "t" ], [ "i", "d" ], [ "u", "r" ], [ "a", "y" ], [ "▁", "(" ], [ "e", "t" ], [ "▁1", "9" ], [ "v", "er" ], [ "c", "h" ], [ "i", "m" ], [ "o", "w" ], [ "l", "y" ], [ "v", "e" ], [ "▁a", "l" ], [ "c", "e" ], [ "▁", "j" ], [ "▁a", "s" ], [ "i", "g" ], [ "▁", "k" ], [ "s", "." ], [ "u", "t" ], [ "▁", "r" ], [ "▁2", "0" ], [ "t", "er" ], [ "at", "ion" ], [ "▁m", "o" ], [ "ro", "m" ], [ "▁c", "h" ], [ "an", "d" ], [ "he", "r" ], [ "t", "h" ], [ "▁a", "re" ], [ "it", "h" ], [ "o", "p" ], [ "▁b", "y" ], [ "u", "l" ], [ "▁w", "h" ], [ "e", "m" ], [ "▁th", "at" ], [ "is", "t" ], [ "▁w", "ith" ], [ "▁a", "t" ], [ "b", "er" ], [ "a", "g" ], [ "▁s", "e" ], [ "▁f", "rom" ], [ "s", "," ], [ "▁", "v" ], [ "▁o", "r" ], [ "u", "m" ], [ "▁d", "e" ], [ "▁", "un" ], [ "r", "i" ], [ "a", "c" ], [ "a", "in" ], [ "al", "l" ], [ "es", "t" ], [ "o", "s" ], [ "ar", "t" ], [ "▁p", "l" ], [ "▁c", "om" ], [ "ou", "n" ], [ "▁c", "on" ], [ "a", "p" ], [ "i", "f" ], [ "a", "k" ], [ "i", "an" ], [ "'", "s" ], [ "il", "l" ], [ "▁h", "is" ], [ "e", "." ], [ "l", "d" ], [ "▁p", "ro" ], [ "i", "es" ], [ "▁n", "e" ], [ "i", "v" ], [ "i", "a" ], [ "u", "p" ], [ "o", "d" ], [ "▁", "y" ], [ "er", "s" ], [ "re", "s" ], [ "s", "o" ], [ "▁", "us" ], [ "▁the", "y" ], [ "it", "y" ], [ "am", "e" ], [ "▁s", "p" ], [ "▁w", "or" ], [ "an", "t" ], [ "▁s", "h" ], [ "▁l", "e" ], [ "u", "d" ], [ "ic", "h" ], [ "▁20", "1" ], [ "a", "b" ], [ "s", "e" ], [ "ar", "d" ], [ "ig", "h" ], [ "i", "e" ], [ "or", "t" ], [ "▁th", "is" ], [ "er", "e" ], [ "▁al", "so" ], [ "at", "e" ], [ "▁", "ro" ], [ "m", "er" ], [ "u", "g" ], [ "q", "u" ], [ "ar", "y" ], [ "r", "a" ], [ "o", "c" ], [ "or", "n" ], [ "on", "g" ], [ "▁w", "ere" ], [ "▁s", "he" ], [ "▁h", "as" ], [ "▁h", "a" ], [ "i", "p" ], [ "▁a", "r" ], [ "▁", "en" ], [ "▁m", "an" ], [ "▁n", "ot" ], [ "u", "b" ], [ "▁e", "x" ], [ "is", "h" ], [ "t", "her" ], [ "▁20", "0" ], [ "▁w", "he" ], [ "▁be", "c" ], [ "▁p", "e" ], [ "as", "t" ], [ "▁c", "an" ], [ "i", "ve" ], [ "e", "," ], [ "an", "g" ], [ "us", "t" ], [ "▁a", "f" ], [ "m", "ent" ], [ "▁a", "b" ], [ "es", "s" ], [ "el", "l" ], [ "ow", "n" ], [ "▁pl", "ay" ], [ "d", "er" ], [ "▁t", "e" ], [ "at", "ed" ], [ "i", "re" ], [ "es", "." ], [ "0", "0" ], [ "o", "re" ], [ "e", "ar" ], [ "▁wh", "ich" ], [ "r", "it" ], [ "▁f", "ir" ], [ "om", "e" ], [ "ou", "r" ], [ "u", "re" ], [ "▁m", "ar" ], [ "igh", "t" ], [ "▁fir", "st" ], [ "y", "." ], [ "al", "ly" ], [ "ic", "an" ], [ "▁p", "art" ], [ "▁ha", "ve" ], [ "▁on", "e" ], [ "▁c", "l" ], [ "\"", "." ], [ "▁pe", "op" ], [ "ou", "t" ], [ "▁peop", "le" ], [ "o", "k" ], [ "▁t", "r" ], [ "▁g", "o" ], [ "ac", "k" ], [ "u", "c" ], [ "▁com", "p" ], [ "▁o", "ther" ], [ "▁k", "n" ], [ "▁h", "ad" ], [ "e", "ct" ], [ "re", "n" ], [ "▁ne", "w" ], [ "oun", "t" ], [ "▁a", "g" ], [ "▁the", "ir" ], [ "i", "k" ], [ "▁he", "r" ], [ "o", "g" ], [ "▁b", "ut" ], [ "c", "l" ], [ "l", "and" ], [ "i", "al" ], [ "p", "t" ], [ "ic", "al" ], [ "▁af", "ter" ], [ "▁a", "p" ], [ "a", "v" ], [ "▁al", "l" ], [ "at", "er" ], [ ")", "." ], [ "m", "an" ], [ "▁a", "mer" ], [ "in", "e" ], [ "▁the", "re" ], [ "▁t", "w" ], [ "en", "d" ], [ "\"", "," ], [ "▁mo", "v" ], [ "ou", "s" ], [ "▁s", "c" ], [ "▁s", "er" ], [ "id", "e" ], [ "▁wh", "o" ], [ "i", "ver" ], [ "ad", "e" ], [ "▁t", "im" ], [ "▁y", "ear" ], [ "▁man", "y" ], [ "▁c", "all" ], [ "es", "," ], [ "a", "us" ], [ "a", "." ], [ "r", "y" ], [ "▁m", "e" ], [ "p", "er" ], [ "t", "o" ], [ "▁whe", "n" ], [ "on", "d" ], [ "▁a", "d" ], [ "▁mo", "st" ], [ "a", "re" ], [ "an", "s" ], [ "▁19", "9" ], [ "▁n", "o" ], [ "▁1", "8" ], [ "ct", "ion" ], [ "▁it", "s" ], [ "em", "ber" ], [ "or", "d" ], [ ")", "," ], [ "am", "p" ], [ "as", "s" ], [ "▁c", "o" ], [ "a", "ce" ], [ "ou", "ld" ], [ "▁kn", "own" ], [ "re", "e" ], [ "▁", "3" ], [ "it", "ed" ], [ "▁c", "ol" ], [ "i", "z" ], [ "▁s", "ome" ], [ "▁b", "o" ], [ "▁call", "ed" ], [ "a", "ch" ], [ "▁f", "e" ], [ "g", "e" ], [ "▁us", "ed" ], [ "y", "," ], [ "f", "er" ], [ "a", "," ], [ "▁ab", "out" ], [ "▁amer", "ican" ], [ "t", "he" ], [ "oun", "d" ], [ "un", "d" ], [ "▁tw", "o" ], [ "e", "c" ], [ "▁st", "at" ], [ "ation", "al" ], [ "▁in", "d" ], [ "i", "ed" ], [ "▁p", "ol" ], [ "▁", "ver" ], [ "▁s", "o" ], [ "u", "ch" ], [ "▁wor", "k" ], [ "▁a", "ct" ], [ "w", "n" ], [ "p", "ort" ], [ "▁a", "c" ], [ "ic", "e" ], [ "▁con", "t" ], [ "▁m", "ay" ], [ "▁g", "ro" ], [ "g", "h" ], [ "ic", "t" ], [ "▁m", "us" ], [ "▁m", "ade" ], [ "▁com", "m" ], [ "▁wor", "ld" ], [ "ag", "e" ], [ "ig", "n" ], [ "u", "e" ], [ "in", "d" ], [ "on", "e" ], [ "er", "." ], [ "▁p", "er" ], [ "il", "d" ], [ "ug", "h" ], [ "▁c", "ount" ], [ "▁p", "res" ], [ "i", "b" ], [ "w", "e" ], [ "t", "en" ], [ "u", "ary" ], [ "▁19", "8" ], [ "s", "h" ], [ "▁j", "o" ], [ "▁d", "is" ], [ "p", "l" ], [ "ter", "n" ], [ "ak", "e" ], [ "▁d", "o" ], [ "ul", "t" ], [ "▁p", "r" ], [ "▁mo", "re" ], [ "▁c", "ent" ], [ "▁f", "l" ], [ "t", "e" ], [ "▁be", "en" ], [ "he", "d" ], [ "cl", "ud" ], [ "as", "ed" ], [ "aus", "e" ], [ "▁b", "l" ], [ "▁un", "ited" ], [ "v", "el" ], [ "it", "ion" ], [ "a", "w" ], [ "▁", "im" ], [ "ro", "p" ], [ "▁", "up" ], [ "or", "m" ], [ "▁b", "orn" ], [ "▁e", "le" ], [ "m", "ber" ], [ "▁in", "clud" ], [ "r", "an" ], [ "ur", "ing" ], [ "ou", "gh" ], [ "a", "ct" ], [ "▁19", "7" ], [ "ol", "d" ], [ "en", "ce" ], [ "▁p", "ar" ], [ "▁e", "v" ], [ "▁in", "to" ], [ "▁en", "g" ], [ "at", "h" ], [ "ic", "k" ], [ "▁c", "ar" ], [ "w", "ard" ], [ "▁s", "y" ], [ "▁w", "e" ], [ "▁be", "t" ], [ "▁w", "ar" ], [ "▁p", "h" ], [ "l", "es" ], [ "▁re", "c" ], [ "▁s", "ou" ], [ "ub", "l" ], [ "ik", "e" ], [ "id", "ent" ], [ "▁o", "ut" ], [ "▁o", "ver" ], [ "o", "st" ], [ "▁c", "ity" ], [ "▁", "qu" ] ] } }