{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "[PAD]" }, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFC" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[BOS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[EOS]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[BOS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[EOS]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[EOS]", "type_id": 1 } } ], "special_tokens": { "[BOS]": { "id": "[BOS]", "ids": [ 5 ], "tokens": [ "[BOS]" ] }, "[EOS]": { "id": "[EOS]", "ids": [ 6 ], "tokens": [ "[EOS]" ] } } }, "decoder": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "[PAD]": 0, "[UNK]": 1, "[SEP]": 2, "[CLS]": 3, "[MASK]": 4, "[BOS]": 5, "[EOS]": 6, "!": 7, "\"": 8, "#": 9, "$": 10, "%": 11, "&": 12, "'": 13, "(": 14, ")": 15, "*": 16, "+": 17, ",": 18, "-": 19, ".": 20, "/": 21, "0": 22, "1": 23, "2": 24, "3": 25, "4": 26, "5": 27, "6": 28, "7": 29, "8": 30, "9": 31, ":": 32, ";": 33, "<": 34, "=": 35, ">": 36, "?": 37, "@": 38, "A": 39, "B": 40, "C": 41, "D": 42, "E": 43, "F": 44, "G": 45, "H": 46, "I": 47, "J": 48, "K": 49, "L": 50, "M": 51, "N": 52, "O": 53, "P": 54, "Q": 55, "R": 56, "S": 57, "T": 58, "U": 59, "V": 60, "W": 61, "X": 62, "Y": 63, "Z": 64, "[": 65, "\\": 66, "]": 67, "^": 68, "_": 69, "`": 70, "a": 71, "b": 72, "c": 73, "d": 74, "e": 75, "f": 76, "g": 77, "h": 78, "i": 79, "j": 80, "k": 81, "l": 82, "m": 83, "n": 84, "o": 85, "p": 86, "q": 87, "r": 88, "s": 89, "t": 90, "u": 91, "v": 92, "w": 93, "x": 94, "y": 95, "z": 96, "{": 97, "|": 98, "}": 99, "~": 100, "¡": 101, "¢": 102, "£": 103, "¤": 104, "¥": 105, "¦": 106, "§": 107, "¨": 108, "©": 109, "ª": 110, "«": 111, "¬": 112, "®": 113, "¯": 114, "°": 115, "±": 116, "²": 117, "³": 118, "´": 119, "µ": 120, "¶": 121, "·": 122, "¸": 123, "¹": 124, "º": 125, "»": 126, "¼": 127, "½": 128, "¾": 129, "¿": 130, "À": 131, "Á": 132, "Â": 133, "Ã": 134, "Ä": 135, "Å": 136, "Æ": 137, "Ç": 138, "È": 139, "É": 140, "Ê": 141, "Ë": 142, "Ì": 143, "Í": 144, "Î": 145, "Ï": 146, "Ð": 147, "Ñ": 148, "Ò": 149, "Ó": 150, "Ô": 151, "Õ": 152, "Ö": 153, "×": 154, "Ø": 155, "Ù": 156, "Ú": 157, "Û": 158, "Ü": 159, "Ý": 160, "Þ": 161, "ß": 162, "à": 163, "á": 164, "â": 165, "ã": 166, "ä": 167, "å": 168, "æ": 169, "ç": 170, "è": 171, "é": 172, "ê": 173, "ë": 174, "ì": 175, "í": 176, "î": 177, "ï": 178, "ð": 179, "ñ": 180, "ò": 181, "ó": 182, "ô": 183, "õ": 184, "ö": 185, "÷": 186, "ø": 187, "ù": 188, "ú": 189, "û": 190, "ü": 191, "ý": 192, "þ": 193, "ÿ": 194, "Ā": 195, "ā": 196, "Ă": 197, "ă": 198, "Ą": 199, "ą": 200, "Ć": 201, "ć": 202, "Ĉ": 203, "ĉ": 204, "Ċ": 205, "ċ": 206, "Č": 207, "č": 208, "Ď": 209, "ď": 210, "Đ": 211, "đ": 212, "Ē": 213, "ē": 214, "Ĕ": 215, "ĕ": 216, "Ė": 217, "ė": 218, "Ę": 219, "ę": 220, "Ě": 221, "ě": 222, "Ĝ": 223, "ĝ": 224, "Ğ": 225, "ğ": 226, "Ġ": 227, "ġ": 228, "Ģ": 229, "ģ": 230, "Ĥ": 231, "ĥ": 232, "Ħ": 233, "ħ": 234, "Ĩ": 235, "ĩ": 236, "Ī": 237, "ī": 238, "Ĭ": 239, "ĭ": 240, "Į": 241, "į": 242, "İ": 243, "ı": 244, "IJ": 245, "ij": 246, "Ĵ": 247, "ĵ": 248, "Ķ": 249, "ķ": 250, "ĸ": 251, "Ĺ": 252, "ĺ": 253, "Ļ": 254, "ļ": 255, "Ľ": 256, "ľ": 257, "Ŀ": 258, "ŀ": 259, "Ł": 260, "ł": 261, "Ń": 262, "es": 263, "en": 264, "th": 265, "the": 266, "at": 267, "or": 268, "de": 269, "ates": 270, "ut": 271, "co": 272, "ra": 273, "di": 274, "Ġp": 275, "Ġt": 276, "ns": 277, "ent": 278, "st": 279, "Ġpr": 280, "Ġa": 281, "io": 282, "ts": 283, "Ġde": 284, "Ġdeco": 285, "li": 286, "wor": 287, "work": 288, "Ġf": 289, "ne": 290, "Ġpro": 291, "Ġg": 292, "des": 293, "Ġe": 294, "er": 295, "put": 296, "al": 297, "der": 298, "ces": 299, "for": 300, "rans": 301, "Ġtrans": 302, "form": 303, "Ġtransform": 304, "Ġs": 305, "Ġo": 306, "zes": 307, "ions": 308, "Ġen": 309, "Ġenco": 310, "re": 311, "Ġdecodes": 312, "ab": 313, "but": 314, "bab": 315, "ili": 316, "ibut": 317, "ribut": 318, "ty": 319, "Ġdi": 320, "stribut": 321, "Ġprobab": 322, "ility": 323, "Ġdistribut": 324, "Ġprobability": 325, "Ġdistributions": 326, "dd": 327, "hi": 328, "Ġst": 329, "Ġhi": 330, "dden": 331, "Ġstates": 332, "Ġhidden": 333, "ener": 334, "Ġgener": 335, "Ġgenerates": 336, "ework": 337, "mework": 338, "ramework": 339, "Ġframework": 340, "as": 341, "cl": 342, "fi": 343, "ifi": 344, "sifi": 345, "Ġcl": 346, "assifi": 347, "Ġclassifi": 348, "Ġclassifies": 349, "be": 350, "ddi": 351, "gs": 352, "mbe": 353, "ngs": 354, "Ġembe": 355, "ddings": 356, "Ġembeddings": 357, "twork": 358, "Ġne": 359, "Ġnetwork": 360, "eli": 361, "ip": 362, "Ġpip": 363, "eline": 364, "Ġpipeline": 365, "Ġdecoder": 366, "ct": 367, "eat": 368, "ect": 369, "ure": 370, "vect": 371, "Ġvect": 372, "ors": 373, "Ġfeat": 374, "Ġvectors": 375, "Ġfeature": 376, "an": 377, "gent": 378, "Ġagent": 379, "mo": 380, "Ġmo": 381, "del": 382, "Ġmodel": 383, "in": 384, "ken": 385, "oken": 386, "Ġin": 387, "Ġtoken": 388, "Ġinput": 389, "Ġtokens": 390, "gi": 391, "lo": 392, "Ġlo": 393, "utput": 394, "Ġoutput": 395, "gits": 396, "Ġlogits": 397, "uates": 398, "val": 399, "Ġeval": 400, "Ġtransforms": 401, "Ġevaluates": 402, "ses": 403, "Ġproces": 404, "Ġprocesses": 405, "dates": 406, "pdates": 407, "updates": 408, "Ġupdates": 409, "radi": 410, "Ġgradi": 411, "Ġgradient": 412, "nal": 413, "yzes": 414, "Ġanal": 415, "Ġanalyzes": 416, "eq": 417, "ex": 418, "uen": 419, "Ġtex": 420, "Ġseq": 421, "uences": 422, "Ġtext": 423, "Ġsequences": 424, "em": 425, "yst": 426, "Ġsyst": 427, "Ġsystem": 428, "Ġtransformer": 429, "Ġencodes": 430, "im": 431, "izes": 432, "pt": 433, "Ġopt": 434, "imizes": 435, "Ġoptimizes": 436, "Ġencoder": 437, "ei": 438, "gh": 439, "tent": 440, "wei": 441, "Ġat": 442, "Ġwei": 443, "ion": 444, "ghts": 445, "tention": 446, "Ġattention": 447, "Ġweights": 448, "gor": 449, "ith": 450, "lgor": 451, "Ġalgor": 452, "ithm": 453, "Ġalgorithm": 454, "cts": 455, "edi": 456, "Ġpredi": 457, "Ġpredicts": 458, "pr": 459, "Ġre": 460, "esent": 461, "ations": 462, "present": 463, "Ġrepresent": 464, "Ġrepresentations": 465 }, "merges": [ [ "e", "s" ], [ "e", "n" ], [ "t", "h" ], [ "th", "e" ], [ "a", "t" ], [ "o", "r" ], [ "d", "e" ], [ "at", "es" ], [ "u", "t" ], [ "c", "o" ], [ "r", "a" ], [ "d", "i" ], [ "Ġ", "p" ], [ "Ġ", "t" ], [ "n", "s" ], [ "en", "t" ], [ "s", "t" ], [ "Ġp", "r" ], [ "Ġ", "a" ], [ "i", "o" ], [ "t", "s" ], [ "Ġ", "de" ], [ "Ġde", "co" ], [ "l", "i" ], [ "w", "or" ], [ "wor", "k" ], [ "Ġ", "f" ], [ "n", "e" ], [ "Ġpr", "o" ], [ "Ġ", "g" ], [ "d", "es" ], [ "Ġ", "e" ], [ "e", "r" ], [ "p", "ut" ], [ "a", "l" ], [ "de", "r" ], [ "c", "es" ], [ "f", "or" ], [ "ra", "ns" ], [ "Ġt", "rans" ], [ "for", "m" ], [ "Ġtrans", "form" ], [ "Ġ", "s" ], [ "Ġ", "o" ], [ "z", "es" ], [ "io", "ns" ], [ "Ġ", "en" ], [ "Ġen", "co" ], [ "r", "e" ], [ "Ġdeco", "des" ], [ "a", "b" ], [ "b", "ut" ], [ "b", "ab" ], [ "i", "li" ], [ "i", "but" ], [ "r", "ibut" ], [ "t", "y" ], [ "Ġ", "di" ], [ "st", "ribut" ], [ "Ġpro", "bab" ], [ "ili", "ty" ], [ "Ġdi", "stribut" ], [ "Ġprobab", "ility" ], [ "Ġdistribut", "ions" ], [ "d", "d" ], [ "h", "i" ], [ "Ġ", "st" ], [ "Ġ", "hi" ], [ "dd", "en" ], [ "Ġst", "ates" ], [ "Ġhi", "dden" ], [ "en", "er" ], [ "Ġg", "ener" ], [ "Ġgener", "ates" ], [ "e", "work" ], [ "m", "ework" ], [ "ra", "mework" ], [ "Ġf", "ramework" ], [ "a", "s" ], [ "c", "l" ], [ "f", "i" ], [ "i", "fi" ], [ "s", "ifi" ], [ "Ġ", "cl" ], [ "as", "sifi" ], [ "Ġcl", "assifi" ], [ "Ġclassifi", "es" ], [ "b", "e" ], [ "d", "di" ], [ "g", "s" ], [ "m", "be" ], [ "n", "gs" ], [ "Ġe", "mbe" ], [ "ddi", "ngs" ], [ "Ġembe", "ddings" ], [ "t", "work" ], [ "Ġ", "ne" ], [ "Ġne", "twork" ], [ "e", "li" ], [ "i", "p" ], [ "Ġp", "ip" ], [ "eli", "ne" ], [ "Ġpip", "eline" ], [ "Ġdeco", "der" ], [ "c", "t" ], [ "e", "at" ], [ "e", "ct" ], [ "u", "re" ], [ "v", "ect" ], [ "Ġ", "vect" ], [ "or", "s" ], [ "Ġf", "eat" ], [ "Ġvect", "ors" ], [ "Ġfeat", "ure" ], [ "a", "n" ], [ "g", "ent" ], [ "Ġa", "gent" ], [ "m", "o" ], [ "Ġ", "mo" ], [ "de", "l" ], [ "Ġmo", "del" ], [ "i", "n" ], [ "k", "en" ], [ "o", "ken" ], [ "Ġ", "in" ], [ "Ġt", "oken" ], [ "Ġin", "put" ], [ "Ġtoken", "s" ], [ "g", "i" ], [ "l", "o" ], [ "Ġ", "lo" ], [ "ut", "put" ], [ "Ġo", "utput" ], [ "gi", "ts" ], [ "Ġlo", "gits" ], [ "u", "ates" ], [ "v", "al" ], [ "Ġe", "val" ], [ "Ġtransform", "s" ], [ "Ġeval", "uates" ], [ "s", "es" ], [ "Ġpro", "ces" ], [ "Ġproces", "ses" ], [ "d", "ates" ], [ "p", "dates" ], [ "u", "pdates" ], [ "Ġ", "updates" ], [ "ra", "di" ], [ "Ġg", "radi" ], [ "Ġgradi", "ent" ], [ "n", "al" ], [ "y", "zes" ], [ "Ġa", "nal" ], [ "Ġanal", "yzes" ], [ "e", "q" ], [ "e", "x" ], [ "u", "en" ], [ "Ġt", "ex" ], [ "Ġs", "eq" ], [ "uen", "ces" ], [ "Ġtex", "t" ], [ "Ġseq", "uences" ], [ "e", "m" ], [ "y", "st" ], [ "Ġs", "yst" ], [ "Ġsyst", "em" ], [ "Ġtransform", "er" ], [ "Ġenco", "des" ], [ "i", "m" ], [ "i", "zes" ], [ "p", "t" ], [ "Ġo", "pt" ], [ "im", "izes" ], [ "Ġopt", "imizes" ], [ "Ġenco", "der" ], [ "e", "i" ], [ "g", "h" ], [ "t", "ent" ], [ "w", "ei" ], [ "Ġ", "at" ], [ "Ġ", "wei" ], [ "io", "n" ], [ "gh", "ts" ], [ "tent", "ion" ], [ "Ġat", "tention" ], [ "Ġwei", "ghts" ], [ "g", "or" ], [ "i", "th" ], [ "l", "gor" ], [ "Ġa", "lgor" ], [ "ith", "m" ], [ "Ġalgor", "ithm" ], [ "c", "ts" ], [ "e", "di" ], [ "Ġpr", "edi" ], [ "Ġpredi", "cts" ], [ "p", "r" ], [ "Ġ", "re" ], [ "es", "ent" ], [ "at", "ions" ], [ "pr", "esent" ], [ "Ġre", "present" ], [ "Ġrepresent", "ations" ] ] } }