{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": null, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "H": 4, "T": 5, "a": 6, "b": 7, "c": 8, "d": 9, "e": 10, "f": 11, "g": 12, "h": 13, "i": 14, "j": 15, "k": 16, "l": 17, "m": 18, "n": 19, "o": 20, "p": 21, "q": 22, "r": 23, "s": 24, "t": 25, "u": 26, "v": 27, "w": 28, "x": 29, "y": 30, "z": 31, "Ċ": 32, "Ġ": 33, "Ġt": 34, "he": 35, "Ġthe": 36, "el": 37, "er": 38, "fo": 39, "is": 40, "Ġo": 41, "Ġfo": 42, "Hel": 43, "The": 44, "az": 45, "br": 46, "ck": 47, "do": 48, "del": 49, "en": 50, "es": 51, "his": 52, "iz": 53, "ick": 54, "ju": 55, "ken": 56, "ld": 57, "lo": 58, "laz": 59, "mo": 60, "mp": 61, "or": 62, "ow": 63, "oken": 64, "qu": 65, "ver": 66, "wor": 67, "Ġa": 68, "Ġis": 69, "Ġbr": 70, "Ġdo": 71, "Ġju": 72, "Ġlaz": 73, "Ġmo": 74, "Ġqu": 75, "Ġwor": 76, "Ġtes": 77, "Ġthis": 78, "Ġtoken": 79, "Ġof": 80, "Ġover": 81, "Ġfor": 82, "Ġfox": 83, "Hello": 84, "izer": 85, "mps": 86, "own": 87, "Ġbrown": 88, "Ġdog": 89, "Ġjumps": 90, "Ġlazy": 91, "Ġmodel": 92, "Ġquick": 93, "Ġworld": 94, "Ġtest": 95, "Ġtokenizer": 96 }, "merges": [ [ "Ġ", "t" ], [ "h", "e" ], [ "Ġt", "he" ], [ "e", "l" ], [ "e", "r" ], [ "f", "o" ], [ "i", "s" ], [ "Ġ", "o" ], [ "Ġ", "fo" ], [ "H", "el" ], [ "T", "he" ], [ "a", "z" ], [ "b", "r" ], [ "c", "k" ], [ "d", "o" ], [ "d", "el" ], [ "e", "n" ], [ "e", "s" ], [ "h", "is" ], [ "i", "z" ], [ "i", "ck" ], [ "j", "u" ], [ "k", "en" ], [ "l", "d" ], [ "l", "o" ], [ "l", "az" ], [ "m", "o" ], [ "m", "p" ], [ "o", "r" ], [ "o", "w" ], [ "o", "ken" ], [ "q", "u" ], [ "v", "er" ], [ "w", "or" ], [ "Ġ", "a" ], [ "Ġ", "is" ], [ "Ġ", "br" ], [ "Ġ", "do" ], [ "Ġ", "ju" ], [ "Ġ", "laz" ], [ "Ġ", "mo" ], [ "Ġ", "qu" ], [ "Ġ", "wor" ], [ "Ġt", "es" ], [ "Ġt", "his" ], [ "Ġt", "oken" ], [ "Ġo", "f" ], [ "Ġo", "ver" ], [ "Ġfo", "r" ], [ "Ġfo", "x" ], [ "Hel", "lo" ], [ "iz", "er" ], [ "mp", "s" ], [ "ow", "n" ], [ "Ġbr", "own" ], [ "Ġdo", "g" ], [ "Ġju", "mps" ], [ "Ġlaz", "y" ], [ "Ġmo", "del" ], [ "Ġqu", "ick" ], [ "Ġwor", "ld" ], [ "Ġtes", "t" ], [ "Ġtoken", "izer" ] ] } }