{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "a": 4, "d": 5, "e": 6, "f": 7, "h": 8, "i": 9, "k": 10, "l": 11, "n": 12, "o": 13, "r": 14, "s": 15, "t": 16, "w": 17, "y": 18, "z": 19, "Ġ": 20, "Ġt": 21, "or": 22, "at": 23, "dat": 24, "el": 25, "en": 26, "er": 27, "es": 28, "for": 29, "hel": 30, "in": 31, "iz": 32, "ken": 33, "ld": 34, "lo": 35, "oken": 36, "wor": 37, "Ġdat": 38, "Ġfor": 39, "Ġwor": 40, "Ġtes": 41, "Ġtin": 42, "Ġtoken": 43, "hello": 44, "izer": 45, "Ġdata": 46, "Ġworld": 47, "Ġtest": 48, "Ġtiny": 49, "Ġtokenizer": 50 }, "merges": [ [ "Ġ", "t" ], [ "o", "r" ], [ "a", "t" ], [ "d", "at" ], [ "e", "l" ], [ "e", "n" ], [ "e", "r" ], [ "e", "s" ], [ "f", "or" ], [ "h", "el" ], [ "i", "n" ], [ "i", "z" ], [ "k", "en" ], [ "l", "d" ], [ "l", "o" ], [ "o", "ken" ], [ "w", "or" ], [ "Ġ", "dat" ], [ "Ġ", "for" ], [ "Ġ", "wor" ], [ "Ġt", "es" ], [ "Ġt", "in" ], [ "Ġt", "oken" ], [ "hel", "lo" ], [ "iz", "er" ], [ "Ġdat", "a" ], [ "Ġwor", "ld" ], [ "Ġtes", "t" ], [ "Ġtin", "y" ], [ "Ġtoken", "izer" ] ] } }