{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 32, "strategy": "LongestFirst", "stride": 0 }, "padding": null, "added_tokens": [ { "id": 0, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<|pad|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": {} }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "<|endoftext|>": 0, "<|pad|>": 1, "a": 2, "b": 3, "c": 4, "d": 5, "e": 6, "f": 7, "g": 8, "h": 9, "i": 10, "j": 11, "k": 12, "l": 13, "m": 14, "n": 15, "o": 16, "p": 17, "q": 18, "r": 19, "s": 20, "t": 21, "u": 22, "v": 23, "w": 24, "x": 25, "y": 26, "z": 27, "Ġ": 28, "Ġw": 29, "Ġz": 30, "Ġt": 31, "Ġq": 32, "Ġa": 33, "Ġs": 34, "Ġr": 35, "Ġe": 36, "Ġg": 37, "Ġy": 38, "Ġc": 39, "Ġm": 40, "Ġb": 41, "Ġl": 42, "Ġo": 43, "Ġn": 44, "Ġj": 45, "Ġi": 46, "Ġp": 47, "Ġx": 48, "Ġu": 49 }, "merges": [ [ "Ġ", "w" ], [ "Ġ", "z" ], [ "Ġ", "t" ], [ "Ġ", "q" ], [ "Ġ", "a" ], [ "Ġ", "s" ], [ "Ġ", "r" ], [ "Ġ", "e" ], [ "Ġ", "g" ], [ "Ġ", "y" ], [ "Ġ", "c" ], [ "Ġ", "m" ], [ "Ġ", "b" ], [ "Ġ", "l" ], [ "Ġ", "o" ], [ "Ġ", "n" ], [ "Ġ", "j" ], [ "Ġ", "i" ], [ "Ġ", "p" ], [ "Ġ", "x" ], [ "Ġ", "u" ] ] } }