{
  "version": "1.0",
  "truncation": {
    "direction": "Right",
    "max_length": 158,
    "strategy": "LongestFirst",
    "stride": 0
  },
  "padding": {
    "strategy": {
      "Fixed": 158
    },
    "direction": "Right",
    "pad_to_multiple_of": null,
    "pad_id": 0,
    "pad_type_id": 0,
    "pad_token": "<|pad|>"
  },
  "added_tokens": [
    {
      "id": 0,
      "content": "<|pad|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": true,
      "special": true
    },
    {
      "id": 1,
      "content": "<|unk|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": true,
      "special": true
    },
    {
      "id": 2,
      "content": "<|startoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": true,
      "special": true
    },
    {
      "id": 3,
      "content": "<|endoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": true,
      "special": true
    },
    {
      "id": 4,
      "content": "<|mask|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": true,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "ByteLevel",
    "add_prefix_space": false,
    "trim_offsets": true,
    "use_regex": true
  },
  "post_processor": {
    "type": "ByteLevel",
    "add_prefix_space": true,
    "trim_offsets": false,
    "use_regex": true
  },
  "decoder": {
    "type": "ByteLevel",
    "add_prefix_space": true,
    "trim_offsets": true,
    "use_regex": true
  },
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": null,
    "continuing_subword_prefix": "",
    "end_of_word_suffix": "",
    "fuse_unk": false,
    "byte_fallback": false,
    "ignore_merges": false,
    "vocab": {
      "<|pad|>": 0,
      "<|unk|>": 1,
      "<|startoftext|>": 2,
      "<|endoftext|>": 3,
      "<|mask|>": 4,
      "A": 5,
      "C": 6,
      "D": 7,
      "E": 8,
      "F": 9,
      "G": 10,
      "H": 11,
      "I": 12,
      "K": 13,
      "L": 14,
      "M": 15,
      "N": 16,
      "P": 17,
      "Q": 18,
      "R": 19,
      "S": 20,
      "T": 21,
      "V": 22,
      "W": 23,
      "Y": 24
    },
    "merges": []
  }
}