{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 512, "strategy": "LongestFirst", "stride": 0 }, "padding": { "strategy": { "Fixed": 512 }, "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "[PAD]" }, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "String": "" }, "behavior": "Isolated", "invert": false }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "\n": 5, " ": 6, "!": 7, "?": 8, "a": 9, "b": 10, "c": 11, "d": 12, "e": 13, "f": 14, "g": 15, "h": 16, "i": 17, "j": 18, "k": 19, "l": 20, "m": 21, "n": 22, "o": 23, "p": 24, "q": 25, "r": 26, "s": 27, "t": 28, "u": 29, "v": 30, "w": 31, "x": 32, "y": 33, "z": 34, "A": 35, "B": 36, "C": 37, "D": 38, "E": 39, "F": 40, "G": 41, "H": 42, "I": 43, "J": 44, "K": 45, "L": 46, "M": 47, "N": 48, "O": 49, "P": 50, "Q": 51, "R": 52, "S": 53, "T": 54, "U": 55, "V": 56, "W": 57, "X": 58, "Y": 59, "Z": 60 }, "unk_token": "[UNK]" } }