{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "_", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFD" }, { "type": "StripAccents" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "_": 0, "[PAD]": 1, "[UNK]": 2, "[BOS]": 3, "[EOS]": 4, "a": 5, "b": 6, "c": 7, "d": 8, "e": 9, "f": 10, "g": 11, "h": 12, "i": 13, "j": 14, "k": 15, "l": 16, "m": 17, "n": 18, "o": 19, "p": 20, "q": 21, "r": 22, "s": 23, "t": 24, "u": 25, "v": 26, "w": 27, "x": 28, "y": 29, "z": 30, "0": 31, "1": 32, "2": 33, "3": 34, "4": 35, "5": 36, "6": 37, "7": 38, "8": 39, "9": 40, "10": 41, "11": 42, "12": 43, "13": 44, "14": 45, "15": 46, "16": 47, "|": 48, "?": 49, "(": 50, ")": 51 }, "unk_token": "[UNK]" } }