{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "NFKD"
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "\\s+"
        },
        "content": " "
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "[\u2013\u2014]"
        },
        "content": "-"
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "[^ -\"$-.0-;?A-Za-z£́]"
        },
        "content": ""
      }
    ]
  },
  "pre_tokenizer": {
    "type": "FixedLength",
    "length": 1
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "type": "WordLevel",
    "vocab": {
      " ": 0,
      "!": 1,
      "\"": 2,
      "$": 3,
      "%": 4,
      "&": 5,
      "'": 6,
      "(": 7,
      ")": 8,
      "*": 9,
      "+": 10,
      ",": 11,
      "-": 12,
      ".": 13,
      "0": 14,
      "1": 15,
      "2": 16,
      "3": 17,
      "4": 18,
      "5": 19,
      "6": 20,
      "7": 21,
      "8": 22,
      "9": 23,
      ":": 24,
      ";": 25,
      "?": 26,
      "A": 27,
      "B": 28,
      "C": 29,
      "D": 30,
      "E": 31,
      "F": 32,
      "G": 33,
      "H": 34,
      "I": 35,
      "J": 36,
      "K": 37,
      "L": 38,
      "M": 39,
      "N": 40,
      "O": 41,
      "P": 42,
      "Q": 43,
      "R": 44,
      "S": 45,
      "T": 46,
      "U": 47,
      "V": 48,
      "W": 49,
      "X": 50,
      "Y": 51,
      "Z": 52,
      "a": 53,
      "b": 54,
      "c": 55,
      "d": 56,
      "e": 57,
      "f": 58,
      "g": 59,
      "h": 60,
      "i": 61,
      "j": 62,
      "k": 63,
      "l": 64,
      "m": 65,
      "n": 66,
      "o": 67,
      "p": 68,
      "q": 69,
      "r": 70,
      "s": 71,
      "t": 72,
      "u": 73,
      "v": 74,
      "w": 75,
      "x": 76,
      "y": 77,
      "z": 78,
      "£": 79,
      "\u0301": 80
    },
    "unk_token": "\u0301"
  }
}