{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true },
    { "id": 1, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true },
    { "id": 2, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true },
    { "id": 3, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      { "type": "NFKC" },
      { "type": "Replace", "pattern": { "Regex": "A" }, "content": "↨a" },
      { "type": "Replace", "pattern": { "Regex": "B" }, "content": "↨b" },
      { "type": "Replace", "pattern": { "Regex": "C" }, "content": "↨c" },
      { "type": "Replace", "pattern": { "Regex": "D" }, "content": "↨d" },
      { "type": "Replace", "pattern": { "Regex": "E" }, "content": "↨e" },
      { "type": "Replace", "pattern": { "Regex": "F" }, "content": "↨f" },
      { "type": "Replace", "pattern": { "Regex": "G" }, "content": "↨g" },
      { "type": "Replace", "pattern": { "Regex": "H" }, "content": "↨h" },
      { "type": "Replace", "pattern": { "Regex": "I" }, "content": "↨i" },
      { "type": "Replace", "pattern": { "Regex": "J" }, "content": "↨j" },
      { "type": "Replace", "pattern": { "Regex": "K" }, "content": "↨k" },
      { "type": "Replace", "pattern": { "Regex": "L" }, "content": "↨l" },
      { "type": "Replace", "pattern": { "Regex": "M" }, "content": "↨m" },
      { "type": "Replace", "pattern": { "Regex": "N" }, "content": "↨n" },
      { "type": "Replace", "pattern": { "Regex": "O" }, "content": "↨o" },
      { "type": "Replace", "pattern": { "Regex": "P" }, "content": "↨p" },
      { "type": "Replace", "pattern": { "Regex": "Q" }, "content": "↨q" },
      { "type": "Replace", "pattern": { "Regex": "R" }, "content": "↨r" },
      { "type": "Replace", "pattern": { "Regex": "S" }, "content": "↨s" },
      { "type": "Replace", "pattern": { "Regex": "T" }, "content": "↨t" },
      { "type": "Replace", "pattern": { "Regex": "U" }, "content": "↨u" },
      { "type": "Replace", "pattern": { "Regex": "V" }, "content": "↨v" },
      { "type": "Replace", "pattern": { "Regex": "W" }, "content": "↨w" },
      { "type": "Replace", "pattern": { "Regex": "X" }, "content": "↨x" },
      { "type": "Replace", "pattern": { "Regex": "Y" }, "content": "↨y" },
      { "type": "Replace", "pattern": { "Regex": "Z" }, "content": "↨z" }
    ]
  },
  "pre_tokenizer": {
    "type": "Split",
    "pattern": { "Regex": "\\X" },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      { "Sequence": { "id": "A", "type_id": 0 } }
    ],
    "pair": [
      { "Sequence": { "id": "A", "type_id": 0 } },
      { "Sequence": { "id": "B", "type_id": 0 } }
    ],
    "special_tokens": {}
  },
  "decoder": {
    "type": "Sequence",
    "decoders": []
  },
  "model": {
    "type": "WordLevel",
    "vocab": {
      "[PAD]": 0, "[BOS]": 1, "[EOS]": 2, "[UNK]": 3,
      "↨": 4,
      "\n": 5, "\t": 6, " ": 7,
      "0": 8, "1": 9, "2": 10, "3": 11, "4": 12, "5": 13, "6": 14, "7": 15, "8": 16, "9": 17,
      "a": 18, "b": 19, "c": 20, "d": 21, "e": 22, "f": 23, "g": 24, "h": 25, "i": 26, "j": 27, "k": 28, "l": 29, "m": 30,
      "n": 31, "o": 32, "p": 33, "q": 34, "r": 35, "s": 36, "t": 37, "u": 38, "v": 39, "w": 40, "x": 41, "y": 42, "z": 43,
      "\"": 44, "!": 45, "$": 46, "&": 47, "'": 48, "#": 49, ",": 50, "/": 51, "+": 52, "=": 53,
      "-": 54, "<": 55, ">": 56, "*": 57, "@": 58, ".": 59, ":": 60, ";": 61, "[": 62, "]": 63,
      "{": 64, "}": 65, "(": 66, ")": 67, "^": 68, "_": 69, "?": 70, "%": 71,
      "é": 72,
      "¤69": 73, "¤70": 74, "¤71": 75, "¤72": 76, "¤73": 77,
      "¤74": 78, "¤75": 79, "¤76": 80, "¤77": 81, "¤78": 82,
      "¤79": 83, "¤80": 84, "¤81": 85, "¤82": 86, "¤83": 87,
      "¤84": 88, "¤85": 89, "¤86": 90, "¤87": 91, "¤88": 92,
      "¤89": 93, "¤90": 94, "¤91": 95, "¤92": 96, "¤93": 97,
      "¤94": 98, "¤95": 99, "¤96": 100, "¤97": 101, "¤98": 102,
      "¤99": 103, "¤100": 104, "¤101": 105, "¤102": 106, "¤103": 107,
      "¤104": 108, "¤105": 109, "¤106": 110, "¤107": 111, "¤108": 112,
      "¤109": 113, "¤110": 114, "¤111": 115, "¤112": 116, "¤113": 117,
      "¤114": 118, "¤115": 119, "¤116": 120, "¤117": 121, "¤118": 122,
      "¤119": 123, "¤120": 124, "¤121": 125, "¤122": 126, "¤123": 127
    },
    "unk_token": "[UNK]"
  }
}