{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 128, "strategy": "LongestFirst", "stride": 0 }, "padding": { "strategy": { "Fixed": 128 }, "direction": "Right", "pad_to_multiple_of": null, "pad_id": 1, "pad_type_id": 0, "pad_token": "" }, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": null, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "'": 5, "-": 6, ".": 7, "/": 8, "<": 9, ">": 10, "?": 11, "A": 12, "B": 13, "C": 14, "D": 15, "E": 16, "F": 17, "G": 18, "H": 19, "I": 20, "J": 21, "K": 22, "L": 23, "M": 24, "N": 25, "O": 26, "P": 27, "Q": 28, "R": 29, "S": 30, "T": 31, "U": 32, "V": 33, "W": 34, "X": 35, "Y": 36, "Z": 37, "_": 38, "a": 39, "b": 40, "c": 41, "d": 42, "e": 43, "f": 44, "g": 45, "h": 46, "i": 47, "j": 48, "k": 49, "l": 50, "m": 51, "n": 52, "o": 53, "p": 54, "q": 55, "r": 56, "s": 57, "t": 58, "u": 59, "v": 60, "w": 61, "x": 62, "y": 63, "z": 64, "·": 65, "º": 66, "Â": 67, "Ä": 68, "Ġ": 69, "an": 70, "ey": 71, "Ġd": 72, "on": 73, "Ġa": 74, "Ġs": 75, "Ġg": 76, "Ġb": 77, "Ġt": 78, "Ġh": 79, "kk": 80, "Ġe": 81, "ar": 82, "or": 83, "eg": 84, "Ġp": 85, "ot": 86, "di": 87, "Ġag": 88, "Ġm": 89, "ur": 90, "Ġdeg": 91, "ol": 92, "in": 93, "en": 94, "Ġekk": 95, "Ġj": 96, "ong": 97, "un": 98, "al": 99, "Ġdegong": 100, "Ġar": 101, "Ġi": 102, "Ġgur": 103, "Ġagey": 104, "Ġn": 105, "Ġekkan": 106, "od": 107, "at": 108, "Ġo": 109, "Ġaro": 110, "ag": 111, "Ġhe": 112, "Ġsi": 113, "Ġl": 114, "ang": 115, "Ġ<": 116, "Ġse": 117, "Ġte": 118, "Ġdo": 119, "Ġhi": 120, "Ġol": 121, "Ġman": 122, "ai": 123, "Ġagon": 124, "ch": 125, "oi": 126, "ye": 127, "edi": 128, "ani": 129, "Ġguri": 130, "Ġdega": 131, "Ġgor": 132, "aj": 133, "Ġu": 134, "st": 135, "uj": 136, "il": 137, "Ġob": 138, "Ġr": 139, "er": 140, "ic": 141, "ara": 142, "ul": 143, "gan": 144, "Ġtey": 145, "ay": 146, "yot": 147, "Ġbi": 148, "Ġdogan": 149, "Ġdi": 150, "Ġk": 151, "Ġc": 152, "Ġna": 153, "us": 154, "Ġdol": 155, "de": 156, "eni": 157, "Ġ", "