{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[TOXIC]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[REPARENCE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SUIVANT]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[REMPLI]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 85, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "[TOXIC]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "[TOXIC]": 0, "[REPARENCE]": 1, "[SUIVANT]": 2, "[REMPLI]": 3, "[MASK]": 4, "!": 5, "'": 6, ".": 7, "G": 8, "a": 9, "b": 10, "c": 11, "d": 12, "e": 13, "f": 14, "g": 15, "i": 16, "j": 17, "l": 18, "m": 19, "n": 20, "o": 21, "p": 22, "r": 23, "s": 24, "t": 25, "u": 26, "v": 27, "x": 28, "z": 29, "ca": 30, "re": 31, "Gre": 32, "fe": 33, "ffe": 34, "caca": 35, "Greffe": 36, "ou": 37, "an": 38, "de": 39, "du": 40, "dan": 41, "es": 42, "vou": 43, "Greffez": 44, "dans": 45, "vous": 46, "les": 47, "bou": 48, "di": 49, "le": 50, "boudi": 51, "boudin": 52, "ac": 53, "am": 54, "as": 55, "au": 56, "br": 57, "bes": 58, "ce": 59, "ds": 60, "en": 61, "ez": 62, "eau": 63, "eds": 64, "gen": 65, "il": 66, "ieds": 67, "jam": 68, "mac": 69, "nez": 70, "ore": 71, "omac": 72, "pieds": 73, "rv": 74, "tomac": 75, "oux": 76, "estomac": 77, "bras": 78, "cerv": 79, "genoux": 80, "illes": 81, "jambes": 82, "oreilles": 83, "cerveau": 84 }, "merges": [ [ "c", "a" ], [ "r", "e" ], [ "G", "re" ], [ "f", "e" ], [ "f", "fe" ], [ "ca", "ca" ], [ "Gre", "ffe" ], [ "o", "u" ], [ "a", "n" ], [ "d", "e" ], [ "d", "u" ], [ "d", "an" ], [ "e", "s" ], [ "v", "ou" ], [ "Greffe", "z" ], [ "dan", "s" ], [ "vou", "s" ], [ "l", "es" ], [ "b", "ou" ], [ "d", "i" ], [ "l", "e" ], [ "bou", "di" ], [ "boudi", "n" ], [ "a", "c" ], [ "a", "m" ], [ "a", "s" ], [ "a", "u" ], [ "b", "r" ], [ "b", "es" ], [ "c", "e" ], [ "d", "s" ], [ "e", "n" ], [ "e", "z" ], [ "e", "au" ], [ "e", "ds" ], [ "g", "en" ], [ "i", "l" ], [ "i", "eds" ], [ "j", "am" ], [ "m", "ac" ], [ "n", "ez" ], [ "o", "re" ], [ "o", "mac" ], [ "p", "ieds" ], [ "r", "v" ], [ "t", "omac" ], [ "ou", "x" ], [ "es", "tomac" ], [ "br", "as" ], [ "ce", "rv" ], [ "gen", "oux" ], [ "il", "les" ], [ "jam", "bes" ], [ "ore", "illes" ], [ "cerv", "eau" ] ] } }