{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 512, "strategy": "LongestFirst", "stride": 0 }, "padding": { "strategy": { "Fixed": 512 }, "direction": "Left", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "" }, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "Question:", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 6, "content": "Réponse:", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false } ], "normalizer": { "type": "Replace", "pattern": { "String": " " }, "content": "▁" }, "pre_tokenizer": null, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": {} }, "decoder": { "type": "Sequence", "decoders": [ { "type": "Replace", "pattern": { "String": "▁" }, "content": " " }, { "type": "ByteFallback" }, { "type": "Fuse" } ] }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": true, "byte_fallback": true, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "Question:": 5, "Réponse:": 6, "▁–": 7, "▁-": 8, "▁s": 9, "ol": 10, "▁sol": 11, "do": 12, "▁do": 13, "mi": 14, "▁mi": 15, "fa": 16, "▁fa": 17, "la": 18, "▁la": 19, "ré": 20, "▁ré": 21, "▁S": 22, "▁si": 23, "▁Sol": 24, "Do": 25, "▁Do": 26, "La": 27, "▁La": 28, "Fa": 29, "Mi": 30, "Ré": 31, "▁Fa": 32, "▁Mi": 33, "▁Ré": 34, "▁Si": 35, "Si": 36, "So": 37, "si": 38, "so": 39, "▁D": 40, "▁F": 41, "▁L": 42, "▁M": 43, "▁R": 44, "▁d": 45, "▁f": 46, "▁l": 47, "▁m": 48, "▁r": 49, "Sol": 50, "▁so": 51, "▁": 52, "–": 53, "o": 54, "l": 55, "-": 56, "s": 57, "d": 58, "a": 59, "i": 60, "m": 61, "f": 62, "é": 63, "r": 64, "#": 65, "S": 66, "D": 67, "L": 68, "F": 69, "M": 70, "R": 71 }, "merges": [ [ "▁", "–" ], [ "▁", "-" ], [ "▁", "s" ], [ "o", "l" ], [ "▁s", "ol" ], [ "▁so", "l" ], [ "d", "o" ], [ "▁", "do" ], [ "▁d", "o" ], [ "m", "i" ], [ "▁", "mi" ], [ "▁m", "i" ], [ "f", "a" ], [ "▁", "fa" ], [ "▁f", "a" ], [ "l", "a" ], [ "▁", "la" ], [ "▁l", "a" ], [ "r", "é" ], [ "▁", "ré" ], [ "▁r", "é" ], [ "▁", "S" ], [ "▁", "si" ], [ "▁s", "i" ], [ "▁", "Sol" ], [ "▁S", "ol" ], [ "D", "o" ], [ "▁", "Do" ], [ "▁D", "o" ], [ "L", "a" ], [ "▁", "La" ], [ "▁L", "a" ], [ "F", "a" ], [ "M", "i" ], [ "R", "é" ], [ "▁", "Fa" ], [ "▁F", "a" ], [ "▁", "Mi" ], [ "▁M", "i" ], [ "▁", "Ré" ], [ "▁R", "é" ], [ "▁", "Si" ], [ "▁S", "i" ], [ "S", "i" ], [ "S", "o" ], [ "s", "i" ], [ "s", "o" ], [ "▁", "D" ], [ "▁", "F" ], [ "▁", "L" ], [ "▁", "M" ], [ "▁", "R" ], [ "▁", "d" ], [ "▁", "f" ], [ "▁", "l" ], [ "▁", "m" ], [ "▁", "r" ], [ "S", "ol" ], [ "So", "l" ], [ "▁", "so" ], [ "▁s", "o" ] ] } }