Midya-Beta-1 / tokenizer.json
Clemylia's picture
Premier modèle d'architecture gemma from scratch
3a160de verified
{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 512,
"strategy": "LongestFirst",
"stride": 0
},
"padding": {
"strategy": {
"Fixed": 512
},
"direction": "Left",
"pad_to_multiple_of": null,
"pad_id": 0,
"pad_type_id": 0,
"pad_token": "<pad>"
},
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<mask>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "Question:",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 6,
"content": "Réponse:",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
}
],
"normalizer": {
"type": "Replace",
"pattern": {
"String": " "
},
"content": "▁"
},
"pre_tokenizer": null,
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {}
},
"decoder": {
"type": "Sequence",
"decoders": [
{
"type": "Replace",
"pattern": {
"String": "▁"
},
"content": " "
},
{
"type": "ByteFallback"
},
{
"type": "Fuse"
}
]
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": true,
"byte_fallback": true,
"ignore_merges": false,
"vocab": {
"<pad>": 0,
"</s>": 1,
"<s>": 2,
"<unk>": 3,
"<mask>": 4,
"Question:": 5,
"Réponse:": 6,
"▁–": 7,
"▁-": 8,
"▁s": 9,
"ol": 10,
"▁sol": 11,
"do": 12,
"▁do": 13,
"mi": 14,
"▁mi": 15,
"fa": 16,
"▁fa": 17,
"la": 18,
"▁la": 19,
"ré": 20,
"▁ré": 21,
"▁S": 22,
"▁si": 23,
"▁Sol": 24,
"Do": 25,
"▁Do": 26,
"La": 27,
"▁La": 28,
"Fa": 29,
"Mi": 30,
"Ré": 31,
"▁Fa": 32,
"▁Mi": 33,
"▁Ré": 34,
"▁Si": 35,
"Si": 36,
"So": 37,
"si": 38,
"so": 39,
"▁D": 40,
"▁F": 41,
"▁L": 42,
"▁M": 43,
"▁R": 44,
"▁d": 45,
"▁f": 46,
"▁l": 47,
"▁m": 48,
"▁r": 49,
"Sol": 50,
"▁so": 51,
"▁": 52,
"–": 53,
"o": 54,
"l": 55,
"-": 56,
"s": 57,
"d": 58,
"a": 59,
"i": 60,
"m": 61,
"f": 62,
"é": 63,
"r": 64,
"#": 65,
"S": 66,
"D": 67,
"L": 68,
"F": 69,
"M": 70,
"R": 71
},
"merges": [
[
"▁",
"–"
],
[
"▁",
"-"
],
[
"▁",
"s"
],
[
"o",
"l"
],
[
"▁s",
"ol"
],
[
"▁so",
"l"
],
[
"d",
"o"
],
[
"▁",
"do"
],
[
"▁d",
"o"
],
[
"m",
"i"
],
[
"▁",
"mi"
],
[
"▁m",
"i"
],
[
"f",
"a"
],
[
"▁",
"fa"
],
[
"▁f",
"a"
],
[
"l",
"a"
],
[
"▁",
"la"
],
[
"▁l",
"a"
],
[
"r",
"é"
],
[
"▁",
"ré"
],
[
"▁r",
"é"
],
[
"▁",
"S"
],
[
"▁",
"si"
],
[
"▁s",
"i"
],
[
"▁",
"Sol"
],
[
"▁S",
"ol"
],
[
"D",
"o"
],
[
"▁",
"Do"
],
[
"▁D",
"o"
],
[
"L",
"a"
],
[
"▁",
"La"
],
[
"▁L",
"a"
],
[
"F",
"a"
],
[
"M",
"i"
],
[
"R",
"é"
],
[
"▁",
"Fa"
],
[
"▁F",
"a"
],
[
"▁",
"Mi"
],
[
"▁M",
"i"
],
[
"▁",
"Ré"
],
[
"▁R",
"é"
],
[
"▁",
"Si"
],
[
"▁S",
"i"
],
[
"S",
"i"
],
[
"S",
"o"
],
[
"s",
"i"
],
[
"s",
"o"
],
[
"▁",
"D"
],
[
"▁",
"F"
],
[
"▁",
"L"
],
[
"▁",
"M"
],
[
"▁",
"R"
],
[
"▁",
"d"
],
[
"▁",
"f"
],
[
"▁",
"l"
],
[
"▁",
"m"
],
[
"▁",
"r"
],
[
"S",
"ol"
],
[
"So",
"l"
],
[
"▁",
"so"
],
[
"▁s",
"o"
]
]
}
}