BaouleTokenizer_V1 / tokenizer.json
Adjoumani's picture
Upload folder using huggingface_hub
6a62038 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 206,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Replace",
"pattern": {
"String": " "
},
"content": "▁"
}
]
},
"pre_tokenizer": null,
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<s>",
"type_id": 1
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 1
}
}
],
"special_tokens": {
"</s>": {
"id": "</s>",
"ids": [
2
],
"tokens": [
"</s>"
]
},
"<s>": {
"id": "<s>",
"ids": [
1
],
"tokens": [
"<s>"
]
}
}
},
"decoder": {
"type": "Sequence",
"decoders": [
{
"type": "Replace",
"pattern": {
"String": "▁"
},
"content": " "
},
{
"type": "ByteFallback"
},
{
"type": "Fuse"
}
]
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": true,
"byte_fallback": true,
"ignore_merges": false,
"vocab": {
"<unk>": 0,
"<s>": 1,
"</s>": 2,
"▁n": 3,
"▁a": 4,
"▁i": 5,
"▁ɛ": 6,
"▁u": 7,
"▁e": 8,
"▁l": 9,
"▁’": 10,
"▁k": 11,
"▁m": 12,
"▁s": 13,
"▁b": 14,
"▁ɔ": 15,
"▁,": 16,
"▁'": 17,
"▁o": 18,
"▁w": 19,
"▁f": 20,
"▁t": 21,
"▁g": 22,
"▁y": 23,
"▁.": 24,
"▁r": 25,
"▁d": 26,
"▁p": 27,
"▁z": 28,
"▁j": 29,
"▁:": 30,
"▁1": 31,
"▁Z": 32,
"▁c": 33,
"▁?": 34,
"▁S": 35,
"▁A": 36,
"▁N": 37,
"▁K": 38,
"▁v": 39,
"▁Ñ": 40,
"▁2": 41,
"▁Ɲ": 42,
"▁M": 43,
"▁ɲ": 44,
"▁B": 45,
"▁é": 46,
"β–β€œ": 47,
"▁”": 48,
"▁I": 49,
"▁́": 50,
"β–β€˜": 51,
"▁3": 52,
"▁Ɔ": 53,
"▁4": 54,
"▁0": 55,
"▁)": 56,
"▁E": 57,
"▁5": 58,
"▁-": 59,
"▁(": 60,
"▁L": 61,
"▁—": 62,
"▁Y": 63,
"▁F": 64,
"▁ó": 65,
"▁í": 66,
"▁ú": 67,
"▁6": 68,
"▁W": 69,
"▁7": 70,
"▁9": 71,
"▁D": 72,
"▁T": 73,
"▁8": 74,
"▁P": 75,
"▁;": 76,
"▁Ɛ": 77,
"▁!": 78,
"▁J": 79,
"▁ń": 80,
"▁G": 81,
"▁R": 82,
"▁U": 83,
"▁[": 84,
"▁]": 85,
"▁C": 86,
"▁O": 87,
"▁h": 88,
"▁‒": 89,
"▁É": 90,
"▁β–ͺ": 91,
"▁*": 92,
"▁/": 93,
"▁Ń": 94,
"▁q": 95,
"▁|": 96,
"▁V": 97,
"▁ḿ": 98,
"▁–": 99,
"▁è": 100,
"▁Ḿ": 101,
"▁H": 102,
"▁Á": 103,
"▁": 104,
"n": 105,
"a": 106,
"i": 107,
"Ι›": 108,
"u": 109,
"e": 110,
"l": 111,
"’": 112,
"k": 113,
"m": 114,
"s": 115,
"b": 116,
"Ι”": 117,
",": 118,
"'": 119,
"o": 120,
"w": 121,
"f": 122,
"t": 123,
"g": 124,
"y": 125,
".": 126,
"r": 127,
"d": 128,
"p": 129,
"z": 130,
"j": 131,
":": 132,
"1": 133,
"Z": 134,
"c": 135,
"?": 136,
"S": 137,
"A": 138,
"N": 139,
"K": 140,
"v": 141,
"Γ‘": 142,
"2": 143,
"Ɲ": 144,
"M": 145,
"Ι²": 146,
"B": 147,
"Γ©": 148,
"β€œ": 149,
"”": 150,
"I": 151,
"́": 152,
"β€˜": 153,
"3": 154,
"Ζ†": 155,
"4": 156,
"0": 157,
")": 158,
"E": 159,
"5": 160,
"-": 161,
"(": 162,
"L": 163,
"β€”": 164,
"Y": 165,
"F": 166,
"Γ³": 167,
"Γ­": 168,
"ΓΊ": 169,
"6": 170,
"W": 171,
"7": 172,
"9": 173,
"D": 174,
"T": 175,
"8": 176,
"P": 177,
";": 178,
"Ɛ": 179,
"!": 180,
"J": 181,
"Ε„": 182,
"G": 183,
"R": 184,
"U": 185,
"[": 186,
"]": 187,
"C": 188,
"O": 189,
"h": 190,
"β€’": 191,
"Γ‰": 192,
"β–ͺ": 193,
"*": 194,
"/": 195,
"Εƒ": 196,
"q": 197,
"|": 198,
"V": 199,
"αΈΏ": 200,
"–": 201,
"Γ¨": 202,
"αΈΎ": 203,
"H": 204,
"Á": 205
},
"merges": [
[
"▁",
"n"
],
[
"▁",
"a"
],
[
"▁",
"i"
],
[
"▁",
"ɛ"
],
[
"▁",
"u"
],
[
"▁",
"e"
],
[
"▁",
"l"
],
[
"▁",
"’"
],
[
"▁",
"k"
],
[
"▁",
"m"
],
[
"▁",
"s"
],
[
"▁",
"b"
],
[
"▁",
"ɔ"
],
[
"▁",
","
],
[
"▁",
"'"
],
[
"▁",
"o"
],
[
"▁",
"w"
],
[
"▁",
"f"
],
[
"▁",
"t"
],
[
"▁",
"g"
],
[
"▁",
"y"
],
[
"▁",
"."
],
[
"▁",
"r"
],
[
"▁",
"d"
],
[
"▁",
"p"
],
[
"▁",
"z"
],
[
"▁",
"j"
],
[
"▁",
":"
],
[
"▁",
"1"
],
[
"▁",
"Z"
],
[
"▁",
"c"
],
[
"▁",
"?"
],
[
"▁",
"S"
],
[
"▁",
"A"
],
[
"▁",
"N"
],
[
"▁",
"K"
],
[
"▁",
"v"
],
[
"▁",
"Ñ"
],
[
"▁",
"2"
],
[
"▁",
"Ɲ"
],
[
"▁",
"M"
],
[
"▁",
"ɲ"
],
[
"▁",
"B"
],
[
"▁",
"é"
],
[
"▁",
"“"
],
[
"▁",
"”"
],
[
"▁",
"I"
],
[
"▁",
"́"
],
[
"▁",
"‘"
],
[
"▁",
"3"
],
[
"▁",
"Ɔ"
],
[
"▁",
"4"
],
[
"▁",
"0"
],
[
"▁",
")"
],
[
"▁",
"E"
],
[
"▁",
"5"
],
[
"▁",
"-"
],
[
"▁",
"("
],
[
"▁",
"L"
],
[
"▁",
"—"
],
[
"▁",
"Y"
],
[
"▁",
"F"
],
[
"▁",
"ó"
],
[
"▁",
"í"
],
[
"▁",
"ú"
],
[
"▁",
"6"
],
[
"▁",
"W"
],
[
"▁",
"7"
],
[
"▁",
"9"
],
[
"▁",
"D"
],
[
"▁",
"T"
],
[
"▁",
"8"
],
[
"▁",
"P"
],
[
"▁",
";"
],
[
"▁",
"Ɛ"
],
[
"▁",
"!"
],
[
"▁",
"J"
],
[
"▁",
"ń"
],
[
"▁",
"G"
],
[
"▁",
"R"
],
[
"▁",
"U"
],
[
"▁",
"["
],
[
"▁",
"]"
],
[
"▁",
"C"
],
[
"▁",
"O"
],
[
"▁",
"h"
],
[
"▁",
"‒"
],
[
"▁",
"É"
],
[
"▁",
"▪"
],
[
"▁",
"*"
],
[
"▁",
"/"
],
[
"▁",
"Ń"
],
[
"▁",
"q"
],
[
"▁",
"|"
],
[
"▁",
"V"
],
[
"▁",
"ḿ"
],
[
"▁",
"–"
],
[
"▁",
"è"
],
[
"▁",
"Ḿ"
],
[
"▁",
"H"
],
[
"▁",
"Á"
]
]
}
}