AfriPalmLM / tokenizer.json
mosesdaudu's picture
Training in progress, step 100
2c56376 verified
{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 128,
"strategy": "LongestFirst",
"stride": 0
},
"padding": {
"strategy": {
"Fixed": 128
},
"direction": "Left",
"pad_to_multiple_of": null,
"pad_id": 2,
"pad_type_id": 0,
"pad_token": "</s>"
},
"added_tokens": [
{
"id": 0,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": true
},
{
"id": 1,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Prepend",
"prepend": "▁"
},
{
"type": "Replace",
"pattern": {
"String": " "
},
"content": "▁"
}
]
},
"pre_tokenizer": null,
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<s>",
"type_id": 1
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<s>": {
"id": "<s>",
"ids": [
1
],
"tokens": [
"<s>"
]
}
}
},
"decoder": {
"type": "Sequence",
"decoders": [
{
"type": "Replace",
"pattern": {
"String": "▁"
},
"content": " "
},
{
"type": "ByteFallback"
},
{
"type": "Fuse"
},
{
"type": "Strip",
"content": " ",
"start": 1,
"stop": 0
}
]
},
"model": {
"type": "Unigram",
"unk_id": 0,
"vocab": [
[
"<unk>",
0.0
],
[
"<s>",
0.0
],
[
"</s>",
0.0
],
[
"▁",
-0.5537281368002951
],
[
"a",
-2.206163300308619
],
[
"i",
-3.4698377124275996
],
[
"o",
-3.625328505181651
],
[
"d",
-3.710261874841528
],
[
"y",
-3.895623014599913
],
[
"n",
-3.9241564889678657
],
[
"e",
-3.996031612968597
],
[
"u",
-4.020550100040991
],
[
"l",
-4.16796601703772
],
[
"h",
-4.2061951544872755
],
[
"s",
-4.258920876199081
],
[
"k",
-4.270646474711075
],
[
"r",
-4.366133846179466
],
[
"m",
-4.661349427160829
],
[
"g",
-4.671853885731711
],
[
"b",
-4.764145142326377
],
[
"w",
-5.020700862179128
],
[
"t",
-5.039283250304928
],
[
"x",
-5.041020067562583
],
[
"q",
-5.565562390890502
],
[
"c",
-5.627498296064616
],
[
",",
-5.821637688768487
],
[
".",
-6.056015545727806
],
[
"S",
-6.224462278190527
],
[
"f",
-6.228092265837139
],
[
"M",
-6.305043609360995
],
[
"j",
-6.4046242021046815
],
[
"A",
-6.618608152829582
],
[
"D",
-6.767235971284624
],
[
"W",
-6.784002580071416
],
[
"C",
-6.975347072856344
],
[
"I",
-7.094522631592117
],
[
"G",
-7.12377731380807
],
[
"-",
-7.163134595285085
],
[
"B",
-7.185302221532661
],
[
"K",
-7.284107055272177
],
[
"X",
-7.345178282504843
],
[
"H",
-7.441136939513969
],
[
"0",
-7.610454725175899
],
[
"1",
-7.676846365553844
],
[
"Q",
-7.6815023490292225
],
[
"2",
-7.832934400654507
],
[
"O",
-7.855510099432444
],
[
"’",
-7.913303956478877
],
[
"T",
-7.920521835325051
],
[
"N",
-7.927129060971368
],
[
"L",
-7.946540647005372
],
[
"F",
-8.030392557146758
],
[
"J",
-8.044456444862213
],
[
"R",
-8.061750108119739
],
[
"U",
-8.168080202728104
],
[
"E",
-8.322863716456041
],
[
":",
-8.419997311703058
],
[
")",
-8.515578814777218
],
[
"P",
-8.547115687654244
],
[
"Y",
-8.550819395121305
],
[
"(",
-8.564518237708931
],
[
"\"",
-8.768258741460592
],
[
"p",
-8.841598353802134
],
[
"3",
-8.84491235773391
],
[
"“",
-8.848237380812833
],
[
"'",
-8.861649146182236
],
[
"5",
-8.90475925914842
],
[
"9",
-8.91005962353548
],
[
"4",
-8.957212497597583
],
[
"”",
-8.975957183394758
],
[
"/",
-8.979748655995529
],
[
"8",
-9.062882529049046
],
[
"7",
-9.064948644751524
],
[
"6",
-9.1513021242946
],
[
"?",
-9.42740162082142
],
[
"v",
-9.473002191090902
],
[
"…",
-9.722540553349848
],
[
"–",
-10.13702742185092
],
[
"z",
-10.13702742185092
],
[
"V",
-10.286656086276462
],
[
";",
-10.307933395522006
],
[
"[",
-10.43788633130764
],
[
"]",
-10.496726484580272
],
[
"!",
-10.496726484580272
],
[
"Z",
-10.797478062315149
],
[
"_",
-10.821147530102262
],
[
"&",
-10.895715264505524
],
[
"‘",
-11.160012056497354
],
[
"%",
-11.176961209039728
],
[
"$",
-11.544672763846052
],
[
"+",
-11.705005812336996
],
[
"`",
-12.00725762502892
],
[
"»",
-12.04725762502892
],
[
"ل",
-12.088924291695587
],
[
"ا",
-12.132402552565154
],
[
"—",
-12.132402552565154
],
[
"​",
-12.225476145638746
],
[
"*",
-12.225476145638746
],
[
"«",
-12.504986809553444
],
[
"�",
-12.504986809553444
],
[
"€",
-12.64308204764871
],
[
"\\",
-12.803338457905266
],
[
"ö",
-12.803338457905266
],
[
"م",
-13.105358659928983
],
[
"•",
-13.105358659928983
],
[
"ä",
-13.230358659938087
],
[
"ي",
-13.230358659938087
],
[
"}",
-13.230358659938087
],
[
"ه",
-13.373215802835192
],
[
"ن",
-13.373215802835192
],
[
"â",
-13.373215802835192
],
[
"{",
-13.373215802835192
],
[
"=",
-13.373215802835192
],
[
"@",
-13.373215802835192
],
[
"→",
-13.373215802835192
],
[
"←",
-13.539882469501858
],
[
"و",
-13.539882469501858
],
[
"ع",
-13.73988246950186
],
[
"£",
-13.73988246950186
],
[
"·",
-13.73988246950186
],
[
"س",
-13.73988246950186
],
[
"^",
-13.73988246950186
],
[
"ø",
-13.73988246950186
],
[
"ر",
-13.73988246950186
],
[
"´",
-13.73988246950186
],
[
"ف",
-13.98988246950186
],
[
"ص",
-13.98988246950186
],
[
"é",
-13.98988246950186
],
[
"œ",
-13.98988246950186
],
[
"è",
-13.98988246950186
],
[
"ح",
-14.323215802835191
],
[
"á",
-14.323215802835191
],
[
"í",
-14.323215802835191
],
[
"„",
-14.323215802835191
],
[
"Ø",
-14.323215802835191
],
[
"ك",
-14.323215802835191
],
[
"ü",
-14.823215802835191
],
[
"<",
-14.823215802835191
],
[
"أ",
-14.823215802835191
],
[
"ُ",
-14.823215802835191
],
[
"ى",
-14.823215802835191
],
[
"፡",
-14.823215802835191
],
[
"²",
-14.823215802835191
],
[
"د",
-14.823215802835191
],
[
"ء",
-14.823215802835191
],
[
"ʼ",
-14.823215802835191
],
[
"ب",
-14.823215802835191
],
[
"­",
-14.823215802835191
],
[
"",
-14.823215802835191
],
[
"ط",
-15.823215802835191
],
[
"职",
-15.823215802835191
],
[
"å",
-15.823215802835191
],
[
"ﷺ",
-15.823215802835191
],
[
"业",
-15.823215802835191
],
[
"”",
-15.823215802835191
],
[
"ə",
-15.823215802835191
],
[
"川",
-15.823215802835191
],
[
"‎",
-15.823215802835191
],
[
"ق",
-15.823215802835191
],
[
"ē",
-15.823215802835191
],
[
"‐",
-15.823215802835191
],
[
"#",
-15.823215802835191
],
[
"“",
-15.823215802835191
],
[
"×",
-15.823215802835191
],
[
"ï",
-15.823215802835191
],
[
"ñ",
-15.823215802835191
],
[
"─",
-15.823215802835191
],
[
"®",
-15.823215802835191
],
[
"ذ",
-15.823215802835191
],
[
"ش",
-15.823215802835191
],
[
"四",
-15.823215802835191
],
[
"ج",
-15.823215802835191
],
[
"↑",
-15.823215802835191
],
[
"غ",
-15.823215802835191
],
[
"传",
-15.823215802835191
],
[
"媒",
-15.823215802835191
],
[
"院",
-15.823215802835191
],
[
"ë",
-15.823215802835191
],
[
"学",
-15.823215802835191
],
[
"ş",
-15.823215802835191
],
[
"™",
-15.823215802835191
],
[
"ğ",
-15.823215802835191
],
[
"‏",
-15.823215802835191
],
[
"ą",
-15.823215802835191
],
[
"ç",
-15.823215802835191
],
[
"ث",
-15.823215802835191
],
[
"ت",
-15.823215802835191
],
[
"ó",
-15.823215802835191
],
[
"文",
-15.823215802835191
],
[
"ã",
-15.823215802835191
],
[
"化",
-15.823215802835191
],
[
" ",
-15.823215802835191
],
[
">",
-15.823215802835191
]
],
"byte_fallback": false
}
}