CV_cs_uni150 / tokenizer.json
Lakoc's picture
Upload tokenizer
c6068dc
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<mask>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Replace",
"pattern": {
"String": "``"
},
"content": "\""
},
{
"type": "Replace",
"pattern": {
"String": "''"
},
"content": "\""
},
{
"type": "Lowercase"
}
]
},
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 1
}
}
],
"special_tokens": {
"</s>": {
"id": "</s>",
"ids": [
1
],
"tokens": [
"</s>"
]
},
"<s>": {
"id": "<s>",
"ids": [
0
],
"tokens": [
"<s>"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"model": {
"type": "Unigram",
"unk_id": 2,
"vocab": [
[
"<s>",
0.0
],
[
"</s>",
0.0
],
[
"<unk>",
0.0
],
[
"<pad>",
0.0
],
[
"<mask>",
0.0
],
[
"▁",
-2.6869277950385246
],
[
"a",
-3.4736932742625743
],
[
"d",
-3.5169455130120006
],
[
"o",
-3.5286539017728824
],
[
"m",
-3.5323135588242067
],
[
".",
-3.564637887689294
],
[
"t",
-3.5766439833645745
],
[
"v",
-3.681297344354825
],
[
"l",
-3.689568030372799
],
[
"r",
-3.774267715687163
],
[
"á",
-3.7839508683985894
],
[
"u",
-3.8196891699702338
],
[
"i",
-3.8667663747150343
],
[
"s",
-3.909870967524375
],
[
"e",
-4.083499988550724
],
[
"k",
-4.110101473533085
],
[
"st",
-4.189659378196238
],
[
"í",
-4.216716213806508
],
[
"p",
-4.244355592837463
],
[
"ní",
-4.284565089272325
],
[
"▁s",
-4.356375169310841
],
[
"ch",
-4.378120176209528
],
[
"y",
-4.403435537916263
],
[
"n",
-4.4088704395341285
],
[
"ě",
-4.42776931797775
],
[
"z",
-4.50594373557815
],
[
"▁v",
-4.514565701293554
],
[
"b",
-4.56361649122743
],
[
"▁po",
-4.676046937232897
],
[
"é",
-4.716545855921773
],
[
"▁z",
-4.760183600044909
],
[
"▁p",
-4.79618222161065
],
[
"▁je",
-4.799375873449417
],
[
"le",
-4.8098642535404785
],
[
"č",
-4.8227973718325625
],
[
"▁a",
-4.848692763930883
],
[
"ra",
-4.896507664191766
],
[
"j",
-4.904878941980023
],
[
"h",
-4.950500923113031
],
[
"ho",
-4.953880215501709
],
[
"▁t",
-4.966483225754908
],
[
"ů",
-5.023017042244732
],
[
"▁na",
-5.023736128641509
],
[
"la",
-5.02582210509275
],
[
"▁se",
-5.036911486274294
],
[
"ře",
-5.078248195651179
],
[
"ž",
-5.086796820220469
],
[
"ce",
-5.1503929573363205
],
[
"ně",
-5.152313027900451
],
[
"▁o",
-5.163064953192693
],
[
"ro",
-5.170970372230601
],
[
"c",
-5.185020098469282
],
[
"to",
-5.240845782694485
],
[
"li",
-5.243891142159361
],
[
"te",
-5.276359647893782
],
[
"ni",
-5.3142533038567255
],
[
"▁k",
-5.333631613909036
],
[
"ou",
-5.335616010850735
],
[
"ý",
-5.339749960583564
],
[
"š",
-5.344368939067419
],
[
"▁ne",
-5.345719541688208
],
[
"ná",
-5.370142507492488
],
[
"lo",
-5.389693120322628
],
[
"ko",
-5.412780857659412
],
[
"▁d",
-5.415369018363141
],
[
"▁do",
-5.419837728390086
],
[
"va",
-5.419908818761444
],
[
"▁b",
-5.459566701869113
],
[
"na",
-5.483856512778948
],
[
"▁pro",
-5.491615520219384
],
[
"en",
-5.491632521325277
],
[
"g",
-5.567507094495232
],
[
"ř",
-5.626252535860573
],
[
"▁za",
-5.653361024454775
],
[
"em",
-5.671828012153959
],
[
"de",
-5.693742555724979
],
[
"vě",
-5.712634198060308
],
[
"né",
-5.725682521576861
],
[
"▁byl",
-5.7431856144338225
],
[
"ka",
-5.7564812233976745
],
[
"me",
-5.758932979560627
],
[
"ti",
-5.777689686394815
],
[
"ku",
-5.788214406875806
],
[
"▁ve",
-5.81788332162581
],
[
"▁h",
-5.828194482484325
],
[
"ci",
-5.8685364146133905
],
[
"▁ob",
-5.8795942338441325
],
[
"in",
-5.884429064451998
],
[
"▁vy",
-5.8904733193792165
],
[
"ze",
-5.900522297779989
],
[
"jí",
-5.900655871395716
],
[
"mi",
-5.959336618342466
],
[
"ky",
-5.962819212563628
],
[
"ší",
-5.982128060719085
],
[
",",
-5.997687410249881
],
[
"ji",
-6.039547006190626
],
[
"če",
-6.077300475105323
],
[
"že",
-6.082644219754787
],
[
"ské",
-6.084047594823979
],
[
"▁to",
-6.088417119613753
],
[
"▁u",
-6.091282547292413
],
[
"ný",
-6.092147356490781
],
[
"tu",
-6.122822759385493
],
[
"ži",
-6.175854456949489
],
[
"cí",
-6.194086173958022
],
[
"ny",
-6.2341268686471745
],
[
"▁vý",
-6.254300631687474
],
[
"ří",
-6.290109885002002
],
[
"ú",
-6.30229702449217
],
[
"oval",
-6.336053437150435
],
[
"f",
-6.365365744692261
],
[
"▁ma",
-6.544821545721467
],
[
"ové",
-6.585392005562145
],
[
"▁roz",
-6.672603700078633
],
[
"ován",
-6.740525371780512
],
[
"ský",
-6.812595149271572
],
[
"ické",
-6.917033038816811
],
[
"▁jsou",
-6.934163947080046
],
[
"\"",
-7.035184436268866
],
[
"x",
-7.189480387453991
],
[
"ň",
-7.258982009622979
],
[
"▁však",
-7.283309637010308
],
[
"?",
-7.502227999964619
],
[
"w",
-7.629618841988569
],
[
"▁několik",
-7.684828878204912
],
[
"▁evropsk",
-7.925232247421272
],
[
"ť",
-7.995589402202159
],
[
"ď",
-8.106278989112981
],
[
"ó",
-8.153162468932514
],
[
"!",
-8.485375997236162
],
[
"“",
-8.892698672686397
],
[
"-",
-9.90566296898728
],
[
"–",
-10.188199986670291
],
[
";",
-10.8609474862132
],
[
"ö",
-11.035189910455998
],
[
"ü",
-11.371301021579455
],
[
"/",
-12.46415816443661
],
[
"è",
-12.464158164476563
],
[
"ä",
-12.964158164476554
],
[
":",
-12.964158164476563
],
[
"„",
-13.964058164476564
],
[
"ï",
-13.964158164476563
],
[
"—",
-13.964158164476563
],
[
"q",
-13.964158164476563
],
[
"…",
-13.964158164476563
]
]
}
}