Transformers
uni200_cz / tokenizer.json
Lakoc's picture
Upload tokenizer
a6539ab verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "([bos])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "([eos])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "([unk])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "([pad])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "([mask])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "▁",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true,
"prepend_scheme": "always"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "([eos])",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "([eos])",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "([eos])",
"type_id": 1
}
}
],
"special_tokens": {
"([bos])": {
"id": "([bos])",
"ids": [
0
],
"tokens": [
"([bos])"
]
},
"([eos])": {
"id": "([eos])",
"ids": [
1
],
"tokens": [
"([eos])"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true,
"prepend_scheme": "always"
},
"model": {
"type": "Unigram",
"unk_id": 2,
"vocab": [
[
"([bos])",
0.0
],
[
"([eos])",
0.0
],
[
"([unk])",
0.0
],
[
"([pad])",
0.0
],
[
"([mask])",
0.0
],
[
"▁",
-2.499128778587856
],
[
"t",
-3.0865028844963014
],
[
"a",
-3.102306583394892
],
[
"o",
-3.326730424192313
],
[
"i",
-3.374887230699951
],
[
"d",
-3.4134162812057696
],
[
"m",
-3.421874311567425
],
[
"u",
-3.442879920624332
],
[
"s",
-3.6156725640851057
],
[
"k",
-3.668304993391647
],
[
"v",
-3.7592353524389495
],
[
"l",
-3.776068399269126
],
[
"n",
-3.895056384688768
],
[
"y",
-3.9000882135364208
],
[
"r",
-3.9586559073105274
],
[
"á",
-3.977549305378384
],
[
"e",
-4.143739257791065
],
[
"ní",
-4.2675180833194695
],
[
"ě",
-4.275882393493827
],
[
"í",
-4.322622048348919
],
[
"▁v",
-4.3677298638910855
],
[
"▁s",
-4.486084813895594
],
[
"ch",
-4.562045841945379
],
[
"z",
-4.642799812536991
],
[
"b",
-4.735980941671054
],
[
"p",
-4.782606605999646
],
[
"▁je",
-4.8109069611846405
],
[
"▁z",
-4.811171361391686
],
[
"č",
-4.819101692533103
],
[
"h",
-4.875324390639827
],
[
"ž",
-4.9469286061000215
],
[
"▁po",
-4.979733480765422
],
[
"▁k",
-4.983058318355871
],
[
"ho",
-5.014770710841287
],
[
"ně",
-5.027004448341966
],
[
"é",
-5.033922594851894
],
[
"▁se",
-5.070242550353601
],
[
"ů",
-5.076093696552347
],
[
"▁ne",
-5.07850315539042
],
[
"▁to",
-5.080683825550732
],
[
"▁na",
-5.082729427444132
],
[
"le",
-5.084931708228693
],
[
"j",
-5.092860839372255
],
[
"te",
-5.111849641292974
],
[
"ce",
-5.167893764093787
],
[
"to",
-5.201502454357334
],
[
"c",
-5.211751293203864
],
[
"la",
-5.220908690744487
],
[
"li",
-5.228401300961396
],
[
"▁pro",
-5.28909325560814
],
[
"š",
-5.336974677710405
],
[
"ou",
-5.361735088962444
],
[
"ra",
-5.373997695589459
],
[
"▁b",
-5.377170666366489
],
[
"ná",
-5.402067051724565
],
[
"▁do",
-5.409550696098668
],
[
"ý",
-5.499362912761228
],
[
"▁p",
-5.519592931848663
],
[
"no",
-5.527056838004395
],
[
"ro",
-5.569979277623883
],
[
"me",
-5.59098806652543
],
[
"▁za",
-5.595510922449559
],
[
"na",
-5.607319989704424
],
[
"je",
-5.677917904510625
],
[
"▁tak",
-5.693228345649542
],
[
"▁že",
-5.706112027218516
],
[
"ře",
-5.729075184880374
],
[
"po",
-5.743790949622708
],
[
"né",
-5.792051087494318
],
[
"g",
-5.792962802749521
],
[
"že",
-5.855186781283246
],
[
"ř",
-5.860803439108823
],
[
"ci",
-5.886413579381182
],
[
"vá",
-5.918579561871162
],
[
"ji",
-5.937103563893679
],
[
"▁pan",
-5.9859932213789
],
[
"še",
-5.994004038829976
],
[
"em",
-6.02749613036484
],
[
"jí",
-6.041999375202467
],
[
"ze",
-6.063005453953373
],
[
"ří",
-6.07363167284074
],
[
"če",
-6.08508852523804
],
[
"en",
-6.106917713545144
],
[
"▁byl",
-6.114855848806117
],
[
"▁pr",
-6.14395743369564
],
[
"▁ob",
-6.153504291755276
],
[
"ne",
-6.212989784008883
],
[
"ost",
-6.225184959112985
],
[
"▁jak",
-6.262777892874428
],
[
"cí",
-6.267809387408082
],
[
"▁při",
-6.27486814065419
],
[
"▁ú",
-6.315676344807086
],
[
"▁pod",
-6.316888069486108
],
[
"▁ve",
-6.321506938300814
],
[
"ých",
-6.326993152725493
],
[
"prav",
-6.338184621708324
],
[
"▁od",
-6.362887996804556
],
[
"ské",
-6.393272592934666
],
[
"ší",
-6.427460772202348
],
[
"ný",
-6.437086239901353
],
[
"▁f",
-6.437643247780519
],
[
"▁roz",
-6.456534287022377
],
[
"f",
-6.477355866487592
],
[
"ové",
-6.503588160892026
],
[
"ovat",
-6.507950646600429
],
[
"nost",
-6.524392146806889
],
[
"▁návrh",
-6.698396879769346
],
[
"▁de",
-6.713844618946004
],
[
"▁já",
-6.724048869017867
],
[
"▁má",
-6.731218690484539
],
[
"pe",
-6.740420329428542
],
[
"oval",
-6.752251425595066
],
[
"▁ale",
-6.753329701931678
],
[
"▁re",
-6.757130161018704
],
[
"▁aby",
-6.758720514734467
],
[
"▁sta",
-6.763339994105991
],
[
"▁pře",
-6.763952362064915
],
[
"nic",
-6.8317726182978475
],
[
"▁dva",
-6.897256310834436
],
[
"▁děkuji",
-6.945219659498331
],
[
"▁ten",
-6.949572680904785
],
[
"▁jsou",
-6.985344835550627
],
[
"▁jsem",
-6.990030017773023
],
[
"ň",
-7.018524063200246
],
[
"▁tady",
-7.024088728206813
],
[
"▁které",
-7.063595703065953
],
[
"▁bude",
-7.081398605215856
],
[
"▁zákon",
-7.138727001503309
],
[
"▁poslanec",
-7.203720232189541
],
[
"▁vlád",
-7.221688399966633
],
[
"▁který",
-7.238068816091342
],
[
"▁stát",
-7.238194522495668
],
[
"▁práv",
-7.260602338552209
],
[
"▁evrop",
-7.286883488740745
],
[
"▁jsme",
-7.291759729985696
],
[
"▁tři",
-7.310567645199065
],
[
"▁slov",
-7.336481848322062
],
[
"ické",
-7.392674139880333
],
[
"▁koleg",
-7.44231294147434
],
[
"x",
-7.453349163224733
],
[
"▁další",
-7.49134954045496
],
[
"▁výbor",
-7.558279881882326
],
[
"▁prosím",
-7.591223981658731
],
[
"▁poslanc",
-7.6476797215772
],
[
"▁druh",
-7.653909244894827
],
[
"▁ministr",
-7.660789503917892
],
[
"▁tisíc",
-7.66907435259613
],
[
"▁vážen",
-7.67107747420704
],
[
"▁musí",
-7.696587095874426
],
[
"▁možn",
-7.722404407417795
],
[
"ď",
-7.725015204220382
],
[
"▁důvod",
-7.799889505470269
],
[
"ť",
-7.803622958869749
],
[
"▁nějak",
-7.829075727010471
],
[
"▁čtyři",
-7.838153829020333
],
[
"▁myslím",
-7.882124621357049
],
[
"prostřed",
-7.940183185917483
],
[
"▁hlasování",
-7.998003495858967
],
[
"▁samozřejmě",
-8.052877830462736
],
[
"▁republik",
-8.197430485355493
],
[
"▁faktick",
-8.20619531687208
],
[
"▁ministerstv",
-8.277630046772881
],
[
"▁poslankyně",
-8.311690756845529
],
[
"▁pozměňovací",
-8.41157551431289
],
[
"w",
-8.5483905200163
],
[
"ó",
-8.840820959172692
],
[
"▁místopředsed",
-8.910689398104918
],
[
"ú",
-8.934579379610787
],
[
"“",
-10.298399657807384
],
[
"-",
-11.228961190328617
],
[
"q",
-11.615106119552989
],
[
"–",
-12.263928853040472
],
[
"ö",
-12.573028596778729
],
[
"ä",
-13.753554843340815
],
[
"ő",
-15.801294500537251
],
[
"ń",
-16.301294500537253
],
[
"à",
-16.30129450053726
],
[
"ľ",
-16.30129450053726
],
[
"ã",
-16.30129450053726
],
[
"ă",
-16.30129450053726
],
[
"ø",
-16.30129450053726
],
[
"…",
-16.30129450053726
],
[
"„",
-17.30089450053726
],
[
"ü",
-17.30099450053726
],
[
"ç",
-17.30109450053726
],
[
"è",
-17.30119450053726
],
[
"å",
-17.30129450049731
],
[
"ĺ",
-17.301294500537246
],
[
"ć",
-17.30129450053726
],
[
"ì",
-17.30129450053726
],
[
"ï",
-17.30129450053726
],
[
"ė",
-17.30129450053726
],
[
"ţ",
-17.30129450053726
],
[
"ş",
-17.30129450053726
],
[
"—",
-17.30129450053726
]
],
"byte_fallback": false
}
}