cz_ec_uni250 / tokenizer.json
iszoke's picture
Upload tokenizer
8a88188 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "([bos])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "([eos])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "([unk])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "([pad])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "([mask])",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 247,
"content": "(LNG)",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 248,
"content": "(UNK)",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 249,
"content": "(SPN)",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true,
"prepend_scheme": "always"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "([eos])",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "([eos])",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "([eos])",
"type_id": 1
}
}
],
"special_tokens": {
"([bos])": {
"id": "([bos])",
"ids": [
0
],
"tokens": [
"([bos])"
]
},
"([eos])": {
"id": "([eos])",
"ids": [
1
],
"tokens": [
"([eos])"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true,
"prepend_scheme": "always"
},
"model": {
"type": "Unigram",
"unk_id": 2,
"vocab": [
[
"([bos])",
0.0
],
[
"([eos])",
0.0
],
[
"([unk])",
0.0
],
[
"([pad])",
0.0
],
[
"([mask])",
0.0
],
[
"▁",
-1.9847706990161544
],
[
"a",
-3.1314497930271177
],
[
"t",
-3.1881965015958187
],
[
"i",
-3.4243963619008255
],
[
"d",
-3.5014511936716186
],
[
"o",
-3.5274805409389582
],
[
"m",
-3.558661298685198
],
[
"u",
-3.625320681110813
],
[
"s",
-3.712394209496356
],
[
"v",
-3.72415747794809
],
[
",",
-3.805334281306008
],
[
"k",
-3.823407487612606
],
[
"á",
-4.06402185490056
],
[
"e",
-4.118975276125365
],
[
"r",
-4.1561955333170815
],
[
"l",
-4.207310751159483
],
[
"n",
-4.260244529295308
],
[
"y",
-4.279903492234059
],
[
"í",
-4.4102129224806585
],
[
"ě",
-4.502923994115825
],
[
"p",
-4.532547889978606
],
[
"▁v",
-4.559246971264237
],
[
"ní",
-4.566408642453595
],
[
".",
-4.579478426041845
],
[
"ch",
-4.7060160462408955
],
[
"b",
-4.7267873231612505
],
[
"z",
-4.741376547385773
],
[
"▁s",
-4.7959555301926695
],
[
"▁po",
-4.890977951755792
],
[
"▁z",
-4.9117950878941965
],
[
"j",
-4.983830676165416
],
[
"é",
-4.997351240560704
],
[
"li",
-5.030678383363137
],
[
"c",
-5.0396387435626995
],
[
"ž",
-5.049142114448735
],
[
"to",
-5.057673228667184
],
[
"h",
-5.079586658466628
],
[
"▁je",
-5.113720460724675
],
[
"me",
-5.1181371558896736
],
[
"▁na",
-5.127083114594628
],
[
"▁pro",
-5.147757595304579
],
[
"č",
-5.179515263964058
],
[
"ů",
-5.200945320530458
],
[
"ho",
-5.221618751844693
],
[
"le",
-5.2250844030131365
],
[
"ně",
-5.27972704953069
],
[
"ce",
-5.281994589031864
],
[
"▁ne",
-5.308616451754174
],
[
"ý",
-5.31697552150202
],
[
"▁se",
-5.337009578506832
],
[
"la",
-5.338199582903597
],
[
"ni",
-5.339196490125113
],
[
"▁to",
-5.349248427402005
],
[
"te",
-5.381248563144281
],
[
"ra",
-5.498126975121622
],
[
"š",
-5.515959757283252
],
[
"pr",
-5.6046024850544
],
[
"▁za",
-5.620837358908096
],
[
"▁do",
-5.620997292088125
],
[
"ro",
-5.62869520219124
],
[
"g",
-5.636274983240279
],
[
"en",
-5.672796438052734
],
[
"ná",
-5.6936132171970435
],
[
"ou",
-5.705221838268827
],
[
"po",
-5.731479162812082
],
[
"ci",
-5.737597320989661
],
[
"▁k",
-5.739511554705135
],
[
"ře",
-5.750324760230148
],
[
"em",
-5.7763291411793904
],
[
"▁že",
-5.826140091837006
],
[
"mi",
-5.832772778061484
],
[
"na",
-5.836487492053504
],
[
"lo",
-5.843302017381193
],
[
"f",
-5.863906577570942
],
[
"vá",
-5.896441383118669
],
[
"ne",
-5.970429860963618
],
[
"né",
-6.012839546213854
],
[
"ř",
-6.0240161726213195
],
[
"jí",
-6.0667261910328385
],
[
"ji",
-6.0952373324629345
],
[
"ří",
-6.233171041707054
],
[
")",
-6.275676190119125
],
[
"ze",
-6.28944574316632
],
[
"ovat",
-6.332103824255016
],
[
"ých",
-6.338664587271952
],
[
"▁tak",
-6.3416936938172075
],
[
"▁jak",
-6.355716899830018
],
[
"še",
-6.358544883928106
],
[
"kon",
-6.362030472567895
],
[
"ské",
-6.372589277660225
],
[
"cí",
-6.396436911266985
],
[
"▁Evrop",
-6.396990124349596
],
[
"ost",
-6.415178844816895
],
[
"▁ob",
-6.4345557525303665
],
[
"▁při",
-6.453429679947529
],
[
"K",
-6.557669972049428
],
[
"▁byl",
-6.5913081200567305
],
[
"ú",
-6.608159467526681
],
[
"▁by",
-6.666809422458517
],
[
"▁děkuj",
-6.724698564420603
],
[
"▁aby",
-6.7258358147743795
],
[
"▁pří",
-6.764730143329496
],
[
"ové",
-6.787307118513603
],
[
"str",
-6.844449647445437
],
[
"▁roz",
-6.859666898290389
],
[
"▁které",
-6.884647178230626
],
[
"▁bude",
-6.909815247130545
],
[
"P",
-6.91499677672085
],
[
"▁práv",
-6.925378846074215
],
[
"ální",
-7.001369471478936
],
[
"2",
-7.002935587142833
],
[
"UN",
-7.008632740292555
],
[
"oval",
-7.0155367155397705
],
[
"▁stát",
-7.030364051450455
],
[
"S",
-7.056765638137547
],
[
"0",
-7.090000239807136
],
[
"▁paní",
-7.138568988225609
],
[
"bycho",
-7.1887860518667495
],
[
"1",
-7.204853513146585
],
[
"ň",
-7.207931701488759
],
[
"potřeb",
-7.285106413483476
],
[
"x",
-7.315223767308989
],
[
"E",
-7.375340704041776
],
[
"?",
-7.386826804856964
],
[
"LNG",
-7.3929905598606975
],
[
"R",
-7.401684079434748
],
[
"▁další",
-7.420579256632934
],
[
"▁koleg",
-7.428233796020805
],
[
"▁návrh",
-7.448936921677545
],
[
"-",
-7.472487244726157
],
[
"ď",
-7.533773150491692
],
[
"▁který",
-7.537522052818428
],
[
"▁budou",
-7.575344678712463
],
[
"▁občan",
-7.594603707059271
],
[
"A",
-7.615608685547574
],
[
"M",
-7.653390606972009
],
[
"B",
-7.6553176992492755
],
[
"▁společn",
-7.670544502769873
],
[
"U",
-7.67580272611945
],
[
"▁hlasování",
-7.707787867541219
],
[
"3",
-7.88204425480434
],
[
"▁komisař",
-7.930804195673383
],
[
"▁myslím",
-7.947026544282705
],
[
"▁parlament",
-8.01146461330666
],
[
"▁skutečn",
-8.050309277818695
],
[
"N",
-8.097239230405401
],
[
"5",
-8.105742503400506
],
[
"T",
-8.121502869949357
],
[
"C",
-8.176809220997738
],
[
"L",
-8.187699548052356
],
[
"▁důležité",
-8.214330862977269
],
[
"▁samozřejm",
-8.238805618383932
],
[
"I",
-8.29023538966668
],
[
"V",
-8.303056077697512
],
[
"4",
-8.304901096612927
],
[
"ť",
-8.317912432172614
],
[
"D",
-8.335528489527377
],
[
"O",
-8.350878966312928
],
[
"F",
-8.414064871470213
],
[
"Č",
-8.44899681851851
],
[
"H",
-8.500046908605349
],
[
"G",
-8.523514969953006
],
[
"6",
-8.57862541478596
],
[
"7",
-8.59906766269739
],
[
"J",
-8.64470295468657
],
[
"9",
-8.679007107657972
],
[
"ó",
-8.721975323121864
],
[
"8",
-8.76492085037819
],
[
"w",
-8.814906263661838
],
[
"(",
-8.961548871720103
],
[
"Z",
-9.1304809676736
],
[
"*",
-9.405361687806153
],
[
"W",
-9.730428224989948
],
[
"Ř",
-10.146836119011482
],
[
"%",
-10.215135657592311
],
[
"!",
-10.607176887842284
],
[
"X",
-10.677269027212253
],
[
"Ú",
-10.88161257963682
],
[
"Y",
-11.097694614772804
],
[
"ö",
-11.595201719460682
],
[
"ü",
-12.021529093476044
],
[
"ń",
-12.15686214196699
],
[
":",
-12.2478269369759
],
[
"ä",
-12.31341833482536
],
[
"–",
-12.38361537916034
],
[
"Q",
-12.499113954658911
],
[
"ă",
-12.72733247526874
],
[
";",
-12.894343139183434
],
[
"'",
-13.023509805850113
],
[
"è",
-13.094938377278702
],
[
"ñ",
-13.094938377278703
],
[
"ł",
-13.171861454201824
],
[
"ś",
-13.346103878444616
],
[
"’",
-13.446103878445394
],
[
"ç",
-13.55721498955898
],
[
"+",
-13.682214989568084
],
[
"î",
-13.682214989568084
],
[
"ę",
-13.825072132425229
],
[
"ş",
-13.825072132465184
],
[
"/",
-13.825072132465186
],
[
"Ť",
-13.991738799131848
],
[
"ė",
-14.191738799131851
],
[
"ê",
-14.775072132425228
],
[
"â",
-14.775072132465183
],
[
"Ě",
-14.775072132465183
],
[
"ț",
-14.775072132465183
],
[
"ż",
-14.775072132465183
],
[
"Ș",
-15.275072132425244
],
[
"´",
-15.27507213246516
],
[
"§",
-15.275072132465189
],
[
"”",
-15.275072132465189
],
[
"ë",
-15.275072132465189
],
[
"​",
-15.275072132465189
],
[
"ī",
-15.275072132465189
],
[
"ß",
-16.273572132465187
],
[
"Ş",
-16.273672132465187
],
[
"Ó",
-16.273772132465187
],
[
"Ł",
-16.273872132465186
],
[
"ã",
-16.273972132465186
],
[
"Í",
-16.274072132465186
],
[
"Š",
-16.274172132465186
],
[
"ć",
-16.274272132465185
],
[
"ź",
-16.27437213246519
],
[
"ţ",
-16.27447213246519
],
[
"“",
-16.274572132465188
],
[
"q",
-16.274672132465188
],
[
"ï",
-16.274772132465188
],
[
"Á",
-16.274872132465187
],
[
"ș",
-16.274972132465187
],
[
"Ö",
-16.275072132425237
],
[
"ā",
-16.275072132425237
],
[
"É",
-16.275072132465176
],
[
"Ž",
-16.275072132465187
],
[
"—",
-16.275072132465187
],
[
"¨",
-16.275072132465187
],
[
"°",
-16.275072132465187
],
[
"ū",
-16.275072132465187
],
[
"Ď",
-16.275072132465187
],
[
"»",
-16.275072132465187
],
[
"…",
-16.275072132465187
],
[
"«",
-16.275072132465187
],
[
"‑",
-16.275072132465187
]
],
"byte_fallback": false
}
}