{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "([bos])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "([eos])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "([unk])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "([pad])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "([mask])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true, "prepend_scheme": "always" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "([eos])", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "([eos])", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "([eos])", "type_id": 1 } } ], "special_tokens": { "([bos])": { "id": "([bos])", "ids": [ 0 ], "tokens": [ "([bos])" ] }, "([eos])": { "id": "([eos])", "ids": [ 1 ], "tokens": [ "([eos])" ] } } }, "decoder": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true, "prepend_scheme": "always" }, "model": { "type": "Unigram", "unk_id": 2, "vocab": [ [ "([bos])", 0.0 ], [ "([eos])", 0.0 ], [ "([unk])", 0.0 ], [ "([pad])", 0.0 ], [ "([mask])", 0.0 ], [ "s", -2.7087862142460697 ], [ "▁", -3.159713564956691 ], [ "t", -3.247934739073319 ], [ "e", -3.284542651100784 ], [ "▁the", -3.458398141767942 ], [ "d", -3.5931812444772806 ], [ "a", -3.6366062534936177 ], [ "i", -3.8026230161282193 ], [ "r", -3.894013105983418 ], [ "n", -3.9598806426474606 ], [ "o", -4.108208965984497 ], [ "▁a", -4.123017148530064 ], [ "l", -4.143189339085515 ], [ "ed", -4.185019838756677 ], [ "y", -4.417883797200398 ], [ "▁of", -4.418687866578182 ], [ "▁in", -4.419989440610058 ], [ "c", -4.4928092728880475 ], [ "m", -4.549063923185756 ], [ "p", -4.632586952754286 ], [ "▁to", -4.718095797580903 ], [ "ing", -4.718473047785871 ], [ "▁is", -4.728594972551644 ], [ "b", -4.766989553904384 ], [ "▁and", -4.77880181362533 ], [ "g", -4.8105314868809845 ], [ "er", -4.838757388268354 ], [ "in", -4.8881845890278015 ], [ "u", -4.926181255679008 ], [ "f", -4.950396547385262 ], [ "▁was", -4.968557781850629 ], [ "al", -5.0589039874057224 ], [ "▁he", -5.063578475115438 ], [ "le", -5.081968948387759 ], [ "or", -5.12773130976289 ], [ "an", -5.154499608378208 ], [ "ar", -5.228999932874652 ], [ "h", -5.239710728899752 ], [ "en", -5.276913049118749 ], [ "on", -5.287257314930473 ], [ "▁re", -5.29170807429036 ], [ "re", -5.376911310235307 ], [ "w", -5.37736139685706 ], [ "▁it", -5.438829496188923 ], [ "k", -5.44315681092983 ], [ "ly", -5.450308027570038 ], [ "it", -5.450545322048562 ], [ "▁be", -5.468488006995733 ], [ "ch", -5.521802761779233 ], [ "▁for", -5.548513141901553 ], [ "is", -5.596158258362125 ], [ "ic", -5.635561935799826 ], [ "▁are", -5.665579329263476 ], [ "▁w", -5.671906510671381 ], [ "ter", -5.710373423850561 ], [ "ur", -5.712613548234113 ], [ "ve", -5.795164697207461 ], [ "▁his", -5.81351636418619 ], [ "▁de", -5.848193572865624 ], [ "▁g", -5.858931530242998 ], [ "-", -5.865170099410005 ], [ "th", -5.912270797510585 ], [ "v", -5.916207085762094 ], [ "▁ma", -5.925417651109351 ], [ "▁on", -5.928735685698497 ], [ "▁by", -5.929119253111821 ], [ "▁with", -5.939965393017241 ], [ "ce", -5.944001428238662 ], [ "▁this", -5.9817138264670735 ], [ "▁also", -5.983956225982473 ], [ "▁di", -5.98632093594669 ], [ "ation", -5.989407901945238 ], [ "▁se", -5.991591319651997 ], [ "ck", -6.069032952442802 ], [ "▁ba", -6.076568601953877 ], [ "ent", -6.089212892005653 ], [ "▁con", -6.111962249346897 ], [ "ng", -6.143275412549821 ], [ "▁as", -6.147747034810779 ], [ "▁an", -6.1691273738919055 ], [ "▁mo", -6.176024423820149 ], [ "ul", -6.193235263628999 ], [ "▁co", -6.207722089411254 ], [ "▁po", -6.236389870862725 ], [ "▁li", -6.244125340728081 ], [ "▁c", -6.24659314699014 ], [ "▁so", -6.2584892075074325 ], [ "z", -6.262350036105108 ], [ "▁me", -6.265813448196459 ], [ "▁pa", -6.274214368513942 ], [ "ke", -6.284788373891308 ], [ "ll", -6.29727599976812 ], [ "vi", -6.297314723995699 ], [ "▁su", -6.312873884063279 ], [ "▁ch", -6.316733133214962 ], [ "▁were", -6.317001674657302 ], [ "ion", -6.32961679439499 ], [ "ge", -6.365805102591967 ], [ "▁at", -6.366347577865874 ], [ "▁lo", -6.368448533632174 ], [ "▁ro", -6.379973682438701 ], [ "▁has", -6.381738025259727 ], [ "ment", -6.385379137103088 ], [ "▁k", -6.386969828185796 ], [ "ver", -6.38992638519708 ], [ "▁bo", -6.4113570767182395 ], [ "un", -6.413821832531996 ], [ "▁le", -6.416708463357619 ], [ "▁from", -6.422667892332576 ], [ "ate", -6.4282492634575 ], [ "▁fa", -6.432844147368696 ], [ "x", -6.433501289905884 ], [ "am", -6.436652615309573 ], [ "▁ha", -6.439630948443641 ], [ "▁ex", -6.444623596405842 ], [ "ow", -6.444696534920828 ], [ "est", -6.445636255562283 ], [ "▁that", -6.455257224426214 ], [ "▁ca", -6.455280496823114 ], [ "▁she", -6.47833123379923 ], [ "▁la", -6.48007668633581 ], [ "at", -6.495527161682395 ], [ "us", -6.51192066539333 ], [ "ies", -6.514049205315091 ], [ "▁fi", -6.52545677841473 ], [ "▁sp", -6.52685820151644 ], [ "▁pro", -6.528963178196921 ], [ "mp", -6.538279720909209 ], [ "▁not", -6.54785989826822 ], [ "▁ho", -6.563725279244 ], [ "▁ne", -6.573235986746392 ], [ "▁sta", -6.57398357678378 ], [ "▁ra", -6.5807826647107 ], [ "▁th", -6.58745411057925 ], [ "▁do", -6.5949293099703254 ], [ "ut", -6.6132785145605 ], [ "om", -6.629934340802123 ], [ "ive", -6.641840396749629 ], [ "▁no", -6.653866998404455 ], [ "▁or", -6.655009632782145 ], [ "▁mi", -6.65771141558985 ], [ "▁sh", -6.661553283522425 ], [ "im", -6.664912974933774 ], [ "lo", -6.683941877897254 ], [ "per", -6.6936353152327115 ], [ "ther", -6.724802931319967 ], [ "▁un", -6.738575581758305 ], [ "▁fr", -6.738822448715048 ], [ "ide", -6.740660495044171 ], [ "ers", -6.7432945338476475 ], [ "ry", -6.746615494127802 ], [ "▁her", -6.747959730078813 ], [ "qu", -6.754441987038513 ], [ "ight", -6.756453568565393 ], [ "tion", -6.763906947562896 ], [ "▁have", -6.7661222733482465 ], [ "▁two", -6.769677687251608 ], [ "man", -6.778532527455926 ], [ "ph", -6.788187691225737 ], [ "ated", -6.791664740264787 ], [ "land", -6.806277135033374 ], [ "pp", -6.812916060110025 ], [ "▁wa", -6.815750988179804 ], [ "▁can", -6.819250477743541 ], [ "um", -6.855212810060099 ], [ "▁all", -6.863064196733795 ], [ "▁one", -6.883919766877581 ], [ "ally", -6.894574461770981 ], [ "▁we", -6.899248968878656 ], [ "▁there", -6.900372415768 ], [ "▁go", -6.901571026299864 ], [ "if", -6.9039358488068565 ], [ "▁i", -6.90424414037885 ], [ "▁you", -6.912114395683812 ], [ "▁they", -6.919467977755152 ], [ "▁part", -6.9361168002465785 ], [ "▁name", -6.941953920415864 ], [ "▁bu", -6.944066485594995 ], [ "ian", -6.963794923353964 ], [ "tic", -6.970500057495697 ], [ "▁play", -6.9790510011425315 ], [ "▁pre", -6.981428022342056 ], [ "▁com", -6.987803687852885 ], [ "ction", -7.000168263913396 ], [ "▁had", -7.011058670335936 ], [ "▁new", -7.024996664700733 ], [ "age", -7.025595740271836 ], [ "▁vi", -7.027038997638414 ], [ "ous", -7.028303776740474 ], [ "▁mu", -7.029504161358892 ], [ "▁tra", -7.038938213895344 ], [ "▁after", -7.044631576412625 ], [ "▁first", -7.056126055402661 ], [ "lu", -7.062308639897436 ], [ "▁been", -7.064865376182913 ], [ "▁comp", -7.066245780002827 ], [ "▁fe", -7.079193763985936 ], [ "ity", -7.080227628541172 ], [ "ial", -7.080810993511831 ], [ "hi", -7.084277518187218 ], [ "vo", -7.093572325435632 ], [ "▁school", -7.09957083097669 ], [ "▁ar", -7.105511095191675 ], [ "▁fl", -7.1078982175516074 ], [ "▁their", -7.1150041203253735 ], [ "ance", -7.1176754041850945 ], [ "▁cl", -7.118630542416138 ], [ "▁year", -7.120028089606139 ], [ "ition", -7.124910466647542 ], [ "▁its", -7.1355199302022445 ], [ "ical", -7.1428101778764095 ], [ "▁work", -7.144270629429425 ], [ "ence", -7.144764437238724 ], [ "▁other", -7.147743511062384 ], [ "▁str", -7.15178771128703 ], [ "▁car", -7.160388944412565 ], [ "min", -7.166712819426699 ], [ "▁some", -7.172070182565264 ], [ "▁time", -7.18110890535751 ], [ "ture", -7.186357992243128 ], [ "j", -7.190662353563619 ], [ "tri", -7.198171903624486 ], [ "▁mar", -7.202127789306614 ], [ "▁sc", -7.209850737464274 ], [ "▁pri", -7.2180317110096865 ], [ "ard", -7.221767758579366 ], [ "ill", -7.235003472921441 ], [ "tro", -7.243444688167287 ], [ "ary", -7.243461404640545 ], [ "port", -7.281185317840659 ], [ "cu", -7.287158066523228 ], [ "▁man", -7.291792484051358 ], [ "ell", -7.291801906698197 ], [ "day", -7.301005768455026 ], [ "▁ru", -7.303300816069839 ], [ "▁up", -7.308648049018128 ], [ "▁bi", -7.320100963035362 ], [ "▁count", -7.320844720582148 ], [ "ugh", -7.325578420982355 ], [ "way", -7.325586345677872 ], [ "▁these", -7.326853317808698 ], [ "▁but", -7.327670200652525 ], [ "▁most", -7.345183341289596 ], [ "▁later", -7.35122830418946 ], [ "▁pe", -7.357512849956709 ], [ "▁bro", -7.366609334491269 ], [ "▁fu", -7.368805431506818 ], [ "▁many", -7.373823250648902 ], [ "▁va", -7.375833538931573 ], [ "▁out", -7.381536894704848 ], [ "▁gra", -7.384781529137044 ], [ "▁him", -7.393000388890625 ], [ "able", -7.403376105408167 ], [ "rie", -7.408729531470341 ], [ "▁three", -7.42606380204095 ], [ "▁town", -7.427183101702385 ], [ "▁used", -7.429946275883083 ], [ "cent", -7.430185019770578 ], [ "came", -7.4366302808378 ], [ "pla", -7.441968222063206 ], [ "▁pi", -7.443664632996766 ], [ "▁ju", -7.4494340210691234 ], [ "▁el", -7.459397493055167 ], [ "▁high", -7.462373722776693 ], [ "▁sto", -7.4846231391256595 ], [ "▁however", -7.485133824745827 ], [ "ctor", -7.487695780691876 ], [ "▁jo", -7.495143111156208 ], [ "▁ja", -7.525092312225434 ], [ "▁city", -7.534486590732598 ], [ "▁hu", -7.535640061380375 ], [ "gre", -7.537763191129182 ], [ "ship", -7.541985965754442 ], [ "▁known", -7.560567236477107 ], [ "▁state", -7.569655102775105 ], [ "▁ri", -7.587086252535897 ], [ "ward", -7.590294044961327 ], [ "produc", -7.599503988032209 ], [ "▁into", -7.600409279475954 ], [ "▁over", -7.606763657533852 ], [ "▁will", -7.609507506403235 ], [ "▁born", -7.625680579045742 ], [ "▁pu", -7.634136634897592 ], [ "▁both", -7.638833495872163 ], [ "▁north", -7.640978527524354 ], [ "▁acc", -7.649090219623101 ], [ "▁bri", -7.650634774682942 ], [ "▁several", -7.65607141655474 ], [ "line", -7.674195552978695 ], [ "▁du", -7.674919488957654 ], [ "during", -7.67722606134674 ], [ "▁south", -7.677812087373661 ], [ "▁act", -7.711595022299397 ], [ "▁include", -7.712805810284968 ], [ "▁call", -7.720985117787054 ], [ "▁fo", -7.725039496806479 ], [ "ign", -7.728179065376409 ], [ "▁through", -7.7453947359728765 ], [ "▁four", -7.76434808542699 ], [ "▁found", -7.797665609419493 ], [ "▁large", -7.798877478656337 ], [ "▁film", -7.805622653996474 ], [ "▁under", -7.819562251260857 ], [ "▁would", -7.820400125605291 ], [ "▁who", -7.831715157199552 ], [ "▁located", -7.8341538978384335 ], [ "▁follow", -7.839028375712218 ], [ "▁serve", -7.846505271839957 ], [ "▁music", -7.876533841393858 ], [ "▁member", -7.887007719205323 ], [ "▁made", -7.892064035760038 ], [ "▁game", -7.907399306900546 ], [ "▁when", -7.919178671382307 ], [ "▁team", -7.9396147754433954 ], [ "▁current", -7.9438600228097425 ], [ "▁second", -8.00484816543743 ], [ "▁each", -8.006456901441448 ], [ "▁university", -8.007102176509145 ], [ "▁people", -8.01003188400972 ], [ "▁album", -8.031629388314958 ], [ "▁group", -8.035099266998278 ], [ "▁which", -8.043699536635295 ], [ "▁very", -8.04658253705172 ], [ "▁park", -8.052210607923111 ], [ "▁remain", -8.060677347794147 ], [ "▁record", -8.060683068624387 ], [ "▁house", -8.062960860388575 ], [ "▁plan", -8.063101760194963 ], [ "▁about", -8.066841585630506 ], [ "▁cri", -8.07267117759941 ], [ "▁appear", -8.10014737460207 ], [ "▁opera", -8.10838607600883 ], [ "▁number", -8.123578901880322 ], [ "▁children", -8.126500399957528 ], [ "▁bra", -8.13205521993703 ], [ "▁small", -8.146106911164622 ], [ "▁place", -8.14671201920939 ], [ "▁family", -8.156412204279727 ], [ "▁world", -8.161354359029843 ], [ "▁take", -8.166237382028916 ], [ "self", -8.168209060426628 ], [ "▁public", -8.193281029714244 ], [ "▁still", -8.197891609099521 ], [ "▁what", -8.198255226895526 ], [ "▁old", -8.200644872829253 ], [ "▁local", -8.202687765772456 ], [ "▁national", -8.206761519181546 ], [ "field", -8.208413299368788 ], [ "’", -8.210447064991602 ], [ "▁same", -8.221385696641576 ], [ "▁east", -8.227046672948772 ], [ "▁village", -8.240148070680647 ], [ "▁college", -8.24099685302726 ], [ "▁general", -8.243282309916683 ], [ "▁release", -8.252196761643082 ], [ "▁feature", -8.255292351555504 ], [ "▁said", -8.274305489790384 ], [ "▁service", -8.275617917046828 ], [ "▁develop", -8.278382172206454 ], [ "▁america", -8.28866665312544 ], [ "▁perform", -8.292972514805687 ], [ "▁system", -8.297304797335745 ], [ "▁district", -8.302092271380655 ], [ "▁receive", -8.314701158197984 ], [ "▁building", -8.316371990568316 ], [ "wood", -8.334157204402871 ], [ "▁major", -8.339930648867668 ], [ "▁continue", -8.354719374662293 ], [ "▁before", -8.355901431136036 ], [ "▁church", -8.373864752064762 ], [ "cause", -8.374994523725174 ], [ "▁different", -8.378677909559569 ], [ "▁consider", -8.379644319096371 ], [ "▁return", -8.398236208111976 ], [ "▁married", -8.403561309816 ], [ "▁between", -8.405165503565001 ], [ "▁office", -8.432822246570016 ], [ "▁while", -8.439216722942351 ], [ "▁black", -8.455245051296673 ], [ "▁white", -8.456305352690476 ], [ "▁where", -8.489684984231863 ], [ "▁success", -8.508232115728687 ], [ "▁community", -8.58451658089278 ], [ "▁popular", -8.618055276521458 ], [ "▁government", -8.626466578878379 ], [ "ground", -8.632522706351363 ], [ "▁publish", -8.642585807118772 ], [ "▁around", -8.65245225770258 ], [ "▁character", -8.67372648620668 ], [ "▁daughter", -8.690311524289319 ], [ "▁studie", -8.701632852931988 ], [ "▁student", -8.712422518543383 ], [ "▁language", -8.721007489965347 ], [ "▁written", -8.732011646611262 ], [ "▁english", -8.735706145750054 ], [ "▁availabl", -8.76611508104501 ], [ "▁council", -8.783614175792328 ], [ "▁represent", -8.81992241306137 ], [ "against", -8.828021082322095 ], [ "▁international", -8.828747745472203 ], [ "▁president", -8.830966865525369 ], [ "▁business", -8.83133702015181 ], [ "▁similar", -8.861789608880173 ], [ "▁important", -8.873694541232751 ], [ "▁football", -8.889268058772672 ], [ "▁project", -8.924414574369045 ], [ "▁describe", -8.988608347920229 ], [ "▁construct", -9.021653077852411 ], [ "▁australia", -9.04109987847347 ], [ "▁effect", -9.04247092997062 ], [ "▁subsequent", -9.116983546757709 ], [ "▁california", -9.175316509902466 ], [ "▁independen", -9.19749858554971 ], [ "▁establish", -9.292088040742566 ], [ "”", -9.573625240210962 ], [ "“", -9.591679104840328 ], [ "‘", -9.76603248107811 ], [ "q", -10.074918108580825 ], [ "—", -10.303615482492846 ], [ "é", -11.36889054536739 ], [ "–", -11.648021551968636 ], [ "ü", -11.718563368388722 ], [ "ä", -12.28400621656532 ], [ "ö", -12.491192913680244 ], [ "á", -12.629873544080864 ], [ "í", -12.771700006335152 ], [ "ó", -12.9370180637035 ], [ "ç", -13.31475682611221 ], [ "ß", -13.533785779279093 ], [ "â", -13.533785779279093 ], [ "à", -13.57545244594576 ], [ "ō", -13.929014963803612 ], [ "ú", -13.991514963803615 ], [ "ô", -14.28986661215544 ], [ "ï", -14.3807757030648 ], [ "…", -14.716886814188262 ], [ "ã", -14.716886814188262 ], [ "ł", -14.716886814188264 ], [ "ë", -14.859743957085366 ], [ "ø", -15.026410623712072 ], [ "č", -15.026410623712072 ], [ "ć", -15.026410623752032 ], [ "ă", -15.226410623712075 ], [ "´", -15.476410623712074 ], [ "š", -15.476410623712075 ], [ "î", -15.476410623752033 ], [ "ā", -15.809743957045407 ], [ "ș", -15.809743957045407 ], [ "ò", -15.809743957085365 ], [ "û", -15.809743957085365 ], [ "]", -15.809743957085365 ], [ "ž", -15.809743957085365 ], [ "ş", -16.30974395708536 ], [ "ʻ", -16.309743957085367 ], [ "ř", -16.309743957085367 ], [ "α", -16.309743957085367 ], [ "ı", -16.309743957085367 ], [ "å", -16.309743957085367 ], [ "»", -16.309743957085367 ], [ "·", -17.308943957085365 ], [ "ñ", -17.30904395708537 ], [ "[", -17.309143957085368 ], [ "œ", -17.309243957085368 ], [ "ê", -17.309343957085368 ], [ "ū", -17.309443957085367 ], [ "«", -17.309543957085367 ], [ "è", -17.309643957085367 ], [ "ả", -17.309743957045416 ], [ "尚", -17.30974395708534 ], [ "先", -17.30974395708534 ], [ "ạ", -17.30974395708536 ], [ "π", -17.309743957085367 ], [ "都", -17.309743957085367 ], [ "大", -17.309743957085367 ], [ "€", -17.309743957085367 ], [ "奔", -17.309743957085367 ], [ "נ", -17.309743957085367 ], [ "ň", -17.309743957085367 ], [ "ő", -17.309743957085367 ], [ "„", -17.309743957085367 ], [ "ð", -17.309743957085367 ], [ "ị", -17.309743957085367 ], [ "熊", -17.309743957085367 ], [ "阪", -17.309743957085367 ], [ "生", -17.309743957085367 ], [ "京", -17.309743957085367 ], [ "ý", -17.309743957085367 ], [ "а", -17.309743957085367 ], [ "¡", -17.309743957085367 ], [ "ń", -17.309743957085367 ], [ "χ", -17.309743957085367 ], [ "时", -17.309743957085367 ], [ "→", -17.309743957085367 ], [ "ī", -17.309743957085367 ], [ "ע", -17.309743957085367 ] ], "byte_fallback": false } }