Buckets:

rtrm's picture
download
raw
21.7 kB
import{s as dr,o as cr,n as Wt}from"../chunks/scheduler.7c59faff.js";import{S as fr,i as gr,e as $,s,c as d,h as zr,a as p,d as t,b as o,f as k,g as c,j as w,k as m,l,m as f,t as g,n as z,o as u,p as _}from"../chunks/index.09bb5655.js";import{C as ur,H as D,E as hr}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.e5341148.js";import{D as y}from"../chunks/Docstring.493fda50.js";import{T as vr,M as Gt}from"../chunks/TokenizersLanguageContent.0fc17a7a.js";function xr(P){let n,v,r,h,b,F,I="Bytelevel Normalizer",K,C,M,N,S,H,L,i="Lowercase Normalizer",x,B,je,R,Z,Ct,we,Ot="NFC Unicode Normalizer",We,ee,Ge,U,te,yt,Ne,Jt="NFD Unicode Normalizer",Oe,re,Je,j,ne,Tt,Ce,Qt="NFKC Unicode Normalizer",Qe,ae,Xe,W,se,Dt,ye,Xt="NFKD Unicode Normalizer",Ye,oe,Ze,G,le,Pt,Te,Yt="Nmt normalizer",et,ie,tt,T,me,St,De,Zt="Base class for all normalizers",Ft,Pe,er=`This class is not supposed to be instantiated directly. Instead, any implementation of a
Normalizer will return an instance of this class when instantiated.`,Lt,q,$e,Et,Se,tr="Normalize a <code>NormalizedString</code> in-place",It,Fe,rr=`This method allows to modify a <code>NormalizedString</code> to
keep track of the alignment information. If you just want to see the result
of the normalization on a raw string, you can use
<code>normalize_str()</code>`,Bt,V,pe,qt,Le,nr="Normalize the given string",Vt,Ee,ar=`This method provides a way to visualize the effect of a
<a href="/docs/tokenizers/pr_1943/en/api/normalizers#tokenizers.normalizers.Normalizer">Normalizer</a> but it does not keep track of the alignment
information. If you need to get/convert offsets, you can use
<code>normalize()</code>`,rt,de,nt,O,ce,At,Ie,sr=`Precompiled normalizer
Don’t use manually it is used for compatibility for SentencePiece.`,at,fe,st,J,ge,Kt,Be,or="Replace normalizer",ot,ze,lt,Q,ue,Mt,qe,lr=`Allows concatenating multiple other Normalizer as a Sequence.
All the normalizers run in sequence in the given order`,it,he,mt,X,ve,Ht,Ve,ir="Strip normalizer",$t,xe,pt,Y,_e,Rt,Ae,mr="StripAccents normalizer",dt,be,ct,E,ke,Ut,Ke,$r="BertNormalizer",jt,Me,pr=`Takes care of normalizing raw text before giving it to a Bert model.
This includes cleaning the text, handling accents, chinese chars and lowercasing`,ft;return n=new D({props:{title:"ByteLevel",local:"tokenizers.normalizers.ByteLevel",headingTag:"h2"}}),h=new y({props:{name:"class tokenizers.normalizers.ByteLevel",anchor:"tokenizers.normalizers.ByteLevel",parameters:[]}}),C=new D({props:{title:"Lowercase",local:"tokenizers.normalizers.Lowercase",headingTag:"h2"}}),S=new y({props:{name:"class tokenizers.normalizers.Lowercase",anchor:"tokenizers.normalizers.Lowercase",parameters:[]}}),B=new D({props:{title:"NFC",local:"tokenizers.normalizers.NFC",headingTag:"h2"}}),Z=new y({props:{name:"class tokenizers.normalizers.NFC",anchor:"tokenizers.normalizers.NFC",parameters:[]}}),ee=new D({props:{title:"NFD",local:"tokenizers.normalizers.NFD",headingTag:"h2"}}),te=new y({props:{name:"class tokenizers.normalizers.NFD",anchor:"tokenizers.normalizers.NFD",parameters:[]}}),re=new D({props:{title:"NFKC",local:"tokenizers.normalizers.NFKC",headingTag:"h2"}}),ne=new y({props:{name:"class tokenizers.normalizers.NFKC",anchor:"tokenizers.normalizers.NFKC",parameters:[]}}),ae=new D({props:{title:"NFKD",local:"tokenizers.normalizers.NFKD",headingTag:"h2"}}),se=new y({props:{name:"class tokenizers.normalizers.NFKD",anchor:"tokenizers.normalizers.NFKD",parameters:[]}}),oe=new D({props:{title:"Nmt",local:"tokenizers.normalizers.Nmt",headingTag:"h2"}}),le=new y({props:{name:"class tokenizers.normalizers.Nmt",anchor:"tokenizers.normalizers.Nmt",parameters:[]}}),ie=new D({props:{title:"Normalizer",local:"tokenizers.normalizers.Normalizer",headingTag:"h2"}}),me=new y({props:{name:"class tokenizers.normalizers.Normalizer",anchor:"tokenizers.normalizers.Normalizer",parameters:""}}),$e=new y({props:{name:"normalize",anchor:"tokenizers.normalizers.Normalizer.normalize",parameters:[{name:"normalized",val:""}],parametersDescription:[{anchor:"tokenizers.normalizers.Normalizer.normalize.normalized",description:`<strong>normalized</strong> (<code>NormalizedString</code>) &#x2014;
The normalized string on which to apply this
<a href="/docs/tokenizers/pr_1943/en/api/normalizers#tokenizers.normalizers.Normalizer">Normalizer</a>`,name:"normalized"}]}}),pe=new y({props:{name:"normalize_str",anchor:"tokenizers.normalizers.Normalizer.normalize_str",parameters:[{name:"sequence",val:""}],parametersDescription:[{anchor:"tokenizers.normalizers.Normalizer.normalize_str.sequence",description:`<strong>sequence</strong> (<code>str</code>) &#x2014;
A string to normalize`,name:"sequence"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A string after normalization</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>str</code></p>
`}}),de=new D({props:{title:"Precompiled",local:"tokenizers.normalizers.Precompiled",headingTag:"h2"}}),ce=new y({props:{name:"class tokenizers.normalizers.Precompiled",anchor:"tokenizers.normalizers.Precompiled",parameters:[{name:"precompiled_charsmap",val:""}]}}),fe=new D({props:{title:"Replace",local:"tokenizers.normalizers.Replace",headingTag:"h2"}}),ge=new y({props:{name:"class tokenizers.normalizers.Replace",anchor:"tokenizers.normalizers.Replace",parameters:[{name:"pattern",val:""},{name:"content",val:""}]}}),ze=new D({props:{title:"Sequence",local:"tokenizers.normalizers.Sequence",headingTag:"h2"}}),ue=new y({props:{name:"class tokenizers.normalizers.Sequence",anchor:"tokenizers.normalizers.Sequence",parameters:[{name:"normalizers",val:""}],parametersDescription:[{anchor:"tokenizers.normalizers.Sequence.normalizers",description:`<strong>normalizers</strong> (<code>List[Normalizer]</code>) &#x2014;
A list of Normalizer to be run as a sequence`,name:"normalizers"}]}}),he=new D({props:{title:"Strip",local:"tokenizers.normalizers.Strip",headingTag:"h2"}}),ve=new y({props:{name:"class tokenizers.normalizers.Strip",anchor:"tokenizers.normalizers.Strip",parameters:[{name:"left",val:" = True"},{name:"right",val:" = True"}]}}),xe=new D({props:{title:"StripAccents",local:"tokenizers.normalizers.StripAccents",headingTag:"h2"}}),_e=new y({props:{name:"class tokenizers.normalizers.StripAccents",anchor:"tokenizers.normalizers.StripAccents",parameters:[]}}),be=new D({props:{title:"BertNormalizer",local:"tokenizers.normalizers.BertNormalizer",headingTag:"h2"}}),ke=new y({props:{name:"class tokenizers.normalizers.BertNormalizer",anchor:"tokenizers.normalizers.BertNormalizer",parameters:[{name:"clean_text",val:" = True"},{name:"handle_chinese_chars",val:" = True"},{name:"strip_accents",val:" = None"},{name:"lowercase",val:" = True"}],parametersDescription:[{anchor:"tokenizers.normalizers.BertNormalizer.clean_text",description:`<strong>clean_text</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to clean the text, by removing any control characters
and replacing all whitespaces by the classic one.`,name:"clean_text"},{anchor:"tokenizers.normalizers.BertNormalizer.handle_chinese_chars",description:`<strong>handle_chinese_chars</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to handle chinese chars by putting spaces around them.`,name:"handle_chinese_chars"},{anchor:"tokenizers.normalizers.BertNormalizer.strip_accents",description:`<strong>strip_accents</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether to strip all accents. If this option is not specified (ie == None),
then it will be determined by the value for <em>lowercase</em> (as in the original Bert).`,name:"strip_accents"},{anchor:"tokenizers.normalizers.BertNormalizer.lowercase",description:`<strong>lowercase</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to lowercase.`,name:"lowercase"}]}}),{c(){d(n.$$.fragment),v=s(),r=$("div"),d(h.$$.fragment),b=s(),F=$("p"),F.textContent=I,K=s(),d(C.$$.fragment),M=s(),N=$("div"),d(S.$$.fragment),H=s(),L=$("p"),L.textContent=i,x=s(),d(B.$$.fragment),je=s(),R=$("div"),d(Z.$$.fragment),Ct=s(),we=$("p"),we.textContent=Ot,We=s(),d(ee.$$.fragment),Ge=s(),U=$("div"),d(te.$$.fragment),yt=s(),Ne=$("p"),Ne.textContent=Jt,Oe=s(),d(re.$$.fragment),Je=s(),j=$("div"),d(ne.$$.fragment),Tt=s(),Ce=$("p"),Ce.textContent=Qt,Qe=s(),d(ae.$$.fragment),Xe=s(),W=$("div"),d(se.$$.fragment),Dt=s(),ye=$("p"),ye.textContent=Xt,Ye=s(),d(oe.$$.fragment),Ze=s(),G=$("div"),d(le.$$.fragment),Pt=s(),Te=$("p"),Te.textContent=Yt,et=s(),d(ie.$$.fragment),tt=s(),T=$("div"),d(me.$$.fragment),St=s(),De=$("p"),De.textContent=Zt,Ft=s(),Pe=$("p"),Pe.textContent=er,Lt=s(),q=$("div"),d($e.$$.fragment),Et=s(),Se=$("p"),Se.innerHTML=tr,It=s(),Fe=$("p"),Fe.innerHTML=rr,Bt=s(),V=$("div"),d(pe.$$.fragment),qt=s(),Le=$("p"),Le.textContent=nr,Vt=s(),Ee=$("p"),Ee.innerHTML=ar,rt=s(),d(de.$$.fragment),nt=s(),O=$("div"),d(ce.$$.fragment),At=s(),Ie=$("p"),Ie.textContent=sr,at=s(),d(fe.$$.fragment),st=s(),J=$("div"),d(ge.$$.fragment),Kt=s(),Be=$("p"),Be.textContent=or,ot=s(),d(ze.$$.fragment),lt=s(),Q=$("div"),d(ue.$$.fragment),Mt=s(),qe=$("p"),qe.textContent=lr,it=s(),d(he.$$.fragment),mt=s(),X=$("div"),d(ve.$$.fragment),Ht=s(),Ve=$("p"),Ve.textContent=ir,$t=s(),d(xe.$$.fragment),pt=s(),Y=$("div"),d(_e.$$.fragment),Rt=s(),Ae=$("p"),Ae.textContent=mr,dt=s(),d(be.$$.fragment),ct=s(),E=$("div"),d(ke.$$.fragment),Ut=s(),Ke=$("p"),Ke.textContent=$r,jt=s(),Me=$("p"),Me.textContent=pr,this.h()},l(e){c(n.$$.fragment,e),v=o(e),r=p(e,"DIV",{class:!0});var a=k(r);c(h.$$.fragment,a),b=o(a),F=p(a,"P",{"data-svelte-h":!0}),_(F)!=="svelte-1g2mx95"&&(F.textContent=I),a.forEach(t),K=o(e),c(C.$$.fragment,e),M=o(e),N=p(e,"DIV",{class:!0});var gt=k(N);c(S.$$.fragment,gt),H=o(gt),L=p(gt,"P",{"data-svelte-h":!0}),_(L)!=="svelte-1ws8pdc"&&(L.textContent=i),gt.forEach(t),x=o(e),c(B.$$.fragment,e),je=o(e),R=p(e,"DIV",{class:!0});var zt=k(R);c(Z.$$.fragment,zt),Ct=o(zt),we=p(zt,"P",{"data-svelte-h":!0}),_(we)!=="svelte-199ec7z"&&(we.textContent=Ot),zt.forEach(t),We=o(e),c(ee.$$.fragment,e),Ge=o(e),U=p(e,"DIV",{class:!0});var ut=k(U);c(te.$$.fragment,ut),yt=o(ut),Ne=p(ut,"P",{"data-svelte-h":!0}),_(Ne)!=="svelte-17uaov6"&&(Ne.textContent=Jt),ut.forEach(t),Oe=o(e),c(re.$$.fragment,e),Je=o(e),j=p(e,"DIV",{class:!0});var ht=k(j);c(ne.$$.fragment,ht),Tt=o(ht),Ce=p(ht,"P",{"data-svelte-h":!0}),_(Ce)!=="svelte-1mvgix8"&&(Ce.textContent=Qt),ht.forEach(t),Qe=o(e),c(ae.$$.fragment,e),Xe=o(e),W=p(e,"DIV",{class:!0});var vt=k(W);c(se.$$.fragment,vt),Dt=o(vt),ye=p(vt,"P",{"data-svelte-h":!0}),_(ye)!=="svelte-dyixwr"&&(ye.textContent=Xt),vt.forEach(t),Ye=o(e),c(oe.$$.fragment,e),Ze=o(e),G=p(e,"DIV",{class:!0});var xt=k(G);c(le.$$.fragment,xt),Pt=o(xt),Te=p(xt,"P",{"data-svelte-h":!0}),_(Te)!=="svelte-1isyure"&&(Te.textContent=Yt),xt.forEach(t),et=o(e),c(ie.$$.fragment,e),tt=o(e),T=p(e,"DIV",{class:!0});var A=k(T);c(me.$$.fragment,A),St=o(A),De=p(A,"P",{"data-svelte-h":!0}),_(De)!=="svelte-1jpqc6v"&&(De.textContent=Zt),Ft=o(A),Pe=p(A,"P",{"data-svelte-h":!0}),_(Pe)!=="svelte-ne1itq"&&(Pe.textContent=er),Lt=o(A),q=p(A,"DIV",{class:!0});var He=k(q);c($e.$$.fragment,He),Et=o(He),Se=p(He,"P",{"data-svelte-h":!0}),_(Se)!=="svelte-a9bu3y"&&(Se.innerHTML=tr),It=o(He),Fe=p(He,"P",{"data-svelte-h":!0}),_(Fe)!=="svelte-599qjf"&&(Fe.innerHTML=rr),He.forEach(t),Bt=o(A),V=p(A,"DIV",{class:!0});var Re=k(V);c(pe.$$.fragment,Re),qt=o(Re),Le=p(Re,"P",{"data-svelte-h":!0}),_(Le)!=="svelte-1hrxycy"&&(Le.textContent=nr),Vt=o(Re),Ee=p(Re,"P",{"data-svelte-h":!0}),_(Ee)!=="svelte-1meopcd"&&(Ee.innerHTML=ar),Re.forEach(t),A.forEach(t),rt=o(e),c(de.$$.fragment,e),nt=o(e),O=p(e,"DIV",{class:!0});var _t=k(O);c(ce.$$.fragment,_t),At=o(_t),Ie=p(_t,"P",{"data-svelte-h":!0}),_(Ie)!=="svelte-o4n9u0"&&(Ie.textContent=sr),_t.forEach(t),at=o(e),c(fe.$$.fragment,e),st=o(e),J=p(e,"DIV",{class:!0});var bt=k(J);c(ge.$$.fragment,bt),Kt=o(bt),Be=p(bt,"P",{"data-svelte-h":!0}),_(Be)!=="svelte-1s88uyt"&&(Be.textContent=or),bt.forEach(t),ot=o(e),c(ze.$$.fragment,e),lt=o(e),Q=p(e,"DIV",{class:!0});var kt=k(Q);c(ue.$$.fragment,kt),Mt=o(kt),qe=p(kt,"P",{"data-svelte-h":!0}),_(qe)!=="svelte-105zdm9"&&(qe.textContent=lr),kt.forEach(t),it=o(e),c(he.$$.fragment,e),mt=o(e),X=p(e,"DIV",{class:!0});var wt=k(X);c(ve.$$.fragment,wt),Ht=o(wt),Ve=p(wt,"P",{"data-svelte-h":!0}),_(Ve)!=="svelte-1279gw7"&&(Ve.textContent=ir),wt.forEach(t),$t=o(e),c(xe.$$.fragment,e),pt=o(e),Y=p(e,"DIV",{class:!0});var Nt=k(Y);c(_e.$$.fragment,Nt),Rt=o(Nt),Ae=p(Nt,"P",{"data-svelte-h":!0}),_(Ae)!=="svelte-7kf6fw"&&(Ae.textContent=mr),Nt.forEach(t),dt=o(e),c(be.$$.fragment,e),ct=o(e),E=p(e,"DIV",{class:!0});var Ue=k(E);c(ke.$$.fragment,Ue),Ut=o(Ue),Ke=p(Ue,"P",{"data-svelte-h":!0}),_(Ke)!=="svelte-kgfnlo"&&(Ke.textContent=$r),jt=o(Ue),Me=p(Ue,"P",{"data-svelte-h":!0}),_(Me)!=="svelte-ma12ts"&&(Me.textContent=pr),Ue.forEach(t),this.h()},h(){w(r,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(T,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,a){f(n,e,a),l(e,v,a),l(e,r,a),f(h,r,null),m(r,b),m(r,F),l(e,K,a),f(C,e,a),l(e,M,a),l(e,N,a),f(S,N,null),m(N,H),m(N,L),l(e,x,a),f(B,e,a),l(e,je,a),l(e,R,a),f(Z,R,null),m(R,Ct),m(R,we),l(e,We,a),f(ee,e,a),l(e,Ge,a),l(e,U,a),f(te,U,null),m(U,yt),m(U,Ne),l(e,Oe,a),f(re,e,a),l(e,Je,a),l(e,j,a),f(ne,j,null),m(j,Tt),m(j,Ce),l(e,Qe,a),f(ae,e,a),l(e,Xe,a),l(e,W,a),f(se,W,null),m(W,Dt),m(W,ye),l(e,Ye,a),f(oe,e,a),l(e,Ze,a),l(e,G,a),f(le,G,null),m(G,Pt),m(G,Te),l(e,et,a),f(ie,e,a),l(e,tt,a),l(e,T,a),f(me,T,null),m(T,St),m(T,De),m(T,Ft),m(T,Pe),m(T,Lt),m(T,q),f($e,q,null),m(q,Et),m(q,Se),m(q,It),m(q,Fe),m(T,Bt),m(T,V),f(pe,V,null),m(V,qt),m(V,Le),m(V,Vt),m(V,Ee),l(e,rt,a),f(de,e,a),l(e,nt,a),l(e,O,a),f(ce,O,null),m(O,At),m(O,Ie),l(e,at,a),f(fe,e,a),l(e,st,a),l(e,J,a),f(ge,J,null),m(J,Kt),m(J,Be),l(e,ot,a),f(ze,e,a),l(e,lt,a),l(e,Q,a),f(ue,Q,null),m(Q,Mt),m(Q,qe),l(e,it,a),f(he,e,a),l(e,mt,a),l(e,X,a),f(ve,X,null),m(X,Ht),m(X,Ve),l(e,$t,a),f(xe,e,a),l(e,pt,a),l(e,Y,a),f(_e,Y,null),m(Y,Rt),m(Y,Ae),l(e,dt,a),f(be,e,a),l(e,ct,a),l(e,E,a),f(ke,E,null),m(E,Ut),m(E,Ke),m(E,jt),m(E,Me),ft=!0},p:Wt,i(e){ft||(g(n.$$.fragment,e),g(h.$$.fragment,e),g(C.$$.fragment,e),g(S.$$.fragment,e),g(B.$$.fragment,e),g(Z.$$.fragment,e),g(ee.$$.fragment,e),g(te.$$.fragment,e),g(re.$$.fragment,e),g(ne.$$.fragment,e),g(ae.$$.fragment,e),g(se.$$.fragment,e),g(oe.$$.fragment,e),g(le.$$.fragment,e),g(ie.$$.fragment,e),g(me.$$.fragment,e),g($e.$$.fragment,e),g(pe.$$.fragment,e),g(de.$$.fragment,e),g(ce.$$.fragment,e),g(fe.$$.fragment,e),g(ge.$$.fragment,e),g(ze.$$.fragment,e),g(ue.$$.fragment,e),g(he.$$.fragment,e),g(ve.$$.fragment,e),g(xe.$$.fragment,e),g(_e.$$.fragment,e),g(be.$$.fragment,e),g(ke.$$.fragment,e),ft=!0)},o(e){z(n.$$.fragment,e),z(h.$$.fragment,e),z(C.$$.fragment,e),z(S.$$.fragment,e),z(B.$$.fragment,e),z(Z.$$.fragment,e),z(ee.$$.fragment,e),z(te.$$.fragment,e),z(re.$$.fragment,e),z(ne.$$.fragment,e),z(ae.$$.fragment,e),z(se.$$.fragment,e),z(oe.$$.fragment,e),z(le.$$.fragment,e),z(ie.$$.fragment,e),z(me.$$.fragment,e),z($e.$$.fragment,e),z(pe.$$.fragment,e),z(de.$$.fragment,e),z(ce.$$.fragment,e),z(fe.$$.fragment,e),z(ge.$$.fragment,e),z(ze.$$.fragment,e),z(ue.$$.fragment,e),z(he.$$.fragment,e),z(ve.$$.fragment,e),z(xe.$$.fragment,e),z(_e.$$.fragment,e),z(be.$$.fragment,e),z(ke.$$.fragment,e),ft=!1},d(e){e&&(t(v),t(r),t(K),t(M),t(N),t(x),t(je),t(R),t(We),t(Ge),t(U),t(Oe),t(Je),t(j),t(Qe),t(Xe),t(W),t(Ye),t(Ze),t(G),t(et),t(tt),t(T),t(rt),t(nt),t(O),t(at),t(st),t(J),t(ot),t(lt),t(Q),t(it),t(mt),t(X),t($t),t(pt),t(Y),t(dt),t(ct),t(E)),u(n,e),u(h),u(C,e),u(S),u(B,e),u(Z),u(ee,e),u(te),u(re,e),u(ne),u(ae,e),u(se),u(oe,e),u(le),u(ie,e),u(me),u($e),u(pe),u(de,e),u(ce),u(fe,e),u(ge),u(ze,e),u(ue),u(he,e),u(ve),u(xe,e),u(_e),u(be,e),u(ke)}}}function _r(P){let n,v;return n=new Gt({props:{$$slots:{default:[xr]},$$scope:{ctx:P}}}),{c(){d(n.$$.fragment)},l(r){c(n.$$.fragment,r)},m(r,h){f(n,r,h),v=!0},p(r,h){const b={};h&2&&(b.$$scope={dirty:h,ctx:r}),n.$set(b)},i(r){v||(g(n.$$.fragment,r),v=!0)},o(r){z(n.$$.fragment,r),v=!1},d(r){u(n,r)}}}function br(P){let n,v='The Rust API Reference is available directly on the <a href="https://docs.rs/tokenizers/latest/tokenizers/" rel="nofollow">Docs.rs</a> website.';return{c(){n=$("p"),n.innerHTML=v},l(r){n=p(r,"P",{"data-svelte-h":!0}),_(n)!=="svelte-4ytcyb"&&(n.innerHTML=v)},m(r,h){l(r,n,h)},p:Wt,d(r){r&&t(n)}}}function kr(P){let n,v;return n=new Gt({props:{$$slots:{default:[br]},$$scope:{ctx:P}}}),{c(){d(n.$$.fragment)},l(r){c(n.$$.fragment,r)},m(r,h){f(n,r,h),v=!0},p(r,h){const b={};h&2&&(b.$$scope={dirty:h,ctx:r}),n.$set(b)},i(r){v||(g(n.$$.fragment,r),v=!0)},o(r){z(n.$$.fragment,r),v=!1},d(r){u(n,r)}}}function wr(P){let n,v="The node API has not been documented yet.";return{c(){n=$("p"),n.textContent=v},l(r){n=p(r,"P",{"data-svelte-h":!0}),_(n)!=="svelte-1mrchm6"&&(n.textContent=v)},m(r,h){l(r,n,h)},p:Wt,d(r){r&&t(n)}}}function Nr(P){let n,v;return n=new Gt({props:{$$slots:{default:[wr]},$$scope:{ctx:P}}}),{c(){d(n.$$.fragment)},l(r){c(n.$$.fragment,r)},m(r,h){f(n,r,h),v=!0},p(r,h){const b={};h&2&&(b.$$scope={dirty:h,ctx:r}),n.$set(b)},i(r){v||(g(n.$$.fragment,r),v=!0)},o(r){z(n.$$.fragment,r),v=!1},d(r){u(n,r)}}}function Cr(P){let n,v,r,h,b,F,I,K,C,M,N,S,H,L;return b=new ur({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),I=new D({props:{title:"Normalizers",local:"normalizers",headingTag:"h1"}}),C=new vr({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Nr],rust:[kr],python:[_r]},$$scope:{ctx:P}}}),N=new hr({props:{source:"https://github.com/huggingface/tokenizers/blob/main/docs/source-doc-builder/api/normalizers.mdx"}}),{c(){n=$("meta"),v=s(),r=$("p"),h=s(),d(b.$$.fragment),F=s(),d(I.$$.fragment),K=s(),d(C.$$.fragment),M=s(),d(N.$$.fragment),S=s(),H=$("p"),this.h()},l(i){const x=zr("svelte-u9bgzb",document.head);n=p(x,"META",{name:!0,content:!0}),x.forEach(t),v=o(i),r=p(i,"P",{}),k(r).forEach(t),h=o(i),c(b.$$.fragment,i),F=o(i),c(I.$$.fragment,i),K=o(i),c(C.$$.fragment,i),M=o(i),c(N.$$.fragment,i),S=o(i),H=p(i,"P",{}),k(H).forEach(t),this.h()},h(){w(n,"name","hf:doc:metadata"),w(n,"content",yr)},m(i,x){m(document.head,n),l(i,v,x),l(i,r,x),l(i,h,x),f(b,i,x),l(i,F,x),f(I,i,x),l(i,K,x),f(C,i,x),l(i,M,x),f(N,i,x),l(i,S,x),l(i,H,x),L=!0},p(i,[x]){const B={};x&2&&(B.$$scope={dirty:x,ctx:i}),C.$set(B)},i(i){L||(g(b.$$.fragment,i),g(I.$$.fragment,i),g(C.$$.fragment,i),g(N.$$.fragment,i),L=!0)},o(i){z(b.$$.fragment,i),z(I.$$.fragment,i),z(C.$$.fragment,i),z(N.$$.fragment,i),L=!1},d(i){i&&(t(v),t(r),t(h),t(F),t(K),t(M),t(S),t(H)),t(n),u(b,i),u(I,i),u(C,i),u(N,i)}}}const yr='{"title":"Normalizers","local":"normalizers","sections":[{"title":"ByteLevel","local":"tokenizers.normalizers.ByteLevel","sections":[],"depth":2},{"title":"Lowercase","local":"tokenizers.normalizers.Lowercase","sections":[],"depth":2},{"title":"NFC","local":"tokenizers.normalizers.NFC","sections":[],"depth":2},{"title":"NFD","local":"tokenizers.normalizers.NFD","sections":[],"depth":2},{"title":"NFKC","local":"tokenizers.normalizers.NFKC","sections":[],"depth":2},{"title":"NFKD","local":"tokenizers.normalizers.NFKD","sections":[],"depth":2},{"title":"Nmt","local":"tokenizers.normalizers.Nmt","sections":[],"depth":2},{"title":"Normalizer","local":"tokenizers.normalizers.Normalizer","sections":[],"depth":2},{"title":"Precompiled","local":"tokenizers.normalizers.Precompiled","sections":[],"depth":2},{"title":"Replace","local":"tokenizers.normalizers.Replace","sections":[],"depth":2},{"title":"Sequence","local":"tokenizers.normalizers.Sequence","sections":[],"depth":2},{"title":"Strip","local":"tokenizers.normalizers.Strip","sections":[],"depth":2},{"title":"StripAccents","local":"tokenizers.normalizers.StripAccents","sections":[],"depth":2},{"title":"BertNormalizer","local":"tokenizers.normalizers.BertNormalizer","sections":[],"depth":2}],"depth":1}';function Tr(P){return cr(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Er extends fr{constructor(n){super(),gr(this,n,Tr,Cr,dr,{})}}export{Er as component};

Xet Storage Details

Size:
21.7 kB
·
Xet hash:
804b21491c1b10d2f0fdcfe311fdad664fc29c742c630b698879f02f127121e4

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.