Buckets:

hf-doc-build/doc / transformers /main /de /_app /pages /preprocessing.mdx-hf-doc-builder.js
rtrm's picture
download
raw
119 kB
import{S as Lc,i as Hc,s as Kc,e as l,k as h,w as b,t as i,M as Oc,c as r,d as e,m,a as p,x as d,h as u,b as c,N as Yc,G as a,g as t,y as j,q as f,o as g,B as w,v as so,L as Pc}from"../chunks/vendor-hf-doc-builder.js";import{T as eo}from"../chunks/Tip-hf-doc-builder.js";import{Y as ao}from"../chunks/Youtube-hf-doc-builder.js";import{I as y}from"../chunks/IconCopyLink-hf-doc-builder.js";import{C as k}from"../chunks/CodeBlock-hf-doc-builder.js";import{D as no}from"../chunks/DocNotebookDropdown-hf-doc-builder.js";import{F as lo,M as Qc}from"../chunks/Markdown-hf-doc-builder.js";function ro(E){let v,T,o,J,M;return{c(){v=l("p"),T=i("Wenn Sie ein vortrainiertes Modell verwenden m\xF6chten, ist es wichtig, den zugeh\xF6rigen vortrainierten Tokenizer zu verwenden. Dadurch wird sichergestellt, dass der Text auf die gleiche Weise aufgeteilt wird wie das Pretraining-Korpus und die gleichen entsprechenden Token-zu-Index (in der Regel als "),o=l("em"),J=i("vocab"),M=i(" bezeichnet) w\xE4hrend des Pretrainings verwendet werden.")},l($){v=r($,"P",{});var S=p(v);T=u(S,"Wenn Sie ein vortrainiertes Modell verwenden m\xF6chten, ist es wichtig, den zugeh\xF6rigen vortrainierten Tokenizer zu verwenden. Dadurch wird sichergestellt, dass der Text auf die gleiche Weise aufgeteilt wird wie das Pretraining-Korpus und die gleichen entsprechenden Token-zu-Index (in der Regel als "),o=r(S,"EM",{});var as=p(o);J=u(as,"vocab"),as.forEach(e),M=u(S," bezeichnet) w\xE4hrend des Pretrainings verwendet werden."),S.forEach(e)},m($,S){t($,v,S),a(v,T),a(v,o),a(o,J),a(v,M)},d($){$&&e(v)}}}function po(E){let v,T;return v=new k({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaF9zZW50ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFwcmludChlbmNvZGVkX2lucHV0KQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: tensor([[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]),
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: tensor([[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]),
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]])}`}}),{c(){b(v.$$.fragment)},l(o){d(v.$$.fragment,o)},m(o,J){j(v,o,J),T=!0},p:Pc,i(o){T||(f(v.$$.fragment,o),T=!0)},o(o){g(v.$$.fragment,o),T=!1},d(o){w(v,o)}}}function to(E){let v,T;return v=new Qc({props:{$$slots:{default:[po]},$$scope:{ctx:E}}}),{c(){b(v.$$.fragment)},l(o){d(v.$$.fragment,o)},m(o,J){j(v,o,J),T=!0},p(o,J){const M={};J&2&&(M.$$scope={dirty:J,ctx:o}),v.$set(M)},i(o){T||(f(v.$$.fragment,o),T=!0)},o(o){g(v.$$.fragment,o),T=!1},d(o){w(v,o)}}}function io(E){let v,T;return v=new k({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaF9zZW50ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJ0ZiUyMiklMEFwcmludChlbmNvZGVkX2lucHV0KQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: &lt;tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy=
array([[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
dtype=int32)&gt;,
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: &lt;tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy=
array([[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], dtype=int32)&gt;,
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: &lt;tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy=
array([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], dtype=int32)&gt;}`}}),{c(){b(v.$$.fragment)},l(o){d(v.$$.fragment,o)},m(o,J){j(v,o,J),T=!0},p:Pc,i(o){T||(f(v.$$.fragment,o),T=!0)},o(o){g(v.$$.fragment,o),T=!1},d(o){w(v,o)}}}function uo(E){let v,T;return v=new Qc({props:{$$slots:{default:[io]},$$scope:{ctx:E}}}),{c(){b(v.$$.fragment)},l(o){d(v.$$.fragment,o)},m(o,J){j(v,o,J),T=!0},p(o,J){const M={};J&2&&(M.$$scope={dirty:J,ctx:o}),v.$set(M)},i(o){T||(f(v.$$.fragment,o),T=!0)},o(o){g(v.$$.fragment,o),T=!1},d(o){w(v,o)}}}function ho(E){let v,T,o,J,M,$,S,as,qp,cl,Gs,ol,Ke,Yp,bl,U,Qa,Pp,Qp,La,Lp,Hp,Ha,Kp,dl,F,ns,Ka,Zs,Op,Oa,st,jl,Fs,fl,A,et,Oe,at,nt,sn,lt,rt,gl,ls,wl,rs,pt,en,tt,it,vl,C,ps,an,Cs,ut,nn,ht,kl,ts,mt,ln,ct,ot,Tl,Rs,Jl,sa,bt,Ml,Ns,yl,ea,dt,$l,B,aa,na,jt,ft,gt,la,ra,wt,vt,kt,pa,ta,Tt,Jt,zl,is,Mt,rn,yt,$t,_l,Ds,El,x,zt,pn,_t,Et,tn,It,St,Il,ia,Ut,Sl,Xs,Ul,R,us,un,qs,At,hn,Bt,Al,hs,xt,mn,Wt,Vt,Bl,ua,Gt,xl,Ys,Wl,ha,Zt,Vl,N,ms,cn,Ps,Ft,on,Ct,Gl,ma,Rt,Zl,ca,Nt,Fl,Qs,Cl,D,cs,bn,Ls,Dt,dn,Xt,Rl,oa,qt,Nl,z,Yt,jn,Pt,Qt,fn,Lt,Ht,gn,Kt,Ot,Dl,os,Xl,X,bs,wn,Hs,si,vn,ei,ql,ds,ai,ba,ni,li,Yl,Ks,Pl,W,ri,Os,pi,ti,se,ii,ui,Ql,ee,Ll,js,hi,kn,mi,ci,Hl,ae,Kl,da,oi,Ol,V,Tn,bi,di,Jn,ji,fi,ja,Mn,gi,wi,sr,q,fs,yn,ne,vi,$n,ki,er,gs,Ti,le,Ji,Mi,ar,ws,yi,re,$i,zi,nr,pe,lr,fa,zn,_i,rr,te,pr,ie,_n,Ei,tr,ue,ir,ga,Ii,ur,Y,vs,En,he,Si,In,Ui,hr,_,Ai,Sn,Bi,xi,Un,Wi,Vi,An,Gi,Zi,mr,ks,Fi,Bn,Ci,Ri,cr,me,or,Ts,Ni,xn,Di,Xi,br,ce,dr,P,Js,Wn,oe,qi,Vn,Yi,jr,wa,Pi,fr,be,gr,va,Qi,wr,de,vr,ka,Li,kr,je,Tr,Ta,Hi,Jr,fe,Mr,Ja,Ki,yr,Q,Ms,Gn,ge,Oi,Zn,su,$r,Ma,eu,zr,G,au,we,nu,lu,Fn,ru,pu,_r,ve,Er,ys,tu,ke,iu,uu,Ir,Te,Sr,ya,$a,jh,Ur,L,$s,Cn,Je,hu,Rn,mu,Ar,zs,cu,Nn,ou,bu,Br,Me,xr,H,_s,Dn,ye,du,Xn,ju,Wr,Es,fu,$e,qn,gu,wu,Vr,za,I,vu,ze,Yn,ku,Tu,_e,Pn,Ju,Mu,Ee,Qn,yu,$u,Gr,Ie,Zr,Se,K,zu,_a,Ln,_u,Eu,Hn,Iu,Su,Fr,Ue,Cr,Ae,Be,Uu,xe,Kn,Au,Bu,Rr,We,Nr,Ve,On,xu,Dr,Ge,Xr,Ea,Wu,qr,Ze,Yr,Ia,Sa,fh,Pr,O,Is,sl,Fe,Vu,el,Gu,Qr,Ua,Zu,Lr,Ss,al,Fu,Cu,nl,Ru,Hr,Us,Nu,Ce,Du,Xu,Kr,Re,Or,Aa,qu,sp,Ne,ep,Ba,Yu,ap,De,np,As,Pu,xa,Qu,Lu,lp,Xe,rp,ss,Bs,ll,qe,Hu,rl,Ku,pp,Wa,Ou,tp,Ye,ip,Va,es,sh,pl,eh,ah,tl,nh,lh,up,Pe,hp,Qe,il,rh,mp,Le,cp,Z,ph,ul,th,ih,hl,uh,hh,op,Ga,mh,bp;return $=new y({}),Gs=new no({props:{classNames:"absolute z-10 right-0 top-0",options:[{label:"Mixed",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/de/preprocessing.ipynb"},{label:"PyTorch",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/de/pytorch/preprocessing.ipynb"},{label:"TensorFlow",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/de/tensorflow/preprocessing.ipynb"},{label:"Mixed",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/de/preprocessing.ipynb"},{label:"PyTorch",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/de/pytorch/preprocessing.ipynb"},{label:"TensorFlow",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/de/tensorflow/preprocessing.ipynb"}]}}),Zs=new y({}),Fs=new ao({props:{id:"Yffk5aydLzg"}}),ls=new eo({props:{$$slots:{default:[ro]},$$scope:{ctx:E}}}),Cs=new y({}),Rs=new k({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJiZXJ0LWJhc2UtY2FzZWQlMjIp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;bert-base-cased&quot;</span>)`}}),Ns=new k({props:{code:"ZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplciglMjJEbyUyMG5vdCUyMG1lZGRsZSUyMGluJTIwdGhlJTIwYWZmYWlycyUyMG9mJTIwd2l6YXJkcyUyQyUyMGZvciUyMHRoZXklMjBhcmUlMjBzdWJ0bGUlMjBhbmQlMjBxdWljayUyMHRvJTIwYW5nZXIuJTIyKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(<span class="hljs-string">&quot;Do not meddle in the affairs of wizards, for they are subtle and quick to anger.&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [<span class="hljs-number">101</span>, <span class="hljs-number">2079</span>, <span class="hljs-number">2025</span>, <span class="hljs-number">19960</span>, <span class="hljs-number">10362</span>, <span class="hljs-number">1999</span>, <span class="hljs-number">1996</span>, <span class="hljs-number">3821</span>, <span class="hljs-number">1997</span>, <span class="hljs-number">16657</span>, <span class="hljs-number">1010</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">2027</span>, <span class="hljs-number">2024</span>, <span class="hljs-number">11259</span>, <span class="hljs-number">1998</span>, <span class="hljs-number">4248</span>, <span class="hljs-number">2000</span>, <span class="hljs-number">4963</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]}`}}),Ds=new k({props:{code:"dG9rZW5pemVyLmRlY29kZShlbmNvZGVkX2lucHV0JTVCJTIyaW5wdXRfaWRzJTIyJTVEKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.decode(encoded_input[<span class="hljs-string">&quot;input_ids&quot;</span>])
<span class="hljs-string">&#x27;[CLS] Do not meddle in the affairs of wizards, for they are subtle and quick to anger. [SEP]&#x27;</span>`}}),Xs=new k({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoYmF0Y2hfc2VudGVuY2VzKSUwQXByaW50KGVuY29kZWRfaW5wdXRzKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_inputs = tokenizer(batch_sentences)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_inputs)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>]],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]}`}}),qs=new y({}),Ys=new k({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaF9zZW50ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSklMEFwcmludChlbmNvZGVkX2lucHV0KQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]}`}}),Ps=new y({}),Qs=new k({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaF9zZW50ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]}`}}),Ls=new y({}),os=new lo({props:{pytorch:!0,tensorflow:!0,jax:!1,$$slots:{tensorflow:[uo],pytorch:[to]},$$scope:{ctx:E}}}),Hs=new y({}),Ks=new k({props:{code:"cGlwJTIwaW5zdGFsbCUyMGRhdGFzZXRz",highlighted:"pip install datasets"}}),ee=new k({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTJDJTIwQXVkaW8lMEElMEFkYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMlBvbHlBSSUyRm1pbmRzMTQlMjIlMkMlMjBuYW1lJTNEJTIyZW4tVVMlMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset, Audio
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;PolyAI/minds14&quot;</span>, name=<span class="hljs-string">&quot;en-US&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)`}}),ae=new k({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>]
{<span class="hljs-string">&#x27;array&#x27;</span>: array([ <span class="hljs-number">0.</span> , <span class="hljs-number">0.00024414</span>, -<span class="hljs-number">0.00024414</span>, ..., -<span class="hljs-number">0.00024414</span>,
<span class="hljs-number">0.</span> , <span class="hljs-number">0.</span> ], dtype=float32),
<span class="hljs-string">&#x27;path&#x27;</span>: <span class="hljs-string">&#x27;/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav&#x27;</span>,
<span class="hljs-string">&#x27;sampling_rate&#x27;</span>: <span class="hljs-number">8000</span>}`}}),ne=new y({}),pe=new k({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJQb2x5QUklMkZtaW5kczE0JTIyJTJDJTIwbmFtZSUzRCUyMmVuLVVTJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhc2V0JTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVE",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;PolyAI/minds14&quot;</span>, name=<span class="hljs-string">&quot;en-US&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>]
{<span class="hljs-string">&#x27;array&#x27;</span>: array([ <span class="hljs-number">0.</span> , <span class="hljs-number">0.00024414</span>, -<span class="hljs-number">0.00024414</span>, ..., -<span class="hljs-number">0.00024414</span>,
<span class="hljs-number">0.</span> , <span class="hljs-number">0.</span> ], dtype=float32),
<span class="hljs-string">&#x27;path&#x27;</span>: <span class="hljs-string">&#x27;/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav&#x27;</span>,
<span class="hljs-string">&#x27;sampling_rate&#x27;</span>: <span class="hljs-number">8000</span>}`}}),te=new k({props:{code:"ZGF0YXNldCUyMCUzRCUyMGRhdGFzZXQuY2FzdF9jb2x1bW4oJTIyYXVkaW8lMjIlMkMlMjBBdWRpbyhzYW1wbGluZ19yYXRlJTNEMTZfMDAwKSk=",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = dataset.cast_column(<span class="hljs-string">&quot;audio&quot;</span>, Audio(sampling_rate=<span class="hljs-number">16_000</span>))'}}),ue=new k({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>]
{<span class="hljs-string">&#x27;array&#x27;</span>: array([ <span class="hljs-number">2.3443763e-05</span>, <span class="hljs-number">2.1729663e-04</span>, <span class="hljs-number">2.2145823e-04</span>, ...,
<span class="hljs-number">3.8356509e-05</span>, -<span class="hljs-number">7.3497440e-06</span>, -<span class="hljs-number">2.1754686e-05</span>], dtype=float32),
<span class="hljs-string">&#x27;path&#x27;</span>: <span class="hljs-string">&#x27;/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav&#x27;</span>,
<span class="hljs-string">&#x27;sampling_rate&#x27;</span>: <span class="hljs-number">16000</span>}`}}),he=new y({}),me=new k({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9GZWF0dXJlRXh0cmFjdG9yJTBBJTBBZmVhdHVyZV9leHRyYWN0b3IlMjAlM0QlMjBBdXRvRmVhdHVyZUV4dHJhY3Rvci5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZ3YXYydmVjMi1iYXNlJTIyKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor
<span class="hljs-meta">&gt;&gt;&gt; </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">&quot;facebook/wav2vec2-base&quot;</span>)`}}),ce=new k({props:{code:"YXVkaW9faW5wdXQlMjAlM0QlMjAlNUJkYXRhc2V0JTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTVCJTIyYXJyYXklMjIlNUQlNUQlMEFmZWF0dXJlX2V4dHJhY3RvcihhdWRpb19pbnB1dCUyQyUyMHNhbXBsaW5nX3JhdGUlM0QxNjAwMCk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>audio_input = [dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>][<span class="hljs-string">&quot;array&quot;</span>]]
<span class="hljs-meta">&gt;&gt;&gt; </span>feature_extractor(audio_input, sampling_rate=<span class="hljs-number">16000</span>)
{<span class="hljs-string">&#x27;input_values&#x27;</span>: [array([ <span class="hljs-number">3.8106556e-04</span>, <span class="hljs-number">2.7506407e-03</span>, <span class="hljs-number">2.8015103e-03</span>, ...,
<span class="hljs-number">5.6335266e-04</span>, <span class="hljs-number">4.6588284e-06</span>, -<span class="hljs-number">1.7142107e-04</span>], dtype=float32)]}`}}),oe=new y({}),be=new k({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RCU1QiUyMmFycmF5JTIyJTVELnNoYXBlJTBBJTBBZGF0YXNldCU1QjElNUQlNUIlMjJhdWRpbyUyMiU1RCU1QiUyMmFycmF5JTIyJTVELnNoYXBl",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>][<span class="hljs-string">&quot;array&quot;</span>].shape
(<span class="hljs-number">173398</span>,)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">1</span>][<span class="hljs-string">&quot;audio&quot;</span>][<span class="hljs-string">&quot;array&quot;</span>].shape
(<span class="hljs-number">106496</span>,)`}}),de=new k({props:{code:"ZGVmJTIwcHJlcHJvY2Vzc19mdW5jdGlvbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBhdWRpb19hcnJheXMlMjAlM0QlMjAlNUJ4JTVCJTIyYXJyYXklMjIlNUQlMjBmb3IlMjB4JTIwaW4lMjBleGFtcGxlcyU1QiUyMmF1ZGlvJTIyJTVEJTVEJTBBJTIwJTIwJTIwJTIwaW5wdXRzJTIwJTNEJTIwZmVhdHVyZV9leHRyYWN0b3IoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYXVkaW9fYXJyYXlzJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2FtcGxpbmdfcmF0ZSUzRDE2MDAwJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcGFkZGluZyUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXhfbGVuZ3RoJTNEMTAwMDAwJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjApJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwaW5wdXRz",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> audio_arrays = [x[<span class="hljs-string">&quot;array&quot;</span>] <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;audio&quot;</span>]]
<span class="hljs-meta">... </span> inputs = feature_extractor(
<span class="hljs-meta">... </span> audio_arrays,
<span class="hljs-meta">... </span> sampling_rate=<span class="hljs-number">16000</span>,
<span class="hljs-meta">... </span> padding=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span> max_length=<span class="hljs-number">100000</span>,
<span class="hljs-meta">... </span> truncation=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span> )
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> inputs`}}),je=new k({props:{code:"cHJvY2Vzc2VkX2RhdGFzZXQlMjAlM0QlMjBwcmVwcm9jZXNzX2Z1bmN0aW9uKGRhdGFzZXQlNUIlM0E1JTVEKQ==",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>processed_dataset = preprocess_function(dataset[:<span class="hljs-number">5</span>])'}}),fe=new k({props:{code:"cHJvY2Vzc2VkX2RhdGFzZXQlNUIlMjJpbnB1dF92YWx1ZXMlMjIlNUQlNUIwJTVELnNoYXBlJTBBJTBBcHJvY2Vzc2VkX2RhdGFzZXQlNUIlMjJpbnB1dF92YWx1ZXMlMjIlNUQlNUIxJTVELnNoYXBl",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>processed_dataset[<span class="hljs-string">&quot;input_values&quot;</span>][<span class="hljs-number">0</span>].shape
(<span class="hljs-number">100000</span>,)
<span class="hljs-meta">&gt;&gt;&gt; </span>processed_dataset[<span class="hljs-string">&quot;input_values&quot;</span>][<span class="hljs-number">1</span>].shape
(<span class="hljs-number">100000</span>,)`}}),ge=new y({}),ve=new k({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJmb29kMTAxJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiU1QiUzQTEwMCU1RCUyMik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;food101&quot;</span>, split=<span class="hljs-string">&quot;train[:100]&quot;</span>)`}}),Te=new k({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJpbWFnZSUyMiU1RA==",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;image&quot;</span>]'}}),Je=new y({}),Me=new k({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9JbWFnZVByb2Nlc3NvciUwQSUwQWltYWdlX3Byb2Nlc3NvciUyMCUzRCUyMEF1dG9JbWFnZVByb2Nlc3Nvci5mcm9tX3ByZXRyYWluZWQoJTIyZ29vZ2xlJTJGdml0LWJhc2UtcGF0Y2gxNi0yMjQlMjIp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span>image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;google/vit-base-patch16-224&quot;</span>)`}}),ye=new y({}),Ie=new k({props:{code:"ZnJvbSUyMHRvcmNodmlzaW9uLnRyYW5zZm9ybXMlMjBpbXBvcnQlMjBDb21wb3NlJTJDJTIwTm9ybWFsaXplJTJDJTIwUmFuZG9tUmVzaXplZENyb3AlMkMlMjBDb2xvckppdHRlciUyQyUyMFRvVGVuc29yJTBBJTBBbm9ybWFsaXplJTIwJTNEJTIwTm9ybWFsaXplKG1lYW4lM0RpbWFnZV9wcm9jZXNzb3IuaW1hZ2VfbWVhbiUyQyUyMHN0ZCUzRGltYWdlX3Byb2Nlc3Nvci5pbWFnZV9zdGQpJTBBX3RyYW5zZm9ybXMlMjAlM0QlMjBDb21wb3NlKCUwQSUyMCUyMCUyMCUyMCU1QlJhbmRvbVJlc2l6ZWRDcm9wKGltYWdlX3Byb2Nlc3Nvci5zaXplJTVCJTIyaGVpZ2h0JTIyJTVEKSUyQyUyMENvbG9ySml0dGVyKGJyaWdodG5lc3MlM0QwLjUlMkMlMjBodWUlM0QwLjUpJTJDJTIwVG9UZW5zb3IoKSUyQyUyMG5vcm1hbGl6ZSU1RCUwQSk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> torchvision.transforms <span class="hljs-keyword">import</span> Compose, Normalize, RandomResizedCrop, ColorJitter, ToTensor
<span class="hljs-meta">&gt;&gt;&gt; </span>normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
<span class="hljs-meta">&gt;&gt;&gt; </span>_transforms = Compose(
<span class="hljs-meta">... </span> [RandomResizedCrop(image_processor.size[<span class="hljs-string">&quot;height&quot;</span>]), ColorJitter(brightness=<span class="hljs-number">0.5</span>, hue=<span class="hljs-number">0.5</span>), ToTensor(), normalize]
<span class="hljs-meta">... </span>)`}}),Ue=new k({props:{code:"ZGVmJTIwdHJhbnNmb3JtcyhleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMnBpeGVsX3ZhbHVlcyUyMiU1RCUyMCUzRCUyMCU1Ql90cmFuc2Zvcm1zKGltYWdlLmNvbnZlcnQoJTIyUkdCJTIyKSklMjBmb3IlMjBpbWFnZSUyMGluJTIwZXhhbXBsZXMlNUIlMjJpbWFnZSUyMiU1RCU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGV4YW1wbGVz",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">transforms</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;pixel_values&quot;</span>] = [_transforms(image.convert(<span class="hljs-string">&quot;RGB&quot;</span>)) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;image&quot;</span>]]
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples`}}),We=new k({props:{code:"ZGF0YXNldC5zZXRfdHJhbnNmb3JtKHRyYW5zZm9ybXMp",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset.set_transform(transforms)'}}),Ge=new k({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJpbWFnZSUyMiU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;image&quot;</span>]
{<span class="hljs-string">&#x27;image&#x27;</span>: &lt;PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=384x512 at <span class="hljs-number">0x7F1A7B0630D0</span>&gt;,
<span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-number">6</span>,
<span class="hljs-string">&#x27;pixel_values&#x27;</span>: tensor([[[ <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0745</span>, <span class="hljs-number">0.1216</span>, ..., -<span class="hljs-number">0.9922</span>, -<span class="hljs-number">0.9922</span>, -<span class="hljs-number">0.9922</span>],
[-<span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0667</span>, <span class="hljs-number">0.1294</span>, ..., -<span class="hljs-number">0.9765</span>, -<span class="hljs-number">0.9843</span>, -<span class="hljs-number">0.9922</span>],
[ <span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0824</span>, <span class="hljs-number">0.1137</span>, ..., -<span class="hljs-number">0.9765</span>, -<span class="hljs-number">0.9686</span>, -<span class="hljs-number">0.8667</span>],
...,
[ <span class="hljs-number">0.0275</span>, <span class="hljs-number">0.0745</span>, <span class="hljs-number">0.0510</span>, ..., -<span class="hljs-number">0.1137</span>, -<span class="hljs-number">0.1216</span>, -<span class="hljs-number">0.0824</span>],
[ <span class="hljs-number">0.0667</span>, <span class="hljs-number">0.0824</span>, <span class="hljs-number">0.0667</span>, ..., -<span class="hljs-number">0.0588</span>, -<span class="hljs-number">0.0745</span>, -<span class="hljs-number">0.0980</span>],
[ <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0431</span>, ..., -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0588</span>]],
[[ <span class="hljs-number">0.2078</span>, <span class="hljs-number">0.2471</span>, <span class="hljs-number">0.2863</span>, ..., -<span class="hljs-number">0.9451</span>, -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.9451</span>],
[ <span class="hljs-number">0.1608</span>, <span class="hljs-number">0.2471</span>, <span class="hljs-number">0.3098</span>, ..., -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.9451</span>, -<span class="hljs-number">0.9373</span>],
[ <span class="hljs-number">0.2078</span>, <span class="hljs-number">0.2706</span>, <span class="hljs-number">0.3020</span>, ..., -<span class="hljs-number">0.9608</span>, -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.8275</span>],
...,
[-<span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0118</span>, -<span class="hljs-number">0.0039</span>, ..., -<span class="hljs-number">0.2392</span>, -<span class="hljs-number">0.2471</span>, -<span class="hljs-number">0.2078</span>],
[ <span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0196</span>, ..., -<span class="hljs-number">0.1843</span>, -<span class="hljs-number">0.2000</span>, -<span class="hljs-number">0.2235</span>],
[-<span class="hljs-number">0.0118</span>, -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0039</span>, ..., -<span class="hljs-number">0.0980</span>, -<span class="hljs-number">0.0980</span>, -<span class="hljs-number">0.1529</span>]],
[[ <span class="hljs-number">0.3961</span>, <span class="hljs-number">0.4431</span>, <span class="hljs-number">0.4980</span>, ..., -<span class="hljs-number">0.9216</span>, -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.9216</span>],
[ <span class="hljs-number">0.3569</span>, <span class="hljs-number">0.4510</span>, <span class="hljs-number">0.5216</span>, ..., -<span class="hljs-number">0.9059</span>, -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.9137</span>],
[ <span class="hljs-number">0.4118</span>, <span class="hljs-number">0.4745</span>, <span class="hljs-number">0.5216</span>, ..., -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.8902</span>, -<span class="hljs-number">0.7804</span>],
...,
[-<span class="hljs-number">0.2314</span>, -<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.2078</span>, ..., -<span class="hljs-number">0.4196</span>, -<span class="hljs-number">0.4275</span>, -<span class="hljs-number">0.3882</span>],
[-<span class="hljs-number">0.1843</span>, -<span class="hljs-number">0.1686</span>, -<span class="hljs-number">0.2000</span>, ..., -<span class="hljs-number">0.3647</span>, -<span class="hljs-number">0.3804</span>, -<span class="hljs-number">0.4039</span>],
[-<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.1922</span>, ..., -<span class="hljs-number">0.2941</span>, -<span class="hljs-number">0.2863</span>, -<span class="hljs-number">0.3412</span>]]])}`}}),Ze=new k({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBaW1wb3J0JTIwbWF0cGxvdGxpYi5weXBsb3QlMjBhcyUyMHBsdCUwQSUwQWltZyUyMCUzRCUyMGRhdGFzZXQlNUIwJTVEJTVCJTIycGl4ZWxfdmFsdWVzJTIyJTVEJTBBcGx0Lmltc2hvdyhpbWcucGVybXV0ZSgxJTJDJTIwMiUyQyUyMDApKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt
<span class="hljs-meta">&gt;&gt;&gt; </span>img = dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;pixel_values&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>plt.imshow(img.permute(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">0</span>))`}}),Fe=new y({}),Re=new k({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBbGpfc3BlZWNoJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMmxqX3NwZWVjaCUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lMjIp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech = load_dataset(<span class="hljs-string">&quot;lj_speech&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)`}}),Ne=new k({props:{code:"bGpfc3BlZWNoJTIwJTNEJTIwbGpfc3BlZWNoLm1hcChyZW1vdmVfY29sdW1ucyUzRCU1QiUyMmZpbGUlMjIlMkMlMjAlMjJpZCUyMiUyQyUyMCUyMm5vcm1hbGl6ZWRfdGV4dCUyMiU1RCk=",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech = lj_speech.<span class="hljs-built_in">map</span>(remove_columns=[<span class="hljs-string">&quot;file&quot;</span>, <span class="hljs-string">&quot;id&quot;</span>, <span class="hljs-string">&quot;normalized_text&quot;</span>])'}}),De=new k({props:{code:"bGpfc3BlZWNoJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTBBJTBBbGpfc3BlZWNoJTVCMCU1RCU1QiUyMnRleHQlMjIlNUQ=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>]
{<span class="hljs-string">&#x27;array&#x27;</span>: array([-<span class="hljs-number">7.3242188e-04</span>, -<span class="hljs-number">7.6293945e-04</span>, -<span class="hljs-number">6.4086914e-04</span>, ...,
<span class="hljs-number">7.3242188e-04</span>, <span class="hljs-number">2.1362305e-04</span>, <span class="hljs-number">6.1035156e-05</span>], dtype=float32),
<span class="hljs-string">&#x27;path&#x27;</span>: <span class="hljs-string">&#x27;/root/.cache/huggingface/datasets/downloads/extracted/917ece08c95cf0c4115e45294e3cd0dee724a1165b7fc11798369308a465bd26/LJSpeech-1.1/wavs/LJ001-0001.wav&#x27;</span>,
<span class="hljs-string">&#x27;sampling_rate&#x27;</span>: <span class="hljs-number">22050</span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;text&quot;</span>]
<span class="hljs-string">&#x27;Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition&#x27;</span>`}}),Xe=new k({props:{code:"bGpfc3BlZWNoJTIwJTNEJTIwbGpfc3BlZWNoLmNhc3RfY29sdW1uKCUyMmF1ZGlvJTIyJTJDJTIwQXVkaW8oc2FtcGxpbmdfcmF0ZSUzRDE2XzAwMCkp",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech = lj_speech.cast_column(<span class="hljs-string">&quot;audio&quot;</span>, Audio(sampling_rate=<span class="hljs-number">16_000</span>))'}}),qe=new y({}),Ye=new k({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Qcm9jZXNzb3IlMEElMEFwcm9jZXNzb3IlMjAlM0QlMjBBdXRvUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZCglMjJmYWNlYm9vayUyRndhdjJ2ZWMyLWJhc2UtOTYwaCUyMik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span>processor = AutoProcessor.from_pretrained(<span class="hljs-string">&quot;facebook/wav2vec2-base-960h&quot;</span>)`}}),Pe=new k({props:{code:"ZGVmJTIwcHJlcGFyZV9kYXRhc2V0KGV4YW1wbGUpJTNBJTBBJTIwJTIwJTIwJTIwYXVkaW8lMjAlM0QlMjBleGFtcGxlJTVCJTIyYXVkaW8lMjIlNUQlMEElMEElMjAlMjAlMjAlMjBleGFtcGxlLnVwZGF0ZShwcm9jZXNzb3IoYXVkaW8lM0RhdWRpbyU1QiUyMmFycmF5JTIyJTVEJTJDJTIwdGV4dCUzRGV4YW1wbGUlNUIlMjJ0ZXh0JTIyJTVEJTJDJTIwc2FtcGxpbmdfcmF0ZSUzRDE2MDAwKSklMEElMEElMjAlMjAlMjAlMjByZXR1cm4lMjBleGFtcGxl",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">prepare_dataset</span>(<span class="hljs-params">example</span>):
<span class="hljs-meta">... </span> audio = example[<span class="hljs-string">&quot;audio&quot;</span>]
<span class="hljs-meta">... </span> example.update(processor(audio=audio[<span class="hljs-string">&quot;array&quot;</span>], text=example[<span class="hljs-string">&quot;text&quot;</span>], sampling_rate=<span class="hljs-number">16000</span>))
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> example`}}),Le=new k({props:{code:"cHJlcGFyZV9kYXRhc2V0KGxqX3NwZWVjaCU1QjAlNUQp",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>prepare_dataset(lj_speech[<span class="hljs-number">0</span>])'}}),{c(){v=l("meta"),T=h(),o=l("h1"),J=l("a"),M=l("span"),b($.$$.fragment),S=h(),as=l("span"),qp=i("Vorverarbeiten"),cl=h(),b(Gs.$$.fragment),ol=h(),Ke=l("p"),Yp=i("Bevor Sie Ihre Daten in einem Modell verwenden k\xF6nnen, m\xFCssen die Daten in ein f\xFCr das Modell akzeptables Format gebracht werden. Ein Modell versteht keine Rohtexte, Bilder oder Audiodaten. Diese Eingaben m\xFCssen in Zahlen umgewandelt und zu Tensoren zusammengesetzt werden. In dieser Anleitung werden Sie:"),bl=h(),U=l("ul"),Qa=l("li"),Pp=i("Textdaten mit einem Tokenizer vorverarbeiten."),Qp=h(),La=l("li"),Lp=i("Bild- oder Audiodaten mit einem Feature Extractor vorverarbeiten."),Hp=h(),Ha=l("li"),Kp=i("Daten f\xFCr eine multimodale Aufgabe mit einem Prozessor vorverarbeiten."),dl=h(),F=l("h2"),ns=l("a"),Ka=l("span"),b(Zs.$$.fragment),Op=h(),Oa=l("span"),st=i("NLP"),jl=h(),b(Fs.$$.fragment),fl=h(),A=l("p"),et=i("Das wichtigste Werkzeug zur Verarbeitung von Textdaten ist ein "),Oe=l("a"),at=i("Tokenizer"),nt=i(". Ein Tokenizer zerlegt Text zun\xE4chst nach einer Reihe von Regeln in "),sn=l("em"),lt=i("Token"),rt=i(". Die Token werden in Zahlen umgewandelt, die zum Aufbau von Tensoren als Eingabe f\xFCr ein Modell verwendet werden. Alle zus\xE4tzlichen Eingaben, die ein Modell ben\xF6tigt, werden ebenfalls vom Tokenizer hinzugef\xFCgt."),gl=h(),b(ls.$$.fragment),wl=h(),rs=l("p"),pt=i("Laden Sie einen vortrainierten Tokenizer mit der Klasse [AutoTokenizer], um schnell loszulegen. Damit wird das "),en=l("em"),tt=i("vocab"),it=i(" heruntergeladen, das verwendet wird, wenn ein Modell vortrainiert wird."),vl=h(),C=l("h3"),ps=l("a"),an=l("span"),b(Cs.$$.fragment),ut=h(),nn=l("span"),ht=i("Tokenize"),kl=h(),ts=l("p"),mt=i("Laden Sie einen vortrainierten Tokenizer mit "),ln=l("code"),ct=i("AutoTokenizer.from_pretrained()"),ot=i(":"),Tl=h(),b(Rs.$$.fragment),Jl=h(),sa=l("p"),bt=i("Dann \xFCbergeben Sie Ihren Satz an den Tokenizer:"),Ml=h(),b(Ns.$$.fragment),yl=h(),ea=l("p"),dt=i("Der Tokenizer gibt ein W\xF6rterbuch mit drei wichtigen Elementen zur\xFCck:"),$l=h(),B=l("ul"),aa=l("li"),na=l("a"),jt=i("input_ids"),ft=i(" sind die Indizes, die den einzelnen Token im Satz entsprechen."),gt=h(),la=l("li"),ra=l("a"),wt=i("attention_mask"),vt=i(" gibt an, ob ein Token beachtet werden soll oder nicht."),kt=h(),pa=l("li"),ta=l("a"),Tt=i("token_type_ids"),Jt=i(" gibt an, zu welcher Sequenz ein Token geh\xF6rt, wenn es mehr als eine Sequenz gibt."),zl=h(),is=l("p"),Mt=i("Sie k\xF6nnen die "),rn=l("code"),yt=i("input_ids"),$t=i(" dekodieren, um die urspr\xFCngliche Eingabe zur\xFCckzugeben:"),_l=h(),b(Ds.$$.fragment),El=h(),x=l("p"),zt=i("Wie Sie sehen k\xF6nnen, hat der Tokenisierer zwei spezielle Token - "),pn=l("code"),_t=i("CLS"),Et=i(" und "),tn=l("code"),It=i("SEP"),St=i(` (Klassifikator und Separator) - zum Satz hinzugef\xFCgt. Nicht alle Modelle ben\xF6tigen
spezielle Token, aber wenn dies der Fall ist, f\xFCgt der Tokenisierer sie automatisch f\xFCr Sie hinzu.`),Il=h(),ia=l("p"),Ut=i("Wenn Sie mehrere S\xE4tze verarbeiten wollen, \xFCbergeben Sie die S\xE4tze als Liste an den Tokenizer:"),Sl=h(),b(Xs.$$.fragment),Ul=h(),R=l("h3"),us=l("a"),un=l("span"),b(qs.$$.fragment),At=h(),hn=l("span"),Bt=i("Pad"),Al=h(),hs=l("p"),xt=i("Dies bringt uns zu einem wichtigen Thema. Wenn Sie einen Haufen von S\xE4tzen verarbeiten, sind diese nicht immer gleich lang. Das ist ein Problem, weil Tensoren, die Eingabe f\xFCr das Modell, eine einheitliche Form haben m\xFCssen. Padding ist eine Strategie, die sicherstellt, dass Tensoren rechteckig sind, indem ein spezielles "),mn=l("em"),Wt=i("Padding-Token"),Vt=i(" zu S\xE4tzen mit weniger Token hinzugef\xFCgt wird."),Bl=h(),ua=l("p"),Gt=i("Setzen Sie den Parameter \u201Cpadding\u201D auf \u201Ctrue\u201D, um die k\xFCrzeren Sequenzen im Stapel so aufzuf\xFCllen, dass sie der l\xE4ngsten Sequenz entsprechen:"),xl=h(),b(Ys.$$.fragment),Wl=h(),ha=l("p"),Zt=i("Beachten Sie, dass der Tokenizer den ersten und den dritten Satz mit einer \u201C0\u201D aufgef\xFCllt hat, weil sie k\xFCrzer sind!"),Vl=h(),N=l("h3"),ms=l("a"),cn=l("span"),b(Ps.$$.fragment),Ft=h(),on=l("span"),Ct=i("K\xFCrzung"),Gl=h(),ma=l("p"),Rt=i("Auf der anderen Seite des Spektrums kann es vorkommen, dass eine Sequenz zu lang f\xFCr ein Modell ist. In diesem Fall m\xFCssen Sie die Sequenz auf eine k\xFCrzere L\xE4nge k\xFCrzen."),Zl=h(),ca=l("p"),Nt=i("Setzen Sie den Parameter \u201Ctruncation\u201D auf \u201Ctrue\u201D, um eine Sequenz auf die vom Modell akzeptierte H\xF6chstl\xE4nge zu k\xFCrzen:"),Fl=h(),b(Qs.$$.fragment),Cl=h(),D=l("h3"),cs=l("a"),bn=l("span"),b(Ls.$$.fragment),Dt=h(),dn=l("span"),Xt=i("Tensoren erstellen"),Rl=h(),oa=l("p"),qt=i("Schlie\xDFlich m\xF6chten Sie, dass der Tokenizer die tats\xE4chlichen Tensoren zur\xFCckgibt, die dem Modell zugef\xFChrt werden."),Nl=h(),z=l("p"),Yt=i("Setzen Sie den Parameter "),jn=l("code"),Pt=i("return_tensors"),Qt=i(" entweder auf "),fn=l("code"),Lt=i("pt"),Ht=i(" f\xFCr PyTorch, oder "),gn=l("code"),Kt=i("tf"),Ot=i(" f\xFCr TensorFlow:"),Dl=h(),b(os.$$.fragment),Xl=h(),X=l("h2"),bs=l("a"),wn=l("span"),b(Hs.$$.fragment),si=h(),vn=l("span"),ei=i("Audio"),ql=h(),ds=l("p"),ai=i("Audioeingaben werden anders vorverarbeitet als Texteingaben, aber das Endziel bleibt dasselbe: numerische Sequenzen zu erstellen, die das Modell verstehen kann. Ein "),ba=l("a"),ni=i("feature extractor"),li=i(" dient dem ausdr\xFCcklichen Zweck, Merkmale aus Rohbild- oder Audiodaten zu extrahieren und in Tensoren zu konvertieren. Bevor Sie beginnen, installieren Sie \u{1F917} Datasets, um einen Audio-Datensatz zu laden, mit dem Sie experimentieren k\xF6nnen:"),Yl=h(),b(Ks.$$.fragment),Pl=h(),W=l("p"),ri=i("Laden Sie den "),Os=l("a"),pi=i("MInDS-14"),ti=i(" Datensatz (weitere Informationen zum Laden eines Datensatzes finden Sie im \u{1F917} "),se=l("a"),ii=i("Datasets tutorial"),ui=i("):"),Ql=h(),b(ee.$$.fragment),Ll=h(),js=l("p"),hi=i("Greifen Sie auf das erste Element der "),kn=l("code"),mi=i("audio"),ci=i("-Spalte zu, um einen Blick auf die Eingabe zu werfen. Durch den Aufruf der Spalte \u201Caudio\u201D wird die Audiodatei automatisch geladen und neu gesampelt:"),Hl=h(),b(ae.$$.fragment),Kl=h(),da=l("p"),oi=i("Dies gibt drei Elemente zur\xFCck:"),Ol=h(),V=l("ul"),Tn=l("li"),bi=i("\u201Carray\u201D ist das Sprachsignal, das als 1D-Array geladen - und m\xF6glicherweise neu gesampelt - wurde."),di=h(),Jn=l("li"),ji=i("Pfad\u201D zeigt auf den Speicherort der Audiodatei."),fi=h(),ja=l("li"),Mn=l("code"),gi=i("sampling_rate"),wi=i(" bezieht sich darauf, wie viele Datenpunkte im Sprachsignal pro Sekunde gemessen werden."),sr=h(),q=l("h3"),fs=l("a"),yn=l("span"),b(ne.$$.fragment),vi=h(),$n=l("span"),ki=i("Resample"),er=h(),gs=l("p"),Ti=i("F\xFCr dieses Tutorial werden Sie das Modell "),le=l("a"),Ji=i("Wav2Vec2"),Mi=i(" verwenden. Wie Sie aus der Modellkarte ersehen k\xF6nnen, ist das Wav2Vec2-Modell auf 16kHz abgetastetes Sprachaudio vortrainiert. Es ist wichtig, dass die Abtastrate Ihrer Audiodaten mit der Abtastrate des Datensatzes \xFCbereinstimmt, der f\xFCr das Pre-Training des Modells verwendet wurde. Wenn die Abtastrate Ihrer Daten nicht dieselbe ist, m\xFCssen Sie Ihre Audiodaten neu abtasten."),ar=h(),ws=l("p"),yi=i("Der Datensatz "),re=l("a"),$i=i("MInDS-14"),zi=i(" hat zum Beispiel eine Abtastrate von 8000 kHz. Um das Wav2Vec2-Modell mit diesem Datensatz verwenden zu k\xF6nnen, m\xFCssen Sie die Abtastrate auf 16 kHz erh\xF6hen:"),nr=h(),b(pe.$$.fragment),lr=h(),fa=l("ol"),zn=l("li"),_i=i("Verwenden Sie die Methode [~datasets.Dataset.cast_column] von \u{1F917} Datasets, um die Abtastrate auf 16kHz zu erh\xF6hen:"),rr=h(),b(te.$$.fragment),pr=h(),ie=l("ol"),_n=l("li"),Ei=i("Laden Sie die Audiodatei:"),tr=h(),b(ue.$$.fragment),ir=h(),ga=l("p"),Ii=i("Wie Sie sehen k\xF6nnen, ist die Abtastrate jetzt 16kHz!"),ur=h(),Y=l("h3"),vs=l("a"),En=l("span"),b(he.$$.fragment),Si=h(),In=l("span"),Ui=i("Merkmalsextraktor"),hr=h(),_=l("p"),Ai=i("Der n\xE4chste Schritt ist das Laden eines Merkmalsextraktors, um die Eingabe zu normalisieren und aufzuf\xFCllen. Beim Auff\xFCllen von Textdaten wird f\xFCr k\xFCrzere Sequenzen ein "),Sn=l("code"),Bi=i("0"),xi=i(" hinzugef\xFCgt. Die gleiche Idee gilt f\xFCr Audiodaten, und der Audio-Feature-Extraktor f\xFCgt eine "),Un=l("code"),Wi=i("0"),Vi=i(" - interpretiert als Stille - zu "),An=l("code"),Gi=i("array"),Zi=i(" hinzu."),mr=h(),ks=l("p"),Fi=i("Laden Sie den Merkmalsextraktor mit "),Bn=l("code"),Ci=i("AutoFeatureExtractor.from_pretrained()"),Ri=i(":"),cr=h(),b(me.$$.fragment),or=h(),Ts=l("p"),Ni=i("\xDCbergeben Sie das Audio-\u201DArray\u201D an den Feature-Extraktor. Wir empfehlen auch, das Argument "),xn=l("code"),Di=i("sampling_rate"),Xi=i(" im Feature Extractor hinzuzuf\xFCgen, um eventuell auftretende stille Fehler besser zu beheben."),br=h(),b(ce.$$.fragment),dr=h(),P=l("h3"),Js=l("a"),Wn=l("span"),b(oe.$$.fragment),qi=h(),Vn=l("span"),Yi=i("Auff\xFCllen und K\xFCrzen"),jr=h(),wa=l("p"),Pi=i("Genau wie beim Tokenizer k\xF6nnen Sie variable Sequenzen in einem Stapel durch Auff\xFCllen oder Abschneiden behandeln. Werfen Sie einen Blick auf die Sequenzl\xE4nge dieser beiden Audiobeispiele:"),fr=h(),b(be.$$.fragment),gr=h(),va=l("p"),Qi=i("Wie Sie sehen k\xF6nnen, hat das erste Beispiel eine l\xE4ngere Sequenz als das zweite Beispiel. Lassen Sie uns eine Funktion erstellen, die den Datensatz vorverarbeitet. Geben Sie eine maximale L\xE4nge der Probe an, und der Feature-Extraktor wird die Sequenzen entweder auff\xFCllen oder abschneiden, damit sie dieser L\xE4nge entsprechen:"),wr=h(),b(de.$$.fragment),vr=h(),ka=l("p"),Li=i("Wenden Sie die Funktion auf die ersten paar Beispiele im Datensatz an:"),kr=h(),b(je.$$.fragment),Tr=h(),Ta=l("p"),Hi=i("Schauen Sie sich nun noch einmal die verarbeiteten Beispiel-L\xE4ngen an:"),Jr=h(),b(fe.$$.fragment),Mr=h(),Ja=l("p"),Ki=i("Die L\xE4nge der ersten beiden Beispiele entspricht nun der von Ihnen angegebenen Maximall\xE4nge."),yr=h(),Q=l("h2"),Ms=l("a"),Gn=l("span"),b(ge.$$.fragment),Oi=h(),Zn=l("span"),su=i("Bildverarbeitung"),$r=h(),Ma=l("p"),eu=i("Ein Merkmalsextraktor wird auch verwendet, um Bilder f\xFCr Bildverarbeitungsaufgaben zu verarbeiten. Auch hier besteht das Ziel darin, das Rohbild in eine Reihe von Tensoren als Eingabe zu konvertieren."),zr=h(),G=l("p"),au=i("Laden wir den "),we=l("a"),nu=i("food101"),lu=i(" Datensatz f\xFCr dieses Tutorial. Verwenden Sie den Parameter \u{1F917} Datasets "),Fn=l("code"),ru=i("split"),pu=i(", um nur eine kleine Stichprobe aus dem Trainingssplit zu laden, da der Datensatz recht gro\xDF ist:"),_r=h(),b(ve.$$.fragment),Er=h(),ys=l("p"),tu=i("Als N\xE4chstes sehen Sie sich das Bild mit dem Merkmal \u{1F917} Datens\xE4tze [Bild] ("),ke=l("a"),iu=i("https://huggingface.co/docs/datasets/package_reference/main_classes.html?highlight=image#datasets.Image"),uu=i(") an:"),Ir=h(),b(Te.$$.fragment),Sr=h(),ya=l("p"),$a=l("img"),Ur=h(),L=l("h3"),$s=l("a"),Cn=l("span"),b(Je.$$.fragment),hu=h(),Rn=l("span"),mu=i("Merkmalsextraktor"),Ar=h(),zs=l("p"),cu=i("Laden Sie den Merkmalsextraktor mit "),Nn=l("code"),ou=i("AutoImageProcessor.from_pretrained()"),bu=i(":"),Br=h(),b(Me.$$.fragment),xr=h(),H=l("h3"),_s=l("a"),Dn=l("span"),b(ye.$$.fragment),du=h(),Xn=l("span"),ju=i("Datenerweiterung"),Wr=h(),Es=l("p"),fu=i("Bei Bildverarbeitungsaufgaben ist es \xFCblich, den Bildern als Teil der Vorverarbeitung eine Art von Datenerweiterung hinzuzuf\xFCgen. Sie k\xF6nnen Erweiterungen mit jeder beliebigen Bibliothek hinzuf\xFCgen, aber in diesem Tutorial werden Sie das Modul "),$e=l("a"),qn=l("code"),gu=i("transforms"),wu=i(" von torchvision verwenden."),Vr=h(),za=l("ol"),I=l("li"),vu=i("Normalisieren Sie das Bild und verwenden Sie "),ze=l("a"),Yn=l("code"),ku=i("Compose"),Tu=i(", um einige Transformationen - "),_e=l("a"),Pn=l("code"),Ju=i("RandomResizedCrop"),Mu=i(" und "),Ee=l("a"),Qn=l("code"),yu=i("ColorJitter"),$u=i(" - miteinander zu verkn\xFCpfen:"),Gr=h(),b(Ie.$$.fragment),Zr=h(),Se=l("ol"),K=l("li"),zu=i("Das Modell akzeptiert "),_a=l("a"),Ln=l("code"),_u=i("pixel_values"),Eu=i(" als Eingabe. Dieser Wert wird vom Merkmalsextraktor erzeugt. Erstellen Sie eine Funktion, die "),Hn=l("code"),Iu=i("pixel_values"),Su=i(" aus den Transformationen erzeugt:"),Fr=h(),b(Ue.$$.fragment),Cr=h(),Ae=l("ol"),Be=l("li"),Uu=i("Dann verwenden Sie \u{1F917} Datasets "),xe=l("a"),Kn=l("code"),Au=i("set_transform"),Bu=i(", um die Transformationen im laufenden Betrieb anzuwenden:"),Rr=h(),b(We.$$.fragment),Nr=h(),Ve=l("ol"),On=l("li"),xu=i("Wenn Sie nun auf das Bild zugreifen, werden Sie feststellen, dass der Feature Extractor die Modelleingabe \u201Cpixel_values\u201D hinzugef\xFCgt hat:"),Dr=h(),b(Ge.$$.fragment),Xr=h(),Ea=l("p"),Wu=i("Hier sehen Sie, wie das Bild nach der Vorverarbeitung aussieht. Wie von den angewandten Transformationen zu erwarten, wurde das Bild willk\xFCrlich beschnitten und seine Farbeigenschaften sind anders."),qr=h(),b(Ze.$$.fragment),Yr=h(),Ia=l("p"),Sa=l("img"),Pr=h(),O=l("h2"),Is=l("a"),sl=l("span"),b(Fe.$$.fragment),Vu=h(),el=l("span"),Gu=i("Multimodal"),Qr=h(),Ua=l("p"),Zu=i("F\xFCr multimodale Aufgaben werden Sie eine Kombination aus allem, was Sie bisher gelernt haben, verwenden und Ihre F\xE4higkeiten auf eine Aufgabe der automatischen Spracherkennung (ASR) anwenden. Dies bedeutet, dass Sie einen:"),Lr=h(),Ss=l("ul"),al=l("li"),Fu=i("Feature Extractor zur Vorverarbeitung der Audiodaten."),Cu=h(),nl=l("li"),Ru=i("Tokenizer, um den Text zu verarbeiten."),Hr=h(),Us=l("p"),Nu=i("Kehren wir zum "),Ce=l("a"),Du=i("LJ Speech"),Xu=i(" Datensatz zur\xFCck:"),Kr=h(),b(Re.$$.fragment),Or=h(),Aa=l("p"),qu=i("Da Sie haupts\xE4chlich an den Spalten \u201CAudio\u201D und \u201CText\u201D interessiert sind, entfernen Sie die anderen Spalten:"),sp=h(),b(Ne.$$.fragment),ep=h(),Ba=l("p"),Yu=i("Schauen Sie sich nun die Spalten \u201CAudio\u201D und \u201CText\u201D an:"),ap=h(),b(De.$$.fragment),np=h(),As=l("p"),Pu=i("Erinnern Sie sich an den fr\xFCheren Abschnitt \xFCber die Verarbeitung von Audiodaten: Sie sollten immer die Abtastrate Ihrer Audiodaten "),xa=l("a"),Qu=i("resample"),Lu=i(", damit sie mit der Abtastrate des Datensatzes \xFCbereinstimmt, der f\xFCr das Vortraining eines Modells verwendet wird:"),lp=h(),b(Xe.$$.fragment),rp=h(),ss=l("h3"),Bs=l("a"),ll=l("span"),b(qe.$$.fragment),Hu=h(),rl=l("span"),Ku=i("Prozessor"),pp=h(),Wa=l("p"),Ou=i("Ein Processor kombiniert einen Feature-Extraktor und einen Tokenizer. Laden Sie einen Processor mit [`AutoProcessor.from_pretrained]:"),tp=h(),b(Ye.$$.fragment),ip=h(),Va=l("ol"),es=l("li"),sh=i("Erstellen Sie eine Funktion, die die Audiodaten zu "),pl=l("code"),eh=i("input_values"),ah=i(" verarbeitet und den Text zu "),tl=l("code"),nh=i("labels"),lh=i(" tokenisiert. Dies sind Ihre Eingaben f\xFCr das Modell:"),up=h(),b(Pe.$$.fragment),hp=h(),Qe=l("ol"),il=l("li"),rh=i("Wenden Sie die Funktion \u201Cprepare_dataset\u201D auf ein Beispiel an:"),mp=h(),b(Le.$$.fragment),cp=h(),Z=l("p"),ph=i("Beachten Sie, dass der Processor "),ul=l("code"),th=i("input_values"),ih=i(" und "),hl=l("code"),uh=i("labels"),hh=i(" hinzugef\xFCgt hat. Auch die Abtastrate wurde korrekt auf 16kHz heruntergerechnet."),op=h(),Ga=l("p"),mh=i("Toll, Sie sollten jetzt in der Lage sein, Daten f\xFCr jede Modalit\xE4t vorzuverarbeiten und sogar verschiedene Modalit\xE4ten zu kombinieren! Im n\xE4chsten Kurs lernen Sie, wie Sie ein Modell mit Ihren neu aufbereiteten Daten feinabstimmen k\xF6nnen."),this.h()},l(s){const n=Oc('[data-svelte="svelte-1phssyn"]',document.head);v=r(n,"META",{name:!0,content:!0}),n.forEach(e),T=m(s),o=r(s,"H1",{class:!0});var He=p(o);J=r(He,"A",{id:!0,class:!0,href:!0});var ml=p(J);M=r(ml,"SPAN",{});var gh=p(M);d($.$$.fragment,gh),gh.forEach(e),ml.forEach(e),S=m(He),as=r(He,"SPAN",{});var wh=p(as);qp=u(wh,"Vorverarbeiten"),wh.forEach(e),He.forEach(e),cl=m(s),d(Gs.$$.fragment,s),ol=m(s),Ke=r(s,"P",{});var vh=p(Ke);Yp=u(vh,"Bevor Sie Ihre Daten in einem Modell verwenden k\xF6nnen, m\xFCssen die Daten in ein f\xFCr das Modell akzeptables Format gebracht werden. Ein Modell versteht keine Rohtexte, Bilder oder Audiodaten. Diese Eingaben m\xFCssen in Zahlen umgewandelt und zu Tensoren zusammengesetzt werden. In dieser Anleitung werden Sie:"),vh.forEach(e),bl=m(s),U=r(s,"UL",{});var Za=p(U);Qa=r(Za,"LI",{});var kh=p(Qa);Pp=u(kh,"Textdaten mit einem Tokenizer vorverarbeiten."),kh.forEach(e),Qp=m(Za),La=r(Za,"LI",{});var Th=p(La);Lp=u(Th,"Bild- oder Audiodaten mit einem Feature Extractor vorverarbeiten."),Th.forEach(e),Hp=m(Za),Ha=r(Za,"LI",{});var Jh=p(Ha);Kp=u(Jh,"Daten f\xFCr eine multimodale Aufgabe mit einem Prozessor vorverarbeiten."),Jh.forEach(e),Za.forEach(e),dl=m(s),F=r(s,"H2",{class:!0});var dp=p(F);ns=r(dp,"A",{id:!0,class:!0,href:!0});var Mh=p(ns);Ka=r(Mh,"SPAN",{});var yh=p(Ka);d(Zs.$$.fragment,yh),yh.forEach(e),Mh.forEach(e),Op=m(dp),Oa=r(dp,"SPAN",{});var $h=p(Oa);st=u($h,"NLP"),$h.forEach(e),dp.forEach(e),jl=m(s),d(Fs.$$.fragment,s),fl=m(s),A=r(s,"P",{});var Fa=p(A);et=u(Fa,"Das wichtigste Werkzeug zur Verarbeitung von Textdaten ist ein "),Oe=r(Fa,"A",{href:!0});var zh=p(Oe);at=u(zh,"Tokenizer"),zh.forEach(e),nt=u(Fa,". Ein Tokenizer zerlegt Text zun\xE4chst nach einer Reihe von Regeln in "),sn=r(Fa,"EM",{});var _h=p(sn);lt=u(_h,"Token"),_h.forEach(e),rt=u(Fa,". Die Token werden in Zahlen umgewandelt, die zum Aufbau von Tensoren als Eingabe f\xFCr ein Modell verwendet werden. Alle zus\xE4tzlichen Eingaben, die ein Modell ben\xF6tigt, werden ebenfalls vom Tokenizer hinzugef\xFCgt."),Fa.forEach(e),gl=m(s),d(ls.$$.fragment,s),wl=m(s),rs=r(s,"P",{});var jp=p(rs);pt=u(jp,"Laden Sie einen vortrainierten Tokenizer mit der Klasse [AutoTokenizer], um schnell loszulegen. Damit wird das "),en=r(jp,"EM",{});var Eh=p(en);tt=u(Eh,"vocab"),Eh.forEach(e),it=u(jp," heruntergeladen, das verwendet wird, wenn ein Modell vortrainiert wird."),jp.forEach(e),vl=m(s),C=r(s,"H3",{class:!0});var fp=p(C);ps=r(fp,"A",{id:!0,class:!0,href:!0});var Ih=p(ps);an=r(Ih,"SPAN",{});var Sh=p(an);d(Cs.$$.fragment,Sh),Sh.forEach(e),Ih.forEach(e),ut=m(fp),nn=r(fp,"SPAN",{});var Uh=p(nn);ht=u(Uh,"Tokenize"),Uh.forEach(e),fp.forEach(e),kl=m(s),ts=r(s,"P",{});var gp=p(ts);mt=u(gp,"Laden Sie einen vortrainierten Tokenizer mit "),ln=r(gp,"CODE",{});var Ah=p(ln);ct=u(Ah,"AutoTokenizer.from_pretrained()"),Ah.forEach(e),ot=u(gp,":"),gp.forEach(e),Tl=m(s),d(Rs.$$.fragment,s),Jl=m(s),sa=r(s,"P",{});var Bh=p(sa);bt=u(Bh,"Dann \xFCbergeben Sie Ihren Satz an den Tokenizer:"),Bh.forEach(e),Ml=m(s),d(Ns.$$.fragment,s),yl=m(s),ea=r(s,"P",{});var xh=p(ea);dt=u(xh,"Der Tokenizer gibt ein W\xF6rterbuch mit drei wichtigen Elementen zur\xFCck:"),xh.forEach(e),$l=m(s),B=r(s,"UL",{});var Ca=p(B);aa=r(Ca,"LI",{});var ch=p(aa);na=r(ch,"A",{href:!0});var Wh=p(na);jt=u(Wh,"input_ids"),Wh.forEach(e),ft=u(ch," sind die Indizes, die den einzelnen Token im Satz entsprechen."),ch.forEach(e),gt=m(Ca),la=r(Ca,"LI",{});var oh=p(la);ra=r(oh,"A",{href:!0});var Vh=p(ra);wt=u(Vh,"attention_mask"),Vh.forEach(e),vt=u(oh," gibt an, ob ein Token beachtet werden soll oder nicht."),oh.forEach(e),kt=m(Ca),pa=r(Ca,"LI",{});var bh=p(pa);ta=r(bh,"A",{href:!0});var Gh=p(ta);Tt=u(Gh,"token_type_ids"),Gh.forEach(e),Jt=u(bh," gibt an, zu welcher Sequenz ein Token geh\xF6rt, wenn es mehr als eine Sequenz gibt."),bh.forEach(e),Ca.forEach(e),zl=m(s),is=r(s,"P",{});var wp=p(is);Mt=u(wp,"Sie k\xF6nnen die "),rn=r(wp,"CODE",{});var Zh=p(rn);yt=u(Zh,"input_ids"),Zh.forEach(e),$t=u(wp," dekodieren, um die urspr\xFCngliche Eingabe zur\xFCckzugeben:"),wp.forEach(e),_l=m(s),d(Ds.$$.fragment,s),El=m(s),x=r(s,"P",{});var Ra=p(x);zt=u(Ra,"Wie Sie sehen k\xF6nnen, hat der Tokenisierer zwei spezielle Token - "),pn=r(Ra,"CODE",{});var Fh=p(pn);_t=u(Fh,"CLS"),Fh.forEach(e),Et=u(Ra," und "),tn=r(Ra,"CODE",{});var Ch=p(tn);It=u(Ch,"SEP"),Ch.forEach(e),St=u(Ra,` (Klassifikator und Separator) - zum Satz hinzugef\xFCgt. Nicht alle Modelle ben\xF6tigen
spezielle Token, aber wenn dies der Fall ist, f\xFCgt der Tokenisierer sie automatisch f\xFCr Sie hinzu.`),Ra.forEach(e),Il=m(s),ia=r(s,"P",{});var Rh=p(ia);Ut=u(Rh,"Wenn Sie mehrere S\xE4tze verarbeiten wollen, \xFCbergeben Sie die S\xE4tze als Liste an den Tokenizer:"),Rh.forEach(e),Sl=m(s),d(Xs.$$.fragment,s),Ul=m(s),R=r(s,"H3",{class:!0});var vp=p(R);us=r(vp,"A",{id:!0,class:!0,href:!0});var Nh=p(us);un=r(Nh,"SPAN",{});var Dh=p(un);d(qs.$$.fragment,Dh),Dh.forEach(e),Nh.forEach(e),At=m(vp),hn=r(vp,"SPAN",{});var Xh=p(hn);Bt=u(Xh,"Pad"),Xh.forEach(e),vp.forEach(e),Al=m(s),hs=r(s,"P",{});var kp=p(hs);xt=u(kp,"Dies bringt uns zu einem wichtigen Thema. Wenn Sie einen Haufen von S\xE4tzen verarbeiten, sind diese nicht immer gleich lang. Das ist ein Problem, weil Tensoren, die Eingabe f\xFCr das Modell, eine einheitliche Form haben m\xFCssen. Padding ist eine Strategie, die sicherstellt, dass Tensoren rechteckig sind, indem ein spezielles "),mn=r(kp,"EM",{});var qh=p(mn);Wt=u(qh,"Padding-Token"),qh.forEach(e),Vt=u(kp," zu S\xE4tzen mit weniger Token hinzugef\xFCgt wird."),kp.forEach(e),Bl=m(s),ua=r(s,"P",{});var Yh=p(ua);Gt=u(Yh,"Setzen Sie den Parameter \u201Cpadding\u201D auf \u201Ctrue\u201D, um die k\xFCrzeren Sequenzen im Stapel so aufzuf\xFCllen, dass sie der l\xE4ngsten Sequenz entsprechen:"),Yh.forEach(e),xl=m(s),d(Ys.$$.fragment,s),Wl=m(s),ha=r(s,"P",{});var Ph=p(ha);Zt=u(Ph,"Beachten Sie, dass der Tokenizer den ersten und den dritten Satz mit einer \u201C0\u201D aufgef\xFCllt hat, weil sie k\xFCrzer sind!"),Ph.forEach(e),Vl=m(s),N=r(s,"H3",{class:!0});var Tp=p(N);ms=r(Tp,"A",{id:!0,class:!0,href:!0});var Qh=p(ms);cn=r(Qh,"SPAN",{});var Lh=p(cn);d(Ps.$$.fragment,Lh),Lh.forEach(e),Qh.forEach(e),Ft=m(Tp),on=r(Tp,"SPAN",{});var Hh=p(on);Ct=u(Hh,"K\xFCrzung"),Hh.forEach(e),Tp.forEach(e),Gl=m(s),ma=r(s,"P",{});var Kh=p(ma);Rt=u(Kh,"Auf der anderen Seite des Spektrums kann es vorkommen, dass eine Sequenz zu lang f\xFCr ein Modell ist. In diesem Fall m\xFCssen Sie die Sequenz auf eine k\xFCrzere L\xE4nge k\xFCrzen."),Kh.forEach(e),Zl=m(s),ca=r(s,"P",{});var Oh=p(ca);Nt=u(Oh,"Setzen Sie den Parameter \u201Ctruncation\u201D auf \u201Ctrue\u201D, um eine Sequenz auf die vom Modell akzeptierte H\xF6chstl\xE4nge zu k\xFCrzen:"),Oh.forEach(e),Fl=m(s),d(Qs.$$.fragment,s),Cl=m(s),D=r(s,"H3",{class:!0});var Jp=p(D);cs=r(Jp,"A",{id:!0,class:!0,href:!0});var sm=p(cs);bn=r(sm,"SPAN",{});var em=p(bn);d(Ls.$$.fragment,em),em.forEach(e),sm.forEach(e),Dt=m(Jp),dn=r(Jp,"SPAN",{});var am=p(dn);Xt=u(am,"Tensoren erstellen"),am.forEach(e),Jp.forEach(e),Rl=m(s),oa=r(s,"P",{});var nm=p(oa);qt=u(nm,"Schlie\xDFlich m\xF6chten Sie, dass der Tokenizer die tats\xE4chlichen Tensoren zur\xFCckgibt, die dem Modell zugef\xFChrt werden."),nm.forEach(e),Nl=m(s),z=r(s,"P",{});var xs=p(z);Yt=u(xs,"Setzen Sie den Parameter "),jn=r(xs,"CODE",{});var lm=p(jn);Pt=u(lm,"return_tensors"),lm.forEach(e),Qt=u(xs," entweder auf "),fn=r(xs,"CODE",{});var rm=p(fn);Lt=u(rm,"pt"),rm.forEach(e),Ht=u(xs," f\xFCr PyTorch, oder "),gn=r(xs,"CODE",{});var pm=p(gn);Kt=u(pm,"tf"),pm.forEach(e),Ot=u(xs," f\xFCr TensorFlow:"),xs.forEach(e),Dl=m(s),d(os.$$.fragment,s),Xl=m(s),X=r(s,"H2",{class:!0});var Mp=p(X);bs=r(Mp,"A",{id:!0,class:!0,href:!0});var tm=p(bs);wn=r(tm,"SPAN",{});var im=p(wn);d(Hs.$$.fragment,im),im.forEach(e),tm.forEach(e),si=m(Mp),vn=r(Mp,"SPAN",{});var um=p(vn);ei=u(um,"Audio"),um.forEach(e),Mp.forEach(e),ql=m(s),ds=r(s,"P",{});var yp=p(ds);ai=u(yp,"Audioeingaben werden anders vorverarbeitet als Texteingaben, aber das Endziel bleibt dasselbe: numerische Sequenzen zu erstellen, die das Modell verstehen kann. Ein "),ba=r(yp,"A",{href:!0});var hm=p(ba);ni=u(hm,"feature extractor"),hm.forEach(e),li=u(yp," dient dem ausdr\xFCcklichen Zweck, Merkmale aus Rohbild- oder Audiodaten zu extrahieren und in Tensoren zu konvertieren. Bevor Sie beginnen, installieren Sie \u{1F917} Datasets, um einen Audio-Datensatz zu laden, mit dem Sie experimentieren k\xF6nnen:"),yp.forEach(e),Yl=m(s),d(Ks.$$.fragment,s),Pl=m(s),W=r(s,"P",{});var Na=p(W);ri=u(Na,"Laden Sie den "),Os=r(Na,"A",{href:!0,rel:!0});var mm=p(Os);pi=u(mm,"MInDS-14"),mm.forEach(e),ti=u(Na," Datensatz (weitere Informationen zum Laden eines Datensatzes finden Sie im \u{1F917} "),se=r(Na,"A",{href:!0,rel:!0});var cm=p(se);ii=u(cm,"Datasets tutorial"),cm.forEach(e),ui=u(Na,"):"),Na.forEach(e),Ql=m(s),d(ee.$$.fragment,s),Ll=m(s),js=r(s,"P",{});var $p=p(js);hi=u($p,"Greifen Sie auf das erste Element der "),kn=r($p,"CODE",{});var om=p(kn);mi=u(om,"audio"),om.forEach(e),ci=u($p,"-Spalte zu, um einen Blick auf die Eingabe zu werfen. Durch den Aufruf der Spalte \u201Caudio\u201D wird die Audiodatei automatisch geladen und neu gesampelt:"),$p.forEach(e),Hl=m(s),d(ae.$$.fragment,s),Kl=m(s),da=r(s,"P",{});var bm=p(da);oi=u(bm,"Dies gibt drei Elemente zur\xFCck:"),bm.forEach(e),Ol=m(s),V=r(s,"UL",{});var Da=p(V);Tn=r(Da,"LI",{});var dm=p(Tn);bi=u(dm,"\u201Carray\u201D ist das Sprachsignal, das als 1D-Array geladen - und m\xF6glicherweise neu gesampelt - wurde."),dm.forEach(e),di=m(Da),Jn=r(Da,"LI",{});var jm=p(Jn);ji=u(jm,"Pfad\u201D zeigt auf den Speicherort der Audiodatei."),jm.forEach(e),fi=m(Da),ja=r(Da,"LI",{});var dh=p(ja);Mn=r(dh,"CODE",{});var fm=p(Mn);gi=u(fm,"sampling_rate"),fm.forEach(e),wi=u(dh," bezieht sich darauf, wie viele Datenpunkte im Sprachsignal pro Sekunde gemessen werden."),dh.forEach(e),Da.forEach(e),sr=m(s),q=r(s,"H3",{class:!0});var zp=p(q);fs=r(zp,"A",{id:!0,class:!0,href:!0});var gm=p(fs);yn=r(gm,"SPAN",{});var wm=p(yn);d(ne.$$.fragment,wm),wm.forEach(e),gm.forEach(e),vi=m(zp),$n=r(zp,"SPAN",{});var vm=p($n);ki=u(vm,"Resample"),vm.forEach(e),zp.forEach(e),er=m(s),gs=r(s,"P",{});var _p=p(gs);Ti=u(_p,"F\xFCr dieses Tutorial werden Sie das Modell "),le=r(_p,"A",{href:!0,rel:!0});var km=p(le);Ji=u(km,"Wav2Vec2"),km.forEach(e),Mi=u(_p," verwenden. Wie Sie aus der Modellkarte ersehen k\xF6nnen, ist das Wav2Vec2-Modell auf 16kHz abgetastetes Sprachaudio vortrainiert. Es ist wichtig, dass die Abtastrate Ihrer Audiodaten mit der Abtastrate des Datensatzes \xFCbereinstimmt, der f\xFCr das Pre-Training des Modells verwendet wurde. Wenn die Abtastrate Ihrer Daten nicht dieselbe ist, m\xFCssen Sie Ihre Audiodaten neu abtasten."),_p.forEach(e),ar=m(s),ws=r(s,"P",{});var Ep=p(ws);yi=u(Ep,"Der Datensatz "),re=r(Ep,"A",{href:!0,rel:!0});var Tm=p(re);$i=u(Tm,"MInDS-14"),Tm.forEach(e),zi=u(Ep," hat zum Beispiel eine Abtastrate von 8000 kHz. Um das Wav2Vec2-Modell mit diesem Datensatz verwenden zu k\xF6nnen, m\xFCssen Sie die Abtastrate auf 16 kHz erh\xF6hen:"),Ep.forEach(e),nr=m(s),d(pe.$$.fragment,s),lr=m(s),fa=r(s,"OL",{});var Jm=p(fa);zn=r(Jm,"LI",{});var Mm=p(zn);_i=u(Mm,"Verwenden Sie die Methode [~datasets.Dataset.cast_column] von \u{1F917} Datasets, um die Abtastrate auf 16kHz zu erh\xF6hen:"),Mm.forEach(e),Jm.forEach(e),rr=m(s),d(te.$$.fragment,s),pr=m(s),ie=r(s,"OL",{start:!0});var ym=p(ie);_n=r(ym,"LI",{});var $m=p(_n);Ei=u($m,"Laden Sie die Audiodatei:"),$m.forEach(e),ym.forEach(e),tr=m(s),d(ue.$$.fragment,s),ir=m(s),ga=r(s,"P",{});var zm=p(ga);Ii=u(zm,"Wie Sie sehen k\xF6nnen, ist die Abtastrate jetzt 16kHz!"),zm.forEach(e),ur=m(s),Y=r(s,"H3",{class:!0});var Ip=p(Y);vs=r(Ip,"A",{id:!0,class:!0,href:!0});var _m=p(vs);En=r(_m,"SPAN",{});var Em=p(En);d(he.$$.fragment,Em),Em.forEach(e),_m.forEach(e),Si=m(Ip),In=r(Ip,"SPAN",{});var Im=p(In);Ui=u(Im,"Merkmalsextraktor"),Im.forEach(e),Ip.forEach(e),hr=m(s),_=r(s,"P",{});var Ws=p(_);Ai=u(Ws,"Der n\xE4chste Schritt ist das Laden eines Merkmalsextraktors, um die Eingabe zu normalisieren und aufzuf\xFCllen. Beim Auff\xFCllen von Textdaten wird f\xFCr k\xFCrzere Sequenzen ein "),Sn=r(Ws,"CODE",{});var Sm=p(Sn);Bi=u(Sm,"0"),Sm.forEach(e),xi=u(Ws," hinzugef\xFCgt. Die gleiche Idee gilt f\xFCr Audiodaten, und der Audio-Feature-Extraktor f\xFCgt eine "),Un=r(Ws,"CODE",{});var Um=p(Un);Wi=u(Um,"0"),Um.forEach(e),Vi=u(Ws," - interpretiert als Stille - zu "),An=r(Ws,"CODE",{});var Am=p(An);Gi=u(Am,"array"),Am.forEach(e),Zi=u(Ws," hinzu."),Ws.forEach(e),mr=m(s),ks=r(s,"P",{});var Sp=p(ks);Fi=u(Sp,"Laden Sie den Merkmalsextraktor mit "),Bn=r(Sp,"CODE",{});var Bm=p(Bn);Ci=u(Bm,"AutoFeatureExtractor.from_pretrained()"),Bm.forEach(e),Ri=u(Sp,":"),Sp.forEach(e),cr=m(s),d(me.$$.fragment,s),or=m(s),Ts=r(s,"P",{});var Up=p(Ts);Ni=u(Up,"\xDCbergeben Sie das Audio-\u201DArray\u201D an den Feature-Extraktor. Wir empfehlen auch, das Argument "),xn=r(Up,"CODE",{});var xm=p(xn);Di=u(xm,"sampling_rate"),xm.forEach(e),Xi=u(Up," im Feature Extractor hinzuzuf\xFCgen, um eventuell auftretende stille Fehler besser zu beheben."),Up.forEach(e),br=m(s),d(ce.$$.fragment,s),dr=m(s),P=r(s,"H3",{class:!0});var Ap=p(P);Js=r(Ap,"A",{id:!0,class:!0,href:!0});var Wm=p(Js);Wn=r(Wm,"SPAN",{});var Vm=p(Wn);d(oe.$$.fragment,Vm),Vm.forEach(e),Wm.forEach(e),qi=m(Ap),Vn=r(Ap,"SPAN",{});var Gm=p(Vn);Yi=u(Gm,"Auff\xFCllen und K\xFCrzen"),Gm.forEach(e),Ap.forEach(e),jr=m(s),wa=r(s,"P",{});var Zm=p(wa);Pi=u(Zm,"Genau wie beim Tokenizer k\xF6nnen Sie variable Sequenzen in einem Stapel durch Auff\xFCllen oder Abschneiden behandeln. Werfen Sie einen Blick auf die Sequenzl\xE4nge dieser beiden Audiobeispiele:"),Zm.forEach(e),fr=m(s),d(be.$$.fragment,s),gr=m(s),va=r(s,"P",{});var Fm=p(va);Qi=u(Fm,"Wie Sie sehen k\xF6nnen, hat das erste Beispiel eine l\xE4ngere Sequenz als das zweite Beispiel. Lassen Sie uns eine Funktion erstellen, die den Datensatz vorverarbeitet. Geben Sie eine maximale L\xE4nge der Probe an, und der Feature-Extraktor wird die Sequenzen entweder auff\xFCllen oder abschneiden, damit sie dieser L\xE4nge entsprechen:"),Fm.forEach(e),wr=m(s),d(de.$$.fragment,s),vr=m(s),ka=r(s,"P",{});var Cm=p(ka);Li=u(Cm,"Wenden Sie die Funktion auf die ersten paar Beispiele im Datensatz an:"),Cm.forEach(e),kr=m(s),d(je.$$.fragment,s),Tr=m(s),Ta=r(s,"P",{});var Rm=p(Ta);Hi=u(Rm,"Schauen Sie sich nun noch einmal die verarbeiteten Beispiel-L\xE4ngen an:"),Rm.forEach(e),Jr=m(s),d(fe.$$.fragment,s),Mr=m(s),Ja=r(s,"P",{});var Nm=p(Ja);Ki=u(Nm,"Die L\xE4nge der ersten beiden Beispiele entspricht nun der von Ihnen angegebenen Maximall\xE4nge."),Nm.forEach(e),yr=m(s),Q=r(s,"H2",{class:!0});var Bp=p(Q);Ms=r(Bp,"A",{id:!0,class:!0,href:!0});var Dm=p(Ms);Gn=r(Dm,"SPAN",{});var Xm=p(Gn);d(ge.$$.fragment,Xm),Xm.forEach(e),Dm.forEach(e),Oi=m(Bp),Zn=r(Bp,"SPAN",{});var qm=p(Zn);su=u(qm,"Bildverarbeitung"),qm.forEach(e),Bp.forEach(e),$r=m(s),Ma=r(s,"P",{});var Ym=p(Ma);eu=u(Ym,"Ein Merkmalsextraktor wird auch verwendet, um Bilder f\xFCr Bildverarbeitungsaufgaben zu verarbeiten. Auch hier besteht das Ziel darin, das Rohbild in eine Reihe von Tensoren als Eingabe zu konvertieren."),Ym.forEach(e),zr=m(s),G=r(s,"P",{});var Xa=p(G);au=u(Xa,"Laden wir den "),we=r(Xa,"A",{href:!0,rel:!0});var Pm=p(we);nu=u(Pm,"food101"),Pm.forEach(e),lu=u(Xa," Datensatz f\xFCr dieses Tutorial. Verwenden Sie den Parameter \u{1F917} Datasets "),Fn=r(Xa,"CODE",{});var Qm=p(Fn);ru=u(Qm,"split"),Qm.forEach(e),pu=u(Xa,", um nur eine kleine Stichprobe aus dem Trainingssplit zu laden, da der Datensatz recht gro\xDF ist:"),Xa.forEach(e),_r=m(s),d(ve.$$.fragment,s),Er=m(s),ys=r(s,"P",{});var xp=p(ys);tu=u(xp,"Als N\xE4chstes sehen Sie sich das Bild mit dem Merkmal \u{1F917} Datens\xE4tze [Bild] ("),ke=r(xp,"A",{href:!0,rel:!0});var Lm=p(ke);iu=u(Lm,"https://huggingface.co/docs/datasets/package_reference/main_classes.html?highlight=image#datasets.Image"),Lm.forEach(e),uu=u(xp,") an:"),xp.forEach(e),Ir=m(s),d(Te.$$.fragment,s),Sr=m(s),ya=r(s,"P",{});var Hm=p(ya);$a=r(Hm,"IMG",{src:!0,alt:!0}),Hm.forEach(e),Ur=m(s),L=r(s,"H3",{class:!0});var Wp=p(L);$s=r(Wp,"A",{id:!0,class:!0,href:!0});var Km=p($s);Cn=r(Km,"SPAN",{});var Om=p(Cn);d(Je.$$.fragment,Om),Om.forEach(e),Km.forEach(e),hu=m(Wp),Rn=r(Wp,"SPAN",{});var sc=p(Rn);mu=u(sc,"Merkmalsextraktor"),sc.forEach(e),Wp.forEach(e),Ar=m(s),zs=r(s,"P",{});var Vp=p(zs);cu=u(Vp,"Laden Sie den Merkmalsextraktor mit "),Nn=r(Vp,"CODE",{});var ec=p(Nn);ou=u(ec,"AutoImageProcessor.from_pretrained()"),ec.forEach(e),bu=u(Vp,":"),Vp.forEach(e),Br=m(s),d(Me.$$.fragment,s),xr=m(s),H=r(s,"H3",{class:!0});var Gp=p(H);_s=r(Gp,"A",{id:!0,class:!0,href:!0});var ac=p(_s);Dn=r(ac,"SPAN",{});var nc=p(Dn);d(ye.$$.fragment,nc),nc.forEach(e),ac.forEach(e),du=m(Gp),Xn=r(Gp,"SPAN",{});var lc=p(Xn);ju=u(lc,"Datenerweiterung"),lc.forEach(e),Gp.forEach(e),Wr=m(s),Es=r(s,"P",{});var Zp=p(Es);fu=u(Zp,"Bei Bildverarbeitungsaufgaben ist es \xFCblich, den Bildern als Teil der Vorverarbeitung eine Art von Datenerweiterung hinzuzuf\xFCgen. Sie k\xF6nnen Erweiterungen mit jeder beliebigen Bibliothek hinzuf\xFCgen, aber in diesem Tutorial werden Sie das Modul "),$e=r(Zp,"A",{href:!0,rel:!0});var rc=p($e);qn=r(rc,"CODE",{});var pc=p(qn);gu=u(pc,"transforms"),pc.forEach(e),rc.forEach(e),wu=u(Zp," von torchvision verwenden."),Zp.forEach(e),Vr=m(s),za=r(s,"OL",{});var tc=p(za);I=r(tc,"LI",{});var Vs=p(I);vu=u(Vs,"Normalisieren Sie das Bild und verwenden Sie "),ze=r(Vs,"A",{href:!0,rel:!0});var ic=p(ze);Yn=r(ic,"CODE",{});var uc=p(Yn);ku=u(uc,"Compose"),uc.forEach(e),ic.forEach(e),Tu=u(Vs,", um einige Transformationen - "),_e=r(Vs,"A",{href:!0,rel:!0});var hc=p(_e);Pn=r(hc,"CODE",{});var mc=p(Pn);Ju=u(mc,"RandomResizedCrop"),mc.forEach(e),hc.forEach(e),Mu=u(Vs," und "),Ee=r(Vs,"A",{href:!0,rel:!0});var cc=p(Ee);Qn=r(cc,"CODE",{});var oc=p(Qn);yu=u(oc,"ColorJitter"),oc.forEach(e),cc.forEach(e),$u=u(Vs," - miteinander zu verkn\xFCpfen:"),Vs.forEach(e),tc.forEach(e),Gr=m(s),d(Ie.$$.fragment,s),Zr=m(s),Se=r(s,"OL",{start:!0});var bc=p(Se);K=r(bc,"LI",{});var qa=p(K);zu=u(qa,"Das Modell akzeptiert "),_a=r(qa,"A",{href:!0});var dc=p(_a);Ln=r(dc,"CODE",{});var jc=p(Ln);_u=u(jc,"pixel_values"),jc.forEach(e),dc.forEach(e),Eu=u(qa," als Eingabe. Dieser Wert wird vom Merkmalsextraktor erzeugt. Erstellen Sie eine Funktion, die "),Hn=r(qa,"CODE",{});var fc=p(Hn);Iu=u(fc,"pixel_values"),fc.forEach(e),Su=u(qa," aus den Transformationen erzeugt:"),qa.forEach(e),bc.forEach(e),Fr=m(s),d(Ue.$$.fragment,s),Cr=m(s),Ae=r(s,"OL",{start:!0});var gc=p(Ae);Be=r(gc,"LI",{});var Fp=p(Be);Uu=u(Fp,"Dann verwenden Sie \u{1F917} Datasets "),xe=r(Fp,"A",{href:!0,rel:!0});var wc=p(xe);Kn=r(wc,"CODE",{});var vc=p(Kn);Au=u(vc,"set_transform"),vc.forEach(e),wc.forEach(e),Bu=u(Fp,", um die Transformationen im laufenden Betrieb anzuwenden:"),Fp.forEach(e),gc.forEach(e),Rr=m(s),d(We.$$.fragment,s),Nr=m(s),Ve=r(s,"OL",{start:!0});var kc=p(Ve);On=r(kc,"LI",{});var Tc=p(On);xu=u(Tc,"Wenn Sie nun auf das Bild zugreifen, werden Sie feststellen, dass der Feature Extractor die Modelleingabe \u201Cpixel_values\u201D hinzugef\xFCgt hat:"),Tc.forEach(e),kc.forEach(e),Dr=m(s),d(Ge.$$.fragment,s),Xr=m(s),Ea=r(s,"P",{});var Jc=p(Ea);Wu=u(Jc,"Hier sehen Sie, wie das Bild nach der Vorverarbeitung aussieht. Wie von den angewandten Transformationen zu erwarten, wurde das Bild willk\xFCrlich beschnitten und seine Farbeigenschaften sind anders."),Jc.forEach(e),qr=m(s),d(Ze.$$.fragment,s),Yr=m(s),Ia=r(s,"P",{});var Mc=p(Ia);Sa=r(Mc,"IMG",{src:!0,alt:!0}),Mc.forEach(e),Pr=m(s),O=r(s,"H2",{class:!0});var Cp=p(O);Is=r(Cp,"A",{id:!0,class:!0,href:!0});var yc=p(Is);sl=r(yc,"SPAN",{});var $c=p(sl);d(Fe.$$.fragment,$c),$c.forEach(e),yc.forEach(e),Vu=m(Cp),el=r(Cp,"SPAN",{});var zc=p(el);Gu=u(zc,"Multimodal"),zc.forEach(e),Cp.forEach(e),Qr=m(s),Ua=r(s,"P",{});var _c=p(Ua);Zu=u(_c,"F\xFCr multimodale Aufgaben werden Sie eine Kombination aus allem, was Sie bisher gelernt haben, verwenden und Ihre F\xE4higkeiten auf eine Aufgabe der automatischen Spracherkennung (ASR) anwenden. Dies bedeutet, dass Sie einen:"),_c.forEach(e),Lr=m(s),Ss=r(s,"UL",{});var Rp=p(Ss);al=r(Rp,"LI",{});var Ec=p(al);Fu=u(Ec,"Feature Extractor zur Vorverarbeitung der Audiodaten."),Ec.forEach(e),Cu=m(Rp),nl=r(Rp,"LI",{});var Ic=p(nl);Ru=u(Ic,"Tokenizer, um den Text zu verarbeiten."),Ic.forEach(e),Rp.forEach(e),Hr=m(s),Us=r(s,"P",{});var Np=p(Us);Nu=u(Np,"Kehren wir zum "),Ce=r(Np,"A",{href:!0,rel:!0});var Sc=p(Ce);Du=u(Sc,"LJ Speech"),Sc.forEach(e),Xu=u(Np," Datensatz zur\xFCck:"),Np.forEach(e),Kr=m(s),d(Re.$$.fragment,s),Or=m(s),Aa=r(s,"P",{});var Uc=p(Aa);qu=u(Uc,"Da Sie haupts\xE4chlich an den Spalten \u201CAudio\u201D und \u201CText\u201D interessiert sind, entfernen Sie die anderen Spalten:"),Uc.forEach(e),sp=m(s),d(Ne.$$.fragment,s),ep=m(s),Ba=r(s,"P",{});var Ac=p(Ba);Yu=u(Ac,"Schauen Sie sich nun die Spalten \u201CAudio\u201D und \u201CText\u201D an:"),Ac.forEach(e),ap=m(s),d(De.$$.fragment,s),np=m(s),As=r(s,"P",{});var Dp=p(As);Pu=u(Dp,"Erinnern Sie sich an den fr\xFCheren Abschnitt \xFCber die Verarbeitung von Audiodaten: Sie sollten immer die Abtastrate Ihrer Audiodaten "),xa=r(Dp,"A",{href:!0});var Bc=p(xa);Qu=u(Bc,"resample"),Bc.forEach(e),Lu=u(Dp,", damit sie mit der Abtastrate des Datensatzes \xFCbereinstimmt, der f\xFCr das Vortraining eines Modells verwendet wird:"),Dp.forEach(e),lp=m(s),d(Xe.$$.fragment,s),rp=m(s),ss=r(s,"H3",{class:!0});var Xp=p(ss);Bs=r(Xp,"A",{id:!0,class:!0,href:!0});var xc=p(Bs);ll=r(xc,"SPAN",{});var Wc=p(ll);d(qe.$$.fragment,Wc),Wc.forEach(e),xc.forEach(e),Hu=m(Xp),rl=r(Xp,"SPAN",{});var Vc=p(rl);Ku=u(Vc,"Prozessor"),Vc.forEach(e),Xp.forEach(e),pp=m(s),Wa=r(s,"P",{});var Gc=p(Wa);Ou=u(Gc,"Ein Processor kombiniert einen Feature-Extraktor und einen Tokenizer. Laden Sie einen Processor mit [`AutoProcessor.from_pretrained]:"),Gc.forEach(e),tp=m(s),d(Ye.$$.fragment,s),ip=m(s),Va=r(s,"OL",{});var Zc=p(Va);es=r(Zc,"LI",{});var Ya=p(es);sh=u(Ya,"Erstellen Sie eine Funktion, die die Audiodaten zu "),pl=r(Ya,"CODE",{});var Fc=p(pl);eh=u(Fc,"input_values"),Fc.forEach(e),ah=u(Ya," verarbeitet und den Text zu "),tl=r(Ya,"CODE",{});var Cc=p(tl);nh=u(Cc,"labels"),Cc.forEach(e),lh=u(Ya," tokenisiert. Dies sind Ihre Eingaben f\xFCr das Modell:"),Ya.forEach(e),Zc.forEach(e),up=m(s),d(Pe.$$.fragment,s),hp=m(s),Qe=r(s,"OL",{start:!0});var Rc=p(Qe);il=r(Rc,"LI",{});var Nc=p(il);rh=u(Nc,"Wenden Sie die Funktion \u201Cprepare_dataset\u201D auf ein Beispiel an:"),Nc.forEach(e),Rc.forEach(e),mp=m(s),d(Le.$$.fragment,s),cp=m(s),Z=r(s,"P",{});var Pa=p(Z);ph=u(Pa,"Beachten Sie, dass der Processor "),ul=r(Pa,"CODE",{});var Dc=p(ul);th=u(Dc,"input_values"),Dc.forEach(e),ih=u(Pa," und "),hl=r(Pa,"CODE",{});var Xc=p(hl);uh=u(Xc,"labels"),Xc.forEach(e),hh=u(Pa," hinzugef\xFCgt hat. Auch die Abtastrate wurde korrekt auf 16kHz heruntergerechnet."),Pa.forEach(e),op=m(s),Ga=r(s,"P",{});var qc=p(Ga);mh=u(qc,"Toll, Sie sollten jetzt in der Lage sein, Daten f\xFCr jede Modalit\xE4t vorzuverarbeiten und sogar verschiedene Modalit\xE4ten zu kombinieren! Im n\xE4chsten Kurs lernen Sie, wie Sie ein Modell mit Ihren neu aufbereiteten Daten feinabstimmen k\xF6nnen."),qc.forEach(e),this.h()},h(){c(v,"name","hf:doc:metadata"),c(v,"content",JSON.stringify(mo)),c(J,"id","vorverarbeiten"),c(J,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(J,"href","#vorverarbeiten"),c(o,"class","relative group"),c(ns,"id","nlp"),c(ns,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(ns,"href","#nlp"),c(F,"class","relative group"),c(Oe,"href","main_classes/tokenizer"),c(ps,"id","tokenize"),c(ps,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(ps,"href","#tokenize"),c(C,"class","relative group"),c(na,"href","glossary#input-ids"),c(ra,"href","glossary#attention-mask"),c(ta,"href","glossary#token-type-ids"),c(us,"id","pad"),c(us,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(us,"href","#pad"),c(R,"class","relative group"),c(ms,"id","krzung"),c(ms,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(ms,"href","#krzung"),c(N,"class","relative group"),c(cs,"id","tensoren-erstellen"),c(cs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(cs,"href","#tensoren-erstellen"),c(D,"class","relative group"),c(bs,"id","audio"),c(bs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(bs,"href","#audio"),c(X,"class","relative group"),c(ba,"href","main_classes/feature_extractor"),c(Os,"href","https://huggingface.co/datasets/PolyAI/minds14"),c(Os,"rel","nofollow"),c(se,"href","https://huggingface.co/docs/datasets/load_hub.html"),c(se,"rel","nofollow"),c(fs,"id","resample"),c(fs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(fs,"href","#resample"),c(q,"class","relative group"),c(le,"href","https://huggingface.co/facebook/wav2vec2-base"),c(le,"rel","nofollow"),c(re,"href","https://huggingface.co/datasets/PolyAI/minds14"),c(re,"rel","nofollow"),c(ie,"start","2"),c(vs,"id","merkmalsextraktor"),c(vs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(vs,"href","#merkmalsextraktor"),c(Y,"class","relative group"),c(Js,"id","auffllen-und-krzen"),c(Js,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(Js,"href","#auffllen-und-krzen"),c(P,"class","relative group"),c(Ms,"id","bildverarbeitung"),c(Ms,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(Ms,"href","#bildverarbeitung"),c(Q,"class","relative group"),c(we,"href","https://huggingface.co/datasets/food101"),c(we,"rel","nofollow"),c(ke,"href","https://huggingface.co/docs/datasets/package_reference/main_classes.html?highlight=image#datasets.Image"),c(ke,"rel","nofollow"),Yc($a.src,jh="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/vision-preprocess-tutorial.png")||c($a,"src",jh),c($a,"alt","vision-preprocess-tutorial.png"),c($s,"id","merkmalsextraktor"),c($s,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c($s,"href","#merkmalsextraktor"),c(L,"class","relative group"),c(_s,"id","datenerweiterung"),c(_s,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(_s,"href","#datenerweiterung"),c(H,"class","relative group"),c($e,"href","https://pytorch.org/vision/stable/transforms.html"),c($e,"rel","nofollow"),c(ze,"href","https://pytorch.org/vision/master/generated/torchvision.transforms.Compose.html"),c(ze,"rel","nofollow"),c(_e,"href","https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html"),c(_e,"rel","nofollow"),c(Ee,"href","https://pytorch.org/vision/main/generated/torchvision.transforms.ColorJitter.html"),c(Ee,"rel","nofollow"),c(_a,"href","model_doc/visionencoderdecoder#transformers.VisionEncoderDecoderModel.forward.pixel_values"),c(Se,"start","2"),c(xe,"href","https://huggingface.co/docs/datasets/process.html#format-transform"),c(xe,"rel","nofollow"),c(Ae,"start","3"),c(Ve,"start","4"),Yc(Sa.src,fh="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/preprocessed_image.png")||c(Sa,"src",fh),c(Sa,"alt","preprocessed_image"),c(Is,"id","multimodal"),c(Is,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(Is,"href","#multimodal"),c(O,"class","relative group"),c(Ce,"href","https://huggingface.co/datasets/lj_speech"),c(Ce,"rel","nofollow"),c(xa,"href","preprocessing#audio"),c(Bs,"id","prozessor"),c(Bs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),c(Bs,"href","#prozessor"),c(ss,"class","relative group"),c(Qe,"start","2")},m(s,n){a(document.head,v),t(s,T,n),t(s,o,n),a(o,J),a(J,M),j($,M,null),a(o,S),a(o,as),a(as,qp),t(s,cl,n),j(Gs,s,n),t(s,ol,n),t(s,Ke,n),a(Ke,Yp),t(s,bl,n),t(s,U,n),a(U,Qa),a(Qa,Pp),a(U,Qp),a(U,La),a(La,Lp),a(U,Hp),a(U,Ha),a(Ha,Kp),t(s,dl,n),t(s,F,n),a(F,ns),a(ns,Ka),j(Zs,Ka,null),a(F,Op),a(F,Oa),a(Oa,st),t(s,jl,n),j(Fs,s,n),t(s,fl,n),t(s,A,n),a(A,et),a(A,Oe),a(Oe,at),a(A,nt),a(A,sn),a(sn,lt),a(A,rt),t(s,gl,n),j(ls,s,n),t(s,wl,n),t(s,rs,n),a(rs,pt),a(rs,en),a(en,tt),a(rs,it),t(s,vl,n),t(s,C,n),a(C,ps),a(ps,an),j(Cs,an,null),a(C,ut),a(C,nn),a(nn,ht),t(s,kl,n),t(s,ts,n),a(ts,mt),a(ts,ln),a(ln,ct),a(ts,ot),t(s,Tl,n),j(Rs,s,n),t(s,Jl,n),t(s,sa,n),a(sa,bt),t(s,Ml,n),j(Ns,s,n),t(s,yl,n),t(s,ea,n),a(ea,dt),t(s,$l,n),t(s,B,n),a(B,aa),a(aa,na),a(na,jt),a(aa,ft),a(B,gt),a(B,la),a(la,ra),a(ra,wt),a(la,vt),a(B,kt),a(B,pa),a(pa,ta),a(ta,Tt),a(pa,Jt),t(s,zl,n),t(s,is,n),a(is,Mt),a(is,rn),a(rn,yt),a(is,$t),t(s,_l,n),j(Ds,s,n),t(s,El,n),t(s,x,n),a(x,zt),a(x,pn),a(pn,_t),a(x,Et),a(x,tn),a(tn,It),a(x,St),t(s,Il,n),t(s,ia,n),a(ia,Ut),t(s,Sl,n),j(Xs,s,n),t(s,Ul,n),t(s,R,n),a(R,us),a(us,un),j(qs,un,null),a(R,At),a(R,hn),a(hn,Bt),t(s,Al,n),t(s,hs,n),a(hs,xt),a(hs,mn),a(mn,Wt),a(hs,Vt),t(s,Bl,n),t(s,ua,n),a(ua,Gt),t(s,xl,n),j(Ys,s,n),t(s,Wl,n),t(s,ha,n),a(ha,Zt),t(s,Vl,n),t(s,N,n),a(N,ms),a(ms,cn),j(Ps,cn,null),a(N,Ft),a(N,on),a(on,Ct),t(s,Gl,n),t(s,ma,n),a(ma,Rt),t(s,Zl,n),t(s,ca,n),a(ca,Nt),t(s,Fl,n),j(Qs,s,n),t(s,Cl,n),t(s,D,n),a(D,cs),a(cs,bn),j(Ls,bn,null),a(D,Dt),a(D,dn),a(dn,Xt),t(s,Rl,n),t(s,oa,n),a(oa,qt),t(s,Nl,n),t(s,z,n),a(z,Yt),a(z,jn),a(jn,Pt),a(z,Qt),a(z,fn),a(fn,Lt),a(z,Ht),a(z,gn),a(gn,Kt),a(z,Ot),t(s,Dl,n),j(os,s,n),t(s,Xl,n),t(s,X,n),a(X,bs),a(bs,wn),j(Hs,wn,null),a(X,si),a(X,vn),a(vn,ei),t(s,ql,n),t(s,ds,n),a(ds,ai),a(ds,ba),a(ba,ni),a(ds,li),t(s,Yl,n),j(Ks,s,n),t(s,Pl,n),t(s,W,n),a(W,ri),a(W,Os),a(Os,pi),a(W,ti),a(W,se),a(se,ii),a(W,ui),t(s,Ql,n),j(ee,s,n),t(s,Ll,n),t(s,js,n),a(js,hi),a(js,kn),a(kn,mi),a(js,ci),t(s,Hl,n),j(ae,s,n),t(s,Kl,n),t(s,da,n),a(da,oi),t(s,Ol,n),t(s,V,n),a(V,Tn),a(Tn,bi),a(V,di),a(V,Jn),a(Jn,ji),a(V,fi),a(V,ja),a(ja,Mn),a(Mn,gi),a(ja,wi),t(s,sr,n),t(s,q,n),a(q,fs),a(fs,yn),j(ne,yn,null),a(q,vi),a(q,$n),a($n,ki),t(s,er,n),t(s,gs,n),a(gs,Ti),a(gs,le),a(le,Ji),a(gs,Mi),t(s,ar,n),t(s,ws,n),a(ws,yi),a(ws,re),a(re,$i),a(ws,zi),t(s,nr,n),j(pe,s,n),t(s,lr,n),t(s,fa,n),a(fa,zn),a(zn,_i),t(s,rr,n),j(te,s,n),t(s,pr,n),t(s,ie,n),a(ie,_n),a(_n,Ei),t(s,tr,n),j(ue,s,n),t(s,ir,n),t(s,ga,n),a(ga,Ii),t(s,ur,n),t(s,Y,n),a(Y,vs),a(vs,En),j(he,En,null),a(Y,Si),a(Y,In),a(In,Ui),t(s,hr,n),t(s,_,n),a(_,Ai),a(_,Sn),a(Sn,Bi),a(_,xi),a(_,Un),a(Un,Wi),a(_,Vi),a(_,An),a(An,Gi),a(_,Zi),t(s,mr,n),t(s,ks,n),a(ks,Fi),a(ks,Bn),a(Bn,Ci),a(ks,Ri),t(s,cr,n),j(me,s,n),t(s,or,n),t(s,Ts,n),a(Ts,Ni),a(Ts,xn),a(xn,Di),a(Ts,Xi),t(s,br,n),j(ce,s,n),t(s,dr,n),t(s,P,n),a(P,Js),a(Js,Wn),j(oe,Wn,null),a(P,qi),a(P,Vn),a(Vn,Yi),t(s,jr,n),t(s,wa,n),a(wa,Pi),t(s,fr,n),j(be,s,n),t(s,gr,n),t(s,va,n),a(va,Qi),t(s,wr,n),j(de,s,n),t(s,vr,n),t(s,ka,n),a(ka,Li),t(s,kr,n),j(je,s,n),t(s,Tr,n),t(s,Ta,n),a(Ta,Hi),t(s,Jr,n),j(fe,s,n),t(s,Mr,n),t(s,Ja,n),a(Ja,Ki),t(s,yr,n),t(s,Q,n),a(Q,Ms),a(Ms,Gn),j(ge,Gn,null),a(Q,Oi),a(Q,Zn),a(Zn,su),t(s,$r,n),t(s,Ma,n),a(Ma,eu),t(s,zr,n),t(s,G,n),a(G,au),a(G,we),a(we,nu),a(G,lu),a(G,Fn),a(Fn,ru),a(G,pu),t(s,_r,n),j(ve,s,n),t(s,Er,n),t(s,ys,n),a(ys,tu),a(ys,ke),a(ke,iu),a(ys,uu),t(s,Ir,n),j(Te,s,n),t(s,Sr,n),t(s,ya,n),a(ya,$a),t(s,Ur,n),t(s,L,n),a(L,$s),a($s,Cn),j(Je,Cn,null),a(L,hu),a(L,Rn),a(Rn,mu),t(s,Ar,n),t(s,zs,n),a(zs,cu),a(zs,Nn),a(Nn,ou),a(zs,bu),t(s,Br,n),j(Me,s,n),t(s,xr,n),t(s,H,n),a(H,_s),a(_s,Dn),j(ye,Dn,null),a(H,du),a(H,Xn),a(Xn,ju),t(s,Wr,n),t(s,Es,n),a(Es,fu),a(Es,$e),a($e,qn),a(qn,gu),a(Es,wu),t(s,Vr,n),t(s,za,n),a(za,I),a(I,vu),a(I,ze),a(ze,Yn),a(Yn,ku),a(I,Tu),a(I,_e),a(_e,Pn),a(Pn,Ju),a(I,Mu),a(I,Ee),a(Ee,Qn),a(Qn,yu),a(I,$u),t(s,Gr,n),j(Ie,s,n),t(s,Zr,n),t(s,Se,n),a(Se,K),a(K,zu),a(K,_a),a(_a,Ln),a(Ln,_u),a(K,Eu),a(K,Hn),a(Hn,Iu),a(K,Su),t(s,Fr,n),j(Ue,s,n),t(s,Cr,n),t(s,Ae,n),a(Ae,Be),a(Be,Uu),a(Be,xe),a(xe,Kn),a(Kn,Au),a(Be,Bu),t(s,Rr,n),j(We,s,n),t(s,Nr,n),t(s,Ve,n),a(Ve,On),a(On,xu),t(s,Dr,n),j(Ge,s,n),t(s,Xr,n),t(s,Ea,n),a(Ea,Wu),t(s,qr,n),j(Ze,s,n),t(s,Yr,n),t(s,Ia,n),a(Ia,Sa),t(s,Pr,n),t(s,O,n),a(O,Is),a(Is,sl),j(Fe,sl,null),a(O,Vu),a(O,el),a(el,Gu),t(s,Qr,n),t(s,Ua,n),a(Ua,Zu),t(s,Lr,n),t(s,Ss,n),a(Ss,al),a(al,Fu),a(Ss,Cu),a(Ss,nl),a(nl,Ru),t(s,Hr,n),t(s,Us,n),a(Us,Nu),a(Us,Ce),a(Ce,Du),a(Us,Xu),t(s,Kr,n),j(Re,s,n),t(s,Or,n),t(s,Aa,n),a(Aa,qu),t(s,sp,n),j(Ne,s,n),t(s,ep,n),t(s,Ba,n),a(Ba,Yu),t(s,ap,n),j(De,s,n),t(s,np,n),t(s,As,n),a(As,Pu),a(As,xa),a(xa,Qu),a(As,Lu),t(s,lp,n),j(Xe,s,n),t(s,rp,n),t(s,ss,n),a(ss,Bs),a(Bs,ll),j(qe,ll,null),a(ss,Hu),a(ss,rl),a(rl,Ku),t(s,pp,n),t(s,Wa,n),a(Wa,Ou),t(s,tp,n),j(Ye,s,n),t(s,ip,n),t(s,Va,n),a(Va,es),a(es,sh),a(es,pl),a(pl,eh),a(es,ah),a(es,tl),a(tl,nh),a(es,lh),t(s,up,n),j(Pe,s,n),t(s,hp,n),t(s,Qe,n),a(Qe,il),a(il,rh),t(s,mp,n),j(Le,s,n),t(s,cp,n),t(s,Z,n),a(Z,ph),a(Z,ul),a(ul,th),a(Z,ih),a(Z,hl),a(hl,uh),a(Z,hh),t(s,op,n),t(s,Ga,n),a(Ga,mh),bp=!0},p(s,[n]){const He={};n&2&&(He.$$scope={dirty:n,ctx:s}),ls.$set(He);const ml={};n&2&&(ml.$$scope={dirty:n,ctx:s}),os.$set(ml)},i(s){bp||(f($.$$.fragment,s),f(Gs.$$.fragment,s),f(Zs.$$.fragment,s),f(Fs.$$.fragment,s),f(ls.$$.fragment,s),f(Cs.$$.fragment,s),f(Rs.$$.fragment,s),f(Ns.$$.fragment,s),f(Ds.$$.fragment,s),f(Xs.$$.fragment,s),f(qs.$$.fragment,s),f(Ys.$$.fragment,s),f(Ps.$$.fragment,s),f(Qs.$$.fragment,s),f(Ls.$$.fragment,s),f(os.$$.fragment,s),f(Hs.$$.fragment,s),f(Ks.$$.fragment,s),f(ee.$$.fragment,s),f(ae.$$.fragment,s),f(ne.$$.fragment,s),f(pe.$$.fragment,s),f(te.$$.fragment,s),f(ue.$$.fragment,s),f(he.$$.fragment,s),f(me.$$.fragment,s),f(ce.$$.fragment,s),f(oe.$$.fragment,s),f(be.$$.fragment,s),f(de.$$.fragment,s),f(je.$$.fragment,s),f(fe.$$.fragment,s),f(ge.$$.fragment,s),f(ve.$$.fragment,s),f(Te.$$.fragment,s),f(Je.$$.fragment,s),f(Me.$$.fragment,s),f(ye.$$.fragment,s),f(Ie.$$.fragment,s),f(Ue.$$.fragment,s),f(We.$$.fragment,s),f(Ge.$$.fragment,s),f(Ze.$$.fragment,s),f(Fe.$$.fragment,s),f(Re.$$.fragment,s),f(Ne.$$.fragment,s),f(De.$$.fragment,s),f(Xe.$$.fragment,s),f(qe.$$.fragment,s),f(Ye.$$.fragment,s),f(Pe.$$.fragment,s),f(Le.$$.fragment,s),bp=!0)},o(s){g($.$$.fragment,s),g(Gs.$$.fragment,s),g(Zs.$$.fragment,s),g(Fs.$$.fragment,s),g(ls.$$.fragment,s),g(Cs.$$.fragment,s),g(Rs.$$.fragment,s),g(Ns.$$.fragment,s),g(Ds.$$.fragment,s),g(Xs.$$.fragment,s),g(qs.$$.fragment,s),g(Ys.$$.fragment,s),g(Ps.$$.fragment,s),g(Qs.$$.fragment,s),g(Ls.$$.fragment,s),g(os.$$.fragment,s),g(Hs.$$.fragment,s),g(Ks.$$.fragment,s),g(ee.$$.fragment,s),g(ae.$$.fragment,s),g(ne.$$.fragment,s),g(pe.$$.fragment,s),g(te.$$.fragment,s),g(ue.$$.fragment,s),g(he.$$.fragment,s),g(me.$$.fragment,s),g(ce.$$.fragment,s),g(oe.$$.fragment,s),g(be.$$.fragment,s),g(de.$$.fragment,s),g(je.$$.fragment,s),g(fe.$$.fragment,s),g(ge.$$.fragment,s),g(ve.$$.fragment,s),g(Te.$$.fragment,s),g(Je.$$.fragment,s),g(Me.$$.fragment,s),g(ye.$$.fragment,s),g(Ie.$$.fragment,s),g(Ue.$$.fragment,s),g(We.$$.fragment,s),g(Ge.$$.fragment,s),g(Ze.$$.fragment,s),g(Fe.$$.fragment,s),g(Re.$$.fragment,s),g(Ne.$$.fragment,s),g(De.$$.fragment,s),g(Xe.$$.fragment,s),g(qe.$$.fragment,s),g(Ye.$$.fragment,s),g(Pe.$$.fragment,s),g(Le.$$.fragment,s),bp=!1},d(s){e(v),s&&e(T),s&&e(o),w($),s&&e(cl),w(Gs,s),s&&e(ol),s&&e(Ke),s&&e(bl),s&&e(U),s&&e(dl),s&&e(F),w(Zs),s&&e(jl),w(Fs,s),s&&e(fl),s&&e(A),s&&e(gl),w(ls,s),s&&e(wl),s&&e(rs),s&&e(vl),s&&e(C),w(Cs),s&&e(kl),s&&e(ts),s&&e(Tl),w(Rs,s),s&&e(Jl),s&&e(sa),s&&e(Ml),w(Ns,s),s&&e(yl),s&&e(ea),s&&e($l),s&&e(B),s&&e(zl),s&&e(is),s&&e(_l),w(Ds,s),s&&e(El),s&&e(x),s&&e(Il),s&&e(ia),s&&e(Sl),w(Xs,s),s&&e(Ul),s&&e(R),w(qs),s&&e(Al),s&&e(hs),s&&e(Bl),s&&e(ua),s&&e(xl),w(Ys,s),s&&e(Wl),s&&e(ha),s&&e(Vl),s&&e(N),w(Ps),s&&e(Gl),s&&e(ma),s&&e(Zl),s&&e(ca),s&&e(Fl),w(Qs,s),s&&e(Cl),s&&e(D),w(Ls),s&&e(Rl),s&&e(oa),s&&e(Nl),s&&e(z),s&&e(Dl),w(os,s),s&&e(Xl),s&&e(X),w(Hs),s&&e(ql),s&&e(ds),s&&e(Yl),w(Ks,s),s&&e(Pl),s&&e(W),s&&e(Ql),w(ee,s),s&&e(Ll),s&&e(js),s&&e(Hl),w(ae,s),s&&e(Kl),s&&e(da),s&&e(Ol),s&&e(V),s&&e(sr),s&&e(q),w(ne),s&&e(er),s&&e(gs),s&&e(ar),s&&e(ws),s&&e(nr),w(pe,s),s&&e(lr),s&&e(fa),s&&e(rr),w(te,s),s&&e(pr),s&&e(ie),s&&e(tr),w(ue,s),s&&e(ir),s&&e(ga),s&&e(ur),s&&e(Y),w(he),s&&e(hr),s&&e(_),s&&e(mr),s&&e(ks),s&&e(cr),w(me,s),s&&e(or),s&&e(Ts),s&&e(br),w(ce,s),s&&e(dr),s&&e(P),w(oe),s&&e(jr),s&&e(wa),s&&e(fr),w(be,s),s&&e(gr),s&&e(va),s&&e(wr),w(de,s),s&&e(vr),s&&e(ka),s&&e(kr),w(je,s),s&&e(Tr),s&&e(Ta),s&&e(Jr),w(fe,s),s&&e(Mr),s&&e(Ja),s&&e(yr),s&&e(Q),w(ge),s&&e($r),s&&e(Ma),s&&e(zr),s&&e(G),s&&e(_r),w(ve,s),s&&e(Er),s&&e(ys),s&&e(Ir),w(Te,s),s&&e(Sr),s&&e(ya),s&&e(Ur),s&&e(L),w(Je),s&&e(Ar),s&&e(zs),s&&e(Br),w(Me,s),s&&e(xr),s&&e(H),w(ye),s&&e(Wr),s&&e(Es),s&&e(Vr),s&&e(za),s&&e(Gr),w(Ie,s),s&&e(Zr),s&&e(Se),s&&e(Fr),w(Ue,s),s&&e(Cr),s&&e(Ae),s&&e(Rr),w(We,s),s&&e(Nr),s&&e(Ve),s&&e(Dr),w(Ge,s),s&&e(Xr),s&&e(Ea),s&&e(qr),w(Ze,s),s&&e(Yr),s&&e(Ia),s&&e(Pr),s&&e(O),w(Fe),s&&e(Qr),s&&e(Ua),s&&e(Lr),s&&e(Ss),s&&e(Hr),s&&e(Us),s&&e(Kr),w(Re,s),s&&e(Or),s&&e(Aa),s&&e(sp),w(Ne,s),s&&e(ep),s&&e(Ba),s&&e(ap),w(De,s),s&&e(np),s&&e(As),s&&e(lp),w(Xe,s),s&&e(rp),s&&e(ss),w(qe),s&&e(pp),s&&e(Wa),s&&e(tp),w(Ye,s),s&&e(ip),s&&e(Va),s&&e(up),w(Pe,s),s&&e(hp),s&&e(Qe),s&&e(mp),w(Le,s),s&&e(cp),s&&e(Z),s&&e(op),s&&e(Ga)}}}const mo={local:"vorverarbeiten",sections:[{local:"nlp",sections:[{local:"tokenize",title:"Tokenize"},{local:"pad",title:"Pad"},{local:"krzung",title:"K\xFCrzung"},{local:"tensoren-erstellen",title:"Tensoren erstellen"}],title:"NLP"},{local:"audio",sections:[{local:"resample",title:"Resample"},{local:"merkmalsextraktor",title:"Merkmalsextraktor"},{local:"auffllen-und-krzen",title:"Auff\xFCllen und K\xFCrzen"}],title:"Audio"},{local:"bildverarbeitung",sections:[{local:"merkmalsextraktor",title:"Merkmalsextraktor"},{local:"datenerweiterung",title:"Datenerweiterung"}],title:"Bildverarbeitung"},{local:"multimodal",sections:[{local:"prozessor",title:"Prozessor"}],title:"Multimodal"}],title:"Vorverarbeiten"};function co(E){return so(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ko extends Lc{constructor(v){super();Hc(this,v,co,ho,Kc,{})}}export{ko as default,mo as metadata};

Xet Storage Details

Size:
119 kB
·
Xet hash:
3635dc638648ff718ab863c737b4c63ef790ca374654ff2991fae61aeee839c2

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.