Buckets:
| import{S as Iu,i as Uu,s as Cu,e as n,k as c,t as p,c as t,a as r,m as u,h as o,d as a,b as m,g as i,G as e,Q as Fn,q as g,l as yd,n as wp,o as v,B as w,p as $p,w as $,y as T,j as Ud,K as Cd,U as Td,x as E,V as Gd,T as Vd,Y as Md,Z as Ed,M as Bd,N as Jd,v as qd}from"../chunks/vendor-hf-doc-builder.js";import{T as Zd}from"../chunks/Tip-hf-doc-builder.js";import{Y as Ad}from"../chunks/Youtube-hf-doc-builder.js";import{I as V}from"../chunks/IconCopyLink-hf-doc-builder.js";import{a as kd,C}from"../chunks/CodeBlock-hf-doc-builder.js";import{b as zd,I as Rd,a as Nd}from"../chunks/IconTensorflow-hf-doc-builder.js";import{D as xd}from"../chunks/DocNotebookDropdown-hf-doc-builder.js";function _d(k,h,j){const b=k.slice();return b[8]=h[j],b[10]=j,b}function wd(k){let h,j,b;var y=k[8].icon;function f(M){return{props:{classNames:"mr-1.5"}}}return y&&(h=new y(f())),{c(){h&&$(h.$$.fragment),j=yd()},l(M){h&&E(h.$$.fragment,M),j=yd()},m(M,J){h&&T(h,M,J),i(M,j,J),b=!0},p(M,J){if(y!==(y=M[8].icon)){if(h){wp();const _=h;v(_.$$.fragment,1,0,()=>{w(_,1)}),$p()}y?(h=new y(f()),$(h.$$.fragment),g(h.$$.fragment,1),T(h,j.parentNode,j)):h=null}},i(M){b||(h&&g(h.$$.fragment,M),b=!0)},o(M){h&&v(h.$$.fragment,M),b=!1},d(M){M&&a(j),h&&w(h,M)}}}function $d(k){let h,j,b,y=k[8].name+"",f,M,J,_,d,z,I,U=k[8].icon&&wd(k);function bs(){return k[6](k[8])}return{c(){h=n("button"),U&&U.c(),j=c(),b=n("p"),f=p(y),J=c(),this.h()},l(B){h=t(B,"BUTTON",{class:!0});var G=r(h);U&&U.l(G),j=u(G),b=t(G,"P",{class:!0});var q=r(b);f=o(q,y),q.forEach(a),J=u(G),G.forEach(a),this.h()},h(){m(b,"class",M="!m-0 "+k[8].classNames),m(h,"class",_="flex justify-center py-1.5 px-2.5 focus:outline-none rounded-"+(k[10]?"r":"l")+" "+(k[8].group!==k[1]&&"text-gray-500 filter 
grayscale"))},m(B,G){i(B,h,G),U&&U.m(h,null),e(h,j),e(h,b),e(b,f),e(h,J),d=!0,z||(I=Fn(h,"click",bs),z=!0)},p(B,G){k=B,k[8].icon?U?(U.p(k,G),G&1&&g(U,1)):(U=wd(k),U.c(),g(U,1),U.m(h,j)):U&&(wp(),v(U,1,1,()=>{U=null}),$p()),(!d||G&1)&&y!==(y=k[8].name+"")&&Ud(f,y),(!d||G&1&&M!==(M="!m-0 "+k[8].classNames))&&m(b,"class",M),(!d||G&3&&_!==(_="flex justify-center py-1.5 px-2.5 focus:outline-none rounded-"+(k[10]?"r":"l")+" "+(k[8].group!==k[1]&&"text-gray-500 filter grayscale")))&&m(h,"class",_)},i(B){d||(g(U),d=!0)},o(B){v(U),d=!1},d(B){B&&a(h),U&&U.d(),z=!1,I()}}}function Wd(k){let h,j,b,y=k[3].filter(k[5]),f=[];for(let J=0;J<y.length;J+=1)f[J]=$d(_d(k,y,J));const M=J=>v(f[J],1,1,()=>{f[J]=null});return{c(){h=n("div"),j=n("div");for(let J=0;J<f.length;J+=1)f[J].c();this.h()},l(J){h=t(J,"DIV",{});var _=r(h);j=t(_,"DIV",{class:!0});var d=r(j);for(let z=0;z<f.length;z+=1)f[z].l(d);d.forEach(a),_.forEach(a),this.h()},h(){m(j,"class","bg-white leading-none border border-gray-100 rounded-lg inline-flex p-0.5 text-sm mb-4 select-none")},m(J,_){i(J,h,_),e(h,j);for(let d=0;d<f.length;d+=1)f[d].m(j,null);b=!0},p(J,[_]){if(_&27){y=J[3].filter(J[5]);let d;for(d=0;d<y.length;d+=1){const z=_d(J,y,d);f[d]?(f[d].p(z,_),g(f[d],1)):(f[d]=$d(z),f[d].c(),g(f[d],1),f[d].m(j,null))}for(wp(),d=y.length;d<f.length;d+=1)M(d);$p()}},i(J){if(!b){for(let _=0;_<y.length;_+=1)g(f[_]);b=!0}},o(J){f=f.filter(Boolean);for(let _=0;_<f.length;_+=1)v(f[_]);b=!1},d(J){J&&a(h),Cd(f,J)}}}function Fd(k,h,j){let b,{ids:y}=h;const f=y.join("-"),M=zd(f);Td(k,M,I=>j(1,b=I));const J=[{id:"pt",classNames:"",icon:Rd,name:"Pytorch",group:"group1"},{id:"tf",classNames:"",icon:Nd,name:"TensorFlow",group:"group2"},{id:"stringapi",classNames:"text-blue-600",name:"String API",group:"group1"},{id:"readinstruction",classNames:"text-blue-600",name:"ReadInstruction",group:"group2"}];function _(I){Gd(M,b=I,b)}const d=I=>y.includes(I.id),z=I=>_(I.group);return k.$$set=I=>{"ids"in I&&j(0,y=I.ids)},[y,b,M,J,_,d,z]}class Id 
extends Iu{constructor(h){super();Uu(this,h,Fd,Wd,Cu,{ids:0})}}function Xd(k){let h,j,b,y,f,M,J=k[1].highlighted+"",_;return j=new kd({props:{classNames:"transition duration-200 ease-in-out "+(k[2]&&"opacity-0"),title:"Copy code excerpt to clipboard",value:k[1].code}}),f=new Id({props:{ids:k[4]}}),{c(){h=n("div"),$(j.$$.fragment),b=c(),y=n("pre"),$(f.$$.fragment),M=new Md,this.h()},l(d){h=t(d,"DIV",{class:!0});var z=r(h);E(j.$$.fragment,z),z.forEach(a),b=u(d),y=t(d,"PRE",{});var I=r(y);E(f.$$.fragment,I),M=Ed(I),I.forEach(a),this.h()},h(){m(h,"class","absolute top-2.5 right-4"),M.a=null},m(d,z){i(d,h,z),T(j,h,null),i(d,b,z),i(d,y,z),T(f,y,null),M.m(J,y),_=!0},p(d,z){const I={};z&4&&(I.classNames="transition duration-200 ease-in-out "+(d[2]&&"opacity-0")),z&2&&(I.value=d[1].code),j.$set(I),(!_||z&2)&&J!==(J=d[1].highlighted+"")&&M.p(J)},i(d){_||(g(j.$$.fragment,d),g(f.$$.fragment,d),_=!0)},o(d){v(j.$$.fragment,d),v(f.$$.fragment,d),_=!1},d(d){d&&a(h),w(j),d&&a(b),d&&a(y),w(f)}}}function Yd(k){let h,j,b,y,f,M,J=k[0].highlighted+"",_;return j=new kd({props:{classNames:"transition duration-200 ease-in-out "+(k[2]&&"opacity-0"),title:"Copy code excerpt to clipboard",value:k[0].code}}),f=new Id({props:{ids:k[4]}}),{c(){h=n("div"),$(j.$$.fragment),b=c(),y=n("pre"),$(f.$$.fragment),M=new Md,this.h()},l(d){h=t(d,"DIV",{class:!0});var z=r(h);E(j.$$.fragment,z),z.forEach(a),b=u(d),y=t(d,"PRE",{});var I=r(y);E(f.$$.fragment,I),M=Ed(I),I.forEach(a),this.h()},h(){m(h,"class","absolute top-2.5 right-4"),M.a=null},m(d,z){i(d,h,z),T(j,h,null),i(d,b,z),i(d,y,z),T(f,y,null),M.m(J,y),_=!0},p(d,z){const I={};z&4&&(I.classNames="transition duration-200 ease-in-out "+(d[2]&&"opacity-0")),z&1&&(I.value=d[0].code),j.$set(I),(!_||z&1)&&J!==(J=d[0].highlighted+"")&&M.p(J)},i(d){_||(g(j.$$.fragment,d),g(f.$$.fragment,d),_=!0)},o(d){v(j.$$.fragment,d),v(f.$$.fragment,d),_=!1},d(d){d&&a(h),w(j),d&&a(b),d&&a(y),w(f)}}}function Pd(k){let h,j,b,y,f,M;const J=[Yd,Xd],_=[];function d(z,I){return 
z[3]==="group1"?0:1}return j=d(k),b=_[j]=J[j](k),{c(){h=n("div"),b.c(),this.h()},l(z){h=t(z,"DIV",{class:!0});var I=r(h);b.l(I),I.forEach(a),this.h()},h(){m(h,"class","code-block relative")},m(z,I){i(z,h,I),_[j].m(h,null),y=!0,f||(M=[Fn(h,"mouseover",k[6]),Fn(h,"focus",k[6]),Fn(h,"mouseout",k[7]),Fn(h,"focus",k[7])],f=!0)},p(z,[I]){let U=j;j=d(z),j===U?_[j].p(z,I):(wp(),v(_[U],1,1,()=>{_[U]=null}),$p(),b=_[j],b?b.p(z,I):(b=_[j]=J[j](z),b.c()),g(b,1),b.m(h,null))},i(z){y||(g(b),y=!0)},o(z){v(b),y=!1},d(z){z&&a(h),_[j].d(),f=!1,Vd(M)}}}function Qd(k,h,j){let b,{group1:y}=h,{group2:f}=h;const M=[y.id,f.id],J=M.join("-"),_=zd(J);Td(k,_,U=>j(3,b=U));let d=!0;function z(){j(2,d=!1)}function I(){j(2,d=!0)}return k.$$set=U=>{"group1"in U&&j(0,y=U.group1),"group2"in U&&j(1,f=U.group2)},[y,f,d,b,M,_,z,I]}class Dd extends Iu{constructor(h){super();Uu(this,h,Qd,Pd,Cu,{group1:0,group2:1})}}function Sd(k){let h,j,b,y,f;return{c(){h=n("p"),j=p("Se stai pensando si utilizzare un modello preaddestrato, \xE8 importante utilizzare il tokenizer preaddestrato associato. Questo assicura che il testo sia separato allo stesso modo che nel corpus usato per l\u2019addestramento, e venga usata la stessa mappatura tokens-to-index (solitamente indicato come il "),b=n("em"),y=p("vocabolario"),f=p(") come nel preaddestramento.")},l(M){h=t(M,"P",{});var J=r(h);j=o(J,"Se stai pensando si utilizzare un modello preaddestrato, \xE8 importante utilizzare il tokenizer preaddestrato associato. 
Questo assicura che il testo sia separato allo stesso modo che nel corpus usato per l\u2019addestramento, e venga usata la stessa mappatura tokens-to-index (solitamente indicato come il "),b=t(J,"EM",{});var _=r(b);y=o(_,"vocabolario"),_.forEach(a),f=o(J,") come nel preaddestramento."),J.forEach(a)},m(M,J){i(M,h,J),e(h,j),e(h,b),e(b,y),e(h,f)},d(M){M&&a(h)}}}function Hd(k){let h,j,b,y,f,M,J,_,d,z,I,U,bs,B,G,q,hl,Tp,Mp,dl,Ep,kp,jl,zp,Xn,ss,fs,bl,Hs,Ip,fl,Up,Yn,Ls,Pn,N,Cp,de,Gp,Vp,gl,Bp,qp,Qn,gs,Dn,x,Zp,vl,Ap,Rp,yl,Np,xp,Sn,as,vs,Jl,Os,Wp,_l,Fp,Hn,ys,Xp,wl,Yp,Pp,Ln,Ks,On,je,Qp,Kn,sa,st,be,Dp,at,W,fe,ge,Sp,Hp,Lp,ve,ye,Op,Kp,so,Je,_e,ao,eo,et,Js,lo,$l,no,to,lt,aa,nt,F,ro,Tl,po,oo,Ml,io,co,tt,we,uo,rt,ea,pt,es,_s,El,la,mo,kl,ho,ot,ws,jo,zl,bo,fo,it,X,go,Il,vo,yo,Ul,Jo,_o,ct,na,ut,$s,wo,Cl,$o,To,mt,ls,Ts,Gl,ta,Mo,Vl,Eo,ht,$e,ko,dt,Y,zo,Bl,Io,Uo,ql,Co,Go,jt,ra,bt,ns,Ms,Zl,pa,Vo,Al,Bo,ft,Te,qo,gt,Z,Zo,Rl,Ao,Ro,Nl,No,xo,xl,Wo,Fo,vt,oa,yt,ts,Es,Wl,ia,Xo,Fl,Yo,Jt,ks,Po,Me,Qo,Do,_t,ca,wt,P,So,ua,Ho,Lo,ma,Oo,Ko,$t,ha,Tt,Q,si,Xl,ai,ei,Yl,li,ni,Mt,da,Et,Ee,ti,kt,D,ke,Pl,ri,pi,oi,ze,Ql,ii,ci,ui,Ie,Dl,mi,hi,zt,rs,zs,Sl,ja,di,Hl,ji,It,Is,bi,ba,fi,gi,Ut,Us,vi,fa,yi,Ji,Ct,ga,Gt,Ue,va,_i,ya,Ll,wi,$i,Vt,Ja,Bt,_a,Ol,Ti,qt,wa,Zt,Cs,Mi,Kl,Ei,ki,At,ps,Gs,sn,$a,zi,an,Ii,Rt,A,Ui,en,Ci,Gi,ln,Vi,Bi,nn,qi,Zi,Nt,Vs,Ai,tn,Ri,Ni,xt,Ta,Wt,S,xi,rn,Wi,Fi,pn,Xi,Yi,Ft,Ma,Xt,os,Bs,on,Ea,Pi,cn,Qi,Yt,Ce,Di,Pt,ka,Qt,Ge,Si,Dt,za,St,Ve,Hi,Ht,Ia,Lt,Be,Li,Ot,Ua,Kt,qe,Oi,sr,is,qs,un,Ca,Ki,mn,sc,ar,Ze,ac,er,H,ec,Ga,lc,nc,hn,tc,rc,lr,Va,nr,Zs,pc,Ba,dn,oc,ic,tr,qa,rr,Ae,Re,Gu,pr,cs,As,jn,Za,cc,bn,uc,or,Rs,mc,fn,hc,dc,ir,Aa,cr,us,Ns,gn,Ra,jc,vn,bc,ur,xs,fc,Na,yn,gc,vc,mr,Ne,R,yc,xa,Jn,Jc,_c,Wa,_n,wc,$c,Fa,wn,Tc,Mc,hr,Xa,dr,Ya,ms,Ec,xe,$n,kc,zc,Tn,Ic,Uc,jr,Pa,br,Qa,Da,Cc,Sa,Mn,Gc,Vc,fr,Ha,gr,La,Oa,Bc,En,qc,Zc,vr,Ka,yr,We,Ac,Jr,se,_r,Fe,Xe,Vu,wr,hs,Ws,kn,ae,Rc,zn,Nc,$r,Ye,xc,Tr,Fs,In,Wc,Fc,Un,Xc,Mr,Xs,Yc,ee,Pc,Qc,Er,le,kr,L,Dc,Cn,Sc,Hc,Gn,Lc,Oc,zr,ne,Ir,O,Kc,Vn,su,au,Bn,eu,lu,Ur,te,Cr,Ys,nu,Pe,tu,ru,Gr,re,Vr,ds,Ps,qn,p
e,pu,Zn,ou,Br,Qe,iu,qr,oe,Zr,De,js,cu,An,uu,mu,Rn,hu,du,Ar,ie,Rr,ce,ue,ju,Nn,bu,fu,Nr,me,xr,K,gu,xn,vu,yu,Wn,Ju,_u,Wr,Se,wu,Fr;return M=new V({}),I=new xd({props:{classNames:"absolute z-10 right-0 top-0",options:[{label:"Mixed",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/it/preprocessing.ipynb"},{label:"PyTorch",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/it/pytorch/preprocessing.ipynb"},{label:"TensorFlow",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/it/tensorflow/preprocessing.ipynb"},{label:"Mixed",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/it/preprocessing.ipynb"},{label:"PyTorch",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/it/pytorch/preprocessing.ipynb"},{label:"TensorFlow",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/it/tensorflow/preprocessing.ipynb"}]}}),Hs=new V({}),Ls=new Ad({props:{id:"Yffk5aydLzg"}}),gs=new Zd({props:{$$slots:{default:[Sd]},$$scope:{ctx:k}}}),Os=new V({}),Ks=new C({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJiZXJ0LWJhc2UtY2FzZWQlMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"bert-base-cased"</span>)`}}),sa=new C({props:{code:"ZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplciglMjJEbyUyMG5vdCUyMG1lZGRsZSUyMGluJTIwdGhlJTIwYWZmYWlycyUyMG9mJTIwd2l6YXJkcyUyQyUyMGZvciUyMHRoZXklMjBhcmUlMjBzdWJ0bGUlMjBhbmQlMjBxdWljayUyMHRvJTIwYW5nZXIuJTIyKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">>>> </span>encoded_input = tokenizer(<span class="hljs-string">"Do not meddle in the affairs of wizards, for they are subtle and quick to anger."</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: [<span class="hljs-number">101</span>, <span class="hljs-number">2079</span>, <span class="hljs-number">2025</span>, <span class="hljs-number">19960</span>, <span class="hljs-number">10362</span>, <span class="hljs-number">1999</span>, <span class="hljs-number">1996</span>, <span class="hljs-number">3821</span>, <span class="hljs-number">1997</span>, <span class="hljs-number">16657</span>, <span class="hljs-number">1010</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">2027</span>, <span class="hljs-number">2024</span>, <span class="hljs-number">11259</span>, <span class="hljs-number">1998</span>, <span class="hljs-number">4248</span>, <span class="hljs-number">2000</span>, <span class="hljs-number">4963</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>], | |
| <span class="hljs-string">'token_type_ids'</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| <span class="hljs-string">'attention_mask'</span>: [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]}`}}),aa=new C({props:{code:"dG9rZW5pemVyLmRlY29kZShlbmNvZGVkX2lucHV0JTVCJTIyaW5wdXRfaWRzJTIyJTVEKQ==",highlighted:`<span class="hljs-meta">>>> </span>tokenizer.decode(encoded_input[<span class="hljs-string">"input_ids"</span>]) | |
| <span class="hljs-string">'[CLS] Do not meddle in the affairs of wizards, for they are subtle and quick to anger. [SEP]'</span>`}}),ea=new C({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoYmF0Y2hfc2VudGVuY2VzKSUwQXByaW50KGVuY29kZWRfaW5wdXRzKQ==",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_inputs = tokenizer(batch_sentences) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_inputs) | |
| {<span class="hljs-string">'input_ids'</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>]], | |
| <span class="hljs-string">'token_type_ids'</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'attention_mask'</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]}`}}),la=new V({}),na=new C({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaF9zZW50ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSklMEFwcmludChlbmNvZGVkX2lucHV0KQ==",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'token_type_ids'</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'attention_mask'</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]}`}}),ta=new V({}),ra=new C({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaF9zZW50ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'token_type_ids'</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'attention_mask'</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]}`}}),pa=new V({}),oa=new Dd({props:{group1:{id:"pt",code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaCUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer(batch, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: tensor([[ <span class="hljs-number">101</span>, <span class="hljs-number">153</span>, <span class="hljs-number">7719</span>, <span class="hljs-number">21490</span>, <span class="hljs-number">1122</span>, <span class="hljs-number">1114</span>, <span class="hljs-number">9582</span>, <span class="hljs-number">1623</span>, <span class="hljs-number">102</span>], | |
| [ <span class="hljs-number">101</span>, <span class="hljs-number">5226</span>, <span class="hljs-number">1122</span>, <span class="hljs-number">9649</span>, <span class="hljs-number">1199</span>, <span class="hljs-number">2610</span>, <span class="hljs-number">1236</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>]]), | |
| <span class="hljs-string">'token_type_ids'</span>: tensor([[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]), | |
| <span class="hljs-string">'attention_mask'</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>]])}`},group2:{id:"tf",code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaCUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnRmJTIyKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer(batch, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: <tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy= | |
| array([[ <span class="hljs-number">101</span>, <span class="hljs-number">153</span>, <span class="hljs-number">7719</span>, <span class="hljs-number">21490</span>, <span class="hljs-number">1122</span>, <span class="hljs-number">1114</span>, <span class="hljs-number">9582</span>, <span class="hljs-number">1623</span>, <span class="hljs-number">102</span>], | |
| [ <span class="hljs-number">101</span>, <span class="hljs-number">5226</span>, <span class="hljs-number">1122</span>, <span class="hljs-number">9649</span>, <span class="hljs-number">1199</span>, <span class="hljs-number">2610</span>, <span class="hljs-number">1236</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>]], | |
| dtype=int32)>, | |
| <span class="hljs-string">'token_type_ids'</span>: <tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy= | |
| array([[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], dtype=int32)>, | |
| <span class="hljs-string">'attention_mask'</span>: <tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy= | |
| array([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>]], dtype=int32)>}`}}}),ia=new V({}),ca=new C({props:{code:"cGlwJTIwaW5zdGFsbCUyMGRhdGFzZXRz",highlighted:"pip install datasets"}}),ha=new C({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTJDJTIwQXVkaW8lMEElMEFkYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMlBvbHlBSSUyRm1pbmRzMTQlMjIlMkMlMjBuYW1lJTNEJTIyZW4tVVMlMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset, Audio | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"PolyAI/minds14"</span>, name=<span class="hljs-string">"en-US"</span>, split=<span class="hljs-string">"train"</span>)`}}),da=new C({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>] | |
| {<span class="hljs-string">'array'</span>: array([ <span class="hljs-number">0.</span> , <span class="hljs-number">0.00024414</span>, -<span class="hljs-number">0.00024414</span>, ..., -<span class="hljs-number">0.00024414</span>, | |
| <span class="hljs-number">0.</span> , <span class="hljs-number">0.</span> ], dtype=float32), | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav'</span>, | |
| <span class="hljs-string">'sampling_rate'</span>: <span class="hljs-number">8000</span>}`}}),ja=new V({}),ga=new C({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJQb2x5QUklMkZtaW5kczE0JTIyJTJDJTIwbmFtZSUzRCUyMmVuLVVTJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhc2V0JTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVE",highlighted:`<span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"PolyAI/minds14"</span>, name=<span class="hljs-string">"en-US"</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>] | |
| {<span class="hljs-string">'array'</span>: array([ <span class="hljs-number">0.</span> , <span class="hljs-number">0.00024414</span>, -<span class="hljs-number">0.00024414</span>, ..., -<span class="hljs-number">0.00024414</span>, | |
| <span class="hljs-number">0.</span> , <span class="hljs-number">0.</span> ], dtype=float32), | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav'</span>, | |
| <span class="hljs-string">'sampling_rate'</span>: <span class="hljs-number">8000</span>}`}}),Ja=new C({props:{code:"ZGF0YXNldCUyMCUzRCUyMGRhdGFzZXQuY2FzdF9jb2x1bW4oJTIyYXVkaW8lMjIlMkMlMjBBdWRpbyhzYW1wbGluZ19yYXRlJTNEMTZfMDAwKSk=",highlighted:'<span class="hljs-meta">>>> </span>dataset = dataset.cast_column(<span class="hljs-string">"audio"</span>, Audio(sampling_rate=<span class="hljs-number">16_000</span>))'}}),wa=new C({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>] | |
| {<span class="hljs-string">'array'</span>: array([ <span class="hljs-number">2.3443763e-05</span>, <span class="hljs-number">2.1729663e-04</span>, <span class="hljs-number">2.2145823e-04</span>, ..., | |
| <span class="hljs-number">3.8356509e-05</span>, -<span class="hljs-number">7.3497440e-06</span>, -<span class="hljs-number">2.1754686e-05</span>], dtype=float32), | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav'</span>, | |
| <span class="hljs-string">'sampling_rate'</span>: <span class="hljs-number">16000</span>}`}}),$a=new V({}),Ta=new C({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9GZWF0dXJlRXh0cmFjdG9yJTBBJTBBZmVhdHVyZV9leHRyYWN0b3IlMjAlM0QlMjBBdXRvRmVhdHVyZUV4dHJhY3Rvci5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZ3YXYydmVjMi1iYXNlJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor | |
| <span class="hljs-meta">>>> </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">"facebook/wav2vec2-base"</span>)`}}),Ma=new C({props:{code:"YXVkaW9faW5wdXQlMjAlM0QlMjAlNUJkYXRhc2V0JTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTVCJTIyYXJyYXklMjIlNUQlNUQlMEFmZWF0dXJlX2V4dHJhY3RvcihhdWRpb19pbnB1dCUyQyUyMHNhbXBsaW5nX3JhdGUlM0QxNjAwMCk=",highlighted:`<span class="hljs-meta">>>> </span>audio_input = [dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>]] | |
| <span class="hljs-meta">>>> </span>feature_extractor(audio_input, sampling_rate=<span class="hljs-number">16000</span>) | |
| {<span class="hljs-string">'input_values'</span>: [array([ <span class="hljs-number">3.8106556e-04</span>, <span class="hljs-number">2.7506407e-03</span>, <span class="hljs-number">2.8015103e-03</span>, ..., | |
| <span class="hljs-number">5.6335266e-04</span>, <span class="hljs-number">4.6588284e-06</span>, -<span class="hljs-number">1.7142107e-04</span>], dtype=float32)]}`}}),Ea=new V({}),ka=new C({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RCU1QiUyMmFycmF5JTIyJTVELnNoYXBlJTBBJTBBZGF0YXNldCU1QjElNUQlNUIlMjJhdWRpbyUyMiU1RCU1QiUyMmFycmF5JTIyJTVELnNoYXBl",highlighted:`<span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>].shape | |
| (<span class="hljs-number">173398</span>,) | |
| <span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">1</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>].shape | |
| (<span class="hljs-number">106496</span>,)`}}),za=new C({props:{code:"ZGVmJTIwcHJlcHJvY2Vzc19mdW5jdGlvbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBhdWRpb19hcnJheXMlMjAlM0QlMjAlNUJ4JTVCJTIyYXJyYXklMjIlNUQlMjBmb3IlMjB4JTIwaW4lMjBleGFtcGxlcyU1QiUyMmF1ZGlvJTIyJTVEJTVEJTBBJTIwJTIwJTIwJTIwaW5wdXRzJTIwJTNEJTIwZmVhdHVyZV9leHRyYWN0b3IoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYXVkaW9fYXJyYXlzJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2FtcGxpbmdfcmF0ZSUzRDE2MDAwJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcGFkZGluZyUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXhfbGVuZ3RoJTNEMTAwMDAwJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjApJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwaW5wdXRz",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-meta">... </span> audio_arrays = [x[<span class="hljs-string">"array"</span>] <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> examples[<span class="hljs-string">"audio"</span>]] | |
| <span class="hljs-meta">... </span> inputs = feature_extractor( | |
| <span class="hljs-meta">... </span> audio_arrays, | |
| <span class="hljs-meta">... </span> sampling_rate=<span class="hljs-number">16000</span>, | |
| <span class="hljs-meta">... </span> padding=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span> max_length=<span class="hljs-number">100000</span>, | |
| <span class="hljs-meta">... </span> truncation=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span> ) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> inputs`}}),Ia=new C({props:{code:"cHJvY2Vzc2VkX2RhdGFzZXQlMjAlM0QlMjBwcmVwcm9jZXNzX2Z1bmN0aW9uKGRhdGFzZXQlNUIlM0E1JTVEKQ==",highlighted:'<span class="hljs-meta">>>> </span>processed_dataset = preprocess_function(dataset[:<span class="hljs-number">5</span>])'}}),Ua=new C({props:{code:"cHJvY2Vzc2VkX2RhdGFzZXQlNUIlMjJpbnB1dF92YWx1ZXMlMjIlNUQlNUIwJTVELnNoYXBlJTBBJTBBcHJvY2Vzc2VkX2RhdGFzZXQlNUIlMjJpbnB1dF92YWx1ZXMlMjIlNUQlNUIxJTVELnNoYXBl",highlighted:`<span class="hljs-meta">>>> </span>processed_dataset[<span class="hljs-string">"input_values"</span>][<span class="hljs-number">0</span>].shape | |
| (<span class="hljs-number">100000</span>,) | |
| <span class="hljs-meta">>>> </span>processed_dataset[<span class="hljs-string">"input_values"</span>][<span class="hljs-number">1</span>].shape | |
| (<span class="hljs-number">100000</span>,)`}}),Ca=new V({}),Va=new C({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJmb29kMTAxJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiU1QiUzQTEwMCU1RCUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"food101"</span>, split=<span class="hljs-string">"train[:100]"</span>)`}}),qa=new C({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJpbWFnZSUyMiU1RA==",highlighted:'<span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"image"</span>]'}}),Za=new V({}),Aa=new C({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9GZWF0dXJlRXh0cmFjdG9yJTBBJTBBZmVhdHVyZV9leHRyYWN0b3IlMjAlM0QlMjBBdXRvRmVhdHVyZUV4dHJhY3Rvci5mcm9tX3ByZXRyYWluZWQoJTIyZ29vZ2xlJTJGdml0LWJhc2UtcGF0Y2gxNi0yMjQlMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor | |
| <span class="hljs-meta">>>> </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">"google/vit-base-patch16-224"</span>)`}}),Ra=new V({}),Xa=new C({props:{code:"ZnJvbSUyMHRvcmNodmlzaW9uLnRyYW5zZm9ybXMlMjBpbXBvcnQlMjBDb21wb3NlJTJDJTIwTm9ybWFsaXplJTJDJTIwUmFuZG9tUmVzaXplZENyb3AlMkMlMjBDb2xvckppdHRlciUyQyUyMFRvVGVuc29yJTBBJTBBbm9ybWFsaXplJTIwJTNEJTIwTm9ybWFsaXplKG1lYW4lM0RmZWF0dXJlX2V4dHJhY3Rvci5pbWFnZV9tZWFuJTJDJTIwc3RkJTNEZmVhdHVyZV9leHRyYWN0b3IuaW1hZ2Vfc3RkKSUwQV90cmFuc2Zvcm1zJTIwJTNEJTIwQ29tcG9zZSglMEElMjAlMjAlMjAlMjAlNUJSYW5kb21SZXNpemVkQ3JvcChmZWF0dXJlX2V4dHJhY3Rvci5zaXplKSUyQyUyMENvbG9ySml0dGVyKGJyaWdodG5lc3MlM0QwLjUlMkMlMjBodWUlM0QwLjUpJTJDJTIwVG9UZW5zb3IoKSUyQyUyMG5vcm1hbGl6ZSU1RCUwQSk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torchvision.transforms <span class="hljs-keyword">import</span> Compose, Normalize, RandomResizedCrop, ColorJitter, ToTensor | |
| <span class="hljs-meta">>>> </span>normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) | |
| <span class="hljs-meta">>>> </span>_transforms = Compose( | |
| <span class="hljs-meta">... </span> [RandomResizedCrop(feature_extractor.size), ColorJitter(brightness=<span class="hljs-number">0.5</span>, hue=<span class="hljs-number">0.5</span>), ToTensor(), normalize] | |
| <span class="hljs-meta">... </span>)`}}),Pa=new C({props:{code:"ZGVmJTIwdHJhbnNmb3JtcyhleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMnBpeGVsX3ZhbHVlcyUyMiU1RCUyMCUzRCUyMCU1Ql90cmFuc2Zvcm1zKGltYWdlLmNvbnZlcnQoJTIyUkdCJTIyKSklMjBmb3IlMjBpbWFnZSUyMGluJTIwZXhhbXBsZXMlNUIlMjJpbWFnZSUyMiU1RCU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGV4YW1wbGVz",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">transforms</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-meta">... </span> examples[<span class="hljs-string">"pixel_values"</span>] = [_transforms(image.convert(<span class="hljs-string">"RGB"</span>)) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[<span class="hljs-string">"image"</span>]] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples`}}),Ha=new C({props:{code:"ZGF0YXNldC5zZXRfdHJhbnNmb3JtKHRyYW5zZm9ybXMp",highlighted:'<span class="hljs-meta">>>> </span>dataset.set_transform(transforms)'}}),Ka=new C({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJpbWFnZSUyMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"image"</span>] | |
| {<span class="hljs-string">'image'</span>: <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=384x512 at <span class="hljs-number">0x7F1A7B0630D0</span>>, | |
| <span class="hljs-string">'label'</span>: <span class="hljs-number">6</span>, | |
| <span class="hljs-string">'pixel_values'</span>: tensor([[[ <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0745</span>, <span class="hljs-number">0.1216</span>, ..., -<span class="hljs-number">0.9922</span>, -<span class="hljs-number">0.9922</span>, -<span class="hljs-number">0.9922</span>], | |
| [-<span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0667</span>, <span class="hljs-number">0.1294</span>, ..., -<span class="hljs-number">0.9765</span>, -<span class="hljs-number">0.9843</span>, -<span class="hljs-number">0.9922</span>], | |
| [ <span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0824</span>, <span class="hljs-number">0.1137</span>, ..., -<span class="hljs-number">0.9765</span>, -<span class="hljs-number">0.9686</span>, -<span class="hljs-number">0.8667</span>], | |
| ..., | |
| [ <span class="hljs-number">0.0275</span>, <span class="hljs-number">0.0745</span>, <span class="hljs-number">0.0510</span>, ..., -<span class="hljs-number">0.1137</span>, -<span class="hljs-number">0.1216</span>, -<span class="hljs-number">0.0824</span>], | |
| [ <span class="hljs-number">0.0667</span>, <span class="hljs-number">0.0824</span>, <span class="hljs-number">0.0667</span>, ..., -<span class="hljs-number">0.0588</span>, -<span class="hljs-number">0.0745</span>, -<span class="hljs-number">0.0980</span>], | |
| [ <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0431</span>, ..., -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0588</span>]], | |
| [[ <span class="hljs-number">0.2078</span>, <span class="hljs-number">0.2471</span>, <span class="hljs-number">0.2863</span>, ..., -<span class="hljs-number">0.9451</span>, -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.9451</span>], | |
| [ <span class="hljs-number">0.1608</span>, <span class="hljs-number">0.2471</span>, <span class="hljs-number">0.3098</span>, ..., -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.9451</span>, -<span class="hljs-number">0.9373</span>], | |
| [ <span class="hljs-number">0.2078</span>, <span class="hljs-number">0.2706</span>, <span class="hljs-number">0.3020</span>, ..., -<span class="hljs-number">0.9608</span>, -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.8275</span>], | |
| ..., | |
| [-<span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0118</span>, -<span class="hljs-number">0.0039</span>, ..., -<span class="hljs-number">0.2392</span>, -<span class="hljs-number">0.2471</span>, -<span class="hljs-number">0.2078</span>], | |
| [ <span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0196</span>, ..., -<span class="hljs-number">0.1843</span>, -<span class="hljs-number">0.2000</span>, -<span class="hljs-number">0.2235</span>], | |
| [-<span class="hljs-number">0.0118</span>, -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0039</span>, ..., -<span class="hljs-number">0.0980</span>, -<span class="hljs-number">0.0980</span>, -<span class="hljs-number">0.1529</span>]], | |
| [[ <span class="hljs-number">0.3961</span>, <span class="hljs-number">0.4431</span>, <span class="hljs-number">0.4980</span>, ..., -<span class="hljs-number">0.9216</span>, -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.9216</span>], | |
| [ <span class="hljs-number">0.3569</span>, <span class="hljs-number">0.4510</span>, <span class="hljs-number">0.5216</span>, ..., -<span class="hljs-number">0.9059</span>, -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.9137</span>], | |
| [ <span class="hljs-number">0.4118</span>, <span class="hljs-number">0.4745</span>, <span class="hljs-number">0.5216</span>, ..., -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.8902</span>, -<span class="hljs-number">0.7804</span>], | |
| ..., | |
| [-<span class="hljs-number">0.2314</span>, -<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.2078</span>, ..., -<span class="hljs-number">0.4196</span>, -<span class="hljs-number">0.4275</span>, -<span class="hljs-number">0.3882</span>], | |
| [-<span class="hljs-number">0.1843</span>, -<span class="hljs-number">0.1686</span>, -<span class="hljs-number">0.2000</span>, ..., -<span class="hljs-number">0.3647</span>, -<span class="hljs-number">0.3804</span>, -<span class="hljs-number">0.4039</span>], | |
| [-<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.1922</span>, ..., -<span class="hljs-number">0.2941</span>, -<span class="hljs-number">0.2863</span>, -<span class="hljs-number">0.3412</span>]]])}`}}),se=new C({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBaW1wb3J0JTIwbWF0cGxvdGxpYi5weXBsb3QlMjBhcyUyMHBsdCUwQSUwQWltZyUyMCUzRCUyMGRhdGFzZXQlNUIwJTVEJTVCJTIycGl4ZWxfdmFsdWVzJTIyJTVEJTBBcGx0Lmltc2hvdyhpbWcucGVybXV0ZSgxJTJDJTIwMiUyQyUyMDApKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt | |
| <span class="hljs-meta">>>> </span>img = dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"pixel_values"</span>] | |
| <span class="hljs-meta">>>> </span>plt.imshow(img.permute(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">0</span>))`}}),ae=new V({}),le=new C({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBbGpfc3BlZWNoJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMmxqX3NwZWVjaCUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>lj_speech = load_dataset(<span class="hljs-string">"lj_speech"</span>, split=<span class="hljs-string">"train"</span>)`}}),ne=new C({props:{code:"bGpfc3BlZWNoJTIwJTNEJTIwbGpfc3BlZWNoLm1hcChyZW1vdmVfY29sdW1ucyUzRCU1QiUyMmZpbGUlMjIlMkMlMjAlMjJpZCUyMiUyQyUyMCUyMm5vcm1hbGl6ZWRfdGV4dCUyMiU1RCk=",highlighted:'<span class="hljs-meta">>>> </span>lj_speech = lj_speech.<span class="hljs-built_in">map</span>(remove_columns=[<span class="hljs-string">"file"</span>, <span class="hljs-string">"id"</span>, <span class="hljs-string">"normalized_text"</span>])'}}),te=new C({props:{code:"bGpfc3BlZWNoJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTBBJTBBbGpfc3BlZWNoJTVCMCU1RCU1QiUyMnRleHQlMjIlNUQ=",highlighted:`<span class="hljs-meta">>>> </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>] | |
| {<span class="hljs-string">'array'</span>: array([-<span class="hljs-number">7.3242188e-04</span>, -<span class="hljs-number">7.6293945e-04</span>, -<span class="hljs-number">6.4086914e-04</span>, ..., | |
| <span class="hljs-number">7.3242188e-04</span>, <span class="hljs-number">2.1362305e-04</span>, <span class="hljs-number">6.1035156e-05</span>], dtype=float32), | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'/root/.cache/huggingface/datasets/downloads/extracted/917ece08c95cf0c4115e45294e3cd0dee724a1165b7fc11798369308a465bd26/LJSpeech-1.1/wavs/LJ001-0001.wav'</span>, | |
| <span class="hljs-string">'sampling_rate'</span>: <span class="hljs-number">22050</span>} | |
| <span class="hljs-meta">>>> </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">"text"</span>] | |
| <span class="hljs-string">'Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition'</span>`}}),re=new C({props:{code:"bGpfc3BlZWNoJTIwJTNEJTIwbGpfc3BlZWNoLmNhc3RfY29sdW1uKCUyMmF1ZGlvJTIyJTJDJTIwQXVkaW8oc2FtcGxpbmdfcmF0ZSUzRDE2XzAwMCkp",highlighted:'<span class="hljs-meta">>>> </span>lj_speech = lj_speech.cast_column(<span class="hljs-string">"audio"</span>, Audio(sampling_rate=<span class="hljs-number">16_000</span>))'}}),pe=new V({}),oe=new C({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Qcm9jZXNzb3IlMEElMEFwcm9jZXNzb3IlMjAlM0QlMjBBdXRvUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZCglMjJmYWNlYm9vayUyRndhdjJ2ZWMyLWJhc2UtOTYwaCUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor | |
| <span class="hljs-meta">>>> </span>processor = AutoProcessor.from_pretrained(<span class="hljs-string">"facebook/wav2vec2-base-960h"</span>)`}}),ie=new C({props:{code:"ZGVmJTIwcHJlcGFyZV9kYXRhc2V0KGV4YW1wbGUpJTNBJTBBJTIwJTIwJTIwJTIwYXVkaW8lMjAlM0QlMjBleGFtcGxlJTVCJTIyYXVkaW8lMjIlNUQlMEElMEElMjAlMjAlMjAlMjBleGFtcGxlLnVwZGF0ZShwcm9jZXNzb3IoYXVkaW8lM0RhdWRpbyU1QiUyMmFycmF5JTIyJTVEJTJDJTIwdGV4dCUzRGV4YW1wbGUlNUIlMjJ0ZXh0JTIyJTVEJTJDJTIwc2FtcGxpbmdfcmF0ZSUzRDE2MDAwKSklMEElMEElMjAlMjAlMjAlMjByZXR1cm4lMjBleGFtcGxl",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">prepare_dataset</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-meta">... </span> audio = example[<span class="hljs-string">"audio"</span>] | |
| <span class="hljs-meta">... </span> example.update(processor(audio=audio[<span class="hljs-string">"array"</span>], text=example[<span class="hljs-string">"text"</span>], sampling_rate=<span class="hljs-number">16000</span>)) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> example`}}),me=new C({props:{code:"cHJlcGFyZV9kYXRhc2V0KGxqX3NwZWVjaCU1QjAlNUQp",highlighted:'<span class="hljs-meta">>>> </span>prepare_dataset(lj_speech[<span class="hljs-number">0</span>])'}}),{c(){h=n("meta"),j=c(),b=n("h1"),y=n("a"),f=n("span"),$(M.$$.fragment),J=c(),_=n("span"),d=p("Preprocess"),z=c(),$(I.$$.fragment),U=c(),bs=n("p"),B=p("Prima di poter usare i dati in un modello, bisogna processarli in un formato accettabile per quest\u2019ultimo. Un modello non comprende il testo grezzo, le immagini o l\u2019audio. Bisogna convertire questi input in numeri e assemblarli all\u2019interno di tensori. In questa esercitazione, tu potrai:"),G=c(),q=n("ul"),hl=n("li"),Tp=p("Preprocessare dati testuali con un tokenizer."),Mp=c(),dl=n("li"),Ep=p("Preprocessare immagini o dati audio con un estrattore di caratteristiche."),kp=c(),jl=n("li"),zp=p("Preprocessare dati per attivit\xE0 multimodali mediante un processore."),Xn=c(),ss=n("h2"),fs=n("a"),bl=n("span"),$(Hs.$$.fragment),Ip=c(),fl=n("span"),Up=p("NLP"),Yn=c(),$(Ls.$$.fragment),Pn=c(),N=n("p"),Cp=p("Lo strumento principale per processare dati testuali \xE8 un "),de=n("a"),Gp=p("tokenizer"),Vp=p(". Un tokenizer inizia separando il testo in "),gl=n("em"),Bp=p("tokens"),qp=p(" secondo una serie di regole. I tokens sono convertiti in numeri, questi vengono utilizzati per costruire i tensori di input del modello. Anche altri input addizionali se richiesti dal modello vengono aggiunti dal tokenizer."),Qn=c(),$(gs.$$.fragment),Dn=c(),x=n("p"),Zp=p("Iniziamo subito caricando un tokenizer preaddestrato con la classe "),vl=n("code"),Ap=p("AutoTokenizer"),Rp=p(". 
Questo scarica il "),yl=n("em"),Np=p("vocabolario"),xp=p(" usato quando il modello \xE8 stato preaddestrato."),Sn=c(),as=n("h3"),vs=n("a"),Jl=n("span"),$(Os.$$.fragment),Wp=c(),_l=n("span"),Fp=p("Tokenize"),Hn=c(),ys=n("p"),Xp=p("Carica un tokenizer preaddestrato con "),wl=n("code"),Yp=p("AutoTokenizer.from_pretrained()"),Pp=p(":"),Ln=c(),$(Ks.$$.fragment),On=c(),je=n("p"),Qp=p("Poi inserisci le tue frasi nel tokenizer:"),Kn=c(),$(sa.$$.fragment),st=c(),be=n("p"),Dp=p("Il tokenizer restituisce un dizionario contenente tre oggetti importanti:"),at=c(),W=n("ul"),fe=n("li"),ge=n("a"),Sp=p("input_ids"),Hp=p(" sono gli indici che corrispondono ad ogni token nella frase."),Lp=c(),ve=n("li"),ye=n("a"),Op=p("attention_mask"),Kp=p(" indicata se un token deve essere elaborato o no."),so=c(),Je=n("li"),_e=n("a"),ao=p("token_type_ids"),eo=p(" identifica a quale sequenza appartiene un token se \xE8 presente pi\xF9 di una sequenza."),et=c(),Js=n("p"),lo=p("Si possono decodificare gli "),$l=n("code"),no=p("input_ids"),to=p(" per farsi restituire l\u2019input originale:"),lt=c(),$(aa.$$.fragment),nt=c(),F=n("p"),ro=p("Come si pu\xF2 vedere, il tokenizer aggiunge due token speciali - "),Tl=n("code"),po=p("CLS"),oo=p(" e "),Ml=n("code"),io=p("SEP"),co=p(" (classificatore e separatore) - alla frase. Non tutti i modelli hanno bisogno dei token speciali, ma se servono, il tokenizer li aggiunger\xE0 automaticamente."),tt=c(),we=n("p"),uo=p("Se ci sono pi\xF9 frasi che vuoi processare, passale come una lista al tokenizer:"),rt=c(),$(ea.$$.fragment),pt=c(),es=n("h3"),_s=n("a"),El=n("span"),$(la.$$.fragment),mo=c(),kl=n("span"),ho=p("Pad"),ot=c(),ws=n("p"),jo=p("Questo \xE8 un argomento importante. Quando processi un insieme di frasi potrebbero non avere tutte la stessa lunghezza. Questo \xE8 un problema perch\xE8 i tensori, in input del modello, devono avere dimensioni uniformi. 
Il padding \xE8 una strategia per assicurarsi che i tensori siano rettangolari aggiungendo uno speciale "),zl=n("em"),bo=p("padding token"),fo=p(" alle frasi pi\xF9 corte."),it=c(),X=n("p"),go=p("Imposta il parametro "),Il=n("code"),vo=p("padding"),yo=p(" a "),Ul=n("code"),Jo=p("True"),_o=p(" per imbottire le frasi pi\xF9 corte nel gruppo in modo che combacino con la massima lunghezza presente:"),ct=c(),$(na.$$.fragment),ut=c(),$s=n("p"),wo=p("Nota che il tokenizer aggiunge alle sequenze degli "),Cl=n("code"),$o=p("0"),To=p(" perch\xE8 sono troppo corte!"),mt=c(),ls=n("h3"),Ts=n("a"),Gl=n("span"),$(ta.$$.fragment),Mo=c(),Vl=n("span"),Eo=p("Truncation"),ht=c(),$e=n("p"),ko=p("L\u2019altra faccia della medaglia \xE8 che avolte le sequenze possono essere troppo lunghe per essere gestite dal modello. In questo caso, avrai bisogno di troncare la sequenza per avere una lunghezza minore."),dt=c(),Y=n("p"),zo=p("Imposta il parametro "),Bl=n("code"),Io=p("truncation"),Uo=p(" a "),ql=n("code"),Co=p("True"),Go=p(" per troncare una sequenza alla massima lunghezza accettata dal modello:"),jt=c(),$(ra.$$.fragment),bt=c(),ns=n("h3"),Ms=n("a"),Zl=n("span"),$(pa.$$.fragment),Vo=c(),Al=n("span"),Bo=p("Costruire i tensori"),ft=c(),Te=n("p"),qo=p("Infine, vuoi che il tokenizer restituisca i tensori prodotti dal modello."),gt=c(),Z=n("p"),Zo=p("Imposta il parametro "),Rl=n("code"),Ao=p("return_tensors"),Ro=p(" su "),Nl=n("code"),No=p("pt"),xo=p(" per PyTorch, o "),xl=n("code"),Wo=p("tf"),Fo=p(" per TensorFlow:"),vt=c(),$(oa.$$.fragment),yt=c(),ts=n("h2"),Es=n("a"),Wl=n("span"),$(ia.$$.fragment),Xo=c(),Fl=n("span"),Yo=p("Audio"),Jt=c(),ks=n("p"),Po=p("Gli input audio sono processati in modo differente rispetto al testo, ma l\u2019obiettivo rimane lo stesso: creare sequenze numeriche che il modello pu\xF2 capire. 
Un "),Me=n("a"),Qo=p("estrattore di caratteristiche"),Do=p(" \xE8 progettato con lo scopo preciso di estrarre caratteristiche da immagini o dati audio grezzi e convertirli in tensori. Prima di iniziare, installa \u{1F917} Datasets per caricare un dataset audio e sperimentare:"),_t=c(),$(ca.$$.fragment),wt=c(),P=n("p"),So=p("Carica il dataset "),ua=n("a"),Ho=p("MInDS-14"),Lo=p(" (vedi il \u{1F917} "),ma=n("a"),Oo=p("Datasets tutorial"),Ko=p(" per avere maggiori dettagli su come caricare un dataset):"),$t=c(),$(ha.$$.fragment),Tt=c(),Q=n("p"),si=p("Accedi al primo elemento della colonna "),Xl=n("code"),ai=p("audio"),ei=p(" per dare uno sguardo all\u2019input. Richiamando la colonna "),Yl=n("code"),li=p("audio"),ni=p(" sar\xE0 caricato automaticamente e ricampionato il file audio:"),Mt=c(),$(da.$$.fragment),Et=c(),Ee=n("p"),ti=p("Questo restituisce tre oggetti:"),kt=c(),D=n("ul"),ke=n("li"),Pl=n("code"),ri=p("array"),pi=p(" \xE8 il segnale vocale caricato - e potenzialmente ricampionato - come vettore 1D."),oi=c(),ze=n("li"),Ql=n("code"),ii=p("path"),ci=p(" il percorso del file audio."),ui=c(),Ie=n("li"),Dl=n("code"),mi=p("sampling_rate"),hi=p(" si riferisce al numero di campioni del segnale vocale misurati al secondo."),zt=c(),rs=n("h3"),zs=n("a"),Sl=n("span"),$(ja.$$.fragment),di=c(),Hl=n("span"),ji=p("Ricampionamento"),It=c(),Is=n("p"),bi=p("Per questo tutorial, puoi usare il modello "),ba=n("a"),fi=p("Wav2Vec2"),gi=p(". Come puoi vedere dalla model card, il modello Wav2Vec2 \xE8 preaddestrato su un campionamento vocale a 16kHz.\xC8 importante che la frequenza di campionamento dei tuoi dati audio combaci con la frequenza di campionamento del dataset usato per preaddestrare il modello. Se la frequenza di campionamento dei tuoi dati non \xE8 uguale dovrai ricampionare i tuoi dati audio."),Ut=c(),Us=n("p"),vi=p("Per esempio, il dataset "),fa=n("a"),yi=p("MInDS-14"),Ji=p(" ha una frequenza di campionamento di 8000kHz. 
Utilizzando il modello Wav2Vec2 su questo dataset, alzala a 16kHz:"),Ct=c(),$(ga.$$.fragment),Gt=c(),Ue=n("ol"),va=n("li"),_i=p("Usa il metodo di \u{1F917} Datasets\u2019 "),ya=n("a"),Ll=n("code"),wi=p("cast_column"),$i=p(" per alzare la frequenza di campionamento a 16kHz:"),Vt=c(),$(Ja.$$.fragment),Bt=c(),_a=n("ol"),Ol=n("li"),Ti=p("Carica il file audio:"),qt=c(),$(wa.$$.fragment),Zt=c(),Cs=n("p"),Mi=p("Come puoi notare, la "),Kl=n("code"),Ei=p("sampling_rate"),ki=p(" adesso \xE8 16kHz!"),At=c(),ps=n("h3"),Gs=n("a"),sn=n("span"),$($a.$$.fragment),zi=c(),an=n("span"),Ii=p("Feature extractor"),Rt=c(),A=n("p"),Ui=p("Il prossimo passo \xE8 caricare un estrattore di caratteristiche per normalizzare e fare padding sull\u2019input. Quando applichiamo il padding sui dati testuali, uno "),en=n("code"),Ci=p("0"),Gi=p(" \xE8 aggiunto alle sequenze pi\xF9 brevi. La stessa idea si applica ai dati audio, l\u2019estrattore di caratteristiche per gli audio aggiunger\xE0 uno "),ln=n("code"),Vi=p("0"),Bi=p(" - interpretato come silenzio - agli "),nn=n("code"),qi=p("array"),Zi=p("."),Nt=c(),Vs=n("p"),Ai=p("Carica l\u2019estrattore delle caratteristiche con "),tn=n("code"),Ri=p("AutoFeatureExtractor.from_pretrained()"),Ni=p(":"),xt=c(),$(Ta.$$.fragment),Wt=c(),S=n("p"),xi=p("Inserisci l\u2019 "),rn=n("code"),Wi=p("array"),Fi=p(" audio nell\u2019estrattore delle caratteristiche. Noi raccomandiamo sempre di aggiungere il parametro "),pn=n("code"),Xi=p("sampling_rate"),Yi=p(" nell\u2019estrattore delle caratteristiche per correggere meglio qualche errore, dovuto ai silenzi, che potrebbe verificarsi."),Ft=c(),$(Ma.$$.fragment),Xt=c(),os=n("h3"),Bs=n("a"),on=n("span"),$(Ea.$$.fragment),Pi=c(),cn=n("span"),Qi=p("Pad e truncate"),Yt=c(),Ce=n("p"),Di=p("Come per il tokenizer, puoi applicare le operazioni padding o truncation per manipolare sequenze di variabili a lotti. 
Dai uno sguaro alla lunghezza delle sequenze di questi due campioni audio:"),Pt=c(),$(ka.$$.fragment),Qt=c(),Ge=n("p"),Si=p("Come puoi vedere, il primo campione ha una sequenza pi\xF9 lunga del secondo. Crea una funzione che preprocesser\xE0 il dataset. Specifica una lunghezza massima del campione, e l\u2019estrattore di features si occuper\xE0 di riempire o troncare la sequenza per coincidervi:"),Dt=c(),$(za.$$.fragment),St=c(),Ve=n("p"),Hi=p("Applica la funzione ai primi esempi nel dataset:"),Ht=c(),$(Ia.$$.fragment),Lt=c(),Be=n("p"),Li=p("Adesso guarda la lunghezza dei campioni elaborati:"),Ot=c(),$(Ua.$$.fragment),Kt=c(),qe=n("p"),Oi=p("La lunghezza dei campioni adesso coincide con la massima lunghezza impostata nelle funzione."),sr=c(),is=n("h2"),qs=n("a"),un=n("span"),$(Ca.$$.fragment),Ki=c(),mn=n("span"),sc=p("Vision"),ar=c(),Ze=n("p"),ac=p("Un estrattore di caratteristiche si pu\xF2 usare anche per processare immagini e per compiti di visione. Ancora una volta, l\u2019obiettivo \xE8 convertire l\u2019immagine grezza in un lotto di tensori come input."),er=c(),H=n("p"),ec=p("Carica il dataset "),Ga=n("a"),lc=p("food101"),nc=p(" per questa esercitazione. 
Usa il parametro "),hn=n("code"),tc=p("split"),rc=p(" di \u{1F917} Datasets per caricare solo un piccolo campione dal dataset di addestramento poich\xE8 il set di dati \xE8 molto grande:"),lr=c(),$(Va.$$.fragment),nr=c(),Zs=n("p"),pc=p("Secondo passo, dai uno sguardo alle immagini usando la caratteristica "),Ba=n("a"),dn=n("code"),oc=p("Image"),ic=p(" di \u{1F917} Datasets:"),tr=c(),$(qa.$$.fragment),rr=c(),Ae=n("p"),Re=n("img"),pr=c(),cs=n("h3"),As=n("a"),jn=n("span"),$(Za.$$.fragment),cc=c(),bn=n("span"),uc=p("Feature extractor"),or=c(),Rs=n("p"),mc=p("Carica l\u2019estrattore di caratteristiche "),fn=n("code"),hc=p("AutoFeatureExtractor.from_pretrained()"),dc=p(":"),ir=c(),$(Aa.$$.fragment),cr=c(),us=n("h3"),Ns=n("a"),gn=n("span"),$(Ra.$$.fragment),jc=c(),vn=n("span"),bc=p("Data augmentation"),ur=c(),xs=n("p"),fc=p("Per le attivit\xE0 di visione, \xE8 usuale aggiungere alcuni tipi di data augmentation alle immagini come parte del preprocessing. Puoi aggiungere augmentations con qualsiasi libreria che preferisci, ma in questa esercitazione, userai il modulo "),Na=n("a"),yn=n("code"),gc=p("transforms"),vc=p(" di torchvision."),mr=c(),Ne=n("ol"),R=n("li"),yc=p("Normalizza l\u2019immagine e usa "),xa=n("a"),Jn=n("code"),Jc=p("Compose"),_c=p(" per concatenare alcune trasformazioni - "),Wa=n("a"),_n=n("code"),wc=p("RandomResizedCrop"),$c=p(" e "),Fa=n("a"),wn=n("code"),Tc=p("ColorJitter"),Mc=p(" - insieme:"),hr=c(),$(Xa.$$.fragment),dr=c(),Ya=n("ol"),ms=n("li"),Ec=p("Il modello accetta "),xe=n("a"),$n=n("code"),kc=p("pixel_values"),zc=p(" come input. Questo valore \xE8 generato dall\u2019estrattore di caratteristiche. 
Crea una funzione che genera "),Tn=n("code"),Ic=p("pixel_values"),Uc=p(" dai transforms:"),jr=c(),$(Pa.$$.fragment),br=c(),Qa=n("ol"),Da=n("li"),Cc=p("Poi utilizza \u{1F917} Datasets "),Sa=n("a"),Mn=n("code"),Gc=p("set_transform"),Vc=p("per applicare al volo la trasformazione:"),fr=c(),$(Ha.$$.fragment),gr=c(),La=n("ol"),Oa=n("li"),Bc=p("Adesso quando accedi all\u2019immagine, puoi notare che l\u2019estrattore di caratteristiche ha aggiunto "),En=n("code"),qc=p("pixel_values"),Zc=p(" allo schema di input:"),vr=c(),$(Ka.$$.fragment),yr=c(),We=n("p"),Ac=p("Di seguito come si vede l\u2019immagine dopo la fase di preprocessing. Come ci si aspetterebbe dalle trasformazioni applicate, l\u2019immagine \xE8 stata ritagliata in modo casuale e le propriet\xE0 del colore sono diverse."),Jr=c(),$(se.$$.fragment),_r=c(),Fe=n("p"),Xe=n("img"),wr=c(),hs=n("h2"),Ws=n("a"),kn=n("span"),$(ae.$$.fragment),Rc=c(),zn=n("span"),Nc=p("Multimodal"),$r=c(),Ye=n("p"),xc=p("Per attivit\xE0 multimodali userai una combinazione di tutto quello che hai imparato poco fa e applicherai le tue competenze alla comprensione automatica del parlato (Automatic Speech Recognition - ASR). 
Questo significa che avrai bisogno di:"),Tr=c(),Fs=n("ul"),In=n("li"),Wc=p("Un estrattore delle caratteristiche per processare i dati audio."),Fc=c(),Un=n("li"),Xc=p("Il Tokenizer per processare i testi."),Mr=c(),Xs=n("p"),Yc=p("Ritorna sul datasere "),ee=n("a"),Pc=p("LJ Speech"),Qc=p(":"),Er=c(),$(le.$$.fragment),kr=c(),L=n("p"),Dc=p("Visto che sei interessato solo alle colonne "),Cn=n("code"),Sc=p("audio"),Hc=p(" e "),Gn=n("code"),Lc=p("text"),Oc=p(", elimina tutte le altre:"),zr=c(),$(ne.$$.fragment),Ir=c(),O=n("p"),Kc=p("Adesso guarda le colonne "),Vn=n("code"),su=p("audio"),au=p(" e "),Bn=n("code"),eu=p("text"),lu=p(":"),Ur=c(),$(te.$$.fragment),Cr=c(),Ys=n("p"),nu=p("Ricorda dalla sezione precedente sull\u2019elaborazione dei dati audio, tu dovresti sempre "),Pe=n("a"),tu=p("ricampionare"),ru=p(" la frequenza di campionamento dei tuoi dati audio per farla coincidere con quella del dataset usato dal modello preaddestrato:"),Gr=c(),$(re.$$.fragment),Vr=c(),ds=n("h3"),Ps=n("a"),qn=n("span"),$(pe.$$.fragment),pu=c(),Zn=n("span"),ou=p("Processor"),Br=c(),Qe=n("p"),iu=p("Un processor combina un estrattore di caratteristiche e un tokenizer. Carica un processor con [`AutoProcessor.from_pretrained]:"),qr=c(),$(oe.$$.fragment),Zr=c(),De=n("ol"),js=n("li"),cu=p("Crea una funzione che processi i dati audio in "),An=n("code"),uu=p("input_values"),mu=p(", e tokenizza il testo in "),Rn=n("code"),hu=p("labels"),du=p(". Questi sono i tuoi input per il modello:"),Ar=c(),$(ie.$$.fragment),Rr=c(),ce=n("ol"),ue=n("li"),ju=p("Applica la funzione "),Nn=n("code"),bu=p("prepare_dataset"),fu=p(" ad un campione:"),Nr=c(),$(me.$$.fragment),xr=c(),K=n("p"),gu=p("Nota che il processor ha aggiunto "),xn=n("code"),vu=p("input_values"),yu=p(" e "),Wn=n("code"),Ju=p("labels"),_u=p(". 
La frequenza di campionamento \xE8 stata corretta riducendola a 16kHz."),Wr=c(),Se=n("p"),wu=p("Fantastico, ora dovresti essere in grado di preelaborare i dati per qualsiasi modalit\xE0 e persino di combinare modalit\xE0 diverse! Nella prossima esercitazione, impareremo a mettere a punto un modello sui dati appena pre-elaborati."),this.h()},l(s){const l=Bd('[data-svelte="svelte-1phssyn"]',document.head);h=t(l,"META",{name:!0,content:!0}),l.forEach(a),j=u(s),b=t(s,"H1",{class:!0});var he=r(b);y=t(he,"A",{id:!0,class:!0,href:!0});var Bu=r(y);f=t(Bu,"SPAN",{});var qu=r(f);E(M.$$.fragment,qu),qu.forEach(a),Bu.forEach(a),J=u(he),_=t(he,"SPAN",{});var Zu=r(_);d=o(Zu,"Preprocess"),Zu.forEach(a),he.forEach(a),z=u(s),E(I.$$.fragment,s),U=u(s),bs=t(s,"P",{});var Au=r(bs);B=o(Au,"Prima di poter usare i dati in un modello, bisogna processarli in un formato accettabile per quest\u2019ultimo. Un modello non comprende il testo grezzo, le immagini o l\u2019audio. Bisogna convertire questi input in numeri e assemblarli all\u2019interno di tensori. In questa esercitazione, tu potrai:"),Au.forEach(a),G=u(s),q=t(s,"UL",{});var He=r(q);hl=t(He,"LI",{});var Ru=r(hl);Tp=o(Ru,"Preprocessare dati testuali con un tokenizer."),Ru.forEach(a),Mp=u(He),dl=t(He,"LI",{});var Nu=r(dl);Ep=o(Nu,"Preprocessare immagini o dati audio con un estrattore di caratteristiche."),Nu.forEach(a),kp=u(He),jl=t(He,"LI",{});var xu=r(jl);zp=o(xu,"Preprocessare dati per attivit\xE0 multimodali mediante un processore."),xu.forEach(a),He.forEach(a),Xn=u(s),ss=t(s,"H2",{class:!0});var Xr=r(ss);fs=t(Xr,"A",{id:!0,class:!0,href:!0});var Wu=r(fs);bl=t(Wu,"SPAN",{});var Fu=r(bl);E(Hs.$$.fragment,Fu),Fu.forEach(a),Wu.forEach(a),Ip=u(Xr),fl=t(Xr,"SPAN",{});var Xu=r(fl);Up=o(Xu,"NLP"),Xu.forEach(a),Xr.forEach(a),Yn=u(s),E(Ls.$$.fragment,s),Pn=u(s),N=t(s,"P",{});var Le=r(N);Cp=o(Le,"Lo strumento principale per processare dati testuali \xE8 un "),de=t(Le,"A",{href:!0});var Yu=r(de);Gp=o(Yu,"tokenizer"),Yu.forEach(a),Vp=o(Le,". 
Un tokenizer inizia separando il testo in "),gl=t(Le,"EM",{});var Pu=r(gl);Bp=o(Pu,"tokens"),Pu.forEach(a),qp=o(Le," secondo una serie di regole. I tokens sono convertiti in numeri, questi vengono utilizzati per costruire i tensori di input del modello. Anche altri input addizionali se richiesti dal modello vengono aggiunti dal tokenizer."),Le.forEach(a),Qn=u(s),E(gs.$$.fragment,s),Dn=u(s),x=t(s,"P",{});var Oe=r(x);Zp=o(Oe,"Iniziamo subito caricando un tokenizer preaddestrato con la classe "),vl=t(Oe,"CODE",{});var Qu=r(vl);Ap=o(Qu,"AutoTokenizer"),Qu.forEach(a),Rp=o(Oe,". Questo scarica il "),yl=t(Oe,"EM",{});var Du=r(yl);Np=o(Du,"vocabolario"),Du.forEach(a),xp=o(Oe," usato quando il modello \xE8 stato preaddestrato."),Oe.forEach(a),Sn=u(s),as=t(s,"H3",{class:!0});var Yr=r(as);vs=t(Yr,"A",{id:!0,class:!0,href:!0});var Su=r(vs);Jl=t(Su,"SPAN",{});var Hu=r(Jl);E(Os.$$.fragment,Hu),Hu.forEach(a),Su.forEach(a),Wp=u(Yr),_l=t(Yr,"SPAN",{});var Lu=r(_l);Fp=o(Lu,"Tokenize"),Lu.forEach(a),Yr.forEach(a),Hn=u(s),ys=t(s,"P",{});var Pr=r(ys);Xp=o(Pr,"Carica un tokenizer preaddestrato con "),wl=t(Pr,"CODE",{});var Ou=r(wl);Yp=o(Ou,"AutoTokenizer.from_pretrained()"),Ou.forEach(a),Pp=o(Pr,":"),Pr.forEach(a),Ln=u(s),E(Ks.$$.fragment,s),On=u(s),je=t(s,"P",{});var Ku=r(je);Qp=o(Ku,"Poi inserisci le tue frasi nel tokenizer:"),Ku.forEach(a),Kn=u(s),E(sa.$$.fragment,s),st=u(s),be=t(s,"P",{});var sm=r(be);Dp=o(sm,"Il tokenizer restituisce un dizionario contenente tre oggetti importanti:"),sm.forEach(a),at=u(s),W=t(s,"UL",{});var Ke=r(W);fe=t(Ke,"LI",{});var $u=r(fe);ge=t($u,"A",{href:!0});var am=r(ge);Sp=o(am,"input_ids"),am.forEach(a),Hp=o($u," sono gli indici che corrispondono ad ogni token nella frase."),$u.forEach(a),Lp=u(Ke),ve=t(Ke,"LI",{});var Tu=r(ve);ye=t(Tu,"A",{href:!0});var em=r(ye);Op=o(em,"attention_mask"),em.forEach(a),Kp=o(Tu," indicata se un token deve essere elaborato o no."),Tu.forEach(a),so=u(Ke),Je=t(Ke,"LI",{});var Mu=r(Je);_e=t(Mu,"A",{href:!0});var 
lm=r(_e);ao=o(lm,"token_type_ids"),lm.forEach(a),eo=o(Mu," identifica a quale sequenza appartiene un token se \xE8 presente pi\xF9 di una sequenza."),Mu.forEach(a),Ke.forEach(a),et=u(s),Js=t(s,"P",{});var Qr=r(Js);lo=o(Qr,"Si possono decodificare gli "),$l=t(Qr,"CODE",{});var nm=r($l);no=o(nm,"input_ids"),nm.forEach(a),to=o(Qr," per farsi restituire l\u2019input originale:"),Qr.forEach(a),lt=u(s),E(aa.$$.fragment,s),nt=u(s),F=t(s,"P",{});var sl=r(F);ro=o(sl,"Come si pu\xF2 vedere, il tokenizer aggiunge due token speciali - "),Tl=t(sl,"CODE",{});var tm=r(Tl);po=o(tm,"CLS"),tm.forEach(a),oo=o(sl," e "),Ml=t(sl,"CODE",{});var rm=r(Ml);io=o(rm,"SEP"),rm.forEach(a),co=o(sl," (classificatore e separatore) - alla frase. Non tutti i modelli hanno bisogno dei token speciali, ma se servono, il tokenizer li aggiunger\xE0 automaticamente."),sl.forEach(a),tt=u(s),we=t(s,"P",{});var pm=r(we);uo=o(pm,"Se ci sono pi\xF9 frasi che vuoi processare, passale come una lista al tokenizer:"),pm.forEach(a),rt=u(s),E(ea.$$.fragment,s),pt=u(s),es=t(s,"H3",{class:!0});var Dr=r(es);_s=t(Dr,"A",{id:!0,class:!0,href:!0});var om=r(_s);El=t(om,"SPAN",{});var im=r(El);E(la.$$.fragment,im),im.forEach(a),om.forEach(a),mo=u(Dr),kl=t(Dr,"SPAN",{});var cm=r(kl);ho=o(cm,"Pad"),cm.forEach(a),Dr.forEach(a),ot=u(s),ws=t(s,"P",{});var Sr=r(ws);jo=o(Sr,"Questo \xE8 un argomento importante. Quando processi un insieme di frasi potrebbero non avere tutte la stessa lunghezza. Questo \xE8 un problema perch\xE8 i tensori, in input del modello, devono avere dimensioni uniformi. 
Il padding \xE8 una strategia per assicurarsi che i tensori siano rettangolari aggiungendo uno speciale "),zl=t(Sr,"EM",{});var um=r(zl);bo=o(um,"padding token"),um.forEach(a),fo=o(Sr," alle frasi pi\xF9 corte."),Sr.forEach(a),it=u(s),X=t(s,"P",{});var al=r(X);go=o(al,"Imposta il parametro "),Il=t(al,"CODE",{});var mm=r(Il);vo=o(mm,"padding"),mm.forEach(a),yo=o(al," a "),Ul=t(al,"CODE",{});var hm=r(Ul);Jo=o(hm,"True"),hm.forEach(a),_o=o(al," per imbottire le frasi pi\xF9 corte nel gruppo in modo che combacino con la massima lunghezza presente:"),al.forEach(a),ct=u(s),E(na.$$.fragment,s),ut=u(s),$s=t(s,"P",{});var Hr=r($s);wo=o(Hr,"Nota che il tokenizer aggiunge alle sequenze degli "),Cl=t(Hr,"CODE",{});var dm=r(Cl);$o=o(dm,"0"),dm.forEach(a),To=o(Hr," perch\xE8 sono troppo corte!"),Hr.forEach(a),mt=u(s),ls=t(s,"H3",{class:!0});var Lr=r(ls);Ts=t(Lr,"A",{id:!0,class:!0,href:!0});var jm=r(Ts);Gl=t(jm,"SPAN",{});var bm=r(Gl);E(ta.$$.fragment,bm),bm.forEach(a),jm.forEach(a),Mo=u(Lr),Vl=t(Lr,"SPAN",{});var fm=r(Vl);Eo=o(fm,"Truncation"),fm.forEach(a),Lr.forEach(a),ht=u(s),$e=t(s,"P",{});var gm=r($e);ko=o(gm,"L\u2019altra faccia della medaglia \xE8 che avolte le sequenze possono essere troppo lunghe per essere gestite dal modello. 
In questo caso, avrai bisogno di troncare la sequenza per avere una lunghezza minore."),gm.forEach(a),dt=u(s),Y=t(s,"P",{});var el=r(Y);zo=o(el,"Imposta il parametro "),Bl=t(el,"CODE",{});var vm=r(Bl);Io=o(vm,"truncation"),vm.forEach(a),Uo=o(el," a "),ql=t(el,"CODE",{});var ym=r(ql);Co=o(ym,"True"),ym.forEach(a),Go=o(el," per troncare una sequenza alla massima lunghezza accettata dal modello:"),el.forEach(a),jt=u(s),E(ra.$$.fragment,s),bt=u(s),ns=t(s,"H3",{class:!0});var Or=r(ns);Ms=t(Or,"A",{id:!0,class:!0,href:!0});var Jm=r(Ms);Zl=t(Jm,"SPAN",{});var _m=r(Zl);E(pa.$$.fragment,_m),_m.forEach(a),Jm.forEach(a),Vo=u(Or),Al=t(Or,"SPAN",{});var wm=r(Al);Bo=o(wm,"Costruire i tensori"),wm.forEach(a),Or.forEach(a),ft=u(s),Te=t(s,"P",{});var $m=r(Te);qo=o($m,"Infine, vuoi che il tokenizer restituisca i tensori prodotti dal modello."),$m.forEach(a),gt=u(s),Z=t(s,"P",{});var Qs=r(Z);Zo=o(Qs,"Imposta il parametro "),Rl=t(Qs,"CODE",{});var Tm=r(Rl);Ao=o(Tm,"return_tensors"),Tm.forEach(a),Ro=o(Qs," su "),Nl=t(Qs,"CODE",{});var Mm=r(Nl);No=o(Mm,"pt"),Mm.forEach(a),xo=o(Qs," per PyTorch, o "),xl=t(Qs,"CODE",{});var Em=r(xl);Wo=o(Em,"tf"),Em.forEach(a),Fo=o(Qs," per TensorFlow:"),Qs.forEach(a),vt=u(s),E(oa.$$.fragment,s),yt=u(s),ts=t(s,"H2",{class:!0});var Kr=r(ts);Es=t(Kr,"A",{id:!0,class:!0,href:!0});var km=r(Es);Wl=t(km,"SPAN",{});var zm=r(Wl);E(ia.$$.fragment,zm),zm.forEach(a),km.forEach(a),Xo=u(Kr),Fl=t(Kr,"SPAN",{});var Im=r(Fl);Yo=o(Im,"Audio"),Im.forEach(a),Kr.forEach(a),Jt=u(s),ks=t(s,"P",{});var sp=r(ks);Po=o(sp,"Gli input audio sono processati in modo differente rispetto al testo, ma l\u2019obiettivo rimane lo stesso: creare sequenze numeriche che il modello pu\xF2 capire. Un "),Me=t(sp,"A",{href:!0});var Um=r(Me);Qo=o(Um,"estrattore di caratteristiche"),Um.forEach(a),Do=o(sp," \xE8 progettato con lo scopo preciso di estrarre caratteristiche da immagini o dati audio grezzi e convertirli in tensori. 
Prima di iniziare, installa \u{1F917} Datasets per caricare un dataset audio e sperimentare:"),sp.forEach(a),_t=u(s),E(ca.$$.fragment,s),wt=u(s),P=t(s,"P",{});var ll=r(P);So=o(ll,"Carica il dataset "),ua=t(ll,"A",{href:!0,rel:!0});var Cm=r(ua);Ho=o(Cm,"MInDS-14"),Cm.forEach(a),Lo=o(ll," (vedi il \u{1F917} "),ma=t(ll,"A",{href:!0,rel:!0});var Gm=r(ma);Oo=o(Gm,"Datasets tutorial"),Gm.forEach(a),Ko=o(ll," per avere maggiori dettagli su come caricare un dataset):"),ll.forEach(a),$t=u(s),E(ha.$$.fragment,s),Tt=u(s),Q=t(s,"P",{});var nl=r(Q);si=o(nl,"Accedi al primo elemento della colonna "),Xl=t(nl,"CODE",{});var Vm=r(Xl);ai=o(Vm,"audio"),Vm.forEach(a),ei=o(nl," per dare uno sguardo all\u2019input. Richiamando la colonna "),Yl=t(nl,"CODE",{});var Bm=r(Yl);li=o(Bm,"audio"),Bm.forEach(a),ni=o(nl," sar\xE0 caricato automaticamente e ricampionato il file audio:"),nl.forEach(a),Mt=u(s),E(da.$$.fragment,s),Et=u(s),Ee=t(s,"P",{});var qm=r(Ee);ti=o(qm,"Questo restituisce tre oggetti:"),qm.forEach(a),kt=u(s),D=t(s,"UL",{});var tl=r(D);ke=t(tl,"LI",{});var Eu=r(ke);Pl=t(Eu,"CODE",{});var Zm=r(Pl);ri=o(Zm,"array"),Zm.forEach(a),pi=o(Eu," \xE8 il segnale vocale caricato - e potenzialmente ricampionato - come vettore 1D."),Eu.forEach(a),oi=u(tl),ze=t(tl,"LI",{});var ku=r(ze);Ql=t(ku,"CODE",{});var Am=r(Ql);ii=o(Am,"path"),Am.forEach(a),ci=o(ku," il percorso del file audio."),ku.forEach(a),ui=u(tl),Ie=t(tl,"LI",{});var zu=r(Ie);Dl=t(zu,"CODE",{});var Rm=r(Dl);mi=o(Rm,"sampling_rate"),Rm.forEach(a),hi=o(zu," si riferisce al numero di campioni del segnale vocale misurati al secondo."),zu.forEach(a),tl.forEach(a),zt=u(s),rs=t(s,"H3",{class:!0});var ap=r(rs);zs=t(ap,"A",{id:!0,class:!0,href:!0});var Nm=r(zs);Sl=t(Nm,"SPAN",{});var xm=r(Sl);E(ja.$$.fragment,xm),xm.forEach(a),Nm.forEach(a),di=u(ap),Hl=t(ap,"SPAN",{});var Wm=r(Hl);ji=o(Wm,"Ricampionamento"),Wm.forEach(a),ap.forEach(a),It=u(s),Is=t(s,"P",{});var ep=r(Is);bi=o(ep,"Per questo tutorial, puoi usare il modello 
"),ba=t(ep,"A",{href:!0,rel:!0});var Fm=r(ba);fi=o(Fm,"Wav2Vec2"),Fm.forEach(a),gi=o(ep,". Come puoi vedere dalla model card, il modello Wav2Vec2 \xE8 preaddestrato su un campionamento vocale a 16kHz.\xC8 importante che la frequenza di campionamento dei tuoi dati audio combaci con la frequenza di campionamento del dataset usato per preaddestrare il modello. Se la frequenza di campionamento dei tuoi dati non \xE8 uguale dovrai ricampionare i tuoi dati audio."),ep.forEach(a),Ut=u(s),Us=t(s,"P",{});var lp=r(Us);vi=o(lp,"Per esempio, il dataset "),fa=t(lp,"A",{href:!0,rel:!0});var Xm=r(fa);yi=o(Xm,"MInDS-14"),Xm.forEach(a),Ji=o(lp," ha una frequenza di campionamento di 8000kHz. Utilizzando il modello Wav2Vec2 su questo dataset, alzala a 16kHz:"),lp.forEach(a),Ct=u(s),E(ga.$$.fragment,s),Gt=u(s),Ue=t(s,"OL",{});var Ym=r(Ue);va=t(Ym,"LI",{});var np=r(va);_i=o(np,"Usa il metodo di \u{1F917} Datasets\u2019 "),ya=t(np,"A",{href:!0,rel:!0});var Pm=r(ya);Ll=t(Pm,"CODE",{});var Qm=r(Ll);wi=o(Qm,"cast_column"),Qm.forEach(a),Pm.forEach(a),$i=o(np," per alzare la frequenza di campionamento a 16kHz:"),np.forEach(a),Ym.forEach(a),Vt=u(s),E(Ja.$$.fragment,s),Bt=u(s),_a=t(s,"OL",{start:!0});var Dm=r(_a);Ol=t(Dm,"LI",{});var Sm=r(Ol);Ti=o(Sm,"Carica il file audio:"),Sm.forEach(a),Dm.forEach(a),qt=u(s),E(wa.$$.fragment,s),Zt=u(s),Cs=t(s,"P",{});var tp=r(Cs);Mi=o(tp,"Come puoi notare, la "),Kl=t(tp,"CODE",{});var Hm=r(Kl);Ei=o(Hm,"sampling_rate"),Hm.forEach(a),ki=o(tp," adesso \xE8 16kHz!"),tp.forEach(a),At=u(s),ps=t(s,"H3",{class:!0});var rp=r(ps);Gs=t(rp,"A",{id:!0,class:!0,href:!0});var Lm=r(Gs);sn=t(Lm,"SPAN",{});var Om=r(sn);E($a.$$.fragment,Om),Om.forEach(a),Lm.forEach(a),zi=u(rp),an=t(rp,"SPAN",{});var Km=r(an);Ii=o(Km,"Feature extractor"),Km.forEach(a),rp.forEach(a),Rt=u(s),A=t(s,"P",{});var Ds=r(A);Ui=o(Ds,"Il prossimo passo \xE8 caricare un estrattore di caratteristiche per normalizzare e fare padding sull\u2019input. 
Quando applichiamo il padding sui dati testuali, uno "),en=t(Ds,"CODE",{});var sh=r(en);Ci=o(sh,"0"),sh.forEach(a),Gi=o(Ds," \xE8 aggiunto alle sequenze pi\xF9 brevi. La stessa idea si applica ai dati audio, l\u2019estrattore di caratteristiche per gli audio aggiunger\xE0 uno "),ln=t(Ds,"CODE",{});var ah=r(ln);Vi=o(ah,"0"),ah.forEach(a),Bi=o(Ds," - interpretato come silenzio - agli "),nn=t(Ds,"CODE",{});var eh=r(nn);qi=o(eh,"array"),eh.forEach(a),Zi=o(Ds,"."),Ds.forEach(a),Nt=u(s),Vs=t(s,"P",{});var pp=r(Vs);Ai=o(pp,"Carica l\u2019estrattore delle caratteristiche con "),tn=t(pp,"CODE",{});var lh=r(tn);Ri=o(lh,"AutoFeatureExtractor.from_pretrained()"),lh.forEach(a),Ni=o(pp,":"),pp.forEach(a),xt=u(s),E(Ta.$$.fragment,s),Wt=u(s),S=t(s,"P",{});var rl=r(S);xi=o(rl,"Inserisci l\u2019 "),rn=t(rl,"CODE",{});var nh=r(rn);Wi=o(nh,"array"),nh.forEach(a),Fi=o(rl," audio nell\u2019estrattore delle caratteristiche. Noi raccomandiamo sempre di aggiungere il parametro "),pn=t(rl,"CODE",{});var th=r(pn);Xi=o(th,"sampling_rate"),th.forEach(a),Yi=o(rl," nell\u2019estrattore delle caratteristiche per correggere meglio qualche errore, dovuto ai silenzi, che potrebbe verificarsi."),rl.forEach(a),Ft=u(s),E(Ma.$$.fragment,s),Xt=u(s),os=t(s,"H3",{class:!0});var op=r(os);Bs=t(op,"A",{id:!0,class:!0,href:!0});var rh=r(Bs);on=t(rh,"SPAN",{});var ph=r(on);E(Ea.$$.fragment,ph),ph.forEach(a),rh.forEach(a),Pi=u(op),cn=t(op,"SPAN",{});var oh=r(cn);Qi=o(oh,"Pad e truncate"),oh.forEach(a),op.forEach(a),Yt=u(s),Ce=t(s,"P",{});var ih=r(Ce);Di=o(ih,"Come per il tokenizer, puoi applicare le operazioni padding o truncation per manipolare sequenze di variabili a lotti. Dai uno sguaro alla lunghezza delle sequenze di questi due campioni audio:"),ih.forEach(a),Pt=u(s),E(ka.$$.fragment,s),Qt=u(s),Ge=t(s,"P",{});var ch=r(Ge);Si=o(ch,"Come puoi vedere, il primo campione ha una sequenza pi\xF9 lunga del secondo. Crea una funzione che preprocesser\xE0 il dataset. 
Specifica una lunghezza massima del campione, e l\u2019estrattore di features si occuper\xE0 di riempire o troncare la sequenza per coincidervi:"),ch.forEach(a),Dt=u(s),E(za.$$.fragment,s),St=u(s),Ve=t(s,"P",{});var uh=r(Ve);Hi=o(uh,"Applica la funzione ai primi esempi nel dataset:"),uh.forEach(a),Ht=u(s),E(Ia.$$.fragment,s),Lt=u(s),Be=t(s,"P",{});var mh=r(Be);Li=o(mh,"Adesso guarda la lunghezza dei campioni elaborati:"),mh.forEach(a),Ot=u(s),E(Ua.$$.fragment,s),Kt=u(s),qe=t(s,"P",{});var hh=r(qe);Oi=o(hh,"La lunghezza dei campioni adesso coincide con la massima lunghezza impostata nelle funzione."),hh.forEach(a),sr=u(s),is=t(s,"H2",{class:!0});var ip=r(is);qs=t(ip,"A",{id:!0,class:!0,href:!0});var dh=r(qs);un=t(dh,"SPAN",{});var jh=r(un);E(Ca.$$.fragment,jh),jh.forEach(a),dh.forEach(a),Ki=u(ip),mn=t(ip,"SPAN",{});var bh=r(mn);sc=o(bh,"Vision"),bh.forEach(a),ip.forEach(a),ar=u(s),Ze=t(s,"P",{});var fh=r(Ze);ac=o(fh,"Un estrattore di caratteristiche si pu\xF2 usare anche per processare immagini e per compiti di visione. Ancora una volta, l\u2019obiettivo \xE8 convertire l\u2019immagine grezza in un lotto di tensori come input."),fh.forEach(a),er=u(s),H=t(s,"P",{});var pl=r(H);ec=o(pl,"Carica il dataset "),Ga=t(pl,"A",{href:!0,rel:!0});var gh=r(Ga);lc=o(gh,"food101"),gh.forEach(a),nc=o(pl," per questa esercitazione. 
Usa il parametro "),hn=t(pl,"CODE",{});var vh=r(hn);tc=o(vh,"split"),vh.forEach(a),rc=o(pl," di \u{1F917} Datasets per caricare solo un piccolo campione dal dataset di addestramento poich\xE8 il set di dati \xE8 molto grande:"),pl.forEach(a),lr=u(s),E(Va.$$.fragment,s),nr=u(s),Zs=t(s,"P",{});var cp=r(Zs);pc=o(cp,"Secondo passo, dai uno sguardo alle immagini usando la caratteristica "),Ba=t(cp,"A",{href:!0,rel:!0});var yh=r(Ba);dn=t(yh,"CODE",{});var Jh=r(dn);oc=o(Jh,"Image"),Jh.forEach(a),yh.forEach(a),ic=o(cp," di \u{1F917} Datasets:"),cp.forEach(a),tr=u(s),E(qa.$$.fragment,s),rr=u(s),Ae=t(s,"P",{});var _h=r(Ae);Re=t(_h,"IMG",{src:!0,alt:!0}),_h.forEach(a),pr=u(s),cs=t(s,"H3",{class:!0});var up=r(cs);As=t(up,"A",{id:!0,class:!0,href:!0});var wh=r(As);jn=t(wh,"SPAN",{});var $h=r(jn);E(Za.$$.fragment,$h),$h.forEach(a),wh.forEach(a),cc=u(up),bn=t(up,"SPAN",{});var Th=r(bn);uc=o(Th,"Feature extractor"),Th.forEach(a),up.forEach(a),or=u(s),Rs=t(s,"P",{});var mp=r(Rs);mc=o(mp,"Carica l\u2019estrattore di caratteristiche "),fn=t(mp,"CODE",{});var Mh=r(fn);hc=o(Mh,"AutoFeatureExtractor.from_pretrained()"),Mh.forEach(a),dc=o(mp,":"),mp.forEach(a),ir=u(s),E(Aa.$$.fragment,s),cr=u(s),us=t(s,"H3",{class:!0});var hp=r(us);Ns=t(hp,"A",{id:!0,class:!0,href:!0});var Eh=r(Ns);gn=t(Eh,"SPAN",{});var kh=r(gn);E(Ra.$$.fragment,kh),kh.forEach(a),Eh.forEach(a),jc=u(hp),vn=t(hp,"SPAN",{});var zh=r(vn);bc=o(zh,"Data augmentation"),zh.forEach(a),hp.forEach(a),ur=u(s),xs=t(s,"P",{});var dp=r(xs);fc=o(dp,"Per le attivit\xE0 di visione, \xE8 usuale aggiungere alcuni tipi di data augmentation alle immagini come parte del preprocessing. 
Puoi aggiungere augmentations con qualsiasi libreria che preferisci, ma in questa esercitazione, userai il modulo "),Na=t(dp,"A",{href:!0,rel:!0});var Ih=r(Na);yn=t(Ih,"CODE",{});var Uh=r(yn);gc=o(Uh,"transforms"),Uh.forEach(a),Ih.forEach(a),vc=o(dp," di torchvision."),dp.forEach(a),mr=u(s),Ne=t(s,"OL",{});var Ch=r(Ne);R=t(Ch,"LI",{});var Ss=r(R);yc=o(Ss,"Normalizza l\u2019immagine e usa "),xa=t(Ss,"A",{href:!0,rel:!0});var Gh=r(xa);Jn=t(Gh,"CODE",{});var Vh=r(Jn);Jc=o(Vh,"Compose"),Vh.forEach(a),Gh.forEach(a),_c=o(Ss," per concatenare alcune trasformazioni - "),Wa=t(Ss,"A",{href:!0,rel:!0});var Bh=r(Wa);_n=t(Bh,"CODE",{});var qh=r(_n);wc=o(qh,"RandomResizedCrop"),qh.forEach(a),Bh.forEach(a),$c=o(Ss," e "),Fa=t(Ss,"A",{href:!0,rel:!0});var Zh=r(Fa);wn=t(Zh,"CODE",{});var Ah=r(wn);Tc=o(Ah,"ColorJitter"),Ah.forEach(a),Zh.forEach(a),Mc=o(Ss," - insieme:"),Ss.forEach(a),Ch.forEach(a),hr=u(s),E(Xa.$$.fragment,s),dr=u(s),Ya=t(s,"OL",{start:!0});var Rh=r(Ya);ms=t(Rh,"LI",{});var ol=r(ms);Ec=o(ol,"Il modello accetta "),xe=t(ol,"A",{href:!0});var Nh=r(xe);$n=t(Nh,"CODE",{});var xh=r($n);kc=o(xh,"pixel_values"),xh.forEach(a),Nh.forEach(a),zc=o(ol," come input. Questo valore \xE8 generato dall\u2019estrattore di caratteristiche. 
Crea una funzione che genera "),Tn=t(ol,"CODE",{});var Wh=r(Tn);Ic=o(Wh,"pixel_values"),Wh.forEach(a),Uc=o(ol," dai transforms:"),ol.forEach(a),Rh.forEach(a),jr=u(s),E(Pa.$$.fragment,s),br=u(s),Qa=t(s,"OL",{start:!0});var Fh=r(Qa);Da=t(Fh,"LI",{});var jp=r(Da);Cc=o(jp,"Poi utilizza \u{1F917} Datasets "),Sa=t(jp,"A",{href:!0,rel:!0});var Xh=r(Sa);Mn=t(Xh,"CODE",{});var Yh=r(Mn);Gc=o(Yh,"set_transform"),Yh.forEach(a),Xh.forEach(a),Vc=o(jp,"per applicare al volo la trasformazione:"),jp.forEach(a),Fh.forEach(a),fr=u(s),E(Ha.$$.fragment,s),gr=u(s),La=t(s,"OL",{start:!0});var Ph=r(La);Oa=t(Ph,"LI",{});var bp=r(Oa);Bc=o(bp,"Adesso quando accedi all\u2019immagine, puoi notare che l\u2019estrattore di caratteristiche ha aggiunto "),En=t(bp,"CODE",{});var Qh=r(En);qc=o(Qh,"pixel_values"),Qh.forEach(a),Zc=o(bp," allo schema di input:"),bp.forEach(a),Ph.forEach(a),vr=u(s),E(Ka.$$.fragment,s),yr=u(s),We=t(s,"P",{});var Dh=r(We);Ac=o(Dh,"Di seguito come si vede l\u2019immagine dopo la fase di preprocessing. Come ci si aspetterebbe dalle trasformazioni applicate, l\u2019immagine \xE8 stata ritagliata in modo casuale e le propriet\xE0 del colore sono diverse."),Dh.forEach(a),Jr=u(s),E(se.$$.fragment,s),_r=u(s),Fe=t(s,"P",{});var Sh=r(Fe);Xe=t(Sh,"IMG",{src:!0,alt:!0}),Sh.forEach(a),wr=u(s),hs=t(s,"H2",{class:!0});var fp=r(hs);Ws=t(fp,"A",{id:!0,class:!0,href:!0});var Hh=r(Ws);kn=t(Hh,"SPAN",{});var Lh=r(kn);E(ae.$$.fragment,Lh),Lh.forEach(a),Hh.forEach(a),Rc=u(fp),zn=t(fp,"SPAN",{});var Oh=r(zn);Nc=o(Oh,"Multimodal"),Oh.forEach(a),fp.forEach(a),$r=u(s),Ye=t(s,"P",{});var Kh=r(Ye);xc=o(Kh,"Per attivit\xE0 multimodali userai una combinazione di tutto quello che hai imparato poco fa e applicherai le tue competenze alla comprensione automatica del parlato (Automatic Speech Recognition - ASR). 
Questo significa che avrai bisogno di:"),Kh.forEach(a),Tr=u(s),Fs=t(s,"UL",{});var gp=r(Fs);In=t(gp,"LI",{});var sd=r(In);Wc=o(sd,"Un estrattore delle caratteristiche per processare i dati audio."),sd.forEach(a),Fc=u(gp),Un=t(gp,"LI",{});var ad=r(Un);Xc=o(ad,"Il Tokenizer per processare i testi."),ad.forEach(a),gp.forEach(a),Mr=u(s),Xs=t(s,"P",{});var vp=r(Xs);Yc=o(vp,"Ritorna sul datasere "),ee=t(vp,"A",{href:!0,rel:!0});var ed=r(ee);Pc=o(ed,"LJ Speech"),ed.forEach(a),Qc=o(vp,":"),vp.forEach(a),Er=u(s),E(le.$$.fragment,s),kr=u(s),L=t(s,"P",{});var il=r(L);Dc=o(il,"Visto che sei interessato solo alle colonne "),Cn=t(il,"CODE",{});var ld=r(Cn);Sc=o(ld,"audio"),ld.forEach(a),Hc=o(il," e "),Gn=t(il,"CODE",{});var nd=r(Gn);Lc=o(nd,"text"),nd.forEach(a),Oc=o(il,", elimina tutte le altre:"),il.forEach(a),zr=u(s),E(ne.$$.fragment,s),Ir=u(s),O=t(s,"P",{});var cl=r(O);Kc=o(cl,"Adesso guarda le colonne "),Vn=t(cl,"CODE",{});var td=r(Vn);su=o(td,"audio"),td.forEach(a),au=o(cl," e "),Bn=t(cl,"CODE",{});var rd=r(Bn);eu=o(rd,"text"),rd.forEach(a),lu=o(cl,":"),cl.forEach(a),Ur=u(s),E(te.$$.fragment,s),Cr=u(s),Ys=t(s,"P",{});var yp=r(Ys);nu=o(yp,"Ricorda dalla sezione precedente sull\u2019elaborazione dei dati audio, tu dovresti sempre "),Pe=t(yp,"A",{href:!0});var pd=r(Pe);tu=o(pd,"ricampionare"),pd.forEach(a),ru=o(yp," la frequenza di campionamento dei tuoi dati audio per farla coincidere con quella del dataset usato dal modello preaddestrato:"),yp.forEach(a),Gr=u(s),E(re.$$.fragment,s),Vr=u(s),ds=t(s,"H3",{class:!0});var Jp=r(ds);Ps=t(Jp,"A",{id:!0,class:!0,href:!0});var od=r(Ps);qn=t(od,"SPAN",{});var id=r(qn);E(pe.$$.fragment,id),id.forEach(a),od.forEach(a),pu=u(Jp),Zn=t(Jp,"SPAN",{});var cd=r(Zn);ou=o(cd,"Processor"),cd.forEach(a),Jp.forEach(a),Br=u(s),Qe=t(s,"P",{});var ud=r(Qe);iu=o(ud,"Un processor combina un estrattore di caratteristiche e un tokenizer. 
Carica un processor con [`AutoProcessor.from_pretrained]:"),ud.forEach(a),qr=u(s),E(oe.$$.fragment,s),Zr=u(s),De=t(s,"OL",{});var md=r(De);js=t(md,"LI",{});var ul=r(js);cu=o(ul,"Crea una funzione che processi i dati audio in "),An=t(ul,"CODE",{});var hd=r(An);uu=o(hd,"input_values"),hd.forEach(a),mu=o(ul,", e tokenizza il testo in "),Rn=t(ul,"CODE",{});var dd=r(Rn);hu=o(dd,"labels"),dd.forEach(a),du=o(ul,". Questi sono i tuoi input per il modello:"),ul.forEach(a),md.forEach(a),Ar=u(s),E(ie.$$.fragment,s),Rr=u(s),ce=t(s,"OL",{start:!0});var jd=r(ce);ue=t(jd,"LI",{});var _p=r(ue);ju=o(_p,"Applica la funzione "),Nn=t(_p,"CODE",{});var bd=r(Nn);bu=o(bd,"prepare_dataset"),bd.forEach(a),fu=o(_p," ad un campione:"),_p.forEach(a),jd.forEach(a),Nr=u(s),E(me.$$.fragment,s),xr=u(s),K=t(s,"P",{});var ml=r(K);gu=o(ml,"Nota che il processor ha aggiunto "),xn=t(ml,"CODE",{});var fd=r(xn);vu=o(fd,"input_values"),fd.forEach(a),yu=o(ml," e "),Wn=t(ml,"CODE",{});var gd=r(Wn);Ju=o(gd,"labels"),gd.forEach(a),_u=o(ml,". La frequenza di campionamento \xE8 stata corretta riducendola a 16kHz."),ml.forEach(a),Wr=u(s),Se=t(s,"P",{});var vd=r(Se);wu=o(vd,"Fantastico, ora dovresti essere in grado di preelaborare i dati per qualsiasi modalit\xE0 e persino di combinare modalit\xE0 diverse! 
Nella prossima esercitazione, impareremo a mettere a punto un modello sui dati appena pre-elaborati."),vd.forEach(a),this.h()},h(){m(h,"name","hf:doc:metadata"),m(h,"content",JSON.stringify(Ld)),m(y,"id","preprocess"),m(y,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(y,"href","#preprocess"),m(b,"class","relative group"),m(fs,"id","nlp"),m(fs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(fs,"href","#nlp"),m(ss,"class","relative group"),m(de,"href","main_classes/tokenizer"),m(vs,"id","tokenize"),m(vs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(vs,"href","#tokenize"),m(as,"class","relative group"),m(ge,"href","glossary#input-ids"),m(ye,"href","glossary#attention-mask"),m(_e,"href","glossary#token-type-ids"),m(_s,"id","pad"),m(_s,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(_s,"href","#pad"),m(es,"class","relative group"),m(Ts,"id","truncation"),m(Ts,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(Ts,"href","#truncation"),m(ls,"class","relative group"),m(Ms,"id","costruire-i-tensori"),m(Ms,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(Ms,"href","#costruire-i-tensori"),m(ns,"class","relative group"),m(Es,"id","audio"),m(Es,"class","header-link block pr-1.5 text-lg no-hover:hidden 
with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(Es,"href","#audio"),m(ts,"class","relative group"),m(Me,"href","main_classes/feature_extractor"),m(ua,"href","https://huggingface.co/datasets/PolyAI/minds14"),m(ua,"rel","nofollow"),m(ma,"href","https://huggingface.co/docs/datasets/load_hub.html"),m(ma,"rel","nofollow"),m(zs,"id","ricampionamento"),m(zs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(zs,"href","#ricampionamento"),m(rs,"class","relative group"),m(ba,"href","https://huggingface.co/facebook/wav2vec2-base"),m(ba,"rel","nofollow"),m(fa,"href","https://huggingface.co/datasets/PolyAI/minds14"),m(fa,"rel","nofollow"),m(ya,"href","https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.cast_column"),m(ya,"rel","nofollow"),m(_a,"start","2"),m(Gs,"id","feature-extractor"),m(Gs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(Gs,"href","#feature-extractor"),m(ps,"class","relative group"),m(Bs,"id","pad-e-truncate"),m(Bs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(Bs,"href","#pad-e-truncate"),m(os,"class","relative group"),m(qs,"id","vision"),m(qs,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(qs,"href","#vision"),m(is,"class","relative 
group"),m(Ga,"href","https://huggingface.co/datasets/food101"),m(Ga,"rel","nofollow"),m(Ba,"href","https://huggingface.co/docs/datasets/package_reference/main_classes.html?highlight=image#datasets.Image"),m(Ba,"rel","nofollow"),Jd(Re.src,Gu="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/vision-preprocess-tutorial.png")||m(Re,"src",Gu),m(Re,"alt","vision-preprocess-tutorial.png"),m(As,"id","feature-extractor"),m(As,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(As,"href","#feature-extractor"),m(cs,"class","relative group"),m(Ns,"id","data-augmentation"),m(Ns,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(Ns,"href","#data-augmentation"),m(us,"class","relative group"),m(Na,"href","https://pytorch.org/vision/stable/transforms.html"),m(Na,"rel","nofollow"),m(xa,"href","https://pytorch.org/vision/master/generated/torchvision.transforms.Compose.html"),m(xa,"rel","nofollow"),m(Wa,"href","https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html"),m(Wa,"rel","nofollow"),m(Fa,"href","https://pytorch.org/vision/main/generated/torchvision.transforms.ColorJitter.html"),m(Fa,"rel","nofollow"),m(xe,"href","model_doc/visionencoderdecoder#transformers.VisionEncoderDecoderModel.forward.pixel_values"),m(Ya,"start","2"),m(Sa,"href","https://huggingface.co/docs/datasets/process.html#format-transform"),m(Sa,"rel","nofollow"),m(Qa,"start","3"),m(La,"start","4"),Jd(Xe.src,Vu="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/preprocessed_image.png")||m(Xe,"src",Vu),m(Xe,"alt","preprocessed_image"),m(Ws,"id","multimodal"),m(Ws,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 
with-hover:group-hover:opacity-100 with-hover:right-full"),m(Ws,"href","#multimodal"),m(hs,"class","relative group"),m(ee,"href","https://huggingface.co/datasets/lj_speech"),m(ee,"rel","nofollow"),m(Pe,"href","preprocessing#audio"),m(Ps,"id","processor"),m(Ps,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),m(Ps,"href","#processor"),m(ds,"class","relative group"),m(ce,"start","2")},m(s,l){e(document.head,h),i(s,j,l),i(s,b,l),e(b,y),e(y,f),T(M,f,null),e(b,J),e(b,_),e(_,d),i(s,z,l),T(I,s,l),i(s,U,l),i(s,bs,l),e(bs,B),i(s,G,l),i(s,q,l),e(q,hl),e(hl,Tp),e(q,Mp),e(q,dl),e(dl,Ep),e(q,kp),e(q,jl),e(jl,zp),i(s,Xn,l),i(s,ss,l),e(ss,fs),e(fs,bl),T(Hs,bl,null),e(ss,Ip),e(ss,fl),e(fl,Up),i(s,Yn,l),T(Ls,s,l),i(s,Pn,l),i(s,N,l),e(N,Cp),e(N,de),e(de,Gp),e(N,Vp),e(N,gl),e(gl,Bp),e(N,qp),i(s,Qn,l),T(gs,s,l),i(s,Dn,l),i(s,x,l),e(x,Zp),e(x,vl),e(vl,Ap),e(x,Rp),e(x,yl),e(yl,Np),e(x,xp),i(s,Sn,l),i(s,as,l),e(as,vs),e(vs,Jl),T(Os,Jl,null),e(as,Wp),e(as,_l),e(_l,Fp),i(s,Hn,l),i(s,ys,l),e(ys,Xp),e(ys,wl),e(wl,Yp),e(ys,Pp),i(s,Ln,l),T(Ks,s,l),i(s,On,l),i(s,je,l),e(je,Qp),i(s,Kn,l),T(sa,s,l),i(s,st,l),i(s,be,l),e(be,Dp),i(s,at,l),i(s,W,l),e(W,fe),e(fe,ge),e(ge,Sp),e(fe,Hp),e(W,Lp),e(W,ve),e(ve,ye),e(ye,Op),e(ve,Kp),e(W,so),e(W,Je),e(Je,_e),e(_e,ao),e(Je,eo),i(s,et,l),i(s,Js,l),e(Js,lo),e(Js,$l),e($l,no),e(Js,to),i(s,lt,l),T(aa,s,l),i(s,nt,l),i(s,F,l),e(F,ro),e(F,Tl),e(Tl,po),e(F,oo),e(F,Ml),e(Ml,io),e(F,co),i(s,tt,l),i(s,we,l),e(we,uo),i(s,rt,l),T(ea,s,l),i(s,pt,l),i(s,es,l),e(es,_s),e(_s,El),T(la,El,null),e(es,mo),e(es,kl),e(kl,ho),i(s,ot,l),i(s,ws,l),e(ws,jo),e(ws,zl),e(zl,bo),e(ws,fo),i(s,it,l),i(s,X,l),e(X,go),e(X,Il),e(Il,vo),e(X,yo),e(X,Ul),e(Ul,Jo),e(X,_o),i(s,ct,l),T(na,s,l),i(s,ut,l),i(s,$s,l),e($s,wo),e($s,Cl),e(Cl,$o),e($s,To),i(s,mt,l),i(s,ls,l),e(ls,Ts),e(Ts,Gl),T(ta,Gl,null),e(ls,Mo),e(ls,Vl),e(Vl,Eo),i(s,ht,l),i(s,$e,l),e($e,ko),i(s,dt,l),i(s,Y,l),e(Y,z
o),e(Y,Bl),e(Bl,Io),e(Y,Uo),e(Y,ql),e(ql,Co),e(Y,Go),i(s,jt,l),T(ra,s,l),i(s,bt,l),i(s,ns,l),e(ns,Ms),e(Ms,Zl),T(pa,Zl,null),e(ns,Vo),e(ns,Al),e(Al,Bo),i(s,ft,l),i(s,Te,l),e(Te,qo),i(s,gt,l),i(s,Z,l),e(Z,Zo),e(Z,Rl),e(Rl,Ao),e(Z,Ro),e(Z,Nl),e(Nl,No),e(Z,xo),e(Z,xl),e(xl,Wo),e(Z,Fo),i(s,vt,l),T(oa,s,l),i(s,yt,l),i(s,ts,l),e(ts,Es),e(Es,Wl),T(ia,Wl,null),e(ts,Xo),e(ts,Fl),e(Fl,Yo),i(s,Jt,l),i(s,ks,l),e(ks,Po),e(ks,Me),e(Me,Qo),e(ks,Do),i(s,_t,l),T(ca,s,l),i(s,wt,l),i(s,P,l),e(P,So),e(P,ua),e(ua,Ho),e(P,Lo),e(P,ma),e(ma,Oo),e(P,Ko),i(s,$t,l),T(ha,s,l),i(s,Tt,l),i(s,Q,l),e(Q,si),e(Q,Xl),e(Xl,ai),e(Q,ei),e(Q,Yl),e(Yl,li),e(Q,ni),i(s,Mt,l),T(da,s,l),i(s,Et,l),i(s,Ee,l),e(Ee,ti),i(s,kt,l),i(s,D,l),e(D,ke),e(ke,Pl),e(Pl,ri),e(ke,pi),e(D,oi),e(D,ze),e(ze,Ql),e(Ql,ii),e(ze,ci),e(D,ui),e(D,Ie),e(Ie,Dl),e(Dl,mi),e(Ie,hi),i(s,zt,l),i(s,rs,l),e(rs,zs),e(zs,Sl),T(ja,Sl,null),e(rs,di),e(rs,Hl),e(Hl,ji),i(s,It,l),i(s,Is,l),e(Is,bi),e(Is,ba),e(ba,fi),e(Is,gi),i(s,Ut,l),i(s,Us,l),e(Us,vi),e(Us,fa),e(fa,yi),e(Us,Ji),i(s,Ct,l),T(ga,s,l),i(s,Gt,l),i(s,Ue,l),e(Ue,va),e(va,_i),e(va,ya),e(ya,Ll),e(Ll,wi),e(va,$i),i(s,Vt,l),T(Ja,s,l),i(s,Bt,l),i(s,_a,l),e(_a,Ol),e(Ol,Ti),i(s,qt,l),T(wa,s,l),i(s,Zt,l),i(s,Cs,l),e(Cs,Mi),e(Cs,Kl),e(Kl,Ei),e(Cs,ki),i(s,At,l),i(s,ps,l),e(ps,Gs),e(Gs,sn),T($a,sn,null),e(ps,zi),e(ps,an),e(an,Ii),i(s,Rt,l),i(s,A,l),e(A,Ui),e(A,en),e(en,Ci),e(A,Gi),e(A,ln),e(ln,Vi),e(A,Bi),e(A,nn),e(nn,qi),e(A,Zi),i(s,Nt,l),i(s,Vs,l),e(Vs,Ai),e(Vs,tn),e(tn,Ri),e(Vs,Ni),i(s,xt,l),T(Ta,s,l),i(s,Wt,l),i(s,S,l),e(S,xi),e(S,rn),e(rn,Wi),e(S,Fi),e(S,pn),e(pn,Xi),e(S,Yi),i(s,Ft,l),T(Ma,s,l),i(s,Xt,l),i(s,os,l),e(os,Bs),e(Bs,on),T(Ea,on,null),e(os,Pi),e(os,cn),e(cn,Qi),i(s,Yt,l),i(s,Ce,l),e(Ce,Di),i(s,Pt,l),T(ka,s,l),i(s,Qt,l),i(s,Ge,l),e(Ge,Si),i(s,Dt,l),T(za,s,l),i(s,St,l),i(s,Ve,l),e(Ve,Hi),i(s,Ht,l),T(Ia,s,l),i(s,Lt,l),i(s,Be,l),e(Be,Li),i(s,Ot,l),T(Ua,s,l),i(s,Kt,l),i(s,qe,l),e(qe,Oi),i(s,sr,l),i(s,is,l),e(is,qs),e(qs,un),T(Ca,un,null),e(is,Ki),e(is,mn),e(mn,sc),i(s,ar,l),i(s,Ze,l),e(Z
e,ac),i(s,er,l),i(s,H,l),e(H,ec),e(H,Ga),e(Ga,lc),e(H,nc),e(H,hn),e(hn,tc),e(H,rc),i(s,lr,l),T(Va,s,l),i(s,nr,l),i(s,Zs,l),e(Zs,pc),e(Zs,Ba),e(Ba,dn),e(dn,oc),e(Zs,ic),i(s,tr,l),T(qa,s,l),i(s,rr,l),i(s,Ae,l),e(Ae,Re),i(s,pr,l),i(s,cs,l),e(cs,As),e(As,jn),T(Za,jn,null),e(cs,cc),e(cs,bn),e(bn,uc),i(s,or,l),i(s,Rs,l),e(Rs,mc),e(Rs,fn),e(fn,hc),e(Rs,dc),i(s,ir,l),T(Aa,s,l),i(s,cr,l),i(s,us,l),e(us,Ns),e(Ns,gn),T(Ra,gn,null),e(us,jc),e(us,vn),e(vn,bc),i(s,ur,l),i(s,xs,l),e(xs,fc),e(xs,Na),e(Na,yn),e(yn,gc),e(xs,vc),i(s,mr,l),i(s,Ne,l),e(Ne,R),e(R,yc),e(R,xa),e(xa,Jn),e(Jn,Jc),e(R,_c),e(R,Wa),e(Wa,_n),e(_n,wc),e(R,$c),e(R,Fa),e(Fa,wn),e(wn,Tc),e(R,Mc),i(s,hr,l),T(Xa,s,l),i(s,dr,l),i(s,Ya,l),e(Ya,ms),e(ms,Ec),e(ms,xe),e(xe,$n),e($n,kc),e(ms,zc),e(ms,Tn),e(Tn,Ic),e(ms,Uc),i(s,jr,l),T(Pa,s,l),i(s,br,l),i(s,Qa,l),e(Qa,Da),e(Da,Cc),e(Da,Sa),e(Sa,Mn),e(Mn,Gc),e(Da,Vc),i(s,fr,l),T(Ha,s,l),i(s,gr,l),i(s,La,l),e(La,Oa),e(Oa,Bc),e(Oa,En),e(En,qc),e(Oa,Zc),i(s,vr,l),T(Ka,s,l),i(s,yr,l),i(s,We,l),e(We,Ac),i(s,Jr,l),T(se,s,l),i(s,_r,l),i(s,Fe,l),e(Fe,Xe),i(s,wr,l),i(s,hs,l),e(hs,Ws),e(Ws,kn),T(ae,kn,null),e(hs,Rc),e(hs,zn),e(zn,Nc),i(s,$r,l),i(s,Ye,l),e(Ye,xc),i(s,Tr,l),i(s,Fs,l),e(Fs,In),e(In,Wc),e(Fs,Fc),e(Fs,Un),e(Un,Xc),i(s,Mr,l),i(s,Xs,l),e(Xs,Yc),e(Xs,ee),e(ee,Pc),e(Xs,Qc),i(s,Er,l),T(le,s,l),i(s,kr,l),i(s,L,l),e(L,Dc),e(L,Cn),e(Cn,Sc),e(L,Hc),e(L,Gn),e(Gn,Lc),e(L,Oc),i(s,zr,l),T(ne,s,l),i(s,Ir,l),i(s,O,l),e(O,Kc),e(O,Vn),e(Vn,su),e(O,au),e(O,Bn),e(Bn,eu),e(O,lu),i(s,Ur,l),T(te,s,l),i(s,Cr,l),i(s,Ys,l),e(Ys,nu),e(Ys,Pe),e(Pe,tu),e(Ys,ru),i(s,Gr,l),T(re,s,l),i(s,Vr,l),i(s,ds,l),e(ds,Ps),e(Ps,qn),T(pe,qn,null),e(ds,pu),e(ds,Zn),e(Zn,ou),i(s,Br,l),i(s,Qe,l),e(Qe,iu),i(s,qr,l),T(oe,s,l),i(s,Zr,l),i(s,De,l),e(De,js),e(js,cu),e(js,An),e(An,uu),e(js,mu),e(js,Rn),e(Rn,hu),e(js,du),i(s,Ar,l),T(ie,s,l),i(s,Rr,l),i(s,ce,l),e(ce,ue),e(ue,ju),e(ue,Nn),e(Nn,bu),e(ue,fu),i(s,Nr,l),T(me,s,l),i(s,xr,l),i(s,K,l),e(K,gu),e(K,xn),e(xn,vu),e(K,yu),e(K,Wn),e(Wn,Ju),e(K,_u),i(s,Wr,l),i(s,Se,l),e(Se,wu
),Fr=!0},p(s,[l]){const he={};l&2&&(he.$$scope={dirty:l,ctx:s}),gs.$set(he)},i(s){Fr||(g(M.$$.fragment,s),g(I.$$.fragment,s),g(Hs.$$.fragment,s),g(Ls.$$.fragment,s),g(gs.$$.fragment,s),g(Os.$$.fragment,s),g(Ks.$$.fragment,s),g(sa.$$.fragment,s),g(aa.$$.fragment,s),g(ea.$$.fragment,s),g(la.$$.fragment,s),g(na.$$.fragment,s),g(ta.$$.fragment,s),g(ra.$$.fragment,s),g(pa.$$.fragment,s),g(oa.$$.fragment,s),g(ia.$$.fragment,s),g(ca.$$.fragment,s),g(ha.$$.fragment,s),g(da.$$.fragment,s),g(ja.$$.fragment,s),g(ga.$$.fragment,s),g(Ja.$$.fragment,s),g(wa.$$.fragment,s),g($a.$$.fragment,s),g(Ta.$$.fragment,s),g(Ma.$$.fragment,s),g(Ea.$$.fragment,s),g(ka.$$.fragment,s),g(za.$$.fragment,s),g(Ia.$$.fragment,s),g(Ua.$$.fragment,s),g(Ca.$$.fragment,s),g(Va.$$.fragment,s),g(qa.$$.fragment,s),g(Za.$$.fragment,s),g(Aa.$$.fragment,s),g(Ra.$$.fragment,s),g(Xa.$$.fragment,s),g(Pa.$$.fragment,s),g(Ha.$$.fragment,s),g(Ka.$$.fragment,s),g(se.$$.fragment,s),g(ae.$$.fragment,s),g(le.$$.fragment,s),g(ne.$$.fragment,s),g(te.$$.fragment,s),g(re.$$.fragment,s),g(pe.$$.fragment,s),g(oe.$$.fragment,s),g(ie.$$.fragment,s),g(me.$$.fragment,s),Fr=!0)},o(s){v(M.$$.fragment,s),v(I.$$.fragment,s),v(Hs.$$.fragment,s),v(Ls.$$.fragment,s),v(gs.$$.fragment,s),v(Os.$$.fragment,s),v(Ks.$$.fragment,s),v(sa.$$.fragment,s),v(aa.$$.fragment,s),v(ea.$$.fragment,s),v(la.$$.fragment,s),v(na.$$.fragment,s),v(ta.$$.fragment,s),v(ra.$$.fragment,s),v(pa.$$.fragment,s),v(oa.$$.fragment,s),v(ia.$$.fragment,s),v(ca.$$.fragment,s),v(ha.$$.fragment,s),v(da.$$.fragment,s),v(ja.$$.fragment,s),v(ga.$$.fragment,s),v(Ja.$$.fragment,s),v(wa.$$.fragment,s),v($a.$$.fragment,s),v(Ta.$$.fragment,s),v(Ma.$$.fragment,s),v(Ea.$$.fragment,s),v(ka.$$.fragment,s),v(za.$$.fragment,s),v(Ia.$$.fragment,s),v(Ua.$$.fragment,s),v(Ca.$$.fragment,s),v(Va.$$.fragment,s),v(qa.$$.fragment,s),v(Za.$$.fragment,s),v(Aa.$$.fragment,s),v(Ra.$$.fragment,s),v(Xa.$$.fragment,s),v(Pa.$$.fragment,s),v(Ha.$$.fragment,s),v(Ka.$$.fragment,s),v(se.$$.fragment,s),v(ae
.$$.fragment,s),v(le.$$.fragment,s),v(ne.$$.fragment,s),v(te.$$.fragment,s),v(re.$$.fragment,s),v(pe.$$.fragment,s),v(oe.$$.fragment,s),v(ie.$$.fragment,s),v(me.$$.fragment,s),Fr=!1},d(s){a(h),s&&a(j),s&&a(b),w(M),s&&a(z),w(I,s),s&&a(U),s&&a(bs),s&&a(G),s&&a(q),s&&a(Xn),s&&a(ss),w(Hs),s&&a(Yn),w(Ls,s),s&&a(Pn),s&&a(N),s&&a(Qn),w(gs,s),s&&a(Dn),s&&a(x),s&&a(Sn),s&&a(as),w(Os),s&&a(Hn),s&&a(ys),s&&a(Ln),w(Ks,s),s&&a(On),s&&a(je),s&&a(Kn),w(sa,s),s&&a(st),s&&a(be),s&&a(at),s&&a(W),s&&a(et),s&&a(Js),s&&a(lt),w(aa,s),s&&a(nt),s&&a(F),s&&a(tt),s&&a(we),s&&a(rt),w(ea,s),s&&a(pt),s&&a(es),w(la),s&&a(ot),s&&a(ws),s&&a(it),s&&a(X),s&&a(ct),w(na,s),s&&a(ut),s&&a($s),s&&a(mt),s&&a(ls),w(ta),s&&a(ht),s&&a($e),s&&a(dt),s&&a(Y),s&&a(jt),w(ra,s),s&&a(bt),s&&a(ns),w(pa),s&&a(ft),s&&a(Te),s&&a(gt),s&&a(Z),s&&a(vt),w(oa,s),s&&a(yt),s&&a(ts),w(ia),s&&a(Jt),s&&a(ks),s&&a(_t),w(ca,s),s&&a(wt),s&&a(P),s&&a($t),w(ha,s),s&&a(Tt),s&&a(Q),s&&a(Mt),w(da,s),s&&a(Et),s&&a(Ee),s&&a(kt),s&&a(D),s&&a(zt),s&&a(rs),w(ja),s&&a(It),s&&a(Is),s&&a(Ut),s&&a(Us),s&&a(Ct),w(ga,s),s&&a(Gt),s&&a(Ue),s&&a(Vt),w(Ja,s),s&&a(Bt),s&&a(_a),s&&a(qt),w(wa,s),s&&a(Zt),s&&a(Cs),s&&a(At),s&&a(ps),w($a),s&&a(Rt),s&&a(A),s&&a(Nt),s&&a(Vs),s&&a(xt),w(Ta,s),s&&a(Wt),s&&a(S),s&&a(Ft),w(Ma,s),s&&a(Xt),s&&a(os),w(Ea),s&&a(Yt),s&&a(Ce),s&&a(Pt),w(ka,s),s&&a(Qt),s&&a(Ge),s&&a(Dt),w(za,s),s&&a(St),s&&a(Ve),s&&a(Ht),w(Ia,s),s&&a(Lt),s&&a(Be),s&&a(Ot),w(Ua,s),s&&a(Kt),s&&a(qe),s&&a(sr),s&&a(is),w(Ca),s&&a(ar),s&&a(Ze),s&&a(er),s&&a(H),s&&a(lr),w(Va,s),s&&a(nr),s&&a(Zs),s&&a(tr),w(qa,s),s&&a(rr),s&&a(Ae),s&&a(pr),s&&a(cs),w(Za),s&&a(or),s&&a(Rs),s&&a(ir),w(Aa,s),s&&a(cr),s&&a(us),w(Ra),s&&a(ur),s&&a(xs),s&&a(mr),s&&a(Ne),s&&a(hr),w(Xa,s),s&&a(dr),s&&a(Ya),s&&a(jr),w(Pa,s),s&&a(br),s&&a(Qa),s&&a(fr),w(Ha,s),s&&a(gr),s&&a(La),s&&a(vr),w(Ka,s),s&&a(yr),s&&a(We),s&&a(Jr),w(se,s),s&&a(_r),s&&a(Fe),s&&a(wr),s&&a(hs),w(ae),s&&a($r),s&&a(Ye),s&&a(Tr),s&&a(Fs),s&&a(Mr),s&&a(Xs),s&&a(Er),w(le,s),s&&a(kr),s&&a(L),s&&a(zr),w(ne,s),s&&a(Ir),s&&a
(O),s&&a(Ur),w(te,s),s&&a(Cr),s&&a(Ys),s&&a(Gr),w(re,s),s&&a(Vr),s&&a(ds),w(pe),s&&a(Br),s&&a(Qe),s&&a(qr),w(oe,s),s&&a(Zr),s&&a(De),s&&a(Ar),w(ie,s),s&&a(Rr),s&&a(ce),s&&a(Nr),w(me,s),s&&a(xr),s&&a(K),s&&a(Wr),s&&a(Se)}}}
/**
 * Table-of-contents metadata for this documentation page.
 *
 * A tree of entries, each with a `local` slug and a display `title`, and
 * optionally nested `sections`. The root entry is "Preprocess". The object is
 * serialised with JSON.stringify as the `content` value set next to the name
 * `hf:doc:metadata`, and is re-exported below as `metadata`.
 *
 * NOTE(review): `feature-extractor` is used as `local` under both `audio` and
 * `vision`, and two header anchors in this file are given that same id/href.
 * If these slugs are meant to be unique anchors, `#feature-extractor` is
 * ambiguous - confirm and fix in the source markdown, not in this bundle.
 */
const Ld={local:"preprocess",sections:[{local:"nlp",sections:[{local:"tokenize",title:"Tokenize"},{local:"pad",title:"Pad"},{local:"truncation",title:"Truncation"},{local:"costruire-i-tensori",title:"Costruire i tensori"}],title:"NLP"},{local:"audio",sections:[{local:"ricampionamento",title:"Ricampionamento"},{local:"feature-extractor",title:"Feature extractor"},{local:"pad-e-truncate",title:"Pad e truncate"}],title:"Audio"},{local:"vision",sections:[{local:"feature-extractor",title:"Feature extractor"},{local:"data-augmentation",title:"Data augmentation"}],title:"Vision"},{local:"multimodal",sections:[{local:"processor",title:"Processor"}],title:"Multimodal"}],title:"Preprocess"};
/**
 * Instance function handed to `Uu` by the `rj` constructor.
 *
 * Registers a callback with `qd` that looks up the `fw` query parameter of
 * `window.location.search` and discards the value, then returns an empty
 * array. The parameter `k` is not used.
 *
 * NOTE(review): `qd` is imported from the vendor chunk (as `v`); it is
 * presumably Svelte's onMount, and the unused `fw` read looks like a leftover
 * of a framework-selection feature - confirm before removing.
 */
function Od(k){return qd(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component, exported as the module default.
 *
 * The constructor calls `super()` and then `Uu(this, h, Od, Hd, Cu, {})`,
 * forwarding the options object `h`. `Hd` is defined outside this view.
 *
 * NOTE(review): `Iu`, `Uu` and `Cu` are the vendor chunk's `S`, `i` and `s`
 * exports - presumably SvelteComponent, init and safe_not_equal; confirm
 * against the vendor bundle.
 */
class rj extends Iu{constructor(h){super();Uu(this,h,Od,Hd,Cu,{})}}
/* Public surface of the module: the component (default) and the page metadata. */
export{rj as default,Ld as metadata}; | |
Xet Storage Details
- Size: 119 kB
- Xet hash: 045a5368bf2cb023f3ac716418382b51a09097d16f470d55261c5d39f9ab4bfc
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.