Buckets:
| import{s as Dn,e as dt,h as yt,r as Jt,o as Mt,n as $t}from"../chunks/scheduler.36a0863c.js";import{S as Kn,i as On,g as c,h as u,j as q,f as e,k as C,a as n,y as Ha,d as m,p as Pn,b as Sn,t as h,z as wt,s as l,m as _t,c as t,n as vt,C as Ya,o as xt,q as ct,r as T,e as ut,u as M,v as y,w as J,H as jt,B as bt,A as kt,x as j}from"../chunks/index.f891bdb2.js";import{T as Ut}from"../chunks/Tip.a8272f7f.js";import{Y as Ct}from"../chunks/Youtube.0cbacd3d.js";import{a as gt,C as k}from"../chunks/CodeBlock.3ec784ea.js";import{g as ft}from"../chunks/stores.300cf1d0.js";import{e as ot}from"../chunks/each.e59479a4.js";import{I as It,a as Gt}from"../chunks/IconTensorflow.8e6f27b1.js";import{D as Vt}from"../chunks/DocNotebookDropdown.81c1f0fb.js";import{H as I,E as qt}from"../chunks/EditOnGithub.a58e27a9.js";function mt($,p,o){const d=$.slice();return d[8]=p[o],d[10]=o,d}function it($){let p,o,d;var b=$[8].icon;function i(x,f){return{props:{classNames:"mr-1.5"}}}return b&&(p=ct(b,i())),{c(){p&&T(p.$$.fragment),o=ut()},l(x){p&&M(p.$$.fragment,x),o=ut()},m(x,f){p&&y(p,x,f),n(x,o,f),d=!0},p(x,f){if(f&1&&b!==(b=x[8].icon)){if(p){Pn();const _=p;h(_.$$.fragment,1,0,()=>{J(_,1)}),Sn()}b?(p=ct(b,i()),T(p.$$.fragment),m(p.$$.fragment,1),y(p,o.parentNode,o)):p=null}},i(x){d||(p&&m(p.$$.fragment,x),d=!0)},o(x){p&&h(p.$$.fragment,x),d=!1},d(x){x&&e(o),p&&J(p,x)}}}function ht($){let p,o,d,b=$[8].name+"",i,x,f,_,g,r,w,v=$[8].icon&&it($);function V(){return $[6]($[8])}return{c(){p=c("button"),v&&v.c(),o=l(),d=c("p"),i=_t(b),f=l(),this.h()},l(G){p=u(G,"BUTTON",{class:!0});var U=q(p);v&&v.l(U),o=t(U),d=u(U,"P",{class:!0});var E=q(d);i=vt(E,b),E.forEach(e),f=t(U),U.forEach(e),this.h()},h(){C(d,"class",x="!m-0 "+$[8].classNames),C(p,"class",_="flex justify-center py-1.5 px-2.5 focus:outline-none rounded-"+($[10]?"r":"l")+" "+($[8].group!==$[1]&&"text-gray-500 filter grayscale"))},m(G,U){n(G,p,U),v&&v.m(p,null),Ha(p,o),Ha(p,d),Ha(d,i),Ha(p,f),g=!0,r||(w=Ya(p,"click",V),r=!0)},p(G,U){$=G,$[8].icon?v?(v.p($,U),U&1&&m(v,1)):(v=it($),v.c(),m(v,1),v.m(p,o)):v&&(Pn(),h(v,1,1,()=>{v=null}),Sn()),(!g||U&1)&&b!==(b=$[8].name+"")&&xt(i,b),(!g||U&1&&x!==(x="!m-0 "+$[8].classNames))&&C(d,"class",x),(!g||U&3&&_!==(_="flex justify-center py-1.5 px-2.5 focus:outline-none rounded-"+($[10]?"r":"l")+" "+($[8].group!==$[1]&&"text-gray-500 filter grayscale")))&&C(p,"class",_)},i(G){g||(m(v),g=!0)},o(G){h(v),g=!1},d(G){G&&e(p),v&&v.d(),r=!1,w()}}}function zt($){let p,o,d,b=ot($[3].filter($[5])),i=[];for(let f=0;f<b.length;f+=1)i[f]=ht(mt($,b,f));const x=f=>h(i[f],1,1,()=>{i[f]=null});return{c(){p=c("div"),o=c("div");for(let f=0;f<i.length;f+=1)i[f].c();this.h()},l(f){p=u(f,"DIV",{});var _=q(p);o=u(_,"DIV",{class:!0});var g=q(o);for(let r=0;r<i.length;r+=1)i[r].l(g);g.forEach(e),_.forEach(e),this.h()},h(){C(o,"class","bg-white leading-none border border-gray-100 rounded-lg inline-flex p-0.5 text-sm mb-4 select-none")},m(f,_){n(f,p,_),Ha(p,o);for(let g=0;g<i.length;g+=1)i[g]&&i[g].m(o,null);d=!0},p(f,[_]){if(_&27){b=ot(f[3].filter(f[5]));let g;for(g=0;g<b.length;g+=1){const r=mt(f,b,g);i[g]?(i[g].p(r,_),m(i[g],1)):(i[g]=ht(r),i[g].c(),m(i[g],1),i[g].m(o,null))}for(Pn(),g=b.length;g<i.length;g+=1)x(g);Sn()}},i(f){if(!d){for(let _=0;_<b.length;_+=1)m(i[_]);d=!0}},o(f){i=i.filter(Boolean);for(let _=0;_<i.length;_+=1)h(i[_]);d=!1},d(f){f&&e(p),wt(i,f)}}}function Zt($,p,o){let d,{ids:b}=p;const i=b.join("-"),x=ft(i);dt($,x,w=>o(1,d=w));const f=[{id:"pt",classNames:"",icon:It,name:"Pytorch",group:"group1"},{id:"tf",classNames:"",icon:Gt,name:"TensorFlow",group:"group2"},{id:"stringapi",classNames:"text-blue-600",name:"String API",group:"group1"},{id:"readinstruction",classNames:"text-blue-600",name:"ReadInstruction",group:"group2"}];function _(w){yt(x,d=w,d)}const g=w=>b.includes(w.id),r=w=>_(w.group);return $.$$set=w=>{"ids"in w&&o(0,b=w.ids)},[b,d,x,f,_,g,r]}class Tt extends Kn{constructor(p){super(),On(this,p,Zt,zt,Dn,{ids:0})}}function Bt($){let p,o,d,b,i,x,f=$[1].highlighted+"",_,g;return o=new gt({props:{classNames:"transition duration-200 ease-in-out "+($[3]&&"opacity-0"),title:"Copy code excerpt to clipboard",value:$[1].code}}),i=new Tt({props:{ids:$[5]}}),{c(){p=c("div"),T(o.$$.fragment),d=l(),b=c("pre"),T(i.$$.fragment),x=new jt(!1),this.h()},l(r){p=u(r,"DIV",{class:!0});var w=q(p);M(o.$$.fragment,w),w.forEach(e),d=t(r),b=u(r,"PRE",{class:!0});var v=q(b);M(i.$$.fragment,v),x=bt(v,!1),v.forEach(e),this.h()},h(){C(p,"class","absolute top-2.5 right-4"),x.a=null,C(b,"class",_=$[2]?"whitespace-pre-wrap":"")},m(r,w){n(r,p,w),y(o,p,null),n(r,d,w),n(r,b,w),y(i,b,null),x.m(f,b),g=!0},p(r,w){const v={};w&8&&(v.classNames="transition duration-200 ease-in-out "+(r[3]&&"opacity-0")),w&2&&(v.value=r[1].code),o.$set(v),(!g||w&2)&&f!==(f=r[1].highlighted+"")&&x.p(f),(!g||w&4&&_!==(_=r[2]?"whitespace-pre-wrap":""))&&C(b,"class",_)},i(r){g||(m(o.$$.fragment,r),m(i.$$.fragment,r),g=!0)},o(r){h(o.$$.fragment,r),h(i.$$.fragment,r),g=!1},d(r){r&&(e(p),e(d),e(b)),J(o),J(i)}}}function Rt($){let p,o,d,b,i,x,f=$[0].highlighted+"",_,g;return o=new gt({props:{classNames:"transition duration-200 ease-in-out "+($[3]&&"opacity-0"),title:"Copy code excerpt to clipboard",value:$[0].code}}),i=new Tt({props:{ids:$[5]}}),{c(){p=c("div"),T(o.$$.fragment),d=l(),b=c("pre"),T(i.$$.fragment),x=new jt(!1),this.h()},l(r){p=u(r,"DIV",{class:!0});var w=q(p);M(o.$$.fragment,w),w.forEach(e),d=t(r),b=u(r,"PRE",{class:!0});var v=q(b);M(i.$$.fragment,v),x=bt(v,!1),v.forEach(e),this.h()},h(){C(p,"class","absolute top-2.5 right-4"),x.a=null,C(b,"class",_=$[2]?"whitespace-pre-wrap":"")},m(r,w){n(r,p,w),y(o,p,null),n(r,d,w),n(r,b,w),y(i,b,null),x.m(f,b),g=!0},p(r,w){const v={};w&8&&(v.classNames="transition duration-200 ease-in-out "+(r[3]&&"opacity-0")),w&1&&(v.value=r[0].code),o.$set(v),(!g||w&1)&&f!==(f=r[0].highlighted+"")&&x.p(f),(!g||w&4&&_!==(_=r[2]?"whitespace-pre-wrap":""))&&C(b,"class",_)},i(r){g||(m(o.$$.fragment,r),m(i.$$.fragment,r),g=!0)},o(r){h(o.$$.fragment,r),h(i.$$.fragment,r),g=!1},d(r){r&&(e(p),e(d),e(b)),J(o),J(i)}}}function Wt($){let p,o,d,b,i,x;const f=[Rt,Bt],_=[];function g(r,w){return r[4]==="group1"?0:1}return o=g($),d=_[o]=f[o]($),{c(){p=c("div"),d.c(),this.h()},l(r){p=u(r,"DIV",{class:!0});var w=q(p);d.l(w),w.forEach(e),this.h()},h(){C(p,"class","code-block relative")},m(r,w){n(r,p,w),_[o].m(p,null),b=!0,i||(x=[Ya(p,"mouseover",$[7]),Ya(p,"focus",$[7]),Ya(p,"mouseout",$[8]),Ya(p,"focus",$[8])],i=!0)},p(r,[w]){let v=o;o=g(r),o===v?_[o].p(r,w):(Pn(),h(_[v],1,1,()=>{_[v]=null}),Sn(),d=_[o],d?d.p(r,w):(d=_[o]=f[o](r),d.c()),m(d,1),d.m(p,null))},i(r){b||(m(d),b=!0)},o(r){h(d),b=!1},d(r){r&&e(p),_[o].d(),i=!1,Jt(x)}}}function Ht($,p,o){let d,{group1:b}=p,{group2:i}=p,{wrap:x=!1}=p;const f=[b.id,i.id],_=f.join("-"),g=ft(_);dt($,g,V=>o(4,d=V));let r=!0;function w(){o(3,r=!1)}function v(){o(3,r=!0)}return $.$$set=V=>{"group1"in V&&o(0,b=V.group1),"group2"in V&&o(1,i=V.group2),"wrap"in V&&o(2,x=V.wrap)},[b,i,x,r,d,f,g,w,v]}class Et extends Kn{constructor(p){super(),On(this,p,Ht,Wt,Dn,{group1:0,group2:1,wrap:2})}}function Yt($){let p,o="Si tienes previsto utilizar un modelo pre-entrenado, es importante que utilices el tokenizador pre-entrenado asociado. Esto te asegura que el texto se divide de la misma manera que el corpus de pre-entrenamiento y utiliza el mismo índice de tokens correspondiente (usualmente referido como el <em>vocab</em>) durante el pre-entrenamiento.";return{c(){p=c("p"),p.innerHTML=o},l(d){p=u(d,"P",{"data-svelte-h":!0}),j(p)!=="svelte-1i562p5"&&(p.innerHTML=o)},m(d,b){n(d,p,b)},p:$t,d(d){d&&e(p)}}}function Ft($){let p,o,d,b,i,x,f,_,g,r="Antes de que puedas utilizar los datos en un modelo, debes procesarlos en un formato aceptable para el modelo. Un modelo no entiende el texto en bruto, las imágenes o el audio. Estas entradas necesitan ser convertidas en números y ensambladas en tensores. En este tutorial, podrás:",w,v,V="<li>Preprocesar los datos textuales con un tokenizador.</li> <li>Preprocesar datos de imagen o audio con un extractor de características.</li> <li>Preprocesar datos para una tarea multimodal con un procesador.</li>",G,U,E,Y,Fa,F,sl='La principal herramienta para procesar datos textuales es un <a href="main_classes/tokenizer">tokenizador</a>. Un tokenizador comienza dividiendo el texto en <em>tokens</em> según un conjunto de reglas. Los tokens se convierten en números, que se utilizan para construir tensores como entrada a un modelo. El tokenizador también añade cualquier entrada adicional que requiera el modelo.',Na,z,Xa,N,al="Comienza rápidamente cargando un tokenizador pre-entrenado con la clase <code>AutoTokenizer</code>. Esto descarga el <em>vocab</em> utilizado cuando un modelo es pre-entrenado.",La,X,Qa,L,el="Carga un tokenizador pre-entrenado con <code>AutoTokenizer.from_pretrained()</code>:",Aa,Q,Pa,A,nl="A continuación, pasa tu frase al tokenizador:",Sa,P,Da,S,ll="El tokenizador devuelve un diccionario con tres ítems importantes:",Ka,D,tl='<li><a href="glossary#input-ids">input_ids</a> son los índices correspondientes a cada token de la frase.</li> <li><a href="glossary#attention-mask">attention_mask</a> indica si un token debe ser atendido o no.</li> <li><a href="glossary#token-type-ids">token_type_ids</a> identifica a qué secuencia pertenece un token cuando hay más de una secuencia.</li>',Oa,K,pl="Tu puedes decodificar el <code>input_ids</code> para devolver la entrada original:",se,O,ae,ss,rl=`Como puedes ver, el tokenizador ha añadido dos tokens especiales - <code>CLS</code> y <code>SEP</code> (clasificador y separador) - a la frase. No todos los modelos necesitan | |
| tokens especiales, pero si lo llegas a necesitar, el tokenizador los añadirá automáticamente.`,ee,as,cl="Si hay varias frases que quieres preprocesar, pasa las frases como una lista al tokenizador:",ne,es,le,ns,te,ls,ul="Esto nos lleva a un tema importante. Cuando se procesa un batch de frases, no siempre tienen la misma longitud. Esto es un problema porque los tensores que se introducen en el modelo deben tener una forma uniforme. El pad es una estrategia para asegurar que los tensores sean rectangulares añadiendo un “padding token” especial a las oraciones con menos tokens.",pe,ts,ol="Establece el parámetro <code>padding</code> en <code>True</code> aplicando el pad a las secuencias más cortas del batch para que coincidan con la secuencia más larga:",re,ps,ce,rs,ml="Observa que el tokenizador ha aplicado el pad a la primera y la tercera frase con un “0” porque son más cortas.",ue,cs,oe,us,il="En el otro extremo del espectro, a veces una secuencia puede ser demasiado larga para un modelo. En este caso, tendrás que truncar la secuencia a una longitud más corta.",me,os,hl="Establece el parámetro <code>truncation</code> a <code>True</code> para truncar una secuencia a la longitud máxima aceptada por el modelo:",ie,ms,he,is,de,hs,dl="Finalmente, si quieres que el tokenizador devuelva los tensores reales que se introducen en el modelo.",je,ds,jl="Establece el parámetro <code>return_tensors</code> como <code>pt</code> para PyTorch, o <code>tf</code> para TensorFlow:",be,js,ge,bs,fe,gs,bl='Las entradas de audio se preprocesan de forma diferente a las entradas textuales, pero el objetivo final es el mismo: crear secuencias numéricas que el modelo pueda entender. Un <a href="main_classes/feature_extractor">extractor de características</a> (o feature extractor en inglés) está diseñado para extraer características de datos provenientes de imágenes o audio sin procesar y convertirlos en tensores. Antes de empezar, instala 🤗 Datasets para cargar un dataset de audio para experimentar:',Te,fs,ye,Ts,gl='Carga la tarea de detección de palabras clave del benchmark <a href="https://huggingface.co/datasets/superb" rel="nofollow">SUPERB</a> (consulta el <a href="https://huggingface.co/docs/datasets/load_hub" rel="nofollow">tutorial 🤗 Dataset</a> para que obtengas más detalles sobre cómo cargar un dataset):',Je,ys,Me,Js,fl="Accede al primer elemento de la columna <code>audio</code> para echar un vistazo a la entrada. Al llamar a la columna <code>audio</code> se cargará y volverá a muestrear automáticamente el archivo de audio:",$e,Ms,we,$s,Tl="Esto devuelve tres elementos:",_e,ws,yl="<li><code>array</code> es la señal de voz cargada - y potencialmente remuestreada - como un array 1D.</li> <li><code>path</code> apunta a la ubicación del archivo de audio.</li> <li><code>sampling_rate</code> se refiere a cuántos puntos de datos de la señal de voz se miden por segundo.</li>",ve,_s,xe,vs,Jl='Para este tutorial, se utilizará el modelo <a href="https://huggingface.co/facebook/wav2vec2-base" rel="nofollow">Wav2Vec2</a>. Como puedes ver en la model card, el modelo Wav2Vec2 está pre-entrenado en audio de voz muestreado a 16kHz. Es importante que la tasa de muestreo de tus datos de audio coincida con la tasa de muestreo del dataset utilizado para pre-entrenar el modelo. Si la tasa de muestreo de tus datos no es la misma, deberás volver a muestrear tus datos de audio.',ke,xs,Ml='Por ejemplo, carga el dataset <a href="https://huggingface.co/datasets/lj_speech" rel="nofollow">LJ Speech</a> que tiene una tasa de muestreo de 22050kHz. Para utilizar el modelo Wav2Vec2 con este dataset, reduce la tasa de muestreo a 16kHz:',Ue,ks,Ce,Us,$l='<li>Usa el método 🤗 Datasets’ <a href="https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.Dataset.cast_column" rel="nofollow"><code>cast_column</code></a> para reducir la tasa de muestreo a 16kHz:</li>',Ie,Cs,Ge,Z,wl="<li>Carga el archivo de audio:</li>",Ve,Is,qe,Gs,_l="Como puedes ver, el <code>sampling_rate</code> se ha reducido a 16kHz. Ahora que sabes cómo funciona el resampling, volvamos a nuestro ejemplo anterior con el dataset SUPERB.",ze,Vs,Ze,qs,vl="El siguiente paso es cargar un extractor de características para normalizar y aplicar el pad a la entrada. Cuando se aplica padding a los datos textuales, se añade un “0” para las secuencias más cortas. La misma idea se aplica a los datos de audio y el extractor de características de audio añadirá un “0” - interpretado como silencio - al “array”.",Be,zs,xl="Carga el extractor de características con <code>AutoFeatureExtractor.from_pretrained()</code>:",Re,Zs,We,Bs,kl="Pasa el <code>array</code> de audio al extractor de características. También te recomendamos añadir el argumento <code>sampling_rate</code> en el extractor de características para poder depurar mejor los errores silenciosos que puedan producirse.",He,Rs,Ee,Ws,Ye,Hs,Ul="Al igual que el tokenizador, puedes aplicar padding o truncamiento para manejar secuencias variables en un batch. Fíjate en la longitud de la secuencia de estas dos muestras de audio:",Fe,Es,Ne,Ys,Cl="Como puedes ver, el <code>sampling_rate</code> se ha reducido a 16kHz.",Xe,Fs,Le,Ns,Il="Aplica la función a los primeros ejemplos del dataset:",Qe,Xs,Ae,Ls,Gl="Ahora echa un vistazo a las longitudes de las muestras procesadas:",Pe,Qs,Se,As,Vl="Las longitudes de las dos primeras muestras coinciden ahora con la longitud máxima especificada.",De,Ps,Ke,Ss,ql="También se utiliza un extractor de características para procesar imágenes para tareas de visión por computadora. Una vez más, el objetivo es convertir la imagen en bruto en un batch de tensores como entrada.",Oe,Ds,zl='Vamos a cargar el dataset <a href="https://huggingface.co/datasets/food101" rel="nofollow">food101</a> para este tutorial. Usa el parámetro 🤗 Datasets <code>split</code> para cargar solo una pequeña muestra de la división de entrenamiento ya que el dataset es bastante grande:',sn,Ks,an,Os,Zl='A continuación, observa la imagen con la función 🤗 Datasets <a href="https://huggingface.co/docs/datasets/package_reference/main_classes?highlight=image#datasets.Image" rel="nofollow"><code>Image</code></a>:',en,sa,nn,aa,Bl='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/vision-preprocess-tutorial.png" alt="vision-preprocess-tutorial.png"/>',ln,ea,tn,na,Rl="Carga el extractor de características con <code>AutoFeatureExtractor.from_pretrained()</code>:",pn,la,rn,ta,cn,pa,Wl='Para las tareas de visión por computadora es común añadir algún tipo de aumento de datos (o data augmentation) a las imágenes como parte del preprocesamiento. Puedes añadir el método de aumento de datos con cualquier librería que quieras, pero en este tutorial utilizarás el módulo <a href="https://pytorch.org/vision/stable/transforms.html" rel="nofollow"><code>transforms</code></a> de torchvision.',un,ra,Hl='<li>Normaliza la imagen y utiliza <a href="https://pytorch.org/vision/master/generated/torchvision.transforms.Compose.html" rel="nofollow"><code>Compose</code></a> para encadenar algunas transformaciones - <a href="https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html" rel="nofollow"><code>RandomResizedCrop</code></a> y <a href="https://pytorch.org/vision/main/generated/torchvision.transforms.ColorJitter.html" rel="nofollow"><code>ColorJitter</code></a> - juntas:</li>',on,ca,mn,B,El='<li>El modelo acepta <a href="model_doc/visionencoderdecoder#transformers.VisionEncoderDecoderModel.forward.pixel_values"><code>pixel_values</code></a> como entrada. Este valor es generado por el extractor de características. Crea una función que genere <code>pixel_values</code> a partir de las transformaciones:</li>',hn,ua,dn,R,Yl='<li>A continuación, utiliza 🤗 Datasets <a href="https://huggingface.co/docs/datasets/process#format-transform" rel="nofollow"><code>set_transform</code></a> para aplicar las transformaciones sobre la marcha:</li>',jn,oa,bn,W,Fl="<li>Ahora, cuando accedes a la imagen, observarás que el extractor de características ha añadido a la entrada del modelo <code>pixel_values</code>:</li>",gn,ma,fn,ia,Nl="Este es el aspecto de la imagen después de preprocesarla. Como era de esperar por las transformaciones aplicadas, la imagen ha sido recortada aleatoriamente y sus propiedades de color son diferentes.",Tn,ha,yn,da,Xl='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/preprocessed_image.png" alt="preprocessed_image"/>',Jn,ja,Mn,ba,Ll="Para las tareas multimodales utilizarás una combinación de todo lo que has aprendido hasta ahora y aplicarás tus habilidades a una tarea de reconocimiento automático de voz (ASR). Esto significa que necesitarás un:",$n,ga,Ql="<li>Extractor de características para preprocesar los datos de audio.</li> <li>Un tokenizador para procesar el texto.</li>",wn,fa,Al='Volvamos al dataset <a href="https://huggingface.co/datasets/lj_speech" rel="nofollow">LJ Speech</a>:',_n,Ta,vn,ya,Pl="Suponiendo que te interesan principalmente las columnas <code>audio</code> y <code>texto</code>, elimina las demás columnas:",xn,Ja,kn,Ma,Sl="Ahora echa un vistazo a las columnas <code>audio</code> y <code>texto</code>:",Un,$a,Cn,wa,Dl='Recuerda la sección anterior sobre el procesamiento de datos de audio, siempre debes <a href="preprocessing#audio">volver a muestrear</a> la tasa de muestreo de tus datos de audio para que coincida con la tasa de muestreo del dataset utilizado para preentrenar un modelo:',In,_a,Gn,va,Vn,xa,Kl="Un processor combina un extractor de características y un tokenizador. Cargue un procesador con <code>AutoProcessor.from_pretrained()</code>:",qn,ka,zn,Ua,Ol="<li>Crea una función para procesar los datos de audio en <code>input_values</code>, y tokeniza el texto en <code>labels</code>. Estas son las entradas del modelo:</li>",Zn,Ca,Bn,H,st="<li>Aplica la función <code>prepare_dataset</code> a una muestra:</li>",Rn,Ia,Wn,Ga,at="Observa que el método processor ha añadido <code>input_values</code> y <code>labels</code>. La tasa de muestreo también se ha reducido correctamente a 16kHz.",Hn,Va,et="Genial, ahora deberías ser capaz de preprocesar datos para cualquier modalidad e incluso combinar diferentes modalidades. En el siguiente tutorial, aprenderás aplicar fine tuning a un modelo en tus datos recién preprocesados.",En,qa,Yn,za,nt=`Hemos visto los comandos que funcionarán para la mayoría de los casos (hacer pad a tu batch teniendo en cuenta la longitud de la frase máxima y | |
| truncar a la longitud máxima que el modelo puede aceptar). Sin embargo, la API admite más estrategias si las necesitas. Los | |
| tres argumentos que necesitas conocer para ello son <code>padding</code>, <code>truncation</code> y <code>max_length</code>.`,Fn,Za,lt=`<li><p><code>padding</code> controla el aplicarme padding al texto. Puede ser un booleano o una cadena que debe ser:</p> <ul><li><code>True</code> o <code>'longest'</code> para aplicar el pad hasta la secuencia más larga del batch (no apliques el padding si sólo le proporcionas | |
| una sola secuencia).</li> <li><code>'max_length'</code> para aplicar el pad hasta la longitud especificada por el argumento <code>max_length</code> o la longitud máxima aceptada | |
| por el modelo si no le proporcionas <code>longitud_máxima</code> (<code>longitud_máxima=None</code>). Si sólo le proporcionas una única secuencia | |
| se le aplicará el padding. | |
| <code>False</code> o <code>'do_not_pad'</code> para no aplicar pad a las secuencias. Como hemos visto antes, este es el comportamiento por | |
| defecto.</li></ul></li> <li><p><code>truncation</code> controla el truncamiento. Puede ser un booleano o una string que debe ser:</p> <ul><li><code>True</code> o <code>'longest_first'</code> truncan hasta la longitud máxima especificada por el argumento <code>max_length</code> o | |
| la longitud máxima aceptada por el modelo si no le proporcionas <code>max_length</code> (<code>max_length=None</code>). Esto | |
| truncará token por token, eliminando un token de la secuencia más larga del par hasta alcanzar la longitud | |
| adecuada.</li> <li><code>'only_second'</code> trunca hasta la longitud máxima especificada por el argumento <code>max_length</code> o la | |
| longitud máxima aceptada por el modelo si no le proporcionas <code>max_length</code> (<code>max_length=None</code>). Esto sólo truncará | |
| la segunda frase de un par si le proporcionas un par de secuencias (o un batch de pares de secuencias).</li> <li><code>'only_first'</code> trunca hasta la longitud máxima especificada por el argumento <code>max_length</code> o la longitud máxima | |
| aceptada por el modelo si no se proporciona <code>max_length</code> (<code>max_length=None</code>). Esto sólo truncará | |
| la primera frase de un par si se proporciona un par de secuencias (o un lote de pares de secuencias).</li> <li><code>False</code> o <code>'do_not_truncate'</code> para no truncar las secuencias. Como hemos visto antes, este es el comportamiento | |
| por defecto.</li></ul></li> <li><p><code>max_length</code> para controlar la longitud del padding/truncamiento. Puede ser un número entero o <code>None</code>, en cuyo caso | |
| será por defecto la longitud máxima que el modelo puede aceptar. Si el modelo no tiene una longitud máxima de entrada específica, el | |
| padding/truncamiento a <code>longitud_máxima</code> se desactiva.</p></li>`,Nn,Ba,tt=`A continuación te mostramos en una tabla que resume la forma recomendada de configurar el padding y el truncamiento. Si utilizas un par de secuencias de entrada en | |
| algunos de los siguientes ejemplos, puedes sustituir <code>truncation=True</code> por una <code>STRATEGY</code> seleccionada en | |
| <code>['only_first', 'only_second', 'longest_first']</code>, es decir, <code>truncation='only_second'</code> o <code>truncation= 'longest_first'</code> para controlar cómo se truncan ambas secuencias del par como se ha detallado anteriormente.`,Xn,Ra,pt="<thead><tr><th>Truncation</th> <th>Padding</th> <th>Instrucciones</th></tr></thead> <tbody><tr><td>no truncation</td> <td>no padding</td> <td><code>tokenizer(batch_sentences)</code></td></tr> <tr><td></td> <td>padding secuencia max del batch</td> <td><code>tokenizer(batch_sentences, padding=True)</code> or</td></tr> <tr><td></td> <td></td> <td><code>tokenizer(batch_sentences, padding='longest')</code></td></tr> <tr><td></td> <td>padding long max de input model</td> <td><code>tokenizer(batch_sentences, padding='max_length')</code></td></tr> <tr><td></td> <td>padding a una long especifica</td> <td><code>tokenizer(batch_sentences, padding='max_length', max_length=42)</code></td></tr> <tr><td>truncation long max del input model</td> <td>no padding</td> <td><code>tokenizer(batch_sentences, truncation=True)</code> or</td></tr> <tr><td></td> <td></td> <td><code>tokenizer(batch_sentences, truncation=STRATEGY)</code></td></tr> <tr><td></td> <td>padding secuencia max del batch</td> <td><code>tokenizer(batch_sentences, padding=True, truncation=True)</code> or</td></tr> <tr><td></td> <td></td> <td><code>tokenizer(batch_sentences, padding=True, truncation=STRATEGY)</code></td></tr> <tr><td></td> <td>padding long max de input model</td> <td><code>tokenizer(batch_sentences, padding='max_length', truncation=True)</code> or</td></tr> <tr><td></td> <td></td> <td><code>tokenizer(batch_sentences, padding='max_length', truncation=STRATEGY)</code></td></tr> <tr><td></td> <td>padding a una long especifica</td> <td>Not possible</td></tr> <tr><td>truncation a una long especifica</td> <td>no padding</td> <td><code>tokenizer(batch_sentences, truncation=True, max_length=42)</code> or</td></tr> <tr><td></td> <td></td> <td><code>tokenizer(batch_sentences, truncation=STRATEGY, max_length=42)</code></td></tr> <tr><td></td> <td>padding secuencia max del batch</td> <td><code>tokenizer(batch_sentences, padding=True, truncation=True, max_length=42)</code> or</td></tr> <tr><td></td> <td></td> <td><code>tokenizer(batch_sentences, padding=True, truncation=STRATEGY, max_length=42)</code></td></tr> <tr><td></td> <td>padding long max de input model</td> <td>Not possible</td></tr> <tr><td></td> <td>padding a una long especifica</td> <td><code>tokenizer(batch_sentences, padding='max_length', truncation=True, max_length=42)</code> or</td></tr> <tr><td></td> <td></td> <td><code>tokenizer(batch_sentences, padding='max_length', truncation=STRATEGY, max_length=42)</code></td></tr></tbody>",Ln,Wa,Qn,Ea,An;return i=new I({props:{title:"Preprocesamiento",local:"preprocesamiento",headingTag:"h1"}}),f=new Vt({props:{classNames:"absolute z-10 right-0 top-0",options:[{label:"Mixed",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/es/preprocessing.ipynb"},{label:"PyTorch",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/es/pytorch/preprocessing.ipynb"},{label:"TensorFlow",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/es/tensorflow/preprocessing.ipynb"},{label:"Mixed",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/es/preprocessing.ipynb"},{label:"PyTorch",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/es/pytorch/preprocessing.ipynb"},{label:"TensorFlow",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/es/tensorflow/preprocessing.ipynb"}]}}),U=new I({props:{title:"NLP",local:"nlp",headingTag:"h2"}}),Y=new Ct({props:{id:"Yffk5aydLzg"}}),z=new Ut({props:{$$slots:{default:[Yt]},$$scope:{ctx:$}}}),X=new I({props:{title:"Tokenizar",local:"tokenizar",headingTag:"h3"}}),Q=new k({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJnb29nbGUtYmVydCUyRmJlcnQtYmFzZS1jYXNlZCUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"google-bert/bert-base-cased"</span>)`,wrap:!1}}),P=new k({props:{code:"ZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplciglMjJEbyUyMG5vdCUyMG1lZGRsZSUyMGluJTIwdGhlJTIwYWZmYWlycyUyMG9mJTIwd2l6YXJkcyUyQyUyMGZvciUyMHRoZXklMjBhcmUlMjBzdWJ0bGUlMjBhbmQlMjBxdWljayUyMHRvJTIwYW5nZXIuJTIyKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">>>> </span>encoded_input = tokenizer(<span class="hljs-string">"Do not meddle in the affairs of wizards, for they are subtle and quick to anger."</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: [<span class="hljs-number">101</span>, <span class="hljs-number">2079</span>, <span class="hljs-number">2025</span>, <span class="hljs-number">19960</span>, <span class="hljs-number">10362</span>, <span class="hljs-number">1999</span>, <span class="hljs-number">1996</span>, <span class="hljs-number">3821</span>, <span class="hljs-number">1997</span>, <span class="hljs-number">16657</span>, <span class="hljs-number">1010</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">2027</span>, <span class="hljs-number">2024</span>, <span class="hljs-number">11259</span>, <span class="hljs-number">1998</span>, <span class="hljs-number">4248</span>, <span class="hljs-number">2000</span>, <span class="hljs-number">4963</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>], | |
| <span class="hljs-string">'token_type_ids'</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| <span class="hljs-string">'attention_mask'</span>: [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]}`,wrap:!1}}),O=new k({props:{code:"dG9rZW5pemVyLmRlY29kZShlbmNvZGVkX2lucHV0JTVCJTIyaW5wdXRfaWRzJTIyJTVEKQ==",highlighted:`<span class="hljs-meta">>>> </span>tokenizer.decode(encoded_input[<span class="hljs-string">"input_ids"</span>]) | |
| <span class="hljs-string">'[CLS] Do not meddle in the affairs of wizards, for they are subtle and quick to anger. [SEP]'</span>`,wrap:!1}}),es=new k({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoYmF0Y2hfc2VudGVuY2VzKSUwQXByaW50KGVuY29kZWRfaW5wdXRzKQ==",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_inputs = tokenizer(batch_sentences) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_inputs) | |
| {<span class="hljs-string">'input_ids'</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>]], | |
| <span class="hljs-string">'token_type_ids'</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'attention_mask'</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]}`,wrap:!1}}),ns=new I({props:{title:"Pad",local:"pad",headingTag:"h3"}}),ps=new k({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaF9zZW50ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSklMEFwcmludChlbmNvZGVkX2lucHV0KQ==",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'token_type_ids'</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'attention_mask'</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]}`,wrap:!1}}),cs=new I({props:{title:"Truncamiento",local:"truncamiento",headingTag:"h3"}}),ms=new k({props:{code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaF9zZW50ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>], | |
| [<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'token_type_ids'</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], | |
| <span class="hljs-string">'attention_mask'</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]}`,wrap:!1}}),is=new I({props:{title:"Construye tensores",local:"construye-tensores",headingTag:"h3"}}),js=new Et({props:{group1:{id:"pt",code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaCUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer(batch, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: tensor([[ <span class="hljs-number">101</span>, <span class="hljs-number">153</span>, <span class="hljs-number">7719</span>, <span class="hljs-number">21490</span>, <span class="hljs-number">1122</span>, <span class="hljs-number">1114</span>, <span class="hljs-number">9582</span>, <span class="hljs-number">1623</span>, <span class="hljs-number">102</span>], | |
| [ <span class="hljs-number">101</span>, <span class="hljs-number">5226</span>, <span class="hljs-number">1122</span>, <span class="hljs-number">9649</span>, <span class="hljs-number">1199</span>, <span class="hljs-number">2610</span>, <span class="hljs-number">1236</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>]]), | |
| <span class="hljs-string">'token_type_ids'</span>: tensor([[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]), | |
| <span class="hljs-string">'attention_mask'</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>]])}`},group2:{id:"tf",code:"YmF0Y2hfc2VudGVuY2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIyQnV0JTIwd2hhdCUyMGFib3V0JTIwc2Vjb25kJTIwYnJlYWtmYXN0JTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIyRG9uJ3QlMjB0aGluayUyMGhlJTIwa25vd3MlMjBhYm91dCUyMHNlY29uZCUyMGJyZWFrZmFzdCUyQyUyMFBpcC4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJXaGF0JTIwYWJvdXQlMjBlbGV2ZW5zaWVzJTNGJTIyJTJDJTBBJTVEJTBBZW5jb2RlZF9pbnB1dCUyMCUzRCUyMHRva2VuaXplcihiYXRjaCUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnRmJTIyKSUwQXByaW50KGVuY29kZWRfaW5wdXQp",highlighted:`<span class="hljs-meta">>>> </span>batch_sentences = [ | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"But what about second breakfast?"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"Don't think he knows about second breakfast, Pip."</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"What about elevensies?"</span>, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer(batch, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">print</span>(encoded_input) | |
| {<span class="hljs-string">'input_ids'</span>: <tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy= | |
| array([[ <span class="hljs-number">101</span>, <span class="hljs-number">153</span>, <span class="hljs-number">7719</span>, <span class="hljs-number">21490</span>, <span class="hljs-number">1122</span>, <span class="hljs-number">1114</span>, <span class="hljs-number">9582</span>, <span class="hljs-number">1623</span>, <span class="hljs-number">102</span>], | |
| [ <span class="hljs-number">101</span>, <span class="hljs-number">5226</span>, <span class="hljs-number">1122</span>, <span class="hljs-number">9649</span>, <span class="hljs-number">1199</span>, <span class="hljs-number">2610</span>, <span class="hljs-number">1236</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>]], | |
| dtype=int32)>, | |
| <span class="hljs-string">'token_type_ids'</span>: <tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy= | |
| array([[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]], dtype=int32)>, | |
| <span class="hljs-string">'attention_mask'</span>: <tf.Tensor: shape=(<span class="hljs-number">2</span>, <span class="hljs-number">9</span>), dtype=int32, numpy= | |
| array([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>]], dtype=int32)>}`},wrap:!1}}),bs=new I({props:{title:"Audio",local:"audio",headingTag:"h2"}}),fs=new k({props:{code:"cGlwJTIwaW5zdGFsbCUyMGRhdGFzZXRz",highlighted:"pip install datasets",wrap:!1}}),ys=new k({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTJDJTIwQXVkaW8lMEElMEFkYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMnN1cGVyYiUyMiUyQyUyMCUyMmtzJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset, Audio | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"superb"</span>, <span class="hljs-string">"ks"</span>)`,wrap:!1}}),Ms=new k({props:{code:"ZGF0YXNldCU1QiUyMnRyYWluJTIyJTVEJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVE",highlighted:`<span class="hljs-meta">>>> </span>dataset[<span class="hljs-string">"train"</span>][<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>] | |
| {<span class="hljs-string">'array'</span>: array([ <span class="hljs-number">0.</span> , <span class="hljs-number">0.</span> , <span class="hljs-number">0.</span> , ..., -<span class="hljs-number">0.00592041</span>, | |
| -<span class="hljs-number">0.00405884</span>, -<span class="hljs-number">0.00253296</span>], dtype=float32), | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'/root/.cache/huggingface/datasets/downloads/extracted/05734a36d88019a09725c20cc024e1c4e7982e37d7d55c0c1ca1742ea1cdd47f/_background_noise_/doing_the_dishes.wav'</span>, | |
| <span class="hljs-string">'sampling_rate'</span>: <span class="hljs-number">16000</span>}`,wrap:!1}}),_s=new I({props:{title:"Resample",local:"resample",headingTag:"h3"}}),ks=new k({props:{code:"bGpfc3BlZWNoJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMmxqX3NwZWVjaCUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lMjIpJTBBbGpfc3BlZWNoJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVE",highlighted:`<span class="hljs-meta">>>> </span>lj_speech = load_dataset(<span class="hljs-string">"lj_speech"</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>] | |
| {<span class="hljs-string">'array'</span>: array([-<span class="hljs-number">7.3242188e-04</span>, -<span class="hljs-number">7.6293945e-04</span>, -<span class="hljs-number">6.4086914e-04</span>, ..., | |
| <span class="hljs-number">7.3242188e-04</span>, <span class="hljs-number">2.1362305e-04</span>, <span class="hljs-number">6.1035156e-05</span>], dtype=float32), | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'/root/.cache/huggingface/datasets/downloads/extracted/917ece08c95cf0c4115e45294e3cd0dee724a1165b7fc11798369308a465bd26/LJSpeech-1.1/wavs/LJ001-0001.wav'</span>, | |
| <span class="hljs-string">'sampling_rate'</span>: <span class="hljs-number">22050</span>}`,wrap:!1}}),Cs=new k({props:{code:"bGpfc3BlZWNoJTIwJTNEJTIwbGpfc3BlZWNoLmNhc3RfY29sdW1uKCUyMmF1ZGlvJTIyJTJDJTIwQXVkaW8oc2FtcGxpbmdfcmF0ZSUzRDE2XzAwMCkp",highlighted:'<span class="hljs-meta">>>> </span>lj_speech = lj_speech.cast_column(<span class="hljs-string">"audio"</span>, Audio(sampling_rate=<span class="hljs-number">16_000</span>))',wrap:!1}}),Is=new k({props:{code:"bGpfc3BlZWNoJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVE",highlighted:`<span class="hljs-meta">>>> </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>] | |
| {<span class="hljs-string">'array'</span>: array([-<span class="hljs-number">0.00064146</span>, -<span class="hljs-number">0.00074657</span>, -<span class="hljs-number">0.00068768</span>, ..., <span class="hljs-number">0.00068341</span>, | |
| <span class="hljs-number">0.00014045</span>, <span class="hljs-number">0.</span> ], dtype=float32), | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'/root/.cache/huggingface/datasets/downloads/extracted/917ece08c95cf0c4115e45294e3cd0dee724a1165b7fc11798369308a465bd26/LJSpeech-1.1/wavs/LJ001-0001.wav'</span>, | |
| <span class="hljs-string">'sampling_rate'</span>: <span class="hljs-number">16000</span>}`,wrap:!1}}),Vs=new I({props:{title:"Extractor de características",local:"extractor-de-características",headingTag:"h3"}}),Zs=new k({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9GZWF0dXJlRXh0cmFjdG9yJTBBJTBBZmVhdHVyZV9leHRyYWN0b3IlMjAlM0QlMjBBdXRvRmVhdHVyZUV4dHJhY3Rvci5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZ3YXYydmVjMi1iYXNlJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor | |
| <span class="hljs-meta">>>> </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">"facebook/wav2vec2-base"</span>)`,wrap:!1}}),Rs=new k({props:{code:"YXVkaW9faW5wdXQlMjAlM0QlMjAlNUJkYXRhc2V0JTVCJTIydHJhaW4lMjIlNUQlNUIwJTVEJTVCJTIyYXVkaW8lMjIlNUQlNUIlMjJhcnJheSUyMiU1RCU1RCUwQWZlYXR1cmVfZXh0cmFjdG9yKGF1ZGlvX2lucHV0JTJDJTIwc2FtcGxpbmdfcmF0ZSUzRDE2MDAwKQ==",highlighted:`<span class="hljs-meta">>>> </span>audio_input = [dataset[<span class="hljs-string">"train"</span>][<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>]] | |
| <span class="hljs-meta">>>> </span>feature_extractor(audio_input, sampling_rate=<span class="hljs-number">16000</span>) | |
| {<span class="hljs-string">'input_values'</span>: [array([ <span class="hljs-number">0.00045439</span>, <span class="hljs-number">0.00045439</span>, <span class="hljs-number">0.00045439</span>, ..., -<span class="hljs-number">0.1578519</span> , -<span class="hljs-number">0.10807519</span>, -<span class="hljs-number">0.06727459</span>], dtype=float32)]}`,wrap:!1}}),Ws=new I({props:{title:"Pad y truncamiento",local:"pad-y-truncamiento",headingTag:"h3"}}),Es=new k({props:{code:"ZGF0YXNldCU1QiUyMnRyYWluJTIyJTVEJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTVCJTIyYXJyYXklMjIlNUQuc2hhcGUlMEElMEFkYXRhc2V0JTVCJTIydHJhaW4lMjIlNUQlNUIxJTVEJTVCJTIyYXVkaW8lMjIlNUQlNUIlMjJhcnJheSUyMiU1RC5zaGFwZQ==",highlighted:`<span class="hljs-meta">>>> </span>dataset[<span class="hljs-string">"train"</span>][<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>].shape | |
| (<span class="hljs-number">1522930</span>,) | |
| <span class="hljs-meta">>>> </span>dataset[<span class="hljs-string">"train"</span>][<span class="hljs-number">1</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>].shape | |
| (<span class="hljs-number">988891</span>,)`,wrap:!1}}),Fs=new k({props:{code:"ZGVmJTIwcHJlcHJvY2Vzc19mdW5jdGlvbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBhdWRpb19hcnJheXMlMjAlM0QlMjAlNUJ4JTVCJTIyYXJyYXklMjIlNUQlMjBmb3IlMjB4JTIwaW4lMjBleGFtcGxlcyU1QiUyMmF1ZGlvJTIyJTVEJTVEJTBBJTIwJTIwJTIwJTIwaW5wdXRzJTIwJTNEJTIwZmVhdHVyZV9leHRyYWN0b3IoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYXVkaW9fYXJyYXlzJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2FtcGxpbmdfcmF0ZSUzRDE2MDAwJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcGFkZGluZyUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXhfbGVuZ3RoJTNEMTAwMDAwMCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwKSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGlucHV0cw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-meta">... </span> audio_arrays = [x[<span class="hljs-string">"array"</span>] <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> examples[<span class="hljs-string">"audio"</span>]] | |
| <span class="hljs-meta">... </span> inputs = feature_extractor( | |
| <span class="hljs-meta">... </span> audio_arrays, | |
| <span class="hljs-meta">... </span> sampling_rate=<span class="hljs-number">16000</span>, | |
| <span class="hljs-meta">... </span> padding=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span> max_length=<span class="hljs-number">1000000</span>, | |
| <span class="hljs-meta">... </span> truncation=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span> ) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> inputs`,wrap:!1}}),Xs=new k({props:{code:"cHJvY2Vzc2VkX2RhdGFzZXQlMjAlM0QlMjBwcmVwcm9jZXNzX2Z1bmN0aW9uKGRhdGFzZXQlNUIlMjJ0cmFpbiUyMiU1RCU1QiUzQTUlNUQp",highlighted:'<span class="hljs-meta">>>> </span>processed_dataset = preprocess_function(dataset[<span class="hljs-string">"train"</span>][:<span class="hljs-number">5</span>])',wrap:!1}}),Qs=new k({props:{code:"cHJvY2Vzc2VkX2RhdGFzZXQlNUIlMjJpbnB1dF92YWx1ZXMlMjIlNUQlNUIwJTVELnNoYXBlJTBBJTBBcHJvY2Vzc2VkX2RhdGFzZXQlNUIlMjJpbnB1dF92YWx1ZXMlMjIlNUQlNUIxJTVELnNoYXBl",highlighted:`<span class="hljs-meta">>>> </span>processed_dataset[<span class="hljs-string">"input_values"</span>][<span class="hljs-number">0</span>].shape | |
| (<span class="hljs-number">1000000</span>,) | |
| <span class="hljs-meta">>>> </span>processed_dataset[<span class="hljs-string">"input_values"</span>][<span class="hljs-number">1</span>].shape | |
| (<span class="hljs-number">1000000</span>,)`,wrap:!1}}),Ps=new I({props:{title:"Visión",local:"visión",headingTag:"h2"}}),Ks=new k({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJmb29kMTAxJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiU1QiUzQTEwMCU1RCUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"food101"</span>, split=<span class="hljs-string">"train[:100]"</span>)`,wrap:!1}}),sa=new k({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJpbWFnZSUyMiU1RA==",highlighted:'<span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"image"</span>]',wrap:!1}}),ea=new I({props:{title:"Extractor de características",local:"extractor-de-características",headingTag:"h3"}}),la=new k({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9GZWF0dXJlRXh0cmFjdG9yJTBBJTBBZmVhdHVyZV9leHRyYWN0b3IlMjAlM0QlMjBBdXRvRmVhdHVyZUV4dHJhY3Rvci5mcm9tX3ByZXRyYWluZWQoJTIyZ29vZ2xlJTJGdml0LWJhc2UtcGF0Y2gxNi0yMjQlMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor | |
| <span class="hljs-meta">>>> </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">"google/vit-base-patch16-224"</span>)`,wrap:!1}}),ta=new I({props:{title:"Aumento de Datos",local:"aumento-de-datos",headingTag:"h3"}}),ca=new k({props:{code:"ZnJvbSUyMHRvcmNodmlzaW9uLnRyYW5zZm9ybXMlMjBpbXBvcnQlMjBDb21wb3NlJTJDJTIwTm9ybWFsaXplJTJDJTIwUmFuZG9tUmVzaXplZENyb3AlMkMlMjBDb2xvckppdHRlciUyQyUyMFRvVGVuc29yJTBBJTBBbm9ybWFsaXplJTIwJTNEJTIwTm9ybWFsaXplKG1lYW4lM0RmZWF0dXJlX2V4dHJhY3Rvci5pbWFnZV9tZWFuJTJDJTIwc3RkJTNEZmVhdHVyZV9leHRyYWN0b3IuaW1hZ2Vfc3RkKSUwQV90cmFuc2Zvcm1zJTIwJTNEJTIwQ29tcG9zZSglMEElMjAlMjAlMjAlMjAlNUJSYW5kb21SZXNpemVkQ3JvcChmZWF0dXJlX2V4dHJhY3Rvci5zaXplKSUyQyUyMENvbG9ySml0dGVyKGJyaWdodG5lc3MlM0QwLjUlMkMlMjBodWUlM0QwLjUpJTJDJTIwVG9UZW5zb3IoKSUyQyUyMG5vcm1hbGl6ZSU1RCUwQSk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torchvision.transforms <span class="hljs-keyword">import</span> Compose, Normalize, RandomResizedCrop, ColorJitter, ToTensor | |
| <span class="hljs-meta">>>> </span>normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) | |
| <span class="hljs-meta">>>> </span>_transforms = Compose( | |
| <span class="hljs-meta">... </span> [RandomResizedCrop(feature_extractor.size), ColorJitter(brightness=<span class="hljs-number">0.5</span>, hue=<span class="hljs-number">0.5</span>), ToTensor(), normalize] | |
| <span class="hljs-meta">... </span>)`,wrap:!1}}),ua=new k({props:{code:"ZGVmJTIwdHJhbnNmb3JtcyhleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMnBpeGVsX3ZhbHVlcyUyMiU1RCUyMCUzRCUyMCU1Ql90cmFuc2Zvcm1zKGltYWdlLmNvbnZlcnQoJTIyUkdCJTIyKSklMjBmb3IlMjBpbWFnZSUyMGluJTIwZXhhbXBsZXMlNUIlMjJpbWFnZSUyMiU1RCU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGV4YW1wbGVz",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">transforms</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-meta">... </span> examples[<span class="hljs-string">"pixel_values"</span>] = [_transforms(image.convert(<span class="hljs-string">"RGB"</span>)) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[<span class="hljs-string">"image"</span>]] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples`,wrap:!1}}),oa=new k({props:{code:"ZGF0YXNldC5zZXRfdHJhbnNmb3JtKHRyYW5zZm9ybXMp",highlighted:'<span class="hljs-meta">>>> </span>dataset.set_transform(transforms)',wrap:!1}}),ma=new k({props:{code:"ZGF0YXNldCU1QjAlNUQlNUIlMjJpbWFnZSUyMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"image"</span>] | |
| {<span class="hljs-string">'image'</span>: <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=384x512 at <span class="hljs-number">0x7F1A7B0630D0</span>>, | |
| <span class="hljs-string">'label'</span>: <span class="hljs-number">6</span>, | |
| <span class="hljs-string">'pixel_values'</span>: tensor([[[ <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0745</span>, <span class="hljs-number">0.1216</span>, ..., -<span class="hljs-number">0.9922</span>, -<span class="hljs-number">0.9922</span>, -<span class="hljs-number">0.9922</span>], | |
| [-<span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0667</span>, <span class="hljs-number">0.1294</span>, ..., -<span class="hljs-number">0.9765</span>, -<span class="hljs-number">0.9843</span>, -<span class="hljs-number">0.9922</span>], | |
| [ <span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0824</span>, <span class="hljs-number">0.1137</span>, ..., -<span class="hljs-number">0.9765</span>, -<span class="hljs-number">0.9686</span>, -<span class="hljs-number">0.8667</span>], | |
| ..., | |
| [ <span class="hljs-number">0.0275</span>, <span class="hljs-number">0.0745</span>, <span class="hljs-number">0.0510</span>, ..., -<span class="hljs-number">0.1137</span>, -<span class="hljs-number">0.1216</span>, -<span class="hljs-number">0.0824</span>], | |
| [ <span class="hljs-number">0.0667</span>, <span class="hljs-number">0.0824</span>, <span class="hljs-number">0.0667</span>, ..., -<span class="hljs-number">0.0588</span>, -<span class="hljs-number">0.0745</span>, -<span class="hljs-number">0.0980</span>], | |
| [ <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0431</span>, ..., -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0588</span>]], | |
| [[ <span class="hljs-number">0.2078</span>, <span class="hljs-number">0.2471</span>, <span class="hljs-number">0.2863</span>, ..., -<span class="hljs-number">0.9451</span>, -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.9451</span>], | |
| [ <span class="hljs-number">0.1608</span>, <span class="hljs-number">0.2471</span>, <span class="hljs-number">0.3098</span>, ..., -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.9451</span>, -<span class="hljs-number">0.9373</span>], | |
| [ <span class="hljs-number">0.2078</span>, <span class="hljs-number">0.2706</span>, <span class="hljs-number">0.3020</span>, ..., -<span class="hljs-number">0.9608</span>, -<span class="hljs-number">0.9373</span>, -<span class="hljs-number">0.8275</span>], | |
| ..., | |
| [-<span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0118</span>, -<span class="hljs-number">0.0039</span>, ..., -<span class="hljs-number">0.2392</span>, -<span class="hljs-number">0.2471</span>, -<span class="hljs-number">0.2078</span>], | |
| [ <span class="hljs-number">0.0196</span>, <span class="hljs-number">0.0353</span>, <span class="hljs-number">0.0196</span>, ..., -<span class="hljs-number">0.1843</span>, -<span class="hljs-number">0.2000</span>, -<span class="hljs-number">0.2235</span>], | |
| [-<span class="hljs-number">0.0118</span>, -<span class="hljs-number">0.0039</span>, -<span class="hljs-number">0.0039</span>, ..., -<span class="hljs-number">0.0980</span>, -<span class="hljs-number">0.0980</span>, -<span class="hljs-number">0.1529</span>]], | |
| [[ <span class="hljs-number">0.3961</span>, <span class="hljs-number">0.4431</span>, <span class="hljs-number">0.4980</span>, ..., -<span class="hljs-number">0.9216</span>, -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.9216</span>], | |
| [ <span class="hljs-number">0.3569</span>, <span class="hljs-number">0.4510</span>, <span class="hljs-number">0.5216</span>, ..., -<span class="hljs-number">0.9059</span>, -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.9137</span>], | |
| [ <span class="hljs-number">0.4118</span>, <span class="hljs-number">0.4745</span>, <span class="hljs-number">0.5216</span>, ..., -<span class="hljs-number">0.9137</span>, -<span class="hljs-number">0.8902</span>, -<span class="hljs-number">0.7804</span>], | |
| ..., | |
| [-<span class="hljs-number">0.2314</span>, -<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.2078</span>, ..., -<span class="hljs-number">0.4196</span>, -<span class="hljs-number">0.4275</span>, -<span class="hljs-number">0.3882</span>], | |
| [-<span class="hljs-number">0.1843</span>, -<span class="hljs-number">0.1686</span>, -<span class="hljs-number">0.2000</span>, ..., -<span class="hljs-number">0.3647</span>, -<span class="hljs-number">0.3804</span>, -<span class="hljs-number">0.4039</span>], | |
| [-<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.1922</span>, -<span class="hljs-number">0.1922</span>, ..., -<span class="hljs-number">0.2941</span>, -<span class="hljs-number">0.2863</span>, -<span class="hljs-number">0.3412</span>]]])}`,wrap:!1}}),ha=new k({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBaW1wb3J0JTIwbWF0cGxvdGxpYi5weXBsb3QlMjBhcyUyMHBsdCUwQSUwQWltZyUyMCUzRCUyMGRhdGFzZXQlNUIwJTVEJTVCJTIycGl4ZWxfdmFsdWVzJTIyJTVEJTBBcGx0Lmltc2hvdyhpbWcucGVybXV0ZSgxJTJDJTIwMiUyQyUyMDApKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt | |
| <span class="hljs-meta">>>> </span>img = dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"pixel_values"</span>] | |
| <span class="hljs-meta">>>> </span>plt.imshow(img.permute(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">0</span>))`,wrap:!1}}),ja=new I({props:{title:"Multimodal",local:"multimodal",headingTag:"h2"}}),Ta=new k({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBbGpfc3BlZWNoJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMmxqX3NwZWVjaCUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>lj_speech = load_dataset(<span class="hljs-string">"lj_speech"</span>, split=<span class="hljs-string">"train"</span>)`,wrap:!1}}),Ja=new k({props:{code:"bGpfc3BlZWNoJTIwJTNEJTIwbGpfc3BlZWNoLm1hcChyZW1vdmVfY29sdW1ucyUzRCU1QiUyMmZpbGUlMjIlMkMlMjAlMjJpZCUyMiUyQyUyMCUyMm5vcm1hbGl6ZWRfdGV4dCUyMiU1RCk=",highlighted:'<span class="hljs-meta">>>> </span>lj_speech = lj_speech.<span class="hljs-built_in">map</span>(remove_columns=[<span class="hljs-string">"file"</span>, <span class="hljs-string">"id"</span>, <span class="hljs-string">"normalized_text"</span>])',wrap:!1}}),$a=new k({props:{code:"bGpfc3BlZWNoJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTBBJTBBbGpfc3BlZWNoJTVCMCU1RCU1QiUyMnRleHQlMjIlNUQ=",highlighted:`<span class="hljs-meta">>>> </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>] | |
| {<span class="hljs-string">'array'</span>: array([-<span class="hljs-number">7.3242188e-04</span>, -<span class="hljs-number">7.6293945e-04</span>, -<span class="hljs-number">6.4086914e-04</span>, ..., | |
| <span class="hljs-number">7.3242188e-04</span>, <span class="hljs-number">2.1362305e-04</span>, <span class="hljs-number">6.1035156e-05</span>], dtype=float32), | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'/root/.cache/huggingface/datasets/downloads/extracted/917ece08c95cf0c4115e45294e3cd0dee724a1165b7fc11798369308a465bd26/LJSpeech-1.1/wavs/LJ001-0001.wav'</span>, | |
| <span class="hljs-string">'sampling_rate'</span>: <span class="hljs-number">22050</span>} | |
| <span class="hljs-meta">>>> </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">"text"</span>] | |
| <span class="hljs-string">'Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition'</span>`,wrap:!1}}),_a=new k({props:{code:"bGpfc3BlZWNoJTIwJTNEJTIwbGpfc3BlZWNoLmNhc3RfY29sdW1uKCUyMmF1ZGlvJTIyJTJDJTIwQXVkaW8oc2FtcGxpbmdfcmF0ZSUzRDE2XzAwMCkp",highlighted:'<span class="hljs-meta">>>> </span>lj_speech = lj_speech.cast_column(<span class="hljs-string">"audio"</span>, Audio(sampling_rate=<span class="hljs-number">16_000</span>))',wrap:!1}}),va=new I({props:{title:"Processor",local:"processor",headingTag:"h3"}}),ka=new k({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Qcm9jZXNzb3IlMEElMEFwcm9jZXNzb3IlMjAlM0QlMjBBdXRvUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZCglMjJmYWNlYm9vayUyRndhdjJ2ZWMyLWJhc2UtOTYwaCUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor | |
| <span class="hljs-meta">>>> </span>processor = AutoProcessor.from_pretrained(<span class="hljs-string">"facebook/wav2vec2-base-960h"</span>)`,wrap:!1}}),Ca=new k({props:{code:"ZGVmJTIwcHJlcGFyZV9kYXRhc2V0KGV4YW1wbGUpJTNBJTBBJTIwJTIwJTIwJTIwYXVkaW8lMjAlM0QlMjBleGFtcGxlJTVCJTIyYXVkaW8lMjIlNUQlMEElMEElMjAlMjAlMjAlMjBleGFtcGxlLnVwZGF0ZShwcm9jZXNzb3IoYXVkaW8lM0RhdWRpbyU1QiUyMmFycmF5JTIyJTVEJTJDJTIwdGV4dCUzRGV4YW1wbGUlNUIlMjJ0ZXh0JTIyJTVEJTJDJTIwc2FtcGxpbmdfcmF0ZSUzRDE2MDAwKSklMEElMEElMjAlMjAlMjAlMjByZXR1cm4lMjBleGFtcGxl",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">prepare_dataset</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-meta">... </span> audio = example[<span class="hljs-string">"audio"</span>] | |
| <span class="hljs-meta">... </span> example.update(processor(audio=audio[<span class="hljs-string">"array"</span>], text=example[<span class="hljs-string">"text"</span>], sampling_rate=<span class="hljs-number">16000</span>)) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> example`,wrap:!1}}),Ia=new k({props:{code:"cHJlcGFyZV9kYXRhc2V0KGxqX3NwZWVjaCU1QjAlNUQp",highlighted:'<span class="hljs-meta">>>> </span>prepare_dataset(lj_speech[<span class="hljs-number">0</span>])',wrap:!1}}),qa=new I({props:{title:"Todo lo que siempre quisiste saber sobre el padding y el truncamiento",local:"todo-lo-que-siempre-quisiste-saber-sobre-el-padding-y-el-truncamiento",headingTag:"h2"}}),Wa=new qt({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/es/preprocessing.md"}}),{c(){p=c("meta"),o=l(),d=c("p"),b=l(),T(i.$$.fragment),x=l(),T(f.$$.fragment),_=l(),g=c("p"),g.textContent=r,w=l(),v=c("ul"),v.innerHTML=V,G=l(),T(U.$$.fragment),E=l(),T(Y.$$.fragment),Fa=l(),F=c("p"),F.innerHTML=sl,Na=l(),T(z.$$.fragment),Xa=l(),N=c("p"),N.innerHTML=al,La=l(),T(X.$$.fragment),Qa=l(),L=c("p"),L.innerHTML=el,Aa=l(),T(Q.$$.fragment),Pa=l(),A=c("p"),A.textContent=nl,Sa=l(),T(P.$$.fragment),Da=l(),S=c("p"),S.textContent=ll,Ka=l(),D=c("ul"),D.innerHTML=tl,Oa=l(),K=c("p"),K.innerHTML=pl,se=l(),T(O.$$.fragment),ae=l(),ss=c("p"),ss.innerHTML=rl,ee=l(),as=c("p"),as.textContent=cl,ne=l(),T(es.$$.fragment),le=l(),T(ns.$$.fragment),te=l(),ls=c("p"),ls.textContent=ul,pe=l(),ts=c("p"),ts.innerHTML=ol,re=l(),T(ps.$$.fragment),ce=l(),rs=c("p"),rs.textContent=ml,ue=l(),T(cs.$$.fragment),oe=l(),us=c("p"),us.textContent=il,me=l(),os=c("p"),os.innerHTML=hl,ie=l(),T(ms.$$.fragment),he=l(),T(is.$$.fragment),de=l(),hs=c("p"),hs.textContent=dl,je=l(),ds=c("p"),ds.innerHTML=jl,be=l(),T(js.$$.fragment),ge=l(),T(bs.$$.fragment),fe=l(),gs=c("p"),gs.innerHTML=bl,Te=l(),T(fs.$$.fragment),ye=l(),Ts=c("p"),Ts.innerHTML=gl,Je=l(),T(ys.$$.fragment),Me=l(),Js=c("p"),Js.innerHTML=fl,$e=l(),T(Ms.$$.fragment),we=l(),$s=c("p"),$s.textContent=Tl,_e=l(),ws=c("ul"),ws.innerHTML=yl,ve=l(),T(_s.$$.fragment),xe=l(),vs=c("p"),vs.innerHTML=Jl,ke=l(),xs=c("p"),xs.innerHTML=Ml,Ue=l(),T(ks.$$.fragment),Ce=l(),Us=c("ol"),Us.innerHTML=$l,Ie=l(),T(Cs.$$.fragment),Ge=l(),Z=c("ol"),Z.innerHTML=wl,Ve=l(),T(Is.$$.fragment),qe=l(),Gs=c("p"),Gs.innerHTML=_l,ze=l(),T(Vs.$$.fragment),Ze=l(),qs=c("p"),qs.textContent=vl,Be=l(),zs=c("p"),zs.innerHTML=xl,Re=l(),T(Zs.$$.fragment),We=l(),Bs=c("p"),Bs.innerHTML=kl,He=l(),T(Rs.$$.fragment),Ee=l(),T(Ws.$$.fragment),Ye=l(),Hs=c("p"),Hs.textContent=Ul,Fe=l(),T(Es.$$.fragment),Ne=l(),Ys=c("p"),Ys.innerHTML=Cl,Xe=l(),T(Fs.$$.fragment),Le=l(),Ns=c("p"),Ns.textContent=Il,Qe=l(),T(Xs.$$.fragment),Ae=l(),Ls=c("p"),Ls.textContent=Gl,Pe=l(),T(Qs.$$.fragment),Se=l(),As=c("p"),As.textContent=Vl,De=l(),T(Ps.$$.fragment),Ke=l(),Ss=c("p"),Ss.textContent=ql,Oe=l(),Ds=c("p"),Ds.innerHTML=zl,sn=l(),T(Ks.$$.fragment),an=l(),Os=c("p"),Os.innerHTML=Zl,en=l(),T(sa.$$.fragment),nn=l(),aa=c("p"),aa.innerHTML=Bl,ln=l(),T(ea.$$.fragment),tn=l(),na=c("p"),na.innerHTML=Rl,pn=l(),T(la.$$.fragment),rn=l(),T(ta.$$.fragment),cn=l(),pa=c("p"),pa.innerHTML=Wl,un=l(),ra=c("ol"),ra.innerHTML=Hl,on=l(),T(ca.$$.fragment),mn=l(),B=c("ol"),B.innerHTML=El,hn=l(),T(ua.$$.fragment),dn=l(),R=c("ol"),R.innerHTML=Yl,jn=l(),T(oa.$$.fragment),bn=l(),W=c("ol"),W.innerHTML=Fl,gn=l(),T(ma.$$.fragment),fn=l(),ia=c("p"),ia.textContent=Nl,Tn=l(),T(ha.$$.fragment),yn=l(),da=c("p"),da.innerHTML=Xl,Jn=l(),T(ja.$$.fragment),Mn=l(),ba=c("p"),ba.textContent=Ll,$n=l(),ga=c("ul"),ga.innerHTML=Ql,wn=l(),fa=c("p"),fa.innerHTML=Al,_n=l(),T(Ta.$$.fragment),vn=l(),ya=c("p"),ya.innerHTML=Pl,xn=l(),T(Ja.$$.fragment),kn=l(),Ma=c("p"),Ma.innerHTML=Sl,Un=l(),T($a.$$.fragment),Cn=l(),wa=c("p"),wa.innerHTML=Dl,In=l(),T(_a.$$.fragment),Gn=l(),T(va.$$.fragment),Vn=l(),xa=c("p"),xa.innerHTML=Kl,qn=l(),T(ka.$$.fragment),zn=l(),Ua=c("ol"),Ua.innerHTML=Ol,Zn=l(),T(Ca.$$.fragment),Bn=l(),H=c("ol"),H.innerHTML=st,Rn=l(),T(Ia.$$.fragment),Wn=l(),Ga=c("p"),Ga.innerHTML=at,Hn=l(),Va=c("p"),Va.textContent=et,En=l(),T(qa.$$.fragment),Yn=l(),za=c("p"),za.innerHTML=nt,Fn=l(),Za=c("ul"),Za.innerHTML=lt,Nn=l(),Ba=c("p"),Ba.innerHTML=tt,Xn=l(),Ra=c("table"),Ra.innerHTML=pt,Ln=l(),T(Wa.$$.fragment),Qn=l(),Ea=c("p"),this.h()},l(s){const a=kt("svelte-u9bgzb",document.head);p=u(a,"META",{name:!0,content:!0}),a.forEach(e),o=t(s),d=u(s,"P",{}),q(d).forEach(e),b=t(s),M(i.$$.fragment,s),x=t(s),M(f.$$.fragment,s),_=t(s),g=u(s,"P",{"data-svelte-h":!0}),j(g)!=="svelte-y89zx"&&(g.textContent=r),w=t(s),v=u(s,"UL",{"data-svelte-h":!0}),j(v)!=="svelte-6x0kay"&&(v.innerHTML=V),G=t(s),M(U.$$.fragment,s),E=t(s),M(Y.$$.fragment,s),Fa=t(s),F=u(s,"P",{"data-svelte-h":!0}),j(F)!=="svelte-1osx0nf"&&(F.innerHTML=sl),Na=t(s),M(z.$$.fragment,s),Xa=t(s),N=u(s,"P",{"data-svelte-h":!0}),j(N)!=="svelte-13apvvk"&&(N.innerHTML=al),La=t(s),M(X.$$.fragment,s),Qa=t(s),L=u(s,"P",{"data-svelte-h":!0}),j(L)!=="svelte-nc82a0"&&(L.innerHTML=el),Aa=t(s),M(Q.$$.fragment,s),Pa=t(s),A=u(s,"P",{"data-svelte-h":!0}),j(A)!=="svelte-7ko66l"&&(A.textContent=nl),Sa=t(s),M(P.$$.fragment,s),Da=t(s),S=u(s,"P",{"data-svelte-h":!0}),j(S)!=="svelte-1weufbq"&&(S.textContent=ll),Ka=t(s),D=u(s,"UL",{"data-svelte-h":!0}),j(D)!=="svelte-51u6fg"&&(D.innerHTML=tl),Oa=t(s),K=u(s,"P",{"data-svelte-h":!0}),j(K)!=="svelte-6qpoi9"&&(K.innerHTML=pl),se=t(s),M(O.$$.fragment,s),ae=t(s),ss=u(s,"P",{"data-svelte-h":!0}),j(ss)!=="svelte-ahbs6d"&&(ss.innerHTML=rl),ee=t(s),as=u(s,"P",{"data-svelte-h":!0}),j(as)!=="svelte-16mbq9c"&&(as.textContent=cl),ne=t(s),M(es.$$.fragment,s),le=t(s),M(ns.$$.fragment,s),te=t(s),ls=u(s,"P",{"data-svelte-h":!0}),j(ls)!=="svelte-1x6e4jq"&&(ls.textContent=ul),pe=t(s),ts=u(s,"P",{"data-svelte-h":!0}),j(ts)!=="svelte-1h6qsj7"&&(ts.innerHTML=ol),re=t(s),M(ps.$$.fragment,s),ce=t(s),rs=u(s,"P",{"data-svelte-h":!0}),j(rs)!=="svelte-1nnwsvq"&&(rs.textContent=ml),ue=t(s),M(cs.$$.fragment,s),oe=t(s),us=u(s,"P",{"data-svelte-h":!0}),j(us)!=="svelte-j1hu7u"&&(us.textContent=il),me=t(s),os=u(s,"P",{"data-svelte-h":!0}),j(os)!=="svelte-d28dd5"&&(os.innerHTML=hl),ie=t(s),M(ms.$$.fragment,s),he=t(s),M(is.$$.fragment,s),de=t(s),hs=u(s,"P",{"data-svelte-h":!0}),j(hs)!=="svelte-2g89e"&&(hs.textContent=dl),je=t(s),ds=u(s,"P",{"data-svelte-h":!0}),j(ds)!=="svelte-15z78bl"&&(ds.innerHTML=jl),be=t(s),M(js.$$.fragment,s),ge=t(s),M(bs.$$.fragment,s),fe=t(s),gs=u(s,"P",{"data-svelte-h":!0}),j(gs)!=="svelte-qxkpt6"&&(gs.innerHTML=bl),Te=t(s),M(fs.$$.fragment,s),ye=t(s),Ts=u(s,"P",{"data-svelte-h":!0}),j(Ts)!=="svelte-1bteyqg"&&(Ts.innerHTML=gl),Je=t(s),M(ys.$$.fragment,s),Me=t(s),Js=u(s,"P",{"data-svelte-h":!0}),j(Js)!=="svelte-15xdt4s"&&(Js.innerHTML=fl),$e=t(s),M(Ms.$$.fragment,s),we=t(s),$s=u(s,"P",{"data-svelte-h":!0}),j($s)!=="svelte-kxl3uf"&&($s.textContent=Tl),_e=t(s),ws=u(s,"UL",{"data-svelte-h":!0}),j(ws)!=="svelte-wvhb5k"&&(ws.innerHTML=yl),ve=t(s),M(_s.$$.fragment,s),xe=t(s),vs=u(s,"P",{"data-svelte-h":!0}),j(vs)!=="svelte-1nbwfda"&&(vs.innerHTML=Jl),ke=t(s),xs=u(s,"P",{"data-svelte-h":!0}),j(xs)!=="svelte-1a77arx"&&(xs.innerHTML=Ml),Ue=t(s),M(ks.$$.fragment,s),Ce=t(s),Us=u(s,"OL",{"data-svelte-h":!0}),j(Us)!=="svelte-b32fxj"&&(Us.innerHTML=$l),Ie=t(s),M(Cs.$$.fragment,s),Ge=t(s),Z=u(s,"OL",{start:!0,"data-svelte-h":!0}),j(Z)!=="svelte-dr0mp4"&&(Z.innerHTML=wl),Ve=t(s),M(Is.$$.fragment,s),qe=t(s),Gs=u(s,"P",{"data-svelte-h":!0}),j(Gs)!=="svelte-1oaz39k"&&(Gs.innerHTML=_l),ze=t(s),M(Vs.$$.fragment,s),Ze=t(s),qs=u(s,"P",{"data-svelte-h":!0}),j(qs)!=="svelte-1er2avd"&&(qs.textContent=vl),Be=t(s),zs=u(s,"P",{"data-svelte-h":!0}),j(zs)!=="svelte-1e9mt49"&&(zs.innerHTML=xl),Re=t(s),M(Zs.$$.fragment,s),We=t(s),Bs=u(s,"P",{"data-svelte-h":!0}),j(Bs)!=="svelte-1m9y0kd"&&(Bs.innerHTML=kl),He=t(s),M(Rs.$$.fragment,s),Ee=t(s),M(Ws.$$.fragment,s),Ye=t(s),Hs=u(s,"P",{"data-svelte-h":!0}),j(Hs)!=="svelte-1bz2o9j"&&(Hs.textContent=Ul),Fe=t(s),M(Es.$$.fragment,s),Ne=t(s),Ys=u(s,"P",{"data-svelte-h":!0}),j(Ys)!=="svelte-wdtfgs"&&(Ys.innerHTML=Cl),Xe=t(s),M(Fs.$$.fragment,s),Le=t(s),Ns=u(s,"P",{"data-svelte-h":!0}),j(Ns)!=="svelte-aa9o0z"&&(Ns.textContent=Il),Qe=t(s),M(Xs.$$.fragment,s),Ae=t(s),Ls=u(s,"P",{"data-svelte-h":!0}),j(Ls)!=="svelte-gj0xvu"&&(Ls.textContent=Gl),Pe=t(s),M(Qs.$$.fragment,s),Se=t(s),As=u(s,"P",{"data-svelte-h":!0}),j(As)!=="svelte-u21qg0"&&(As.textContent=Vl),De=t(s),M(Ps.$$.fragment,s),Ke=t(s),Ss=u(s,"P",{"data-svelte-h":!0}),j(Ss)!=="svelte-1boco99"&&(Ss.textContent=ql),Oe=t(s),Ds=u(s,"P",{"data-svelte-h":!0}),j(Ds)!=="svelte-qad428"&&(Ds.innerHTML=zl),sn=t(s),M(Ks.$$.fragment,s),an=t(s),Os=u(s,"P",{"data-svelte-h":!0}),j(Os)!=="svelte-15ws7x2"&&(Os.innerHTML=Zl),en=t(s),M(sa.$$.fragment,s),nn=t(s),aa=u(s,"P",{"data-svelte-h":!0}),j(aa)!=="svelte-1pswjfa"&&(aa.innerHTML=Bl),ln=t(s),M(ea.$$.fragment,s),tn=t(s),na=u(s,"P",{"data-svelte-h":!0}),j(na)!=="svelte-1e9mt49"&&(na.innerHTML=Rl),pn=t(s),M(la.$$.fragment,s),rn=t(s),M(ta.$$.fragment,s),cn=t(s),pa=u(s,"P",{"data-svelte-h":!0}),j(pa)!=="svelte-1w4wr7j"&&(pa.innerHTML=Wl),un=t(s),ra=u(s,"OL",{"data-svelte-h":!0}),j(ra)!=="svelte-1l4nif2"&&(ra.innerHTML=Hl),on=t(s),M(ca.$$.fragment,s),mn=t(s),B=u(s,"OL",{start:!0,"data-svelte-h":!0}),j(B)!=="svelte-1s9i14m"&&(B.innerHTML=El),hn=t(s),M(ua.$$.fragment,s),dn=t(s),R=u(s,"OL",{start:!0,"data-svelte-h":!0}),j(R)!=="svelte-ssl26j"&&(R.innerHTML=Yl),jn=t(s),M(oa.$$.fragment,s),bn=t(s),W=u(s,"OL",{start:!0,"data-svelte-h":!0}),j(W)!=="svelte-t81baw"&&(W.innerHTML=Fl),gn=t(s),M(ma.$$.fragment,s),fn=t(s),ia=u(s,"P",{"data-svelte-h":!0}),j(ia)!=="svelte-qon1zc"&&(ia.textContent=Nl),Tn=t(s),M(ha.$$.fragment,s),yn=t(s),da=u(s,"P",{"data-svelte-h":!0}),j(da)!=="svelte-vgp975"&&(da.innerHTML=Xl),Jn=t(s),M(ja.$$.fragment,s),Mn=t(s),ba=u(s,"P",{"data-svelte-h":!0}),j(ba)!=="svelte-6xcv0a"&&(ba.textContent=Ll),$n=t(s),ga=u(s,"UL",{"data-svelte-h":!0}),j(ga)!=="svelte-1wb4hvd"&&(ga.innerHTML=Ql),wn=t(s),fa=u(s,"P",{"data-svelte-h":!0}),j(fa)!=="svelte-as98xc"&&(fa.innerHTML=Al),_n=t(s),M(Ta.$$.fragment,s),vn=t(s),ya=u(s,"P",{"data-svelte-h":!0}),j(ya)!=="svelte-o8z0gg"&&(ya.innerHTML=Pl),xn=t(s),M(Ja.$$.fragment,s),kn=t(s),Ma=u(s,"P",{"data-svelte-h":!0}),j(Ma)!=="svelte-1q5hmwr"&&(Ma.innerHTML=Sl),Un=t(s),M($a.$$.fragment,s),Cn=t(s),wa=u(s,"P",{"data-svelte-h":!0}),j(wa)!=="svelte-18na6ic"&&(wa.innerHTML=Dl),In=t(s),M(_a.$$.fragment,s),Gn=t(s),M(va.$$.fragment,s),Vn=t(s),xa=u(s,"P",{"data-svelte-h":!0}),j(xa)!=="svelte-1q04m25"&&(xa.innerHTML=Kl),qn=t(s),M(ka.$$.fragment,s),zn=t(s),Ua=u(s,"OL",{"data-svelte-h":!0}),j(Ua)!=="svelte-refowx"&&(Ua.innerHTML=Ol),Zn=t(s),M(Ca.$$.fragment,s),Bn=t(s),H=u(s,"OL",{start:!0,"data-svelte-h":!0}),j(H)!=="svelte-wrs0lc"&&(H.innerHTML=st),Rn=t(s),M(Ia.$$.fragment,s),Wn=t(s),Ga=u(s,"P",{"data-svelte-h":!0}),j(Ga)!=="svelte-yqf723"&&(Ga.innerHTML=at),Hn=t(s),Va=u(s,"P",{"data-svelte-h":!0}),j(Va)!=="svelte-1p9l7gz"&&(Va.textContent=et),En=t(s),M(qa.$$.fragment,s),Yn=t(s),za=u(s,"P",{"data-svelte-h":!0}),j(za)!=="svelte-1fldut5"&&(za.innerHTML=nt),Fn=t(s),Za=u(s,"UL",{"data-svelte-h":!0}),j(Za)!=="svelte-e765f0"&&(Za.innerHTML=lt),Nn=t(s),Ba=u(s,"P",{"data-svelte-h":!0}),j(Ba)!=="svelte-1ud68fs"&&(Ba.innerHTML=tt),Xn=t(s),Ra=u(s,"TABLE",{"data-svelte-h":!0}),j(Ra)!=="svelte-4okjp0"&&(Ra.innerHTML=pt),Ln=t(s),M(Wa.$$.fragment,s),Qn=t(s),Ea=u(s,"P",{}),q(Ea).forEach(e),this.h()},h(){C(p,"name","hf:doc:metadata"),C(p,"content",Nt),C(Z,"start","2"),C(B,"start","2"),C(R,"start","3"),C(W,"start","4"),C(H,"start","2")},m(s,a){Ha(document.head,p),n(s,o,a),n(s,d,a),n(s,b,a),y(i,s,a),n(s,x,a),y(f,s,a),n(s,_,a),n(s,g,a),n(s,w,a),n(s,v,a),n(s,G,a),y(U,s,a),n(s,E,a),y(Y,s,a),n(s,Fa,a),n(s,F,a),n(s,Na,a),y(z,s,a),n(s,Xa,a),n(s,N,a),n(s,La,a),y(X,s,a),n(s,Qa,a),n(s,L,a),n(s,Aa,a),y(Q,s,a),n(s,Pa,a),n(s,A,a),n(s,Sa,a),y(P,s,a),n(s,Da,a),n(s,S,a),n(s,Ka,a),n(s,D,a),n(s,Oa,a),n(s,K,a),n(s,se,a),y(O,s,a),n(s,ae,a),n(s,ss,a),n(s,ee,a),n(s,as,a),n(s,ne,a),y(es,s,a),n(s,le,a),y(ns,s,a),n(s,te,a),n(s,ls,a),n(s,pe,a),n(s,ts,a),n(s,re,a),y(ps,s,a),n(s,ce,a),n(s,rs,a),n(s,ue,a),y(cs,s,a),n(s,oe,a),n(s,us,a),n(s,me,a),n(s,os,a),n(s,ie,a),y(ms,s,a),n(s,he,a),y(is,s,a),n(s,de,a),n(s,hs,a),n(s,je,a),n(s,ds,a),n(s,be,a),y(js,s,a),n(s,ge,a),y(bs,s,a),n(s,fe,a),n(s,gs,a),n(s,Te,a),y(fs,s,a),n(s,ye,a),n(s,Ts,a),n(s,Je,a),y(ys,s,a),n(s,Me,a),n(s,Js,a),n(s,$e,a),y(Ms,s,a),n(s,we,a),n(s,$s,a),n(s,_e,a),n(s,ws,a),n(s,ve,a),y(_s,s,a),n(s,xe,a),n(s,vs,a),n(s,ke,a),n(s,xs,a),n(s,Ue,a),y(ks,s,a),n(s,Ce,a),n(s,Us,a),n(s,Ie,a),y(Cs,s,a),n(s,Ge,a),n(s,Z,a),n(s,Ve,a),y(Is,s,a),n(s,qe,a),n(s,Gs,a),n(s,ze,a),y(Vs,s,a),n(s,Ze,a),n(s,qs,a),n(s,Be,a),n(s,zs,a),n(s,Re,a),y(Zs,s,a),n(s,We,a),n(s,Bs,a),n(s,He,a),y(Rs,s,a),n(s,Ee,a),y(Ws,s,a),n(s,Ye,a),n(s,Hs,a),n(s,Fe,a),y(Es,s,a),n(s,Ne,a),n(s,Ys,a),n(s,Xe,a),y(Fs,s,a),n(s,Le,a),n(s,Ns,a),n(s,Qe,a),y(Xs,s,a),n(s,Ae,a),n(s,Ls,a),n(s,Pe,a),y(Qs,s,a),n(s,Se,a),n(s,As,a),n(s,De,a),y(Ps,s,a),n(s,Ke,a),n(s,Ss,a),n(s,Oe,a),n(s,Ds,a),n(s,sn,a),y(Ks,s,a),n(s,an,a),n(s,Os,a),n(s,en,a),y(sa,s,a),n(s,nn,a),n(s,aa,a),n(s,ln,a),y(ea,s,a),n(s,tn,a),n(s,na,a),n(s,pn,a),y(la,s,a),n(s,rn,a),y(ta,s,a),n(s,cn,a),n(s,pa,a),n(s,un,a),n(s,ra,a),n(s,on,a),y(ca,s,a),n(s,mn,a),n(s,B,a),n(s,hn,a),y(ua,s,a),n(s,dn,a),n(s,R,a),n(s,jn,a),y(oa,s,a),n(s,bn,a),n(s,W,a),n(s,gn,a),y(ma,s,a),n(s,fn,a),n(s,ia,a),n(s,Tn,a),y(ha,s,a),n(s,yn,a),n(s,da,a),n(s,Jn,a),y(ja,s,a),n(s,Mn,a),n(s,ba,a),n(s,$n,a),n(s,ga,a),n(s,wn,a),n(s,fa,a),n(s,_n,a),y(Ta,s,a),n(s,vn,a),n(s,ya,a),n(s,xn,a),y(Ja,s,a),n(s,kn,a),n(s,Ma,a),n(s,Un,a),y($a,s,a),n(s,Cn,a),n(s,wa,a),n(s,In,a),y(_a,s,a),n(s,Gn,a),y(va,s,a),n(s,Vn,a),n(s,xa,a),n(s,qn,a),y(ka,s,a),n(s,zn,a),n(s,Ua,a),n(s,Zn,a),y(Ca,s,a),n(s,Bn,a),n(s,H,a),n(s,Rn,a),y(Ia,s,a),n(s,Wn,a),n(s,Ga,a),n(s,Hn,a),n(s,Va,a),n(s,En,a),y(qa,s,a),n(s,Yn,a),n(s,za,a),n(s,Fn,a),n(s,Za,a),n(s,Nn,a),n(s,Ba,a),n(s,Xn,a),n(s,Ra,a),n(s,Ln,a),y(Wa,s,a),n(s,Qn,a),n(s,Ea,a),An=!0},p(s,[a]){const rt={};a&2&&(rt.$$scope={dirty:a,ctx:s}),z.$set(rt)},i(s){An||(m(i.$$.fragment,s),m(f.$$.fragment,s),m(U.$$.fragment,s),m(Y.$$.fragment,s),m(z.$$.fragment,s),m(X.$$.fragment,s),m(Q.$$.fragment,s),m(P.$$.fragment,s),m(O.$$.fragment,s),m(es.$$.fragment,s),m(ns.$$.fragment,s),m(ps.$$.fragment,s),m(cs.$$.fragment,s),m(ms.$$.fragment,s),m(is.$$.fragment,s),m(js.$$.fragment,s),m(bs.$$.fragment,s),m(fs.$$.fragment,s),m(ys.$$.fragment,s),m(Ms.$$.fragment,s),m(_s.$$.fragment,s),m(ks.$$.fragment,s),m(Cs.$$.fragment,s),m(Is.$$.fragment,s),m(Vs.$$.fragment,s),m(Zs.$$.fragment,s),m(Rs.$$.fragment,s),m(Ws.$$.fragment,s),m(Es.$$.fragment,s),m(Fs.$$.fragment,s),m(Xs.$$.fragment,s),m(Qs.$$.fragment,s),m(Ps.$$.fragment,s),m(Ks.$$.fragment,s),m(sa.$$.fragment,s),m(ea.$$.fragment,s),m(la.$$.fragment,s),m(ta.$$.fragment,s),m(ca.$$.fragment,s),m(ua.$$.fragment,s),m(oa.$$.fragment,s),m(ma.$$.fragment,s),m(ha.$$.fragment,s),m(ja.$$.fragment,s),m(Ta.$$.fragment,s),m(Ja.$$.fragment,s),m($a.$$.fragment,s),m(_a.$$.fragment,s),m(va.$$.fragment,s),m(ka.$$.fragment,s),m(Ca.$$.fragment,s),m(Ia.$$.fragment,s),m(qa.$$.fragment,s),m(Wa.$$.fragment,s),An=!0)},o(s){h(i.$$.fragment,s),h(f.$$.fragment,s),h(U.$$.fragment,s),h(Y.$$.fragment,s),h(z.$$.fragment,s),h(X.$$.fragment,s),h(Q.$$.fragment,s),h(P.$$.fragment,s),h(O.$$.fragment,s),h(es.$$.fragment,s),h(ns.$$.fragment,s),h(ps.$$.fragment,s),h(cs.$$.fragment,s),h(ms.$$.fragment,s),h(is.$$.fragment,s),h(js.$$.fragment,s),h(bs.$$.fragment,s),h(fs.$$.fragment,s),h(ys.$$.fragment,s),h(Ms.$$.fragment,s),h(_s.$$.fragment,s),h(ks.$$.fragment,s),h(Cs.$$.fragment,s),h(Is.$$.fragment,s),h(Vs.$$.fragment,s),h(Zs.$$.fragment,s),h(Rs.$$.fragment,s),h(Ws.$$.fragment,s),h(Es.$$.fragment,s),h(Fs.$$.fragment,s),h(Xs.$$.fragment,s),h(Qs.$$.fragment,s),h(Ps.$$.fragment,s),h(Ks.$$.fragment,s),h(sa.$$.fragment,s),h(ea.$$.fragment,s),h(la.$$.fragment,s),h(ta.$$.fragment,s),h(ca.$$.fragment,s),h(ua.$$.fragment,s),h(oa.$$.fragment,s),h(ma.$$.fragment,s),h(ha.$$.fragment,s),h(ja.$$.fragment,s),h(Ta.$$.fragment,s),h(Ja.$$.fragment,s),h($a.$$.fragment,s),h(_a.$$.fragment,s),h(va.$$.fragment,s),h(ka.$$.fragment,s),h(Ca.$$.fragment,s),h(Ia.$$.fragment,s),h(qa.$$.fragment,s),h(Wa.$$.fragment,s),An=!1},d(s){s&&(e(o),e(d),e(b),e(x),e(_),e(g),e(w),e(v),e(G),e(E),e(Fa),e(F),e(Na),e(Xa),e(N),e(La),e(Qa),e(L),e(Aa),e(Pa),e(A),e(Sa),e(Da),e(S),e(Ka),e(D),e(Oa),e(K),e(se),e(ae),e(ss),e(ee),e(as),e(ne),e(le),e(te),e(ls),e(pe),e(ts),e(re),e(ce),e(rs),e(ue),e(oe),e(us),e(me),e(os),e(ie),e(he),e(de),e(hs),e(je),e(ds),e(be),e(ge),e(fe),e(gs),e(Te),e(ye),e(Ts),e(Je),e(Me),e(Js),e($e),e(we),e($s),e(_e),e(ws),e(ve),e(xe),e(vs),e(ke),e(xs),e(Ue),e(Ce),e(Us),e(Ie),e(Ge),e(Z),e(Ve),e(qe),e(Gs),e(ze),e(Ze),e(qs),e(Be),e(zs),e(Re),e(We),e(Bs),e(He),e(Ee),e(Ye),e(Hs),e(Fe),e(Ne),e(Ys),e(Xe),e(Le),e(Ns),e(Qe),e(Ae),e(Ls),e(Pe),e(Se),e(As),e(De),e(Ke),e(Ss),e(Oe),e(Ds),e(sn),e(an),e(Os),e(en),e(nn),e(aa),e(ln),e(tn),e(na),e(pn),e(rn),e(cn),e(pa),e(un),e(ra),e(on),e(mn),e(B),e(hn),e(dn),e(R),e(jn),e(bn),e(W),e(gn),e(fn),e(ia),e(Tn),e(yn),e(da),e(Jn),e(Mn),e(ba),e($n),e(ga),e(wn),e(fa),e(_n),e(vn),e(ya),e(xn),e(kn),e(Ma),e(Un),e(Cn),e(wa),e(In),e(Gn),e(Vn),e(xa),e(qn),e(zn),e(Ua),e(Zn),e(Bn),e(H),e(Rn),e(Wn),e(Ga),e(Hn),e(Va),e(En),e(Yn),e(za),e(Fn),e(Za),e(Nn),e(Ba),e(Xn),e(Ra),e(Ln),e(Qn),e(Ea)),e(p),J(i,s),J(f,s),J(U,s),J(Y,s),J(z,s),J(X,s),J(Q,s),J(P,s),J(O,s),J(es,s),J(ns,s),J(ps,s),J(cs,s),J(ms,s),J(is,s),J(js,s),J(bs,s),J(fs,s),J(ys,s),J(Ms,s),J(_s,s),J(ks,s),J(Cs,s),J(Is,s),J(Vs,s),J(Zs,s),J(Rs,s),J(Ws,s),J(Es,s),J(Fs,s),J(Xs,s),J(Qs,s),J(Ps,s),J(Ks,s),J(sa,s),J(ea,s),J(la,s),J(ta,s),J(ca,s),J(ua,s),J(oa,s),J(ma,s),J(ha,s),J(ja,s),J(Ta,s),J(Ja,s),J($a,s),J(_a,s),J(va,s),J(ka,s),J(Ca,s),J(Ia,s),J(qa,s),J(Wa,s)}}}const Nt='{"title":"Preprocesamiento","local":"preprocesamiento","sections":[{"title":"NLP","local":"nlp","sections":[{"title":"Tokenizar","local":"tokenizar","sections":[],"depth":3},{"title":"Pad","local":"pad","sections":[],"depth":3},{"title":"Truncamiento","local":"truncamiento","sections":[],"depth":3},{"title":"Construye tensores","local":"construye-tensores","sections":[],"depth":3}],"depth":2},{"title":"Audio","local":"audio","sections":[{"title":"Resample","local":"resample","sections":[],"depth":3},{"title":"Extractor de características","local":"extractor-de-características","sections":[],"depth":3},{"title":"Pad y truncamiento","local":"pad-y-truncamiento","sections":[],"depth":3}],"depth":2},{"title":"Visión","local":"visión","sections":[{"title":"Extractor de características","local":"extractor-de-características","sections":[],"depth":3},{"title":"Aumento de Datos","local":"aumento-de-datos","sections":[],"depth":3}],"depth":2},{"title":"Multimodal","local":"multimodal","sections":[{"title":"Processor","local":"processor","sections":[],"depth":3}],"depth":2},{"title":"Todo lo que siempre quisiste saber sobre el padding y el truncamiento","local":"todo-lo-que-siempre-quisiste-saber-sobre-el-padding-y-el-truncamiento","sections":[],"depth":2}],"depth":1}';function Xt($){return Mt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ep extends Kn{constructor(p){super(),On(this,p,Xt,Ft,Dn,{})}}export{ep as component}; | |
Xet Storage Details
- Size:
- 101 kB
- Xet hash:
- 1aaadf634b1a431fd93f3ebe5f15d974d10d5b9b9ab59300f60ab00f3a0ff8b4
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.