Buckets:
| import{s as sl,o as tl}from"../chunks/scheduler.37c15a92.js";import{S as nl,i as al,g as d,s as i,r as c,A as il,h as J,f as s,c as o,j as Le,u as m,x as T,k as Ke,y as ol,a as n,v as M,t as r,b as Oe,d as u,w as b,p as el}from"../chunks/index.2bf4358c.js";import{C as w}from"../chunks/CodeBlock.4e987730.js";import{C as ll}from"../chunks/CourseFloatingBanner.9ff4c771.js";import{F as pl}from"../chunks/FrameworkSwitchCourse.8d4d4ab6.js";import{H as Ge,E as rl}from"../chunks/getInferenceSnippets.24b50994.js";function ul(j){let a,p;return a=new ll({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_tf.ipynb"}]}}),{c(){c(a.$$.fragment)},l(t){m(a.$$.fragment,t)},m(t,y){M(a,t,y),p=!0},i(t){p||(u(a.$$.fragment,t),p=!0)},o(t){r(a.$$.fragment,t),p=!1},d(t){b(a,t)}}}function cl(j){let a,p;return a=new ll({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_pt.ipynb"}]}}),{c(){c(a.$$.fragment)},l(t){m(a.$$.fragment,t)},m(t,y){M(a,t,y),p=!0},i(t){p||(u(a.$$.fragment,t),p=!0)},o(t){r(a.$$.fragment,t),p=!1},d(t){b(a,t)}}}function ml(j){let a,p;return a=new w({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFRGQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQWNoZWNrcG9pbnQlMjAlM0QlMjAlMjJkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZC1maW5ldHVuZWQtc3N0LTItZW5nbGlzaCUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBbW9kZWwlMjAlM0QlMjBURkF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBc2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEF0b2tlbnMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBb3V0cHV0JTIwJTNEJTIwbW9kZWwoKip0b2tlbnMp",highlighted:`<span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, TFAutoModelForSequenceClassification | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint) | |
| sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span>) | |
| output = model(**tokens)`,wrap:!1}}),{c(){c(a.$$.fragment)},l(t){m(a.$$.fragment,t)},m(t,y){M(a,t,y),p=!0},i(t){p||(u(a.$$.fragment,t),p=!0)},o(t){r(a.$$.fragment,t),p=!1},d(t){b(a,t)}}}function Ml(j){let a,p;return a=new w({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbi5mcm9tX3ByZXRyYWluZWQoY2hlY2twb2ludCklMEFzZXF1ZW5jZXMlMjAlM0QlMjAlNUIlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMkMlMjAlMjJTbyUyMGhhdmUlMjBJISUyMiU1RCUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXQlMjAlM0QlMjBtb2RlbCgqKnRva2Vucyk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSequenceClassification | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = AutoModelForSequenceClassification.from_pretrained(checkpoint) | |
| sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| output = model(**tokens)`,wrap:!1}}),{c(){c(a.$$.fragment)},l(t){m(a.$$.fragment,t)},m(t,y){M(a,t,y),p=!0},i(t){p||(u(a.$$.fragment,t),p=!0)},o(t){r(a.$$.fragment,t),p=!1},d(t){b(a,t)}}}function bl(j){let a,p,t,y,U,te,Z,ne,h,f,ee,k,ve="Nelle ultime sezioni abbiamo fatto del nostro meglio per fare la maggior parte del lavoro a mano. Abbiamo esplorato il funzionamento dei tokenizer e abbiamo esaminato la tokenizzazione, la conversione in ID di input, il padding, il troncamento e le maschere di attenzione.",ae,z,Se="Tuttavia, come abbiamo visto nella sezione 2, l’API 🤗 Transformers può gestire tutto questo con una funzione di alto livello che approfondiremo qui. Quando si chiama il <code>tokenizer</code> direttamente sulla frase, si ottengono input pronti per passare attraverso il modello:",ie,V,oe,$,qe="Qui, la variabile <code>model_inputs</code> contiene tutto ciò che è necessario per il buon funzionamento del modello. Per DistilBERT, questo include gli ID degli ingressi e la maschera di attenzione. Altri modelli che accettano input aggiuntivi avranno anche questi output dall’oggetto <code>tokenizer</code>.",pe,W,Xe="Come vedremo in alcuni esempi, questo metodo è molto potente. Innanzitutto, può tokenizzare una singola sequenza:",re,B,ue,G,Ee="Gestisce anche più sequenze alla volta, senza alcuna modifica dell’API:",ce,v,me,S,Re="Possiamo implementare il padding in diversi modi",Me,q,be,X,_e="Può anche troncare le sequenze:",de,E,Je,R,xe="L’oggetto <code>tokenizer</code> può gestire la conversione in tensori di framework specifici, che possono successivamente essere inviati direttamente al modello. Per esempio, nel seguente esempio di codice si chiede al tokenizer di restituire i tensori dei diversi framework: <code>"pt"</code> restituisce i tensori di PyTorch, <code>"tf"</code> restituisce i tensori di TensorFlow e <code>"np"</code> restituisce gli array di NumPy:",ye,_,Te,x,we,N,Ne="Se diamo un’occhiata agli ID di input restituiti dal tokenizer, noteremo che sono leggermente diversi da quelli che avevamo prima:",je,Y,he,Q,fe,C,Ye="Un ID token è stato aggiunto all’inizio e uno alla fine. Decodifichiamo le due sequenze di ID qui sopra per capire di cosa si tratta:",ge,F,Ie,H,Ue,A,Qe="Il tokenizer ha aggiunto la parola speciale <code>[CLS]</code> all’inizio e la parola speciale <code>[SEP]</code> alla fine. Questo perché il modello è stato preaddestrato con queste parole, quindi per ottenere gli stessi risultati per l’inferenza dobbiamo aggiungerle anche noi. Si noti che alcuni modelli non aggiungono parole speciali, o ne aggiungono di diverse; i modelli possono anche aggiungere queste parole speciali solo all’inizio o solo alla fine. In ogni caso, il tokenizer sa quali sono previste e se ne occuperà per voi.",Ze,D,ke,P,Ce="Ora che abbiamo visto tutti i singoli passaggi che l’oggetto <code>tokenizer</code> utilizza quando viene applicato ai testi, vediamo un’ultima volta come può gestire sequenze multiple (padding!), sequenze molto lunghe (troncamento!) e diversi tipi di tensori con la sua API principale:",ze,g,I,le,L,Ve,se,$e;U=new pl({props:{fw:j[0]}}),Z=new Ge({props:{title:"Mettiamo insieme i pezzi",local:"mettiamo-insieme-i-pezzi",headingTag:"h1"}});const Fe=[cl,ul],K=[];function He(e,l){return e[0]==="pt"?0:1}h=He(j),f=K[h]=Fe[h](j),V=new w({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQSUwQXNlcXVlbmNlJTIwJTNEJTIwJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTBBJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence)`,wrap:!1}}),B=new w({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2Up",highlighted:`sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence)`,wrap:!1}}),v=new w({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzKQ==",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| model_inputs = tokenizer(sequences)`,wrap:!1}}),q=new w({props:{code:"JTIzJTIwRWZmZXR0dWElMjBpbCUyMHBhZGRpbmclMjBkZWxsYSUyMHNlcXVlbnphJTIwZmlubyUyMGFsbGxhJTIwbWFzc2ltYSUyMGx1bmdoZXp6YSUyMGRlbGxhJTIwc2VxdWVuemElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRCUyMmxvbmdlc3QlMjIpJTBBJTBBJTIzJTIwRWZmZXR0dWElMjBpbCUyMHBhZGRpbmclMjBmaW5vJTIwYWxsYSUyMGx1bmdoZXp6YSUyMG1hc3NpbWElMjBkZWwlMjBtb2RlbGxvJTBBJTIzJTIwKDUxMiUyMHBlciUyMEJFUlQlMjBvJTIwRGlzdGlsQkVSVCklMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRCUyMm1heF9sZW5ndGglMjIpJTBBJTBBJTIzJTIwRWZmZXR0dWElMjBpbCUyMHBhZGRpbmclMjBmaW5vJTIwYWxsYSUyMGx1bmdoZXp6YSUyMG1hc3NpbWElMjBzcGVjaWZpY2F0YSUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybWF4X2xlbmd0aCUyMiUyQyUyMG1heF9sZW5ndGglM0Q4KQ==",highlighted:`<span class="hljs-comment"># Effettua il padding della sequenza fino allla massima lunghezza della sequenza</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"longest"</span>) | |
| <span class="hljs-comment"># Effettua il padding fino alla lunghezza massima del modello</span> | |
| <span class="hljs-comment"># (512 per BERT o DistilBERT)</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"max_length"</span>) | |
| <span class="hljs-comment"># Effettua il padding fino alla lunghezza massima specificata</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"max_length"</span>, max_length=<span class="hljs-number">8</span>)`,wrap:!1}}),E=new w({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBUcm9uY2ElMjBsZSUyMHNlcXVlbnplJTIwcGklQzMlQjklMjBsdW5naGUlMjBkZWxsYSUyMGx1bmdoZXp6YSUyMG1hc3NpbWElMjBkZWwlMjBtb2RlbGxvLiUwQSUyMyUyMCg1MTIlMjBwZXIlMjBCRVJUJTIwbyUyMERpc3RpbEJFUlQpJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHRydW5jYXRpb24lM0RUcnVlKSUwQSUwQSUyMyUyMFRyb25jYSUyMGxlJTIwc2VxdWVuemUlMjBwaSVDMyVCOSUyMGx1bmdoZSUyMGRlbGxhJTIwbHVuZ2hlenphJTIwbWFzc2ltYSUyMHNwZWNpZmljYXRhLiUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBtYXhfbGVuZ3RoJTNEOCUyQyUyMHRydW5jYXRpb24lM0RUcnVlKQ==",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| <span class="hljs-comment"># Tronca le sequenze più lunghe della lunghezza massima del modello.</span> | |
| <span class="hljs-comment"># (512 per BERT o DistilBERT)</span> | |
| model_inputs = tokenizer(sequences, truncation=<span class="hljs-literal">True</span>) | |
| <span class="hljs-comment"># Tronca le sequenze più lunghe della lunghezza massima specificata.</span> | |
| model_inputs = tokenizer(sequences, max_length=<span class="hljs-number">8</span>, truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),_=new w({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBSaXRvcm5hJTIwdGVuc29yaSUyMFB5VG9yY2glMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSUwQSUwQSUyMyUyMFJpdG9ybmElMjB0ZW5zb3JpJTIwVGVuc29yRmxvdyUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBJTBBJTIzJTIwUml0b3JuYSUyME51bVB5JTIwYXJyYXlzJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJucCUyMik=",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| <span class="hljs-comment"># Ritorna tensori PyTorch</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-comment"># Ritorna tensori TensorFlow</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span>) | |
| <span class="hljs-comment"># Ritorna NumPy arrays</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"np"</span>)`,wrap:!1}}),x=new Ge({props:{title:"Token speciali",local:"token-speciali",headingTag:"h2"}}),Y=new w({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2UpJTBBcHJpbnQobW9kZWxfaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEKSUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplci50b2tlbml6ZShzZXF1ZW5jZSklMEFpZHMlMjAlM0QlMjB0b2tlbml6ZXIuY29udmVydF90b2tlbnNfdG9faWRzKHRva2VucyklMEFwcmludChpZHMp",highlighted:`sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence) | |
| <span class="hljs-built_in">print</span>(model_inputs[<span class="hljs-string">"input_ids"</span>]) | |
| tokens = tokenizer.tokenize(sequence) | |
| ids = tokenizer.convert_tokens_to_ids(tokens) | |
| <span class="hljs-built_in">print</span>(ids)`,wrap:!1}}),Q=new w({props:{code:"JTVCMTAxJTJDJTIwMTA0NSUyQyUyMDEwMDUlMkMlMjAyMzEwJTJDJTIwMjA0MiUyQyUyMDM0MDMlMkMlMjAyMDA1JTJDJTIwMTAzNyUyQyUyMDE3NjYyJTJDJTIwMTIxNzIlMkMlMjAyNjA3JTJDJTIwMjAyNiUyQyUyMDI4NzglMkMlMjAyMTY2JTJDJTIwMTAxMiUyQyUyMDEwMiU1RCUwQSU1QjEwNDUlMkMlMjAxMDA1JTJDJTIwMjMxMCUyQyUyMDIwNDIlMkMlMjAzNDAzJTJDJTIwMjAwNSUyQyUyMDEwMzclMkMlMjAxNzY2MiUyQyUyMDEyMTcyJTJDJTIwMjYwNyUyQyUyMDIwMjYlMkMlMjAyODc4JTJDJTIwMjE2NiUyQyUyMDEwMTIlNUQ=",highlighted:`[<span class="hljs-number">101</span>, <span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>] | |
| [<span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>]`,wrap:!1}}),F=new w({props:{code:"cHJpbnQodG9rZW5pemVyLmRlY29kZShtb2RlbF9pbnB1dHMlNUIlMjJpbnB1dF9pZHMlMjIlNUQpKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUoaWRzKSk=",highlighted:`<span class="hljs-built_in">print</span>(tokenizer.decode(model_inputs[<span class="hljs-string">"input_ids"</span>])) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(ids))`,wrap:!1}}),H=new w({props:{code:"JTIyJTVCQ0xTJTVEJTIwaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIwJTVCU0VQJTVEJTIyJTBBJTIyaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIy",highlighted:`<span class="hljs-string">"[CLS] i've been waiting for a huggingface course my whole life. [SEP]"</span> | |
| <span class="hljs-string">"i've been waiting for a huggingface course my whole life."</span>`,wrap:!1}}),D=new Ge({props:{title:"Conclusione: Dal tokenizer al modello",local:"conclusione-dal-tokenizer-al-modello",headingTag:"h2"}});const Ae=[Ml,ml],O=[];function De(e,l){return e[0]==="pt"?0:1}return g=De(j),I=O[g]=Ae[g](j),L=new rl({props:{source:"https://github.com/huggingface/course/blob/main/chapters/it/chapter2/6.mdx"}}),{c(){a=d("meta"),p=i(),t=d("p"),y=i(),c(U.$$.fragment),te=i(),c(Z.$$.fragment),ne=i(),f.c(),ee=i(),k=d("p"),k.textContent=ve,ae=i(),z=d("p"),z.innerHTML=Se,ie=i(),c(V.$$.fragment),oe=i(),$=d("p"),$.innerHTML=qe,pe=i(),W=d("p"),W.textContent=Xe,re=i(),c(B.$$.fragment),ue=i(),G=d("p"),G.textContent=Ee,ce=i(),c(v.$$.fragment),me=i(),S=d("p"),S.textContent=Re,Me=i(),c(q.$$.fragment),be=i(),X=d("p"),X.textContent=_e,de=i(),c(E.$$.fragment),Je=i(),R=d("p"),R.innerHTML=xe,ye=i(),c(_.$$.fragment),Te=i(),c(x.$$.fragment),we=i(),N=d("p"),N.textContent=Ne,je=i(),c(Y.$$.fragment),he=i(),c(Q.$$.fragment),fe=i(),C=d("p"),C.textContent=Ye,ge=i(),c(F.$$.fragment),Ie=i(),c(H.$$.fragment),Ue=i(),A=d("p"),A.innerHTML=Qe,Ze=i(),c(D.$$.fragment),ke=i(),P=d("p"),P.innerHTML=Ce,ze=i(),I.c(),le=i(),c(L.$$.fragment),Ve=i(),se=d("p"),this.h()},l(e){const l=il("svelte-u9bgzb",document.head);a=J(l,"META",{name:!0,content:!0}),l.forEach(s),p=o(e),t=J(e,"P",{}),Le(t).forEach(s),y=o(e),m(U.$$.fragment,e),te=o(e),m(Z.$$.fragment,e),ne=o(e),f.l(e),ee=o(e),k=J(e,"P",{"data-svelte-h":!0}),T(k)!=="svelte-1gq1u3k"&&(k.textContent=ve),ae=o(e),z=J(e,"P",{"data-svelte-h":!0}),T(z)!=="svelte-1xosnnr"&&(z.innerHTML=Se),ie=o(e),m(V.$$.fragment,e),oe=o(e),$=J(e,"P",{"data-svelte-h":!0}),T($)!=="svelte-1aee5q6"&&($.innerHTML=qe),pe=o(e),W=J(e,"P",{"data-svelte-h":!0}),T(W)!=="svelte-1518yby"&&(W.textContent=Xe),re=o(e),m(B.$$.fragment,e),ue=o(e),G=J(e,"P",{"data-svelte-h":!0}),T(G)!=="svelte-t4crif"&&(G.textContent=Ee),ce=o(e),m(v.$$.fragment,e),me=o(e),S=J(e,"P",{"data-svelte-h":!0}),T(S)!=="svelte-110r6wo"&&(S.textContent=Re),Me=o(e),m(q.$$.fragment,e),be=o(e),X=J(e,"P",{"data-svelte-h":!0}),T(X)!=="svelte-c5od7r"&&(X.textContent=_e),de=o(e),m(E.$$.fragment,e),Je=o(e),R=J(e,"P",{"data-svelte-h":!0}),T(R)!=="svelte-avke6z"&&(R.innerHTML=xe),ye=o(e),m(_.$$.fragment,e),Te=o(e),m(x.$$.fragment,e),we=o(e),N=J(e,"P",{"data-svelte-h":!0}),T(N)!=="svelte-1r8bndt"&&(N.textContent=Ne),je=o(e),m(Y.$$.fragment,e),he=o(e),m(Q.$$.fragment,e),fe=o(e),C=J(e,"P",{"data-svelte-h":!0}),T(C)!=="svelte-di8hcs"&&(C.textContent=Ye),ge=o(e),m(F.$$.fragment,e),Ie=o(e),m(H.$$.fragment,e),Ue=o(e),A=J(e,"P",{"data-svelte-h":!0}),T(A)!=="svelte-1m0dtz8"&&(A.innerHTML=Qe),Ze=o(e),m(D.$$.fragment,e),ke=o(e),P=J(e,"P",{"data-svelte-h":!0}),T(P)!=="svelte-8r23w7"&&(P.innerHTML=Ce),ze=o(e),I.l(e),le=o(e),m(L.$$.fragment,e),Ve=o(e),se=J(e,"P",{}),Le(se).forEach(s),this.h()},h(){Ke(a,"name","hf:doc:metadata"),Ke(a,"content",dl)},m(e,l){ol(document.head,a),n(e,p,l),n(e,t,l),n(e,y,l),M(U,e,l),n(e,te,l),M(Z,e,l),n(e,ne,l),K[h].m(e,l),n(e,ee,l),n(e,k,l),n(e,ae,l),n(e,z,l),n(e,ie,l),M(V,e,l),n(e,oe,l),n(e,$,l),n(e,pe,l),n(e,W,l),n(e,re,l),M(B,e,l),n(e,ue,l),n(e,G,l),n(e,ce,l),M(v,e,l),n(e,me,l),n(e,S,l),n(e,Me,l),M(q,e,l),n(e,be,l),n(e,X,l),n(e,de,l),M(E,e,l),n(e,Je,l),n(e,R,l),n(e,ye,l),M(_,e,l),n(e,Te,l),M(x,e,l),n(e,we,l),n(e,N,l),n(e,je,l),M(Y,e,l),n(e,he,l),M(Q,e,l),n(e,fe,l),n(e,C,l),n(e,ge,l),M(F,e,l),n(e,Ie,l),M(H,e,l),n(e,Ue,l),n(e,A,l),n(e,Ze,l),M(D,e,l),n(e,ke,l),n(e,P,l),n(e,ze,l),O[g].m(e,l),n(e,le,l),M(L,e,l),n(e,Ve,l),n(e,se,l),$e=!0},p(e,[l]){const Pe={};l&1&&(Pe.fw=e[0]),U.$set(Pe);let We=h;h=He(e),h!==We&&(el(),r(K[We],1,1,()=>{K[We]=null}),Oe(),f=K[h],f||(f=K[h]=Fe[h](e),f.c()),u(f,1),f.m(ee.parentNode,ee));let Be=g;g=De(e),g!==Be&&(el(),r(O[Be],1,1,()=>{O[Be]=null}),Oe(),I=O[g],I||(I=O[g]=Ae[g](e),I.c()),u(I,1),I.m(le.parentNode,le))},i(e){$e||(u(U.$$.fragment,e),u(Z.$$.fragment,e),u(f),u(V.$$.fragment,e),u(B.$$.fragment,e),u(v.$$.fragment,e),u(q.$$.fragment,e),u(E.$$.fragment,e),u(_.$$.fragment,e),u(x.$$.fragment,e),u(Y.$$.fragment,e),u(Q.$$.fragment,e),u(F.$$.fragment,e),u(H.$$.fragment,e),u(D.$$.fragment,e),u(I),u(L.$$.fragment,e),$e=!0)},o(e){r(U.$$.fragment,e),r(Z.$$.fragment,e),r(f),r(V.$$.fragment,e),r(B.$$.fragment,e),r(v.$$.fragment,e),r(q.$$.fragment,e),r(E.$$.fragment,e),r(_.$$.fragment,e),r(x.$$.fragment,e),r(Y.$$.fragment,e),r(Q.$$.fragment,e),r(F.$$.fragment,e),r(H.$$.fragment,e),r(D.$$.fragment,e),r(I),r(L.$$.fragment,e),$e=!1},d(e){e&&(s(p),s(t),s(y),s(te),s(ne),s(ee),s(k),s(ae),s(z),s(ie),s(oe),s($),s(pe),s(W),s(re),s(ue),s(G),s(ce),s(me),s(S),s(Me),s(be),s(X),s(de),s(Je),s(R),s(ye),s(Te),s(we),s(N),s(je),s(he),s(fe),s(C),s(ge),s(Ie),s(Ue),s(A),s(Ze),s(ke),s(P),s(ze),s(le),s(Ve),s(se)),s(a),b(U,e),b(Z,e),K[h].d(e),b(V,e),b(B,e),b(v,e),b(q,e),b(E,e),b(_,e),b(x,e),b(Y,e),b(Q,e),b(F,e),b(H,e),b(D,e),O[g].d(e),b(L,e)}}}const dl='{"title":"Mettiamo insieme i pezzi","local":"mettiamo-insieme-i-pezzi","sections":[{"title":"Token speciali","local":"token-speciali","sections":[],"depth":2},{"title":"Conclusione: Dal tokenizer al modello","local":"conclusione-dal-tokenizer-al-modello","sections":[],"depth":2}],"depth":1}';function Jl(j,a,p){let t="pt";return tl(()=>{const y=new URLSearchParams(window.location.search);p(0,t=y.get("fw")||"pt")}),[t]}class gl extends nl{constructor(a){super(),al(this,a,Jl,bl,sl,{})}}export{gl as component}; | |
Xet Storage Details
- Size:
- 23.7 kB
- Xet hash:
- 5b6a21f6835caa28dd6d2bf53b88bd72a6bf1eb4753495f5fb2cc40dca27b674
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.