Buckets:
| import{s as Tt,o as ht,n as Ut}from"../chunks/scheduler.37c15a92.js";import{S as ft,i as Ct,g as m,s,r as i,A as It,h as M,f as t,c as n,j as bt,u as o,x as u,k as Jt,y as Zt,a,v as r,d as p,t as c,w as d,m as Bt,n as gt}from"../chunks/index.2bf4358c.js";import{T as wt}from"../chunks/Tip.363c041f.js";import{Y as jt}from"../chunks/Youtube.1e50a667.js";import{C as y}from"../chunks/CodeBlock.4e987730.js";import{D as $t}from"../chunks/DocNotebookDropdown.efc1fb7c.js";import{H as Ge,E as Gt}from"../chunks/getInferenceSnippets.ebf8be91.js";function vt(Be){let b,J="✏️ <strong>Prova tu!</strong> Modifica il ciclo di addestramento precedente per affinare il modello sul dataset SST-2.";return{c(){b=m("p"),b.innerHTML=J},l(w){b=M(w,"P",{"data-svelte-h":!0}),u(b)!=="svelte-1tob4ng"&&(b.innerHTML=J)},m(w,ge){a(w,b,ge)},p:Ut,d(w){w&&t(b)}}}function kt(Be){let b;return{c(){b=Bt('⚠️ Per poter beneficiare dell\'accelerazione offerta da Cloud TPUs, è raccomandabile applicare padding ad una lunghezza fissa tramite gli argomenti `padding="max_length"` e `max_length` del tokenizer.')},l(J){b=gt(J,'⚠️ Per poter beneficiare dell\'accelerazione offerta da Cloud TPUs, è raccomandabile applicare padding ad una lunghezza fissa tramite gli argomenti `padding="max_length"` e `max_length` del tokenizer.')},m(J,w){a(J,b,w)},d(J){J&&t(b)}}}function zt(Be){let b,J,w,ge,h,ve,U,ke,f,ze,C,Nl="Ora vedremo come ottenere gli stessi risultati della sezione precedente senza utilizzare la classe <code>Trainer</code>. Ancora una volta, aver compiuto il processing dei dati spiegato nella sezione 2 è un prerequisito. Ecco un riassunto di tutto ciò di cui avrete bisogno:",Xe,I,We,Z,Re,B,Al="Prima di cominciare a scrivere il nostro ciclo di addestramento, dobbiamo definire alcuni oggetti. Per prima cosa, i dataloaders (caricatori di dati) che useremo per iterare sulle batch. Ma prima di poter definire i dataloaders, dobbiamo applicare un po’ di postprocessing ai nostri <code>tokenized_datasets</code>, per compiere alcune operazione che <code>Trainer</code> gestiva in automatico per noi. Nello specifico dobbiamo:",_e,g,El="<li>Rimuovere le colonne corrispondente a valori che il modello non si aspetta (come ad esempio le colonne <code>sentence1</code> e <code>sentence2 </code>).</li> <li>Rinominare la colonna <code>label</code> a <code>labels</code> (perché il modello si aspetta questo nome).</li> <li>Fissare il formato dei datasets in modo che restituiscano tensori Pytorch invece di liste.</li>",Ye,$,Fl="L’oggetto <code>tokenized_datasets</code> ha un metodo per ciascuno di questi punti:",Ve,G,Ne,v,Hl="Possiamo poi controllare che il risultato ha solo solo colonne che saranno accettate dal nostro modello:",Ae,k,Ee,z,Ql="Ora che questo è fatto, possiamo finalmente definire i dataloaders in maniera semplice:",Fe,X,He,W,xl="Per controllare velocemente che non ci sono errori nel processing dei dati, possiamo ispezionare una batch in questo modo:",Qe,R,xe,_,Se,Y,Sl="È importante sottolineare che i valori di shape (forma) potrebbero essere leggermente diversi per voi, poiché abbiamo fissato <code>shuffle=True</code> (rimescolamento attivo) per i dataloader di apprendimento, e stiamo applicando padding alla lunghezza massima all’interno della batch.",qe,V,ql="Ora che il preprocessing dei dati è completato (uno scopo soddisfacente ma elusivo per qualunque praticante di ML), focalizziamoci sul modello. Lo istanziamo esattamente come avevamo fatto nella sezione precedente:",Le,N,Ke,A,Ll="Per assicurarci che tutto andrà bene durante l’addestramento, passiamo la batch al modello:",Pe,E,De,F,Oe,H,Kl="Tutti i modelli 🤗 Transformers restituiscono il valore obiettivo quando vengono fornite loro le <code>labels</code>, e anche i logits (due per ciascun input della batch, quindi un tensore di dimensioni 8 x 2).",el,Q,Pl='Siamo quasi pronti a scrivere il ciclo di addestramento! Mancano solo due cose: un ottimizzatore e un learning rate scheduler. Poiché stiamo tentando di replicare a mano ciò che viene fatto dal <code>Trainer</code>, utilizzeremo gli stessi valori di default. L’ottimizzatore utilizzato dal <code>Trainer</code> è <code>AdamW</code>, che è lo stesso di Adam ma con una variazione per quanto riguarda la regolarizzazione del decadimento dei pesi (rif. <a href="https://arxiv.org/abs/1711.05101" rel="nofollow">“Decoupled Weight Decay Regularization”</a> di Ilya Loshchilov e Frank Hutter):',ll,x,tl,S,Dl="Infine, il learning rate scheduler usato di default è solo un decadimento lineare dal valore massimo (5e-5) fino a 0. Per definirlo correttamente, dobbiamo sapere il numero di iterazioni per l’addestramento, che è dato dal numero di epoche che vogliamo eseguire moltiplicato per il numero di batch per l’addestramento (ovverosia la lunghezza del dataloader). Il <code>Trainer</code> usa 3 epoche di default, quindi:",al,q,sl,L,nl,K,il,P,Ol="Un’ultima cosa: se si ha accesso ad una GPU è consigliato usarla (su una CPU, l’addestramento potrebbe richiedere svariate ore invece di un paio di minuti). Per usare la GPU, definiamo un <code>device</code> su cui spostare il modello e le batch:",ol,D,rl,O,pl,ee,et="Siamo pronti per l’addestramento! Per avere un’intuizione di quando sarà finito, aggiungiamo una barra di progresso sul numero di iterazioni di addestramento, usando la libreria <code>tqdm</code>:",cl,le,dl,te,lt="Potete vedere che il nocciolo del ciclo di addestramento è molto simile a quello nell’introduzione. Non abbiamo chiesto nessun report, quindi il ciclo non ci informerà su come si sta comportando il modello. Dobbiamo aggiungere un ciclo di valutazione per quello.",ml,ae,Ml,se,tt="Come fatto in precedenza, utilizzeremo una metrica fornita dalla libreria 🤗 Datasets. Abbiamo già visto il metodo <code>metric.compute()</code>, ma le metriche possono automaticamente accumulare le batch nel ciclo di predizione col metodo <code>add_batch()</code>. Una volta accumulate tutte le batch, possiamo ottenere il risultato finale con <code>metric.compute()</code>. Ecco come implementare tutto ciò in un ciclo di valutazione:",ul,ne,yl,ie,bl,oe,at="Ancora una volta, i vostri risultati potrebbero essere leggermente diversi a causa della casualità nell’inizializzazione della testa del modello e del ricombinamento dei dati, ma dovrebbero essere nello stesso ordine di grandezza.",Jl,j,wl,re,jl,pe,Tl,ce,st='Il ciclo di addestramento che abbiamo definito prima funziona bene per una sola CPU o GPU. Ma grazie alla libreria <a href="https://github.com/huggingface/accelerate" rel="nofollow">🤗 Accelerate</a>, con alcuni aggiustamenti possiamo attivare l’addestramento distribuito su svariate GPU o TPU. Partendo dalla creazione dei dataloaders di addestramento e validazione, ecco l’aspetto del nostro ciclo di addestramento manuale:',hl,de,Ul,me,nt="Ecco i cambiamenti necessari:",fl,Me,Cl,ue,it="Prima di tutto bisogna inserire la linea di importazione. La seconda linea istanzia un oggetto di tipo <code>Accelerator</code> che controllerà e inizializzerà il corretto ambiente distribuito. 🤗 Accelerate gestice il posizionamento sui dispositivi per voi, quindi potete togliere le linee che spostavano il modello sul dispositivo (o, se preferite, cambiare in modo da usare <code>acceleratore.device</code> invece di <code>device</code>).",Il,ye,ot="Dopodiché la maggior parte del lavoro è fatta dalla linea che invia i dataloaders, il modello e gli ottimizzatori a <code>accelerator.prepare()</code>. Ciò serve a incapsulare queli oggetti nei contenitori appropriati per far sì che l’addestramento distribuito funzioni correttamente. I cambiamenti rimanenti sono la rimozione della linea che sposta la batch sul <code>device</code> (dispositivo) (di nuovo, se volete tenerlo potete semplicemente cambiarlo con <code>accelerator.device</code>) e lo scambio di <code>loss.backward()</code> con <code>accelerator.backward(loss)</code>.",Zl,T,Bl,be,rt="Se volete copiare e incollare il codice per giocarci, ecco un ciclo di addestramento completo che usa 🤗 Accelerate:",gl,Je,$l,we,pt="Mettere questo codice in uno script <code>train.py</code> lo renderà eseguibile su qualsiasi ambiente distribuito. Per provarlo nel vostro ambiente distribuito, eseguite:",Gl,je,vl,Te,ct="che vi chiederà di rispondere ad alcune domande e inserirà le vostre risposte in un documento di configurazione usato dal comando:",kl,he,zl,Ue,dt="che eseguirà l’addestramento distribuito.",Xl,fe,mt="Se volete provarlo in un Notebook (ad esempio, per testarlo con le TPUs su Colab), incollate il codice in una <code>training_function()</code> ed eseguite l’ultima cella con:",Wl,Ce,Rl,Ie,Mt='Potete trovare altri esempi nella <a href="https://github.com/huggingface/accelerate/tree/main/examples" rel="nofollow">🤗 Accelerate repo</a>.',_l,Ze,Yl,$e,Vl;return h=new Ge({props:{title:"Un addestramento completo",local:"un-addestramento-completo",headingTag:"h1"}}),U=new $t({props:{classNames:"absolute z-10 right-0 top-0",options:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/it/chapter3/section4.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/it/chapter3/section4.ipynb"}]}}),f=new jt({props:{id:"Dh9CL8fyG80"}}),I=new y({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBEYXRhQ29sbGF0b3JXaXRoUGFkZGluZyUwQSUwQXJhd19kYXRhc2V0cyUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJnbHVlJTIyJTJDJTIwJTIybXJwYyUyMiklMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyYmVydC1iYXNlLXVuY2FzZWQlMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQSUwQSUwQWRlZiUyMHRva2VuaXplX2Z1bmN0aW9uKGV4YW1wbGUpJTNBJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwdG9rZW5pemVyKGV4YW1wbGUlNUIlMjJzZW50ZW5jZTElMjIlNUQlMkMlMjBleGFtcGxlJTVCJTIyc2VudGVuY2UyJTIyJTVEJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUpJTBBJTBBJTBBdG9rZW5pemVkX2RhdGFzZXRzJTIwJTNEJTIwcmF3X2RhdGFzZXRzLm1hcCh0b2tlbml6ZV9mdW5jdGlvbiUyQyUyMGJhdGNoZWQlM0RUcnVlKSUwQWRhdGFfY29sbGF0b3IlMjAlM0QlMjBEYXRhQ29sbGF0b3JXaXRoUGFkZGluZyh0b2tlbml6ZXIlM0R0b2tlbml6ZXIp",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, DataCollatorWithPadding | |
| raw_datasets = load_dataset(<span class="hljs-string">"glue"</span>, <span class="hljs-string">"mrpc"</span>) | |
| checkpoint = <span class="hljs-string">"bert-base-uncased"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize_function</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-keyword">return</span> tokenizer(example[<span class="hljs-string">"sentence1"</span>], example[<span class="hljs-string">"sentence2"</span>], truncation=<span class="hljs-literal">True</span>) | |
| tokenized_datasets = raw_datasets.<span class="hljs-built_in">map</span>(tokenize_function, batched=<span class="hljs-literal">True</span>) | |
| data_collator = DataCollatorWithPadding(tokenizer=tokenizer)`,wrap:!1}}),Z=new Ge({props:{title:"Preparazione all’addestramento",local:"preparazione-alladdestramento",headingTag:"h3"}}),G=new y({props:{code:"dG9rZW5pemVkX2RhdGFzZXRzJTIwJTNEJTIwdG9rZW5pemVkX2RhdGFzZXRzLnJlbW92ZV9jb2x1bW5zKCU1QiUyMnNlbnRlbmNlMSUyMiUyQyUyMCUyMnNlbnRlbmNlMiUyMiUyQyUyMCUyMmlkeCUyMiU1RCklMEF0b2tlbml6ZWRfZGF0YXNldHMlMjAlM0QlMjB0b2tlbml6ZWRfZGF0YXNldHMucmVuYW1lX2NvbHVtbiglMjJsYWJlbCUyMiUyQyUyMCUyMmxhYmVscyUyMiklMEF0b2tlbml6ZWRfZGF0YXNldHMuc2V0X2Zvcm1hdCglMjJ0b3JjaCUyMiklMEF0b2tlbml6ZWRfZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RC5jb2x1bW5fbmFtZXM=",highlighted:`tokenized_datasets = tokenized_datasets.remove_columns([<span class="hljs-string">"sentence1"</span>, <span class="hljs-string">"sentence2"</span>, <span class="hljs-string">"idx"</span>]) | |
| tokenized_datasets = tokenized_datasets.rename_column(<span class="hljs-string">"label"</span>, <span class="hljs-string">"labels"</span>) | |
| tokenized_datasets.set_format(<span class="hljs-string">"torch"</span>) | |
| tokenized_datasets[<span class="hljs-string">"train"</span>].column_names`,wrap:!1}}),k=new y({props:{code:"JTVCJTIyYXR0ZW50aW9uX21hc2slMjIlMkMlMjAlMjJpbnB1dF9pZHMlMjIlMkMlMjAlMjJsYWJlbHMlMjIlMkMlMjAlMjJ0b2tlbl90eXBlX2lkcyUyMiU1RA==",highlighted:'[<span class="hljs-string">"attention_mask"</span>, <span class="hljs-string">"input_ids"</span>, <span class="hljs-string">"labels"</span>, <span class="hljs-string">"token_type_ids"</span>]',wrap:!1}}),X=new y({props:{code:"ZnJvbSUyMHRvcmNoLnV0aWxzLmRhdGElMjBpbXBvcnQlMjBEYXRhTG9hZGVyJTBBJTBBdHJhaW5fZGF0YWxvYWRlciUyMCUzRCUyMERhdGFMb2FkZXIoJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVkX2RhdGFzZXRzJTVCJTIydHJhaW4lMjIlNUQlMkMlMjBzaHVmZmxlJTNEVHJ1ZSUyQyUyMGJhdGNoX3NpemUlM0Q4JTJDJTIwY29sbGF0ZV9mbiUzRGRhdGFfY29sbGF0b3IlMEEpJTBBZXZhbF9kYXRhbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlciglMEElMjAlMjAlMjAlMjB0b2tlbml6ZWRfZGF0YXNldHMlNUIlMjJ2YWxpZGF0aW9uJTIyJTVEJTJDJTIwYmF0Y2hfc2l6ZSUzRDglMkMlMjBjb2xsYXRlX2ZuJTNEZGF0YV9jb2xsYXRvciUwQSk=",highlighted:`<span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader | |
| train_dataloader = DataLoader( | |
| tokenized_datasets[<span class="hljs-string">"train"</span>], shuffle=<span class="hljs-literal">True</span>, batch_size=<span class="hljs-number">8</span>, collate_fn=data_collator | |
| ) | |
| eval_dataloader = DataLoader( | |
| tokenized_datasets[<span class="hljs-string">"validation"</span>], batch_size=<span class="hljs-number">8</span>, collate_fn=data_collator | |
| )`,wrap:!1}}),R=new y({props:{code:"Zm9yJTIwYmF0Y2glMjBpbiUyMHRyYWluX2RhdGFsb2FkZXIlM0ElMEElMjAlMjAlMjAlMjBicmVhayUwQSU3QmslM0ElMjB2LnNoYXBlJTIwZm9yJTIwayUyQyUyMHYlMjBpbiUyMGJhdGNoLml0ZW1zKCklN0Q=",highlighted:`<span class="hljs-keyword">for</span> batch <span class="hljs-keyword">in</span> train_dataloader: | |
| <span class="hljs-keyword">break</span> | |
| {k: v.shape <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> batch.items()}`,wrap:!1}}),_=new y({props:{code:"JTdCJ2F0dGVudGlvbl9tYXNrJyUzQSUyMHRvcmNoLlNpemUoJTVCOCUyQyUyMDY1JTVEKSUyQyUwQSUyMCdpbnB1dF9pZHMnJTNBJTIwdG9yY2guU2l6ZSglNUI4JTJDJTIwNjUlNUQpJTJDJTBBJTIwJ2xhYmVscyclM0ElMjB0b3JjaC5TaXplKCU1QjglNUQpJTJDJTBBJTIwJ3Rva2VuX3R5cGVfaWRzJyUzQSUyMHRvcmNoLlNpemUoJTVCOCUyQyUyMDY1JTVEKSU3RA==",highlighted:`{<span class="hljs-string">'attention_mask'</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">65</span>]), | |
| <span class="hljs-string">'input_ids'</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">65</span>]), | |
| <span class="hljs-string">'labels'</span>: torch.Size([<span class="hljs-number">8</span>]), | |
| <span class="hljs-string">'token_type_ids'</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">65</span>])}`,wrap:!1}}),N=new y({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMEElMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQlMkMlMjBudW1fbGFiZWxzJTNEMik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSequenceClassification | |
| model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=<span class="hljs-number">2</span>)`,wrap:!1}}),E=new y({props:{code:"b3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqYmF0Y2gpJTBBcHJpbnQob3V0cHV0cy5sb3NzJTJDJTIwb3V0cHV0cy5sb2dpdHMuc2hhcGUp",highlighted:`outputs = model(**batch) | |
| <span class="hljs-built_in">print</span>(outputs.loss, outputs.logits.shape)`,wrap:!1}}),F=new y({props:{code:"dGVuc29yKDAuNTQ0MSUyQyUyMGdyYWRfZm4lM0QlM0NObGxMb3NzQmFja3dhcmQlM0UpJTIwdG9yY2guU2l6ZSglNUI4JTJDJTIwMiU1RCk=",highlighted:'tensor(<span class="hljs-number">0.5441</span>, grad_fn=<NllLossBackward>) torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">2</span>])',wrap:!1}}),x=new y({props:{code:"ZnJvbSUyMHRvcmNoLm9wdGltJTIwaW1wb3J0JTIwQWRhbVclMEElMEFvcHRpbWl6ZXIlMjAlM0QlMjBBZGFtVyhtb2RlbC5wYXJhbWV0ZXJzKCklMkMlMjBsciUzRDVlLTUp",highlighted:`<span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW | |
| optimizer = AdamW(model.parameters(), lr=<span class="hljs-number">5e-5</span>)`,wrap:!1}}),q=new y({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMGdldF9zY2hlZHVsZXIlMEElMEFudW1fZXBvY2hzJTIwJTNEJTIwMyUwQW51bV90cmFpbmluZ19zdGVwcyUyMCUzRCUyMG51bV9lcG9jaHMlMjAqJTIwbGVuKHRyYWluX2RhdGFsb2FkZXIpJTBBbHJfc2NoZWR1bGVyJTIwJTNEJTIwZ2V0X3NjaGVkdWxlciglMEElMjAlMjAlMjAlMjAlMjJsaW5lYXIlMjIlMkMlMEElMjAlMjAlMjAlMjBvcHRpbWl6ZXIlM0RvcHRpbWl6ZXIlMkMlMEElMjAlMjAlMjAlMjBudW1fd2FybXVwX3N0ZXBzJTNEMCUyQyUwQSUyMCUyMCUyMCUyMG51bV90cmFpbmluZ19zdGVwcyUzRG51bV90cmFpbmluZ19zdGVwcyUyQyUwQSklMEFwcmludChudW1fdHJhaW5pbmdfc3RlcHMp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> get_scheduler | |
| num_epochs = <span class="hljs-number">3</span> | |
| num_training_steps = num_epochs * <span class="hljs-built_in">len</span>(train_dataloader) | |
| lr_scheduler = get_scheduler( | |
| <span class="hljs-string">"linear"</span>, | |
| optimizer=optimizer, | |
| num_warmup_steps=<span class="hljs-number">0</span>, | |
| num_training_steps=num_training_steps, | |
| ) | |
| <span class="hljs-built_in">print</span>(num_training_steps)`,wrap:!1}}),L=new y({props:{code:"MTM3Nw==",highlighted:'<span class="hljs-number">1377</span>',wrap:!1}}),K=new Ge({props:{title:"Il ciclo di addestramento",local:"il-ciclo-di-addestramento",headingTag:"h3"}}),D=new y({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEFkZXZpY2UlMjAlM0QlMjB0b3JjaC5kZXZpY2UoJTIyY3VkYSUyMiklMjBpZiUyMHRvcmNoLmN1ZGEuaXNfYXZhaWxhYmxlKCklMjBlbHNlJTIwdG9yY2guZGV2aWNlKCUyMmNwdSUyMiklMEFtb2RlbC50byhkZXZpY2UpJTBBZGV2aWNl",highlighted:`<span class="hljs-keyword">import</span> torch | |
| device = torch.device(<span class="hljs-string">"cuda"</span>) <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> torch.device(<span class="hljs-string">"cpu"</span>) | |
| model.to(device) | |
| device`,wrap:!1}}),O=new y({props:{code:"ZGV2aWNlKHR5cGUlM0QnY3VkYScp",highlighted:'device(<span class="hljs-built_in">type</span>=<span class="hljs-string">'cuda'</span>)',wrap:!1}}),le=new y({props:{code:"ZnJvbSUyMHRxZG0uYXV0byUyMGltcG9ydCUyMHRxZG0lMEElMEFwcm9ncmVzc19iYXIlMjAlM0QlMjB0cWRtKHJhbmdlKG51bV90cmFpbmluZ19zdGVwcykpJTBBJTBBbW9kZWwudHJhaW4oKSUwQWZvciUyMGVwb2NoJTIwaW4lMjByYW5nZShudW1fZXBvY2hzKSUzQSUwQSUyMCUyMCUyMCUyMGZvciUyMGJhdGNoJTIwaW4lMjB0cmFpbl9kYXRhbG9hZGVyJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYmF0Y2glMjAlM0QlMjAlN0JrJTNBJTIwdi50byhkZXZpY2UpJTIwZm9yJTIwayUyQyUyMHYlMjBpbiUyMGJhdGNoLml0ZW1zKCklN0QlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKipiYXRjaCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwb3V0cHV0cy5sb3NzJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbG9zcy5iYWNrd2FyZCgpJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGxyX3NjaGVkdWxlci5zdGVwKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcHRpbWl6ZXIuemVyb19ncmFkKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9ncmVzc19iYXIudXBkYXRlKDEp",highlighted:`<span class="hljs-keyword">from</span> tqdm.auto <span class="hljs-keyword">import</span> tqdm | |
| progress_bar = tqdm(<span class="hljs-built_in">range</span>(num_training_steps)) | |
| model.train() | |
| <span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_epochs): | |
| <span class="hljs-keyword">for</span> batch <span class="hljs-keyword">in</span> train_dataloader: | |
| batch = {k: v.to(device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> batch.items()} | |
| outputs = model(**batch) | |
| loss = outputs.loss | |
| loss.backward() | |
| optimizer.step() | |
| lr_scheduler.step() | |
| optimizer.zero_grad() | |
| progress_bar.update(<span class="hljs-number">1</span>)`,wrap:!1}}),ae=new Ge({props:{title:"Il ciclo di valutazione",local:"il-ciclo-di-valutazione",headingTag:"h3"}}),ne=new y({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9tZXRyaWMlMEElMEFtZXRyaWMlMjAlM0QlMjBsb2FkX21ldHJpYyglMjJnbHVlJTIyJTJDJTIwJTIybXJwYyUyMiklMEFtb2RlbC5ldmFsKCklMEFmb3IlMjBiYXRjaCUyMGluJTIwZXZhbF9kYXRhbG9hZGVyJTNBJTBBJTIwJTIwJTIwJTIwYmF0Y2glMjAlM0QlMjAlN0JrJTNBJTIwdi50byhkZXZpY2UpJTIwZm9yJTIwayUyQyUyMHYlMjBpbiUyMGJhdGNoLml0ZW1zKCklN0QlMEElMjAlMjAlMjAlMjB3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqYmF0Y2gpJTBBJTBBJTIwJTIwJTIwJTIwbG9naXRzJTIwJTNEJTIwb3V0cHV0cy5sb2dpdHMlMEElMjAlMjAlMjAlMjBwcmVkaWN0aW9ucyUyMCUzRCUyMHRvcmNoLmFyZ21heChsb2dpdHMlMkMlMjBkaW0lM0QtMSklMEElMjAlMjAlMjAlMjBtZXRyaWMuYWRkX2JhdGNoKHByZWRpY3Rpb25zJTNEcHJlZGljdGlvbnMlMkMlMjByZWZlcmVuY2VzJTNEYmF0Y2glNUIlMjJsYWJlbHMlMjIlNUQpJTBBJTBBbWV0cmljLmNvbXB1dGUoKQ==",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_metric | |
| metric = load_metric(<span class="hljs-string">"glue"</span>, <span class="hljs-string">"mrpc"</span>) | |
| model.<span class="hljs-built_in">eval</span>() | |
| <span class="hljs-keyword">for</span> batch <span class="hljs-keyword">in</span> eval_dataloader: | |
| batch = {k: v.to(device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> batch.items()} | |
| <span class="hljs-keyword">with</span> torch.no_grad(): | |
| outputs = model(**batch) | |
| logits = outputs.logits | |
| predictions = torch.argmax(logits, dim=-<span class="hljs-number">1</span>) | |
| metric.add_batch(predictions=predictions, references=batch[<span class="hljs-string">"labels"</span>]) | |
| metric.compute()`,wrap:!1}}),ie=new y({props:{code:"JTdCJ2FjY3VyYWN5JyUzQSUyMDAuODQzMTM3MjU0OTAxOTYwOCUyQyUyMCdmMSclM0ElMjAwLjg5MDc4NDk4MjkzNTE1MzUlN0Q=",highlighted:'{<span class="hljs-string">'accuracy'</span>: <span class="hljs-number">0.8431372549019608</span>, <span class="hljs-string">'f1'</span>: <span class="hljs-number">0.8907849829351535</span>}',wrap:!1}}),j=new wt({props:{$$slots:{default:[vt]},$$scope:{ctx:Be}}}),re=new Ge({props:{title:"Potenzia il tuo ciclo di addestramento con 🤗 Accelerate",local:"potenzia-il-tuo-ciclo-di-addestramento-con--accelerate",headingTag:"h3"}}),pe=new jt({props:{id:"s7dy8QRgjJ0"}}),de=new y({props:{code:"ZnJvbSUyMHRvcmNoLm9wdGltJTIwaW1wb3J0JTIwQWRhbVclMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUyQyUyMGdldF9zY2hlZHVsZXIlMEElMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQlMkMlMjBudW1fbGFiZWxzJTNEMiklMEFvcHRpbWl6ZXIlMjAlM0QlMjBBZGFtVyhtb2RlbC5wYXJhbWV0ZXJzKCklMkMlMjBsciUzRDNlLTUpJTBBJTBBZGV2aWNlJTIwJTNEJTIwdG9yY2guZGV2aWNlKCUyMmN1ZGElMjIpJTIwaWYlMjB0b3JjaC5jdWRhLmlzX2F2YWlsYWJsZSgpJTIwZWxzZSUyMHRvcmNoLmRldmljZSglMjJjcHUlMjIpJTBBbW9kZWwudG8oZGV2aWNlKSUwQSUwQW51bV9lcG9jaHMlMjAlM0QlMjAzJTBBbnVtX3RyYWluaW5nX3N0ZXBzJTIwJTNEJTIwbnVtX2Vwb2NocyUyMColMjBsZW4odHJhaW5fZGF0YWxvYWRlciklMEFscl9zY2hlZHVsZXIlMjAlM0QlMjBnZXRfc2NoZWR1bGVyKCUwQSUyMCUyMCUyMCUyMCUyMmxpbmVhciUyMiUyQyUwQSUyMCUyMCUyMCUyMG9wdGltaXplciUzRG9wdGltaXplciUyQyUwQSUyMCUyMCUyMCUyMG51bV93YXJtdXBfc3RlcHMlM0QwJTJDJTBBJTIwJTIwJTIwJTIwbnVtX3RyYWluaW5nX3N0ZXBzJTNEbnVtX3RyYWluaW5nX3N0ZXBzJTJDJTBBKSUwQSUwQXByb2dyZXNzX2JhciUyMCUzRCUyMHRxZG0ocmFuZ2UobnVtX3RyYWluaW5nX3N0ZXBzKSklMEElMEFtb2RlbC50cmFpbigpJTBBZm9yJTIwZXBvY2glMjBpbiUyMHJhbmdlKG51bV9lcG9jaHMpJTNBJTBBJTIwJTIwJTIwJTIwZm9yJTIwYmF0Y2glMjBpbiUyMHRyYWluX2RhdGFsb2FkZXIlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBiYXRjaCUyMCUzRCUyMCU3QmslM0ElMjB2LnRvKGRldmljZSklMjBmb3IlMjBrJTJDJTIwdiUyMGluJTIwYmF0Y2guaXRlbXMoKSU3RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjBtb2RlbCgqKmJhdGNoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGxvc3MlMjAlM0QlMjBvdXRwdXRzLmxvc3MlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzLmJhY2t3YXJkKCklMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcHRpbWl6ZXIuc3RlcCgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbHJfc2NoZWR1bGVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHByb2dyZXNzX2Jhci51cGRhdGUoMSk=",highlighted:`<span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSequenceClassification, get_scheduler | |
| model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=<span class="hljs-number">2</span>) | |
| optimizer = AdamW(model.parameters(), lr=<span class="hljs-number">3e-5</span>) | |
| device = torch.device(<span class="hljs-string">"cuda"</span>) <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> torch.device(<span class="hljs-string">"cpu"</span>) | |
| model.to(device) | |
| num_epochs = <span class="hljs-number">3</span> | |
| num_training_steps = num_epochs * <span class="hljs-built_in">len</span>(train_dataloader) | |
| lr_scheduler = get_scheduler( | |
| <span class="hljs-string">"linear"</span>, | |
| optimizer=optimizer, | |
| num_warmup_steps=<span class="hljs-number">0</span>, | |
| num_training_steps=num_training_steps, | |
| ) | |
| progress_bar = tqdm(<span class="hljs-built_in">range</span>(num_training_steps)) | |
| model.train() | |
| <span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_epochs): | |
| <span class="hljs-keyword">for</span> batch <span class="hljs-keyword">in</span> train_dataloader: | |
| batch = {k: v.to(device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> batch.items()} | |
| outputs = model(**batch) | |
| loss = outputs.loss | |
| loss.backward() | |
| optimizer.step() | |
| lr_scheduler.step() | |
| optimizer.zero_grad() | |
| progress_bar.update(<span class="hljs-number">1</span>)`,wrap:!1}}),Me=new y({props:{code:"JTJCJTIwZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUyMCUyMGZyb20lMjB0b3JjaC5vcHRpbSUyMGltcG9ydCUyMEFkYW1XJTBBJTIwJTIwZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMkMlMjBnZXRfc2NoZWR1bGVyJTBBJTBBJTJCJTIwYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBJTBBJTIwJTIwbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50JTJDJTIwbnVtX2xhYmVscyUzRDIpJTBBJTIwJTIwb3B0aW1pemVyJTIwJTNEJTIwQWRhbVcobW9kZWwucGFyYW1ldGVycygpJTJDJTIwbHIlM0QzZS01KSUwQSUwQS0lMjBkZXZpY2UlMjAlM0QlMjB0b3JjaC5kZXZpY2UoJTIyY3VkYSUyMiklMjBpZiUyMHRvcmNoLmN1ZGEuaXNfYXZhaWxhYmxlKCklMjBlbHNlJTIwdG9yY2guZGV2aWNlKCUyMmNwdSUyMiklMEEtJTIwbW9kZWwudG8oZGV2aWNlKSUwQSUwQSUyQiUyMHRyYWluX2RhdGFsb2FkZXIlMkMlMjBldmFsX2RhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoJTBBJTJCJTIwJTIwJTIwJTIwJTIwdHJhaW5fZGF0YWxvYWRlciUyQyUyMGV2YWxfZGF0YWxvYWRlciUyQyUyMG1vZGVsJTJDJTIwb3B0aW1pemVyJTBBJTJCJTIwKSUwQSUwQSUyMCUyMG51bV9lcG9jaHMlMjAlM0QlMjAzJTBBJTIwJTIwbnVtX3RyYWluaW5nX3N0ZXBzJTIwJTNEJTIwbnVtX2Vwb2NocyUyMColMjBsZW4odHJhaW5fZGF0YWxvYWRlciklMEElMjAlMjBscl9zY2hlZHVsZXIlMjAlM0QlMjBnZXRfc2NoZWR1bGVyKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMmxpbmVhciUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMG9wdGltaXplciUzRG9wdGltaXplciUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMG51bV93YXJtdXBfc3RlcHMlM0QwJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwbnVtX3RyYWluaW5nX3N0ZXBzJTNEbnVtX3RyYWluaW5nX3N0ZXBzJTBBJTIwJTIwKSUwQSUwQSUyMCUyMHByb2dyZXNzX2JhciUyMCUzRCUyMHRxZG0ocmFuZ2UobnVtX3RyYWluaW5nX3N0ZXBzKSklMEElMEElMjAlMjBtb2RlbC50cmFpbigpJTBBJTIwJTIwZm9yJTIwZXBvY2glMjBpbiUyMHJhbmdlKG51bV9lcG9jaHMpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwZm9yJTIwYmF0Y2glMjBpbiUyMHRyYWluX2RhdGFsb2FkZXIlM0ElMEEtJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYmF0Y2glMjAlM0QlMjAlN0JrJTNBJTIwdi50byhkZXZpY2UpJTIwZm9yJTIwayUyQyUyMHYlMjBpbiUyMGJhdGNoLml0ZW1zKCklN0QlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKipiYXRjaCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwb3V0cHV0cy5sb3NzJTBBLSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGxvc3MuYmFja3dhcmQoKSUwQSUyQiUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGxyX3NjaGVkdWxlci5zdGVwKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcHRpbWl6ZXIuemVyb19ncmFkKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9ncmVzc19iYXIudXBkYXRlKDEp",highlighted:`<span class="hljs-addition">+ from accelerate import Accelerator</span> | |
| from torch.optim import AdamW | |
| from transformers import AutoModelForSequenceClassification, get_scheduler | |
| <span class="hljs-addition">+ accelerator = Accelerator()</span> | |
| model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2) | |
| optimizer = AdamW(model.parameters(), lr=3e-5) | |
| <span class="hljs-deletion">- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")</span> | |
| <span class="hljs-deletion">- model.to(device)</span> | |
| <span class="hljs-addition">+ train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare(</span> | |
| <span class="hljs-addition">+ train_dataloader, eval_dataloader, model, optimizer</span> | |
| <span class="hljs-addition">+ )</span> | |
| num_epochs = 3 | |
| num_training_steps = num_epochs * len(train_dataloader) | |
| lr_scheduler = get_scheduler( | |
| "linear", | |
| optimizer=optimizer, | |
| num_warmup_steps=0, | |
| num_training_steps=num_training_steps | |
| ) | |
| progress_bar = tqdm(range(num_training_steps)) | |
| model.train() | |
| for epoch in range(num_epochs): | |
| for batch in train_dataloader: | |
| <span class="hljs-deletion">- batch = {k: v.to(device) for k, v in batch.items()}</span> | |
| outputs = model(**batch) | |
| loss = outputs.loss | |
| <span class="hljs-deletion">- loss.backward()</span> | |
| <span class="hljs-addition">+ accelerator.backward(loss)</span> | |
| optimizer.step() | |
| lr_scheduler.step() | |
| optimizer.zero_grad() | |
| progress_bar.update(1)`,wrap:!1}}),T=new wt({props:{$$slots:{default:[kt]},$$scope:{ctx:Be}}}),Je=new y({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQWZyb20lMjB0b3JjaC5vcHRpbSUyMGltcG9ydCUyMEFkYW1XJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMkMlMjBnZXRfc2NoZWR1bGVyJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50JTJDJTIwbnVtX2xhYmVscyUzRDIpJTBBb3B0aW1pemVyJTIwJTNEJTIwQWRhbVcobW9kZWwucGFyYW1ldGVycygpJTJDJTIwbHIlM0QzZS01KSUwQSUwQXRyYWluX2RsJTJDJTIwZXZhbF9kbCUyQyUyMG1vZGVsJTJDJTIwb3B0aW1pemVyJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZSglMEElMjAlMjAlMjAlMjB0cmFpbl9kYXRhbG9hZGVyJTJDJTIwZXZhbF9kYXRhbG9hZGVyJTJDJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMEEpJTBBJTBBbnVtX2Vwb2NocyUyMCUzRCUyMDMlMEFudW1fdHJhaW5pbmdfc3RlcHMlMjAlM0QlMjBudW1fZXBvY2hzJTIwKiUyMGxlbih0cmFpbl9kbCklMEFscl9zY2hlZHVsZXIlMjAlM0QlMjBnZXRfc2NoZWR1bGVyKCUwQSUyMCUyMCUyMCUyMCUyMmxpbmVhciUyMiUyQyUwQSUyMCUyMCUyMCUyMG9wdGltaXplciUzRG9wdGltaXplciUyQyUwQSUyMCUyMCUyMCUyMG51bV93YXJtdXBfc3RlcHMlM0QwJTJDJTBBJTIwJTIwJTIwJTIwbnVtX3RyYWluaW5nX3N0ZXBzJTNEbnVtX3RyYWluaW5nX3N0ZXBzJTJDJTBBKSUwQSUwQXByb2dyZXNzX2JhciUyMCUzRCUyMHRxZG0ocmFuZ2UobnVtX3RyYWluaW5nX3N0ZXBzKSklMEElMEFtb2RlbC50cmFpbigpJTBBZm9yJTIwZXBvY2glMjBpbiUyMHJhbmdlKG51bV9lcG9jaHMpJTNBJTBBJTIwJTIwJTIwJTIwZm9yJTIwYmF0Y2glMjBpbiUyMHRyYWluX2RsJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqYmF0Y2gpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbG9zcyUyMCUzRCUyMG91dHB1dHMubG9zcyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGxyX3NjaGVkdWxlci5zdGVwKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcHRpbWl6ZXIuemVyb19ncmFkKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwcm9ncmVzc19iYXIudXBkYXRlKDEp",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator | |
| <span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSequenceClassification, get_scheduler | |
| accelerator = Accelerator() | |
| model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=<span class="hljs-number">2</span>) | |
| optimizer = AdamW(model.parameters(), lr=<span class="hljs-number">3e-5</span>) | |
| train_dl, eval_dl, model, optimizer = accelerator.prepare( | |
| train_dataloader, eval_dataloader, model, optimizer | |
| ) | |
| num_epochs = <span class="hljs-number">3</span> | |
| num_training_steps = num_epochs * <span class="hljs-built_in">len</span>(train_dl) | |
| lr_scheduler = get_scheduler( | |
| <span class="hljs-string">"linear"</span>, | |
| optimizer=optimizer, | |
| num_warmup_steps=<span class="hljs-number">0</span>, | |
| num_training_steps=num_training_steps, | |
| ) | |
| progress_bar = tqdm(<span class="hljs-built_in">range</span>(num_training_steps)) | |
| model.train() | |
| <span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_epochs): | |
| <span class="hljs-keyword">for</span> batch <span class="hljs-keyword">in</span> train_dl: | |
| outputs = model(**batch) | |
| loss = outputs.loss | |
| accelerator.backward(loss) | |
| optimizer.step() | |
| lr_scheduler.step() | |
| optimizer.zero_grad() | |
| progress_bar.update(<span class="hljs-number">1</span>)`,wrap:!1}}),je=new y({props:{code:"YWNjZWxlcmF0ZSUyMGNvbmZpZw==",highlighted:"accelerate config",wrap:!1}}),he=new y({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHRyYWluLnB5",highlighted:'accelerate <span class="hljs-built_in">launch</span> train.py',wrap:!1}}),Ce=new y({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBub3RlYm9va19sYXVuY2hlciUwQSUwQW5vdGVib29rX2xhdW5jaGVyKHRyYWluaW5nX2Z1bmN0aW9uKQ==",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> notebook_launcher | |
| notebook_launcher(training_function)`,wrap:!1}}),Ze=new Gt({props:{source:"https://github.com/huggingface/course/blob/main/chapters/it/chapter3/4.mdx"}}),{c(){b=m("meta"),J=s(),w=m("p"),ge=s(),i(h.$$.fragment),ve=s(),i(U.$$.fragment),ke=s(),i(f.$$.fragment),ze=s(),C=m("p"),C.innerHTML=Nl,Xe=s(),i(I.$$.fragment),We=s(),i(Z.$$.fragment),Re=s(),B=m("p"),B.innerHTML=Al,_e=s(),g=m("ul"),g.innerHTML=El,Ye=s(),$=m("p"),$.innerHTML=Fl,Ve=s(),i(G.$$.fragment),Ne=s(),v=m("p"),v.textContent=Hl,Ae=s(),i(k.$$.fragment),Ee=s(),z=m("p"),z.textContent=Ql,Fe=s(),i(X.$$.fragment),He=s(),W=m("p"),W.textContent=xl,Qe=s(),i(R.$$.fragment),xe=s(),i(_.$$.fragment),Se=s(),Y=m("p"),Y.innerHTML=Sl,qe=s(),V=m("p"),V.textContent=ql,Le=s(),i(N.$$.fragment),Ke=s(),A=m("p"),A.textContent=Ll,Pe=s(),i(E.$$.fragment),De=s(),i(F.$$.fragment),Oe=s(),H=m("p"),H.innerHTML=Kl,el=s(),Q=m("p"),Q.innerHTML=Pl,ll=s(),i(x.$$.fragment),tl=s(),S=m("p"),S.innerHTML=Dl,al=s(),i(q.$$.fragment),sl=s(),i(L.$$.fragment),nl=s(),i(K.$$.fragment),il=s(),P=m("p"),P.innerHTML=Ol,ol=s(),i(D.$$.fragment),rl=s(),i(O.$$.fragment),pl=s(),ee=m("p"),ee.innerHTML=et,cl=s(),i(le.$$.fragment),dl=s(),te=m("p"),te.textContent=lt,ml=s(),i(ae.$$.fragment),Ml=s(),se=m("p"),se.innerHTML=tt,ul=s(),i(ne.$$.fragment),yl=s(),i(ie.$$.fragment),bl=s(),oe=m("p"),oe.textContent=at,Jl=s(),i(j.$$.fragment),wl=s(),i(re.$$.fragment),jl=s(),i(pe.$$.fragment),Tl=s(),ce=m("p"),ce.innerHTML=st,hl=s(),i(de.$$.fragment),Ul=s(),me=m("p"),me.textContent=nt,fl=s(),i(Me.$$.fragment),Cl=s(),ue=m("p"),ue.innerHTML=it,Il=s(),ye=m("p"),ye.innerHTML=ot,Zl=s(),i(T.$$.fragment),Bl=s(),be=m("p"),be.textContent=rt,gl=s(),i(Je.$$.fragment),$l=s(),we=m("p"),we.innerHTML=pt,Gl=s(),i(je.$$.fragment),vl=s(),Te=m("p"),Te.textContent=ct,kl=s(),i(he.$$.fragment),zl=s(),Ue=m("p"),Ue.textContent=dt,Xl=s(),fe=m("p"),fe.innerHTML=mt,Wl=s(),i(Ce.$$.fragment),Rl=s(),Ie=m("p"),Ie.innerHTML=Mt,_l=s(),i(Ze.$$.fragment),Yl=s(),$e=m("p"),this.h()},l(e){const l=It("svelte-u9bgzb",document.head);b=M(l,"META",{name:!0,content:!0}),l.forEach(t),J=n(e),w=M(e,"P",{}),bt(w).forEach(t),ge=n(e),o(h.$$.fragment,e),ve=n(e),o(U.$$.fragment,e),ke=n(e),o(f.$$.fragment,e),ze=n(e),C=M(e,"P",{"data-svelte-h":!0}),u(C)!=="svelte-1us6pmo"&&(C.innerHTML=Nl),Xe=n(e),o(I.$$.fragment,e),We=n(e),o(Z.$$.fragment,e),Re=n(e),B=M(e,"P",{"data-svelte-h":!0}),u(B)!=="svelte-1r5rhw5"&&(B.innerHTML=Al),_e=n(e),g=M(e,"UL",{"data-svelte-h":!0}),u(g)!=="svelte-1soygtz"&&(g.innerHTML=El),Ye=n(e),$=M(e,"P",{"data-svelte-h":!0}),u($)!=="svelte-109i7es"&&($.innerHTML=Fl),Ve=n(e),o(G.$$.fragment,e),Ne=n(e),v=M(e,"P",{"data-svelte-h":!0}),u(v)!=="svelte-ouxzs3"&&(v.textContent=Hl),Ae=n(e),o(k.$$.fragment,e),Ee=n(e),z=M(e,"P",{"data-svelte-h":!0}),u(z)!=="svelte-9dsd8e"&&(z.textContent=Ql),Fe=n(e),o(X.$$.fragment,e),He=n(e),W=M(e,"P",{"data-svelte-h":!0}),u(W)!=="svelte-10x8m39"&&(W.textContent=xl),Qe=n(e),o(R.$$.fragment,e),xe=n(e),o(_.$$.fragment,e),Se=n(e),Y=M(e,"P",{"data-svelte-h":!0}),u(Y)!=="svelte-1vy1r30"&&(Y.innerHTML=Sl),qe=n(e),V=M(e,"P",{"data-svelte-h":!0}),u(V)!=="svelte-tvclea"&&(V.textContent=ql),Le=n(e),o(N.$$.fragment,e),Ke=n(e),A=M(e,"P",{"data-svelte-h":!0}),u(A)!=="svelte-17r0l89"&&(A.textContent=Ll),Pe=n(e),o(E.$$.fragment,e),De=n(e),o(F.$$.fragment,e),Oe=n(e),H=M(e,"P",{"data-svelte-h":!0}),u(H)!=="svelte-14z3wqu"&&(H.innerHTML=Kl),el=n(e),Q=M(e,"P",{"data-svelte-h":!0}),u(Q)!=="svelte-1tiesbf"&&(Q.innerHTML=Pl),ll=n(e),o(x.$$.fragment,e),tl=n(e),S=M(e,"P",{"data-svelte-h":!0}),u(S)!=="svelte-agqccw"&&(S.innerHTML=Dl),al=n(e),o(q.$$.fragment,e),sl=n(e),o(L.$$.fragment,e),nl=n(e),o(K.$$.fragment,e),il=n(e),P=M(e,"P",{"data-svelte-h":!0}),u(P)!=="svelte-1wb397t"&&(P.innerHTML=Ol),ol=n(e),o(D.$$.fragment,e),rl=n(e),o(O.$$.fragment,e),pl=n(e),ee=M(e,"P",{"data-svelte-h":!0}),u(ee)!=="svelte-1xyrr8b"&&(ee.innerHTML=et),cl=n(e),o(le.$$.fragment,e),dl=n(e),te=M(e,"P",{"data-svelte-h":!0}),u(te)!=="svelte-k27sq5"&&(te.textContent=lt),ml=n(e),o(ae.$$.fragment,e),Ml=n(e),se=M(e,"P",{"data-svelte-h":!0}),u(se)!=="svelte-vqhpoz"&&(se.innerHTML=tt),ul=n(e),o(ne.$$.fragment,e),yl=n(e),o(ie.$$.fragment,e),bl=n(e),oe=M(e,"P",{"data-svelte-h":!0}),u(oe)!=="svelte-rz5cet"&&(oe.textContent=at),Jl=n(e),o(j.$$.fragment,e),wl=n(e),o(re.$$.fragment,e),jl=n(e),o(pe.$$.fragment,e),Tl=n(e),ce=M(e,"P",{"data-svelte-h":!0}),u(ce)!=="svelte-mgnuhl"&&(ce.innerHTML=st),hl=n(e),o(de.$$.fragment,e),Ul=n(e),me=M(e,"P",{"data-svelte-h":!0}),u(me)!=="svelte-hyicis"&&(me.textContent=nt),fl=n(e),o(Me.$$.fragment,e),Cl=n(e),ue=M(e,"P",{"data-svelte-h":!0}),u(ue)!=="svelte-c6nglj"&&(ue.innerHTML=it),Il=n(e),ye=M(e,"P",{"data-svelte-h":!0}),u(ye)!=="svelte-curkr2"&&(ye.innerHTML=ot),Zl=n(e),o(T.$$.fragment,e),Bl=n(e),be=M(e,"P",{"data-svelte-h":!0}),u(be)!=="svelte-81d5ez"&&(be.textContent=rt),gl=n(e),o(Je.$$.fragment,e),$l=n(e),we=M(e,"P",{"data-svelte-h":!0}),u(we)!=="svelte-1s4avns"&&(we.innerHTML=pt),Gl=n(e),o(je.$$.fragment,e),vl=n(e),Te=M(e,"P",{"data-svelte-h":!0}),u(Te)!=="svelte-5f8y46"&&(Te.textContent=ct),kl=n(e),o(he.$$.fragment,e),zl=n(e),Ue=M(e,"P",{"data-svelte-h":!0}),u(Ue)!=="svelte-h9xzji"&&(Ue.textContent=dt),Xl=n(e),fe=M(e,"P",{"data-svelte-h":!0}),u(fe)!=="svelte-142b7g5"&&(fe.innerHTML=mt),Wl=n(e),o(Ce.$$.fragment,e),Rl=n(e),Ie=M(e,"P",{"data-svelte-h":!0}),u(Ie)!=="svelte-zmmr5l"&&(Ie.innerHTML=Mt),_l=n(e),o(Ze.$$.fragment,e),Yl=n(e),$e=M(e,"P",{}),bt($e).forEach(t),this.h()},h(){Jt(b,"name","hf:doc:metadata"),Jt(b,"content",Xt)},m(e,l){Zt(document.head,b),a(e,J,l),a(e,w,l),a(e,ge,l),r(h,e,l),a(e,ve,l),r(U,e,l),a(e,ke,l),r(f,e,l),a(e,ze,l),a(e,C,l),a(e,Xe,l),r(I,e,l),a(e,We,l),r(Z,e,l),a(e,Re,l),a(e,B,l),a(e,_e,l),a(e,g,l),a(e,Ye,l),a(e,$,l),a(e,Ve,l),r(G,e,l),a(e,Ne,l),a(e,v,l),a(e,Ae,l),r(k,e,l),a(e,Ee,l),a(e,z,l),a(e,Fe,l),r(X,e,l),a(e,He,l),a(e,W,l),a(e,Qe,l),r(R,e,l),a(e,xe,l),r(_,e,l),a(e,Se,l),a(e,Y,l),a(e,qe,l),a(e,V,l),a(e,Le,l),r(N,e,l),a(e,Ke,l),a(e,A,l),a(e,Pe,l),r(E,e,l),a(e,De,l),r(F,e,l),a(e,Oe,l),a(e,H,l),a(e,el,l),a(e,Q,l),a(e,ll,l),r(x,e,l),a(e,tl,l),a(e,S,l),a(e,al,l),r(q,e,l),a(e,sl,l),r(L,e,l),a(e,nl,l),r(K,e,l),a(e,il,l),a(e,P,l),a(e,ol,l),r(D,e,l),a(e,rl,l),r(O,e,l),a(e,pl,l),a(e,ee,l),a(e,cl,l),r(le,e,l),a(e,dl,l),a(e,te,l),a(e,ml,l),r(ae,e,l),a(e,Ml,l),a(e,se,l),a(e,ul,l),r(ne,e,l),a(e,yl,l),r(ie,e,l),a(e,bl,l),a(e,oe,l),a(e,Jl,l),r(j,e,l),a(e,wl,l),r(re,e,l),a(e,jl,l),r(pe,e,l),a(e,Tl,l),a(e,ce,l),a(e,hl,l),r(de,e,l),a(e,Ul,l),a(e,me,l),a(e,fl,l),r(Me,e,l),a(e,Cl,l),a(e,ue,l),a(e,Il,l),a(e,ye,l),a(e,Zl,l),r(T,e,l),a(e,Bl,l),a(e,be,l),a(e,gl,l),r(Je,e,l),a(e,$l,l),a(e,we,l),a(e,Gl,l),r(je,e,l),a(e,vl,l),a(e,Te,l),a(e,kl,l),r(he,e,l),a(e,zl,l),a(e,Ue,l),a(e,Xl,l),a(e,fe,l),a(e,Wl,l),r(Ce,e,l),a(e,Rl,l),a(e,Ie,l),a(e,_l,l),r(Ze,e,l),a(e,Yl,l),a(e,$e,l),Vl=!0},p(e,[l]){const ut={};l&2&&(ut.$$scope={dirty:l,ctx:e}),j.$set(ut);const yt={};l&2&&(yt.$$scope={dirty:l,ctx:e}),T.$set(yt)},i(e){Vl||(p(h.$$.fragment,e),p(U.$$.fragment,e),p(f.$$.fragment,e),p(I.$$.fragment,e),p(Z.$$.fragment,e),p(G.$$.fragment,e),p(k.$$.fragment,e),p(X.$$.fragment,e),p(R.$$.fragment,e),p(_.$$.fragment,e),p(N.$$.fragment,e),p(E.$$.fragment,e),p(F.$$.fragment,e),p(x.$$.fragment,e),p(q.$$.fragment,e),p(L.$$.fragment,e),p(K.$$.fragment,e),p(D.$$.fragment,e),p(O.$$.fragment,e),p(le.$$.fragment,e),p(ae.$$.fragment,e),p(ne.$$.fragment,e),p(ie.$$.fragment,e),p(j.$$.fragment,e),p(re.$$.fragment,e),p(pe.$$.fragment,e),p(de.$$.fragment,e),p(Me.$$.fragment,e),p(T.$$.fragment,e),p(Je.$$.fragment,e),p(je.$$.fragment,e),p(he.$$.fragment,e),p(Ce.$$.fragment,e),p(Ze.$$.fragment,e),Vl=!0)},o(e){c(h.$$.fragment,e),c(U.$$.fragment,e),c(f.$$.fragment,e),c(I.$$.fragment,e),c(Z.$$.fragment,e),c(G.$$.fragment,e),c(k.$$.fragment,e),c(X.$$.fragment,e),c(R.$$.fragment,e),c(_.$$.fragment,e),c(N.$$.fragment,e),c(E.$$.fragment,e),c(F.$$.fragment,e),c(x.$$.fragment,e),c(q.$$.fragment,e),c(L.$$.fragment,e),c(K.$$.fragment,e),c(D.$$.fragment,e),c(O.$$.fragment,e),c(le.$$.fragment,e),c(ae.$$.fragment,e),c(ne.$$.fragment,e),c(ie.$$.fragment,e),c(j.$$.fragment,e),c(re.$$.fragment,e),c(pe.$$.fragment,e),c(de.$$.fragment,e),c(Me.$$.fragment,e),c(T.$$.fragment,e),c(Je.$$.fragment,e),c(je.$$.fragment,e),c(he.$$.fragment,e),c(Ce.$$.fragment,e),c(Ze.$$.fragment,e),Vl=!1},d(e){e&&(t(J),t(w),t(ge),t(ve),t(ke),t(ze),t(C),t(Xe),t(We),t(Re),t(B),t(_e),t(g),t(Ye),t($),t(Ve),t(Ne),t(v),t(Ae),t(Ee),t(z),t(Fe),t(He),t(W),t(Qe),t(xe),t(Se),t(Y),t(qe),t(V),t(Le),t(Ke),t(A),t(Pe),t(De),t(Oe),t(H),t(el),t(Q),t(ll),t(tl),t(S),t(al),t(sl),t(nl),t(il),t(P),t(ol),t(rl),t(pl),t(ee),t(cl),t(dl),t(te),t(ml),t(Ml),t(se),t(ul),t(yl),t(bl),t(oe),t(Jl),t(wl),t(jl),t(Tl),t(ce),t(hl),t(Ul),t(me),t(fl),t(Cl),t(ue),t(Il),t(ye),t(Zl),t(Bl),t(be),t(gl),t($l),t(we),t(Gl),t(vl),t(Te),t(kl),t(zl),t(Ue),t(Xl),t(fe),t(Wl),t(Rl),t(Ie),t(_l),t(Yl),t($e)),t(b),d(h,e),d(U,e),d(f,e),d(I,e),d(Z,e),d(G,e),d(k,e),d(X,e),d(R,e),d(_,e),d(N,e),d(E,e),d(F,e),d(x,e),d(q,e),d(L,e),d(K,e),d(D,e),d(O,e),d(le,e),d(ae,e),d(ne,e),d(ie,e),d(j,e),d(re,e),d(pe,e),d(de,e),d(Me,e),d(T,e),d(Je,e),d(je,e),d(he,e),d(Ce,e),d(Ze,e)}}}const Xt='{"title":"Un addestramento completo","local":"un-addestramento-completo","sections":[{"title":"Preparazione all’addestramento","local":"preparazione-alladdestramento","sections":[],"depth":3},{"title":"Il ciclo di addestramento","local":"il-ciclo-di-addestramento","sections":[],"depth":3},{"title":"Il ciclo di valutazione","local":"il-ciclo-di-valutazione","sections":[],"depth":3},{"title":"Potenzia il tuo ciclo di addestramento con 🤗 Accelerate","local":"potenzia-il-tuo-ciclo-di-addestramento-con--accelerate","sections":[],"depth":3}],"depth":1}';function Wt(Be){return ht(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ft extends ft{constructor(b){super(),Ct(this,b,Wt,zt,Tt,{})}}export{Ft as component}; | |
Xet Storage Details
- Size:
- 44.3 kB
- Xet hash:
- fb940be933564003c149f5c26a7eb5eaa0f821ce85673d0791aca6957c4f77c9
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.