Buckets:
| import{s as Ue,o as me,n as zs}from"../chunks/scheduler.37c15a92.js";import{S as je,i as Te,g as M,s as a,r as c,A as we,h as p,f as e,c as n,j as Je,u,x as r,k as oe,y as de,a as t,v as y,d as J,t as o,w as U}from"../chunks/index.2bf4358c.js";import{T as Ns}from"../chunks/Tip.363c041f.js";import{C as T}from"../chunks/CodeBlock.4e987730.js";import{C as he}from"../chunks/CourseFloatingBanner.6add7356.js";import{H as d,E as Ce}from"../chunks/getInferenceSnippets.ebf8be91.js";function Ie(h){let i,j="Acest exercițiu poate fi rulat pe un GPU T4 Google Colab gratuit. Pentru cea mai bună experiență, urmărește notebook-ul legat mai sus și încearcă-l singur.";return{c(){i=M("p"),i.textContent=j},l(m){i=p(m,"P",{"data-svelte-h":!0}),r(i)!=="svelte-1u7ab9d"&&(i.textContent=j)},m(m,w){t(m,i,w)},p:zs,d(m){m&&e(i)}}}function be(h){let i,j='Nu vom acoperi detaliile LoRA în acest capitol, dar poți învăța mai multe în <a href="/course/chapter11/3">Capitolul 11</a>.';return{c(){i=M("p"),i.innerHTML=j},l(m){i=p(m,"P",{"data-svelte-h":!0}),r(i)!=="svelte-nce7ys"&&(i.innerHTML=j)},m(m,w){t(m,i,w)},p:zs,d(m){m&&e(i)}}}function fe(h){let i,j="Antrenarea poate dura ceva timp. S-ar putea să nu vezi recompensele crescând imediat - poate dura 150-200 de pași înainte să începi să vezi îmbunătățiri. Fii răbdător!";return{c(){i=M("p"),i.textContent=j},l(m){i=p(m,"P",{"data-svelte-h":!0}),r(i)!=="svelte-f1p6xp"&&(i.textContent=j)},m(m,w){t(m,i,w)},p:zs,d(m){m&&e(i)}}}function ge(h){let i,j,m,w,f,Bl,g,Ql,B,Vs='În acest exercițiu, vei ajusta fin un model cu GRPO (Optimizarea Relativă a Politicii de Grup) folosind Unsloth, pentru a îmbunătăți capacitățile de raționament ale unui model. Am acoperit GRPO în <a href="/course/chapter3/3">Capitolul 3</a>.',Gl,Q,As="Unsloth este o bibliotecă care accelerează ajustarea fină a LLM-urilor, făcând posibilă antrenarea modelelor mai repede și cu mai puține resurse computaționale. Unsloth se conectează la TRL, deci vom construi pe ceea ce am învățat în secțiunile anterioare, și o vom adapta pentru specificațiile Unsloth.",Zl,C,_l,G,Nl,Z,$s="În primul rând, să instalăm bibliotecile necesare. Vom avea nevoie de Unsloth pentru ajustarea fină accelerată și vLLM pentru inferența rapidă.",zl,_,Vl,N,Al,z,vs="Unsloth oferă o clasă (<code>FastLanguageModel</code>) care integrează transformers cu optimizările Unsloth. Să o importăm:",$l,V,vl,A,Rs="Acum, să încărcăm modelul Google Gemma 3 1B Instruct și să-l configurăm pentru ajustarea fină:",Rl,$,kl,v,ks="Acest cod încarcă modelul în cuantizare 4-bit pentru a economisi memoria și aplică LoRA (Adaptarea de Rang Mic) pentru ajustarea fină eficientă. Parametrul <code>target_modules</code> specifică care straturi ale modelului să fie ajustate fin, și <code>use_gradient_checkpointing</code> permite antrenarea cu contexte mai lungi.",El,I,Xl,R,xl,k,Es="Pentru acest exercițiu, vom folosi setul de date GSM8K, care conține probleme de matematică de gimnaziu. Vom formata datele pentru a încuraja modelul să-și arate raționamentul înainte de a oferi un răspuns.",Wl,E,Xs="În primul rând, vom defini formatul prompt-urilor și răspunsurilor:",Sl,X,Fl,x,xs="Acum, să pregătim setul de date:",Yl,W,ql,S,Ws="Setul de date este pregătit prin extragerea răspunsului din setul de date și formatarea acestuia ca șir de caractere.",Hl,F,Dl,Y,Ss='După cum am discutat într-o <a href="/course/chapter13/4">pagină anterioară</a>, GRPO poate folosi funcții de recompensă pentru a ghida învățarea modelului bazată pe criterii verificabile precum lungimea și formatarea.',Ll,q,Fs="În acest exercițiu, vom defini mai multe funcții de recompensă care încurajează diferite aspecte ale unui raționament bun. De exemplu, vom recompensa modelul pentru oferirea unui răspuns întreg, și pentru urmarea formatului strict.",Pl,H,Ol,D,Ys="Aceste funcții de recompensă servesc diferite scopuri:",Kl,L,qs="<thead><tr><th>Funcția de Recompensă</th> <th>Scopul</th></tr></thead> <tbody><tr><td><code>correctness_reward_func</code></td> <td>Recompensează modelul când răspunsul său se potrivește cu răspunsul corect</td></tr> <tr><td><code>int_reward_func</code></td> <td>Recompensează modelul pentru oferirea unui răspuns numeric</td></tr> <tr><td><code>strict_format_reward_func</code> și <code>soft_format_reward_func</code></td> <td>Recompensează modelul pentru urmarea formatului specificat</td></tr> <tr><td><code>xmlcount_reward_func</code></td> <td>Recompensează utilizarea corectă a tag-urilor XML și penalizează conținutul extra după tag-urile de închidere</td></tr></tbody>",ls,P,ss,O,Hs='Acum vom configura antrenorul GRPO cu modelul nostru, tokenizer-ul și funcțiile de recompensă. Această parte urmează aceeași abordare ca <a href="/course/chapter12/5">exercițiul anterior</a>.',es,K,ts,ll,Ds="<code>GRPOConfig</code> setează diferiți hiperparametri pentru antrenare:",as,sl,Ls="<li><code>use_vllm</code>: Activează inferența rapidă cu vLLM</li> <li><code>learning_rate</code>: Controlează cât de repede învață modelul</li> <li><code>num_generations</code>: Numărul de completări de generat pentru fiecare prompt</li> <li><code>max_steps</code>: Numărul total de pași de antrenare de efectuat</li>",ns,el,Ps="Acum să începem antrenarea:",Ms,tl,ps,b,is,al,rs,nl,Os="După antrenare, să testăm modelul nostru pentru a vedea cum performează. În primul rând, vom salva greutățile LoRA:",cs,Ml,us,pl,Ks="Acum, să testăm modelul cu o întrebare nouă:",ys,il,Js,rl,le="Ar trebui să vezi că modelul urmează acum formatul specificat, arătându-și raționamentul înainte de a oferi un răspuns.",os,cl,Us,ul,se="Unsloth oferă mai multe opțiuni pentru salvarea modelului tău ajustat fin, dar ne vom concentra pe cea mai comună.",ms,yl,js,Jl,Ts,ol,ee="Vom încărca modelul pe Hugging Face Hub folosind metoda <code>push_to_hub_merged</code>. Această metodă ne permite să încărcăm modelul în multiple formate de cuantizare.",ws,Ul,ds,ml,te="Unsloth suportă de asemenea salvarea în format GGUF pentru utilizare cu llama.cpp:",hs,jl,Cs,Tl,ae="Fișierele GGUF pot fi folosite cu llama.cpp sau sisteme bazate pe UI precum Jan sau Open WebUI.",Is,wl,bs,dl,ne="În acest exercițiu, ai învățat cum să:",fs,hl,Me="<li>Configurezi Unsloth pentru ajustarea fină accelerată</li> <li>Pregătești datele pentru antrenarea GRPO</li> <li>Definești funcții de recompensă personalizate pentru a ghida învățarea modelului</li> <li>Antrenezi un model folosind GRPO</li> <li>Testezi modelul ajustat fin</li> <li>Salvezi modelul în diverse formate</li>",gs,Cl,pe="GRPO este o tehnică puternică pentru alinierea modelelor de limbaj cu comportamente specifice, iar Unsloth o face accesibilă chiar și pe hardware limitat. Prin combinarea mai multor funcții de recompensă, poți ghida modelul să urmeze un format specific în timp ce îi îmbunătățești și capacitățile de raționament.",Bs,Il,ie="Pentru mai multe informații și resurse, verifică:",Qs,bl,re='<li><a href="https://docs.unsloth.ai/" rel="nofollow">Documentația Unsloth</a></li> <li><a href="https://discord.gg/unsloth" rel="nofollow">Discord Unsloth</a></li> <li><a href="https://github.com/unslothai/unsloth" rel="nofollow">GitHub Unsloth</a></li>',Gs,fl,Zs,gl,_s;return f=new he({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/HuggingFace%20Course-Gemma3_(1B)-GRPO.ipynb"}]}}),g=new d({props:{title:"Exercițiu Practic: GRPO cu Unsloth",local:"exercițiu-practic-grpo-cu-unsloth",headingTag:"h1"}}),C=new Ns({props:{$$slots:{default:[Ie]},$$scope:{ctx:h}}}),G=new d({props:{title:"Instalează dependențele",local:"instalează-dependențele",headingTag:"h2"}}),_=new T({props:{code:"cGlwJTIwaW5zdGFsbCUyMHVuc2xvdGglMjB2bGxtJTBBcGlwJTIwaW5zdGFsbCUyMC0tdXBncmFkZSUyMHBpbGxvdw==",highlighted:`pip install unsloth vllm | |
| pip install --upgrade pillow`,wrap:!1}}),N=new d({props:{title:"Configurarea Unsloth",local:"configurarea-unsloth",headingTag:"h2"}}),V=new T({props:{code:"ZnJvbSUyMHVuc2xvdGglMjBpbXBvcnQlMjBGYXN0TGFuZ3VhZ2VNb2RlbA==",highlighted:'<span class="hljs-keyword">from</span> unsloth <span class="hljs-keyword">import</span> FastLanguageModel',wrap:!1}}),$=new T({props:{code:"ZnJvbSUyMHVuc2xvdGglMjBpbXBvcnQlMjBGYXN0TGFuZ3VhZ2VNb2RlbCUwQWltcG9ydCUyMHRvcmNoJTBBJTBBbWF4X3NlcV9sZW5ndGglMjAlM0QlMjAxMDI0JTIwJTIwJTIzJTIwUG9hdGUlMjBjcmUlQzglOTl0ZSUyMHBlbnRydSUyMHVybWUlMjBkZSUyMHJhJUM4JTlCaW9uYW1lbnQlMjBtYWklMjBsdW5naSUwQWxvcmFfcmFuayUyMCUzRCUyMDMyJTIwJTIwJTIzJTIwUmFuZyUyMG1haSUyMG1hcmUlMjAlM0QlMjBtYWklMjBpbnRlbGlnZW50JTJDJTIwZGFyJTIwbWFpJTIwbGVudCUwQSUwQW1vZGVsJTJDJTIwdG9rZW5pemVyJTIwJTNEJTIwRmFzdExhbmd1YWdlTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMG1vZGVsX25hbWUlM0QlMjJnb29nbGUlMkZnZW1tYS0zLTFiLWl0JTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X3NlcV9sZW5ndGglM0RtYXhfc2VxX2xlbmd0aCUyQyUwQSUyMCUyMCUyMCUyMGxvYWRfaW5fNGJpdCUzRFRydWUlMkMlMjAlMjAlMjMlMjBGYWxzZSUyMHBlbnRydSUyMExvUkElMjAxNmJpdCUwQSUyMCUyMCUyMCUyMGZhc3RfaW5mZXJlbmNlJTNEVHJ1ZSUyQyUyMCUyMCUyMyUyMEFjdGl2ZWF6JUM0JTgzJTIwaW5mZXJlbiVDOCU5QmElMjByYXBpZCVDNCU4MyUyMHZMTE0lMEElMjAlMjAlMjAlMjBtYXhfbG9yYV9yYW5rJTNEbG9yYV9yYW5rJTJDJTBBJTIwJTIwJTIwJTIwZ3B1X21lbW9yeV91dGlsaXphdGlvbiUzRDAuNiUyQyUyMCUyMCUyMyUyMFJlZHVjZSUyMGRhYyVDNCU4MyUyMHIlQzQlODNtJUMzJUEyaSUyMGYlQzQlODNyJUM0JTgzJTIwbWVtb3JpZSUwQSklMEElMEFtb2RlbCUyMCUzRCUyMEZhc3RMYW5ndWFnZU1vZGVsLmdldF9wZWZ0X21vZGVsKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTJDJTBBJTIwJTIwJTIwJTIwciUzRGxvcmFfcmFuayUyQyUyMCUyMCUyMyUyMEFsZWdlJTIwb3JpY2UlMjBudW0lQzQlODNyJTIwJTNFJTIwMCUyMCElMjBTdWdlcmF0JTIwOCUyQyUyMDE2JTJDJTIwMzIlMkMlMjA2NCUyQyUyMDEyOCUwQSUyMCUyMCUyMCUyMHRhcmdldF9tb2R1bGVzJTNEJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycV9wcm9qJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIya19wcm9qJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydl9wcm9qJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyb19wcm9qJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZ2F0ZV9wcm9qJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydXBfcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmRvd25fcHJvaiUyMiUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUyMCUyMCUyMyUyMEVsaW1pbiVDNCU4MyUyMFFLVk8lMjBkYWMlQzQlODMlMjByJUM0JTgzbSVDMyVBMmklMjBmJUM0JTgzciVDNCU4MyUyMG1lbW9yaWUlMEElMjAlMjAlMjAlMjBsb3JhX2FscGhhJTNEbG9yYV9yYW5rJTJDJTBBJTIwJTIwJTIwJTIwdXNlX2dyYWRpZW50X2NoZWNrcG9pbnRpbmclM0QlMjJ1bnNsb3RoJTIyJTJDJTIwJTIwJTIzJTIwQWN0aXZlYXolQzQlODMlMjBhanVzdGFyZWElMjBmaW4lQzQlODMlMjBwZW50cnUlMjBjb250ZXh0JTIwbHVuZyUwQSUyMCUyMCUyMCUyMHJhbmRvbV9zdGF0ZSUzRDM0MDclMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> unsloth <span class="hljs-keyword">import</span> FastLanguageModel | |
| <span class="hljs-keyword">import</span> torch | |
| max_seq_length = <span class="hljs-number">1024</span> <span class="hljs-comment"># Poate crește pentru urme de raționament mai lungi</span> | |
| lora_rank = <span class="hljs-number">32</span> <span class="hljs-comment"># Rang mai mare = mai inteligent, dar mai lent</span> | |
| model, tokenizer = FastLanguageModel.from_pretrained( | |
| model_name=<span class="hljs-string">"google/gemma-3-1b-it"</span>, | |
| max_seq_length=max_seq_length, | |
| load_in_4bit=<span class="hljs-literal">True</span>, <span class="hljs-comment"># False pentru LoRA 16bit</span> | |
| fast_inference=<span class="hljs-literal">True</span>, <span class="hljs-comment"># Activează inferența rapidă vLLM</span> | |
| max_lora_rank=lora_rank, | |
| gpu_memory_utilization=<span class="hljs-number">0.6</span>, <span class="hljs-comment"># Reduce dacă rămâi fără memorie</span> | |
| ) | |
| model = FastLanguageModel.get_peft_model( | |
| model, | |
| r=lora_rank, <span class="hljs-comment"># Alege orice număr > 0 ! Sugerat 8, 16, 32, 64, 128</span> | |
| target_modules=[ | |
| <span class="hljs-string">"q_proj"</span>, | |
| <span class="hljs-string">"k_proj"</span>, | |
| <span class="hljs-string">"v_proj"</span>, | |
| <span class="hljs-string">"o_proj"</span>, | |
| <span class="hljs-string">"gate_proj"</span>, | |
| <span class="hljs-string">"up_proj"</span>, | |
| <span class="hljs-string">"down_proj"</span>, | |
| ], <span class="hljs-comment"># Elimină QKVO dacă rămâi fără memorie</span> | |
| lora_alpha=lora_rank, | |
| use_gradient_checkpointing=<span class="hljs-string">"unsloth"</span>, <span class="hljs-comment"># Activează ajustarea fină pentru context lung</span> | |
| random_state=<span class="hljs-number">3407</span>, | |
| )`,wrap:!1}}),I=new Ns({props:{$$slots:{default:[be]},$$scope:{ctx:h}}}),R=new d({props:{title:"Pregătirea Datelor",local:"pregătirea-datelor",headingTag:"h2"}}),X=new T({props:{code:"",highlighted:`<span class="hljs-comment"># Definește prompt-ul de sistem care instruiește modelul să folosească un format specific</span> | |
| SYSTEM_PROMPT = <span class="hljs-string">""" | |
| Răspunde în următorul format: | |
| <reasoning> | |
| ... | |
| </reasoning> | |
| <answer> | |
| ... | |
| </answer> | |
| """</span> | |
| XML_COT_FORMAT = <span class="hljs-string">"""\\ | |
| <reasoning> | |
| {reasoning} | |
| </reasoning> | |
| <answer> | |
| {answer} | |
| </answer> | |
| """</span>`,wrap:!1}}),W=new T({props:{code:"aW1wb3J0JTIwcmUlMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMkMlMjBEYXRhc2V0JTBBJTBBJTBBJTIzJTIwRnVuYyVDOCU5QmlpJTIwaGVscGVyJTIwcGVudHJ1JTIwYSUyMGV4dHJhZ2UlMjByJUM0JTgzc3B1bnN1cmklMjBkaW4lMjBmb3JtYXRlJTIwZGlmZXJpdGUlMEFkZWYlMjBleHRyYWN0X3htbF9hbnN3ZXIodGV4dCUzQSUyMHN0ciklMjAtJTNFJTIwc3RyJTNBJTBBJTIwJTIwJTIwJTIwYW5zd2VyJTIwJTNEJTIwdGV4dC5zcGxpdCglMjIlM0NhbnN3ZXIlM0UlMjIpJTVCLTElNUQlMEElMjAlMjAlMjAlMjBhbnN3ZXIlMjAlM0QlMjBhbnN3ZXIuc3BsaXQoJTIyJTNDJTJGYW5zd2VyJTNFJTIyKSU1QjAlNUQlMEElMjAlMjAlMjAlMjByZXR1cm4lMjBhbnN3ZXIuc3RyaXAoKSUwQSUwQSUwQWRlZiUyMGV4dHJhY3RfaGFzaF9hbnN3ZXIodGV4dCUzQSUyMHN0ciklMjAtJTNFJTIwc3RyJTIwJTdDJTIwTm9uZSUzQSUwQSUyMCUyMCUyMCUyMGlmJTIwJTIyJTIzJTIzJTIzJTIzJTIyJTIwbm90JTIwaW4lMjB0ZXh0JTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwTm9uZSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMHRleHQuc3BsaXQoJTIyJTIzJTIzJTIzJTIzJTIyKSU1QjElNUQuc3RyaXAoKSUwQSUwQSUwQSUyMyUyMEZ1bmMlQzglOUJpZSUyMHBlbnRydSUyMGElMjBwcmVnJUM0JTgzdGklMjBzZXR1bCUyMGRlJTIwZGF0ZSUyMEdTTThLJTBBZGVmJTIwZ2V0X2dzbThrX3F1ZXN0aW9ucyhzcGxpdCUzRCUyMnRyYWluJTIyKSUyMC0lM0UlMjBEYXRhc2V0JTNBJTBBJTIwJTIwJTIwJTIwZGF0YSUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJvcGVuYWklMkZnc204ayUyMiUyQyUyMCUyMm1haW4lMjIpJTVCc3BsaXQlNUQlMEElMjAlMjAlMjAlMjBkYXRhJTIwJTNEJTIwZGF0YS5tYXAoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbGFtYmRhJTIweCUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnByb21wdCUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJzeXN0ZW0lMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwU1lTVEVNX1BST01QVCU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMHglNUIlMjJxdWVzdGlvbiUyMiU1RCU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmFuc3dlciUyMiUzQSUyMGV4dHJhY3RfaGFzaF9hbnN3ZXIoeCU1QiUyMmFuc3dlciUyMiU1RCklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjApJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwZGF0YSUwQSUwQSUwQWRhdGFzZXQlMjAlM0QlMjBnZXRfZ3NtOGtfcXVlc3Rpb25zKCk=",highlighted:`<span class="hljs-keyword">import</span> re | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset, Dataset | |
| <span class="hljs-comment"># Funcții helper pentru a extrage răspunsuri din formate diferite</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">extract_xml_answer</span>(<span class="hljs-params">text: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">str</span>: | |
| answer = text.split(<span class="hljs-string">"<answer>"</span>)[-<span class="hljs-number">1</span>] | |
| answer = answer.split(<span class="hljs-string">"</answer>"</span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">return</span> answer.strip() | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">extract_hash_answer</span>(<span class="hljs-params">text: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">str</span> | <span class="hljs-literal">None</span>: | |
| <span class="hljs-keyword">if</span> <span class="hljs-string">"####"</span> <span class="hljs-keyword">not</span> <span class="hljs-keyword">in</span> text: | |
| <span class="hljs-keyword">return</span> <span class="hljs-literal">None</span> | |
| <span class="hljs-keyword">return</span> text.split(<span class="hljs-string">"####"</span>)[<span class="hljs-number">1</span>].strip() | |
| <span class="hljs-comment"># Funcție pentru a pregăti setul de date GSM8K</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">get_gsm8k_questions</span>(<span class="hljs-params">split=<span class="hljs-string">"train"</span></span>) -> Dataset: | |
| data = load_dataset(<span class="hljs-string">"openai/gsm8k"</span>, <span class="hljs-string">"main"</span>)[split] | |
| data = data.<span class="hljs-built_in">map</span>( | |
| <span class="hljs-keyword">lambda</span> x: { | |
| <span class="hljs-string">"prompt"</span>: [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: SYSTEM_PROMPT}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: x[<span class="hljs-string">"question"</span>]}, | |
| ], | |
| <span class="hljs-string">"answer"</span>: extract_hash_answer(x[<span class="hljs-string">"answer"</span>]), | |
| } | |
| ) | |
| <span class="hljs-keyword">return</span> data | |
| dataset = get_gsm8k_questions()`,wrap:!1}}),F=new d({props:{title:"Definirea Funcțiilor de Recompensă",local:"definirea-funcțiilor-de-recompensă",headingTag:"h2"}}),H=new T({props:{code:"JTIzJTIwRnVuYyVDOCU5QmlhJTIwZGUlMjByZWNvbXBlbnMlQzQlODMlMjBjYXJlJTIwdmVyaWZpYyVDNCU4MyUyMGRhYyVDNCU4MyUyMHIlQzQlODNzcHVuc3VsJTIwZXN0ZSUyMGNvcmVjdCUwQWRlZiUyMGNvcnJlY3RuZXNzX3Jld2FyZF9mdW5jKHByb21wdHMlMkMlMjBjb21wbGV0aW9ucyUyQyUyMGFuc3dlciUyQyUyMCoqa3dhcmdzKSUyMC0lM0UlMjBsaXN0JTVCZmxvYXQlNUQlM0ElMEElMjAlMjAlMjAlMjByZXNwb25zZXMlMjAlM0QlMjAlNUJjb21wbGV0aW9uJTVCMCU1RCU1QiUyMmNvbnRlbnQlMjIlNUQlMjBmb3IlMjBjb21wbGV0aW9uJTIwaW4lMjBjb21wbGV0aW9ucyU1RCUwQSUyMCUyMCUyMCUyMHElMjAlM0QlMjBwcm9tcHRzJTVCMCU1RCU1Qi0xJTVEJTVCJTIyY29udGVudCUyMiU1RCUwQSUyMCUyMCUyMCUyMGV4dHJhY3RlZF9yZXNwb25zZXMlMjAlM0QlMjAlNUJleHRyYWN0X3htbF9hbnN3ZXIociklMjBmb3IlMjByJTIwaW4lMjByZXNwb25zZXMlNUQlMEElMjAlMjAlMjAlMjBwcmludCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjItJTIyJTIwKiUyMDIwJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZiUyMiVDMyU4RW50cmViYXJlJTNBJTVDbiU3QnElN0QlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmJTIyJTVDblIlQzQlODNzcHVucyUzQSU1Q24lN0JhbnN3ZXIlNUIwJTVEJTdEJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZiUyMiU1Q25SJUM0JTgzc3B1bnMlMjBtb2RlbCUzQSU1Q24lN0JyZXNwb25zZXMlNUIwJTVEJTdEJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZiUyMiU1Q25FeHRyYXMlM0ElNUNuJTdCZXh0cmFjdGVkX3Jlc3BvbnNlcyU1QjAlNUQlN0QlMjIlMkMlMEElMjAlMjAlMjAlMjApJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwJTVCMi4wJTIwaWYlMjByJTIwJTNEJTNEJTIwYSUyMGVsc2UlMjAwLjAlMjBmb3IlMjByJTJDJTIwYSUyMGluJTIwemlwKGV4dHJhY3RlZF9yZXNwb25zZXMlMkMlMjBhbnN3ZXIpJTVEJTBBJTBBJTBBJTIzJTIwRnVuYyVDOCU5QmlhJTIwZGUlMjByZWNvbXBlbnMlQzQlODMlMjBjYXJlJTIwdmVyaWZpYyVDNCU4MyUyMGRhYyVDNCU4MyUyMHIlQzQlODNzcHVuc3VsJTIwZXN0ZSUyMHVuJTIwJUMzJUFFbnRyZWclMEFkZWYlMjBpbnRfcmV3YXJkX2Z1bmMoY29tcGxldGlvbnMlMkMlMjAqKmt3YXJncyklMjAtJTNFJTIwbGlzdCU1QmZsb2F0JTVEJTNBJTBBJTIwJTIwJTIwJTIwcmVzcG9uc2VzJTIwJTNEJTIwJTVCY29tcGxldGlvbiU1QjAlNUQlNUIlMjJjb250ZW50JTIyJTVEJTIwZm9yJTIwY29tcGxldGlvbiUyMGluJTIwY29tcGxldGlvbnMlNUQlMEElMjAlMjAlMjAlMjBleHRyYWN0ZWRfcmVzcG9uc2VzJTIwJTNEJTIwJTVCZXh0cmFjdF94bWxfYW5zd2VyKHIpJTIwZm9yJTIwciUyMGluJTIwcmVzcG9uc2VzJTVEJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwJTVCMC41JTIwaWYlMjByLmlzZGlnaXQoKSUyMGVsc2UlMjAwLjAlMjBmb3IlMjByJTIwaW4lMjBleHRyYWN0ZWRfcmVzcG9uc2VzJTVEJTBBJTBBJTBBJTIzJTIwRnVuYyVDOCU5QmlhJTIwZGUlMjByZWNvbXBlbnMlQzQlODMlMjBjYXJlJTIwdmVyaWZpYyVDNCU4MyUyMGRhYyVDNCU4MyUyMGNvbXBsZXRhcmVhJTIwdXJtZWF6JUM0JTgzJTIwZm9ybWF0dWwlMjBzdHJpY3QlMEFkZWYlMjBzdHJpY3RfZm9ybWF0X3Jld2FyZF9mdW5jKGNvbXBsZXRpb25zJTJDJTIwKiprd2FyZ3MpJTIwLSUzRSUyMGxpc3QlNUJmbG9hdCU1RCUzQSUwQSUyMCUyMCUyMCUyMHBhdHRlcm4lMjAlM0QlMjByJTIyJTVFJTNDcmVhc29uaW5nJTNFJTVDbi4qJTNGJTVDbiUzQyUyRnJlYXNvbmluZyUzRSU1Q24lM0NhbnN3ZXIlM0UlNUNuLiolM0YlNUNuJTNDJTJGYW5zd2VyJTNFJTVDbiUyNCUyMiUwQSUyMCUyMCUyMCUyMHJlc3BvbnNlcyUyMCUzRCUyMCU1QmNvbXBsZXRpb24lNUIwJTVEJTVCJTIyY29udGVudCUyMiU1RCUyMGZvciUyMGNvbXBsZXRpb24lMjBpbiUyMGNvbXBsZXRpb25zJTVEJTBBJTIwJTIwJTIwJTIwbWF0Y2hlcyUyMCUzRCUyMCU1QnJlLm1hdGNoKHBhdHRlcm4lMkMlMjByKSUyMGZvciUyMHIlMjBpbiUyMHJlc3BvbnNlcyU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMCU1QjAuNSUyMGlmJTIwbWF0Y2glMjBlbHNlJTIwMC4wJTIwZm9yJTIwbWF0Y2glMjBpbiUyMG1hdGNoZXMlNUQlMEElMEElMEElMjMlMjBGdW5jJUM4JTlCaWElMjBkZSUyMHJlY29tcGVucyVDNCU4MyUyMGNhcmUlMjB2ZXJpZmljJUM0JTgzJTIwZGFjJUM0JTgzJTIwY29tcGxldGFyZWElMjB1cm1lYXolQzQlODMlMjB1biUyMGZvcm1hdCUyMG1haSUyMHJlbGF4YXQlMEFkZWYlMjBzb2Z0X2Zvcm1hdF9yZXdhcmRfZnVuYyhjb21wbGV0aW9ucyUyQyUyMCoqa3dhcmdzKSUyMC0lM0UlMjBsaXN0JTVCZmxvYXQlNUQlM0ElMEElMjAlMjAlMjAlMjBwYXR0ZXJuJTIwJTNEJTIwciUyMiUzQ3JlYXNvbmluZyUzRS4qJTNGJTNDJTJGcmVhc29uaW5nJTNFJTVDcyolM0NhbnN3ZXIlM0UuKiUzRiUzQyUyRmFuc3dlciUzRSUyMiUwQSUyMCUyMCUyMCUyMHJlc3BvbnNlcyUyMCUzRCUyMCU1QmNvbXBsZXRpb24lNUIwJTVEJTVCJTIyY29udGVudCUyMiU1RCUyMGZvciUyMGNvbXBsZXRpb24lMjBpbiUyMGNvbXBsZXRpb25zJTVEJTBBJTIwJTIwJTIwJTIwbWF0Y2hlcyUyMCUzRCUyMCU1QnJlLm1hdGNoKHBhdHRlcm4lMkMlMjByKSUyMGZvciUyMHIlMjBpbiUyMHJlc3BvbnNlcyU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMCU1QjAuNSUyMGlmJTIwbWF0Y2glMjBlbHNlJTIwMC4wJTIwZm9yJTIwbWF0Y2glMjBpbiUyMG1hdGNoZXMlNUQlMEElMEElMEElMjMlMjBGdW5jJUM4JTlCaWElMjBkZSUyMHJlY29tcGVucyVDNCU4MyUyMGNhcmUlMjBudW0lQzQlODNyJUM0JTgzJTIwdGFnLXVyaWxlJTIwWE1MJTIwJUM4JTk5aSUyMHBlbmFsaXplYXolQzQlODMlMjBjb24lQzglOUJpbnV0dWwlMjBleHRyYSUwQWRlZiUyMGNvdW50X3htbCh0ZXh0KSUyMC0lM0UlMjBmbG9hdCUzQSUwQSUyMCUyMCUyMCUyMGNvdW50JTIwJTNEJTIwMC4wJTBBJTIwJTIwJTIwJTIwaWYlMjB0ZXh0LmNvdW50KCUyMiUzQ3JlYXNvbmluZyUzRSU1Q24lMjIpJTIwJTNEJTNEJTIwMSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvdW50JTIwJTJCJTNEJTIwMC4xMjUlMEElMjAlMjAlMjAlMjBpZiUyMHRleHQuY291bnQoJTIyJTVDbiUzQyUyRnJlYXNvbmluZyUzRSU1Q24lMjIpJTIwJTNEJTNEJTIwMSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvdW50JTIwJTJCJTNEJTIwMC4xMjUlMEElMjAlMjAlMjAlMjBpZiUyMHRleHQuY291bnQoJTIyJTVDbiUzQ2Fuc3dlciUzRSU1Q24lMjIpJTIwJTNEJTNEJTIwMSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvdW50JTIwJTJCJTNEJTIwMC4xMjUlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjb3VudCUyMC0lM0QlMjBsZW4odGV4dC5zcGxpdCglMjIlNUNuJTNDJTJGYW5zd2VyJTNFJTVDbiUyMiklNUItMSU1RCklMjAqJTIwMC4wMDElMEElMjAlMjAlMjAlMjBpZiUyMHRleHQuY291bnQoJTIyJTVDbiUzQyUyRmFuc3dlciUzRSUyMiklMjAlM0QlM0QlMjAxJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY291bnQlMjAlMkIlM0QlMjAwLjEyNSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvdW50JTIwLSUzRCUyMChsZW4odGV4dC5zcGxpdCglMjIlNUNuJTNDJTJGYW5zd2VyJTNFJTIyKSU1Qi0xJTVEKSUyMC0lMjAxKSUyMColMjAwLjAwMSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGNvdW50JTBBJTBBJTBBZGVmJTIweG1sY291bnRfcmV3YXJkX2Z1bmMoY29tcGxldGlvbnMlMkMlMjAqKmt3YXJncyklMjAtJTNFJTIwbGlzdCU1QmZsb2F0JTVEJTNBJTBBJTIwJTIwJTIwJTIwY29udGVudHMlMjAlM0QlMjAlNUJjb21wbGV0aW9uJTVCMCU1RCU1QiUyMmNvbnRlbnQlMjIlNUQlMjBmb3IlMjBjb21wbGV0aW9uJTIwaW4lMjBjb21wbGV0aW9ucyU1RCUwQSUyMCUyMCUyMCUyMHJldHVybiUyMCU1QmNvdW50X3htbChjKSUyMGZvciUyMGMlMjBpbiUyMGNvbnRlbnRzJTVE",highlighted:`<span class="hljs-comment"># Funcția de recompensă care verifică dacă răspunsul este corect</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">correctness_reward_func</span>(<span class="hljs-params">prompts, completions, answer, **kwargs</span>) -> <span class="hljs-built_in">list</span>[<span class="hljs-built_in">float</span>]: | |
| responses = [completion[<span class="hljs-number">0</span>][<span class="hljs-string">"content"</span>] <span class="hljs-keyword">for</span> completion <span class="hljs-keyword">in</span> completions] | |
| q = prompts[<span class="hljs-number">0</span>][-<span class="hljs-number">1</span>][<span class="hljs-string">"content"</span>] | |
| extracted_responses = [extract_xml_answer(r) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> responses] | |
| <span class="hljs-built_in">print</span>( | |
| <span class="hljs-string">"-"</span> * <span class="hljs-number">20</span>, | |
| <span class="hljs-string">f"Întrebare:\\n<span class="hljs-subst">{q}</span>"</span>, | |
| <span class="hljs-string">f"\\nRăspuns:\\n<span class="hljs-subst">{answer[<span class="hljs-number">0</span>]}</span>"</span>, | |
| <span class="hljs-string">f"\\nRăspuns model:\\n<span class="hljs-subst">{responses[<span class="hljs-number">0</span>]}</span>"</span>, | |
| <span class="hljs-string">f"\\nExtras:\\n<span class="hljs-subst">{extracted_responses[<span class="hljs-number">0</span>]}</span>"</span>, | |
| ) | |
| <span class="hljs-keyword">return</span> [<span class="hljs-number">2.0</span> <span class="hljs-keyword">if</span> r == a <span class="hljs-keyword">else</span> <span class="hljs-number">0.0</span> <span class="hljs-keyword">for</span> r, a <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(extracted_responses, answer)] | |
| <span class="hljs-comment"># Funcția de recompensă care verifică dacă răspunsul este un întreg</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">int_reward_func</span>(<span class="hljs-params">completions, **kwargs</span>) -> <span class="hljs-built_in">list</span>[<span class="hljs-built_in">float</span>]: | |
| responses = [completion[<span class="hljs-number">0</span>][<span class="hljs-string">"content"</span>] <span class="hljs-keyword">for</span> completion <span class="hljs-keyword">in</span> completions] | |
| extracted_responses = [extract_xml_answer(r) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> responses] | |
| <span class="hljs-keyword">return</span> [<span class="hljs-number">0.5</span> <span class="hljs-keyword">if</span> r.isdigit() <span class="hljs-keyword">else</span> <span class="hljs-number">0.0</span> <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> extracted_responses] | |
| <span class="hljs-comment"># Funcția de recompensă care verifică dacă completarea urmează formatul strict</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">strict_format_reward_func</span>(<span class="hljs-params">completions, **kwargs</span>) -> <span class="hljs-built_in">list</span>[<span class="hljs-built_in">float</span>]: | |
| pattern = <span class="hljs-string">r"^<reasoning>\\n.*?\\n</reasoning>\\n<answer>\\n.*?\\n</answer>\\n$"</span> | |
| responses = [completion[<span class="hljs-number">0</span>][<span class="hljs-string">"content"</span>] <span class="hljs-keyword">for</span> completion <span class="hljs-keyword">in</span> completions] | |
| matches = [re.<span class="hljs-keyword">match</span>(pattern, r) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> responses] | |
| <span class="hljs-keyword">return</span> [<span class="hljs-number">0.5</span> <span class="hljs-keyword">if</span> <span class="hljs-keyword">match</span> <span class="hljs-keyword">else</span> <span class="hljs-number">0.0</span> <span class="hljs-keyword">for</span> <span class="hljs-keyword">match</span> <span class="hljs-keyword">in</span> matches] | |
| <span class="hljs-comment"># Funcția de recompensă care verifică dacă completarea urmează un format mai relaxat</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">soft_format_reward_func</span>(<span class="hljs-params">completions, **kwargs</span>) -> <span class="hljs-built_in">list</span>[<span class="hljs-built_in">float</span>]: | |
| pattern = <span class="hljs-string">r"<reasoning>.*?</reasoning>\\s*<answer>.*?</answer>"</span> | |
| responses = [completion[<span class="hljs-number">0</span>][<span class="hljs-string">"content"</span>] <span class="hljs-keyword">for</span> completion <span class="hljs-keyword">in</span> completions] | |
| matches = [re.<span class="hljs-keyword">match</span>(pattern, r) <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> responses] | |
| <span class="hljs-keyword">return</span> [<span class="hljs-number">0.5</span> <span class="hljs-keyword">if</span> <span class="hljs-keyword">match</span> <span class="hljs-keyword">else</span> <span class="hljs-number">0.0</span> <span class="hljs-keyword">for</span> <span class="hljs-keyword">match</span> <span class="hljs-keyword">in</span> matches] | |
| <span class="hljs-comment"># Funcția de recompensă care numără tag-urile XML și penalizează conținutul extra</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">count_xml</span>(<span class="hljs-params">text</span>) -> <span class="hljs-built_in">float</span>: | |
| count = <span class="hljs-number">0.0</span> | |
| <span class="hljs-keyword">if</span> text.count(<span class="hljs-string">"<reasoning>\\n"</span>) == <span class="hljs-number">1</span>: | |
| count += <span class="hljs-number">0.125</span> | |
| <span class="hljs-keyword">if</span> text.count(<span class="hljs-string">"\\n</reasoning>\\n"</span>) == <span class="hljs-number">1</span>: | |
| count += <span class="hljs-number">0.125</span> | |
| <span class="hljs-keyword">if</span> text.count(<span class="hljs-string">"\\n<answer>\\n"</span>) == <span class="hljs-number">1</span>: | |
| count += <span class="hljs-number">0.125</span> | |
| count -= <span class="hljs-built_in">len</span>(text.split(<span class="hljs-string">"\\n</answer>\\n"</span>)[-<span class="hljs-number">1</span>]) * <span class="hljs-number">0.001</span> | |
| <span class="hljs-keyword">if</span> text.count(<span class="hljs-string">"\\n</answer>"</span>) == <span class="hljs-number">1</span>: | |
| count += <span class="hljs-number">0.125</span> | |
| count -= (<span class="hljs-built_in">len</span>(text.split(<span class="hljs-string">"\\n</answer>"</span>)[-<span class="hljs-number">1</span>]) - <span class="hljs-number">1</span>) * <span class="hljs-number">0.001</span> | |
| <span class="hljs-keyword">return</span> count | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">xmlcount_reward_func</span>(<span class="hljs-params">completions, **kwargs</span>) -> <span class="hljs-built_in">list</span>[<span class="hljs-built_in">float</span>]: | |
| contents = [completion[<span class="hljs-number">0</span>][<span class="hljs-string">"content"</span>] <span class="hljs-keyword">for</span> completion <span class="hljs-keyword">in</span> completions] | |
| <span class="hljs-keyword">return</span> [count_xml(c) <span class="hljs-keyword">for</span> c <span class="hljs-keyword">in</span> contents]`,wrap:!1}}),P=new d({props:{title:"Antrenarea cu GRPO",local:"antrenarea-cu-grpo",headingTag:"h2"}}),K=new T({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMEdSUE9Db25maWclMkMlMjBHUlBPVHJhaW5lciUwQSUwQW1heF9wcm9tcHRfbGVuZ3RoJTIwJTNEJTIwMjU2JTBBJTBBdHJhaW5pbmdfYXJncyUyMCUzRCUyMEdSUE9Db25maWcoJTBBJTIwJTIwJTIwJTIwbGVhcm5pbmdfcmF0ZSUzRDVlLTYlMkMlMEElMjAlMjAlMjAlMjBhZGFtX2JldGExJTNEMC45JTJDJTBBJTIwJTIwJTIwJTIwYWRhbV9iZXRhMiUzRDAuOTklMkMlMEElMjAlMjAlMjAlMjB3ZWlnaHRfZGVjYXklM0QwLjElMkMlMEElMjAlMjAlMjAlMjB3YXJtdXBfcmF0aW8lM0QwLjElMkMlMEElMjAlMjAlMjAlMjBscl9zY2hlZHVsZXJfdHlwZSUzRCUyMmNvc2luZSUyMiUyQyUwQSUyMCUyMCUyMCUyMG9wdGltJTNEJTIycGFnZWRfYWRhbXdfOGJpdCUyMiUyQyUwQSUyMCUyMCUyMCUyMGxvZ2dpbmdfc3RlcHMlM0QxJTJDJTBBJTIwJTIwJTIwJTIwcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTNEMSUyQyUwQSUyMCUyMCUyMCUyMGdyYWRpZW50X2FjY3VtdWxhdGlvbl9zdGVwcyUzRDElMkMlMjAlMjAlMjMlMjBDcmUlQzglOTl0ZSUyMGxhJTIwNCUyMHBlbnRydSUyMGFudHJlbmFyZSUyMG1haSUyMGxpbiVDNCU4MyUwQSUyMCUyMCUyMCUyMG51bV9nZW5lcmF0aW9ucyUzRDYlMkMlMjAlMjAlMjMlMjBTY2FkZSUyMGRhYyVDNCU4MyUyMHIlQzQlODNtJUMzJUEyaSUyMGYlQzQlODNyJUM0JTgzJTIwbWVtb3JpZSUwQSUyMCUyMCUyMCUyMG1heF9wcm9tcHRfbGVuZ3RoJTNEbWF4X3Byb21wdF9sZW5ndGglMkMlMEElMjAlMjAlMjAlMjBtYXhfY29tcGxldGlvbl9sZW5ndGglM0RtYXhfc2VxX2xlbmd0aCUyMC0lMjBtYXhfcHJvbXB0X2xlbmd0aCUyQyUwQSUyMCUyMCUyMCUyMCUyMyUyMG51bV90cmFpbl9lcG9jaHMlMjAlM0QlMjAxJTJDJTIwJTIzJTIwU2V0ZWF6JUM0JTgzJTIwbGElMjAxJTIwcGVudHJ1JTIwbyUyMHJ1bGFyZSUyMGNvbXBsZXQlQzQlODMlMjBkZSUyMGFudHJlbmFyZSUwQSUyMCUyMCUyMCUyMG1heF9zdGVwcyUzRDI1MCUyQyUwQSUyMCUyMCUyMCUyMHNhdmVfc3RlcHMlM0QyNTAlMkMlMEElMjAlMjAlMjAlMjBtYXhfZ3JhZF9ub3JtJTNEMC4xJTJDJTBBJTIwJTIwJTIwJTIwcmVwb3J0X3RvJTNEJTIybm9uZSUyMiUyQyUyMCUyMCUyMyUyMFBvYXRlJTIwZm9sb3NpJTIwV2VpZ2h0cyUyMCUyNiUyMEJpYXNlcyUwQSUyMCUyMCUyMCUyMG91dHB1dF9kaXIlM0QlMjJvdXRwdXRzJTIyJTJDJTBBKSUwQSUwQXRyYWluZXIlMjAlM0QlMjBHUlBPVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRG1vZGVsJTJDJTBBJTIwJTIwJTIwJTIwcHJvY2Vzc2luZ19jbGFzcyUzRHRva2VuaXplciUyQyUwQSUyMCUyMCUyMCUyMHJld2FyZF9mdW5jcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHhtbGNvdW50X3Jld2FyZF9mdW5jJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc29mdF9mb3JtYXRfcmV3YXJkX2Z1bmMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzdHJpY3RfZm9ybWF0X3Jld2FyZF9mdW5jJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaW50X3Jld2FyZF9mdW5jJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY29ycmVjdG5lc3NfcmV3YXJkX2Z1bmMlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjBhcmdzJTNEdHJhaW5pbmdfYXJncyUyQyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0RkYXRhc2V0JTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> GRPOConfig, GRPOTrainer | |
| max_prompt_length = <span class="hljs-number">256</span> | |
| training_args = GRPOConfig( | |
| learning_rate=<span class="hljs-number">5e-6</span>, | |
| adam_beta1=<span class="hljs-number">0.9</span>, | |
| adam_beta2=<span class="hljs-number">0.99</span>, | |
| weight_decay=<span class="hljs-number">0.1</span>, | |
| warmup_ratio=<span class="hljs-number">0.1</span>, | |
| lr_scheduler_type=<span class="hljs-string">"cosine"</span>, | |
| optim=<span class="hljs-string">"paged_adamw_8bit"</span>, | |
| logging_steps=<span class="hljs-number">1</span>, | |
| per_device_train_batch_size=<span class="hljs-number">1</span>, | |
| gradient_accumulation_steps=<span class="hljs-number">1</span>, <span class="hljs-comment"># Crește la 4 pentru antrenare mai lină</span> | |
| num_generations=<span class="hljs-number">6</span>, <span class="hljs-comment"># Scade dacă rămâi fără memorie</span> | |
| max_prompt_length=max_prompt_length, | |
| max_completion_length=max_seq_length - max_prompt_length, | |
| <span class="hljs-comment"># num_train_epochs = 1, # Setează la 1 pentru o rulare completă de antrenare</span> | |
| max_steps=<span class="hljs-number">250</span>, | |
| save_steps=<span class="hljs-number">250</span>, | |
| max_grad_norm=<span class="hljs-number">0.1</span>, | |
| report_to=<span class="hljs-string">"none"</span>, <span class="hljs-comment"># Poate folosi Weights & Biases</span> | |
| output_dir=<span class="hljs-string">"outputs"</span>, | |
| ) | |
| trainer = GRPOTrainer( | |
| model=model, | |
| processing_class=tokenizer, | |
| reward_funcs=[ | |
| xmlcount_reward_func, | |
| soft_format_reward_func, | |
| strict_format_reward_func, | |
| int_reward_func, | |
| correctness_reward_func, | |
| ], | |
| args=training_args, | |
| train_dataset=dataset, | |
| )`,wrap:!1}}),tl=new T({props:{code:"dHJhaW5lci50cmFpbigp",highlighted:"trainer.train()",wrap:!1}}),b=new Ns({props:{warning:!0,$$slots:{default:[fe]},$$scope:{ctx:h}}}),al=new d({props:{title:"Testarea Modelului",local:"testarea-modelului",headingTag:"h2"}}),Ml=new T({props:{code:"bW9kZWwuc2F2ZV9sb3JhKCUyMmdycG9fc2F2ZWRfbG9yYSUyMik=",highlighted:'model.save_lora(<span class="hljs-string">"grpo_saved_lora"</span>)',wrap:!1}}),il=new T({props:{code:"ZnJvbSUyMHZsbG0lMjBpbXBvcnQlMjBTYW1wbGluZ1BhcmFtcyUwQSUwQXRleHQlMjAlM0QlMjB0b2tlbml6ZXIuYXBwbHlfY2hhdF90ZW1wbGF0ZSglMEElMjAlMjAlMjAlMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMFNZU1RFTV9QUk9NUFQlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJDYWxjdWxlYXolQzQlODMlMjBwaS4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjB0b2tlbml6ZSUzREZhbHNlJTJDJTBBJTIwJTIwJTIwJTIwYWRkX2dlbmVyYXRpb25fcHJvbXB0JTNEVHJ1ZSUyQyUwQSklMEElMEFzYW1wbGluZ19wYXJhbXMlMjAlM0QlMjBTYW1wbGluZ1BhcmFtcyglMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuOCUyQyUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxMDI0JTJDJTBBKSUwQW91dHB1dCUyMCUzRCUyMCglMEElMjAlMjAlMjAlMjBtb2RlbC5mYXN0X2dlbmVyYXRlKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRleHQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzYW1wbGluZ19wYXJhbXMlM0RzYW1wbGluZ19wYXJhbXMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3JhX3JlcXVlc3QlM0Rtb2RlbC5sb2FkX2xvcmEoJTIyZ3Jwb19zYXZlZF9sb3JhJTIyKSUyQyUwQSUyMCUyMCUyMCUyMCklNUIwJTVEJTBBJTIwJTIwJTIwJTIwLm91dHB1dHMlNUIwJTVEJTBBJTIwJTIwJTIwJTIwLnRleHQlMEEpJTBBJTBBcHJpbnQob3V0cHV0KQ==",highlighted:`<span class="hljs-keyword">from</span> vllm <span class="hljs-keyword">import</span> SamplingParams | |
| text = tokenizer.apply_chat_template( | |
| [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: SYSTEM_PROMPT}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Calculează pi."</span>}, | |
| ], | |
| tokenize=<span class="hljs-literal">False</span>, | |
| add_generation_prompt=<span class="hljs-literal">True</span>, | |
| ) | |
| sampling_params = SamplingParams( | |
| temperature=<span class="hljs-number">0.8</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| max_tokens=<span class="hljs-number">1024</span>, | |
| ) | |
| output = ( | |
| model.fast_generate( | |
| text, | |
| sampling_params=sampling_params, | |
| lora_request=model.load_lora(<span class="hljs-string">"grpo_saved_lora"</span>), | |
| )[<span class="hljs-number">0</span>] | |
| .outputs[<span class="hljs-number">0</span>] | |
| .text | |
| ) | |
| <span class="hljs-built_in">print</span>(output)`,wrap:!1}}),cl=new d({props:{title:"Salvarea Modelului",local:"salvarea-modelului",headingTag:"h2"}}),yl=new T({props:{code:"JTIzJTIwU2FsdmVheiVDNCU4MyUyMGN1JTIwcHJlY2l6aWUlMjBkZSUyMDE2JTIwYmklQzglOUJpJTBBbW9kZWwuc2F2ZV9wcmV0cmFpbmVkX21lcmdlZCglMjJtb2RlbCUyMiUyQyUyMHRva2VuaXplciUyQyUyMHNhdmVfbWV0aG9kJTNEJTIybWVyZ2VkXzE2Yml0JTIyKQ==",highlighted:`<span class="hljs-comment"># Salvează cu precizie de 16 biți</span> | |
| model.save_pretrained_merged(<span class="hljs-string">"model"</span>, tokenizer, save_method=<span class="hljs-string">"merged_16bit"</span>)`,wrap:!1}}),Jl=new d({props:{title:"Încărcarea pe Hugging Face Hub",local:"încărcarea-pe-hugging-face-hub",headingTag:"h2"}}),Ul=new T({props:{code:"JTIzJTIwJUMzJThFbmNhcmMlQzQlODMlMjBwZSUyMEh1Z2dpbmclMjBGYWNlJTIwSHViJTIwKG5lY2VzaXQlQzQlODMlMjB1biUyMHRva2VuKSUwQW1vZGVsLnB1c2hfdG9faHViX21lcmdlZCglMEElMjAlMjAlMjAlMjAlMjJudW1lbGUtdGF1JTJGbnVtZWxlLW1vZGVsdWx1aSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRva2VuaXplciUyQyUwQSUyMCUyMCUyMCUyMHNhdmVfbWV0aG9kJTNEJTIybWVyZ2VkXzE2Yml0JTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW4lM0QlMjJ0b2tlbi11bC10YXUlMjIlMkMlMEEp",highlighted:`<span class="hljs-comment"># Încarcă pe Hugging Face Hub (necesită un token)</span> | |
| model.push_to_hub_merged( | |
| <span class="hljs-string">"numele-tau/numele-modelului"</span>, | |
| tokenizer, | |
| save_method=<span class="hljs-string">"merged_16bit"</span>, | |
| token=<span class="hljs-string">"token-ul-tau"</span>, | |
| )`,wrap:!1}}),jl=new T({props:{code:"bW9kZWwucHVzaF90b19odWJfZ2d1ZiglMEElMjAlMjAlMjAlMjAlMjJudW1lbGUtdGF1JTJGbnVtZWxlLW1vZGVsdWx1aSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRva2VuaXplciUyQyUwQSUyMCUyMCUyMCUyMHF1YW50aXphdGlvbl9tZXRob2QlM0QlNUIlMjJxNF9rX20lMjIlMkMlMjAlMjJxOF8wJTIyJTJDJTIwJTIycTVfa19tJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW4lM0QlMjJ0b2tlbi11bC10YXUlMjIlMkMlMEEp",highlighted:`model.push_to_hub_gguf( | |
| <span class="hljs-string">"numele-tau/numele-modelului"</span>, | |
| tokenizer, | |
| quantization_method=[<span class="hljs-string">"q4_k_m"</span>, <span class="hljs-string">"q8_0"</span>, <span class="hljs-string">"q5_k_m"</span>], | |
| token=<span class="hljs-string">"token-ul-tau"</span>, | |
| )`,wrap:!1}}),wl=new d({props:{title:"Concluzie",local:"concluzie",headingTag:"h2"}}),fl=new Ce({props:{source:"https://github.com/huggingface/course/blob/main/chapters/rum/chapter12/6.mdx"}}),{c(){i=M("meta"),j=a(),m=M("p"),w=a(),c(f.$$.fragment),Bl=a(),c(g.$$.fragment),Ql=a(),B=M("p"),B.innerHTML=Vs,Gl=a(),Q=M("p"),Q.textContent=As,Zl=a(),c(C.$$.fragment),_l=a(),c(G.$$.fragment),Nl=a(),Z=M("p"),Z.textContent=$s,zl=a(),c(_.$$.fragment),Vl=a(),c(N.$$.fragment),Al=a(),z=M("p"),z.innerHTML=vs,$l=a(),c(V.$$.fragment),vl=a(),A=M("p"),A.textContent=Rs,Rl=a(),c($.$$.fragment),kl=a(),v=M("p"),v.innerHTML=ks,El=a(),c(I.$$.fragment),Xl=a(),c(R.$$.fragment),xl=a(),k=M("p"),k.textContent=Es,Wl=a(),E=M("p"),E.textContent=Xs,Sl=a(),c(X.$$.fragment),Fl=a(),x=M("p"),x.textContent=xs,Yl=a(),c(W.$$.fragment),ql=a(),S=M("p"),S.textContent=Ws,Hl=a(),c(F.$$.fragment),Dl=a(),Y=M("p"),Y.innerHTML=Ss,Ll=a(),q=M("p"),q.textContent=Fs,Pl=a(),c(H.$$.fragment),Ol=a(),D=M("p"),D.textContent=Ys,Kl=a(),L=M("table"),L.innerHTML=qs,ls=a(),c(P.$$.fragment),ss=a(),O=M("p"),O.innerHTML=Hs,es=a(),c(K.$$.fragment),ts=a(),ll=M("p"),ll.innerHTML=Ds,as=a(),sl=M("ul"),sl.innerHTML=Ls,ns=a(),el=M("p"),el.textContent=Ps,Ms=a(),c(tl.$$.fragment),ps=a(),c(b.$$.fragment),is=a(),c(al.$$.fragment),rs=a(),nl=M("p"),nl.textContent=Os,cs=a(),c(Ml.$$.fragment),us=a(),pl=M("p"),pl.textContent=Ks,ys=a(),c(il.$$.fragment),Js=a(),rl=M("p"),rl.textContent=le,os=a(),c(cl.$$.fragment),Us=a(),ul=M("p"),ul.textContent=se,ms=a(),c(yl.$$.fragment),js=a(),c(Jl.$$.fragment),Ts=a(),ol=M("p"),ol.innerHTML=ee,ws=a(),c(Ul.$$.fragment),ds=a(),ml=M("p"),ml.textContent=te,hs=a(),c(jl.$$.fragment),Cs=a(),Tl=M("p"),Tl.textContent=ae,Is=a(),c(wl.$$.fragment),bs=a(),dl=M("p"),dl.textContent=ne,fs=a(),hl=M("ol"),hl.innerHTML=Me,gs=a(),Cl=M("p"),Cl.textContent=pe,Bs=a(),Il=M("p"),Il.textContent=ie,Qs=a(),bl=M("ul"),bl.innerHTML=re,Gs=a(),c(fl.$$.fragment),Zs=a(),gl=M("p"),this.h()},l(l){const s=we("svelte-u9bgzb",document.head);i=p(s,"META",{name:!0,content:!0}),s.forEach(e),j=n(l),m=p(l,"P",{}),Je(m).forEach(e),w=n(l),u(f.$$.fragment,l),Bl=n(l),u(g.$$.fragment,l),Ql=n(l),B=p(l,"P",{"data-svelte-h":!0}),r(B)!=="svelte-1czfiii"&&(B.innerHTML=Vs),Gl=n(l),Q=p(l,"P",{"data-svelte-h":!0}),r(Q)!=="svelte-gujhi0"&&(Q.textContent=As),Zl=n(l),u(C.$$.fragment,l),_l=n(l),u(G.$$.fragment,l),Nl=n(l),Z=p(l,"P",{"data-svelte-h":!0}),r(Z)!=="svelte-2per9o"&&(Z.textContent=$s),zl=n(l),u(_.$$.fragment,l),Vl=n(l),u(N.$$.fragment,l),Al=n(l),z=p(l,"P",{"data-svelte-h":!0}),r(z)!=="svelte-1uxs7kh"&&(z.innerHTML=vs),$l=n(l),u(V.$$.fragment,l),vl=n(l),A=p(l,"P",{"data-svelte-h":!0}),r(A)!=="svelte-1w7fftr"&&(A.textContent=Rs),Rl=n(l),u($.$$.fragment,l),kl=n(l),v=p(l,"P",{"data-svelte-h":!0}),r(v)!=="svelte-105vqkb"&&(v.innerHTML=ks),El=n(l),u(I.$$.fragment,l),Xl=n(l),u(R.$$.fragment,l),xl=n(l),k=p(l,"P",{"data-svelte-h":!0}),r(k)!=="svelte-1936rd4"&&(k.textContent=Es),Wl=n(l),E=p(l,"P",{"data-svelte-h":!0}),r(E)!=="svelte-savllt"&&(E.textContent=Xs),Sl=n(l),u(X.$$.fragment,l),Fl=n(l),x=p(l,"P",{"data-svelte-h":!0}),r(x)!=="svelte-1tiu5id"&&(x.textContent=xs),Yl=n(l),u(W.$$.fragment,l),ql=n(l),S=p(l,"P",{"data-svelte-h":!0}),r(S)!=="svelte-qv1780"&&(S.textContent=Ws),Hl=n(l),u(F.$$.fragment,l),Dl=n(l),Y=p(l,"P",{"data-svelte-h":!0}),r(Y)!=="svelte-1858ain"&&(Y.innerHTML=Ss),Ll=n(l),q=p(l,"P",{"data-svelte-h":!0}),r(q)!=="svelte-b0lkty"&&(q.textContent=Fs),Pl=n(l),u(H.$$.fragment,l),Ol=n(l),D=p(l,"P",{"data-svelte-h":!0}),r(D)!=="svelte-1szvi14"&&(D.textContent=Ys),Kl=n(l),L=p(l,"TABLE",{"data-svelte-h":!0}),r(L)!=="svelte-18ti4cp"&&(L.innerHTML=qs),ls=n(l),u(P.$$.fragment,l),ss=n(l),O=p(l,"P",{"data-svelte-h":!0}),r(O)!=="svelte-wxqqvx"&&(O.innerHTML=Hs),es=n(l),u(K.$$.fragment,l),ts=n(l),ll=p(l,"P",{"data-svelte-h":!0}),r(ll)!=="svelte-yoo396"&&(ll.innerHTML=Ds),as=n(l),sl=p(l,"UL",{"data-svelte-h":!0}),r(sl)!=="svelte-1xmdbk7"&&(sl.innerHTML=Ls),ns=n(l),el=p(l,"P",{"data-svelte-h":!0}),r(el)!=="svelte-32t6p"&&(el.textContent=Ps),Ms=n(l),u(tl.$$.fragment,l),ps=n(l),u(b.$$.fragment,l),is=n(l),u(al.$$.fragment,l),rs=n(l),nl=p(l,"P",{"data-svelte-h":!0}),r(nl)!=="svelte-wwrmb7"&&(nl.textContent=Os),cs=n(l),u(Ml.$$.fragment,l),us=n(l),pl=p(l,"P",{"data-svelte-h":!0}),r(pl)!=="svelte-fmezoh"&&(pl.textContent=Ks),ys=n(l),u(il.$$.fragment,l),Js=n(l),rl=p(l,"P",{"data-svelte-h":!0}),r(rl)!=="svelte-1k72oxj"&&(rl.textContent=le),os=n(l),u(cl.$$.fragment,l),Us=n(l),ul=p(l,"P",{"data-svelte-h":!0}),r(ul)!=="svelte-1gfjffu"&&(ul.textContent=se),ms=n(l),u(yl.$$.fragment,l),js=n(l),u(Jl.$$.fragment,l),Ts=n(l),ol=p(l,"P",{"data-svelte-h":!0}),r(ol)!=="svelte-3fz42z"&&(ol.innerHTML=ee),ws=n(l),u(Ul.$$.fragment,l),ds=n(l),ml=p(l,"P",{"data-svelte-h":!0}),r(ml)!=="svelte-rhxj0n"&&(ml.textContent=te),hs=n(l),u(jl.$$.fragment,l),Cs=n(l),Tl=p(l,"P",{"data-svelte-h":!0}),r(Tl)!=="svelte-ru1qrx"&&(Tl.textContent=ae),Is=n(l),u(wl.$$.fragment,l),bs=n(l),dl=p(l,"P",{"data-svelte-h":!0}),r(dl)!=="svelte-1hcn9j3"&&(dl.textContent=ne),fs=n(l),hl=p(l,"OL",{"data-svelte-h":!0}),r(hl)!=="svelte-yuxkjo"&&(hl.innerHTML=Me),gs=n(l),Cl=p(l,"P",{"data-svelte-h":!0}),r(Cl)!=="svelte-1ic5kr1"&&(Cl.textContent=pe),Bs=n(l),Il=p(l,"P",{"data-svelte-h":!0}),r(Il)!=="svelte-cg9ehp"&&(Il.textContent=ie),Qs=n(l),bl=p(l,"UL",{"data-svelte-h":!0}),r(bl)!=="svelte-1c0k3wb"&&(bl.innerHTML=re),Gs=n(l),u(fl.$$.fragment,l),Zs=n(l),gl=p(l,"P",{}),Je(gl).forEach(e),this.h()},h(){oe(i,"name","hf:doc:metadata"),oe(i,"content",Be)},m(l,s){de(document.head,i),t(l,j,s),t(l,m,s),t(l,w,s),y(f,l,s),t(l,Bl,s),y(g,l,s),t(l,Ql,s),t(l,B,s),t(l,Gl,s),t(l,Q,s),t(l,Zl,s),y(C,l,s),t(l,_l,s),y(G,l,s),t(l,Nl,s),t(l,Z,s),t(l,zl,s),y(_,l,s),t(l,Vl,s),y(N,l,s),t(l,Al,s),t(l,z,s),t(l,$l,s),y(V,l,s),t(l,vl,s),t(l,A,s),t(l,Rl,s),y($,l,s),t(l,kl,s),t(l,v,s),t(l,El,s),y(I,l,s),t(l,Xl,s),y(R,l,s),t(l,xl,s),t(l,k,s),t(l,Wl,s),t(l,E,s),t(l,Sl,s),y(X,l,s),t(l,Fl,s),t(l,x,s),t(l,Yl,s),y(W,l,s),t(l,ql,s),t(l,S,s),t(l,Hl,s),y(F,l,s),t(l,Dl,s),t(l,Y,s),t(l,Ll,s),t(l,q,s),t(l,Pl,s),y(H,l,s),t(l,Ol,s),t(l,D,s),t(l,Kl,s),t(l,L,s),t(l,ls,s),y(P,l,s),t(l,ss,s),t(l,O,s),t(l,es,s),y(K,l,s),t(l,ts,s),t(l,ll,s),t(l,as,s),t(l,sl,s),t(l,ns,s),t(l,el,s),t(l,Ms,s),y(tl,l,s),t(l,ps,s),y(b,l,s),t(l,is,s),y(al,l,s),t(l,rs,s),t(l,nl,s),t(l,cs,s),y(Ml,l,s),t(l,us,s),t(l,pl,s),t(l,ys,s),y(il,l,s),t(l,Js,s),t(l,rl,s),t(l,os,s),y(cl,l,s),t(l,Us,s),t(l,ul,s),t(l,ms,s),y(yl,l,s),t(l,js,s),y(Jl,l,s),t(l,Ts,s),t(l,ol,s),t(l,ws,s),y(Ul,l,s),t(l,ds,s),t(l,ml,s),t(l,hs,s),y(jl,l,s),t(l,Cs,s),t(l,Tl,s),t(l,Is,s),y(wl,l,s),t(l,bs,s),t(l,dl,s),t(l,fs,s),t(l,hl,s),t(l,gs,s),t(l,Cl,s),t(l,Bs,s),t(l,Il,s),t(l,Qs,s),t(l,bl,s),t(l,Gs,s),y(fl,l,s),t(l,Zs,s),t(l,gl,s),_s=!0},p(l,[s]){const ce={};s&2&&(ce.$$scope={dirty:s,ctx:l}),C.$set(ce);const ue={};s&2&&(ue.$$scope={dirty:s,ctx:l}),I.$set(ue);const ye={};s&2&&(ye.$$scope={dirty:s,ctx:l}),b.$set(ye)},i(l){_s||(J(f.$$.fragment,l),J(g.$$.fragment,l),J(C.$$.fragment,l),J(G.$$.fragment,l),J(_.$$.fragment,l),J(N.$$.fragment,l),J(V.$$.fragment,l),J($.$$.fragment,l),J(I.$$.fragment,l),J(R.$$.fragment,l),J(X.$$.fragment,l),J(W.$$.fragment,l),J(F.$$.fragment,l),J(H.$$.fragment,l),J(P.$$.fragment,l),J(K.$$.fragment,l),J(tl.$$.fragment,l),J(b.$$.fragment,l),J(al.$$.fragment,l),J(Ml.$$.fragment,l),J(il.$$.fragment,l),J(cl.$$.fragment,l),J(yl.$$.fragment,l),J(Jl.$$.fragment,l),J(Ul.$$.fragment,l),J(jl.$$.fragment,l),J(wl.$$.fragment,l),J(fl.$$.fragment,l),_s=!0)},o(l){o(f.$$.fragment,l),o(g.$$.fragment,l),o(C.$$.fragment,l),o(G.$$.fragment,l),o(_.$$.fragment,l),o(N.$$.fragment,l),o(V.$$.fragment,l),o($.$$.fragment,l),o(I.$$.fragment,l),o(R.$$.fragment,l),o(X.$$.fragment,l),o(W.$$.fragment,l),o(F.$$.fragment,l),o(H.$$.fragment,l),o(P.$$.fragment,l),o(K.$$.fragment,l),o(tl.$$.fragment,l),o(b.$$.fragment,l),o(al.$$.fragment,l),o(Ml.$$.fragment,l),o(il.$$.fragment,l),o(cl.$$.fragment,l),o(yl.$$.fragment,l),o(Jl.$$.fragment,l),o(Ul.$$.fragment,l),o(jl.$$.fragment,l),o(wl.$$.fragment,l),o(fl.$$.fragment,l),_s=!1},d(l){l&&(e(j),e(m),e(w),e(Bl),e(Ql),e(B),e(Gl),e(Q),e(Zl),e(_l),e(Nl),e(Z),e(zl),e(Vl),e(Al),e(z),e($l),e(vl),e(A),e(Rl),e(kl),e(v),e(El),e(Xl),e(xl),e(k),e(Wl),e(E),e(Sl),e(Fl),e(x),e(Yl),e(ql),e(S),e(Hl),e(Dl),e(Y),e(Ll),e(q),e(Pl),e(Ol),e(D),e(Kl),e(L),e(ls),e(ss),e(O),e(es),e(ts),e(ll),e(as),e(sl),e(ns),e(el),e(Ms),e(ps),e(is),e(rs),e(nl),e(cs),e(us),e(pl),e(ys),e(Js),e(rl),e(os),e(Us),e(ul),e(ms),e(js),e(Ts),e(ol),e(ws),e(ds),e(ml),e(hs),e(Cs),e(Tl),e(Is),e(bs),e(dl),e(fs),e(hl),e(gs),e(Cl),e(Bs),e(Il),e(Qs),e(bl),e(Gs),e(Zs),e(gl)),e(i),U(f,l),U(g,l),U(C,l),U(G,l),U(_,l),U(N,l),U(V,l),U($,l),U(I,l),U(R,l),U(X,l),U(W,l),U(F,l),U(H,l),U(P,l),U(K,l),U(tl,l),U(b,l),U(al,l),U(Ml,l),U(il,l),U(cl,l),U(yl,l),U(Jl,l),U(Ul,l),U(jl,l),U(wl,l),U(fl,l)}}}const Be='{"title":"Exercițiu Practic: GRPO cu Unsloth","local":"exercițiu-practic-grpo-cu-unsloth","sections":[{"title":"Instalează dependențele","local":"instalează-dependențele","sections":[],"depth":2},{"title":"Configurarea Unsloth","local":"configurarea-unsloth","sections":[],"depth":2},{"title":"Pregătirea Datelor","local":"pregătirea-datelor","sections":[],"depth":2},{"title":"Definirea Funcțiilor de Recompensă","local":"definirea-funcțiilor-de-recompensă","sections":[],"depth":2},{"title":"Antrenarea cu GRPO","local":"antrenarea-cu-grpo","sections":[],"depth":2},{"title":"Testarea Modelului","local":"testarea-modelului","sections":[],"depth":2},{"title":"Salvarea Modelului","local":"salvarea-modelului","sections":[],"depth":2},{"title":"Încărcarea pe Hugging Face Hub","local":"încărcarea-pe-hugging-face-hub","sections":[],"depth":2},{"title":"Concluzie","local":"concluzie","sections":[],"depth":2}],"depth":1}';function Qe(h){return me(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ae extends je{constructor(i){super(),Te(this,i,Qe,ge,Ue,{})}}export{Ae as component}; | |
Xet Storage Details
- Size:
- 51 kB
- Xet hash:
- 1a868b2b3ef39821945b42e263f109f7b611d7422a79fc21501d58043d7d1a95
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.