Buckets:
| import{s as ol,o as rl,n as ml}from"../chunks/scheduler.37c15a92.js";import{S as pl,i as Ml,g as i,s,r,A as dl,h as o,f as l,c as a,j as al,u as m,x as h,k as il,y as yl,a as n,v as p,d as M,t as d,w as y}from"../chunks/index.7cb9c9b8.js";import{T as hl}from"../chunks/Tip.d10b3fc9.js";import{C as c}from"../chunks/CodeBlock.abae2786.js";import{C as ul}from"../chunks/CourseFloatingBanner.df82c153.js";import{H as w,E as cl}from"../chunks/getInferenceSnippets.a2135f3c.js";function wl($e){let u,T='This exercise was written by LLM fine-tuning expert <a href="https://huggingface.co/mlabonne" rel="nofollow">@mlabonne</a>.';return{c(){u=i("p"),u.innerHTML=T},l(J){u=o(J,"P",{"data-svelte-h":!0}),h(u)!=="svelte-1hjy8dv"&&(u.innerHTML=T)},m(J,Ue){n(J,u,Ue)},p:ml,d(J){J&&l(u)}}}function Jl($e){let u,T,J,Ue,g,Ie,b,Ge,U,Zt="Now that you’ve seen the theory, let’s put it into practice! In this exercise, you’ll fine-tune a model with GRPO.",Ce,f,Be,j,Ze,$,vt="First, let’s install the dependencies for this exercise.",ve,I,We,G,Wt="Now we’ll import the necessary libraries.",xe,C,Re,B,Xe,Z,xt="Weights & Biases is a tool for logging and monitoring your experiments. We’ll use it to log our fine-tuning process.",Fe,v,ke,W,Rt="You can do this exercise without logging in to Weights & Biases, but it’s recommended to do so to track your experiments and interpret the results.",_e,x,Qe,R,Xt='Now, let’s load the dataset. In this case, we’ll use the <a href="https://huggingface.co/datasets/mlabonne/smoltldr" rel="nofollow"><code>mlabonne/smoltldr</code></a> dataset, which contains a list of short stories.',Ve,X,Ye,F,Ee,k,Ft="Now, let’s load the model.",Ne,_,kt='For this exercise, we’ll use the <a href="https://huggingface.co/HuggingFaceTB/SmolLM2-135M" rel="nofollow"><code>SmolLM2-135M</code></a> model.',ze,Q,_t='This is a small 135M parameter model that runs on limited hardware. This makes the model ideal for learning, but it’s not the most powerful model out there. If you have access to more powerful hardware, you can try to fine-tune a larger model like <a href="https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B" rel="nofollow"><code>SmolLM2-1.7B</code></a>.',He,V,Ae,Y,Se,E,Qt="Now, let’s load the LoRA configuration. We’ll take advantage of LoRA to reduce the number of trainable parameters, and in turn the memory footprint we need to fine-tune the model.",qe,N,Vt='If you’re not familiar with LoRA, you can read more about it in <a href="https://huggingface.co/learn/course/en/chapter11/3" rel="nofollow">Chapter 11</a>.',Le,z,Pe,H,De,A,Oe,S,Yt="As mentioned in the previous section, GRPO can use any reward function to improve the model. In this case, we’ll use a simple reward function that encourages the model to generate text that is 50 tokens long.",Ke,q,et,L,tt,P,Et="Now, let’s define the training arguments. We’ll use the <code>GRPOConfig</code> class to define the training arguments in a typical <code>transformers</code> style.",lt,D,Nt='If this is the first time you’re defining training arguments, you can check the <a href="https://huggingface.co/docs/transformers/en/main_classes/trainer#trainingarguments" rel="nofollow">TrainingArguments</a> class for more information, or <a href="https://huggingface.co/learn/course/en/chapter2/1" rel="nofollow">Chapter 2</a> for a detailed introduction.',nt,O,st,K,zt="Now, we can initialize the trainer with model, dataset, and training arguments and start training.",at,ee,it,te,Ht="Training takes around 1 hour on a single A10G GPU which is available on Google Colab or via Hugging Face Spaces.",ot,le,rt,ne,At="If we set the <code>push_to_hub</code> argument to <code>True</code> and the <code>model_id</code> argument to a valid model name, the model will be pushed to the Hugging Face Hub whilst we’re training. This is useful if you want to start vibe testing the model straight away!",mt,se,pt,ae,St="<code>GRPOTrainer</code> logs the reward from your reward function, the loss, and a range of other metrics.",Mt,ie,qt="We will focus on the reward from the reward function and the loss.",dt,oe,Lt="As you can see, the reward from the reward function moves closer to 0 as the model learns. This is a good sign that the model is learning to generate text of the correct length.",yt,re,Pt='<img src="https://huggingface.co/reasoning-course/images/resolve/main/grpo/13.png" alt="Reward from reward function"/>',ht,me,Dt="You might notice that the loss starts at zero and then increases during training, which may seem counterintuitive. This behavior is expected in GRPO and is directly related to the mathematical formulation of the algorithm. The loss in GRPO is proportional to the KL divergence (the cap relative to original policy) . As training progresses, the model learns to generate text that better matches the reward function, causing it to diverge more from its initial policy. This increasing divergence is reflected in the rising loss value, which actually indicates that the model is successfully adapting to optimize for the reward function.",ut,pe,Ot='<img src="https://huggingface.co/reasoning-course/images/resolve/main/grpo/14.png" alt="Loss"/>',ct,Me,wt,de,Kt="Let’s share the model with the community!",Jt,ye,ft,he,Tt,ue,el="🎉 You’ve successfully fine-tuned a model with GRPO! Now, let’s generate some text with the model.",gt,ce,tl="First, we’ll define a really long document!",bt,we,Ut,Je,ll="Now, we can generate text with the model.",jt,fe,$t,Te,It,ge,nl="In this chapter, we’ve seen how to fine-tune a model with GRPO. We’ve also seen how to interpret the training results and generate text with the model.",Gt,be,Ct,je,Bt;return g=new ul({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/course/en/chapter12/grpo_finetune.ipynb"}]}}),b=new w({props:{title:"Practical Exercise: Fine-tune a model with GRPO",local:"practical-exercise-fine-tune-a-model-with-grpo",headingTag:"h1"}}),f=new hl({props:{$$slots:{default:[wl]},$$scope:{ctx:$e}}}),j=new w({props:{title:"Install dependencies",local:"install-dependencies",headingTag:"h2"}}),I=new c({props:{code:"IXBpcCUyMGluc3RhbGwlMjAtcXFxJTIwZGF0YXNldHMlM0QlM0QzLjIuMCUyMHRyYW5zZm9ybWVycyUzRCUzRDQuNDcuMSUyMHRybCUzRCUzRDAuMTQuMCUyMHBlZnQlM0QlM0QwLjE0LjAlMjBhY2NlbGVyYXRlJTNEJTNEMS4yLjElMjBiaXRzYW5kYnl0ZXMlM0QlM0QwLjQ1LjIlMjB3YW5kYiUzRCUzRDAuMTkuNyUyMC0tcHJvZ3Jlc3MtYmFyJTIwb2ZmJTBBIXBpcCUyMGluc3RhbGwlMjAtcXFxJTIwZmxhc2gtYXR0biUyMC0tbm8tYnVpbGQtaXNvbGF0aW9uJTIwLS1wcm9ncmVzcy1iYXIlMjBvZmY=",highlighted:`!pip install -qqq datasets==3.2.0 transformers==4.47.1 trl==0.14.0 peft==0.14.0 accelerate==1.2.1 bitsandbytes==0.45.2 wandb==0.19.7 --progress-bar off | |
| !pip install -qqq flash-attn --no-build-isolation --progress-bar off`,wrap:!1}}),C=new c({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEFmcm9tJTIwcGVmdCUyMGltcG9ydCUyMExvcmFDb25maWclMkMlMjBnZXRfcGVmdF9tb2RlbCUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTSUyQyUyMEF1dG9Ub2tlbml6ZXIlMEFmcm9tJTIwdHJsJTIwaW1wb3J0JTIwR1JQT0NvbmZpZyUyQyUyMEdSUE9UcmFpbmVy",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, get_peft_model | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> GRPOConfig, GRPOTrainer`,wrap:!1}}),B=new w({props:{title:"Import and log in to Weights & Biases",local:"import-and-log-in-to-weights--biases",headingTag:"h2"}}),v=new c({props:{code:"aW1wb3J0JTIwd2FuZGIlMEElMEF3YW5kYi5sb2dpbigp",highlighted:`<span class="hljs-keyword">import</span> wandb | |
| wandb.login()`,wrap:!1}}),x=new w({props:{title:"Load the dataset",local:"load-the-dataset",headingTag:"h2"}}),X=new c({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJtbGFib25uZSUyRnNtb2x0bGRyJTIyKSUwQXByaW50KGRhdGFzZXQp",highlighted:`dataset = load_dataset(<span class="hljs-string">"mlabonne/smoltldr"</span>) | |
| <span class="hljs-built_in">print</span>(dataset)`,wrap:!1}}),F=new w({props:{title:"Load model",local:"load-model",headingTag:"h2"}}),V=new c({props:{code:"bW9kZWxfaWQlMjAlM0QlMjAlMjJIdWdnaW5nRmFjZVRCJTJGU21vbExNLTEzNU0tSW5zdHJ1Y3QlMjIlMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9pZCUyQyUwQSUyMCUyMCUyMCUyMHRvcmNoX2R0eXBlJTNEJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMGRldmljZV9tYXAlM0QlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwYXR0bl9pbXBsZW1lbnRhdGlvbiUzRCUyMmZsYXNoX2F0dGVudGlvbl8yJTIyJTJDJTBBKSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkKQ==",highlighted:`model_id = <span class="hljs-string">"HuggingFaceTB/SmolLM-135M-Instruct"</span> | |
| model = AutoModelForCausalLM.from_pretrained( | |
| model_id, | |
| torch_dtype=<span class="hljs-string">"auto"</span>, | |
| device_map=<span class="hljs-string">"auto"</span>, | |
| attn_implementation=<span class="hljs-string">"flash_attention_2"</span>, | |
| ) | |
| tokenizer = AutoTokenizer.from_pretrained(model_id)`,wrap:!1}}),Y=new w({props:{title:"Load LoRA",local:"load-lora",headingTag:"h2"}}),z=new c({props:{code:"JTIzJTIwTG9hZCUyMExvUkElMEFsb3JhX2NvbmZpZyUyMCUzRCUyMExvcmFDb25maWcoJTBBJTIwJTIwJTIwJTIwdGFza190eXBlJTNEJTIyQ0FVU0FMX0xNJTIyJTJDJTBBJTIwJTIwJTIwJTIwciUzRDE2JTJDJTBBJTIwJTIwJTIwJTIwbG9yYV9hbHBoYSUzRDMyJTJDJTBBJTIwJTIwJTIwJTIwdGFyZ2V0X21vZHVsZXMlM0QlMjJhbGwtbGluZWFyJTIyJTJDJTBBKSUwQW1vZGVsJTIwJTNEJTIwZ2V0X3BlZnRfbW9kZWwobW9kZWwlMkMlMjBsb3JhX2NvbmZpZyklMEFwcmludChtb2RlbC5wcmludF90cmFpbmFibGVfcGFyYW1ldGVycygpKQ==",highlighted:`<span class="hljs-comment"># Load LoRA</span> | |
| lora_config = LoraConfig( | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| r=<span class="hljs-number">16</span>, | |
| lora_alpha=<span class="hljs-number">32</span>, | |
| target_modules=<span class="hljs-string">"all-linear"</span>, | |
| ) | |
| model = get_peft_model(model, lora_config) | |
| <span class="hljs-built_in">print</span>(model.print_trainable_parameters())`,wrap:!1}}),H=new c({props:{code:"VG90YWwlMjB0cmFpbmFibGUlMjBwYXJhbWV0ZXJzJTNBJTIwMTM1TQ==",highlighted:"Total trainable parameters: 135M",wrap:!1}}),A=new w({props:{title:"Define the reward function",local:"define-the-reward-function",headingTag:"h2"}}),q=new c({props:{code:"JTIzJTIwUmV3YXJkJTIwZnVuY3Rpb24lMEFpZGVhbF9sZW5ndGglMjAlM0QlMjA1MCUwQSUwQSUwQWRlZiUyMHJld2FyZF9sZW4oY29tcGxldGlvbnMlMkMlMjAqKmt3YXJncyklM0ElMEElMjAlMjAlMjAlMjByZXR1cm4lMjAlNUItYWJzKGlkZWFsX2xlbmd0aCUyMC0lMjBsZW4oY29tcGxldGlvbikpJTIwZm9yJTIwY29tcGxldGlvbiUyMGluJTIwY29tcGxldGlvbnMlNUQ=",highlighted:`<span class="hljs-comment"># Reward function</span> | |
| ideal_length = <span class="hljs-number">50</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">reward_len</span>(<span class="hljs-params">completions, **kwargs</span>): | |
| <span class="hljs-keyword">return</span> [-<span class="hljs-built_in">abs</span>(ideal_length - <span class="hljs-built_in">len</span>(completion)) <span class="hljs-keyword">for</span> completion <span class="hljs-keyword">in</span> completions]`,wrap:!1}}),L=new w({props:{title:"Define the training arguments",local:"define-the-training-arguments",headingTag:"h2"}}),O=new c({props:{code:"JTIzJTIwVHJhaW5pbmclMjBhcmd1bWVudHMlMEF0cmFpbmluZ19hcmdzJTIwJTNEJTIwR1JQT0NvbmZpZyglMEElMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEJTIyR1JQTyUyMiUyQyUwQSUyMCUyMCUyMCUyMGxlYXJuaW5nX3JhdGUlM0QyZS01JTJDJTBBJTIwJTIwJTIwJTIwcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTNEOCUyQyUwQSUyMCUyMCUyMCUyMGdyYWRpZW50X2FjY3VtdWxhdGlvbl9zdGVwcyUzRDIlMkMlMEElMjAlMjAlMjAlMjBtYXhfcHJvbXB0X2xlbmd0aCUzRDUxMiUyQyUwQSUyMCUyMCUyMCUyMG1heF9jb21wbGV0aW9uX2xlbmd0aCUzRDk2JTJDJTBBJTIwJTIwJTIwJTIwbnVtX2dlbmVyYXRpb25zJTNEOCUyQyUwQSUyMCUyMCUyMCUyMG9wdGltJTNEJTIyYWRhbXdfOGJpdCUyMiUyQyUwQSUyMCUyMCUyMCUyMG51bV90cmFpbl9lcG9jaHMlM0QxJTJDJTBBJTIwJTIwJTIwJTIwYmYxNiUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjByZXBvcnRfdG8lM0QlNUIlMjJ3YW5kYiUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMHJlbW92ZV91bnVzZWRfY29sdW1ucyUzREZhbHNlJTJDJTBBJTIwJTIwJTIwJTIwbG9nZ2luZ19zdGVwcyUzRDElMkMlMEEp",highlighted:`<span class="hljs-comment"># Training arguments</span> | |
| training_args = GRPOConfig( | |
| output_dir=<span class="hljs-string">"GRPO"</span>, | |
| learning_rate=<span class="hljs-number">2e-5</span>, | |
| per_device_train_batch_size=<span class="hljs-number">8</span>, | |
| gradient_accumulation_steps=<span class="hljs-number">2</span>, | |
| max_prompt_length=<span class="hljs-number">512</span>, | |
| max_completion_length=<span class="hljs-number">96</span>, | |
| num_generations=<span class="hljs-number">8</span>, | |
| optim=<span class="hljs-string">"adamw_8bit"</span>, | |
| num_train_epochs=<span class="hljs-number">1</span>, | |
| bf16=<span class="hljs-literal">True</span>, | |
| report_to=[<span class="hljs-string">"wandb"</span>], | |
| remove_unused_columns=<span class="hljs-literal">False</span>, | |
| logging_steps=<span class="hljs-number">1</span>, | |
| )`,wrap:!1}}),ee=new c({props:{code:"JTIzJTIwVHJhaW5lciUwQXRyYWluZXIlMjAlM0QlMjBHUlBPVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRG1vZGVsJTJDJTBBJTIwJTIwJTIwJTIwcmV3YXJkX2Z1bmNzJTNEJTVCcmV3YXJkX2xlbiU1RCUyQyUwQSUyMCUyMCUyMCUyMGFyZ3MlM0R0cmFpbmluZ19hcmdzJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlNUIlMjJ0cmFpbiUyMiU1RCUyQyUwQSklMEElMEElMjMlMjBUcmFpbiUyMG1vZGVsJTBBd2FuZGIuaW5pdChwcm9qZWN0JTNEJTIyR1JQTyUyMiklMEF0cmFpbmVyLnRyYWluKCk=",highlighted:`<span class="hljs-comment"># Trainer</span> | |
| trainer = GRPOTrainer( | |
| model=model, | |
| reward_funcs=[reward_len], | |
| args=training_args, | |
| train_dataset=dataset[<span class="hljs-string">"train"</span>], | |
| ) | |
| <span class="hljs-comment"># Train model</span> | |
| wandb.init(project=<span class="hljs-string">"GRPO"</span>) | |
| trainer.train()`,wrap:!1}}),le=new w({props:{title:"Push the model to the Hub during training",local:"push-the-model-to-the-hub-during-training",headingTag:"h2"}}),se=new w({props:{title:"Interpret training results",local:"interpret-training-results",headingTag:"h2"}}),Me=new w({props:{title:"Save and publish the model",local:"save-and-publish-the-model",headingTag:"h2"}}),ye=new c({props:{code:"bWVyZ2VkX21vZGVsJTIwJTNEJTIwdHJhaW5lci5tb2RlbC5tZXJnZV9hbmRfdW5sb2FkKCklMEFtZXJnZWRfbW9kZWwucHVzaF90b19odWIoJTBBJTIwJTIwJTIwJTIwJTIyU21vbEdSUE8tMTM1TSUyMiUyQyUyMHByaXZhdGUlM0RGYWxzZSUyQyUyMHRhZ3MlM0QlNUIlMjJHUlBPJTIyJTJDJTIwJTIyUmVhc29uaW5nLUNvdXJzZSUyMiU1RCUwQSk=",highlighted:`merged_model = trainer.model.merge_and_unload() | |
| merged_model.push_to_hub( | |
| <span class="hljs-string">"SmolGRPO-135M"</span>, private=<span class="hljs-literal">False</span>, tags=[<span class="hljs-string">"GRPO"</span>, <span class="hljs-string">"Reasoning-Course"</span>] | |
| )`,wrap:!1}}),he=new w({props:{title:"Generate text",local:"generate-text",headingTag:"h2"}}),we=new c({props:{code:"cHJvbXB0JTIwJTNEJTIwJTIyJTIyJTIyJTBBJTIzJTIwQSUyMGxvbmclMjBkb2N1bWVudCUyMGFib3V0JTIwdGhlJTIwQ2F0JTBBJTBBVGhlJTIwY2F0JTIwKEZlbGlzJTIwY2F0dXMpJTJDJTIwYWxzbyUyMHJlZmVycmVkJTIwdG8lMjBhcyUyMHRoZSUyMGRvbWVzdGljJTIwY2F0JTIwb3IlMjBob3VzZSUyMGNhdCUyQyUyMGlzJTIwYSUyMHNtYWxsJTIwJTBBZG9tZXN0aWNhdGVkJTIwY2Fybml2b3JvdXMlMjBtYW1tYWwuJTIwSXQlMjBpcyUyMHRoZSUyMG9ubHklMjBkb21lc3RpY2F0ZWQlMjBzcGVjaWVzJTIwb2YlMjB0aGUlMjBmYW1pbHklMjBGZWxpZGFlLiUwQUFkdmFuY2VzJTIwaW4lMjBhcmNoYWVvbG9neSUyMGFuZCUyMGdlbmV0aWNzJTIwaGF2ZSUyMHNob3duJTIwdGhhdCUyMHRoZSUyMGRvbWVzdGljYXRpb24lMjBvZiUyMHRoZSUyMGNhdCUyMG9jY3VycmVkJTBBaW4lMjB0aGUlMjBOZWFyJTIwRWFzdCUyMGFyb3VuZCUyMDc1MDAlMjBCQy4lMjBJdCUyMGlzJTIwY29tbW9ubHklMjBrZXB0JTIwYXMlMjBhJTIwcGV0JTIwYW5kJTIwZmFybSUyMGNhdCUyQyUyMGJ1dCUyMGFsc28lMjByYW5nZXMlMEFmcmVlbHklMjBhcyUyMGElMjBmZXJhbCUyMGNhdCUyMGF2b2lkaW5nJTIwaHVtYW4lMjBjb250YWN0LiUyMEl0JTIwaXMlMjB2YWx1ZWQlMjBieSUyMGh1bWFucyUyMGZvciUyMGNvbXBhbmlvbnNoaXAlMjBhbmQlMEFpdHMlMjBhYmlsaXR5JTIwdG8lMjBraWxsJTIwdmVybWluLiUyMEl0cyUyMHJldHJhY3RhYmxlJTIwY2xhd3MlMjBhcmUlMjBhZGFwdGVkJTIwdG8lMjBraWxsaW5nJTIwc21hbGwlMjBwcmV5JTIwc3BlY2llcyUwQXN1Y2glMjBhcyUyMG1pY2UlMjBhbmQlMjByYXRzLiUyMEl0JTIwaGFzJTIwYSUyMHN0cm9uZyUyQyUyMGZsZXhpYmxlJTIwYm9keSUyQyUyMHF1aWNrJTIwcmVmbGV4ZXMlMkMlMjBhbmQlMjBzaGFycCUyMHRlZXRoJTJDJTBBYW5kJTIwaXRzJTIwbmlnaHQlMjB2aXNpb24lMjBhbmQlMjBzZW5zZSUyMG9mJTIwc21lbGwlMjBhcmUlMjB3ZWxsJTIwZGV2ZWxvcGVkLiUyMEl0JTIwaXMlMjBhJTIwc29jaWFsJTIwc3BlY2llcyUyQyUwQWJ1dCUyMGElMjBzb2xpdGFyeSUyMGh1bnRlciUyMGFuZCUyMGElMjBjcmVwdXNjdWxhciUyMHByZWRhdG9yLiUyMENhdCUyMGNvbW11bmljYXRpb24lMjBpbmNsdWRlcyUwQXZvY2FsaXphdGlvbnMlRTIlODAlOTRpbmNsdWRpbmclMjBtZW93aW5nJTJDJTIwcHVycmluZyUyQyUyMHRyaWxsaW5nJTJDJTIwaGlzc2luZyUyQyUyMGdyb3dsaW5nJTJDJTIwYW5kJTIwZ3J1bnRpbmclRTIlODAlOTRhcyUwQXdlbGwlMjBhcyUyMGJvZHklMjBsYW5ndWFnZS4lMjBJdCUyMGNhbiUyMGhlYXIlMjBzb3VuZHMlMjB0b28lMjBmYWludCUyMG9yJTIwdG9vJTIwaGlnaCUyMGluJTIwZnJlcXVlbmN5JTIwZm9yJTIwaHVtYW4lMjBlYXJzJTJDJTBBc3VjaCUyMGFzJTIwdGhvc2UlMjBtYWRlJTIwYnklMjBzbWFsbCUyMG1hbW1hbHMuJTIwSXQlMjBzZWNyZXRlcyUyMGFuZCUyMHBlcmNlaXZlcyUyMHBoZXJvbW9uZXMuJTBBJTIyJTIyJTIyJTBBJTBBbWVzc2FnZXMlMjAlM0QlMjAlNUIlMEElMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjBwcm9tcHQlN0QlMkMlMEElNUQ=",highlighted:`prompt = <span class="hljs-string">""" | |
| # A long document about the Cat | |
| The cat (Felis catus), also referred to as the domestic cat or house cat, is a small | |
| domesticated carnivorous mammal. It is the only domesticated species of the family Felidae. | |
| Advances in archaeology and genetics have shown that the domestication of the cat occurred | |
| in the Near East around 7500 BC. It is commonly kept as a pet and farm cat, but also ranges | |
| freely as a feral cat avoiding human contact. It is valued by humans for companionship and | |
| its ability to kill vermin. Its retractable claws are adapted to killing small prey species | |
| such as mice and rats. It has a strong, flexible body, quick reflexes, and sharp teeth, | |
| and its night vision and sense of smell are well developed. It is a social species, | |
| but a solitary hunter and a crepuscular predator. Cat communication includes | |
| vocalizations—including meowing, purring, trilling, hissing, growling, and grunting—as | |
| well as body language. It can hear sounds too faint or too high in frequency for human ears, | |
| such as those made by small mammals. It secretes and perceives pheromones. | |
| """</span> | |
| messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: prompt}, | |
| ]`,wrap:!1}}),fe=new c({props:{code:"JTIzJTIwR2VuZXJhdGUlMjB0ZXh0JTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMHBpcGVsaW5lJTBBJTBBZ2VuZXJhdG9yJTIwJTNEJTIwcGlwZWxpbmUoJTIydGV4dC1nZW5lcmF0aW9uJTIyJTJDJTIwbW9kZWwlM0QlMjJTbW9sR1JQTy0xMzVNJTIyKSUwQSUwQSUyMyUyMyUyME9yJTIwdXNlJTIwdGhlJTIwbW9kZWwlMjBhbmQlMjB0b2tlbml6ZXIlMjB3ZSUyMGRlZmluZWQlMjBlYXJsaWVyJTBBJTIzJTIwZ2VuZXJhdG9yJTIwJTNEJTIwcGlwZWxpbmUoJTIydGV4dC1nZW5lcmF0aW9uJTIyJTJDJTIwbW9kZWwlM0Rtb2RlbCUyQyUyMHRva2VuaXplciUzRHRva2VuaXplciklMEElMEFnZW5lcmF0ZV9rd2FyZ3MlMjAlM0QlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjJtYXhfbmV3X3Rva2VucyUyMiUzQSUyMDI1NiUyQyUwQSUyMCUyMCUyMCUyMCUyMmRvX3NhbXBsZSUyMiUzQSUyMFRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjJ0ZW1wZXJhdHVyZSUyMiUzQSUyMDAuNSUyQyUwQSUyMCUyMCUyMCUyMCUyMm1pbl9wJTIyJTNBJTIwMC4xJTJDJTBBJTdEJTBBJTBBZ2VuZXJhdGVkX3RleHQlMjAlM0QlMjBnZW5lcmF0b3IobWVzc2FnZXMlMkMlMjBnZW5lcmF0ZV9rd2FyZ3MlM0RnZW5lcmF0ZV9rd2FyZ3MpJTBBJTBBcHJpbnQoZ2VuZXJhdGVkX3RleHQp",highlighted:`<span class="hljs-comment"># Generate text</span> | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| generator = pipeline(<span class="hljs-string">"text-generation"</span>, model=<span class="hljs-string">"SmolGRPO-135M"</span>) | |
| <span class="hljs-comment">## Or use the model and tokenizer we defined earlier</span> | |
| <span class="hljs-comment"># generator = pipeline("text-generation", model=model, tokenizer=tokenizer)</span> | |
| generate_kwargs = { | |
| <span class="hljs-string">"max_new_tokens"</span>: <span class="hljs-number">256</span>, | |
| <span class="hljs-string">"do_sample"</span>: <span class="hljs-literal">True</span>, | |
| <span class="hljs-string">"temperature"</span>: <span class="hljs-number">0.5</span>, | |
| <span class="hljs-string">"min_p"</span>: <span class="hljs-number">0.1</span>, | |
| } | |
| generated_text = generator(messages, generate_kwargs=generate_kwargs) | |
| <span class="hljs-built_in">print</span>(generated_text)`,wrap:!1}}),Te=new w({props:{title:"Conclusion",local:"conclusion",headingTag:"h1"}}),be=new cl({props:{source:"https://github.com/huggingface/course/blob/main/chapters/en/chapter12/5.mdx"}}),{c(){u=i("meta"),T=s(),J=i("p"),Ue=s(),r(g.$$.fragment),Ie=s(),r(b.$$.fragment),Ge=s(),U=i("p"),U.textContent=Zt,Ce=s(),r(f.$$.fragment),Be=s(),r(j.$$.fragment),Ze=s(),$=i("p"),$.textContent=vt,ve=s(),r(I.$$.fragment),We=s(),G=i("p"),G.textContent=Wt,xe=s(),r(C.$$.fragment),Re=s(),r(B.$$.fragment),Xe=s(),Z=i("p"),Z.textContent=xt,Fe=s(),r(v.$$.fragment),ke=s(),W=i("p"),W.textContent=Rt,_e=s(),r(x.$$.fragment),Qe=s(),R=i("p"),R.innerHTML=Xt,Ve=s(),r(X.$$.fragment),Ye=s(),r(F.$$.fragment),Ee=s(),k=i("p"),k.textContent=Ft,Ne=s(),_=i("p"),_.innerHTML=kt,ze=s(),Q=i("p"),Q.innerHTML=_t,He=s(),r(V.$$.fragment),Ae=s(),r(Y.$$.fragment),Se=s(),E=i("p"),E.textContent=Qt,qe=s(),N=i("p"),N.innerHTML=Vt,Le=s(),r(z.$$.fragment),Pe=s(),r(H.$$.fragment),De=s(),r(A.$$.fragment),Oe=s(),S=i("p"),S.textContent=Yt,Ke=s(),r(q.$$.fragment),et=s(),r(L.$$.fragment),tt=s(),P=i("p"),P.innerHTML=Et,lt=s(),D=i("p"),D.innerHTML=Nt,nt=s(),r(O.$$.fragment),st=s(),K=i("p"),K.textContent=zt,at=s(),r(ee.$$.fragment),it=s(),te=i("p"),te.textContent=Ht,ot=s(),r(le.$$.fragment),rt=s(),ne=i("p"),ne.innerHTML=At,mt=s(),r(se.$$.fragment),pt=s(),ae=i("p"),ae.innerHTML=St,Mt=s(),ie=i("p"),ie.textContent=qt,dt=s(),oe=i("p"),oe.textContent=Lt,yt=s(),re=i("p"),re.innerHTML=Pt,ht=s(),me=i("p"),me.textContent=Dt,ut=s(),pe=i("p"),pe.innerHTML=Ot,ct=s(),r(Me.$$.fragment),wt=s(),de=i("p"),de.textContent=Kt,Jt=s(),r(ye.$$.fragment),ft=s(),r(he.$$.fragment),Tt=s(),ue=i("p"),ue.textContent=el,gt=s(),ce=i("p"),ce.textContent=tl,bt=s(),r(we.$$.fragment),Ut=s(),Je=i("p"),Je.textContent=ll,jt=s(),r(fe.$$.fragment),$t=s(),r(Te.$$.fragment),It=s(),ge=i("p"),ge.textContent=nl,Gt=s(),r(be.$$.fragment),Ct=s(),je=i("p"),this.h()},l(e){const t=dl("svelte-u9bgzb",document.head);u=o(t,"META",{name:!0,content:!0}),t.forEach(l),T=a(e),J=o(e,"P",{}),al(J).forEach(l),Ue=a(e),m(g.$$.fragment,e),Ie=a(e),m(b.$$.fragment,e),Ge=a(e),U=o(e,"P",{"data-svelte-h":!0}),h(U)!=="svelte-cm5jhs"&&(U.textContent=Zt),Ce=a(e),m(f.$$.fragment,e),Be=a(e),m(j.$$.fragment,e),Ze=a(e),$=o(e,"P",{"data-svelte-h":!0}),h($)!=="svelte-ptnxo3"&&($.textContent=vt),ve=a(e),m(I.$$.fragment,e),We=a(e),G=o(e,"P",{"data-svelte-h":!0}),h(G)!=="svelte-12snwaz"&&(G.textContent=Wt),xe=a(e),m(C.$$.fragment,e),Re=a(e),m(B.$$.fragment,e),Xe=a(e),Z=o(e,"P",{"data-svelte-h":!0}),h(Z)!=="svelte-1n1a8k6"&&(Z.textContent=xt),Fe=a(e),m(v.$$.fragment,e),ke=a(e),W=o(e,"P",{"data-svelte-h":!0}),h(W)!=="svelte-1ew5fxu"&&(W.textContent=Rt),_e=a(e),m(x.$$.fragment,e),Qe=a(e),R=o(e,"P",{"data-svelte-h":!0}),h(R)!=="svelte-26gya7"&&(R.innerHTML=Xt),Ve=a(e),m(X.$$.fragment,e),Ye=a(e),m(F.$$.fragment,e),Ee=a(e),k=o(e,"P",{"data-svelte-h":!0}),h(k)!=="svelte-121fjkj"&&(k.textContent=Ft),Ne=a(e),_=o(e,"P",{"data-svelte-h":!0}),h(_)!=="svelte-12w23k"&&(_.innerHTML=kt),ze=a(e),Q=o(e,"P",{"data-svelte-h":!0}),h(Q)!=="svelte-31m4si"&&(Q.innerHTML=_t),He=a(e),m(V.$$.fragment,e),Ae=a(e),m(Y.$$.fragment,e),Se=a(e),E=o(e,"P",{"data-svelte-h":!0}),h(E)!=="svelte-1d01rwe"&&(E.textContent=Qt),qe=a(e),N=o(e,"P",{"data-svelte-h":!0}),h(N)!=="svelte-zej957"&&(N.innerHTML=Vt),Le=a(e),m(z.$$.fragment,e),Pe=a(e),m(H.$$.fragment,e),De=a(e),m(A.$$.fragment,e),Oe=a(e),S=o(e,"P",{"data-svelte-h":!0}),h(S)!=="svelte-19qfksx"&&(S.textContent=Yt),Ke=a(e),m(q.$$.fragment,e),et=a(e),m(L.$$.fragment,e),tt=a(e),P=o(e,"P",{"data-svelte-h":!0}),h(P)!=="svelte-yvl12o"&&(P.innerHTML=Et),lt=a(e),D=o(e,"P",{"data-svelte-h":!0}),h(D)!=="svelte-1a9j2u8"&&(D.innerHTML=Nt),nt=a(e),m(O.$$.fragment,e),st=a(e),K=o(e,"P",{"data-svelte-h":!0}),h(K)!=="svelte-djcfcf"&&(K.textContent=zt),at=a(e),m(ee.$$.fragment,e),it=a(e),te=o(e,"P",{"data-svelte-h":!0}),h(te)!=="svelte-1de2igi"&&(te.textContent=Ht),ot=a(e),m(le.$$.fragment,e),rt=a(e),ne=o(e,"P",{"data-svelte-h":!0}),h(ne)!=="svelte-1xii7zd"&&(ne.innerHTML=At),mt=a(e),m(se.$$.fragment,e),pt=a(e),ae=o(e,"P",{"data-svelte-h":!0}),h(ae)!=="svelte-87il9y"&&(ae.innerHTML=St),Mt=a(e),ie=o(e,"P",{"data-svelte-h":!0}),h(ie)!=="svelte-eocx64"&&(ie.textContent=qt),dt=a(e),oe=o(e,"P",{"data-svelte-h":!0}),h(oe)!=="svelte-1qdyttt"&&(oe.textContent=Lt),yt=a(e),re=o(e,"P",{"data-svelte-h":!0}),h(re)!=="svelte-nb9yq5"&&(re.innerHTML=Pt),ht=a(e),me=o(e,"P",{"data-svelte-h":!0}),h(me)!=="svelte-10noxip"&&(me.textContent=Dt),ut=a(e),pe=o(e,"P",{"data-svelte-h":!0}),h(pe)!=="svelte-1bbe9id"&&(pe.innerHTML=Ot),ct=a(e),m(Me.$$.fragment,e),wt=a(e),de=o(e,"P",{"data-svelte-h":!0}),h(de)!=="svelte-wu0gyd"&&(de.textContent=Kt),Jt=a(e),m(ye.$$.fragment,e),ft=a(e),m(he.$$.fragment,e),Tt=a(e),ue=o(e,"P",{"data-svelte-h":!0}),h(ue)!=="svelte-u9gu30"&&(ue.textContent=el),gt=a(e),ce=o(e,"P",{"data-svelte-h":!0}),h(ce)!=="svelte-1bqln8"&&(ce.textContent=tl),bt=a(e),m(we.$$.fragment,e),Ut=a(e),Je=o(e,"P",{"data-svelte-h":!0}),h(Je)!=="svelte-4yybog"&&(Je.textContent=ll),jt=a(e),m(fe.$$.fragment,e),$t=a(e),m(Te.$$.fragment,e),It=a(e),ge=o(e,"P",{"data-svelte-h":!0}),h(ge)!=="svelte-1y8m2ev"&&(ge.textContent=nl),Gt=a(e),m(be.$$.fragment,e),Ct=a(e),je=o(e,"P",{}),al(je).forEach(l),this.h()},h(){il(u,"name","hf:doc:metadata"),il(u,"content",fl)},m(e,t){yl(document.head,u),n(e,T,t),n(e,J,t),n(e,Ue,t),p(g,e,t),n(e,Ie,t),p(b,e,t),n(e,Ge,t),n(e,U,t),n(e,Ce,t),p(f,e,t),n(e,Be,t),p(j,e,t),n(e,Ze,t),n(e,$,t),n(e,ve,t),p(I,e,t),n(e,We,t),n(e,G,t),n(e,xe,t),p(C,e,t),n(e,Re,t),p(B,e,t),n(e,Xe,t),n(e,Z,t),n(e,Fe,t),p(v,e,t),n(e,ke,t),n(e,W,t),n(e,_e,t),p(x,e,t),n(e,Qe,t),n(e,R,t),n(e,Ve,t),p(X,e,t),n(e,Ye,t),p(F,e,t),n(e,Ee,t),n(e,k,t),n(e,Ne,t),n(e,_,t),n(e,ze,t),n(e,Q,t),n(e,He,t),p(V,e,t),n(e,Ae,t),p(Y,e,t),n(e,Se,t),n(e,E,t),n(e,qe,t),n(e,N,t),n(e,Le,t),p(z,e,t),n(e,Pe,t),p(H,e,t),n(e,De,t),p(A,e,t),n(e,Oe,t),n(e,S,t),n(e,Ke,t),p(q,e,t),n(e,et,t),p(L,e,t),n(e,tt,t),n(e,P,t),n(e,lt,t),n(e,D,t),n(e,nt,t),p(O,e,t),n(e,st,t),n(e,K,t),n(e,at,t),p(ee,e,t),n(e,it,t),n(e,te,t),n(e,ot,t),p(le,e,t),n(e,rt,t),n(e,ne,t),n(e,mt,t),p(se,e,t),n(e,pt,t),n(e,ae,t),n(e,Mt,t),n(e,ie,t),n(e,dt,t),n(e,oe,t),n(e,yt,t),n(e,re,t),n(e,ht,t),n(e,me,t),n(e,ut,t),n(e,pe,t),n(e,ct,t),p(Me,e,t),n(e,wt,t),n(e,de,t),n(e,Jt,t),p(ye,e,t),n(e,ft,t),p(he,e,t),n(e,Tt,t),n(e,ue,t),n(e,gt,t),n(e,ce,t),n(e,bt,t),p(we,e,t),n(e,Ut,t),n(e,Je,t),n(e,jt,t),p(fe,e,t),n(e,$t,t),p(Te,e,t),n(e,It,t),n(e,ge,t),n(e,Gt,t),p(be,e,t),n(e,Ct,t),n(e,je,t),Bt=!0},p(e,[t]){const sl={};t&2&&(sl.$$scope={dirty:t,ctx:e}),f.$set(sl)},i(e){Bt||(M(g.$$.fragment,e),M(b.$$.fragment,e),M(f.$$.fragment,e),M(j.$$.fragment,e),M(I.$$.fragment,e),M(C.$$.fragment,e),M(B.$$.fragment,e),M(v.$$.fragment,e),M(x.$$.fragment,e),M(X.$$.fragment,e),M(F.$$.fragment,e),M(V.$$.fragment,e),M(Y.$$.fragment,e),M(z.$$.fragment,e),M(H.$$.fragment,e),M(A.$$.fragment,e),M(q.$$.fragment,e),M(L.$$.fragment,e),M(O.$$.fragment,e),M(ee.$$.fragment,e),M(le.$$.fragment,e),M(se.$$.fragment,e),M(Me.$$.fragment,e),M(ye.$$.fragment,e),M(he.$$.fragment,e),M(we.$$.fragment,e),M(fe.$$.fragment,e),M(Te.$$.fragment,e),M(be.$$.fragment,e),Bt=!0)},o(e){d(g.$$.fragment,e),d(b.$$.fragment,e),d(f.$$.fragment,e),d(j.$$.fragment,e),d(I.$$.fragment,e),d(C.$$.fragment,e),d(B.$$.fragment,e),d(v.$$.fragment,e),d(x.$$.fragment,e),d(X.$$.fragment,e),d(F.$$.fragment,e),d(V.$$.fragment,e),d(Y.$$.fragment,e),d(z.$$.fragment,e),d(H.$$.fragment,e),d(A.$$.fragment,e),d(q.$$.fragment,e),d(L.$$.fragment,e),d(O.$$.fragment,e),d(ee.$$.fragment,e),d(le.$$.fragment,e),d(se.$$.fragment,e),d(Me.$$.fragment,e),d(ye.$$.fragment,e),d(he.$$.fragment,e),d(we.$$.fragment,e),d(fe.$$.fragment,e),d(Te.$$.fragment,e),d(be.$$.fragment,e),Bt=!1},d(e){e&&(l(T),l(J),l(Ue),l(Ie),l(Ge),l(U),l(Ce),l(Be),l(Ze),l($),l(ve),l(We),l(G),l(xe),l(Re),l(Xe),l(Z),l(Fe),l(ke),l(W),l(_e),l(Qe),l(R),l(Ve),l(Ye),l(Ee),l(k),l(Ne),l(_),l(ze),l(Q),l(He),l(Ae),l(Se),l(E),l(qe),l(N),l(Le),l(Pe),l(De),l(Oe),l(S),l(Ke),l(et),l(tt),l(P),l(lt),l(D),l(nt),l(st),l(K),l(at),l(it),l(te),l(ot),l(rt),l(ne),l(mt),l(pt),l(ae),l(Mt),l(ie),l(dt),l(oe),l(yt),l(re),l(ht),l(me),l(ut),l(pe),l(ct),l(wt),l(de),l(Jt),l(ft),l(Tt),l(ue),l(gt),l(ce),l(bt),l(Ut),l(Je),l(jt),l($t),l(It),l(ge),l(Gt),l(Ct),l(je)),l(u),y(g,e),y(b,e),y(f,e),y(j,e),y(I,e),y(C,e),y(B,e),y(v,e),y(x,e),y(X,e),y(F,e),y(V,e),y(Y,e),y(z,e),y(H,e),y(A,e),y(q,e),y(L,e),y(O,e),y(ee,e),y(le,e),y(se,e),y(Me,e),y(ye,e),y(he,e),y(we,e),y(fe,e),y(Te,e),y(be,e)}}}const fl='{"title":"Practical Exercise: Fine-tune a model with GRPO","local":"practical-exercise-fine-tune-a-model-with-grpo","sections":[{"title":"Install dependencies","local":"install-dependencies","sections":[],"depth":2},{"title":"Import and log in to Weights & Biases","local":"import-and-log-in-to-weights--biases","sections":[],"depth":2},{"title":"Load the dataset","local":"load-the-dataset","sections":[],"depth":2},{"title":"Load model","local":"load-model","sections":[],"depth":2},{"title":"Load LoRA","local":"load-lora","sections":[],"depth":2},{"title":"Define the reward function","local":"define-the-reward-function","sections":[],"depth":2},{"title":"Define the training arguments","local":"define-the-training-arguments","sections":[],"depth":2},{"title":"Push the model to the Hub during training","local":"push-the-model-to-the-hub-during-training","sections":[],"depth":2},{"title":"Interpret training results","local":"interpret-training-results","sections":[],"depth":2},{"title":"Save and publish the model","local":"save-and-publish-the-model","sections":[],"depth":2},{"title":"Generate text","local":"generate-text","sections":[],"depth":2}],"depth":1}';function Tl($e){return rl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Gl extends pl{constructor(u){super(),Ml(this,u,Tl,Jl,ol,{})}}export{Gl as component}; | |
Xet Storage Details
- Size:
- 30.5 kB
- Xet hash:
- ca43b34057c73f97211b901a52188cf0182a6906dc46000affdbe6e898d47a84
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.