Buckets:
| import{s as qs,o as Ds,n as Ea}from"../chunks/scheduler.7b731bd4.js";import{S as Os,i as Ks,e as T,s as a,c as o,h as ti,a as d,d as l,b as s,f as Ps,g as M,j as U,k as il,l as ei,m as n,n as p,t as r,o as m,p as y}from"../chunks/index.cc268345.js";import{C as li,H as w,E as ni}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js";import{C as h}from"../chunks/CodeBlock.169a125f.js";import{H as ai,a as Aa}from"../chunks/HfOption.9f04abd1.js";function si(E){let u,Z,j,F="The <code>SFTTrainer</code> is used for supervised fine-tuning on instruction datasets.",C,g,f,c,B,I,R,$,A;return u=new w({props:{title:"Supervised Fine-Tuning (SFT)",local:"supervised-fine-tuning-sft",headingTag:"h3"}}),g=new w({props:{title:"With LoRA",local:"with-lora",headingTag:"h4"}}),c=new h({props:{code:"cHl0aG9uJTIwdHJsJTJGc2NyaXB0cyUyRnNmdC5weSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLTAuNUIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWRhdGFzZXRfbmFtZSUyMHRybC1saWIlMkZDYXB5YmFyYSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbGVhcm5pbmdfcmF0ZSUyMDIuMGUtNCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbnVtX3RyYWluX2Vwb2NocyUyMDElMjAlNUMlMEElMjAlMjAlMjAlMjAtLXBlcl9kZXZpY2VfdHJhaW5fYmF0Y2hfc2l6ZSUyMDIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWdyYWRpZW50X2FjY3VtdWxhdGlvbl9zdGVwcyUyMDglMjAlNUMlMEElMjAlMjAlMjAlMjAtLXVzZV9wZWZ0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb3JhX3IlMjAzMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbG9yYV9hbHBoYSUyMDE2JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1vdXRwdXRfZGlyJTIwUXdlbjItMC41Qi1TRlQtTG9SQQ==",highlighted:`python trl/scripts/sft.py \\ | |
| --model_name_or_path Qwen/Qwen2-0.5B \\ | |
| --dataset_name trl-lib/Capybara \\ | |
| --learning_rate 2.0e-4 \\ | |
| --num_train_epochs 1 \\ | |
| --per_device_train_batch_size 2 \\ | |
| --gradient_accumulation_steps 8 \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16 \\ | |
| --output_dir Qwen2-0.5B-SFT-LoRA`,wrap:!1}}),I=new w({props:{title:"Python Example",local:"python-example",headingTag:"h4"}}),$=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTBBZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVENvbmZpZyUyQyUyMFNGVFRyYWluZXIlMEElMEElMjMlMjBDb25maWd1cmUlMjBMb1JBJTBBcGVmdF9jb25maWclMjAlM0QlMjBMb3JhQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHIlM0QzMiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfYWxwaGElM0QxNiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfZHJvcG91dCUzRDAuMDUlMkMlMEElMjAlMjAlMjAlMjBiaWFzJTNEJTIybm9uZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMkNBVVNBTF9MTSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhcmdldF9tb2R1bGVzJTNEJTVCJTIycV9wcm9qJTIyJTJDJTIwJTIydl9wcm9qJTIyJTVEJTJDJTIwJTIwJTIzJTIwb3B0aW9uYWwlM0ElMjBzcGVjaWZ5JTIwdGFyZ2V0JTIwbW9kdWxlcyUwQSklMEElMEElMjMlMjBDb25maWd1cmUlMjB0cmFpbmluZyUyMHdpdGglMjBoaWdoZXIlMjBsZWFybmluZyUyMHJhdGUlMjBmb3IlMjBMb1JBJTBBdHJhaW5pbmdfYXJncyUyMCUzRCUyMFNGVENvbmZpZyglMEElMjAlMjAlMjAlMjBsZWFybmluZ19yYXRlJTNEMi4wZS00JTJDJTIwJTIwJTIzJTIwMTB4JTIwdGhlJTIwYmFzZSUyMHJhdGUlMjBmb3IlMjBMb1JBJTBBJTIwJTIwJTIwJTIwLi4uJTBBKSUwQSUwQSUyMyUyMENyZWF0ZSUyMHRyYWluZXIlMjB3aXRoJTIwUEVGVCUyMGNvbmZpZyUwQXRyYWluZXIlMjAlM0QlMjBTRlRUcmFpbmVyKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyUXdlbiUyRlF3ZW4yLTAuNUIlMjIlMkMlMjAlMjAlMjMlMjBjYW4lMjBwYXNzJTIwbW9kZWwlMjBuYW1lJTIwb3IlMjBsb2FkZWQlMjBtb2RlbCUwQSUyMCUyMCUyMCUyMGFyZ3MlM0R0cmFpbmluZ19hcmdzJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjBwZWZ0X2NvbmZpZyUzRHBlZnRfY29uZmlnJTJDJTIwJTIwJTIzJTIwcGFzcyUyMFBFRlQlMjBjb25maWclMjBoZXJlJTBBKSUwQXRyYWluZXIudHJhaW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer | |
| <span class="hljs-comment"># Configure LoRA</span> | |
| peft_config = LoraConfig( | |
| r=<span class="hljs-number">32</span>, | |
| lora_alpha=<span class="hljs-number">16</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| target_modules=[<span class="hljs-string">"q_proj"</span>, <span class="hljs-string">"v_proj"</span>], <span class="hljs-comment"># optional: specify target modules</span> | |
| ) | |
| <span class="hljs-comment"># Configure training with higher learning rate for LoRA</span> | |
| training_args = SFTConfig( | |
| learning_rate=<span class="hljs-number">2.0e-4</span>, <span class="hljs-comment"># 10x the base rate for LoRA</span> | |
| ... | |
| ) | |
| <span class="hljs-comment"># Create trainer with PEFT config</span> | |
| trainer = SFTTrainer( | |
| model=<span class="hljs-string">"Qwen/Qwen2-0.5B"</span>, <span class="hljs-comment"># can pass model name or loaded model</span> | |
| args=training_args, | |
| train_dataset=dataset, | |
| peft_config=peft_config, <span class="hljs-comment"># pass PEFT config here</span> | |
| ) | |
| trainer.train()`,wrap:!1}}),{c(){o(u.$$.fragment),Z=a(),j=T("p"),j.innerHTML=F,C=a(),o(g.$$.fragment),f=a(),o(c.$$.fragment),B=a(),o(I.$$.fragment),R=a(),o($.$$.fragment)},l(i){M(u.$$.fragment,i),Z=s(i),j=d(i,"P",{"data-svelte-h":!0}),U(j)!=="svelte-jcnl34"&&(j.innerHTML=F),C=s(i),M(g.$$.fragment,i),f=s(i),M(c.$$.fragment,i),B=s(i),M(I.$$.fragment,i),R=s(i),M($.$$.fragment,i)},m(i,b){p(u,i,b),n(i,Z,b),n(i,j,b),n(i,C,b),p(g,i,b),n(i,f,b),p(c,i,b),n(i,B,b),p(I,i,b),n(i,R,b),p($,i,b),A=!0},p:Ea,i(i){A||(r(u.$$.fragment,i),r(g.$$.fragment,i),r(c.$$.fragment,i),r(I.$$.fragment,i),r($.$$.fragment,i),A=!0)},o(i){m(u.$$.fragment,i),m(g.$$.fragment,i),m(c.$$.fragment,i),m(I.$$.fragment,i),m($.$$.fragment,i),A=!1},d(i){i&&(l(Z),l(j),l(C),l(f),l(B),l(R)),y(u,i),y(g,i),y(c,i),y(I,i),y($,i)}}}function ii(E){let u,Z,j,F='The <a href="/docs/trl/pr_5607/en/bema_for_reference_model#trl.DPOTrainer">DPOTrainer</a> implements preference learning from human feedback.',C,g,f,c,B,I,R,$,A,i,b="<strong>Note:</strong> When using PEFT with DPO, you don’t need to provide a separate reference model (<code>ref_model</code>). The trainer automatically uses the frozen base model as the reference.",L;return u=new w({props:{title:"Direct Preference Optimization (DPO)",local:"direct-preference-optimization-dpo",headingTag:"h3"}}),g=new w({props:{title:"With LoRA",local:"with-lora",headingTag:"h4"}}),c=new h({props:{code:"cHl0aG9uJTIwdHJsJTJGc2NyaXB0cyUyRmRwby5weSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLTAuNUItSW5zdHJ1Y3QlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWRhdGFzZXRfbmFtZSUyMHRybC1saWIlMkZ1bHRyYWZlZWRiYWNrX2JpbmFyaXplZCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbGVhcm5pbmdfcmF0ZSUyMDUuMGUtNiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTIwMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTIwOCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tdXNlX3BlZnQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfciUyMDMyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb3JhX2FscGhhJTIwMTYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW91dHB1dF9kaXIlMjBRd2VuMi0wLjVCLURQTy1Mb1JB",highlighted:`python trl/scripts/dpo.py \\ | |
| --model_name_or_path Qwen/Qwen2-0.5B-Instruct \\ | |
| --dataset_name trl-lib/ultrafeedback_binarized \\ | |
| --learning_rate 5.0e-6 \\ | |
| --per_device_train_batch_size 2 \\ | |
| --gradient_accumulation_steps 8 \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16 \\ | |
| --output_dir Qwen2-0.5B-DPO-LoRA`,wrap:!1}}),I=new w({props:{title:"Python Example",local:"python-example",headingTag:"h4"}}),$=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTBBZnJvbSUyMHRybCUyMGltcG9ydCUyMERQT0NvbmZpZyUyQyUyMERQT1RyYWluZXIlMEElMEElMjMlMjBDb25maWd1cmUlMjBMb1JBJTBBcGVmdF9jb25maWclMjAlM0QlMjBMb3JhQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHIlM0QzMiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfYWxwaGElM0QxNiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfZHJvcG91dCUzRDAuMDUlMkMlMEElMjAlMjAlMjAlMjBiaWFzJTNEJTIybm9uZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMkNBVVNBTF9MTSUyMiUyQyUwQSklMEElMEElMjMlMjBDb25maWd1cmUlMjB0cmFpbmluZyUyMHdpdGglMjBoaWdoZXIlMjBsZWFybmluZyUyMHJhdGUlMjBmb3IlMjBMb1JBJTBBdHJhaW5pbmdfYXJncyUyMCUzRCUyMERQT0NvbmZpZyglMEElMjAlMjAlMjAlMjBsZWFybmluZ19yYXRlJTNENS4wZS02JTJDJTIwJTIwJTIzJTIwMTB4JTIwdGhlJTIwYmFzZSUyMHJhdGUlMjBmb3IlMjBEUE8lMjB3aXRoJTIwTG9SQSUwQSUyMCUyMCUyMCUyMC4uLiUwQSklMEElMEElMjMlMjBDcmVhdGUlMjB0cmFpbmVyJTIwd2l0aCUyMFBFRlQlMjBjb25maWclMEF0cmFpbmVyJTIwJTNEJTIwRFBPVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMlF3ZW4lMkZRd2VuMi0wLjVCJTIyJTJDJTIwJTIwJTIzJTIwY2FuJTIwcGFzcyUyMG1vZGVsJTIwbmFtZSUyMG9yJTIwbG9hZGVkJTIwbW9kZWwlMEElMjAlMjAlMjAlMjBhcmdzJTNEdHJhaW5pbmdfYXJncyUyQyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0RkYXRhc2V0JTJDJTBBJTIwJTIwJTIwJTIwcGVmdF9jb25maWclM0RwZWZ0X2NvbmZpZyUyQyUyMCUyMCUyMyUyMHBhc3MlMjBQRUZUJTIwY29uZmlnJTIwaGVyZSUwQSklMEF0cmFpbmVyLnRyYWluKCk=",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> DPOConfig, DPOTrainer | |
| <span class="hljs-comment"># Configure LoRA</span> | |
| peft_config = LoraConfig( | |
| r=<span class="hljs-number">32</span>, | |
| lora_alpha=<span class="hljs-number">16</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| ) | |
| <span class="hljs-comment"># Configure training with higher learning rate for LoRA</span> | |
| training_args = DPOConfig( | |
| learning_rate=<span class="hljs-number">5.0e-6</span>, <span class="hljs-comment"># 10x the base rate for DPO with LoRA</span> | |
| ... | |
| ) | |
| <span class="hljs-comment"># Create trainer with PEFT config</span> | |
| trainer = DPOTrainer( | |
| model=<span class="hljs-string">"Qwen/Qwen2-0.5B"</span>, <span class="hljs-comment"># can pass model name or loaded model</span> | |
| args=training_args, | |
| train_dataset=dataset, | |
| peft_config=peft_config, <span class="hljs-comment"># pass PEFT config here</span> | |
| ) | |
| trainer.train()`,wrap:!1}}),{c(){o(u.$$.fragment),Z=a(),j=T("p"),j.innerHTML=F,C=a(),o(g.$$.fragment),f=a(),o(c.$$.fragment),B=a(),o(I.$$.fragment),R=a(),o($.$$.fragment),A=a(),i=T("p"),i.innerHTML=b},l(J){M(u.$$.fragment,J),Z=s(J),j=d(J,"P",{"data-svelte-h":!0}),U(j)!=="svelte-1q5creh"&&(j.innerHTML=F),C=s(J),M(g.$$.fragment,J),f=s(J),M(c.$$.fragment,J),B=s(J),M(I.$$.fragment,J),R=s(J),M($.$$.fragment,J),A=s(J),i=d(J,"P",{"data-svelte-h":!0}),U(i)!=="svelte-fwr38l"&&(i.innerHTML=b)},m(J,_){p(u,J,_),n(J,Z,_),n(J,j,_),n(J,C,_),p(g,J,_),n(J,f,_),p(c,J,_),n(J,B,_),p(I,J,_),n(J,R,_),p($,J,_),n(J,A,_),n(J,i,_),L=!0},p:Ea,i(J){L||(r(u.$$.fragment,J),r(g.$$.fragment,J),r(c.$$.fragment,J),r(I.$$.fragment,J),r($.$$.fragment,J),L=!0)},o(J){m(u.$$.fragment,J),m(g.$$.fragment,J),m(c.$$.fragment,J),m(I.$$.fragment,J),m($.$$.fragment,J),L=!1},d(J){J&&(l(Z),l(j),l(C),l(f),l(B),l(R),l(A),l(i)),y(u,J),y(g,J),y(c,J),y(I,J),y($,J)}}}function oi(E){let u,Z,j,F="The <code>GRPOTrainer</code> optimizes policies using group-based rewards.",C,g,f,c,B,I,R,$,A;return u=new w({props:{title:"Group Relative Policy Optimization (GRPO)",local:"group-relative-policy-optimization-grpo",headingTag:"h3"}}),g=new w({props:{title:"With LoRA",local:"with-lora",headingTag:"h4"}}),c=new h({props:{code:"cHl0aG9uJTIwdHJsJTJGc2NyaXB0cyUyRmdycG8ucHklMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1vZGVsX25hbWVfb3JfcGF0aCUyMFF3ZW4lMkZRd2VuMi0wLjVCJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1kYXRhc2V0X25hbWUlMjB0cmwtbGliJTJGbWF0aC1yZWFzb25pbmclMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxlYXJuaW5nX3JhdGUlMjAxLjBlLTUlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXBlcl9kZXZpY2VfdHJhaW5fYmF0Y2hfc2l6ZSUyMDIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXVzZV9wZWZ0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb3JhX3IlMjAzMiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbG9yYV9hbHBoYSUyMDE2JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1vdXRwdXRfZGlyJTIwUXdlbjItMC41Qi1HUlBPLUxvUkE=",highlighted:`python trl/scripts/grpo.py \\ | |
| --model_name_or_path Qwen/Qwen2-0.5B \\ | |
| --dataset_name trl-lib/math-reasoning \\ | |
| --learning_rate 1.0e-5 \\ | |
| --per_device_train_batch_size 2 \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16 \\ | |
| --output_dir Qwen2-0.5B-GRPO-LoRA`,wrap:!1}}),I=new w({props:{title:"Python Example",local:"python-example",headingTag:"h4"}}),$=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTBBZnJvbSUyMHRybCUyMGltcG9ydCUyMEdSUE9Db25maWclMkMlMjBHUlBPVHJhaW5lciUwQSUwQSUyMyUyMENvbmZpZ3VyZSUyMExvUkElMEFwZWZ0X2NvbmZpZyUyMCUzRCUyMExvcmFDb25maWcoJTBBJTIwJTIwJTIwJTIwciUzRDMyJTJDJTBBJTIwJTIwJTIwJTIwbG9yYV9hbHBoYSUzRDE2JTJDJTBBJTIwJTIwJTIwJTIwbG9yYV9kcm9wb3V0JTNEMC4wNSUyQyUwQSUyMCUyMCUyMCUyMGJpYXMlM0QlMjJub25lJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGFza190eXBlJTNEJTIyQ0FVU0FMX0xNJTIyJTJDJTBBKSUwQSUwQSUyMyUyMENvbmZpZ3VyZSUyMHRyYWluaW5nJTIwd2l0aCUyMGhpZ2hlciUyMGxlYXJuaW5nJTIwcmF0ZSUyMGZvciUyMExvUkElMEF0cmFpbmluZ19hcmdzJTIwJTNEJTIwR1JQT0NvbmZpZyglMEElMjAlMjAlMjAlMjBsZWFybmluZ19yYXRlJTNEMS4wZS01JTJDJTIwJTIwJTIzJTIwMTB4JTIwdGhlJTIwYmFzZSUyMHJhdGUlMjBmb3IlMjBHUlBPJTIwd2l0aCUyMExvUkElMEElMjAlMjAlMjAlMjAuLi4lMEEpJTBBJTBBJTIzJTIwQ3JlYXRlJTIwdHJhaW5lciUyMHdpdGglMjBQRUZUJTIwY29uZmlnJTBBdHJhaW5lciUyMCUzRCUyMEdSUE9UcmFpbmVyKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyUXdlbiUyRlF3ZW4yLTAuNUIlMjIlMkMlMjAlMjAlMjMlMjBjYW4lMjBwYXNzJTIwbW9kZWwlMjBuYW1lJTIwb3IlMjBsb2FkZWQlMjBtb2RlbCUwQSUyMCUyMCUyMCUyMGFyZ3MlM0R0cmFpbmluZ19hcmdzJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjBwZWZ0X2NvbmZpZyUzRHBlZnRfY29uZmlnJTJDJTIwJTIwJTIzJTIwcGFzcyUyMFBFRlQlMjBjb25maWclMjBoZXJlJTBBKSUwQXRyYWluZXIudHJhaW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> GRPOConfig, GRPOTrainer | |
| <span class="hljs-comment"># Configure LoRA</span> | |
| peft_config = LoraConfig( | |
| r=<span class="hljs-number">32</span>, | |
| lora_alpha=<span class="hljs-number">16</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| ) | |
| <span class="hljs-comment"># Configure training with higher learning rate for LoRA</span> | |
| training_args = GRPOConfig( | |
| learning_rate=<span class="hljs-number">1.0e-5</span>, <span class="hljs-comment"># 10x the base rate for GRPO with LoRA</span> | |
| ... | |
| ) | |
| <span class="hljs-comment"># Create trainer with PEFT config</span> | |
| trainer = GRPOTrainer( | |
| model=<span class="hljs-string">"Qwen/Qwen2-0.5B"</span>, <span class="hljs-comment"># can pass model name or loaded model</span> | |
| args=training_args, | |
| train_dataset=dataset, | |
| peft_config=peft_config, <span class="hljs-comment"># pass PEFT config here</span> | |
| ) | |
| trainer.train()`,wrap:!1}}),{c(){o(u.$$.fragment),Z=a(),j=T("p"),j.innerHTML=F,C=a(),o(g.$$.fragment),f=a(),o(c.$$.fragment),B=a(),o(I.$$.fragment),R=a(),o($.$$.fragment)},l(i){M(u.$$.fragment,i),Z=s(i),j=d(i,"P",{"data-svelte-h":!0}),U(j)!=="svelte-11csk42"&&(j.innerHTML=F),C=s(i),M(g.$$.fragment,i),f=s(i),M(c.$$.fragment,i),B=s(i),M(I.$$.fragment,i),R=s(i),M($.$$.fragment,i)},m(i,b){p(u,i,b),n(i,Z,b),n(i,j,b),n(i,C,b),p(g,i,b),n(i,f,b),p(c,i,b),n(i,B,b),p(I,i,b),n(i,R,b),p($,i,b),A=!0},p:Ea,i(i){A||(r(u.$$.fragment,i),r(g.$$.fragment,i),r(c.$$.fragment,i),r(I.$$.fragment,i),r($.$$.fragment,i),A=!0)},o(i){m(u.$$.fragment,i),m(g.$$.fragment,i),m(c.$$.fragment,i),m(I.$$.fragment,i),m($.$$.fragment,i),A=!1},d(i){i&&(l(Z),l(j),l(C),l(f),l(B),l(R)),y(u,i),y(g,i),y(c,i),y(I,i),y($,i)}}}function Mi(E){let u,Z,j,F,C,g;return u=new Aa({props:{id:"trainer-type",option:"sft",$$slots:{default:[si]},$$scope:{ctx:E}}}),j=new Aa({props:{id:"trainer-type",option:"dpo",$$slots:{default:[ii]},$$scope:{ctx:E}}}),C=new Aa({props:{id:"trainer-type",option:"grpo",$$slots:{default:[oi]},$$scope:{ctx:E}}}),{c(){o(u.$$.fragment),Z=a(),o(j.$$.fragment),F=a(),o(C.$$.fragment)},l(f){M(u.$$.fragment,f),Z=s(f),M(j.$$.fragment,f),F=s(f),M(C.$$.fragment,f)},m(f,c){p(u,f,c),n(f,Z,c),p(j,f,c),n(f,F,c),p(C,f,c),g=!0},p(f,c){const B={};c&2&&(B.$$scope={dirty:c,ctx:f}),u.$set(B);const I={};c&2&&(I.$$scope={dirty:c,ctx:f}),j.$set(I);const R={};c&2&&(R.$$scope={dirty:c,ctx:f}),C.$set(R)},i(f){g||(r(u.$$.fragment,f),r(j.$$.fragment,f),r(C.$$.fragment,f),g=!0)},o(f){m(u.$$.fragment,f),m(j.$$.fragment,f),m(C.$$.fragment,f),g=!1},d(f){f&&(l(Z),l(F)),y(u,f),y(j,f),y(C,f)}}}function pi(E){let u,Z,j,F,C,g,f,c,B,I='TRL supports <a href="https://github.com/huggingface/peft" rel="nofollow">PEFT</a> (Parameter-Efficient Fine-Tuning) methods for memory-efficient model training. PEFT enables fine-tuning large language models by training only a small number of additional parameters while keeping the base model frozen, significantly reducing computational costs and memory requirements.',R,$,A="This guide covers how to use PEFT with different TRL trainers, including LoRA, QLoRA, and prompt tuning techniques.",i,b,L='For a complete working example, see the <a href="https://github.com/huggingface/trl/blob/main/examples/notebooks/sft_trl_lora_qlora.ipynb" rel="nofollow">SFT with LoRA/QLoRA notebook</a>.',J,_,ol,X,Qa="To use PEFT with TRL, install the required dependencies:",Ml,k,pl,S,va="For QLoRA support (4-bit and 8-bit quantization), also install:",rl,H,ml,V,yl,x,Wa="All TRL trainers support PEFT through the <code>peft_config</code> argument. The simplest way to enable PEFT is by using the command-line interface with the <code>--use_peft</code> flag:",Tl,N,dl,z,Ga="Alternatively, you can pass a PEFT config directly in your Python code:",Ul,Y,fl,P,Jl,q,La="TRL provides three different methods to configure PEFT, each suited for different use cases:",wl,D,ul,O,Xa="The easiest way to enable PEFT is to use the <code>--use_peft</code> flag with the command-line interface. This method is ideal for quick experiments and standard configurations:",cl,K,jl,tt,ka="<strong>Pros</strong>: Quick setup, no code required",bl,et,Sa="<strong>Cons</strong>: Limited to LoRA, fewer customization options",gl,lt,hl,nt,Ha="For more control, pass a PEFT configuration directly to the trainer. This is the recommended approach for most use cases:",Cl,at,$l,st,Va="<strong>Pros</strong>: Full control, supports all PEFT methods (LoRA, Prompt Tuning, etc.)",Il,it,xa="<strong>Cons</strong>: Requires Python code",Bl,ot,Zl,Mt,Na="For maximum flexibility, you can apply PEFT to your model before passing it to the trainer:",Rl,pt,_l,rt,za="<strong>Pros</strong>: Maximum control, useful for custom model architectures or complex setups",Fl,mt,Ya="<strong>Cons</strong>: More verbose, requires understanding of PEFT internals",Al,yt,El,Tt,Pa="When using LoRA or other PEFT methods, you typically need to use a <strong>higher learning rate</strong> (approximately 10x) compared to full fine-tuning. This is because PEFT methods train only a small fraction of parameters, requiring a larger learning rate to achieve similar parameter updates.",Ql,dt,qa="<strong>Recommended learning rates:</strong>",vl,Ut,Da="<thead><tr><th>Trainer</th> <th>Full Fine-Tuning</th> <th>With LoRA (10x)</th></tr></thead> <tbody><tr><td><strong>SFT</strong></td> <td><code>2.0e-5</code></td> <td><code>2.0e-4</code></td></tr> <tr><td><strong>DPO</strong></td> <td><code>5.0e-7</code></td> <td><code>5.0e-6</code></td></tr> <tr><td><strong>GRPO</strong></td> <td><code>1.0e-6</code></td> <td><code>1.0e-5</code></td></tr> <tr><td><strong>Prompt Tuning</strong></td> <td>N/A</td> <td><code>1.0e-2</code> to <code>3.0e-2</code></td></tr></tbody>",Wl,ft,Oa='<p><strong>Why 10x?</strong> LoRA adapters have significantly fewer trainable parameters than the full model. A higher learning rate compensates for this reduced parameter count, ensuring effective training. For detailed explanation, see <a href="https://thinkingmachines.ai/blog/lora/" rel="nofollow">this blog post</a>.</p>',Gl,Jt,Ka='For additional best practices on using LoRA effectively, refer to the <a href="lora_without_regret">LoRA Without Regret</a> documentation.',Ll,wt,Xl,ut,ts="TRL’s trainers support PEFT configurations for various training paradigms. Below are detailed examples for each major trainer.",kl,Q,Sl,ct,Hl,jt,Vl,bt,es="You can use a single base model with multiple PEFT adapters for the entire PPO algorithm - including retrieving reference logits, computing active logits, and calculating rewards. This approach is useful for memory-efficient RL training.",xl,v,ls="<p>This feature is experimental and convergence has not been extensively tested. We encourage the community to share feedback and report any issues.</p>",Nl,gt,ns="<strong>Requirements</strong>",zl,ht,as="Install PEFT and optionally bitsandbytes for 8-bit models:",Yl,Ct,Pl,$t,ss="<strong>Training Workflow</strong>",ql,It,is="The multi-adapter approach requires three stages:",Dl,Bt,os='<li><strong>Supervised Fine-Tuning (SFT)</strong>: Train a base model on your target domain (e.g., IMDB dataset) using <code>SFTTrainer</code></li> <li><strong>Reward Model Training</strong>: Train a reward model adapter using PEFT and <code>RewardTrainer</code> (see <a href="https://github.com/huggingface/trl/tree/main/examples/scripts/reward_modeling.py" rel="nofollow">reward modeling example</a>)</li> <li><strong>PPO Training</strong>: Fine-tune new adapters using PPO with the reward adapter</li>',Ol,W,Ms="<p>Use the same base model (architecture and weights) for stages 2 & 3.</p>",Kl,Zt,ps="<strong>Basic Usage</strong>",tn,Rt,rs="After training your reward adapter and pushing it to the Hub:",en,_t,ln,Ft,ms="In your training loop, compute rewards using:",nn,At,an,Et,ys="<strong>Advanced Features</strong>",sn,Qt,Ts="<strong>Quantized Base Models</strong>",on,vt,ds="For memory-efficient training, load the base model in 8-bit or 4-bit while keeping adapters in float32:",Mn,Wt,pn,Gt,rn,Lt,Us="QLoRA combines 4-bit quantization with LoRA to enable fine-tuning of very large models on consumer hardware. This technique can reduce memory requirements by up to 4x compared to standard LoRA.",mn,Xt,yn,kt,fs="<li><strong>4-bit Quantization</strong>: The base model is loaded in 4-bit precision using <code>bitsandbytes</code></li> <li><strong>Frozen Weights</strong>: The quantized model weights remain frozen during training</li> <li><strong>LoRA Adapters</strong>: Only the LoRA adapter parameters are trained in higher precision</li> <li><strong>Memory Efficiency</strong>: Enables fine-tuning of models like Llama-70B on a single consumer GPU</li>",Tn,St,dn,Ht,Js="Simply combine <code>load_in_4bit=True</code> with PEFT configuration:",Un,Vt,fn,xt,Jn,Nt,wn,zt,un,Yt,cn,Pt,ws="The <code>BitsAndBytesConfig</code> provides several options to optimize memory and performance:",jn,qt,bn,Dt,us="<strong>Configuration Parameters:</strong>",gn,Ot,cs="<li><code>bnb_4bit_quant_type</code>: Quantization data type (<code>"nf4"</code> or <code>"fp4"</code>). NF4 is recommended.</li> <li><code>bnb_4bit_compute_dtype</code>: The dtype used for computation. Use <code>bfloat16</code> for better training stability.</li> <li><code>bnb_4bit_use_double_quant</code>: Enable nested quantization to save additional ~0.4 bits per parameter.</li>",hn,Kt,Cn,te,js="For slightly higher precision with reduced memory savings, you can use 8-bit quantization:",$n,ee,In,le,bs="Or via command line:",Bn,ne,Zn,ae,Rn,se,gs="Prompt tuning is another PEFT technique that learns soft prompts (continuous embeddings) prepended to the input, while keeping the entire model frozen. This is particularly effective for large models.",_n,ie,Fn,oe,hs="<li><strong>Virtual Tokens</strong>: Adds learnable continuous embeddings (virtual tokens) to the input</li> <li><strong>Frozen Model</strong>: The entire base model remains frozen</li> <li><strong>Task-Specific Prompts</strong>: Each task learns its own prompt embeddings</li> <li><strong>Extreme Efficiency</strong>: Only the prompt embeddings are trained (typically 8-20 tokens)</li>",An,Me,En,pe,Qn,re,vn,me,Wn,ye,Cs="<strong>Configuration Parameters:</strong>",Gn,Te,$s="<li><code>task_type</code>: The task type (<code>TaskType.CAUSAL_LM</code> for language modeling)</li> <li><code>prompt_tuning_init</code>: Initialization method (<code>TEXT</code>, <code>RANDOM</code>)</li> <li><code>num_virtual_tokens</code>: Number of virtual tokens to prepend (typically 8-20)</li> <li><code>prompt_tuning_init_text</code>: Text to initialize the virtual tokens (when using <code>TEXT</code> init)</li> <li><code>tokenizer_name_or_path</code>: Tokenizer for initializing from text</li>",Ln,de,Xn,Ue,Is="<thead><tr><th>Feature</th> <th>Prompt Tuning</th> <th>LoRA</th></tr></thead> <tbody><tr><td><strong>Parameters Trained</strong></td> <td>~0.001%</td> <td>~0.1-1%</td></tr> <tr><td><strong>Memory Usage</strong></td> <td>Minimal</td> <td>Low</td></tr> <tr><td><strong>Training Speed</strong></td> <td>Fastest</td> <td>Fast</td></tr> <tr><td><strong>Model Modification</strong></td> <td>None</td> <td>Adapter layers</td></tr> <tr><td><strong>Best For</strong></td> <td>Large models, many tasks</td> <td>General fine-tuning</td></tr> <tr><td><strong>Learning Rate</strong></td> <td>Higher (1e-2 to 3e-2)</td> <td>Standard (1e-4 to 3e-4)</td></tr></tbody>",kn,fe,Sn,Je,Hn,we,Vn,ue,Bs="<strong>Key Parameters:</strong>",xn,ce,Zs=`<li><code>r</code>: LoRA rank (typical values: 8, 16, 32, 64). Higher rank = more parameters but potentially better performance.</li> <li><code>lora_alpha</code>: Scaling factor (typically 2x the rank). Controls the magnitude of LoRA updates.</li> <li><code>lora_dropout</code>: Dropout probability for LoRA layers (typical: 0.05-0.1).</li> <li><code>target_modules</code>: Which modules to apply LoRA to. Common choices: | |
| <ul><li><code>["q_proj", "v_proj"]</code>: Attention query and value (memory efficient)</li> <li><code>["q_proj", "k_proj", "v_proj", "o_proj"]</code>: All attention projections</li> <li><code>["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]</code>: All linear layers</li></ul></li> <li><code>modules_to_save</code>: Additional modules to fully train (e.g., <code>["embed_tokens", "lm_head"]</code>)</li>`,Nn,je,zn,be,Rs="You can specify which modules to apply LoRA to. Common patterns:",Yn,ge,Pn,he,qn,Ce,_s="TRL scripts accept PEFT parameters via command line:",Dn,$e,On,Ie,Fs="Available flags:",Kn,Be,As="<li><code>--use_peft</code>: Enable PEFT</li> <li><code>--lora_r</code>: LoRA rank (default: 16)</li> <li><code>--lora_alpha</code>: LoRA alpha (default: 32)</li> <li><code>--lora_dropout</code>: LoRA dropout (default: 0.05)</li> <li><code>--lora_target_modules</code>: Target modules (space-separated)</li> <li><code>--lora_modules_to_save</code>: Additional modules to train</li> <li><code>--use_rslora</code>: Enable Rank-Stabilized LoRA</li> <li><code>--use_dora</code>: Enable Weight-Decomposed LoRA (DoRA)</li> <li><code>--load_in_4bit</code>: Enable 4-bit quantization (QLoRA)</li> <li><code>--load_in_8bit</code>: Enable 8-bit quantization</li>",ta,Ze,ea,Re,la,_e,Es="After training, save your PEFT adapters:",na,Fe,aa,Ae,Qs="This saves only the adapter weights (~few MB) rather than the full model (~several GB).",sa,Ee,ia,Qe,vs="Load a PEFT model for inference:",oa,ve,Ma,We,pa,Ge,Ws="You can easily share your PEFT adapters on the Hugging Face Hub:",ra,Le,ma,Xe,ya,ke,Gs="PEFT works seamlessly with TRL’s multi-GPU support through <code>accelerate</code>:",Ta,Se,da,He,Ls="For QLoRA with multiple GPUs, the base model is automatically sharded:",Ua,Ve,fa,xe,Ja,Ne,Xs="For very large models (>60B parameters), TRL supports Naive Pipeline Parallelism (NPP), which distributes the model and adapters across multiple GPUs. The activations and gradients are communicated across GPUs, supporting both <code>int8</code> and other data types.",wa,ze,ks='<img src="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl-npp.png" alt="NPP"/>',ua,Ye,Ss="<strong>How to Use NPP</strong>",ca,Pe,Hs="Load your model with a custom <code>device_map</code> to split it across multiple devices:",ja,qe,ba,G,Vs='<ul><li>Keep the <code>lm_head</code> module on the first GPU (device 0) to avoid errors</li> <li>See this <a href="https://github.com/huggingface/blog/blob/main/accelerate-large-models.md" rel="nofollow">tutorial on device maps</a> for proper configuration</li> <li>Run training scripts directly (not with <code>accelerate launch</code>): <code>python script.py</code></li> <li>Data Parallelism is not yet supported with NPP</li></ul>',ga,De,ha,Oe,Ca,Ke,xs='<li><strong><a href="https://github.com/huggingface/trl/blob/main/examples/notebooks/sft_trl_lora_qlora.ipynb" rel="nofollow">SFT with LoRA/QLoRA Notebook</a></strong> - Complete working example showing both LoRA and QLoRA implementations</li> <li><strong><a href="https://github.com/huggingface/trl/tree/main/examples" rel="nofollow">TRL Examples Directory</a></strong> - Collection of training scripts demonstrating PEFT with different trainers</li> <li><strong><a href="https://github.com/huggingface/cookbook/tree/main/notebooks/transformers" rel="nofollow">TRL Cookbook Recipes</a></strong> - Step-by-step guides for common PEFT training scenarios</li>',$a,tl,Ia,el,Ns='<li><a href="https://huggingface.co/docs/peft" rel="nofollow">PEFT Documentation</a> - Official PEFT library documentation</li> <li><a href="https://huggingface.co/docs/trl" rel="nofollow">TRL Documentation</a> - Complete TRL documentation with trainer guides</li> <li><a href="lora_without_regret">LoRA Without Regret</a> - Best practices for using LoRA effectively</li>',Ba,ll,Za,nl,zs='<li><a href="https://huggingface.co/papers/2106.09685" rel="nofollow">LoRA Paper</a> - Original LoRA methodology and results</li> <li><a href="https://huggingface.co/papers/2305.14314" rel="nofollow">QLoRA Paper</a> - Efficient finetuning with 4-bit quantization</li> <li><a href="https://huggingface.co/papers/2104.08691" rel="nofollow">Prompt Tuning Paper</a> - The Power of Scale for Parameter-Efficient Prompt Tuning</li>',Ra,al,_a,sl,Fa;return C=new li({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),f=new w({props:{title:"PEFT Integration",local:"peft-integration",headingTag:"h1"}}),_=new w({props:{title:"Installation",local:"installation",headingTag:"h2"}}),k=new h({props:{code:"cGlwJTIwaW5zdGFsbCUyMHRybCU1QnBlZnQlNUQ=",highlighted:"pip install trl[peft]",wrap:!1}}),H=new h({props:{code:"cGlwJTIwaW5zdGFsbCUyMGJpdHNhbmRieXRlcw==",highlighted:"pip install bitsandbytes",wrap:!1}}),V=new w({props:{title:"Quick Start",local:"quick-start",headingTag:"h2"}}),N=new h({props:{code:"cHl0aG9uJTIwdHJsJTJGc2NyaXB0cyUyRnNmdC5weSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLTAuNUIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWRhdGFzZXRfbmFtZSUyMHRybC1saWIlMkZDYXB5YmFyYSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tdXNlX3BlZnQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfciUyMDMyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb3JhX2FscGhhJTIwMTYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW91dHB1dF9kaXIlMjBRd2VuMi0wLjVCLVNGVC1Mb1JB",highlighted:`python trl/scripts/sft.py \\ | |
| --model_name_or_path Qwen/Qwen2-0.5B \\ | |
| --dataset_name trl-lib/Capybara \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16 \\ | |
| --output_dir Qwen2-0.5B-SFT-LoRA`,wrap:!1}}),Y=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTBBZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVFRyYWluZXIlMEElMEElMjMlMjBDb25maWd1cmUlMjBMb1JBJTBBcGVmdF9jb25maWclMjAlM0QlMjBMb3JhQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHIlM0QzMiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfYWxwaGElM0QxNiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfZHJvcG91dCUzRDAuMDUlMkMlMEElMjAlMjAlMjAlMjBiaWFzJTNEJTIybm9uZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMkNBVVNBTF9MTSUyMiUyQyUwQSklMEElMEElMjMlMjBDb25maWd1cmUlMjB0cmFpbmluZyUyMC0lMjBub3RlJTIwdGhlJTIwaGlnaGVyJTIwbGVhcm5pbmclMjByYXRlJTIwZm9yJTIwTG9SQSUyMCgxMHglMjBiYXNlJTIwcmF0ZSklMEF0cmFpbmluZ19hcmdzJTIwJTNEJTIwU0ZUQ29uZmlnKCUwQSUyMCUyMCUyMCUyMGxlYXJuaW5nX3JhdGUlM0QyLjBlLTQlMkMlMjAlMjAlMjMlMjAxMHglMjB0aGUlMjBiYXNlJTIwcmF0ZSUyMCgyLjBlLTUpJTIwZm9yJTIwTG9SQSUwQSUyMCUyMCUyMCUyMC4uLiUwQSklMEElMEElMjMlMjBDcmVhdGUlMjB0cmFpbmVyJTIwd2l0aCUyMFBFRlQlMEF0cmFpbmVyJTIwJTNEJTIwU0ZUVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRG1vZGVsJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjBwZWZ0X2NvbmZpZyUzRHBlZnRfY29uZmlnJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTTrainer | |
| <span class="hljs-comment"># Configure LoRA</span> | |
| peft_config = LoraConfig( | |
| r=<span class="hljs-number">32</span>, | |
| lora_alpha=<span class="hljs-number">16</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| ) | |
| <span class="hljs-comment"># Configure training - note the higher learning rate for LoRA (10x base rate)</span> | |
| training_args = SFTConfig( | |
| learning_rate=<span class="hljs-number">2.0e-4</span>, <span class="hljs-comment"># 10x the base rate (2.0e-5) for LoRA</span> | |
| ... | |
| ) | |
| <span class="hljs-comment"># Create trainer with PEFT</span> | |
| trainer = SFTTrainer( | |
| model=model, | |
| train_dataset=dataset, | |
| peft_config=peft_config, | |
| )`,wrap:!1}}),P=new w({props:{title:"Three Ways to Configure PEFT",local:"three-ways-to-configure-peft",headingTag:"h2"}}),D=new w({props:{title:"1. Using CLI Flags (Simplest)",local:"1-using-cli-flags-simplest",headingTag:"h3"}}),K=new h({props:{code:"cHl0aG9uJTIwdHJsJTJGc2NyaXB0cyUyRnNmdC5weSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLTAuNUIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWRhdGFzZXRfbmFtZSUyMHRybC1saWIlMkZDYXB5YmFyYSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tdXNlX3BlZnQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfciUyMDMyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb3JhX2FscGhhJTIwMTYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfZHJvcG91dCUyMDAuMDUlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW91dHB1dF9kaXIlMjBRd2VuMi0wLjVCLVNGVC1Mb1JB",highlighted:`python trl/scripts/sft.py \\ | |
| --model_name_or_path Qwen/Qwen2-0.5B \\ | |
| --dataset_name trl-lib/Capybara \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16 \\ | |
| --lora_dropout 0.05 \\ | |
| --output_dir Qwen2-0.5B-SFT-LoRA`,wrap:!1}}),lt=new w({props:{title:"2. Passing peft_config to Trainer (Recommended)",local:"2-passing-peftconfig-to-trainer-recommended",headingTag:"h3"}}),at=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTBBZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVENvbmZpZyUyQyUyMFNGVFRyYWluZXIlMEElMEFwZWZ0X2NvbmZpZyUyMCUzRCUyMExvcmFDb25maWcoJTBBJTIwJTIwJTIwJTIwciUzRDMyJTJDJTBBJTIwJTIwJTIwJTIwbG9yYV9hbHBoYSUzRDE2JTJDJTBBJTIwJTIwJTIwJTIwbG9yYV9kcm9wb3V0JTNEMC4wNSUyQyUwQSUyMCUyMCUyMCUyMGJpYXMlM0QlMjJub25lJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGFza190eXBlJTNEJTIyQ0FVU0FMX0xNJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGFyZ2V0X21vZHVsZXMlM0QlNUIlMjJxX3Byb2olMjIlMkMlMjAlMjJ2X3Byb2olMjIlMkMlMjAlMjJrX3Byb2olMjIlMkMlMjAlMjJvX3Byb2olMjIlNUQlMkMlMEEpJTBBJTBBdHJhaW5lciUyMCUzRCUyMFNGVFRyYWluZXIoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0Rtb2RlbCUyQyUwQSUyMCUyMCUyMCUyMGFyZ3MlM0R0cmFpbmluZ19hcmdzJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjBwZWZ0X2NvbmZpZyUzRHBlZnRfY29uZmlnJTJDJTIwJTIwJTIzJTIwUGFzcyUyMGNvbmZpZyUyMGhlcmUlMEEp",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer | |
| peft_config = LoraConfig( | |
| r=<span class="hljs-number">32</span>, | |
| lora_alpha=<span class="hljs-number">16</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| target_modules=[<span class="hljs-string">"q_proj"</span>, <span class="hljs-string">"v_proj"</span>, <span class="hljs-string">"k_proj"</span>, <span class="hljs-string">"o_proj"</span>], | |
| ) | |
| trainer = SFTTrainer( | |
| model=model, | |
| args=training_args, | |
| train_dataset=dataset, | |
| peft_config=peft_config, <span class="hljs-comment"># Pass config here</span> | |
| )`,wrap:!1}}),ot=new w({props:{title:"3. Applying PEFT to Model Directly (Advanced)",local:"3-applying-peft-to-model-directly-advanced",headingTag:"h3"}}),pt=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTJDJTIwZ2V0X3BlZnRfbW9kZWwlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0lMEFmcm9tJTIwdHJsJTIwaW1wb3J0JTIwU0ZUQ29uZmlnJTJDJTIwU0ZUVHJhaW5lciUwQSUwQSUyMyUyMExvYWQlMjBiYXNlJTIwbW9kZWwlMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMjJRd2VuJTJGUXdlbjItMC41QiUyMiklMEElMEElMjMlMjBBcHBseSUyMFBFRlQlMjBjb25maWd1cmF0aW9uJTBBcGVmdF9jb25maWclMjAlM0QlMjBMb3JhQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHIlM0QzMiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfYWxwaGElM0QxNiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfZHJvcG91dCUzRDAuMDUlMkMlMEElMjAlMjAlMjAlMjBiaWFzJTNEJTIybm9uZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMkNBVVNBTF9MTSUyMiUyQyUwQSklMEFtb2RlbCUyMCUzRCUyMGdldF9wZWZ0X21vZGVsKG1vZGVsJTJDJTIwcGVmdF9jb25maWcpJTBBJTBBJTIzJTIwUGFzcyUyMFBFRlQtd3JhcHBlZCUyMG1vZGVsJTIwdG8lMjB0cmFpbmVyJTBBdHJhaW5lciUyMCUzRCUyMFNGVFRyYWluZXIoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0Rtb2RlbCUyQyUyMCUyMCUyMyUyMEFscmVhZHklMjBoYXMlMjBQRUZUJTIwYXBwbGllZCUwQSUyMCUyMCUyMCUyMGFyZ3MlM0R0cmFpbmluZ19hcmdzJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjAlMjMlMjBOb3RlJTNBJTIwbm8lMjBwZWZ0X2NvbmZpZyUyMG5lZWRlZCUyMGhlcmUlMEEp",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, get_peft_model | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer | |
| <span class="hljs-comment"># Load base model</span> | |
| model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"Qwen/Qwen2-0.5B"</span>) | |
| <span class="hljs-comment"># Apply PEFT configuration</span> | |
| peft_config = LoraConfig( | |
| r=<span class="hljs-number">32</span>, | |
| lora_alpha=<span class="hljs-number">16</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| ) | |
| model = get_peft_model(model, peft_config) | |
| <span class="hljs-comment"># Pass PEFT-wrapped model to trainer</span> | |
| trainer = SFTTrainer( | |
| model=model, <span class="hljs-comment"># Already has PEFT applied</span> | |
| args=training_args, | |
| train_dataset=dataset, | |
| <span class="hljs-comment"># Note: no peft_config needed here</span> | |
| )`,wrap:!1}}),yt=new w({props:{title:"Learning Rate Considerations",local:"learning-rate-considerations",headingTag:"h2"}}),wt=new w({props:{title:"PEFT with Different Trainers",local:"peft-with-different-trainers",headingTag:"h2"}}),Q=new ai({props:{id:"trainer-type",options:["sft","dpo","grpo"],$$slots:{default:[Mi]},$$scope:{ctx:E}}}),ct=new w({props:{title:"Proximal Policy Optimization (PPO)",local:"proximal-policy-optimization-ppo",headingTag:"h3"}}),jt=new w({props:{title:"Multi-Adapter RL Training",local:"multi-adapter-rl-training",headingTag:"h4"}}),Ct=new h({props:{code:"cGlwJTIwaW5zdGFsbCUyMHBlZnQlMjBiaXRzYW5kYnl0ZXM=",highlighted:"pip install peft bitsandbytes",wrap:!1}}),_t=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTBBZnJvbSUyMHRybC5leHBlcmltZW50YWwucHBvJTIwaW1wb3J0JTIwUFBPVHJhaW5lciUyQyUyMEF1dG9Nb2RlbEZvckNhdXNhbExNV2l0aFZhbHVlSGVhZCUwQSUwQW1vZGVsX25hbWUlMjAlM0QlMjAlMjJodWdneWxsYW1hJTJGbGxhbWEtN2IlMjIlMEFybV9hZGFwdGVyX2lkJTIwJTNEJTIwJTIydHJsLWxpYiUyRmxsYW1hLTdiLWhoLXJtLWFkYXB0ZXIlMjIlMEElMEElMjMlMjBDb25maWd1cmUlMjBQUE8lMjBhZGFwdGVyJTBBbG9yYV9jb25maWclMjAlM0QlMjBMb3JhQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHIlM0QxNiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfYWxwaGElM0QzMiUyQyUwQSUyMCUyMCUyMCUyMGxvcmFfZHJvcG91dCUzRDAuMDUlMkMlMEElMjAlMjAlMjAlMjBiaWFzJTNEJTIybm9uZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMkNBVVNBTF9MTSUyMiUyQyUwQSklMEElMEElMjMlMjBMb2FkJTIwbW9kZWwlMjB3aXRoJTIwcmV3YXJkJTIwYWRhcHRlciUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE1XaXRoVmFsdWVIZWFkLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9uYW1lJTJDJTBBJTIwJTIwJTIwJTIwcGVmdF9jb25maWclM0Rsb3JhX2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHJld2FyZF9hZGFwdGVyJTNEcm1fYWRhcHRlcl9pZCUyQyUwQSklMEElMEF0cmFpbmVyJTIwJTNEJTIwUFBPVHJhaW5lcihtb2RlbCUzRG1vZGVsJTJDJTIwLi4uKQ==",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| <span class="hljs-keyword">from</span> trl.experimental.ppo <span class="hljs-keyword">import</span> PPOTrainer, AutoModelForCausalLMWithValueHead | |
| model_name = <span class="hljs-string">"huggyllama/llama-7b"</span> | |
| rm_adapter_id = <span class="hljs-string">"trl-lib/llama-7b-hh-rm-adapter"</span> | |
| <span class="hljs-comment"># Configure PPO adapter</span> | |
| lora_config = LoraConfig( | |
| r=<span class="hljs-number">16</span>, | |
| lora_alpha=<span class="hljs-number">32</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| ) | |
| <span class="hljs-comment"># Load model with reward adapter</span> | |
| model = AutoModelForCausalLMWithValueHead.from_pretrained( | |
| model_name, | |
| peft_config=lora_config, | |
| reward_adapter=rm_adapter_id, | |
| ) | |
| trainer = PPOTrainer(model=model, ...)`,wrap:!1}}),At=new h({props:{code:"cmV3YXJkcyUyMCUzRCUyMHRyYWluZXIubW9kZWwuY29tcHV0ZV9yZXdhcmRfc2NvcmUoKippbnB1dHMp",highlighted:"rewards = trainer.model.compute_reward_score(**inputs)",wrap:!1}}),Wt=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEJpdHNBbmRCeXRlc0NvbmZpZyUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE1XaXRoVmFsdWVIZWFkLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBtb2RlbF9uYW1lJTJDJTBBJTIwJTIwJTIwJTIwcGVmdF9jb25maWclM0Rsb3JhX2NvbmZpZyUyQyUwQSUyMCUyMCUyMCUyMHJld2FyZF9hZGFwdGVyJTNEcm1fYWRhcHRlcl9pZCUyQyUwQSUyMCUyMCUyMCUyMHF1YW50aXphdGlvbl9jb25maWclM0RCaXRzQW5kQnl0ZXNDb25maWcobG9hZF9pbl84Yml0JTNEVHJ1ZSklMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig | |
| model = AutoModelForCausalLMWithValueHead.from_pretrained( | |
| model_name, | |
| peft_config=lora_config, | |
| reward_adapter=rm_adapter_id, | |
| quantization_config=BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>), | |
| )`,wrap:!1}}),Gt=new w({props:{title:"QLoRA: Quantized Low-Rank Adaptation",local:"qlora-quantized-low-rank-adaptation",headingTag:"h2"}}),Xt=new w({props:{title:"How QLoRA Works",local:"how-qlora-works",headingTag:"h3"}}),St=new w({props:{title:"Using QLoRA with TRL",local:"using-qlora-with-trl",headingTag:"h3"}}),Vt=new w({props:{title:"Command Line",local:"command-line",headingTag:"h4"}}),xt=new h({props:{code:"cHl0aG9uJTIwdHJsJTJGc2NyaXB0cyUyRnNmdC5weSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwbWV0YS1sbGFtYSUyRkxsYW1hLTItN2ItaGYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWRhdGFzZXRfbmFtZSUyMHRybC1saWIlMkZDYXB5YmFyYSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbG9hZF9pbl80Yml0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS11c2VfcGVmdCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbG9yYV9yJTIwMzIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfYWxwaGElMjAxNiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTIwMSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTIwMTYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW91dHB1dF9kaXIlMjBMbGFtYS0yLTdiLVFMb1JB",highlighted:`python trl/scripts/sft.py \\ | |
| --model_name_or_path meta-llama/Llama-2-7b-hf \\ | |
| --dataset_name trl-lib/Capybara \\ | |
| --load_in_4bit \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16 \\ | |
| --per_device_train_batch_size 1 \\ | |
| --gradient_accumulation_steps 16 \\ | |
| --output_dir Llama-2-7b-QLoRA`,wrap:!1}}),Nt=new w({props:{title:"Python Example",local:"python-example",headingTag:"h4"}}),zt=new h({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEFmcm9tJTIwcGVmdCUyMGltcG9ydCUyMExvcmFDb25maWclMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0lMkMlMjBCaXRzQW5kQnl0ZXNDb25maWclMEFmcm9tJTIwdHJsJTIwaW1wb3J0JTIwU0ZUQ29uZmlnJTJDJTIwU0ZUVHJhaW5lciUwQSUwQSUyMyUyMENvbmZpZ3VyZSUyMDQtYml0JTIwcXVhbnRpemF0aW9uJTBBYm5iX2NvbmZpZyUyMCUzRCUyMEJpdHNBbmRCeXRlc0NvbmZpZyglMEElMjAlMjAlMjAlMjBsb2FkX2luXzRiaXQlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwYm5iXzRiaXRfcXVhbnRfdHlwZSUzRCUyMm5mNCUyMiUyQyUwQSUyMCUyMCUyMCUyMGJuYl80Yml0X2NvbXB1dGVfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUyQyUwQSUyMCUyMCUyMCUyMGJuYl80Yml0X3VzZV9kb3VibGVfcXVhbnQlM0RUcnVlJTJDJTBBKSUwQSUwQSUyMyUyMExvYWQlMjBtb2RlbCUyMHdpdGglMjBxdWFudGl6YXRpb24lMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJtZXRhLWxsYW1hJTJGTGxhbWEtMi03Yi1oZiUyMiUyQyUwQSUyMCUyMCUyMCUyMHF1YW50aXphdGlvbl9jb25maWclM0RibmJfY29uZmlnJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlX21hcCUzRCUyMmF1dG8lMjIlMkMlMEEpJTBBJTBBJTIzJTIwQ29uZmlndXJlJTIwTG9SQSUwQXBlZnRfY29uZmlnJTIwJTNEJTIwTG9yYUNvbmZpZyglMEElMjAlMjAlMjAlMjByJTNEMzIlMkMlMEElMjAlMjAlMjAlMjBsb3JhX2FscGhhJTNEMTYlMkMlMEElMjAlMjAlMjAlMjBsb3JhX2Ryb3BvdXQlM0QwLjA1JTJDJTBBJTIwJTIwJTIwJTIwYmlhcyUzRCUyMm5vbmUlMjIlMkMlMEElMjAlMjAlMjAlMjB0YXNrX3R5cGUlM0QlMjJDQVVTQUxfTE0lMjIlMkMlMEEpJTBBJTBBJTIzJTIwQ29uZmlndXJlJTIwdHJhaW5pbmclMjB3aXRoJTIwaGlnaGVyJTIwbGVhcm5pbmclMjByYXRlJTIwZm9yJTIwTG9SQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBTRlRDb25maWcoJTBBJTIwJTIwJTIwJTIwbGVhcm5pbmdfcmF0ZSUzRDIuMGUtNCUyQyUyMCUyMCUyMyUyMDEweCUyMHRoZSUyMGJhc2UlMjByYXRlJTIwZm9yJTIwUUxvUkElMEElMjAlMjAlMjAlMjAuLi4lMEEpJTBBJTBBJTIzJTIwQ3JlYXRlJTIwdHJhaW5lciUyMHdpdGglMjBQRUZUJTIwY29uZmlnJTBBdHJhaW5lciUyMCUzRCUyMFNGVFRyYWluZXIoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0Rtb2RlbCUyQyUwQSUyMCUyMCUyMCUyMGFyZ3MlM0R0cmFpbmluZ19hcmdzJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGRhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjBwZWZ0X2NvbmZpZyUzRHBlZnRfY29uZmlnJTJDJTBBKSUwQSUwQXRyYWluZXIudHJhaW4oKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, BitsAndBytesConfig | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer | |
| <span class="hljs-comment"># Configure 4-bit quantization</span> | |
| bnb_config = BitsAndBytesConfig( | |
| load_in_4bit=<span class="hljs-literal">True</span>, | |
| bnb_4bit_quant_type=<span class="hljs-string">"nf4"</span>, | |
| bnb_4bit_compute_dtype=torch.bfloat16, | |
| bnb_4bit_use_double_quant=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-comment"># Load model with quantization</span> | |
| model = AutoModelForCausalLM.from_pretrained( | |
| <span class="hljs-string">"meta-llama/Llama-2-7b-hf"</span>, | |
| quantization_config=bnb_config, | |
| device_map=<span class="hljs-string">"auto"</span>, | |
| ) | |
| <span class="hljs-comment"># Configure LoRA</span> | |
| peft_config = LoraConfig( | |
| r=<span class="hljs-number">32</span>, | |
| lora_alpha=<span class="hljs-number">16</span>, | |
| lora_dropout=<span class="hljs-number">0.05</span>, | |
| bias=<span class="hljs-string">"none"</span>, | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| ) | |
| <span class="hljs-comment"># Configure training with higher learning rate for LoRA</span> | |
| training_args = SFTConfig( | |
| learning_rate=<span class="hljs-number">2.0e-4</span>, <span class="hljs-comment"># 10x the base rate for QLoRA</span> | |
| ... | |
| ) | |
| <span class="hljs-comment"># Create trainer with PEFT config</span> | |
| trainer = SFTTrainer( | |
| model=model, | |
| args=training_args, | |
| train_dataset=dataset, | |
| peft_config=peft_config, | |
| ) | |
| trainer.train()`,wrap:!1}}),Yt=new w({props:{title:"QLoRA Configuration Options",local:"qlora-configuration-options",headingTag:"h3"}}),qt=new h({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQml0c0FuZEJ5dGVzQ29uZmlnJTBBJTBBYm5iX2NvbmZpZyUyMCUzRCUyMEJpdHNBbmRCeXRlc0NvbmZpZyglMEElMjAlMjAlMjAlMjBsb2FkX2luXzRiaXQlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwYm5iXzRiaXRfcXVhbnRfdHlwZSUzRCUyMm5mNCUyMiUyQyUyMCUyMCUyMyUyMG9yJTIwJTIyZnA0JTIyJTBBJTIwJTIwJTIwJTIwYm5iXzRiaXRfY29tcHV0ZV9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2JTJDJTIwJTIwJTIzJTIwQ29tcHV0ZSUyMGR0eXBlJTIwZm9yJTIwNC1iaXQlMjBiYXNlJTIwbW9kZWxzJTBBJTIwJTIwJTIwJTIwYm5iXzRiaXRfdXNlX2RvdWJsZV9xdWFudCUzRFRydWUlMkMlMjAlMjAlMjMlMjBOZXN0ZWQlMjBxdWFudGl6YXRpb24lMjBmb3IlMjBhZGRpdGlvbmFsJTIwbWVtb3J5JTIwc2F2aW5ncyUwQSk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig | |
| bnb_config = BitsAndBytesConfig( | |
| load_in_4bit=<span class="hljs-literal">True</span>, | |
| bnb_4bit_quant_type=<span class="hljs-string">"nf4"</span>, <span class="hljs-comment"># or "fp4"</span> | |
| bnb_4bit_compute_dtype=torch.bfloat16, <span class="hljs-comment"># Compute dtype for 4-bit base models</span> | |
| bnb_4bit_use_double_quant=<span class="hljs-literal">True</span>, <span class="hljs-comment"># Nested quantization for additional memory savings</span> | |
| )`,wrap:!1}}),Kt=new w({props:{title:"8-bit Quantization",local:"8-bit-quantization",headingTag:"h3"}}),ee=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEJpdHNBbmRCeXRlc0NvbmZpZyUyQyUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBJTBBYm5iX2NvbmZpZyUyMCUzRCUyMEJpdHNBbmRCeXRlc0NvbmZpZyhsb2FkX2luXzhiaXQlM0RUcnVlKSUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMm1ldGEtbGxhbWElMkZMbGFtYS0yLTdiLWhmJTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRGJuYl9jb25maWclMkMlMEElMjAlMjAlMjAlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUyQyUwQSk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BitsAndBytesConfig, AutoModelForCausalLM | |
| bnb_config = BitsAndBytesConfig(load_in_8bit=<span class="hljs-literal">True</span>) | |
| model = AutoModelForCausalLM.from_pretrained( | |
| <span class="hljs-string">"meta-llama/Llama-2-7b-hf"</span>, | |
| quantization_config=bnb_config, | |
| device_map=<span class="hljs-string">"auto"</span>, | |
| )`,wrap:!1}}),ne=new h({props:{code:"cHl0aG9uJTIwdHJsJTJGc2NyaXB0cyUyRnNmdC5weSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwbWV0YS1sbGFtYSUyRkxsYW1hLTItN2ItaGYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvYWRfaW5fOGJpdCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tdXNlX3BlZnQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfciUyMDMyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb3JhX2FscGhhJTIwMTY=",highlighted:`python trl/scripts/sft.py \\ | |
| --model_name_or_path meta-llama/Llama-2-7b-hf \\ | |
| --load_in_8bit \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16`,wrap:!1}}),ae=new w({props:{title:"Prompt Tuning",local:"prompt-tuning",headingTag:"h2"}}),ie=new w({props:{title:"How Prompt Tuning Works",local:"how-prompt-tuning-works",headingTag:"h3"}}),Me=new w({props:{title:"Using Prompt Tuning with TRL",local:"using-prompt-tuning-with-trl",headingTag:"h3"}}),pe=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBQcm9tcHRUdW5pbmdDb25maWclMkMlMjBQcm9tcHRUdW5pbmdJbml0JTJDJTIwVGFza1R5cGUlMEFmcm9tJTIwdHJsJTIwaW1wb3J0JTIwU0ZUQ29uZmlnJTJDJTIwU0ZUVHJhaW5lciUwQSUwQSUyMyUyMENvbmZpZ3VyZSUyMFByb21wdCUyMFR1bmluZyUwQXBlZnRfY29uZmlnJTIwJTNEJTIwUHJvbXB0VHVuaW5nQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRFRhc2tUeXBlLkNBVVNBTF9MTSUyQyUwQSUyMCUyMCUyMCUyMHByb21wdF90dW5pbmdfaW5pdCUzRFByb21wdFR1bmluZ0luaXQuVEVYVCUyQyUwQSUyMCUyMCUyMCUyMG51bV92aXJ0dWFsX3Rva2VucyUzRDglMkMlMEElMjAlMjAlMjAlMjBwcm9tcHRfdHVuaW5nX2luaXRfdGV4dCUzRCUyMkNsYXNzaWZ5JTIwaWYlMjB0aGUlMjB0d2VldCUyMGlzJTIwYSUyMGNvbXBsYWludCUyMG9yJTIwbm90JTNBJTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVyX25hbWVfb3JfcGF0aCUzRCUyMlF3ZW4lMkZRd2VuMi0wLjVCJTIyJTJDJTBBKSUwQSUwQSUyMyUyMENvbmZpZ3VyZSUyMHRyYWluaW5nJTIwd2l0aCUyMGhpZ2hlciUyMGxlYXJuaW5nJTIwcmF0ZSUyMGZvciUyMFByb21wdCUyMFR1bmluZyUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBTRlRDb25maWcoJTBBJTIwJTIwJTIwJTIwbGVhcm5pbmdfcmF0ZSUzRDIuMGUtMiUyQyUyMCUyMCUyMyUyMFByb21wdCUyMFR1bmluZyUyMHR5cGljYWxseSUyMHVzZXMlMjAxZS0yJTIwdG8lMjAzZS0yJTBBJTIwJTIwJTIwJTIwLi4uJTBBKSUwQSUwQSUyMyUyMENyZWF0ZSUyMHRyYWluZXIlMjB3aXRoJTIwUEVGVCUyMGNvbmZpZyUwQXRyYWluZXIlMjAlM0QlMjBTRlRUcmFpbmVyKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEbW9kZWwlMkMlMEElMjAlMjAlMjAlMjBhcmdzJTNEdHJhaW5pbmdfYXJncyUyQyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0RkYXRhc2V0JTJDJTBBJTIwJTIwJTIwJTIwcGVmdF9jb25maWclM0RwZWZ0X2NvbmZpZyUyQyUyMCUyMCUyMyUyMHBhc3MlMjBQRUZUJTIwY29uZmlnJTIwaGVyZSUwQSklMEElMEF0cmFpbmVyLnRyYWluKCk=",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PromptTuningConfig, PromptTuningInit, TaskType | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer | |
| <span class="hljs-comment"># Configure Prompt Tuning</span> | |
| peft_config = PromptTuningConfig( | |
| task_type=TaskType.CAUSAL_LM, | |
| prompt_tuning_init=PromptTuningInit.TEXT, | |
| num_virtual_tokens=<span class="hljs-number">8</span>, | |
| prompt_tuning_init_text=<span class="hljs-string">"Classify if the tweet is a complaint or not:"</span>, | |
| tokenizer_name_or_path=<span class="hljs-string">"Qwen/Qwen2-0.5B"</span>, | |
| ) | |
| <span class="hljs-comment"># Configure training with higher learning rate for Prompt Tuning</span> | |
| training_args = SFTConfig( | |
| learning_rate=<span class="hljs-number">2.0e-2</span>, <span class="hljs-comment"># Prompt Tuning typically uses 1e-2 to 3e-2</span> | |
| ... | |
| ) | |
| <span class="hljs-comment"># Create trainer with PEFT config</span> | |
| trainer = SFTTrainer( | |
| model=model, | |
| args=training_args, | |
| train_dataset=dataset, | |
| peft_config=peft_config, <span class="hljs-comment"># pass PEFT config here</span> | |
| ) | |
| trainer.train()`,wrap:!1}}),re=new w({props:{title:"Prompt Tuning Configuration",local:"prompt-tuning-configuration",headingTag:"h3"}}),me=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBQcm9tcHRUdW5pbmdDb25maWclMkMlMjBQcm9tcHRUdW5pbmdJbml0JTJDJTIwVGFza1R5cGUlMEElMEFwZWZ0X2NvbmZpZyUyMCUzRCUyMFByb21wdFR1bmluZ0NvbmZpZyglMEElMjAlMjAlMjAlMjB0YXNrX3R5cGUlM0RUYXNrVHlwZS5DQVVTQUxfTE0lMkMlMjAlMjAlMjMlMjBUYXNrJTIwdHlwZSUwQSUyMCUyMCUyMCUyMHByb21wdF90dW5pbmdfaW5pdCUzRFByb21wdFR1bmluZ0luaXQuVEVYVCUyQyUyMCUyMCUyMyUyMEluaXRpYWxpemUlMjBmcm9tJTIwdGV4dCUwQSUyMCUyMCUyMCUyMG51bV92aXJ0dWFsX3Rva2VucyUzRDglMkMlMjAlMjAlMjMlMjBOdW1iZXIlMjBvZiUyMHZpcnR1YWwlMjB0b2tlbnMlMEElMjAlMjAlMjAlMjBwcm9tcHRfdHVuaW5nX2luaXRfdGV4dCUzRCUyMllvdXIlMjBpbml0aWFsaXphdGlvbiUyMHRleHQlMjBoZXJlJTIyJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVyX25hbWVfb3JfcGF0aCUzRCUyMm1vZGVsX25hbWUlMjIlMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PromptTuningConfig, PromptTuningInit, TaskType | |
| peft_config = PromptTuningConfig( | |
| task_type=TaskType.CAUSAL_LM, <span class="hljs-comment"># Task type</span> | |
| prompt_tuning_init=PromptTuningInit.TEXT, <span class="hljs-comment"># Initialize from text</span> | |
| num_virtual_tokens=<span class="hljs-number">8</span>, <span class="hljs-comment"># Number of virtual tokens</span> | |
| prompt_tuning_init_text=<span class="hljs-string">"Your initialization text here"</span>, | |
| tokenizer_name_or_path=<span class="hljs-string">"model_name"</span>, | |
| )`,wrap:!1}}),de=new w({props:{title:"Prompt Tuning vs LoRA",local:"prompt-tuning-vs-lora",headingTag:"h3"}}),fe=new w({props:{title:"Advanced PEFT Configurations",local:"advanced-peft-configurations",headingTag:"h2"}}),Je=new w({props:{title:"LoRA Configuration Parameters",local:"lora-configuration-parameters",headingTag:"h3"}}),we=new h({props:{code:"ZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTBBJTBBcGVmdF9jb25maWclMjAlM0QlMjBMb3JhQ29uZmlnKCUwQSUyMCUyMCUyMCUyMHIlM0QxNiUyQyUyMCUyMCUyMyUyMExvUkElMjByYW5rJTBBJTIwJTIwJTIwJTIwbG9yYV9hbHBoYSUzRDMyJTJDJTIwJTIwJTIzJTIwTG9SQSUyMHNjYWxpbmclMjBmYWN0b3IlMEElMjAlMjAlMjAlMjBsb3JhX2Ryb3BvdXQlM0QwLjA1JTJDJTIwJTIwJTIzJTIwRHJvcG91dCUyMHByb2JhYmlsaXR5JTBBJTIwJTIwJTIwJTIwYmlhcyUzRCUyMm5vbmUlMjIlMkMlMjAlMjAlMjMlMjBCaWFzJTIwdHJhaW5pbmclMjBzdHJhdGVneSUwQSUyMCUyMCUyMCUyMHRhc2tfdHlwZSUzRCUyMkNBVVNBTF9MTSUyMiUyQyUyMCUyMCUyMyUyMFRhc2slMjB0eXBlJTBBJTIwJTIwJTIwJTIwdGFyZ2V0X21vZHVsZXMlM0QlNUIlMjJxX3Byb2olMjIlMkMlMjAlMjJ2X3Byb2olMjIlNUQlMkMlMjAlMjAlMjMlMjBNb2R1bGVzJTIwdG8lMjBhcHBseSUyMExvUkElMEElMjAlMjAlMjAlMjBtb2R1bGVzX3RvX3NhdmUlM0ROb25lJTJDJTIwJTIwJTIzJTIwQWRkaXRpb25hbCUyMG1vZHVsZXMlMjB0byUyMHRyYWluJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| peft_config = LoraConfig( | |
| r=<span class="hljs-number">16</span>, <span class="hljs-comment"># LoRA rank</span> | |
| lora_alpha=<span class="hljs-number">32</span>, <span class="hljs-comment"># LoRA scaling factor</span> | |
| lora_dropout=<span class="hljs-number">0.05</span>, <span class="hljs-comment"># Dropout probability</span> | |
| bias=<span class="hljs-string">"none"</span>, <span class="hljs-comment"># Bias training strategy</span> | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, <span class="hljs-comment"># Task type</span> | |
| target_modules=[<span class="hljs-string">"q_proj"</span>, <span class="hljs-string">"v_proj"</span>], <span class="hljs-comment"># Modules to apply LoRA</span> | |
| modules_to_save=<span class="hljs-literal">None</span>, <span class="hljs-comment"># Additional modules to train</span> | |
| )`,wrap:!1}}),je=new w({props:{title:"Target Module Selection",local:"target-module-selection",headingTag:"h3"}}),ge=new h({props:{code:"JTIzJTIwTWluaW1hbCUyMChtb3N0JTIwbWVtb3J5JTIwZWZmaWNpZW50KSUwQXRhcmdldF9tb2R1bGVzJTNEJTVCJTIycV9wcm9qJTIyJTJDJTIwJTIydl9wcm9qJTIyJTVEJTBBJTBBJTIzJTIwQXR0ZW50aW9uJTIwb25seSUwQXRhcmdldF9tb2R1bGVzJTNEJTVCJTIycV9wcm9qJTIyJTJDJTIwJTIya19wcm9qJTIyJTJDJTIwJTIydl9wcm9qJTIyJTJDJTIwJTIyb19wcm9qJTIyJTVEJTBBJTBBJTIzJTIwQWxsJTIwbGluZWFyJTIwbGF5ZXJzJTIwKGJlc3QlMjBwZXJmb3JtYW5jZSUyQyUyMG1vcmUlMjBtZW1vcnkpJTBBdGFyZ2V0X21vZHVsZXMlM0QlNUIlMjJxX3Byb2olMjIlMkMlMjAlMjJrX3Byb2olMjIlMkMlMjAlMjJ2X3Byb2olMjIlMkMlMjAlMjJvX3Byb2olMjIlMkMlMjAlMjJnYXRlX3Byb2olMjIlMkMlMjAlMjJ1cF9wcm9qJTIyJTJDJTIwJTIyZG93bl9wcm9qJTIyJTVE",highlighted:`<span class="hljs-comment"># Minimal (most memory efficient)</span> | |
| target_modules=[<span class="hljs-string">"q_proj"</span>, <span class="hljs-string">"v_proj"</span>] | |
| <span class="hljs-comment"># Attention only</span> | |
| target_modules=[<span class="hljs-string">"q_proj"</span>, <span class="hljs-string">"k_proj"</span>, <span class="hljs-string">"v_proj"</span>, <span class="hljs-string">"o_proj"</span>] | |
| <span class="hljs-comment"># All linear layers (best performance, more memory)</span> | |
| target_modules=[<span class="hljs-string">"q_proj"</span>, <span class="hljs-string">"k_proj"</span>, <span class="hljs-string">"v_proj"</span>, <span class="hljs-string">"o_proj"</span>, <span class="hljs-string">"gate_proj"</span>, <span class="hljs-string">"up_proj"</span>, <span class="hljs-string">"down_proj"</span>]`,wrap:!1}}),he=new w({props:{title:"Using Command-Line Arguments",local:"using-command-line-arguments",headingTag:"h3"}}),$e=new h({props:{code:"cHl0aG9uJTIwdHJsJTJGc2NyaXB0cyUyRnNmdC5weSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLTAuNUIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWRhdGFzZXRfbmFtZSUyMHRybC1saWIlMkZDYXB5YmFyYSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tdXNlX3BlZnQlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfciUyMDMyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sb3JhX2FscGhhJTIwMTYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfZHJvcG91dCUyMDAuMDUlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfdGFyZ2V0X21vZHVsZXMlMjBxX3Byb2olMjB2X3Byb2olMjAlNUMlMEElMjAlMjAlMjAlMjAtLW91dHB1dF9kaXIlMjBvdXRwdXQ=",highlighted:`python trl/scripts/sft.py \\ | |
| --model_name_or_path Qwen/Qwen2-0.5B \\ | |
| --dataset_name trl-lib/Capybara \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16 \\ | |
| --lora_dropout 0.05 \\ | |
| --lora_target_modules q_proj v_proj \\ | |
| --output_dir output`,wrap:!1}}),Ze=new w({props:{title:"Saving and Loading PEFT Models",local:"saving-and-loading-peft-models",headingTag:"h2"}}),Re=new w({props:{title:"Saving",local:"saving",headingTag:"h3"}}),Fe=new h({props:{code:"JTIzJTIwU2F2ZSUyMHRoZSUyMGFkYXB0ZXJzJTBBdHJhaW5lci5zYXZlX21vZGVsKCUyMnBhdGglMkZ0byUyRmFkYXB0ZXJzJTIyKSUwQSUwQSUyMyUyME9yJTIwbWFudWFsbHklMEFtb2RlbC5zYXZlX3ByZXRyYWluZWQoJTIycGF0aCUyRnRvJTJGYWRhcHRlcnMlMjIp",highlighted:`<span class="hljs-comment"># Save the adapters</span> | |
| trainer.save_model(<span class="hljs-string">"path/to/adapters"</span>) | |
| <span class="hljs-comment"># Or manually</span> | |
| model.save_pretrained(<span class="hljs-string">"path/to/adapters"</span>)`,wrap:!1}}),Ee=new w({props:{title:"Loading",local:"loading",headingTag:"h3"}}),ve=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBQZWZ0TW9kZWwlMEElMEElMjMlMjBMb2FkJTIwYmFzZSUyMG1vZGVsJTBBYmFzZV9tb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMjJRd2VuJTJGUXdlbjItMC41QiUyMiklMEElMEElMjMlMjBMb2FkJTIwUEVGVCUyMGFkYXB0ZXJzJTBBbW9kZWwlMjAlM0QlMjBQZWZ0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKGJhc2VfbW9kZWwlMkMlMjAlMjJwYXRoJTJGdG8lMkZhZGFwdGVycyUyMiklMEElMEElMjMlMjBPcHRpb25hbGx5JTIwbWVyZ2UlMjBhZGFwdGVycyUyMGludG8lMjBiYXNlJTIwbW9kZWwlMjBmb3IlMjBmYXN0ZXIlMjBpbmZlcmVuY2UlMEFtb2RlbCUyMCUzRCUyMG1vZGVsLm1lcmdlX2FuZF91bmxvYWQoKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel | |
| <span class="hljs-comment"># Load base model</span> | |
| base_model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"Qwen/Qwen2-0.5B"</span>) | |
| <span class="hljs-comment"># Load PEFT adapters</span> | |
| model = PeftModel.from_pretrained(base_model, <span class="hljs-string">"path/to/adapters"</span>) | |
| <span class="hljs-comment"># Optionally merge adapters into base model for faster inference</span> | |
| model = model.merge_and_unload()`,wrap:!1}}),We=new w({props:{title:"Pushing to Hub",local:"pushing-to-hub",headingTag:"h3"}}),Le=new h({props:{code:"JTIzJTIwUHVzaCUyMGFkYXB0ZXJzJTIwdG8lMjBIdWIlMEFtb2RlbC5wdXNoX3RvX2h1YiglMjJ1c2VybmFtZSUyRm1vZGVsLW5hbWUtbG9yYSUyMiklMEElMEElMjMlMjBMb2FkJTIwZnJvbSUyMEh1YiUwQWZyb20lMjBwZWZ0JTIwaW1wb3J0JTIwUGVmdE1vZGVsJTBBbW9kZWwlMjAlM0QlMjBQZWZ0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKGJhc2VfbW9kZWwlMkMlMjAlMjJ1c2VybmFtZSUyRm1vZGVsLW5hbWUtbG9yYSUyMik=",highlighted:`<span class="hljs-comment"># Push adapters to Hub</span> | |
| model.push_to_hub(<span class="hljs-string">"username/model-name-lora"</span>) | |
| <span class="hljs-comment"># Load from Hub</span> | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel | |
| model = PeftModel.from_pretrained(base_model, <span class="hljs-string">"username/model-name-lora"</span>)`,wrap:!1}}),Xe=new w({props:{title:"Multi-GPU Training",local:"multi-gpu-training",headingTag:"h2"}}),Se=new h({props:{code:"JTIzJTIwQ29uZmlndXJlJTIwYWNjZWxlcmF0ZSUwQWFjY2VsZXJhdGUlMjBjb25maWclMEElMEElMjMlMjBMYXVuY2glMjB0cmFpbmluZyUwQWFjY2VsZXJhdGUlMjBsYXVuY2glMjB0cmwlMkZzY3JpcHRzJTJGc2Z0LnB5JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tb2RlbF9uYW1lX29yX3BhdGglMjBRd2VuJTJGUXdlbjItMC41QiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZGF0YXNldF9uYW1lJTIwdHJsLWxpYiUyRkNhcHliYXJhJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS11c2VfcGVmdCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbG9yYV9yJTIwMzIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWxvcmFfYWxwaGElMjAxNg==",highlighted:`<span class="hljs-comment"># Configure accelerate</span> | |
| accelerate config | |
| <span class="hljs-comment"># Launch training</span> | |
| accelerate launch trl/scripts/sft.py \\ | |
| --model_name_or_path Qwen/Qwen2-0.5B \\ | |
| --dataset_name trl-lib/Capybara \\ | |
| --use_peft \\ | |
| --lora_r 32 \\ | |
| --lora_alpha 16`,wrap:!1}}),Ve=new h({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHRybCUyRnNjcmlwdHMlMkZzZnQucHklMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1vZGVsX25hbWVfb3JfcGF0aCUyMG1ldGEtbGxhbWElMkZMbGFtYS0yLTcwYi1oZiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbG9hZF9pbl80Yml0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS11c2VfcGVmdCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbG9yYV9yJTIwMzI=",highlighted:`accelerate launch trl/scripts/sft.py \\ | |
| --model_name_or_path meta-llama/Llama-2-70b-hf \\ | |
| --load_in_4bit \\ | |
| --use_peft \\ | |
| --lora_r 32`,wrap:!1}}),xe=new w({props:{title:"Naive Pipeline Parallelism (NPP) for Large Models",local:"naive-pipeline-parallelism-npp-for-large-models",headingTag:"h3"}}),qe=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckNhdXNhbExNJTBBZnJvbSUyMHBlZnQlMjBpbXBvcnQlMjBMb3JhQ29uZmlnJTBBJTBBJTIzJTIwQ3JlYXRlJTIwY3VzdG9tJTIwZGV2aWNlJTIwbWFwJTIwKHNlZSUyMGFjY2VsZXJhdGUlMjBkb2N1bWVudGF0aW9uKSUwQWRldmljZV9tYXAlMjAlM0QlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjJtb2RlbC5lbWJlZF90b2tlbnMlMjIlM0ElMjAwJTJDJTBBJTIwJTIwJTIwJTIwJTIybW9kZWwubGF5ZXJzLjAlMjIlM0ElMjAwJTJDJTBBJTIwJTIwJTIwJTIwJTIzJTIwLi4uJTIwZGlzdHJpYnV0ZSUyMGxheWVycyUyMGFjcm9zcyUyMEdQVXMlMEElMjAlMjAlMjAlMjAlMjJsbV9oZWFkJTIyJTNBJTIwMCUyQyUyMCUyMCUyMyUyME11c3QlMjBiZSUyMG9uJTIwR1BVJTIwMCUwQSU3RCUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMm1ldGEtbGxhbWElMkZMbGFtYS0yLTcwYi1oZiUyMiUyQyUwQSUyMCUyMCUyMCUyMGRldmljZV9tYXAlM0RkZXZpY2VfbWFwJTJDJTBBJTIwJTIwJTIwJTIwcGVmdF9jb25maWclM0Rsb3JhX2NvbmZpZyUyQyUwQSk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| <span class="hljs-comment"># Create custom device map (see accelerate documentation)</span> | |
| device_map = { | |
| <span class="hljs-string">"model.embed_tokens"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-string">"model.layers.0"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-comment"># ... distribute layers across GPUs</span> | |
| <span class="hljs-string">"lm_head"</span>: <span class="hljs-number">0</span>, <span class="hljs-comment"># Must be on GPU 0</span> | |
| } | |
| model = AutoModelForCausalLM.from_pretrained( | |
| <span class="hljs-string">"meta-llama/Llama-2-70b-hf"</span>, | |
| device_map=device_map, | |
| peft_config=lora_config, | |
| )`,wrap:!1}}),De=new w({props:{title:"Resources",local:"resources",headingTag:"h2"}}),Oe=new w({props:{title:"TRL Examples and Notebooks",local:"trl-examples-and-notebooks",headingTag:"h3"}}),tl=new w({props:{title:"Documentation",local:"documentation",headingTag:"h3"}}),ll=new w({props:{title:"Research Papers",local:"research-papers",headingTag:"h3"}}),al=new ni({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/peft_integration.md"}}),{c(){u=T("meta"),Z=a(),j=T("p"),F=a(),o(C.$$.fragment),g=a(),o(f.$$.fragment),c=a(),B=T("p"),B.innerHTML=I,R=a(),$=T("p"),$.textContent=A,i=a(),b=T("p"),b.innerHTML=L,J=a(),o(_.$$.fragment),ol=a(),X=T("p"),X.textContent=Qa,Ml=a(),o(k.$$.fragment),pl=a(),S=T("p"),S.textContent=va,rl=a(),o(H.$$.fragment),ml=a(),o(V.$$.fragment),yl=a(),x=T("p"),x.innerHTML=Wa,Tl=a(),o(N.$$.fragment),dl=a(),z=T("p"),z.textContent=Ga,Ul=a(),o(Y.$$.fragment),fl=a(),o(P.$$.fragment),Jl=a(),q=T("p"),q.textContent=La,wl=a(),o(D.$$.fragment),ul=a(),O=T("p"),O.innerHTML=Xa,cl=a(),o(K.$$.fragment),jl=a(),tt=T("p"),tt.innerHTML=ka,bl=a(),et=T("p"),et.innerHTML=Sa,gl=a(),o(lt.$$.fragment),hl=a(),nt=T("p"),nt.textContent=Ha,Cl=a(),o(at.$$.fragment),$l=a(),st=T("p"),st.innerHTML=Va,Il=a(),it=T("p"),it.innerHTML=xa,Bl=a(),o(ot.$$.fragment),Zl=a(),Mt=T("p"),Mt.textContent=Na,Rl=a(),o(pt.$$.fragment),_l=a(),rt=T("p"),rt.innerHTML=za,Fl=a(),mt=T("p"),mt.innerHTML=Ya,Al=a(),o(yt.$$.fragment),El=a(),Tt=T("p"),Tt.innerHTML=Pa,Ql=a(),dt=T("p"),dt.innerHTML=qa,vl=a(),Ut=T("table"),Ut.innerHTML=Da,Wl=a(),ft=T("blockquote"),ft.innerHTML=Oa,Gl=a(),Jt=T("p"),Jt.innerHTML=Ka,Ll=a(),o(wt.$$.fragment),Xl=a(),ut=T("p"),ut.textContent=ts,kl=a(),o(Q.$$.fragment),Sl=a(),o(ct.$$.fragment),Hl=a(),o(jt.$$.fragment),Vl=a(),bt=T("p"),bt.textContent=es,xl=a(),v=T("blockquote"),v.innerHTML=ls,Nl=a(),gt=T("p"),gt.innerHTML=ns,zl=a(),ht=T("p"),ht.textContent=as,Yl=a(),o(Ct.$$.fragment),Pl=a(),$t=T("p"),$t.innerHTML=ss,ql=a(),It=T("p"),It.textContent=is,Dl=a(),Bt=T("ol"),Bt.innerHTML=os,Ol=a(),W=T("blockquote"),W.innerHTML=Ms,Kl=a(),Zt=T("p"),Zt.innerHTML=ps,tn=a(),Rt=T("p"),Rt.textContent=rs,en=a(),o(_t.$$.fragment),ln=a(),Ft=T("p"),Ft.textContent=ms,nn=a(),o(At.$$.fragment),an=a(),Et=T("p"),Et.innerHTML=ys,sn=a(),Qt=T("p"),Qt.innerHTML=Ts,on=a(),vt=T("p"),vt.textContent=ds,Mn=a(),o(Wt.$$.fragment),pn=a(),o(Gt.$$.fragment),rn=a(),Lt=T("p"),Lt.textContent=Us,mn=a(),o(Xt.$$.fragment),yn=a(),kt=T("ol"),kt.innerHTML=fs,Tn=a(),o(St.$$.fragment),dn=a(),Ht=T("p"),Ht.innerHTML=Js,Un=a(),o(Vt.$$.fragment),fn=a(),o(xt.$$.fragment),Jn=a(),o(Nt.$$.fragment),wn=a(),o(zt.$$.fragment),un=a(),o(Yt.$$.fragment),cn=a(),Pt=T("p"),Pt.innerHTML=ws,jn=a(),o(qt.$$.fragment),bn=a(),Dt=T("p"),Dt.innerHTML=us,gn=a(),Ot=T("ul"),Ot.innerHTML=cs,hn=a(),o(Kt.$$.fragment),Cn=a(),te=T("p"),te.textContent=js,$n=a(),o(ee.$$.fragment),In=a(),le=T("p"),le.textContent=bs,Bn=a(),o(ne.$$.fragment),Zn=a(),o(ae.$$.fragment),Rn=a(),se=T("p"),se.textContent=gs,_n=a(),o(ie.$$.fragment),Fn=a(),oe=T("ol"),oe.innerHTML=hs,An=a(),o(Me.$$.fragment),En=a(),o(pe.$$.fragment),Qn=a(),o(re.$$.fragment),vn=a(),o(me.$$.fragment),Wn=a(),ye=T("p"),ye.innerHTML=Cs,Gn=a(),Te=T("ul"),Te.innerHTML=$s,Ln=a(),o(de.$$.fragment),Xn=a(),Ue=T("table"),Ue.innerHTML=Is,kn=a(),o(fe.$$.fragment),Sn=a(),o(Je.$$.fragment),Hn=a(),o(we.$$.fragment),Vn=a(),ue=T("p"),ue.innerHTML=Bs,xn=a(),ce=T("ul"),ce.innerHTML=Zs,Nn=a(),o(je.$$.fragment),zn=a(),be=T("p"),be.textContent=Rs,Yn=a(),o(ge.$$.fragment),Pn=a(),o(he.$$.fragment),qn=a(),Ce=T("p"),Ce.textContent=_s,Dn=a(),o($e.$$.fragment),On=a(),Ie=T("p"),Ie.textContent=Fs,Kn=a(),Be=T("ul"),Be.innerHTML=As,ta=a(),o(Ze.$$.fragment),ea=a(),o(Re.$$.fragment),la=a(),_e=T("p"),_e.textContent=Es,na=a(),o(Fe.$$.fragment),aa=a(),Ae=T("p"),Ae.textContent=Qs,sa=a(),o(Ee.$$.fragment),ia=a(),Qe=T("p"),Qe.textContent=vs,oa=a(),o(ve.$$.fragment),Ma=a(),o(We.$$.fragment),pa=a(),Ge=T("p"),Ge.textContent=Ws,ra=a(),o(Le.$$.fragment),ma=a(),o(Xe.$$.fragment),ya=a(),ke=T("p"),ke.innerHTML=Gs,Ta=a(),o(Se.$$.fragment),da=a(),He=T("p"),He.textContent=Ls,Ua=a(),o(Ve.$$.fragment),fa=a(),o(xe.$$.fragment),Ja=a(),Ne=T("p"),Ne.innerHTML=Xs,wa=a(),ze=T("p"),ze.innerHTML=ks,ua=a(),Ye=T("p"),Ye.innerHTML=Ss,ca=a(),Pe=T("p"),Pe.innerHTML=Hs,ja=a(),o(qe.$$.fragment),ba=a(),G=T("blockquote"),G.innerHTML=Vs,ga=a(),o(De.$$.fragment),ha=a(),o(Oe.$$.fragment),Ca=a(),Ke=T("ul"),Ke.innerHTML=xs,$a=a(),o(tl.$$.fragment),Ia=a(),el=T("ul"),el.innerHTML=Ns,Ba=a(),o(ll.$$.fragment),Za=a(),nl=T("ul"),nl.innerHTML=zs,Ra=a(),o(al.$$.fragment),_a=a(),sl=T("p"),this.h()},l(t){const e=ti("svelte-u9bgzb",document.head);u=d(e,"META",{name:!0,content:!0}),e.forEach(l),Z=s(t),j=d(t,"P",{}),Ps(j).forEach(l),F=s(t),M(C.$$.fragment,t),g=s(t),M(f.$$.fragment,t),c=s(t),B=d(t,"P",{"data-svelte-h":!0}),U(B)!=="svelte-1aa4si9"&&(B.innerHTML=I),R=s(t),$=d(t,"P",{"data-svelte-h":!0}),U($)!=="svelte-eyysuf"&&($.textContent=A),i=s(t),b=d(t,"P",{"data-svelte-h":!0}),U(b)!=="svelte-vdlhef"&&(b.innerHTML=L),J=s(t),M(_.$$.fragment,t),ol=s(t),X=d(t,"P",{"data-svelte-h":!0}),U(X)!=="svelte-1a7j66l"&&(X.textContent=Qa),Ml=s(t),M(k.$$.fragment,t),pl=s(t),S=d(t,"P",{"data-svelte-h":!0}),U(S)!=="svelte-140s00u"&&(S.textContent=va),rl=s(t),M(H.$$.fragment,t),ml=s(t),M(V.$$.fragment,t),yl=s(t),x=d(t,"P",{"data-svelte-h":!0}),U(x)!=="svelte-145s0q3"&&(x.innerHTML=Wa),Tl=s(t),M(N.$$.fragment,t),dl=s(t),z=d(t,"P",{"data-svelte-h":!0}),U(z)!=="svelte-ubc45n"&&(z.textContent=Ga),Ul=s(t),M(Y.$$.fragment,t),fl=s(t),M(P.$$.fragment,t),Jl=s(t),q=d(t,"P",{"data-svelte-h":!0}),U(q)!=="svelte-195hlna"&&(q.textContent=La),wl=s(t),M(D.$$.fragment,t),ul=s(t),O=d(t,"P",{"data-svelte-h":!0}),U(O)!=="svelte-ytojdr"&&(O.innerHTML=Xa),cl=s(t),M(K.$$.fragment,t),jl=s(t),tt=d(t,"P",{"data-svelte-h":!0}),U(tt)!=="svelte-1h44p9a"&&(tt.innerHTML=ka),bl=s(t),et=d(t,"P",{"data-svelte-h":!0}),U(et)!=="svelte-6pnbnx"&&(et.innerHTML=Sa),gl=s(t),M(lt.$$.fragment,t),hl=s(t),nt=d(t,"P",{"data-svelte-h":!0}),U(nt)!=="svelte-19rmu4f"&&(nt.textContent=Ha),Cl=s(t),M(at.$$.fragment,t),$l=s(t),st=d(t,"P",{"data-svelte-h":!0}),U(st)!=="svelte-1q9st0j"&&(st.innerHTML=Va),Il=s(t),it=d(t,"P",{"data-svelte-h":!0}),U(it)!=="svelte-piihuf"&&(it.innerHTML=xa),Bl=s(t),M(ot.$$.fragment,t),Zl=s(t),Mt=d(t,"P",{"data-svelte-h":!0}),U(Mt)!=="svelte-gkqfv"&&(Mt.textContent=Na),Rl=s(t),M(pt.$$.fragment,t),_l=s(t),rt=d(t,"P",{"data-svelte-h":!0}),U(rt)!=="svelte-7gz8pq"&&(rt.innerHTML=za),Fl=s(t),mt=d(t,"P",{"data-svelte-h":!0}),U(mt)!=="svelte-uvcbqp"&&(mt.innerHTML=Ya),Al=s(t),M(yt.$$.fragment,t),El=s(t),Tt=d(t,"P",{"data-svelte-h":!0}),U(Tt)!=="svelte-19gln0l"&&(Tt.innerHTML=Pa),Ql=s(t),dt=d(t,"P",{"data-svelte-h":!0}),U(dt)!=="svelte-56wxxv"&&(dt.innerHTML=qa),vl=s(t),Ut=d(t,"TABLE",{"data-svelte-h":!0}),U(Ut)!=="svelte-vcu64p"&&(Ut.innerHTML=Da),Wl=s(t),ft=d(t,"BLOCKQUOTE",{"data-svelte-h":!0}),U(ft)!=="svelte-8pdduk"&&(ft.innerHTML=Oa),Gl=s(t),Jt=d(t,"P",{"data-svelte-h":!0}),U(Jt)!=="svelte-1eq8vqg"&&(Jt.innerHTML=Ka),Ll=s(t),M(wt.$$.fragment,t),Xl=s(t),ut=d(t,"P",{"data-svelte-h":!0}),U(ut)!=="svelte-lhxbnr"&&(ut.textContent=ts),kl=s(t),M(Q.$$.fragment,t),Sl=s(t),M(ct.$$.fragment,t),Hl=s(t),M(jt.$$.fragment,t),Vl=s(t),bt=d(t,"P",{"data-svelte-h":!0}),U(bt)!=="svelte-1a94gbn"&&(bt.textContent=es),xl=s(t),v=d(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),U(v)!=="svelte-1vy4ntf"&&(v.innerHTML=ls),Nl=s(t),gt=d(t,"P",{"data-svelte-h":!0}),U(gt)!=="svelte-ioylk5"&&(gt.innerHTML=ns),zl=s(t),ht=d(t,"P",{"data-svelte-h":!0}),U(ht)!=="svelte-1gs5c9v"&&(ht.textContent=as),Yl=s(t),M(Ct.$$.fragment,t),Pl=s(t),$t=d(t,"P",{"data-svelte-h":!0}),U($t)!=="svelte-1q7scme"&&($t.innerHTML=ss),ql=s(t),It=d(t,"P",{"data-svelte-h":!0}),U(It)!=="svelte-1cwfc7z"&&(It.textContent=is),Dl=s(t),Bt=d(t,"OL",{"data-svelte-h":!0}),U(Bt)!=="svelte-170dem6"&&(Bt.innerHTML=os),Ol=s(t),W=d(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),U(W)!=="svelte-1r7qnul"&&(W.innerHTML=Ms),Kl=s(t),Zt=d(t,"P",{"data-svelte-h":!0}),U(Zt)!=="svelte-1h24w70"&&(Zt.innerHTML=ps),tn=s(t),Rt=d(t,"P",{"data-svelte-h":!0}),U(Rt)!=="svelte-1826sco"&&(Rt.textContent=rs),en=s(t),M(_t.$$.fragment,t),ln=s(t),Ft=d(t,"P",{"data-svelte-h":!0}),U(Ft)!=="svelte-m3b0ez"&&(Ft.textContent=ms),nn=s(t),M(At.$$.fragment,t),an=s(t),Et=d(t,"P",{"data-svelte-h":!0}),U(Et)!=="svelte-iimns0"&&(Et.innerHTML=ys),sn=s(t),Qt=d(t,"P",{"data-svelte-h":!0}),U(Qt)!=="svelte-s0v8y9"&&(Qt.innerHTML=Ts),on=s(t),vt=d(t,"P",{"data-svelte-h":!0}),U(vt)!=="svelte-1u4rolx"&&(vt.textContent=ds),Mn=s(t),M(Wt.$$.fragment,t),pn=s(t),M(Gt.$$.fragment,t),rn=s(t),Lt=d(t,"P",{"data-svelte-h":!0}),U(Lt)!=="svelte-18m242a"&&(Lt.textContent=Us),mn=s(t),M(Xt.$$.fragment,t),yn=s(t),kt=d(t,"OL",{"data-svelte-h":!0}),U(kt)!=="svelte-1q5rstb"&&(kt.innerHTML=fs),Tn=s(t),M(St.$$.fragment,t),dn=s(t),Ht=d(t,"P",{"data-svelte-h":!0}),U(Ht)!=="svelte-2emjj2"&&(Ht.innerHTML=Js),Un=s(t),M(Vt.$$.fragment,t),fn=s(t),M(xt.$$.fragment,t),Jn=s(t),M(Nt.$$.fragment,t),wn=s(t),M(zt.$$.fragment,t),un=s(t),M(Yt.$$.fragment,t),cn=s(t),Pt=d(t,"P",{"data-svelte-h":!0}),U(Pt)!=="svelte-1tzfl0e"&&(Pt.innerHTML=ws),jn=s(t),M(qt.$$.fragment,t),bn=s(t),Dt=d(t,"P",{"data-svelte-h":!0}),U(Dt)!=="svelte-1sihssd"&&(Dt.innerHTML=us),gn=s(t),Ot=d(t,"UL",{"data-svelte-h":!0}),U(Ot)!=="svelte-vp4ki5"&&(Ot.innerHTML=cs),hn=s(t),M(Kt.$$.fragment,t),Cn=s(t),te=d(t,"P",{"data-svelte-h":!0}),U(te)!=="svelte-1xizljh"&&(te.textContent=js),$n=s(t),M(ee.$$.fragment,t),In=s(t),le=d(t,"P",{"data-svelte-h":!0}),U(le)!=="svelte-131mx14"&&(le.textContent=bs),Bn=s(t),M(ne.$$.fragment,t),Zn=s(t),M(ae.$$.fragment,t),Rn=s(t),se=d(t,"P",{"data-svelte-h":!0}),U(se)!=="svelte-1t6iq03"&&(se.textContent=gs),_n=s(t),M(ie.$$.fragment,t),Fn=s(t),oe=d(t,"OL",{"data-svelte-h":!0}),U(oe)!=="svelte-8qfcaq"&&(oe.innerHTML=hs),An=s(t),M(Me.$$.fragment,t),En=s(t),M(pe.$$.fragment,t),Qn=s(t),M(re.$$.fragment,t),vn=s(t),M(me.$$.fragment,t),Wn=s(t),ye=d(t,"P",{"data-svelte-h":!0}),U(ye)!=="svelte-1sihssd"&&(ye.innerHTML=Cs),Gn=s(t),Te=d(t,"UL",{"data-svelte-h":!0}),U(Te)!=="svelte-1aukhjc"&&(Te.innerHTML=$s),Ln=s(t),M(de.$$.fragment,t),Xn=s(t),Ue=d(t,"TABLE",{"data-svelte-h":!0}),U(Ue)!=="svelte-esrk6j"&&(Ue.innerHTML=Is),kn=s(t),M(fe.$$.fragment,t),Sn=s(t),M(Je.$$.fragment,t),Hn=s(t),M(we.$$.fragment,t),Vn=s(t),ue=d(t,"P",{"data-svelte-h":!0}),U(ue)!=="svelte-11zdjd0"&&(ue.innerHTML=Bs),xn=s(t),ce=d(t,"UL",{"data-svelte-h":!0}),U(ce)!=="svelte-heuyrh"&&(ce.innerHTML=Zs),Nn=s(t),M(je.$$.fragment,t),zn=s(t),be=d(t,"P",{"data-svelte-h":!0}),U(be)!=="svelte-ey0006"&&(be.textContent=Rs),Yn=s(t),M(ge.$$.fragment,t),Pn=s(t),M(he.$$.fragment,t),qn=s(t),Ce=d(t,"P",{"data-svelte-h":!0}),U(Ce)!=="svelte-zcediw"&&(Ce.textContent=_s),Dn=s(t),M($e.$$.fragment,t),On=s(t),Ie=d(t,"P",{"data-svelte-h":!0}),U(Ie)!=="svelte-1oizey2"&&(Ie.textContent=Fs),Kn=s(t),Be=d(t,"UL",{"data-svelte-h":!0}),U(Be)!=="svelte-1g0a0nd"&&(Be.innerHTML=As),ta=s(t),M(Ze.$$.fragment,t),ea=s(t),M(Re.$$.fragment,t),la=s(t),_e=d(t,"P",{"data-svelte-h":!0}),U(_e)!=="svelte-1cfqg9t"&&(_e.textContent=Es),na=s(t),M(Fe.$$.fragment,t),aa=s(t),Ae=d(t,"P",{"data-svelte-h":!0}),U(Ae)!=="svelte-dbw3jd"&&(Ae.textContent=Qs),sa=s(t),M(Ee.$$.fragment,t),ia=s(t),Qe=d(t,"P",{"data-svelte-h":!0}),U(Qe)!=="svelte-y2qx63"&&(Qe.textContent=vs),oa=s(t),M(ve.$$.fragment,t),Ma=s(t),M(We.$$.fragment,t),pa=s(t),Ge=d(t,"P",{"data-svelte-h":!0}),U(Ge)!=="svelte-mqk2d0"&&(Ge.textContent=Ws),ra=s(t),M(Le.$$.fragment,t),ma=s(t),M(Xe.$$.fragment,t),ya=s(t),ke=d(t,"P",{"data-svelte-h":!0}),U(ke)!=="svelte-gkn6hn"&&(ke.innerHTML=Gs),Ta=s(t),M(Se.$$.fragment,t),da=s(t),He=d(t,"P",{"data-svelte-h":!0}),U(He)!=="svelte-1jh5w4w"&&(He.textContent=Ls),Ua=s(t),M(Ve.$$.fragment,t),fa=s(t),M(xe.$$.fragment,t),Ja=s(t),Ne=d(t,"P",{"data-svelte-h":!0}),U(Ne)!=="svelte-1lawuya"&&(Ne.innerHTML=Xs),wa=s(t),ze=d(t,"P",{"data-svelte-h":!0}),U(ze)!=="svelte-1ot7jbs"&&(ze.innerHTML=ks),ua=s(t),Ye=d(t,"P",{"data-svelte-h":!0}),U(Ye)!=="svelte-lyjvon"&&(Ye.innerHTML=Ss),ca=s(t),Pe=d(t,"P",{"data-svelte-h":!0}),U(Pe)!=="svelte-oib2x8"&&(Pe.innerHTML=Hs),ja=s(t),M(qe.$$.fragment,t),ba=s(t),G=d(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),U(G)!=="svelte-1ngz8y5"&&(G.innerHTML=Vs),ga=s(t),M(De.$$.fragment,t),ha=s(t),M(Oe.$$.fragment,t),Ca=s(t),Ke=d(t,"UL",{"data-svelte-h":!0}),U(Ke)!=="svelte-1wuy9tc"&&(Ke.innerHTML=xs),$a=s(t),M(tl.$$.fragment,t),Ia=s(t),el=d(t,"UL",{"data-svelte-h":!0}),U(el)!=="svelte-1ru624y"&&(el.innerHTML=Ns),Ba=s(t),M(ll.$$.fragment,t),Za=s(t),nl=d(t,"UL",{"data-svelte-h":!0}),U(nl)!=="svelte-17ay4ku"&&(nl.innerHTML=zs),Ra=s(t),M(al.$$.fragment,t),_a=s(t),sl=d(t,"P",{}),Ps(sl).forEach(l),this.h()},h(){il(u,"name","hf:doc:metadata"),il(u,"content",ri),il(v,"class","warning"),il(W,"class","important"),il(G,"class","important")},m(t,e){ei(document.head,u),n(t,Z,e),n(t,j,e),n(t,F,e),p(C,t,e),n(t,g,e),p(f,t,e),n(t,c,e),n(t,B,e),n(t,R,e),n(t,$,e),n(t,i,e),n(t,b,e),n(t,J,e),p(_,t,e),n(t,ol,e),n(t,X,e),n(t,Ml,e),p(k,t,e),n(t,pl,e),n(t,S,e),n(t,rl,e),p(H,t,e),n(t,ml,e),p(V,t,e),n(t,yl,e),n(t,x,e),n(t,Tl,e),p(N,t,e),n(t,dl,e),n(t,z,e),n(t,Ul,e),p(Y,t,e),n(t,fl,e),p(P,t,e),n(t,Jl,e),n(t,q,e),n(t,wl,e),p(D,t,e),n(t,ul,e),n(t,O,e),n(t,cl,e),p(K,t,e),n(t,jl,e),n(t,tt,e),n(t,bl,e),n(t,et,e),n(t,gl,e),p(lt,t,e),n(t,hl,e),n(t,nt,e),n(t,Cl,e),p(at,t,e),n(t,$l,e),n(t,st,e),n(t,Il,e),n(t,it,e),n(t,Bl,e),p(ot,t,e),n(t,Zl,e),n(t,Mt,e),n(t,Rl,e),p(pt,t,e),n(t,_l,e),n(t,rt,e),n(t,Fl,e),n(t,mt,e),n(t,Al,e),p(yt,t,e),n(t,El,e),n(t,Tt,e),n(t,Ql,e),n(t,dt,e),n(t,vl,e),n(t,Ut,e),n(t,Wl,e),n(t,ft,e),n(t,Gl,e),n(t,Jt,e),n(t,Ll,e),p(wt,t,e),n(t,Xl,e),n(t,ut,e),n(t,kl,e),p(Q,t,e),n(t,Sl,e),p(ct,t,e),n(t,Hl,e),p(jt,t,e),n(t,Vl,e),n(t,bt,e),n(t,xl,e),n(t,v,e),n(t,Nl,e),n(t,gt,e),n(t,zl,e),n(t,ht,e),n(t,Yl,e),p(Ct,t,e),n(t,Pl,e),n(t,$t,e),n(t,ql,e),n(t,It,e),n(t,Dl,e),n(t,Bt,e),n(t,Ol,e),n(t,W,e),n(t,Kl,e),n(t,Zt,e),n(t,tn,e),n(t,Rt,e),n(t,en,e),p(_t,t,e),n(t,ln,e),n(t,Ft,e),n(t,nn,e),p(At,t,e),n(t,an,e),n(t,Et,e),n(t,sn,e),n(t,Qt,e),n(t,on,e),n(t,vt,e),n(t,Mn,e),p(Wt,t,e),n(t,pn,e),p(Gt,t,e),n(t,rn,e),n(t,Lt,e),n(t,mn,e),p(Xt,t,e),n(t,yn,e),n(t,kt,e),n(t,Tn,e),p(St,t,e),n(t,dn,e),n(t,Ht,e),n(t,Un,e),p(Vt,t,e),n(t,fn,e),p(xt,t,e),n(t,Jn,e),p(Nt,t,e),n(t,wn,e),p(zt,t,e),n(t,un,e),p(Yt,t,e),n(t,cn,e),n(t,Pt,e),n(t,jn,e),p(qt,t,e),n(t,bn,e),n(t,Dt,e),n(t,gn,e),n(t,Ot,e),n(t,hn,e),p(Kt,t,e),n(t,Cn,e),n(t,te,e),n(t,$n,e),p(ee,t,e),n(t,In,e),n(t,le,e),n(t,Bn,e),p(ne,t,e),n(t,Zn,e),p(ae,t,e),n(t,Rn,e),n(t,se,e),n(t,_n,e),p(ie,t,e),n(t,Fn,e),n(t,oe,e),n(t,An,e),p(Me,t,e),n(t,En,e),p(pe,t,e),n(t,Qn,e),p(re,t,e),n(t,vn,e),p(me,t,e),n(t,Wn,e),n(t,ye,e),n(t,Gn,e),n(t,Te,e),n(t,Ln,e),p(de,t,e),n(t,Xn,e),n(t,Ue,e),n(t,kn,e),p(fe,t,e),n(t,Sn,e),p(Je,t,e),n(t,Hn,e),p(we,t,e),n(t,Vn,e),n(t,ue,e),n(t,xn,e),n(t,ce,e),n(t,Nn,e),p(je,t,e),n(t,zn,e),n(t,be,e),n(t,Yn,e),p(ge,t,e),n(t,Pn,e),p(he,t,e),n(t,qn,e),n(t,Ce,e),n(t,Dn,e),p($e,t,e),n(t,On,e),n(t,Ie,e),n(t,Kn,e),n(t,Be,e),n(t,ta,e),p(Ze,t,e),n(t,ea,e),p(Re,t,e),n(t,la,e),n(t,_e,e),n(t,na,e),p(Fe,t,e),n(t,aa,e),n(t,Ae,e),n(t,sa,e),p(Ee,t,e),n(t,ia,e),n(t,Qe,e),n(t,oa,e),p(ve,t,e),n(t,Ma,e),p(We,t,e),n(t,pa,e),n(t,Ge,e),n(t,ra,e),p(Le,t,e),n(t,ma,e),p(Xe,t,e),n(t,ya,e),n(t,ke,e),n(t,Ta,e),p(Se,t,e),n(t,da,e),n(t,He,e),n(t,Ua,e),p(Ve,t,e),n(t,fa,e),p(xe,t,e),n(t,Ja,e),n(t,Ne,e),n(t,wa,e),n(t,ze,e),n(t,ua,e),n(t,Ye,e),n(t,ca,e),n(t,Pe,e),n(t,ja,e),p(qe,t,e),n(t,ba,e),n(t,G,e),n(t,ga,e),p(De,t,e),n(t,ha,e),p(Oe,t,e),n(t,Ca,e),n(t,Ke,e),n(t,$a,e),p(tl,t,e),n(t,Ia,e),n(t,el,e),n(t,Ba,e),p(ll,t,e),n(t,Za,e),n(t,nl,e),n(t,Ra,e),p(al,t,e),n(t,_a,e),n(t,sl,e),Fa=!0},p(t,[e]){const Ys={};e&2&&(Ys.$$scope={dirty:e,ctx:t}),Q.$set(Ys)},i(t){Fa||(r(C.$$.fragment,t),r(f.$$.fragment,t),r(_.$$.fragment,t),r(k.$$.fragment,t),r(H.$$.fragment,t),r(V.$$.fragment,t),r(N.$$.fragment,t),r(Y.$$.fragment,t),r(P.$$.fragment,t),r(D.$$.fragment,t),r(K.$$.fragment,t),r(lt.$$.fragment,t),r(at.$$.fragment,t),r(ot.$$.fragment,t),r(pt.$$.fragment,t),r(yt.$$.fragment,t),r(wt.$$.fragment,t),r(Q.$$.fragment,t),r(ct.$$.fragment,t),r(jt.$$.fragment,t),r(Ct.$$.fragment,t),r(_t.$$.fragment,t),r(At.$$.fragment,t),r(Wt.$$.fragment,t),r(Gt.$$.fragment,t),r(Xt.$$.fragment,t),r(St.$$.fragment,t),r(Vt.$$.fragment,t),r(xt.$$.fragment,t),r(Nt.$$.fragment,t),r(zt.$$.fragment,t),r(Yt.$$.fragment,t),r(qt.$$.fragment,t),r(Kt.$$.fragment,t),r(ee.$$.fragment,t),r(ne.$$.fragment,t),r(ae.$$.fragment,t),r(ie.$$.fragment,t),r(Me.$$.fragment,t),r(pe.$$.fragment,t),r(re.$$.fragment,t),r(me.$$.fragment,t),r(de.$$.fragment,t),r(fe.$$.fragment,t),r(Je.$$.fragment,t),r(we.$$.fragment,t),r(je.$$.fragment,t),r(ge.$$.fragment,t),r(he.$$.fragment,t),r($e.$$.fragment,t),r(Ze.$$.fragment,t),r(Re.$$.fragment,t),r(Fe.$$.fragment,t),r(Ee.$$.fragment,t),r(ve.$$.fragment,t),r(We.$$.fragment,t),r(Le.$$.fragment,t),r(Xe.$$.fragment,t),r(Se.$$.fragment,t),r(Ve.$$.fragment,t),r(xe.$$.fragment,t),r(qe.$$.fragment,t),r(De.$$.fragment,t),r(Oe.$$.fragment,t),r(tl.$$.fragment,t),r(ll.$$.fragment,t),r(al.$$.fragment,t),Fa=!0)},o(t){m(C.$$.fragment,t),m(f.$$.fragment,t),m(_.$$.fragment,t),m(k.$$.fragment,t),m(H.$$.fragment,t),m(V.$$.fragment,t),m(N.$$.fragment,t),m(Y.$$.fragment,t),m(P.$$.fragment,t),m(D.$$.fragment,t),m(K.$$.fragment,t),m(lt.$$.fragment,t),m(at.$$.fragment,t),m(ot.$$.fragment,t),m(pt.$$.fragment,t),m(yt.$$.fragment,t),m(wt.$$.fragment,t),m(Q.$$.fragment,t),m(ct.$$.fragment,t),m(jt.$$.fragment,t),m(Ct.$$.fragment,t),m(_t.$$.fragment,t),m(At.$$.fragment,t),m(Wt.$$.fragment,t),m(Gt.$$.fragment,t),m(Xt.$$.fragment,t),m(St.$$.fragment,t),m(Vt.$$.fragment,t),m(xt.$$.fragment,t),m(Nt.$$.fragment,t),m(zt.$$.fragment,t),m(Yt.$$.fragment,t),m(qt.$$.fragment,t),m(Kt.$$.fragment,t),m(ee.$$.fragment,t),m(ne.$$.fragment,t),m(ae.$$.fragment,t),m(ie.$$.fragment,t),m(Me.$$.fragment,t),m(pe.$$.fragment,t),m(re.$$.fragment,t),m(me.$$.fragment,t),m(de.$$.fragment,t),m(fe.$$.fragment,t),m(Je.$$.fragment,t),m(we.$$.fragment,t),m(je.$$.fragment,t),m(ge.$$.fragment,t),m(he.$$.fragment,t),m($e.$$.fragment,t),m(Ze.$$.fragment,t),m(Re.$$.fragment,t),m(Fe.$$.fragment,t),m(Ee.$$.fragment,t),m(ve.$$.fragment,t),m(We.$$.fragment,t),m(Le.$$.fragment,t),m(Xe.$$.fragment,t),m(Se.$$.fragment,t),m(Ve.$$.fragment,t),m(xe.$$.fragment,t),m(qe.$$.fragment,t),m(De.$$.fragment,t),m(Oe.$$.fragment,t),m(tl.$$.fragment,t),m(ll.$$.fragment,t),m(al.$$.fragment,t),Fa=!1},d(t){t&&(l(Z),l(j),l(F),l(g),l(c),l(B),l(R),l($),l(i),l(b),l(J),l(ol),l(X),l(Ml),l(pl),l(S),l(rl),l(ml),l(yl),l(x),l(Tl),l(dl),l(z),l(Ul),l(fl),l(Jl),l(q),l(wl),l(ul),l(O),l(cl),l(jl),l(tt),l(bl),l(et),l(gl),l(hl),l(nt),l(Cl),l($l),l(st),l(Il),l(it),l(Bl),l(Zl),l(Mt),l(Rl),l(_l),l(rt),l(Fl),l(mt),l(Al),l(El),l(Tt),l(Ql),l(dt),l(vl),l(Ut),l(Wl),l(ft),l(Gl),l(Jt),l(Ll),l(Xl),l(ut),l(kl),l(Sl),l(Hl),l(Vl),l(bt),l(xl),l(v),l(Nl),l(gt),l(zl),l(ht),l(Yl),l(Pl),l($t),l(ql),l(It),l(Dl),l(Bt),l(Ol),l(W),l(Kl),l(Zt),l(tn),l(Rt),l(en),l(ln),l(Ft),l(nn),l(an),l(Et),l(sn),l(Qt),l(on),l(vt),l(Mn),l(pn),l(rn),l(Lt),l(mn),l(yn),l(kt),l(Tn),l(dn),l(Ht),l(Un),l(fn),l(Jn),l(wn),l(un),l(cn),l(Pt),l(jn),l(bn),l(Dt),l(gn),l(Ot),l(hn),l(Cn),l(te),l($n),l(In),l(le),l(Bn),l(Zn),l(Rn),l(se),l(_n),l(Fn),l(oe),l(An),l(En),l(Qn),l(vn),l(Wn),l(ye),l(Gn),l(Te),l(Ln),l(Xn),l(Ue),l(kn),l(Sn),l(Hn),l(Vn),l(ue),l(xn),l(ce),l(Nn),l(zn),l(be),l(Yn),l(Pn),l(qn),l(Ce),l(Dn),l(On),l(Ie),l(Kn),l(Be),l(ta),l(ea),l(la),l(_e),l(na),l(aa),l(Ae),l(sa),l(ia),l(Qe),l(oa),l(Ma),l(pa),l(Ge),l(ra),l(ma),l(ya),l(ke),l(Ta),l(da),l(He),l(Ua),l(fa),l(Ja),l(Ne),l(wa),l(ze),l(ua),l(Ye),l(ca),l(Pe),l(ja),l(ba),l(G),l(ga),l(ha),l(Ca),l(Ke),l($a),l(Ia),l(el),l(Ba),l(Za),l(nl),l(Ra),l(_a),l(sl)),l(u),y(C,t),y(f,t),y(_,t),y(k,t),y(H,t),y(V,t),y(N,t),y(Y,t),y(P,t),y(D,t),y(K,t),y(lt,t),y(at,t),y(ot,t),y(pt,t),y(yt,t),y(wt,t),y(Q,t),y(ct,t),y(jt,t),y(Ct,t),y(_t,t),y(At,t),y(Wt,t),y(Gt,t),y(Xt,t),y(St,t),y(Vt,t),y(xt,t),y(Nt,t),y(zt,t),y(Yt,t),y(qt,t),y(Kt,t),y(ee,t),y(ne,t),y(ae,t),y(ie,t),y(Me,t),y(pe,t),y(re,t),y(me,t),y(de,t),y(fe,t),y(Je,t),y(we,t),y(je,t),y(ge,t),y(he,t),y($e,t),y(Ze,t),y(Re,t),y(Fe,t),y(Ee,t),y(ve,t),y(We,t),y(Le,t),y(Xe,t),y(Se,t),y(Ve,t),y(xe,t),y(qe,t),y(De,t),y(Oe,t),y(tl,t),y(ll,t),y(al,t)}}}const ri='{"title":"PEFT Integration","local":"peft-integration","sections":[{"title":"Installation","local":"installation","sections":[],"depth":2},{"title":"Quick Start","local":"quick-start","sections":[],"depth":2},{"title":"Three Ways to Configure PEFT","local":"three-ways-to-configure-peft","sections":[{"title":"1. Using CLI Flags (Simplest)","local":"1-using-cli-flags-simplest","sections":[],"depth":3},{"title":"2. Passing peft_config to Trainer (Recommended)","local":"2-passing-peftconfig-to-trainer-recommended","sections":[],"depth":3},{"title":"3. Applying PEFT to Model Directly (Advanced)","local":"3-applying-peft-to-model-directly-advanced","sections":[],"depth":3}],"depth":2},{"title":"Learning Rate Considerations","local":"learning-rate-considerations","sections":[],"depth":2},{"title":"PEFT with Different Trainers","local":"peft-with-different-trainers","sections":[{"title":"Supervised Fine-Tuning (SFT)","local":"supervised-fine-tuning-sft","sections":[{"title":"With LoRA","local":"with-lora","sections":[],"depth":4},{"title":"Python Example","local":"python-example","sections":[],"depth":4}],"depth":3},{"title":"Direct Preference Optimization (DPO)","local":"direct-preference-optimization-dpo","sections":[{"title":"With LoRA","local":"with-lora","sections":[],"depth":4},{"title":"Python Example","local":"python-example","sections":[],"depth":4}],"depth":3},{"title":"Group Relative Policy Optimization (GRPO)","local":"group-relative-policy-optimization-grpo","sections":[{"title":"With LoRA","local":"with-lora","sections":[],"depth":4},{"title":"Python Example","local":"python-example","sections":[],"depth":4}],"depth":3},{"title":"Proximal Policy Optimization (PPO)","local":"proximal-policy-optimization-ppo","sections":[{"title":"Multi-Adapter RL Training","local":"multi-adapter-rl-training","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"QLoRA: Quantized Low-Rank Adaptation","local":"qlora-quantized-low-rank-adaptation","sections":[{"title":"How QLoRA Works","local":"how-qlora-works","sections":[],"depth":3},{"title":"Using QLoRA with TRL","local":"using-qlora-with-trl","sections":[{"title":"Command Line","local":"command-line","sections":[],"depth":4},{"title":"Python Example","local":"python-example","sections":[],"depth":4}],"depth":3},{"title":"QLoRA Configuration Options","local":"qlora-configuration-options","sections":[],"depth":3},{"title":"8-bit Quantization","local":"8-bit-quantization","sections":[],"depth":3}],"depth":2},{"title":"Prompt Tuning","local":"prompt-tuning","sections":[{"title":"How Prompt Tuning Works","local":"how-prompt-tuning-works","sections":[],"depth":3},{"title":"Using Prompt Tuning with TRL","local":"using-prompt-tuning-with-trl","sections":[],"depth":3},{"title":"Prompt Tuning Configuration","local":"prompt-tuning-configuration","sections":[],"depth":3},{"title":"Prompt Tuning vs LoRA","local":"prompt-tuning-vs-lora","sections":[],"depth":3}],"depth":2},{"title":"Advanced PEFT Configurations","local":"advanced-peft-configurations","sections":[{"title":"LoRA Configuration Parameters","local":"lora-configuration-parameters","sections":[],"depth":3},{"title":"Target Module Selection","local":"target-module-selection","sections":[],"depth":3},{"title":"Using Command-Line Arguments","local":"using-command-line-arguments","sections":[],"depth":3}],"depth":2},{"title":"Saving and Loading PEFT Models","local":"saving-and-loading-peft-models","sections":[{"title":"Saving","local":"saving","sections":[],"depth":3},{"title":"Loading","local":"loading","sections":[],"depth":3},{"title":"Pushing to Hub","local":"pushing-to-hub","sections":[],"depth":3}],"depth":2},{"title":"Multi-GPU Training","local":"multi-gpu-training","sections":[{"title":"Naive Pipeline Parallelism (NPP) for Large Models","local":"naive-pipeline-parallelism-npp-for-large-models","sections":[],"depth":3}],"depth":2},{"title":"Resources","local":"resources","sections":[{"title":"TRL Examples and Notebooks","local":"trl-examples-and-notebooks","sections":[],"depth":3},{"title":"Documentation","local":"documentation","sections":[],"depth":3},{"title":"Research Papers","local":"research-papers","sections":[],"depth":3}],"depth":2}],"depth":1}';function mi(E){return Ds(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ji extends Os{constructor(u){super(),Ks(this,u,mi,pi,qs,{})}}export{Ji as component}; | |
Xet Storage Details
- Size:
- 90.9 kB
- Xet hash:
- d2644052b747b1489163c010bafcb7b59e2f7a12cb39a303413a3f264bad1043
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.