Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev/trl/pr_5607/en/_app/immutable/nodes/25.2f8d7b8f.js

HuggingFaceDocBuilder's picture

HuggingFaceDocBuilder

about 1 month ago

16.3 kB

	/**
	 * Compiled page component for the TRL documentation index page
	 * (the `source` prop passed to `et` below points at docs/source/index.md).
	 *
	 * This looks like build output: the page content lives in the static HTML
	 * strings below, so content changes presumably belong in the markdown
	 * source rather than here — confirm with the doc-builder pipeline.
	 *
	 * NOTE(review): the single-letter helpers imported from ../chunks are
	 * minifier aliases whose definitions are not visible in this file. Judging
	 * by their call sites they appear to be Svelte runtime helpers (element and
	 * text-node creation, hydration claims, mounting / transitioning /
	 * destroying child components) — verify against the chunk files.
	 */
	import { s as Je, n as Ne, o as We } from "../chunks/scheduler.7b731bd4.js";
	import { S as Qe, i as Xe, e as i, s as l, c as m, h as Ye, a as s, d as a, b as r, f as Z, j as o, g as c, k as Le, v as x, l as g, m as n, n as p, t as u, o as f, p as b } from "../chunks/index.cc268345.js";
	import { C as Ze, H as $, E as et } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js";

	/**
	 * Builds the fragment object for the page.
	 *
	 * @param {*} Ce - Unused by this fragment; kept because the caller (`Xe`)
	 *   supplies it.
	 * @returns {object} An object with the methods `c`, `l`, `h`, `m`, `p`, `i`,
	 *   `o` and `d`. From their bodies: `c` creates the nodes, `l` claims them
	 *   from an existing parent `e`, `h` sets attributes/styles, `m` inserts
	 *   everything into target `e` before anchor `t`, `i`/`o` run the child
	 *   components' transitions in/out, and `d` removes the nodes and destroys
	 *   the child components. `p` is the imported `Ne`.
	 */
	function tt(Ce) {
	  // Static markup assigned to innerHTML / textContent below. These are
	  // build-time constants, not user input.
	  const De = '<picture><source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_light.png"/> <img src="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_dark.png"/></picture>';
	  const Fe = `TRL is a full stack library where we provide a set of tools to train transformer language models with methods like Supervised Fine-Tuning (SFT), Group Relative Policy Optimization (GRPO), Direct Preference Optimization (DPO), Reward Modeling, and more.
	The library is integrated with 🤗 <a href="https://github.com/huggingface/transformers" rel="nofollow">transformers</a>.`;
	  const Ee = '<strong>TRL v1:</strong> We released TRL v1 — a major milestone that marks a real shift in what TRL is. Read the <a href="https://huggingface.co/blog/trl-v1" rel="nofollow">blog post</a> to learn more.';
	  const Se = "Below is the current list of TRL trainers, organized by method type (⚡️ = vLLM support; 🧪 = experimental).";
	  const Ae = '<li><a href="grpo_trainer"><code>GRPOTrainer</code></a> ⚡️</li> <li><a href="rloo_trainer"><code>RLOOTrainer</code></a> ⚡️</li> <li><a href="online_dpo_trainer"><code>OnlineDPOTrainer</code></a> 🧪 ⚡️</li> <li><a href="nash_md_trainer"><code>NashMDTrainer</code></a> 🧪 ⚡️</li> <li><a href="ppo_trainer"><code>PPOTrainer</code></a> 🧪</li> <li><a href="xpo_trainer"><code>XPOTrainer</code></a> 🧪 ⚡️</li>';
	  const Ge = '<li><a href="reward_trainer"><code>RewardTrainer</code></a></li> <li><a href="prm_trainer"><code>PRMTrainer</code></a> 🧪</li>';
	  const Ie = '<li><a href="sft_trainer"><code>SFTTrainer</code></a></li> <li><a href="dpo_trainer"><code>DPOTrainer</code></a></li> <li><a href="bco_trainer"><code>BCOTrainer</code></a> 🧪</li> <li><a href="cpo_trainer"><code>CPOTrainer</code></a> 🧪</li> <li><a href="kto_trainer"><code>KTOTrainer</code></a> 🧪</li> <li><a href="orpo_trainer"><code>ORPOTrainer</code></a> 🧪</li>';
	  const qe = '<li><a href="gkd_trainer"><code>GKDTrainer</code></a> 🧪</li> <li><a href="minillm_trainer"><code>MiniLLMTrainer</code></a> 🧪</li>';
	  const ze = 'You can also explore TRL-related models, datasets, and demos in the <a href="https://huggingface.co/trl-lib" rel="nofollow">TRL Hugging Face organization</a>.';
	  const Ue = 'Learn post-training with TRL and other libraries in 🤗 <a href="https://github.com/huggingface/smol-course" rel="nofollow">smol course</a>.';
	  const Ve = "The documentation is organized into the following sections:";
	  const Be = "<li><strong>Getting Started</strong>: installation and quickstart guide.</li> <li><strong>Conceptual Guides</strong>: dataset formats, training FAQ, and understanding logs.</li> <li><strong>How-to Guides</strong>: reducing memory usage, speeding up training, distributing training, etc.</li> <li><strong>Integrations</strong>: DeepSpeed, Liger Kernel, PEFT, etc.</li> <li><strong>Examples</strong>: example overview, community tutorials, etc.</li> <li><strong>API</strong>: trainers, utils, etc.</li>";
	  const Ke = '<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5"><a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/trl-v1"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/trl-v1/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published March 27, 2026</p> <p class="text-gray-700">TRL v1: Post-Training Library That Holds When the Field Invalidates Its Own Assumptions</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/openenv"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/openenv/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published October 23, 2025</p> <p class="text-gray-700">Building the Open Agent Ecosystem Together: Introducing OpenEnv</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/trl-vlm-alignment"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/trl_vlm/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on August 7, 2025</p> <p class="text-gray-700">Vision Language Model Alignment in TRL ⚡️</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/vllm-colocate"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/vllm-colocate/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on June 3, 2025</p> <p class="text-gray-700">NO GPU left behind: Unlocking Efficiency with Co-located vLLM in TRL</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/liger-grpo"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/liger-grpo/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on May 25, 2025</p> <p class="text-gray-700">🐯 Liger GRPO meets TRL</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/open-r1"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/open-r1/thumbnails.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on January 28, 2025</p> <p class="text-gray-700">Open-R1: a fully open reproduction of DeepSeek-R1</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/dpo_vlm"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/dpo_vlm/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on July 10, 2024</p> <p class="text-gray-700">Preference Optimization for Vision Language Models with TRL</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/putting_rl_back_in_rlhf_with_rloo"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/putting_rl_back_in_rlhf_with_rloo/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on June 12, 2024</p> <p class="text-gray-700">Putting RL back in RLHF</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/trl-ddpo"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/166_trl_ddpo/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on September 29, 2023</p> <p class="text-gray-700">Finetune Stable Diffusion Models with DDPO via TRL</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/dpo-trl"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/157_dpo_trl/dpo_thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on August 8, 2023</p> <p class="text-gray-700">Fine-tune Llama 2 with DPO</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/stackllama"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/138_stackllama/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on April 5, 2023</p> <p class="text-gray-700">StackLLaMA: A hands-on guide to train LLaMA with RLHF</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/trl-peft"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/133_trl_peft/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on March 9, 2023</p> <p class="text-gray-700">Fine-tuning 20B LLMs with RLHF on a 24GB consumer GPU</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/blog/rlhf"><img src="https://raw.githubusercontent.com/huggingface/blog/main/assets/120_rlhf/thumbnail.png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Published on December 9, 2022</p> <p class="text-gray-700">Illustrating Reinforcement Learning from Human Feedback</p></a></div>';
	  const je = '<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5"><a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/Fine%20tuning%20with%20TRL%20(Oct%2025).pdf"><img src="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/Fine%20tuning%20with%20TRL%20(Oct%2025).png" alt="thumbnail" class="mt-0"/> <p class="text-gray-500 text-sm">Talk given on October 30, 2025</p> <p class="text-gray-700">Fine tuning with TRL</p></a></div>';

	  // DOM nodes (elements and the separator nodes returned by l()/r()),
	  // assigned in c() or l().
	  let v, ee, X, te, T, ae, ne, le, k, re, ie, H, se, oe, D, ge, w, d, _e, J, Pe, Me, N, Re, h, ke, W, Oe, He, Q, de, G, he, me, q, ce, pe, U, ue, V, fe, be, y, we, $e, L, ve, xe, Y;
	  // Set once the intro transitions have run; cleared again by o().
	  let Te;

	  // Child components, in document order.
	  const M = new Ze({ props: { containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;" } });
	  const R = new $({ props: { title: "TRL - Transformers Reinforcement Learning", local: "trl---transformers-reinforcement-learning", headingTag: "h1" } });
	  const O = new $({ props: { title: "🎉 What’s New", local: "-whats-new", headingTag: "h2" } });
	  const C = new $({ props: { title: "Taxonomy", local: "taxonomy", headingTag: "h2" } });
	  const F = new $({ props: { title: "Online methods", local: "online-methods", headingTag: "h3" } });
	  const E = new $({ props: { title: "Reward modeling", local: "reward-modeling", headingTag: "h3" } });
	  const S = new $({ props: { title: "Offline methods", local: "offline-methods", headingTag: "h3" } });
	  const A = new $({ props: { title: "Knowledge distillation", local: "knowledge-distillation", headingTag: "h3" } });
	  const I = new $({ props: { title: "Learn", local: "learn", headingTag: "h2" } });
	  const z = new $({ props: { title: "Contents", local: "contents", headingTag: "h2" } });
	  const B = new $({ props: { title: "Blog posts", local: "blog-posts", headingTag: "h2" } });
	  const K = new $({ props: { title: "Talks", local: "talks", headingTag: "h2" } });
	  const j = new et({ props: { source: "https://github.com/huggingface/trl/blob/main/docs/source/index.md" } });

	  // Same order the original used for its transition-in / transition-out calls.
	  const childComponents = [M, R, O, C, F, E, S, A, I, z, B, K, j];

	  return {
	    // Create every node and child fragment from scratch.
	    c() {
	      v = i("meta");
	      ee = l();
	      X = i("p");
	      te = l();
	      T = i("div");
	      T.innerHTML = De;
	      ae = l();
	      m(M.$$.fragment);
	      ne = l();
	      m(R.$$.fragment);
	      le = l();
	      k = i("p");
	      k.innerHTML = Fe;
	      re = l();
	      m(O.$$.fragment);
	      ie = l();
	      H = i("p");
	      H.innerHTML = Ee;
	      se = l();
	      m(C.$$.fragment);
	      oe = l();
	      D = i("p");
	      D.textContent = Se;
	      ge = l();
	      w = i("div");
	      d = i("div");
	      m(F.$$.fragment);
	      _e = l();
	      J = i("ul");
	      J.innerHTML = Ae;
	      Pe = l();
	      m(E.$$.fragment);
	      Me = l();
	      N = i("ul");
	      N.innerHTML = Ge;
	      Re = l();
	      h = i("div");
	      m(S.$$.fragment);
	      ke = l();
	      W = i("ul");
	      W.innerHTML = Ie;
	      Oe = l();
	      m(A.$$.fragment);
	      He = l();
	      Q = i("ul");
	      Q.innerHTML = qe;
	      de = l();
	      G = i("p");
	      G.innerHTML = ze;
	      he = l();
	      m(I.$$.fragment);
	      me = l();
	      q = i("p");
	      q.innerHTML = Ue;
	      ce = l();
	      m(z.$$.fragment);
	      pe = l();
	      U = i("p");
	      U.textContent = Ve;
	      ue = l();
	      V = i("ul");
	      V.innerHTML = Be;
	      fe = l();
	      m(B.$$.fragment);
	      be = l();
	      y = i("div");
	      y.innerHTML = Ke;
	      we = l();
	      m(K.$$.fragment);
	      $e = l();
	      L = i("div");
	      L.innerHTML = je;
	      ve = l();
	      m(j.$$.fragment);
	      xe = l();
	      Y = i("p");
	      this.h();
	    },

	    // Claim nodes from the existing children `e`. Static markup is only
	    // rewritten when the value returned by o(node) differs from the
	    // expected "svelte-…" token.
	    l(e) {
	      const t = Ye("svelte-u9bgzb", document.head);
	      v = s(t, "META", { name: true, content: true });
	      t.forEach(a);
	      ee = r(e);
	      X = s(e, "P", {});
	      Z(X).forEach(a);
	      te = r(e);
	      T = s(e, "DIV", { style: true, "data-svelte-h": true });
	      if (o(T) !== "svelte-122fsk") {
	        T.innerHTML = De;
	      }
	      ae = r(e);
	      c(M.$$.fragment, e);
	      ne = r(e);
	      c(R.$$.fragment, e);
	      le = r(e);
	      k = s(e, "P", { "data-svelte-h": true });
	      if (o(k) !== "svelte-bsg4qz") {
	        k.innerHTML = Fe;
	      }
	      re = r(e);
	      c(O.$$.fragment, e);
	      ie = r(e);
	      H = s(e, "P", { "data-svelte-h": true });
	      if (o(H) !== "svelte-1fuomqy") {
	        H.innerHTML = Ee;
	      }
	      se = r(e);
	      c(C.$$.fragment, e);
	      oe = r(e);
	      D = s(e, "P", { "data-svelte-h": true });
	      if (o(D) !== "svelte-13myssb") {
	        D.textContent = Se;
	      }
	      ge = r(e);
	      w = s(e, "DIV", { style: true });
	      const ye = Z(w);
	      d = s(ye, "DIV", { style: true });
	      const _ = Z(d);
	      c(F.$$.fragment, _);
	      _e = r(_);
	      J = s(_, "UL", { "data-svelte-h": true });
	      if (o(J) !== "svelte-97sfcq") {
	        J.innerHTML = Ae;
	      }
	      Pe = r(_);
	      c(E.$$.fragment, _);
	      Me = r(_);
	      N = s(_, "UL", { "data-svelte-h": true });
	      if (o(N) !== "svelte-1wjhf4q") {
	        N.innerHTML = Ge;
	      }
	      _.forEach(a);
	      Re = r(ye);
	      h = s(ye, "DIV", { style: true });
	      const P = Z(h);
	      c(S.$$.fragment, P);
	      ke = r(P);
	      W = s(P, "UL", { "data-svelte-h": true });
	      if (o(W) !== "svelte-yqme9u") {
	        W.innerHTML = Ie;
	      }
	      Oe = r(P);
	      c(A.$$.fragment, P);
	      He = r(P);
	      Q = s(P, "UL", { "data-svelte-h": true });
	      if (o(Q) !== "svelte-lgnv90") {
	        Q.innerHTML = qe;
	      }
	      P.forEach(a);
	      ye.forEach(a);
	      de = r(e);
	      G = s(e, "P", { "data-svelte-h": true });
	      if (o(G) !== "svelte-1q9itv5") {
	        G.innerHTML = ze;
	      }
	      he = r(e);
	      c(I.$$.fragment, e);
	      me = r(e);
	      q = s(e, "P", { "data-svelte-h": true });
	      if (o(q) !== "svelte-1eixgdo") {
	        q.innerHTML = Ue;
	      }
	      ce = r(e);
	      c(z.$$.fragment, e);
	      pe = r(e);
	      U = s(e, "P", { "data-svelte-h": true });
	      if (o(U) !== "svelte-15lq3ss") {
	        U.textContent = Ve;
	      }
	      ue = r(e);
	      V = s(e, "UL", { "data-svelte-h": true });
	      if (o(V) !== "svelte-1ta7s5e") {
	        V.innerHTML = Be;
	      }
	      fe = r(e);
	      c(B.$$.fragment, e);
	      be = r(e);
	      y = s(e, "DIV", { class: true, "data-svelte-h": true });
	      if (o(y) !== "svelte-6qrp6a") {
	        y.innerHTML = Ke;
	      }
	      we = r(e);
	      c(K.$$.fragment, e);
	      $e = r(e);
	      L = s(e, "DIV", { class: true, "data-svelte-h": true });
	      if (o(L) !== "svelte-10y277x") {
	        L.innerHTML = je;
	      }
	      ve = r(e);
	      c(j.$$.fragment, e);
	      xe = r(e);
	      Y = s(e, "P", {});
	      Z(Y).forEach(a);
	      this.h();
	    },

	    // Attributes and inline styles shared by the create and claim paths.
	    h() {
	      Le(v, "name", "hf:doc:metadata");
	      Le(v, "content", at);
	      x(T, "text-align", "center");
	      x(d, "flex", "1");
	      x(d, "min-width", "0");
	      x(h, "flex", "1");
	      x(h, "min-width", "0");
	      x(w, "display", "flex");
	      x(w, "justify-content", "space-between");
	      x(w, "width", "100%");
	      x(w, "gap", "2rem");
	      Le(y, "class", "mt-10");
	      Le(L, "class", "mt-10");
	    },

	    // Insert everything into target `e` (before anchor `t`); the <meta>
	    // node goes into document.head instead.
	    m(e, t) {
	      g(document.head, v);
	      n(e, ee, t);
	      n(e, X, t);
	      n(e, te, t);
	      n(e, T, t);
	      n(e, ae, t);
	      p(M, e, t);
	      n(e, ne, t);
	      p(R, e, t);
	      n(e, le, t);
	      n(e, k, t);
	      n(e, re, t);
	      p(O, e, t);
	      n(e, ie, t);
	      n(e, H, t);
	      n(e, se, t);
	      p(C, e, t);
	      n(e, oe, t);
	      n(e, D, t);
	      n(e, ge, t);
	      n(e, w, t);
	      g(w, d);
	      p(F, d, null);
	      g(d, _e);
	      g(d, J);
	      g(d, Pe);
	      p(E, d, null);
	      g(d, Me);
	      g(d, N);
	      g(w, Re);
	      g(w, h);
	      p(S, h, null);
	      g(h, ke);
	      g(h, W);
	      g(h, Oe);
	      p(A, h, null);
	      g(h, He);
	      g(h, Q);
	      n(e, de, t);
	      n(e, G, t);
	      n(e, he, t);
	      p(I, e, t);
	      n(e, me, t);
	      n(e, q, t);
	      n(e, ce, t);
	      p(z, e, t);
	      n(e, pe, t);
	      n(e, U, t);
	      n(e, ue, t);
	      n(e, V, t);
	      n(e, fe, t);
	      p(B, e, t);
	      n(e, be, t);
	      n(e, y, t);
	      n(e, we, t);
	      p(K, e, t);
	      n(e, $e, t);
	      n(e, L, t);
	      n(e, ve, t);
	      p(j, e, t);
	      n(e, xe, t);
	      n(e, Y, t);
	      Te = true;
	    },

	    p: Ne,

	    // Run child transitions in, at most once until o() resets the flag.
	    i(e) {
	      if (Te) {
	        return;
	      }
	      for (const child of childComponents) {
	        u(child.$$.fragment, e);
	      }
	      Te = true;
	    },

	    // Run child transitions out and allow i() to run again.
	    o(e) {
	      for (const child of childComponents) {
	        f(child.$$.fragment, e);
	      }
	      Te = false;
	    },

	    // Tear down: top-level nodes are removed only when `e` is truthy; the
	    // <meta> node and the child components are always cleaned up.
	    d(e) {
	      if (e) {
	        const topLevelNodes = [ee, X, te, T, ae, ne, le, k, re, ie, H, se, oe, D, ge, w, de, G, he, me, q, ce, pe, U, ue, V, fe, be, y, we, $e, L, ve, xe, Y];
	        for (const node of topLevelNodes) {
	          a(node);
	        }
	      }
	      a(v);
	      b(M, e);
	      b(R, e);
	      b(O, e);
	      b(C, e);
	      // The four components mounted inside the two column <div>s are
	      // destroyed without the flag, exactly as in the original output.
	      b(F);
	      b(E);
	      b(S);
	      b(A);
	      b(I, e);
	      b(z, e);
	      b(B, e);
	      b(K, e);
	      b(j, e);
	    },
	  };
	}

	// JSON table of contents for the page; h() stores it on the
	// <meta name="hf:doc:metadata"> node via Le(v, "content", at).
	const at = '{"title":"TRL - Transformers Reinforcement Learning","local":"trl---transformers-reinforcement-learning","sections":[{"title":"🎉 What’s New","local":"-whats-new","sections":[],"depth":2},{"title":"Taxonomy","local":"taxonomy","sections":[{"title":"Online methods","local":"online-methods","sections":[],"depth":3},{"title":"Reward modeling","local":"reward-modeling","sections":[],"depth":3},{"title":"Offline methods","local":"offline-methods","sections":[],"depth":3},{"title":"Knowledge distillation","local":"knowledge-distillation","sections":[],"depth":3}],"depth":2},{"title":"Learn","local":"learn","sections":[],"depth":2},{"title":"Contents","local":"contents","sections":[],"depth":2},{"title":"Blog posts","local":"blog-posts","sections":[],"depth":2},{"title":"Talks","local":"talks","sections":[],"depth":2}],"depth":1}';

	/**
	 * Instance function handed to `Xe`.
	 *
	 * Registers a callback with `We` that reads the `fw` query parameter from
	 * the current URL and discards the value, then returns an empty array.
	 *
	 * @param {*} Ce - Unused.
	 * @returns {Array} Always an empty array.
	 */
	function nt(Ce) {
	  We(() => {
	    // The result is intentionally unused in the original output; the call
	    // is kept so behavior stays identical.
	    new URLSearchParams(window.location.search).get("fw");
	  });
	  return [];
	}

	/**
	 * Page component class, exported as `component`.
	 * The constructor forwards its options to `Xe` together with the instance
	 * function `nt`, the fragment factory `tt` and the comparator `Je`.
	 */
	class st extends Qe {
	  constructor(v) {
	    super();
	    Xe(this, v, nt, tt, Je, {});
	  }
	}

	export { st as component };

Xet Storage Details

Size: 16.3 kB
Xet hash: 2ff8af41924e657b496780d1c2b9377e575773119ea91556421201aa1b0625e5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.