Buckets:
| import{s as ze,n as Je,o as Qe}from"../chunks/scheduler.7b731bd4.js";import{S as Ke,i as et,e as l,s as n,c as p,h as tt,a as s,d as o,b as a,f as qe,g as h,j as i,k as Ye,l as ot,m as r,n as c,t as d,o as m,p as g}from"../chunks/index.cc268345.js";import{C as rt,H as B,E as nt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js";import{C as ke}from"../chunks/CodeBlock.169a125f.js";function at(Me){let f,Y,W,z,b,J,u,Q,_,Ge="This directory contains a collection of examples that demonstrate how to use the TRL library for various applications. We provide both <strong>scripts</strong> for advanced use cases and <strong>notebooks</strong> for an easy start and interactive experimentation.",K,w,Le="The notebooks are self-contained and can run on <strong>free Colab</strong>, while the scripts can run on <strong>single GPU, multi-GPU, or DeepSpeed</strong> setups.",ee,y,Ce="<strong>Getting Started</strong>",te,T,Se="Install TRL and additional dependencies as follows:",oe,v,re,x,Fe='Check for additional optional dependencies <a href="https://github.com/huggingface/trl/blob/main/pyproject.toml" rel="nofollow">here</a>.',ne,O,He="For scripts, you will also need an 🤗 Accelerate config (recommended for multi-gpu settings):",ae,P,le,$,Ue="This allows you to run scripts with <code>accelerate launch</code> in single or multi-GPU settings.",se,R,ie,k,Ae='These notebooks are easier to run and are designed for quick experimentation with TRL. The list of notebooks can be found in the <a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/" rel="nofollow"><code>trl/examples/notebooks/</code></a> directory.',pe,M,Ie='<thead><tr><th>Notebook</th> <th>Description</th> <th>Open in Colab</th></tr></thead> <tbody><tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/grpo_trl_lora_qlora.ipynb" rel="nofollow"><code>grpo_trl_lora_qlora.ipynb</code></a></td> <td>GRPO using QLoRA on free Colab</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/grpo_trl_lora_qlora.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/grpo_agent.ipynb" rel="nofollow"><code>grpo_agent.ipynb</code></a></td> <td>GRPO for agent training</td> <td>Not available due to OOM with Colab GPUs</td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/grpo_rnj_1_instruct.ipynb" rel="nofollow"><code>grpo_rnj_1_instruct.ipynb</code></a></td> <td>GRPO rnj-1-instruct with QLoRA using TRL on Colab to add reasoning capabilities</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/grpo_rnj_1_instruct.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/sft_ministral3_vl.ipynb" rel="nofollow"><code>sft_ministral3_vl.ipynb</code></a></td> <td>Supervised Fine-Tuning (SFT) Ministral 3 with QLoRA using TRL on free Colab</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/sft_ministral3_vl.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/grpo_ministral3_vl.ipynb" rel="nofollow"><code>grpo_ministral3_vl.ipynb</code></a></td> <td>GRPO Ministral 3 with QLoRA using TRL on free Colab</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/grpo_ministral3_vl.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/sft_nemotron_3.ipynb" rel="nofollow"><code>sft_nemotron_3.ipynb</code></a></td> <td>SFT with LoRA on NVIDIA Nemotron 3 models</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/sft_nemotron_3.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/sft_trl_lora_qlora.ipynb" rel="nofollow"><code>sft_trl_lora_qlora.ipynb</code></a></td> <td>Supervised Fine-Tuning (SFT) using QLoRA on free Colab</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/sft_trl_lora_qlora.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/sft_qwen_vl.ipynb" rel="nofollow"><code>sft_qwen_vl.ipynb</code></a></td> <td>Supervised Fine-Tuning (SFT) Qwen3-VL with QLoRA using TRL on free Colab</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/sft_qwen_vl.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/sft_tool_calling.ipynb" rel="nofollow"><code>sft_tool_calling.ipynb</code></a></td> <td>Teaching tool calling to a model without native tool-calling support using SFT with QLoRA</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/sft_tool_calling.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/grpo_qwen3_vl.ipynb" rel="nofollow"><code>grpo_qwen3_vl.ipynb</code></a></td> <td>GRPO Qwen3-VL with QLoRA using TRL on free Colab</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/grpo_qwen3_vl.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr></tbody>',he,G,ce,L,Ve='These notebooks demonstrate how to train models with <a href="openenv">OpenEnv</a> environments using <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a>’s <code>environment_factory</code>. The BrowserGym notebook uses the lower-level <code>rollout_func</code> API instead. See the <a href="openenv">OpenEnv Integration</a> guide for more details.',de,C,Ne='<thead><tr><th>Notebook</th> <th>Description</th> <th>Open in Colab</th></tr></thead> <tbody><tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/openenv_wordle_grpo.ipynb" rel="nofollow"><code>openenv_wordle_grpo.ipynb</code></a></td> <td>GRPO to play Wordle on an OpenEnv environment</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/openenv_wordle_grpo.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/openenv_sudoku_grpo.ipynb" rel="nofollow"><code>openenv_sudoku_grpo.ipynb</code></a></td> <td>GRPO to play Sudoku on an OpenEnv environment</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/openenv_sudoku_grpo.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/grpo_functiongemma_browsergym_openenv.ipynb" rel="nofollow"><code>grpo_functiongemma_browsergym_openenv.ipynb</code></a></td> <td>GRPO on FunctionGemma in the BrowserGym environment</td> <td><a href="https://colab.research.google.com/github/huggingface/trl/blob/main/examples/notebooks/grpo_functiongemma_browsergym_openenv.ipynb" rel="nofollow"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></td></tr></tbody>',me,S,ge,F,Ze='Scripts are maintained in the <a href="https://github.com/huggingface/trl/blob/main/trl/scripts" rel="nofollow"><code>trl/scripts</code></a> and <a href="https://github.com/huggingface/trl/blob/main/examples/scripts" rel="nofollow"><code>examples/scripts</code></a> directories. They show how to use different trainers such as <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a>, <code>PPOTrainer</code>, <a href="/docs/trl/pr_5607/en/bema_for_reference_model#trl.DPOTrainer">DPOTrainer</a>, <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a>, and more.',fe,H,De='<thead><tr><th>File</th> <th>Description</th></tr></thead> <tbody><tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/bco.py" rel="nofollow"><code>examples/scripts/bco.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/kto_trainer#trl.KTOTrainer">experimental.kto.KTOTrainer</a> with the BCO loss to fine-tune a model to increase instruction-following, truthfulness, honesty, and helpfulness using the <a href="https://huggingface.co/datasets/openbmb/UltraFeedback" rel="nofollow">openbmb/UltraFeedback</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/cpo.py" rel="nofollow"><code>examples/scripts/cpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/cpo_trainer#trl.experimental.cpo.CPOTrainer">experimental.cpo.CPOTrainer</a> to fine-tune a model to increase helpfulness and harmlessness using the <a href="https://huggingface.co/datasets/Anthropic/hh-rlhf" rel="nofollow">Anthropic/hh-rlhf</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/trl/scripts/dpo.py" rel="nofollow"><code>trl/scripts/dpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/bema_for_reference_model#trl.DPOTrainer">DPOTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/dpo_vlm.py" rel="nofollow"><code>examples/scripts/dpo_vlm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/bema_for_reference_model#trl.DPOTrainer">DPOTrainer</a> to fine-tune a Vision Language Model to reduce hallucinations using the <a href="https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset" rel="nofollow">openbmb/RLAIF-V-Dataset</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/gkd.py" rel="nofollow"><code>examples/scripts/gkd.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/gkd_trainer#trl.experimental.gkd.GKDTrainer">experimental.gkd.GKDTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/trl/scripts/grpo.py" rel="nofollow"><code>trl/scripts/grpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/trl/scripts/grpo_agent.py" rel="nofollow"><code>trl/scripts/grpo_agent.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a> to fine-tune a model to enable agentic usage.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/grpo_vlm.py" rel="nofollow"><code>examples/scripts/grpo_vlm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a> to fine-tune a multimodal model for reasoning using the <a href="https://huggingface.co/datasets/lmms-lab/multimodal-open-r1-8k-verified" rel="nofollow">lmms-lab/multimodal-open-r1-8k-verified</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/gspo.py" rel="nofollow"><code>examples/scripts/gspo.py</code></a></td> <td>This script shows how to use GSPO via the <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a> to fine-tune model for reasoning using the <a href="https://huggingface.co/datasets/AI-MO/NuminaMath-TIR" rel="nofollow">AI-MO/NuminaMath-TIR</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/gspo_vlm.py" rel="nofollow"><code>examples/scripts/gspo_vlm.py</code></a></td> <td>This script shows how to use GSPO via the <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a> to fine-tune a multimodal model for reasoning using the <a href="https://huggingface.co/datasets/lmms-lab/multimodal-open-r1-8k-verified" rel="nofollow">lmms-lab/multimodal-open-r1-8k-verified</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/kto.py" rel="nofollow"><code>examples/scripts/kto.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/kto_trainer#trl.KTOTrainer">experimental.kto.KTOTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/mpo_vlm.py" rel="nofollow"><code>examples/scripts/mpo_vlm.py</code></a></td> <td>This script shows how to use MPO via the <a href="/docs/trl/pr_5607/en/bema_for_reference_model#trl.DPOTrainer">DPOTrainer</a> to align a model based on preferences using the <a href="https://huggingface.co/datasets/HuggingFaceH4/rlaif-v_formatted" rel="nofollow">HuggingFaceH4/rlaif-v_formatted</a> dataset and a set of loss weights with weights.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/nash_md.py" rel="nofollow"><code>examples/scripts/nash_md.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/nash_md_trainer#trl.experimental.nash_md.NashMDTrainer">experimental.nash_md.NashMDTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/nemo_gym/train_multi_environment.py" rel="nofollow"><code>examples/scripts/nemo_gym/train_multi_environment.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a> to train language models in NVIDIA NeMo-Gym environments. Supports multi-turn and tool calling environments, and multi-environment training. See the <a href="nemo_gym">NeMo-Gym Integration</a> guide for setup and usage.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/online_dpo.py" rel="nofollow"><code>examples/scripts/online_dpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/online_dpo_trainer#trl.experimental.online_dpo.OnlineDPOTrainer">experimental.online_dpo.OnlineDPOTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/online_dpo_vlm.py" rel="nofollow"><code>examples/scripts/online_dpo_vlm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/online_dpo_trainer#trl.experimental.online_dpo.OnlineDPOTrainer">experimental.online_dpo.OnlineDPOTrainer</a> to fine-tune a a Vision Language Model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/orpo.py" rel="nofollow"><code>examples/scripts/orpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/orpo_trainer#trl.experimental.orpo.ORPOTrainer">experimental.orpo.ORPOTrainer</a> to fine-tune a model to increase helpfulness and harmlessness using the <a href="https://huggingface.co/datasets/Anthropic/hh-rlhf" rel="nofollow">Anthropic/hh-rlhf</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/ppo/ppo.py" rel="nofollow"><code>examples/scripts/ppo/ppo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/ppo_trainer#trl.experimental.ppo.PPOTrainer">experimental.ppo.PPOTrainer</a> to fine-tune a model to improve its ability to continue text with positive sentiment or physically descriptive language.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/ppo/ppo_tldr.py" rel="nofollow"><code>examples/scripts/ppo/ppo_tldr.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/ppo_trainer#trl.experimental.ppo.PPOTrainer">experimental.ppo.PPOTrainer</a> to fine-tune a model to improve its ability to generate TL;DR summaries.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/prm.py" rel="nofollow"><code>examples/scripts/prm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/prm_trainer#trl.experimental.prm.PRMTrainer">experimental.prm.PRMTrainer</a> to fine-tune a Process-supervised Reward Model (PRM).</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/reward_modeling.py" rel="nofollow"><code>examples/scripts/reward_modeling.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/reward_trainer#trl.RewardTrainer">RewardTrainer</a> to train an Outcome Reward Model (ORM) on your own dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/rloo.py" rel="nofollow"><code>examples/scripts/rloo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/rloo_trainer#trl.RLOOTrainer">RLOOTrainer</a> to fine-tune a model to improve its ability to solve math questions.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/trl/scripts/sft.py" rel="nofollow"><code>trl/scripts/sft.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_gemma3.py" rel="nofollow"><code>examples/scripts/sft_gemma3.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a Gemma 3 model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_nemotron_3.py" rel="nofollow"><code>examples/scripts/sft_nemotron_3.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune an NVIDIA Nemotron 3 model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_tiny_aya_tool_calling.py" rel="nofollow"><code>examples/scripts/sft_tiny_aya_tool_calling.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to teach tool calling to a model without native tool-calling support using the <a href="https://huggingface.co/datasets/bebechien/SimpleToolCalling" rel="nofollow">bebechien/SimpleToolCalling</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_video_llm.py" rel="nofollow"><code>examples/scripts/sft_video_llm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a Video Language Model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_vlm.py" rel="nofollow"><code>examples/scripts/sft_vlm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a Vision Language Model in a chat setting. The script has only been tested with <a href="https://huggingface.co/llava-hf/llava-1.5-7b-hf" rel="nofollow">LLaVA 1.5</a>, <a href="https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf" rel="nofollow">LLaVA 1.6</a>, and <a href="https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct" rel="nofollow">Llama-3.2-11B-Vision-Instruct</a> models, so users may see unexpected behaviour in other model architectures.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_vlm_gemma3.py" rel="nofollow"><code>examples/scripts/sft_vlm_gemma3.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a Gemma 3 model on vision to text tasks.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_vlm_smol_vlm.py" rel="nofollow"><code>examples/scripts/sft_vlm_smol_vlm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a SmolVLM model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/xpo.py" rel="nofollow"><code>examples/scripts/xpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_5607/en/xpo_trainer#trl.experimental.xpo.XPOTrainer">experimental.xpo.XPOTrainer</a> to fine-tune a model.</td></tr></tbody>',be,U,ue,A,Ee='These scripts demonstrate how to train models with <a href="openenv">OpenEnv</a> environments using <a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a>’s <code>environment_factory</code>. See the <a href="openenv">OpenEnv Integration</a> guide for more details.',_e,I,Xe='<thead><tr><th>File</th> <th>Description</th></tr></thead> <tbody><tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/echo.py" rel="nofollow"><code>examples/scripts/openenv/echo.py</code></a></td> <td>GRPO training with the Echo environment (minimal example).</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/wordle.py" rel="nofollow"><code>examples/scripts/openenv/wordle.py</code></a></td> <td>GRPO training with the Wordle (TextArena) environment.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/catch.py" rel="nofollow"><code>examples/scripts/openenv/catch.py</code></a></td> <td>GRPO training with the Catch (OpenSpiel) environment.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/sudoku.py" rel="nofollow"><code>examples/scripts/openenv/sudoku.py</code></a></td> <td>GRPO training with the Sudoku environment.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/multi_env.py" rel="nofollow"><code>examples/scripts/openenv/multi_env.py</code></a></td> <td>Multi-environment GRPO training: Wordle + Catch in the same training run.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/browsergym.py" rel="nofollow"><code>examples/scripts/openenv/browsergym.py</code></a></td> <td>GRPO training with the BrowserGym environment for VLMs.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/browsergym_llm.py" rel="nofollow"><code>examples/scripts/openenv/browsergym_llm.py</code></a></td> <td>GRPO training with the BrowserGym environment for LLMs.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/carla.py" rel="nofollow"><code>examples/scripts/openenv/carla.py</code></a></td> <td>GRPO training with the CARLA environment for autonomous driving.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/carla_vlm.py" rel="nofollow"><code>examples/scripts/openenv/carla_vlm.py</code></a></td> <td>GRPO training with CARLA for VLMs with multimodal tool responses (camera images).</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/openenv/carla_vlm_gemma.py" rel="nofollow"><code>examples/scripts/openenv/carla_vlm_gemma.py</code></a></td> <td>GRPO training with CARLA for Gemma 4 with multimodal tool responses (camera images).</td></tr></tbody>',we,V,ye,N,je="You can run scripts on multiple GPUs with 🤗 Accelerate:",Te,Z,ve,D,Be="For DeepSpeed ZeRO-{1,2,3}:",xe,E,Oe,X,We="Adjust <code>NUM_GPUS</code> and <code>--all_arguments_of_the_script</code> as needed.",Pe,j,$e,q,Re;return b=new rt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),u=new B({props:{title:"Examples",local:"examples",headingTag:"h1"}}),v=new ke({props:{code:"cGlwJTIwaW5zdGFsbCUyMC0tdXBncmFkZSUyMHRybCU1QnF1YW50aXphdGlvbiU1RA==",highlighted:"pip install --upgrade trl[quantization]",wrap:!1}}),P=new ke({props:{code:"YWNjZWxlcmF0ZSUyMGNvbmZpZyUyMCUyMyUyMHdpbGwlMjBwcm9tcHQlMjB5b3UlMjB0byUyMGRlZmluZSUyMHRoZSUyMHRyYWluaW5nJTIwY29uZmlndXJhdGlvbg==",highlighted:'accelerate config <span class="hljs-comment"># will prompt you to define the training configuration</span>',wrap:!1}}),R=new B({props:{title:"Notebooks",local:"notebooks",headingTag:"h2"}}),G=new B({props:{title:"OpenEnv Notebooks",local:"openenv-notebooks",headingTag:"h3"}}),S=new B({props:{title:"Scripts",local:"scripts",headingTag:"h2"}}),U=new B({props:{title:"OpenEnv Scripts",local:"openenv-scripts",headingTag:"h3"}}),V=new B({props:{title:"Distributed Training (for scripts)",local:"distributed-training-for-scripts",headingTag:"h2"}}),Z=new ke({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tY29uZmlnX2ZpbGUlM0RleGFtcGxlcyUyRmFjY2VsZXJhdGVfY29uZmlncyUyRm11bHRpX2dwdS55YW1sJTIwLS1udW1fcHJvY2Vzc2VzJTIwJTdCTlVNX0dQVVMlN0QlMjBwYXRoX3RvX3NjcmlwdC5weSUyMC0tYWxsX2FyZ3VtZW50c19vZl90aGVfc2NyaXB0",highlighted:"accelerate launch --config_file=examples/accelerate_configs/multi_gpu.yaml --num_processes {NUM_GPUS} path_to_script.py --all_arguments_of_the_script",wrap:!1}}),E=new ke({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tY29uZmlnX2ZpbGUlM0RleGFtcGxlcyUyRmFjY2VsZXJhdGVfY29uZmlncyUyRmRlZXBzcGVlZF96ZXJvJTdCMSUyQzIlMkMzJTdELnlhbWwlMjAtLW51bV9wcm9jZXNzZXMlMjAlN0JOVU1fR1BVUyU3RCUyMHBhdGhfdG9fc2NyaXB0LnB5JTIwLS1hbGxfYXJndW1lbnRzX29mX3RoZV9zY3JpcHQ=",highlighted:"accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero{1,2,3}.yaml --num_processes {NUM_GPUS} path_to_script.py --all_arguments_of_the_script",wrap:!1}}),j=new nt({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/example_overview.md"}}),{c(){f=l("meta"),Y=n(),W=l("p"),z=n(),p(b.$$.fragment),J=n(),p(u.$$.fragment),Q=n(),_=l("p"),_.innerHTML=Ge,K=n(),w=l("p"),w.innerHTML=Le,ee=n(),y=l("p"),y.innerHTML=Ce,te=n(),T=l("p"),T.textContent=Se,oe=n(),p(v.$$.fragment),re=n(),x=l("p"),x.innerHTML=Fe,ne=n(),O=l("p"),O.textContent=He,ae=n(),p(P.$$.fragment),le=n(),$=l("p"),$.innerHTML=Ue,se=n(),p(R.$$.fragment),ie=n(),k=l("p"),k.innerHTML=Ae,pe=n(),M=l("table"),M.innerHTML=Ie,he=n(),p(G.$$.fragment),ce=n(),L=l("p"),L.innerHTML=Ve,de=n(),C=l("table"),C.innerHTML=Ne,me=n(),p(S.$$.fragment),ge=n(),F=l("p"),F.innerHTML=Ze,fe=n(),H=l("table"),H.innerHTML=De,be=n(),p(U.$$.fragment),ue=n(),A=l("p"),A.innerHTML=Ee,_e=n(),I=l("table"),I.innerHTML=Xe,we=n(),p(V.$$.fragment),ye=n(),N=l("p"),N.textContent=je,Te=n(),p(Z.$$.fragment),ve=n(),D=l("p"),D.textContent=Be,xe=n(),p(E.$$.fragment),Oe=n(),X=l("p"),X.innerHTML=We,Pe=n(),p(j.$$.fragment),$e=n(),q=l("p"),this.h()},l(e){const t=tt("svelte-u9bgzb",document.head);f=s(t,"META",{name:!0,content:!0}),t.forEach(o),Y=a(e),W=s(e,"P",{}),qe(W).forEach(o),z=a(e),h(b.$$.fragment,e),J=a(e),h(u.$$.fragment,e),Q=a(e),_=s(e,"P",{"data-svelte-h":!0}),i(_)!=="svelte-k5mfpg"&&(_.innerHTML=Ge),K=a(e),w=s(e,"P",{"data-svelte-h":!0}),i(w)!=="svelte-1wqwut1"&&(w.innerHTML=Le),ee=a(e),y=s(e,"P",{"data-svelte-h":!0}),i(y)!=="svelte-qtgy0c"&&(y.innerHTML=Ce),te=a(e),T=s(e,"P",{"data-svelte-h":!0}),i(T)!=="svelte-1dkoh1m"&&(T.textContent=Se),oe=a(e),h(v.$$.fragment,e),re=a(e),x=s(e,"P",{"data-svelte-h":!0}),i(x)!=="svelte-1mvzre"&&(x.innerHTML=Fe),ne=a(e),O=s(e,"P",{"data-svelte-h":!0}),i(O)!=="svelte-1v3luk0"&&(O.textContent=He),ae=a(e),h(P.$$.fragment,e),le=a(e),$=s(e,"P",{"data-svelte-h":!0}),i($)!=="svelte-1lneeeh"&&($.innerHTML=Ue),se=a(e),h(R.$$.fragment,e),ie=a(e),k=s(e,"P",{"data-svelte-h":!0}),i(k)!=="svelte-5rbss"&&(k.innerHTML=Ae),pe=a(e),M=s(e,"TABLE",{"data-svelte-h":!0}),i(M)!=="svelte-l52ede"&&(M.innerHTML=Ie),he=a(e),h(G.$$.fragment,e),ce=a(e),L=s(e,"P",{"data-svelte-h":!0}),i(L)!=="svelte-ra53gr"&&(L.innerHTML=Ve),de=a(e),C=s(e,"TABLE",{"data-svelte-h":!0}),i(C)!=="svelte-15a5xeu"&&(C.innerHTML=Ne),me=a(e),h(S.$$.fragment,e),ge=a(e),F=s(e,"P",{"data-svelte-h":!0}),i(F)!=="svelte-12sunt7"&&(F.innerHTML=Ze),fe=a(e),H=s(e,"TABLE",{"data-svelte-h":!0}),i(H)!=="svelte-1q9shcc"&&(H.innerHTML=De),be=a(e),h(U.$$.fragment,e),ue=a(e),A=s(e,"P",{"data-svelte-h":!0}),i(A)!=="svelte-xzepzw"&&(A.innerHTML=Ee),_e=a(e),I=s(e,"TABLE",{"data-svelte-h":!0}),i(I)!=="svelte-cs0cqt"&&(I.innerHTML=Xe),we=a(e),h(V.$$.fragment,e),ye=a(e),N=s(e,"P",{"data-svelte-h":!0}),i(N)!=="svelte-1b16zas"&&(N.textContent=je),Te=a(e),h(Z.$$.fragment,e),ve=a(e),D=s(e,"P",{"data-svelte-h":!0}),i(D)!=="svelte-142p8w8"&&(D.textContent=Be),xe=a(e),h(E.$$.fragment,e),Oe=a(e),X=s(e,"P",{"data-svelte-h":!0}),i(X)!=="svelte-cgbim7"&&(X.innerHTML=We),Pe=a(e),h(j.$$.fragment,e),$e=a(e),q=s(e,"P",{}),qe(q).forEach(o),this.h()},h(){Ye(f,"name","hf:doc:metadata"),Ye(f,"content",lt)},m(e,t){ot(document.head,f),r(e,Y,t),r(e,W,t),r(e,z,t),c(b,e,t),r(e,J,t),c(u,e,t),r(e,Q,t),r(e,_,t),r(e,K,t),r(e,w,t),r(e,ee,t),r(e,y,t),r(e,te,t),r(e,T,t),r(e,oe,t),c(v,e,t),r(e,re,t),r(e,x,t),r(e,ne,t),r(e,O,t),r(e,ae,t),c(P,e,t),r(e,le,t),r(e,$,t),r(e,se,t),c(R,e,t),r(e,ie,t),r(e,k,t),r(e,pe,t),r(e,M,t),r(e,he,t),c(G,e,t),r(e,ce,t),r(e,L,t),r(e,de,t),r(e,C,t),r(e,me,t),c(S,e,t),r(e,ge,t),r(e,F,t),r(e,fe,t),r(e,H,t),r(e,be,t),c(U,e,t),r(e,ue,t),r(e,A,t),r(e,_e,t),r(e,I,t),r(e,we,t),c(V,e,t),r(e,ye,t),r(e,N,t),r(e,Te,t),c(Z,e,t),r(e,ve,t),r(e,D,t),r(e,xe,t),c(E,e,t),r(e,Oe,t),r(e,X,t),r(e,Pe,t),c(j,e,t),r(e,$e,t),r(e,q,t),Re=!0},p:Je,i(e){Re||(d(b.$$.fragment,e),d(u.$$.fragment,e),d(v.$$.fragment,e),d(P.$$.fragment,e),d(R.$$.fragment,e),d(G.$$.fragment,e),d(S.$$.fragment,e),d(U.$$.fragment,e),d(V.$$.fragment,e),d(Z.$$.fragment,e),d(E.$$.fragment,e),d(j.$$.fragment,e),Re=!0)},o(e){m(b.$$.fragment,e),m(u.$$.fragment,e),m(v.$$.fragment,e),m(P.$$.fragment,e),m(R.$$.fragment,e),m(G.$$.fragment,e),m(S.$$.fragment,e),m(U.$$.fragment,e),m(V.$$.fragment,e),m(Z.$$.fragment,e),m(E.$$.fragment,e),m(j.$$.fragment,e),Re=!1},d(e){e&&(o(Y),o(W),o(z),o(J),o(Q),o(_),o(K),o(w),o(ee),o(y),o(te),o(T),o(oe),o(re),o(x),o(ne),o(O),o(ae),o(le),o($),o(se),o(ie),o(k),o(pe),o(M),o(he),o(ce),o(L),o(de),o(C),o(me),o(ge),o(F),o(fe),o(H),o(be),o(ue),o(A),o(_e),o(I),o(we),o(ye),o(N),o(Te),o(ve),o(D),o(xe),o(Oe),o(X),o(Pe),o($e),o(q)),o(f),g(b,e),g(u,e),g(v,e),g(P,e),g(R,e),g(G,e),g(S,e),g(U,e),g(V,e),g(Z,e),g(E,e),g(j,e)}}}const lt='{"title":"Examples","local":"examples","sections":[{"title":"Notebooks","local":"notebooks","sections":[{"title":"OpenEnv Notebooks","local":"openenv-notebooks","sections":[],"depth":3}],"depth":2},{"title":"Scripts","local":"scripts","sections":[{"title":"OpenEnv Scripts","local":"openenv-scripts","sections":[],"depth":3}],"depth":2},{"title":"Distributed Training (for scripts)","local":"distributed-training-for-scripts","sections":[],"depth":2}],"depth":1}';function st(Me){return Qe(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class dt extends Ke{constructor(f){super(),et(this,f,st,at,ze,{})}}export{dt as component}; | |
Xet Storage Details
- Size:
- 31.8 kB
- Xet hash:
- 0af4135c1b5056645be17c7bb1849dca3269322575029850bc31adf490c096b1
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.