Buckets:

hf-doc-build/doc-dev / trl /pr_3582 /en /example_overview.html
rtrm's picture
download
raw
27.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Examples&quot;,&quot;local&quot;:&quot;examples&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Introduction&quot;,&quot;local&quot;:&quot;introduction&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Accelerate Config&quot;,&quot;local&quot;:&quot;accelerate-config&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/trl/pr_3582/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/entry/start.0f0f318c.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/chunks/scheduler.d627b047.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/chunks/singletons.affb0d47.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/chunks/index.a57a1c33.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/chunks/paths.15dc14db.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/entry/app.b27a462f.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/chunks/index.73c51727.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/nodes/0.8cd8e450.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/nodes/17.28447f7d.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/chunks/CodeBlock.5f78c87f.js">
<link rel="modulepreload" href="/docs/trl/pr_3582/en/_app/immutable/chunks/getInferenceSnippets.256dfbf1.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Examples&quot;,&quot;local&quot;:&quot;examples&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Introduction&quot;,&quot;local&quot;:&quot;introduction&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Accelerate Config&quot;,&quot;local&quot;:&quot;accelerate-config&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="examples" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#examples"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Examples</span></h1> <h2 class="relative group"><a id="introduction" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#introduction"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" 
aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Introduction</span></h2> <p data-svelte-h="svelte-1qml164">The examples should work in any of the following settings (with the same script):</p> <ul data-svelte-h="svelte-y1bwso"><li>single GPU</li> <li>multi GPUs (using PyTorch distributed mode)</li> <li>multi GPUs (using DeepSpeed ZeRO-Offload stages 1, 2, &amp; 3)</li> <li>fp16 (mixed-precision), fp32 (normal precision), or bf16 (bfloat16 precision)</li></ul> <p data-svelte-h="svelte-1gk4czs">To run it in each of these various modes, first initialize the accelerate
configuration with <code>accelerate config</code></p> <p data-svelte-h="svelte-1uwpnnm"><strong>NOTE to train with a 4-bit or 8-bit model</strong>, please run</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install --upgrade trl[quantization]<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="accelerate-config" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#accelerate-config"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Accelerate Config</span></h2> <p data-svelte-h="svelte-8m9xl">For all the examples, you’ll need to generate a 🤗 Accelerate config file with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config # will prompt you to define the training configuration<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1fatpmt">Then, it is 
encouraged to launch jobs with <code>accelerate launch</code>!</p> <h1 class="relative group"><a id="maintained-examples" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#maintained-examples"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Maintained Examples</span></h1> <p data-svelte-h="svelte-7eac1h">Scripts can be used as examples of how to use TRL trainers. They are located in the <a href="https://github.com/huggingface/trl/blob/main/trl/scripts" rel="nofollow"><code>trl/scripts</code></a> directory. Additionally, we provide examples in the <a href="https://github.com/huggingface/trl/blob/main/examples/scripts" rel="nofollow"><code>examples/scripts</code></a> directory. 
These examples are maintained and tested regularly.</p> <table data-svelte-h="svelte-170clr6"><thead><tr><th>File</th> <th>Description</th></tr></thead> <tbody><tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/alignprop.py" rel="nofollow"><code>examples/scripts/alignprop.py</code></a></td> <td>This script shows how to use the <code>AlignPropTrainer</code> to fine-tune a diffusion model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/bco.py" rel="nofollow"><code>examples/scripts/bco.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/kto_trainer#trl.KTOTrainer">KTOTrainer</a> with the BCO loss to fine-tune a model to increase instruction-following, truthfulness, honesty and helpfulness using the <a href="https://huggingface.co/datasets/openbmb/UltraFeedback" rel="nofollow">openbmb/UltraFeedback</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/cpo.py" rel="nofollow"><code>examples/scripts/cpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/cpo_trainer#trl.CPOTrainer">CPOTrainer</a> to fine-tune a model to increase helpfulness and harmlessness using the <a href="https://huggingface.co/datasets/Anthropic/hh-rlhf" rel="nofollow">Anthropic/hh-rlhf</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/ddpo.py" rel="nofollow"><code>examples/scripts/ddpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/ddpo_trainer#trl.DDPOTrainer">DDPOTrainer</a> to fine-tune a stable diffusion model using reinforcement learning.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/dpo_online.py" rel="nofollow"><code>examples/scripts/dpo_online.py</code></a></td> <td>This script shows how to use the <a 
href="/docs/trl/pr_3582/en/online_dpo_trainer#trl.OnlineDPOTrainer">OnlineDPOTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/dpo_vlm.py" rel="nofollow"><code>examples/scripts/dpo_vlm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/dpo_trainer#trl.DPOTrainer">DPOTrainer</a> to fine-tune a Vision Language Model to reduce hallucinations using the <a href="https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset" rel="nofollow">openbmb/RLAIF-V-Dataset</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/gkd.py" rel="nofollow"><code>examples/scripts/gkd.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/gkd_trainer#trl.GKDTrainer">GKDTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/nash_md.py" rel="nofollow"><code>examples/scripts/nash_md.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/nash_md_trainer#trl.NashMDTrainer">NashMDTrainer</a> to fine-tune a model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/orpo.py" rel="nofollow"><code>examples/scripts/orpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/orpo_trainer#trl.ORPOTrainer">ORPOTrainer</a> to fine-tune a model to increase helpfulness and harmlessness using the <a href="https://huggingface.co/datasets/Anthropic/hh-rlhf" rel="nofollow">Anthropic/hh-rlhf</a> dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/ppo/ppo.py" rel="nofollow"><code>examples/scripts/ppo/ppo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/ppo_trainer#trl.PPOTrainer">PPOTrainer</a> to fine-tune a model to improve its ability to continue text with positive sentiment or 
physically descriptive language.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/ppo/ppo_tldr.py" rel="nofollow"><code>examples/scripts/ppo/ppo_tldr.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/ppo_trainer#trl.PPOTrainer">PPOTrainer</a> to fine-tune a model to improve its ability to generate TL;DR summaries.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/prm.py" rel="nofollow"><code>examples/scripts/prm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/prm_trainer#trl.PRMTrainer">PRMTrainer</a> to fine-tune a Process-supervised Reward Model (PRM).</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/reward_modeling.py" rel="nofollow"><code>examples/scripts/reward_modeling.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/reward_trainer#trl.RewardTrainer">RewardTrainer</a> to train an Outcome Reward Model (ORM) on your own dataset.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/rloo/rloo.py" rel="nofollow"><code>examples/scripts/rloo/rloo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/rloo_trainer#trl.RLOOTrainer">RLOOTrainer</a> to fine-tune a model to improve its ability to continue text with positive sentiment or physically descriptive language.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/rloo/rloo_tldr.py" rel="nofollow"><code>examples/scripts/rloo/rloo_tldr.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/rloo_trainer#trl.RLOOTrainer">RLOOTrainer</a> to fine-tune a model to improve its ability to generate TL;DR summaries.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_gemma3.py" 
rel="nofollow"><code>examples/scripts/sft_gemma3.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a Gemma 3 model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_video_llm.py" rel="nofollow"><code>examples/scripts/sft_video_llm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a Video Language Model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_vlm_gemma3.py" rel="nofollow"><code>examples/scripts/sft_vlm_gemma3.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a Gemma 3 model on vision to text tasks.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_vlm_smol_vlm.py" rel="nofollow"><code>examples/scripts/sft_vlm_smol_vlm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a SmolVLM model.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/sft_vlm.py" rel="nofollow"><code>examples/scripts/sft_vlm.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> to fine-tune a Vision Language Model in a chat setting. 
The script has only been tested with <a href="https://huggingface.co/llava-hf/llava-1.5-7b-hf" rel="nofollow">LLaVA 1.5</a>, <a href="https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf" rel="nofollow">LLaVA 1.6</a>, and <a href="https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct" rel="nofollow">Llama-3.2-11B-Vision-Instruct</a> models so users may see unexpected behaviour in other model architectures.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/blob/main/examples/scripts/xpo.py" rel="nofollow"><code>examples/scripts/xpo.py</code></a></td> <td>This script shows how to use the <a href="/docs/trl/pr_3582/en/xpo_trainer#trl.XPOTrainer">XPOTrainer</a> to fine-tune a model.</td></tr></tbody></table> <p data-svelte-h="svelte-8xuvqq">Here are also some easier-to-run colab notebooks that you can use to get started with TRL:</p> <table data-svelte-h="svelte-17wv7zp"><thead><tr><th>File</th> <th>Description</th></tr></thead> <tbody><tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/best_of_n.ipynb" rel="nofollow"><code>examples/notebooks/best_of_n.ipynb</code></a></td> <td>This notebook demonstrates how to use the “Best of N” sampling strategy using TRL when fine-tuning your model with PPO.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/gpt2-sentiment.ipynb" rel="nofollow"><code>examples/notebooks/gpt2-sentiment.ipynb</code></a></td> <td>This notebook demonstrates how to reproduce the GPT2 imdb sentiment tuning example on a jupyter notebook.</td></tr> <tr><td><a href="https://github.com/huggingface/trl/tree/main/examples/notebooks/gpt2-control.ipynb" rel="nofollow"><code>examples/notebooks/gpt2-control.ipynb</code></a></td> <td>This notebook demonstrates how to reproduce the GPT2 sentiment control example on a jupyter notebook.</td></tr></tbody></table> <p data-svelte-h="svelte-nr1j2l">We also have some other examples that are less maintained but can be used as a 
reference:</p> <ol data-svelte-h="svelte-ks8v1x"><li><strong><a href="https://github.com/huggingface/trl/tree/main/examples/research_projects" rel="nofollow">research_projects</a></strong>: Check out this folder to find the scripts used for some research projects that used TRL (LM de-toxification, Stack-Llama, etc.)</li></ol> <h2 class="relative group"><a id="distributed-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#distributed-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Distributed training</span></h2> <p data-svelte-h="svelte-1wqljqa">All of the scripts can be run on multiple GPUs by providing the path of an 🤗 Accelerate config file when calling <code>accelerate launch</code>. 
To launch one of them on one or multiple GPUs, run the following command (swapping <code>{NUM_GPUS}</code> with the number of GPUs in your machine and <code>--all_arguments_of_the_script</code> with your arguments.)</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch --config_file=examples/accelerate_configs/multi_gpu.yaml --num_processes {NUM_GPUS} path_to_script.py --all_arguments_of_the_script<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12yw9zf">You can also adjust the parameters of the 🤗 Accelerate config file to suit your needs (e.g. 
training in mixed precision).</p> <h3 class="relative group"><a id="distributed-training-with-deepspeed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#distributed-training-with-deepspeed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Distributed training with DeepSpeed</span></h3> <p data-svelte-h="svelte-155h86t">Most of the scripts can be run on multiple GPUs together with DeepSpeed ZeRO-{1,2,3} for efficient sharding of the optimizer states, gradients, and model weights. 
To do so, run the following command (swapping <code>{NUM_GPUS}</code> with the number of GPUs in your machine, <code>--all_arguments_of_the_script</code> with your arguments, and the <code>--config_file</code> value with the path to the DeepSpeed config file, such as <code>examples/accelerate_configs/deepspeed_zero1.yaml</code>):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero{1,2,3}.yaml --num_processes {NUM_GPUS} path_to_script.py --all_arguments_of_the_script<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/trl/blob/main/docs/source/example_overview.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span 
data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
  // Client bootstrap for this prerendered page: publish the path settings,
  // load the two entry modules, then call the start entry's `start()`.

  // Assigned WITHOUT a declaration keyword on purpose: the object must not be
  // scoped to this block. NOTE(review): presumably read by the imported
  // runtime under this build-specific name — confirm before renaming.
  __sveltekit_4tczb2 = {
    assets: "/docs/trl/pr_3582/en",
    base: "/docs/trl/pr_3582/en",
    env: {}
  };

  // `document.currentScript` is only meaningful while this script is executing
  // synchronously, so the parent node is captured before any async work.
  const mountTarget = document.currentScript.parentElement;

  // One entry per id in `node_ids` below; both are null for this page.
  const nodeData = [null, null];

  // Kick off both dynamic imports so they are fetched in parallel.
  const startEntry = import("/docs/trl/pr_3582/en/_app/immutable/entry/start.0f0f318c.js");
  const appEntry = import("/docs/trl/pr_3582/en/_app/immutable/entry/app.b27a462f.js");

  Promise.all([startEntry, appEntry]).then((loaded) => {
    const kit = loaded[0];
    const app = loaded[1];

    kit.start(app, mountTarget, {
      node_ids: [0, 17],
      data: nodeData,
      form: null,
      error: null
    });
  });
}
</script>

Xet Storage Details

Size:
27.1 kB
·
Xet hash:
287d73556d6ad9d938a4792405d9519baa40109e0dd4de32464727861101f169

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.