Buckets:
| import{s as tl,a as al,o as nl,n as sl}from"../chunks/scheduler.7b731bd4.js";import{S as ll,i as ol,e as o,s,c as p,q as Gt,H as An,h as rl,a as r,d as a,b as l,f as U,g as c,j as i,r as Zt,u as Yn,k as w,v as Pn,l as u,m as n,n as m,t as d,o as h,p as g}from"../chunks/index.cc268345.js";import{C as il,H as f,E as pl}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.cec98f15.js";import{D as Bt}from"../chunks/Docstring.42475074.js";import{C as _}from"../chunks/CodeBlock.cbea7109.js";import{E as cl}from"../chunks/ExampleCodeBlock.94981f32.js";function ml(zt){let M,E="Example:",F,J,j;return J=new _({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVFRyYWluZXIlMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEElMEFkYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMnJvbmVuZWxkYW4lMkZUaW55U3RvcmllcyUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lNUIlM0ExJTI1JTVEJTIyKSUwQSUwQXRyYWluZXIlMjAlM0QlMjBTRlRUcmFpbmVyKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyUXdlbiUyRlF3ZW4yLjUtMC41Qi1JbnN0cnVjdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0RkYXRhc2V0JTJDJTBBKSUwQXRyYWluZXIudHJhaW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTTrainer | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| dataset = load_dataset(<span class="hljs-string">"roneneldan/TinyStories"</span>, split=<span class="hljs-string">"train[:1%]"</span>) | |
| trainer = SFTTrainer( | |
| model=<span class="hljs-string">"Qwen/Qwen2.5-0.5B-Instruct"</span>, | |
| train_dataset=dataset, | |
| ) | |
| trainer.train()`,wrap:!1}}),{c(){M=o("p"),M.textContent=E,F=s(),p(J.$$.fragment)},l(T){M=r(T,"P",{"data-svelte-h":!0}),i(M)!=="svelte-11lpom8"&&(M.textContent=E),F=l(T),c(J.$$.fragment,T)},m(T,C){n(T,M,C),n(T,F,C),m(J,T,C),j=!0},p:sl,i(T){j||(d(J.$$.fragment,T),j=!0)},o(T){h(J.$$.fragment,T),j=!1},d(T){T&&(a(M),a(F)),g(J,T)}}}function dl(zt){let M,E,F,J,j,T,C,Qt,H,Dn='<a href="https://huggingface.co/models?other=sft,trl" rel="nofollow"><img src="https://img.shields.io/badge/All_models-SFT-blue" alt="All_models-SFT-blue"/></a> <a href="https://github.com/huggingface/smol-course/tree/main/1_instruction_tuning" rel="nofollow"><img src="https://img.shields.io/badge/smol_course-Chapter_1-yellow" alt="smol_course-Chapter_1-yellow"/></a>',Rt,V,Lt,X,Kn="TRL supports the Supervised Fine-Tuning (SFT) Trainer for training language models.",Wt,A,On='This post-training method was contributed by <a href="https://huggingface.co/ybelkada" rel="nofollow">Younes Belkada</a>.',Et,Y,Ht,P,es='This example demonstrates how to train a language model using the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> from TRL. We train a <a href="https://huggingface.co/Qwen/Qwen3-0.6B" rel="nofollow">Qwen 3 0.6B</a> model on the <a href="https://huggingface.co/datasets/trl-lib/Capybara" rel="nofollow">Capybara dataset</a>, a compact, diverse multi-turn dataset to benchmark reasoning and generalization.',Vt,D,Xt,k,ts,At,K,Yt,O,as='SFT supports both <a href="dataset_formats#language-modeling">language modeling</a> and <a href="dataset_formats#prompt-completion">prompt-completion</a> datasets. The <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> is compatible with both <a href="dataset_formats#standard">standard</a> and <a href="dataset_formats#conversational">conversational</a> dataset formats. When provided with a conversational dataset, the trainer will automatically apply the chat template to the dataset.',Pt,ee,Dt,te,ns='If your dataset is not in one of these formats, you can preprocess it to convert it into the expected format. Here is an example with the <a href="https://huggingface.co/datasets/FreedomIntelligence/medical-o1-reasoning-SFT" rel="nofollow">FreedomIntelligence/medical-o1-reasoning-SFT</a> dataset:',Kt,ae,Ot,ne,ea,se,ta,le,ss="Supervised Fine-Tuning (SFT) is the simplest and most commonly used method to adapt a language model to a target dataset. The model is trained in a fully supervised fashion using pairs of input and output sequences. The goal is to minimize the negative log-likelihood (NLL) of the target sequence, conditioning on the input.",aa,oe,ls="This section breaks down how SFT works in practice, covering the key steps: <strong>preprocessing</strong>, <strong>tokenization</strong> and <strong>loss computation</strong>.",na,re,sa,ie,os=`During training, each example is expected to contain a <strong>text field</strong> or a <strong>(prompt, completion)</strong> pair, depending on the dataset format. For more details on the expected formats, see <a href="dataset_formats">Dataset formats</a>. | |
| The <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> tokenizes each input using the model’s tokenizer. If both prompt and completion are provided separately, they are concatenated before tokenization.`,la,pe,oa,ce,rs='<img src="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/sft_figure.png" alt="sft_figure"/>',ra,S,Cn,_t,is="token-level cross-entropy loss",In,ia,Ks=`<span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><msub><mi mathvariant="script">L</mi><mtext>SFT</mtext></msub><mo stretchy="false">(</mo><mi>θ</mi><mo stretchy="false">)</mo><mo>=</mo><mo>−</mo><munderover><mo>∑</mo><mrow><mi>t</mi><mo>=</mo><mn>1</mn></mrow><mi>T</mi></munderover><mi>log</mi><mo></mo><msub><mi>p</mi><mi>θ</mi></msub><mo stretchy="false">(</mo><msub><mi>y</mi><mi>t</mi></msub><mo>∣</mo><msub><mi>y</mi><mrow><mo><</mo><mi>t</mi></mrow></msub><mo stretchy="false">)</mo><mo separator="true">,</mo></mrow><annotation encoding="application/x-tex"> | |
| \\mathcal{L}_{\\text{SFT}}(\\theta) = - \\sum_{t=1}^{T} \\log p_\\theta(y_t \\mid y_{<t}), | |
| </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord"><span class="mord mathcal">L</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3283em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord text mtight"><span class="mord mtight">SFT</span></span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathnormal" style="margin-right:0.02778em;">θ</span><span class="mclose">)</span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:3.0954em;vertical-align:-1.2671em;"></span><span class="mord">−</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mop op-limits"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.8283em;"><span style="top:-1.8829em;margin-left:0em;"><span class="pstrut" style="height:3.05em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathnormal mtight">t</span><span class="mrel mtight">=</span><span class="mord mtight">1</span></span></span></span><span style="top:-3.05em;"><span class="pstrut" style="height:3.05em;"></span><span><span class="mop op-symbol large-op">∑</span></span></span><span style="top:-4.3em;margin-left:0em;"><span class="pstrut" style="height:3.05em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathnormal mtight" style="margin-right:0.13889em;">T</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:1.2671em;"><span></span></span></span></span></span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mop">lo<span style="margin-right:0.01389em;">g</span></span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord"><span class="mord mathnormal">p</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.3361em;"><span style="top:-2.55em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight" style="margin-right:0.02778em;">θ</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord"><span class="mord mathnormal" style="margin-right:0.03588em;">y</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">∣</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.03588em;">y</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mrel mtight"><</span><span class="mord mathnormal mtight">t</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.1774em;"><span></span></span></span></span></span></span><span class="mclose">)</span><span class="mpunct">,</span></span></span></span></span>`,pa,$,Fn,ca,Os='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>y</mi><mi>t</mi></msub></mrow><annotation encoding="application/x-tex"> y_t </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.625em;vertical-align:-0.1944em;"></span><span class="mord"><span class="mord mathnormal" style="margin-right:0.03588em;">y</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.2806em;"><span style="top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathnormal mtight">t</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span>',ma,da,el='<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>t</mi></mrow><annotation encoding="application/x-tex"> t </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6151em;"></span><span class="mord mathnormal">t</span></span></span></span>',ha,ga,G,ps='<p>The paper <a href="https://huggingface.co/papers/2508.05629" rel="nofollow">On the Generalization of SFT: A Reinforcement Learning Perspective with Reward Rectification</a> proposes an alternative loss function, called <strong>Dynamic Fine-Tuning (DFT)</strong>, which aims to improve generalization by rectifying the reward signal. This method can be enabled by setting <code>loss_type="dft"</code> in the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>. For more details, see <a href="paper_index#on-the-generalization-of-sft-a-reinforcement-learning-perspective-with-reward-rectification">Paper Index - Dynamic Fine-Tuning</a>.</p>',ua,me,fa,de,cs=`During training, the loss is computed using a <strong>one-token shift</strong>: the model is trained to predict each token in the sequence based on all previous tokens. Specifically, the input sequence is shifted right by one position to form the target labels. | |
| Padding tokens (if present) are ignored in the loss computation by applying an ignore index (default: <code>-100</code>) to the corresponding positions. This ensures that the loss focuses only on meaningful, non-padding tokens.`,Ta,he,ya,ge,ms="While training and evaluating we record the following reward metrics:",_a,ue,ds="<li><code>global_step</code>: The total number of optimizer steps taken so far.</li> <li><code>epoch</code>: The current epoch number, based on dataset iteration.</li> <li><code>num_tokens</code>: The total number of tokens processed so far.</li> <li><code>loss</code>: The average cross-entropy loss computed over non-masked tokens in the current logging interval.</li> <li><code>entropy</code>: The average entropy of the model’s predicted token distribution over non-masked tokens.</li> <li><code>mean_token_accuracy</code>: The proportion of non-masked tokens for which the model’s top-1 prediction matches the ground truth token.</li> <li><code>learning_rate</code>: The current learning rate, which may change dynamically if a scheduler is used.</li> <li><code>grad_norm</code>: The L2 norm of the gradients, computed before gradient clipping.</li>",Ma,fe,wa,Te,ba,ye,hs='You can directly pass the kwargs of the <code>from_pretrained()</code> method to the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>. For example, if you want to load a model in a different precision, analogous to',va,_e,Ja,Me,gs='you can do so by passing the <code>model_init_kwargs={"dtype": torch.bfloat16}</code> argument to the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>.',ja,we,ka,be,us="Note that all keyword arguments of <code>from_pretrained()</code> are supported.",Ua,ve,Ca,Je,fs='<a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> supports <em>example packing</em>, where multiple examples are packed in the same input sequence to increase training efficiency. To enable packing, simply pass <code>packing=True</code> to the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a> constructor.',Ia,je,Fa,ke,Ts='For more details on packing, see <a href="reducing_memory_usage#packing">Packing</a>.',$a,Ue,xa,Ce,ys='To train on assistant messages only, use a <a href="dataset_formats#conversational">conversational</a> dataset and set <code>assistant_only_loss=True</code> in the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>. This setting ensures that loss is computed <strong>only</strong> on the assistant responses, ignoring user or system messages.',Na,Ie,Sa,Fe,_s='<img src="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/train_on_assistant.png" alt="train_on_assistant"/>',qa,Z,Ms='<p>This functionality requires the chat template to include <code>{% generation %}</code> and <code>{% endgeneration %}</code> keywords. For known model families (e.g. Qwen3), TRL automatically patches the template when <code>assistant_only_loss=True</code>. For other models, check that your chat template includes these keywords — see <a href="https://huggingface.co/HuggingFaceTB/SmolLM3-3B/blob/main/chat_template.jinja#L76-L82" rel="nofollow">HuggingFaceTB/SmolLM3-3B</a> for an example.</p>',Ga,$e,Za,xe,ws='To train on completion only, use a <a href="dataset_formats#prompt-completion">prompt-completion</a> dataset. By default, the trainer computes the loss on the completion tokens only, ignoring the prompt tokens. If you want to train on the full sequence, set <code>completion_only_loss=False</code> in the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>.',Ba,Ne,za,Se,bs='<img src="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/train_on_completion.png" alt="train_on_completion"/>',Qa,B,vs='<p>Training on completion only is compatible with training on assistant messages only. In this case, use a <a href="dataset_formats#conversational">conversational</a> <a href="dataset_formats#prompt-completion">prompt-completion</a> dataset and set <code>assistant_only_loss=True</code> in the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>.</p>',Ra,qe,La,Ge,Js="We support tight integration with 🤗 PEFT library, allowing any user to conveniently train adapters and share them on the Hub, rather than training the entire model.",Wa,Ze,Ea,Be,js='You can also continue training your <a href="https://huggingface.co/docs/peft/main/en/package_reference/peft_model#peft.PeftModel" rel="nofollow">PeftModel</a>. For that, first load a <code>PeftModel</code> outside <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> and pass it directly to the trainer without the <code>peft_config</code> argument being passed.',Ha,ze,Va,q,Mt,ks="When training adapters, you typically use a higher learning rate (≈1e‑4) since only new parameters are being learned.",$n,Qe,Xa,Re,Aa,Le,Us='Liger Kernel is a collection of Triton kernels for LLM training that boosts multi-GPU throughput by 20%, cuts memory use by 60% (enabling up to 4× longer context), and works seamlessly with tools like FlashAttention, PyTorch FSDP, and DeepSpeed. For more information, see <a href="liger_kernel_integration">Liger Kernel Integration</a>.',Ya,We,Pa,Ee,Cs='RapidFire AI is an open-source experimentation engine that sits on top of TRL and lets you launch multiple SFT configurations at once, even on a single GPU. Instead of trying configurations sequentially, RapidFire lets you <strong>see all their learning curves earlier, stop underperforming runs, and clone promising ones with new settings in flight</strong> without restarting. For more information, see <a href="rapidfire_integration">RapidFire AI Integration</a>.',Da,He,Ka,Ve,Is='Unsloth is an open‑source framework for fine‑tuning and reinforcement learning that trains LLMs (like Llama, Mistral, Gemma, DeepSeek, and more) up to 2× faster with up to 70% less VRAM, while providing a streamlined, Hugging Face–compatible workflow for training, evaluation, and deployment. For more information, see <a href="unsloth_integration">Unsloth Integration</a>.',Oa,Xe,en,Ae,Fs="<strong>Instruction tuning</strong> teaches a base language model to follow user instructions and engage in conversations. This requires:",tn,Ye,$s='<li><strong>Chat template</strong>: Defines how to structure conversations into text sequences, including role markers (user/assistant), special tokens, and turn boundaries. Read more about chat templates in <a href="https://huggingface.co/docs/transformers/chat_templating#templates" rel="nofollow">Chat templates</a>.</li> <li><strong>Conversational dataset</strong>: Contains instruction-response pairs</li>',an,Pe,xs='This example shows how to transform the <a href="https://huggingface.co/Qwen/Qwen3-0.6B-Base" rel="nofollow">Qwen 3 0.6B Base</a> model into an instruction-following model using the <a href="https://huggingface.co/datasets/trl-lib/Capybara" rel="nofollow">Capybara dataset</a> and a chat template from <a href="https://huggingface.co/HuggingFaceTB/SmolLM3-3B" rel="nofollow">HuggingFaceTB/SmolLM3-3B</a>. The SFT Trainer automatically handles tokenizer updates and special token configuration.',nn,De,sn,z,Ns='<p>Some base models, like those from Qwen, have a predefined chat template in the model’s tokenizer. In these cases, it is not necessary to apply <code>clone_chat_template()</code>, as the tokenizer already handles the formatting. However, it is necessary to align the EOS token with the chat template to ensure the model’s responses terminate correctly. In these cases, specify <code>eos_token</code> in <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>; for example, for <code>Qwen/Qwen2.5-1.5B</code>, one should set <code>eos_token="<|im_end|>"</code>.</p>',ln,Ke,Ss="Once trained, your model can now follow instructions and engage in conversations using its new chat template.",on,Oe,rn,et,qs="Alternatively, use the structured conversation format (recommended):",pn,tt,cn,at,mn,nt,Gs='The <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> fully supports fine-tuning models with <em>tool calling</em> capabilities. In this case, each dataset example should include:',dn,st,Zs="<li>The conversation messages, including any tool calls (<code>tool_calls</code>) and tool responses (<code>tool</code> role messages)</li> <li>The list of available tools in the <code>tools</code> column, typically provided as JSON schemas</li>",hn,lt,Bs='For details on the expected dataset structure, see the <a href="dataset_formats#tool-calling">Dataset Format — Tool Calling</a> section.',gn,ot,un,rt,zs=`<a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> fully supports training Vision-Language Models (VLMs). To train a VLM, provide a dataset with either an <code>image</code> column (single image per sample) or an <code>images</code> column (list of images per sample). For more information on the expected dataset structure, see the <a href="dataset_formats#vision-dataset">Dataset Format — Vision Dataset</a> section. | |
| An example of such a dataset is the <a href="https://huggingface.co/datasets/trl-lib/llava-instruct-mix" rel="nofollow">LLaVA Instruct Mix</a>.`,fn,it,Tn,I,wt,Qs='For VLMs, truncating may remove image tokens, leading to errors during training. To avoid this, set <code>max_length=None</code> in the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>. This allows the model to process the full sequence length without truncating image tokens.',xn,pt,Nn,bt,Rs="Only use <code>max_length</code> when you’ve verified that truncation won’t remove image tokens for the entire dataset.",yn,ct,_n,y,mt,Sn,vt,Ls="Trainer for Supervised Fine-Tuning (SFT) method.",qn,Jt,Ws='This class is a wrapper around the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer" rel="nofollow">Trainer</a> class and inherits all of its attributes and methods.',Gn,Q,Zn,R,dt,Bn,jt,Es="Main training entry point.",zn,x,ht,Qn,kt,Hs="Will save the model, so you can reload it using <code>from_pretrained()</code>.",Rn,Ut,Vs="Will only save from the main process.",Ln,L,gt,Wn,Ct,Xs="Upload <code>self.model</code> and <code>self.processing_class</code> to the 🤗 model hub on the repo <code>self.args.hub_model_id</code>.",Mn,ut,wn,b,ft,En,It,As='Configuration class for the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a>.',Hn,Ft,Ys=`This class includes only the parameters that are specific to SFT training. For a full list of training arguments, | |
| please refer to the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a> documentation. Note that default values in this class may | |
| differ from those in <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a>.`,Vn,$t,Ps=`Using <a href="https://huggingface.co/docs/transformers/main/en/internal/trainer_utils#transformers.HfArgumentParser" rel="nofollow">HfArgumentParser</a> we can turn this class into | |
| <a href="https://docs.python.org/3/library/argparse#module-argparse" rel="nofollow">argparse</a> arguments that can be specified on the | |
| command line.`,Xn,Tt,Ds='<p>These parameters have default values different from <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a>:</p> <ul><li><code>logging_steps</code>: Defaults to <code>10</code> instead of <code>500</code>.</li> <li><code>gradient_checkpointing</code>: Defaults to <code>True</code> instead of <code>False</code>.</li> <li><code>bf16</code>: Defaults to <code>True</code> if <code>fp16</code> is not set, instead of <code>False</code>.</li> <li><code>learning_rate</code>: Defaults to <code>2e-5</code> instead of <code>5e-5</code>.</li></ul>',bn,yt,vn,qt,Jn;return j=new il({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),C=new f({props:{title:"SFT Trainer",local:"sft-trainer",headingTag:"h1"}}),V=new f({props:{title:"Overview",local:"overview",headingTag:"h2"}}),Y=new f({props:{title:"Quick start",local:"quick-start",headingTag:"h2"}}),D=new _({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVFRyYWluZXIlMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEElMEF0cmFpbmVyJTIwJTNEJTIwU0ZUVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMlF3ZW4lMkZRd2VuMy0wLjZCJTIyJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGxvYWRfZGF0YXNldCglMjJ0cmwtbGliJTJGQ2FweWJhcmElMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUyQyUwQSklMEF0cmFpbmVyLnRyYWluKCk=",highlighted:`<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTTrainer | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| trainer = SFTTrainer( | |
| model=<span class="hljs-string">"Qwen/Qwen3-0.6B"</span>, | |
| train_dataset=load_dataset(<span class="hljs-string">"trl-lib/Capybara"</span>, split=<span class="hljs-string">"train"</span>), | |
| ) | |
| trainer.train()`,wrap:!1}}),K=new f({props:{title:"Expected dataset type and format",local:"expected-dataset-type-and-format",headingTag:"h2"}}),ee=new _({props:{code:"JTIzJTIwU3RhbmRhcmQlMjBsYW5ndWFnZSUyMG1vZGVsaW5nJTBBJTdCJTIydGV4dCUyMiUzQSUyMCUyMlRoZSUyMHNreSUyMGlzJTIwYmx1ZS4lMjIlN0QlMEElMEElMjMlMjBDb252ZXJzYXRpb25hbCUyMGxhbmd1YWdlJTIwbW9kZWxpbmclMEElN0IlMjJtZXNzYWdlcyUyMiUzQSUyMCU1QiU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBjb2xvciUyMGlzJTIwdGhlJTIwc2t5JTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJJdCUyMGlzJTIwYmx1ZS4lMjIlN0QlNUQlN0QlMEElMEElMjMlMjBTdGFuZGFyZCUyMHByb21wdC1jb21wbGV0aW9uJTBBJTdCJTIycHJvbXB0JTIyJTNBJTIwJTIyVGhlJTIwc2t5JTIwaXMlMjIlMkMlMEElMjAlMjJjb21wbGV0aW9uJTIyJTNBJTIwJTIyJTIwYmx1ZS4lMjIlN0QlMEElMEElMjMlMjBDb252ZXJzYXRpb25hbCUyMHByb21wdC1jb21wbGV0aW9uJTBBJTdCJTIycHJvbXB0JTIyJTNBJTIwJTVCJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyV2hhdCUyMGNvbG9yJTIwaXMlMjB0aGUlMjBza3klM0YlMjIlN0QlNUQlMkMlMEElMjAlMjJjb21wbGV0aW9uJTIyJTNBJTIwJTVCJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJJdCUyMGlzJTIwYmx1ZS4lMjIlN0QlNUQlN0Q=",highlighted:`<span class="hljs-comment"># Standard language modeling</span> | |
| {<span class="hljs-string">"text"</span>: <span class="hljs-string">"The sky is blue."</span>} | |
| <span class="hljs-comment"># Conversational language modeling</span> | |
| {<span class="hljs-string">"messages"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}]} | |
| <span class="hljs-comment"># Standard prompt-completion</span> | |
| {<span class="hljs-string">"prompt"</span>: <span class="hljs-string">"The sky is"</span>, | |
| <span class="hljs-string">"completion"</span>: <span class="hljs-string">" blue."</span>} | |
| <span class="hljs-comment"># Conversational prompt-completion</span> | |
| {<span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}], | |
| <span class="hljs-string">"completion"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}]}`,wrap:!1}}),ae=new _({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJGcmVlZG9tSW50ZWxsaWdlbmNlJTJGbWVkaWNhbC1vMS1yZWFzb25pbmctU0ZUJTIyJTJDJTIwJTIyZW4lMjIpJTBBJTBBZGVmJTIwcHJlcHJvY2Vzc19mdW5jdGlvbihleGFtcGxlKSUzQSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnByb21wdCUyMiUzQSUyMCU1QiU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMGV4YW1wbGUlNUIlMjJRdWVzdGlvbiUyMiU1RCU3RCU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmNvbXBsZXRpb24lMjIlM0ElMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyYXNzaXN0YW50JTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMGYlMjIlM0N0aGluayUzRSU3QmV4YW1wbGUlNUInQ29tcGxleF9Db1QnJTVEJTdEJTNDJTJGdGhpbmslM0UlN0JleGFtcGxlJTVCJ1Jlc3BvbnNlJyU1RCU3RCUyMiU3RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMCU3RCUwQSUwQWRhdGFzZXQlMjAlM0QlMjBkYXRhc2V0Lm1hcChwcmVwcm9jZXNzX2Z1bmN0aW9uJTJDJTIwcmVtb3ZlX2NvbHVtbnMlM0QlNUIlMjJRdWVzdGlvbiUyMiUyQyUyMCUyMlJlc3BvbnNlJTIyJTJDJTIwJTIyQ29tcGxleF9Db1QlMjIlNUQpJTBBcHJpbnQobmV4dChpdGVyKGRhdGFzZXQlNUIlMjJ0cmFpbiUyMiU1RCkpKQ==",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| dataset = load_dataset(<span class="hljs-string">"FreedomIntelligence/medical-o1-reasoning-SFT"</span>, <span class="hljs-string">"en"</span>) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-keyword">return</span> { | |
| <span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: example[<span class="hljs-string">"Question"</span>]}], | |
| <span class="hljs-string">"completion"</span>: [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">f"<think><span class="hljs-subst">{example[<span class="hljs-string">'Complex_CoT'</span>]}</span></think><span class="hljs-subst">{example[<span class="hljs-string">'Response'</span>]}</span>"</span>} | |
| ], | |
| } | |
| dataset = dataset.<span class="hljs-built_in">map</span>(preprocess_function, remove_columns=[<span class="hljs-string">"Question"</span>, <span class="hljs-string">"Response"</span>, <span class="hljs-string">"Complex_CoT"</span>]) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-built_in">next</span>(<span class="hljs-built_in">iter</span>(dataset[<span class="hljs-string">"train"</span>])))`,wrap:!1}}),ne=new _({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIycHJvbXB0JTIyJTNBJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMkdpdmVuJTIwdGhlJTIwc3ltcHRvbXMlMjBvZiUyMHN1ZGRlbiUyMHdlYWtuZXNzJTIwaW4lMjB0aGUlMjBsZWZ0JTIwYXJtJTIwYW5kJTIwbGVnJTJDJTIwcmVjZW50JTIwbG9uZy1kaXN0YW5jZSUyMHRyYXZlbCUyQyUyMGFuZCUyMHRoZSUyMHByZXNlbmNlJTIwb2YlMjBzd29sbGVuJTIwYW5kJTIwdGVuZGVyJTIwcmlnaHQlMjBsb3dlciUyMGxlZyUyQyUyMHdoYXQlMjBzcGVjaWZpYyUyMGNhcmRpYWMlMjBhYm5vcm1hbGl0eSUyMGlzJTIwbW9zdCUyMGxpa2VseSUyMHRvJTIwYmUlMjBmb3VuZCUyMHVwb24lMjBmdXJ0aGVyJTIwZXZhbHVhdGlvbiUyMHRoYXQlMjBjb3VsZCUyMGV4cGxhaW4lMjB0aGVzZSUyMGZpbmRpbmdzJTNGJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjJjb21wbGV0aW9uJTIyJTNBJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMiUzQ3RoaW5rJTNFT2theSUyQyUyMGxldCdzJTIwc2VlJTIwd2hhdCdzJTIwZ29pbmclMjBvbiUyMGhlcmUuJTIwV2UndmUlMjBnb3QlMjBzdWRkZW4lMjB3ZWFrbmVzcyUyMCU1Qi4uLiU1RCUyMGNsaWNrcyUyMGludG8lMjBwbGFjZSElM0MlMkZ0aGluayUzRVRoZSUyMHNwZWNpZmljJTIwY2FyZGlhYyUyMGFibm9ybWFsaXR5JTIwbW9zdCUyMGxpa2VseSUyMHRvJTIwYmUlMjBmb3VuZCUyMGluJTIwJTVCLi4uJTVEJTIwdGhlJTIwcHJlc2VuY2UlMjBvZiUyMGElMjBQRk8lMjBmYWNpbGl0YXRpbmclMjBhJTIwcGFyYWRveGljYWwlMjBlbWJvbGlzbS4lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJyb2xlJTIyJTNBJTIwJTIyYXNzaXN0YW50JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTdE",highlighted:`<span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"prompt"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"content"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"Given the symptoms of sudden weakness in the left arm and leg, recent long-distance travel, and the presence of swollen and tender right lower leg, what specific cardiac abnormality is most likely to be found upon further evaluation that could explain these findings?"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"role"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"user"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"completion"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"content"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"<think>Okay, let's see what's going on here. We've got sudden weakness [...] clicks into place!</think>The specific cardiac abnormality most likely to be found in [...] the presence of a PFO facilitating a paradoxical embolism."</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"role"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"assistant"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-punctuation">}</span>`,wrap:!1}}),se=new f({props:{title:"Looking deeper into the SFT method",local:"looking-deeper-into-the-sft-method",headingTag:"h2"}}),re=new f({props:{title:"Preprocessing and tokenization",local:"preprocessing-and-tokenization",headingTag:"h3"}}),pe=new f({props:{title:"Computing the loss",local:"computing-the-loss",headingTag:"h3"}}),me=new f({props:{title:"Label shifting and masking",local:"label-shifting-and-masking",headingTag:"h3"}}),he=new f({props:{title:"Logged metrics",local:"logged-metrics",headingTag:"h2"}}),fe=new f({props:{title:"Customization",local:"customization",headingTag:"h2"}}),Te=new f({props:{title:"Model initialization",local:"model-initialization",headingTag:"h3"}}),_e=new _({props:{code:"bW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoJTIyUXdlbiUyRlF3ZW4zLTAuNkIlMjIlMkMlMjBkdHlwZSUzRHRvcmNoLmJmbG9hdDE2KQ==",highlighted:'model = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">"Qwen/Qwen3-0.6B"</span>, dtype=torch.bfloat16)',wrap:!1}}),we=new _({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVENvbmZpZyUwQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBTRlRDb25maWcoJTBBJTIwJTIwJTIwJTIwbW9kZWxfaW5pdF9rd2FyZ3MlM0QlN0IlMjJkdHlwZSUyMiUzQSUyMHRvcmNoLmJmbG9hdDE2JTdEJTJDJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig | |
| training_args = SFTConfig( | |
| model_init_kwargs={<span class="hljs-string">"dtype"</span>: torch.bfloat16}, | |
| )`,wrap:!1}}),ve=new f({props:{title:"Packing",local:"packing",headingTag:"h3"}}),je=new _({props:{code:"dHJhaW5pbmdfYXJncyUyMCUzRCUyMFNGVENvbmZpZyhwYWNraW5nJTNEVHJ1ZSk=",highlighted:'training_args = SFTConfig(packing=<span class="hljs-literal">True</span>)',wrap:!1}}),Ue=new f({props:{title:"Train on assistant messages only",local:"train-on-assistant-messages-only",headingTag:"h3"}}),Ie=new _({props:{code:"dHJhaW5pbmdfYXJncyUyMCUzRCUyMFNGVENvbmZpZyhhc3Npc3RhbnRfb25seV9sb3NzJTNEVHJ1ZSk=",highlighted:'training_args = SFTConfig(assistant_only_loss=<span class="hljs-literal">True</span>)',wrap:!1}}),$e=new f({props:{title:"Train on completion only",local:"train-on-completion-only",headingTag:"h3"}}),Ne=new _({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVENvbmZpZyUyQyUyMFNGVFRyYWluZXIlMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEElMEElMjMlMjBMb2FkJTIwYSUyMHByb21wdC1jb21wbGV0aW9uJTIwZGF0YXNldCUzQiUyMGxvc3MlMjBpcyUyMGNvbXB1dGVkJTIwb24lMjB0aGUlMjBjb21wbGV0aW9uJTIwb25seSUyMGJ5JTIwZGVmYXVsdCUwQWRhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIydHJsLWxpYiUyRmt0by1taXgtMTRrJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEElMEF0cmFpbmVyJTIwJTNEJTIwU0ZUVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMlF3ZW4lMkZRd2VuMi41LTAuNUItSW5zdHJ1Y3QlMjIlMkMlMEElMjAlMjAlMjAlMjBhcmdzJTNEU0ZUQ29uZmlnKGNvbXBsZXRpb25fb25seV9sb3NzJTNEVHJ1ZSklMkMlMjAlMjAlMjMlMjBUcnVlJTIwYnklMjBkZWZhdWx0JTIwZm9yJTIwcHJvbXB0LWNvbXBsZXRpb24lMjBkYXRhc2V0cyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0RkYXRhc2V0JTJDJTBBKSUwQXRyYWluZXIudHJhaW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-comment"># Load a prompt-completion dataset; loss is computed on the completion only by default</span> | |
| dataset = load_dataset(<span class="hljs-string">"trl-lib/kto-mix-14k"</span>, split=<span class="hljs-string">"train"</span>) | |
| trainer = SFTTrainer( | |
| model=<span class="hljs-string">"Qwen/Qwen2.5-0.5B-Instruct"</span>, | |
| args=SFTConfig(completion_only_loss=<span class="hljs-literal">True</span>), <span class="hljs-comment"># True by default for prompt-completion datasets</span> | |
| train_dataset=dataset, | |
| ) | |
| trainer.train()`,wrap:!1}}),qe=new f({props:{title:"Train adapters with PEFT",local:"train-adapters-with-peft",headingTag:"h3"}}),Ze=new _({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVFRyYWluZXIlMEFmcm9tJTIwcGVmdCUyMGltcG9ydCUyMExvcmFDb25maWclMEElMEFkYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMnRybC1saWIlMkZDYXB5YmFyYSUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lMjIpJTBBJTBBdHJhaW5lciUyMCUzRCUyMFNGVFRyYWluZXIoJTBBJTIwJTIwJTIwJTIwJTIyUXdlbiUyRlF3ZW4zLTAuNkIlMjIlMkMlMEElMjAlMjAlMjAlMjB0cmFpbl9kYXRhc2V0JTNEZGF0YXNldCUyQyUwQSUyMCUyMCUyMCUyMHBlZnRfY29uZmlnJTNETG9yYUNvbmZpZygpJTJDJTBBKSUwQSUwQXRyYWluZXIudHJhaW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTTrainer | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig | |
| dataset = load_dataset(<span class="hljs-string">"trl-lib/Capybara"</span>, split=<span class="hljs-string">"train"</span>) | |
| trainer = SFTTrainer( | |
| <span class="hljs-string">"Qwen/Qwen3-0.6B"</span>, | |
| train_dataset=dataset, | |
| peft_config=LoraConfig(), | |
| ) | |
| trainer.train()`,wrap:!1}}),ze=new _({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVFRyYWluZXIlMEFmcm9tJTIwcGVmdCUyMGltcG9ydCUyMEF1dG9QZWZ0TW9kZWxGb3JDYXVzYWxMTSUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b1BlZnRNb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMjJ0cmwtbGliJTJGUXdlbjMtNEItTG9SQSUyMiUyQyUyMGlzX3RyYWluYWJsZSUzRFRydWUpJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ0cmwtbGliJTJGQ2FweWJhcmElMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUwQSUwQXRyYWluZXIlMjAlM0QlMjBTRlRUcmFpbmVyKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEbW9kZWwlMkMlMEElMjAlMjAlMjAlMjB0cmFpbl9kYXRhc2V0JTNEZGF0YXNldCUyQyUwQSklMEElMEF0cmFpbmVyLnRyYWluKCk=",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTTrainer | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> AutoPeftModelForCausalLM | |
| model = AutoPeftModelForCausalLM.from_pretrained(<span class="hljs-string">"trl-lib/Qwen3-4B-LoRA"</span>, is_trainable=<span class="hljs-literal">True</span>) | |
| dataset = load_dataset(<span class="hljs-string">"trl-lib/Capybara"</span>, split=<span class="hljs-string">"train"</span>) | |
| trainer = SFTTrainer( | |
| model=model, | |
| train_dataset=dataset, | |
| ) | |
| trainer.train()`,wrap:!1}}),Qe=new _({props:{code:"U0ZUQ29uZmlnKGxlYXJuaW5nX3JhdGUlM0QxZS00JTJDJTIwLi4uKQ==",highlighted:'SFTConfig(learning_rate=<span class="hljs-number">1e-4</span>, ...)',wrap:!1}}),Re=new f({props:{title:"Train with Liger Kernel",local:"train-with-liger-kernel",headingTag:"h3"}}),We=new f({props:{title:"Rapid Experimentation for SFT",local:"rapid-experimentation-for-sft",headingTag:"h3"}}),He=new f({props:{title:"Train with Unsloth",local:"train-with-unsloth",headingTag:"h3"}}),Xe=new f({props:{title:"Instruction tuning example",local:"instruction-tuning-example",headingTag:"h2"}}),De=new _({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVENvbmZpZyUyQyUyMFNGVFRyYWluZXIlMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEElMEF0cmFpbmVyJTIwJTNEJTIwU0ZUVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMlF3ZW4lMkZRd2VuMy0wLjZCLUJhc2UlMjIlMkMlMEElMjAlMjAlMjAlMjBhcmdzJTNEU0ZUQ29uZmlnKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG91dHB1dF9kaXIlM0QlMjJRd2VuMy0wLjZCLUluc3RydWN0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2hhdF90ZW1wbGF0ZV9wYXRoJTNEJTIySHVnZ2luZ0ZhY2VUQiUyRlNtb2xMTTMtM0IlMjIlMkMlMEElMjAlMjAlMjAlMjApJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRGxvYWRfZGF0YXNldCglMjJ0cmwtbGliJTJGQ2FweWJhcmElMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUyQyUwQSklMEF0cmFpbmVyLnRyYWluKCk=",highlighted:`<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| trainer = SFTTrainer( | |
| model=<span class="hljs-string">"Qwen/Qwen3-0.6B-Base"</span>, | |
| args=SFTConfig( | |
| output_dir=<span class="hljs-string">"Qwen3-0.6B-Instruct"</span>, | |
| chat_template_path=<span class="hljs-string">"HuggingFaceTB/SmolLM3-3B"</span>, | |
| ), | |
| train_dataset=load_dataset(<span class="hljs-string">"trl-lib/Capybara"</span>, split=<span class="hljs-string">"train"</span>), | |
| ) | |
| trainer.train()`,wrap:!1}}),Oe=new _({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMHBpcGVsaW5lJTBBcGlwZSUyMCUzRCUyMHBpcGVsaW5lKCUyMnRleHQtZ2VuZXJhdGlvbiUyMiUyQyUyMG1vZGVsJTNEJTIyUXdlbjMtMC42Qi1JbnN0cnVjdCUyRmNoZWNrcG9pbnQtNTAwMCUyMiklMEFwcm9tcHQlMjAlM0QlMjAlMjIlM0MlN0NpbV9zdGFydCU3QyUzRXVzZXIlNUNuV2hhdCUyMGlzJTIwdGhlJTIwY2FwaXRhbCUyMG9mJTIwRnJhbmNlJTNGJTIwQW5zd2VyJTIwaW4lMjBvbmUlMjB3b3JkLiUzQyU3Q2ltX2VuZCU3QyUzRSU1Q24lM0MlN0NpbV9zdGFydCU3QyUzRWFzc2lzdGFudCU1Q24lMjIlMEFyZXNwb25zZSUyMCUzRCUyMHBpcGUocHJvbXB0KSUwQXJlc3BvbnNlJTVCMCU1RCU1QiUyMmdlbmVyYXRlZF90ZXh0JTIyJTVE",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-meta">>>> </span>pipe = pipeline(<span class="hljs-string">"text-generation"</span>, model=<span class="hljs-string">"Qwen3-0.6B-Instruct/checkpoint-5000"</span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"<|im_start|>user\\nWhat is the capital of France? Answer in one word.<|im_end|>\\n<|im_start|>assistant\\n"</span> | |
| <span class="hljs-meta">>>> </span>response = pipe(prompt) | |
| <span class="hljs-meta">>>> </span>response[<span class="hljs-number">0</span>][<span class="hljs-string">"generated_text"</span>] | |
| <span class="hljs-string">'<|im_start|>user\\nWhat is the capital of France? Answer in one word.<|im_end|>\\n<|im_start|>assistant\\nThe capital of France is Paris.'</span>`,wrap:!1}}),tt=new _({props:{code:"cHJvbXB0JTIwJTNEJTIwJTVCJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyV2hhdCUyMGlzJTIwdGhlJTIwY2FwaXRhbCUyMG9mJTIwRnJhbmNlJTNGJTIwQW5zd2VyJTIwaW4lMjBvbmUlMjB3b3JkLiUyMiU3RCU1RCUwQXJlc3BvbnNlJTIwJTNEJTIwcGlwZShwcm9tcHQpJTBBcmVzcG9uc2UlNUIwJTVEJTVCJTIyZ2VuZXJhdGVkX3RleHQlMjIlNUQ=",highlighted:`<span class="hljs-meta">>>> </span>prompt = [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What is the capital of France? Answer in one word."</span>}] | |
| <span class="hljs-meta">>>> </span>response = pipe(prompt) | |
| <span class="hljs-meta">>>> </span>response[<span class="hljs-number">0</span>][<span class="hljs-string">"generated_text"</span>] | |
| [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'user'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'What is the capital of France? Answer in one word.'</span>}, {<span class="hljs-string">'role'</span>: <span class="hljs-string">'assistant'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'The capital of France is Paris.'</span>}]`,wrap:!1}}),at=new f({props:{title:"Tool Calling with SFT",local:"tool-calling-with-sft",headingTag:"h2"}}),ot=new f({props:{title:"Training Vision Language Models",local:"training-vision-language-models",headingTag:"h2"}}),it=new _({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMFNGVENvbmZpZyUyQyUyMFNGVFRyYWluZXIlMEFmcm9tJTIwZGF0YXNldHMlMjBpbXBvcnQlMjBsb2FkX2RhdGFzZXQlMEElMEF0cmFpbmVyJTIwJTNEJTIwU0ZUVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMlF3ZW4lMkZRd2VuMi41LVZMLTNCLUluc3RydWN0JTIyJTJDJTBBJTIwJTIwJTIwJTIwYXJncyUzRFNGVENvbmZpZyhtYXhfbGVuZ3RoJTNETm9uZSklMkMlMEElMjAlMjAlMjAlMjB0cmFpbl9kYXRhc2V0JTNEbG9hZF9kYXRhc2V0KCUyMnRybC1saWIlMkZsbGF2YS1pbnN0cnVjdC1taXglMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUyQyUwQSklMEF0cmFpbmVyLnRyYWluKCk=",highlighted:`<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| trainer = SFTTrainer( | |
| model=<span class="hljs-string">"Qwen/Qwen2.5-VL-3B-Instruct"</span>, | |
| args=SFTConfig(max_length=<span class="hljs-literal">None</span>), | |
| train_dataset=load_dataset(<span class="hljs-string">"trl-lib/llava-instruct-mix"</span>, split=<span class="hljs-string">"train"</span>), | |
| ) | |
| trainer.train()`,wrap:!1}}),pt=new _({props:{code:"U0ZUQ29uZmlnKG1heF9sZW5ndGglM0ROb25lJTJDJTIwLi4uKQ==",highlighted:'SFTConfig(max_length=<span class="hljs-literal">None</span>, ...)',wrap:!1}}),ct=new f({props:{title:"SFTTrainer",local:"trl.SFTTrainer",headingTag:"h2"}}),mt=new Bt({props:{name:"class trl.SFTTrainer",anchor:"trl.SFTTrainer",parameters:[{name:"model",val:": str | PreTrainedModel | PeftModel"},{name:"args",val:": trl.trainer.sft_config.SFTConfig | transformers.training_args.TrainingArguments | None = None"},{name:"data_collator",val:": collections.abc.Callable[[list[typing.Any]], dict[str, typing.Any]] | None = None"},{name:"train_dataset",val:": datasets.arrow_dataset.Dataset | datasets.iterable_dataset.IterableDataset | None = None"},{name:"eval_dataset",val:": datasets.arrow_dataset.Dataset | datasets.iterable_dataset.IterableDataset | dict[str, datasets.arrow_dataset.Dataset | datasets.iterable_dataset.IterableDataset] | None = None"},{name:"processing_class",val:": transformers.tokenization_utils_base.PreTrainedTokenizerBase | transformers.processing_utils.ProcessorMixin | None = None"},{name:"compute_loss_func",val:": collections.abc.Callable | None = None"},{name:"compute_metrics",val:": collections.abc.Callable[[transformers.trainer_utils.EvalPrediction], dict] | None = None"},{name:"callbacks",val:": list[transformers.trainer_callback.TrainerCallback] | None = None"},{name:"optimizers",val:": tuple = (None, None)"},{name:"optimizer_cls_and_kwargs",val:": tuple[type[torch.optim.optimizer.Optimizer], dict[str, typing.Any]] | None = None"},{name:"preprocess_logits_for_metrics",val:": collections.abc.Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None"},{name:"peft_config",val:": PeftConfig | None = None"},{name:"formatting_func",val:": collections.abc.Callable[[dict], str] | None = None"}],parametersDescription:[{anchor:"trl.SFTTrainer.model",description:`<strong>model</strong> (<code>str</code> or <a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a> or <a href="https://huggingface.co/docs/peft/main/en/package_reference/peft_model#peft.PeftModel" rel="nofollow">PeftModel</a>) — | |
| Model to be trained. Can be either:</p> | |
| <ul> | |
| <li>A string, being the <em>model id</em> of a pretrained model hosted inside a model repo on huggingface.co, or a | |
| path to a <em>directory</em> containing model weights saved using | |
| <a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.save_pretrained" rel="nofollow">save_pretrained</a>, e.g., <code>'./my_model_directory/'</code>. The model is loaded | |
| using <code><ModelArchitecture>.from_pretrained</code> (where <code><ModelArchitecture></code> is derived from the model | |
| config) with the keyword arguments in <code>args.model_init_kwargs</code>.</li> | |
| <li>A <a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a> object. Only causal language models are supported.</li> | |
| <li>A <a href="https://huggingface.co/docs/peft/main/en/package_reference/peft_model#peft.PeftModel" rel="nofollow">PeftModel</a> object. Only causal language models are supported. | |
| If you’re training a model with an MoE architecture and want to include the load balancing/auxiliary loss | |
| as a part of the final loss, remember to set the <code>output_router_logits</code> config of the model to <code>True</code>.</li> | |
| </ul>`,name:"model"},{anchor:"trl.SFTTrainer.args",description:`<strong>args</strong> (<a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a>, <em>optional</em>) — | |
| Configuration for this trainer. If <code>None</code>, a default configuration is used.`,name:"args"},{anchor:"trl.SFTTrainer.data_collator",description:`<strong>data_collator</strong> (<code>DataCollator</code>, <em>optional</em>) — | |
| Function to use to form a batch from a list of elements of the processed <code>train_dataset</code> or <code>eval_dataset</code>. | |
| Will default to <code>DataCollatorForLanguageModeling</code> if the model is a language model | |
| and <code>DataCollatorForVisionLanguageModeling</code> if the model is a vision-language model. | |
| Custom collators must truncate sequences before padding; the trainer does not apply post-collation | |
| truncation.`,name:"data_collator"},{anchor:"trl.SFTTrainer.train_dataset",description:`<strong>train_dataset</strong> (<a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset" rel="nofollow">Dataset</a> or <a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.IterableDataset" rel="nofollow">IterableDataset</a>) — | |
| Dataset to use for training. This trainer supports both <a href="#language-modeling">language modeling</a> type and | |
| <a href="#prompt-completion">prompt-completion</a> type. The format of the samples can be either:</p> | |
| <ul> | |
| <li><a href="dataset_formats#standard">Standard</a>: Each sample contains plain text.</li> | |
| <li><a href="dataset_formats#conversational">Conversational</a>: Each sample contains structured messages (e.g., role | |
| and content).</li> | |
| </ul> | |
| <p>The trainer also supports processed datasets (tokenized) as long as they contain an <code>input_ids</code> field.`,name:"train_dataset"},{anchor:"trl.SFTTrainer.eval_dataset",description:`<strong>eval_dataset</strong> (<a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset" rel="nofollow">Dataset</a>, <a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.IterableDataset" rel="nofollow">IterableDataset</a> or <code>dict[str, Dataset | IterableDataset]</code>) — | |
| Dataset to use for evaluation. It must meet the same requirements as <code>train_dataset</code>.`,name:"eval_dataset"},{anchor:"trl.SFTTrainer.processing_class",description:`<strong>processing_class</strong> (<a href="https://huggingface.co/docs/transformers/main/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase" rel="nofollow">PreTrainedTokenizerBase</a>, <a href="https://huggingface.co/docs/transformers/main/en/main_classes/processors#transformers.ProcessorMixin" rel="nofollow">ProcessorMixin</a>, <em>optional</em>) — | |
| Processing class used to process the data. If <code>None</code>, the processing class is loaded from the model’s name | |
| with <a href="https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoProcessor.from_pretrained" rel="nofollow">from_pretrained</a>. A padding token, <code>tokenizer.pad_token</code>, must be set. | |
| If the processing class has not set a padding token, <code>tokenizer.eos_token</code> will be used as the default.`,name:"processing_class"},{anchor:"trl.SFTTrainer.compute_loss_func",description:`<strong>compute_loss_func</strong> (<code>Callable</code>, <em>optional</em>) — | |
| A function that accepts the raw model outputs, labels, and the number of items in the entire accumulated | |
| batch (batch_size * gradient_accumulation_steps) and returns the loss. For example, see the default <a href="https://github.com/huggingface/transformers/blob/052e652d6d53c2b26ffde87e039b723949a53493/src/transformers/trainer.py#L3618" rel="nofollow">loss | |
| function</a> | |
| used by <code>Trainer</code>.`,name:"compute_loss_func"},{anchor:"trl.SFTTrainer.compute_metrics",description:`<strong>compute_metrics</strong> (<code>Callable[[EvalPrediction], dict]</code>, <em>optional</em>) — | |
| The function that will be used to compute metrics at evaluation. Must take a | |
| <a href="https://huggingface.co/docs/transformers/main/en/internal/trainer_utils#transformers.EvalPrediction" rel="nofollow">EvalPrediction</a> and return a dictionary string to metric values. When passing | |
| <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTConfig">SFTConfig</a> with <code>batch_eval_metrics</code> set to <code>True</code>, your <code>compute_metrics</code> function must take a boolean | |
| <code>compute_result</code> argument. This will be triggered after the last eval batch to signal that the function | |
| needs to calculate and return the global summary statistics rather than accumulating the batch-level | |
| statistics.`,name:"compute_metrics"},{anchor:"trl.SFTTrainer.callbacks",description:`<strong>callbacks</strong> (list of <a href="https://huggingface.co/docs/transformers/main/en/main_classes/callback#transformers.TrainerCallback" rel="nofollow">TrainerCallback</a>, <em>optional</em>) — | |
| List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed | |
| in <a href="https://huggingface.co/docs/transformers/main_classes/callback" rel="nofollow">here</a>.</p> | |
| <p>If you want to remove one of the default callbacks used, use the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.remove_callback" rel="nofollow">remove_callback</a> | |
| method.`,name:"callbacks"},{anchor:"trl.SFTTrainer.optimizers",description:`<strong>optimizers</strong> (<code>tuple[torch.optim.Optimizer | None, torch.optim.lr_scheduler.LambdaLR | None]</code>, <em>optional</em>, defaults to <code>(None, None)</code>) — | |
| A tuple containing the optimizer and the scheduler to use. Will default to an instance of <code>AdamW</code> on your | |
| model and a scheduler given by <a href="https://huggingface.co/docs/transformers/main/en/main_classes/optimizer_schedules#transformers.get_linear_schedule_with_warmup" rel="nofollow">get_linear_schedule_with_warmup</a> controlled by <code>args</code>.`,name:"optimizers"},{anchor:"trl.SFTTrainer.optimizer_cls_and_kwargs",description:`<strong>optimizer_cls_and_kwargs</strong> (<code>tuple[Type[torch.optim.Optimizer], Dict[str, Any]]</code>, <em>optional</em>) — | |
| A tuple containing the optimizer class and keyword arguments to use. Overrides <code>optim</code> and <code>optim_args</code> in | |
| <code>args</code>. Incompatible with the <code>optimizers</code> argument.</p> | |
| <p>Unlike <code>optimizers</code>, this argument avoids the need to place model parameters on the correct devices before | |
| initializing the Trainer.`,name:"optimizer_cls_and_kwargs"},{anchor:"trl.SFTTrainer.preprocess_logits_for_metrics",description:`<strong>preprocess_logits_for_metrics</strong> (<code>Callable[[torch.Tensor, torch.Tensor], torch.Tensor]</code>, <em>optional</em>) — | |
| A function that preprocess the logits right before caching them at each evaluation step. Must take two | |
| tensors, the logits and the labels, and return the logits once processed as desired. The modifications made | |
| by this function will be reflected in the predictions received by <code>compute_metrics</code>.</p> | |
| <p>Note that the labels (second parameter) will be <code>None</code> if the dataset does not have them.`,name:"preprocess_logits_for_metrics"},{anchor:"trl.SFTTrainer.peft_config",description:`<strong>peft_config</strong> (<a href="https://huggingface.co/docs/peft/main/en/package_reference/config#peft.PeftConfig" rel="nofollow">PeftConfig</a>, <em>optional</em>) — | |
| PEFT configuration used to wrap the model. If <code>None</code>, the model is not wrapped.`,name:"peft_config"},{anchor:"trl.SFTTrainer.formatting_func",description:`<strong>formatting_func</strong> (<code>Callable</code>, <em>optional</em>) — | |
| Formatting function applied to the dataset before tokenization. Applying the formatting function explicitly | |
| converts the dataset into a <a href="#language-modeling">language modeling</a> type.`,name:"formatting_func"}],source:"https://github.com/huggingface/trl/blob/vr_5544/trl/trainer/sft_trainer.py#L543"}}),Q=new cl({props:{anchor:"trl.SFTTrainer.example",$$slots:{default:[ml]},$$scope:{ctx:zt}}}),dt=new Bt({props:{name:"train",anchor:"trl.SFTTrainer.train",parameters:[{name:"resume_from_checkpoint",val:": str | bool | None = None"},{name:"trial",val:": optuna.Trial | dict[str, Any] | None = None"},{name:"ignore_keys_for_eval",val:": list[str] | None = None"}],parametersDescription:[{anchor:"trl.SFTTrainer.train.resume_from_checkpoint",description:`<strong>resume_from_checkpoint</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) — | |
| If a <code>str</code>, local path to a saved checkpoint as saved by a previous instance of <code>Trainer</code>. If a | |
| <code>bool</code> and equals <code>True</code>, load the last checkpoint in <em>args.output_dir</em> as saved by a previous instance | |
| of <code>Trainer</code>. If present, training will resume from the model/optimizer/scheduler states loaded here.`,name:"resume_from_checkpoint"},{anchor:"trl.SFTTrainer.train.trial",description:`<strong>trial</strong> (<code>optuna.Trial</code> or <code>dict[str, Any]</code>, <em>optional</em>) — | |
| The trial run or the hyperparameter dictionary for hyperparameter search.`,name:"trial"},{anchor:"trl.SFTTrainer.train.ignore_keys_for_eval",description:`<strong>ignore_keys_for_eval</strong> (<code>list[str]</code>, <em>optional</em>) — | |
| A list of keys in the output of your model (if it is a dictionary) that should be ignored when | |
| gathering predictions for evaluation during the training.`,name:"ignore_keys_for_eval"}],source:"https://github.com/huggingface/trl/blob/vr_5544/transformers/trainer.py#L1323",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Object containing the global step count, training loss, and metrics.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~trainer_utils.TrainOutput</code></p> | |
| `}}),ht=new Bt({props:{name:"save_model",anchor:"trl.SFTTrainer.save_model",parameters:[{name:"output_dir",val:": str | None = None"},{name:"_internal_call",val:": bool = False"}],source:"https://github.com/huggingface/trl/blob/vr_5544/transformers/trainer.py#L3746"}}),gt=new Bt({props:{name:"push_to_hub",anchor:"trl.SFTTrainer.push_to_hub",parameters:[{name:"commit_message",val:": str | None = 'End of training'"},{name:"blocking",val:": bool = True"},{name:"token",val:": str | None = None"},{name:"revision",val:": str | None = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"trl.SFTTrainer.push_to_hub.commit_message",description:`<strong>commit_message</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"End of training"</code>) — | |
| Message to commit while pushing.`,name:"commit_message"},{anchor:"trl.SFTTrainer.push_to_hub.blocking",description:`<strong>blocking</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether the function should return only when the <code>git push</code> has finished.`,name:"blocking"},{anchor:"trl.SFTTrainer.push_to_hub.token",description:`<strong>token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Token with write permission to overwrite Trainer’s original args.`,name:"token"},{anchor:"trl.SFTTrainer.push_to_hub.revision",description:`<strong>revision</strong> (<code>str</code>, <em>optional</em>) — | |
| The git revision to commit from. Defaults to the head of the “main” branch.`,name:"revision"},{anchor:"trl.SFTTrainer.push_to_hub.kwargs",description:`<strong>kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>) — | |
| Additional keyword arguments passed along to <code>~Trainer.create_model_card</code>.`,name:"kwargs"}],source:"https://github.com/huggingface/trl/blob/vr_5544/transformers/trainer.py#L3993",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The URL of the repository where the model was pushed if <code>blocking=False</code>, or a <code>Future</code> object tracking the | |
| progress of the commit if <code>blocking=True</code>.</p> | |
| `}}),ut=new f({props:{title:"SFTConfig",local:"trl.SFTConfig",headingTag:"h2"}}),ft=new Bt({props:{name:"class trl.SFTConfig",anchor:"trl.SFTConfig",parameters:[{name:"output_dir",val:": str | None = None"},{name:"per_device_train_batch_size",val:": int = 8"},{name:"num_train_epochs",val:": float = 3.0"},{name:"max_steps",val:": int = -1"},{name:"learning_rate",val:": float = 2e-05"},{name:"lr_scheduler_type",val:": transformers.trainer_utils.SchedulerType | str = 'linear'"},{name:"lr_scheduler_kwargs",val:": dict | str | None = None"},{name:"warmup_steps",val:": float = 0"},{name:"optim",val:": transformers.training_args.OptimizerNames | str = 'adamw_torch_fused'"},{name:"optim_args",val:": str | None = None"},{name:"weight_decay",val:": float = 0.0"},{name:"adam_beta1",val:": float = 0.9"},{name:"adam_beta2",val:": float = 0.999"},{name:"adam_epsilon",val:": float = 1e-08"},{name:"optim_target_modules",val:": None | str | list[str] = None"},{name:"gradient_accumulation_steps",val:": int = 1"},{name:"average_tokens_across_devices",val:": bool = True"},{name:"max_grad_norm",val:": float = 1.0"},{name:"label_smoothing_factor",val:": float = 0.0"},{name:"bf16",val:": bool | None = None"},{name:"fp16",val:": bool = False"},{name:"bf16_full_eval",val:": bool = False"},{name:"fp16_full_eval",val:": bool = False"},{name:"tf32",val:": bool | None = None"},{name:"gradient_checkpointing",val:": bool = True"},{name:"gradient_checkpointing_kwargs",val:": dict[str, typing.Any] | str | None = None"},{name:"torch_compile",val:": bool = False"},{name:"torch_compile_backend",val:": str | None = None"},{name:"torch_compile_mode",val:": str | None = None"},{name:"use_liger_kernel",val:": bool = False"},{name:"liger_kernel_config",val:": dict[str, bool] | None = None"},{name:"use_cache",val:": bool = False"},{name:"neftune_noise_alpha",val:": float | None = None"},{name:"torch_empty_cache_steps",val:": int | None = None"},{name:"auto_find_batch_size",val:": bool = False"},{name:"logging_strategy",val:": transformers.trainer_utils.IntervalStrategy | str = 'steps'"},{name:"logging_steps",val:": float = 10"},{name:"logging_first_step",val:": bool = False"},{name:"log_on_each_node",val:": bool = True"},{name:"logging_nan_inf_filter",val:": bool = True"},{name:"include_num_input_tokens_seen",val:": str | bool = 'no'"},{name:"log_level",val:": str = 'passive'"},{name:"log_level_replica",val:": str = 'warning'"},{name:"disable_tqdm",val:": bool | None = None"},{name:"report_to",val:": None | str | list[str] = 'none'"},{name:"run_name",val:": str | None = None"},{name:"project",val:": str = 'huggingface'"},{name:"trackio_space_id",val:": str | None = 'trackio'"},{name:"eval_strategy",val:": transformers.trainer_utils.IntervalStrategy | str = 'no'"},{name:"eval_steps",val:": float | None = None"},{name:"eval_delay",val:": float = 0"},{name:"per_device_eval_batch_size",val:": int = 8"},{name:"prediction_loss_only",val:": bool = False"},{name:"eval_on_start",val:": bool = False"},{name:"eval_do_concat_batches",val:": bool = True"},{name:"eval_use_gather_object",val:": bool = False"},{name:"eval_accumulation_steps",val:": int | None = None"},{name:"include_for_metrics",val:": list = <factory>"},{name:"batch_eval_metrics",val:": bool = False"},{name:"save_only_model",val:": bool = False"},{name:"save_strategy",val:": transformers.trainer_utils.SaveStrategy | str = 'steps'"},{name:"save_steps",val:": float = 500"},{name:"save_on_each_node",val:": bool = False"},{name:"save_total_limit",val:": int | None = None"},{name:"enable_jit_checkpoint",val:": bool = False"},{name:"push_to_hub",val:": bool = False"},{name:"hub_token",val:": str | None = None"},{name:"hub_private_repo",val:": bool | None = None"},{name:"hub_model_id",val:": str | None = None"},{name:"hub_strategy",val:": transformers.trainer_utils.HubStrategy | str = 'every_save'"},{name:"hub_always_push",val:": bool = False"},{name:"hub_revision",val:": str | None = None"},{name:"load_best_model_at_end",val:": bool = False"},{name:"metric_for_best_model",val:": str | None = None"},{name:"greater_is_better",val:": bool | None = None"},{name:"ignore_data_skip",val:": bool = False"},{name:"restore_callback_states_from_checkpoint",val:": bool = False"},{name:"full_determinism",val:": bool = False"},{name:"seed",val:": int = 42"},{name:"data_seed",val:": int | None = None"},{name:"use_cpu",val:": bool = False"},{name:"accelerator_config",val:": dict | str | None = None"},{name:"parallelism_config",val:": accelerate.parallelism_config.ParallelismConfig | None = None"},{name:"dataloader_drop_last",val:": bool = False"},{name:"dataloader_num_workers",val:": int = 0"},{name:"dataloader_pin_memory",val:": bool = True"},{name:"dataloader_persistent_workers",val:": bool = False"},{name:"dataloader_prefetch_factor",val:": int | None = None"},{name:"remove_unused_columns",val:": bool = True"},{name:"label_names",val:": list[str] | None = None"},{name:"train_sampling_strategy",val:": str = 'random'"},{name:"length_column_name",val:": str = 'length'"},{name:"ddp_find_unused_parameters",val:": bool | None = None"},{name:"ddp_bucket_cap_mb",val:": int | None = None"},{name:"ddp_broadcast_buffers",val:": bool | None = None"},{name:"ddp_backend",val:": str | None = None"},{name:"ddp_timeout",val:": int = 1800"},{name:"fsdp",val:": list[transformers.trainer_utils.FSDPOption] | str | None = None"},{name:"fsdp_config",val:": dict[str, typing.Any] | str | None = None"},{name:"deepspeed",val:": dict | str | None = None"},{name:"debug",val:": str | list[transformers.debug_utils.DebugOption] = ''"},{name:"skip_memory_metrics",val:": bool = True"},{name:"do_train",val:": bool = False"},{name:"do_eval",val:": bool = False"},{name:"do_predict",val:": bool = False"},{name:"resume_from_checkpoint",val:": str | None = None"},{name:"warmup_ratio",val:": float | None = None"},{name:"logging_dir",val:": str | None = None"},{name:"local_rank",val:": int = -1"},{name:"model_init_kwargs",val:": dict[str, typing.Any] | str | None = None"},{name:"chat_template_path",val:": str | None = None"},{name:"dataset_text_field",val:": str = 'text'"},{name:"dataset_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"dataset_num_proc",val:": int | None = None"},{name:"eos_token",val:": str | None = None"},{name:"max_length",val:": int | None = 1024"},{name:"truncation_mode",val:": str = 'keep_start'"},{name:"shuffle_dataset",val:": bool = False"},{name:"packing",val:": bool = False"},{name:"packing_strategy",val:": str = 'bfd'"},{name:"padding_free",val:": bool = False"},{name:"pad_to_multiple_of",val:": int | None = None"},{name:"eval_packing",val:": bool | None = None"},{name:"completion_only_loss",val:": bool | None = None"},{name:"assistant_only_loss",val:": bool = False"},{name:"loss_type",val:": str = 'nll'"},{name:"activation_offloading",val:": bool = False"},{name:"pad_token",val:": str | None = None"}],source:"https://github.com/huggingface/trl/blob/vr_5544/trl/trainer/sft_config.py#L23",parameterGroups:[{title:"Parameters that control the model",parametersDescription:[{anchor:"trl.SFTConfig.model_init_kwargs",description:`<strong>model_init_kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>) — | |
| Keyword arguments for <a href="https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForCausalLM.from_pretrained" rel="nofollow">from_pretrained</a>, used when the <code>model</code> | |
| argument of the <a href="/docs/trl/pr_5544/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> is provided as a string. If you’re training a MoE architecture and want to | |
| include the load balancing/auxiliary loss as a part of the final loss, remember to set | |
| <code>output_router_logits=True</code> in this dictionary.`,name:"model_init_kwargs"},{anchor:"trl.SFTConfig.chat_template_path",description:`<strong>chat_template_path</strong> (<code>str</code>, <em>optional</em>) — | |
| If specified, sets the model’s chat template. This can either be the path to a tokenizer (local directory | |
| or Hugging Face Hub model) or a direct path to a Jinja template file. When using a Jinja file, you must | |
| ensure that any special tokens referenced in the template are added to the tokenizer and that the model’s | |
| embedding layer is resized accordingly.`,name:"chat_template_path"}]},{title:"Parameters that control the data preprocessing",parametersDescription:[{anchor:"trl.SFTConfig.dataset_text_field",description:`<strong>dataset_text_field</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"text"</code>) — | |
| Name of the column that contains text data in the dataset.`,name:"dataset_text_field"},{anchor:"trl.SFTConfig.dataset_kwargs",description:`<strong>dataset_kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>) — | |
| Dictionary of optional keyword arguments for the dataset preparation. The only supported key is | |
| <code>skip_prepare_dataset</code>. When the model is a VLM, <code>skip_prepare_dataset</code> is automatically treated as <code>True</code> | |
| regardless of the provided value, since preprocessing is done on the fly.`,name:"dataset_kwargs"},{anchor:"trl.SFTConfig.dataset_num_proc",description:`<strong>dataset_num_proc</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of processes to use for processing the dataset.`,name:"dataset_num_proc"},{anchor:"trl.SFTConfig.eos_token",description:`<strong>eos_token</strong> (<code>str</code>, <em>optional</em>) — | |
| Token used to indicate the end of a turn or sequence. If <code>None</code>, it defaults to | |
| <code>processing_class.eos_token</code>.`,name:"eos_token"},{anchor:"trl.SFTConfig.max_length",description:`<strong>max_length</strong> (<code>int</code> or <code>None</code>, <em>optional</em>, defaults to <code>1024</code>) — | |
| Maximum length of the tokenized sequence. Sequences longer than <code>max_length</code> are truncated from the left | |
| or right depending on <code>truncation_mode</code>. If <code>None</code>, no truncation is applied. When packing is enabled, | |
| this value sets the sequence length.`,name:"max_length"},{anchor:"trl.SFTConfig.truncation_mode",description:`<strong>truncation_mode</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"keep_start"</code>) — | |
| Truncation mode to use when the sequence exceeds <code>max_length</code>. The only supported value is | |
| <code>"keep_start"</code>. The <code>"keep_end"</code> value is deprecated and will be removed in v2.0.0.`,name:"truncation_mode"},{anchor:"trl.SFTConfig.shuffle_dataset",description:`<strong>shuffle_dataset</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to shuffle the dataset.`,name:"shuffle_dataset"},{anchor:"trl.SFTConfig.packing",description:`<strong>packing</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to group multiple sequences into fixed-length blocks to improve computational efficiency and reduce | |
| padding. Uses <code>max_length</code> to define sequence length.`,name:"packing"},{anchor:"trl.SFTConfig.packing_strategy",description:`<strong>packing_strategy</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"bfd"</code>) — | |
| Strategy for packing sequences. Can be <code>"bfd"</code> (best-fit decreasing, truncates overflow), <code>"bfd_split"</code> | |
| (best-fit decreasing, splits overflow sequences), or <code>"wrapped"</code> (aggressive, cuts mid-sequence).`,name:"packing_strategy"},{anchor:"trl.SFTConfig.padding_free",description:`<strong>padding_free</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to perform forward passes without padding by flattening all sequences in the batch into a single | |
| continuous sequence. This reduces memory usage by eliminating padding overhead. Currently, this is only | |
| supported with the FlashAttention 2 or 3, which can efficiently handle the flattened batch structure. When | |
| packing is enabled with strategy <code>"bfd"</code>, padding-free is enabled, regardless of the value of this | |
| parameter.`,name:"padding_free"},{anchor:"trl.SFTConfig.pad_to_multiple_of",description:`<strong>pad_to_multiple_of</strong> (<code>int</code>, <em>optional</em>) — | |
| If set, the sequences will be padded to a multiple of this value.`,name:"pad_to_multiple_of"},{anchor:"trl.SFTConfig.eval_packing",description:`<strong>eval_packing</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether to pack the eval dataset. If <code>None</code>, uses the same value as <code>packing</code>.`,name:"eval_packing"}]},{title:"Parameters that control the training",parametersDescription:[{anchor:"trl.SFTConfig.completion_only_loss",description:`<strong>completion_only_loss</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether to compute loss only on the completion part of the sequence. If set to <code>True</code>, loss is computed | |
| only on the completion, which is supported only for <a href="#prompt-completion">prompt-completion</a> datasets. If | |
| <code>False</code>, loss is computed on the entire sequence. If <code>None</code> (default), the behavior depends on the dataset: | |
| loss is computed on the completion for <a href="#prompt-completion">prompt-completion</a> datasets, and on the full | |
| sequence for <a href="#language-modeling">language modeling</a> datasets.`,name:"completion_only_loss"},{anchor:"trl.SFTConfig.assistant_only_loss",description:`<strong>assistant_only_loss</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to compute loss only on the assistant part of the sequence. If set to <code>True</code>, loss is computed only | |
| on the assistant responses, which is supported only for <a href="#conversational">conversational</a> datasets. If | |
| <code>False</code>, loss is computed on the entire sequence.`,name:"assistant_only_loss"},{anchor:"trl.SFTConfig.loss_type",description:`<strong>loss_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"nll"</code>) — | |
| Type of loss to use. Possible values are <code>"nll"</code> (negative log-likelihood, default) and <code>"dft"</code> (Dynamic | |
| Fine-Tuning, as described in <a href="https://huggingface.co/papers/2508.05629" rel="nofollow">this paper</a>).`,name:"loss_type"},{anchor:"trl.SFTConfig.activation_offloading",description:`<strong>activation_offloading</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to offload the activations to the CPU.`,name:"activation_offloading"}]},{title:"Deprecated parameters",parametersDescription:[{anchor:"trl.SFTConfig.pad_token",description:`<strong>pad_token</strong> —</p> | |
| <deprecated version="1.1.0"> | |
| <p>Parameter <code>pad_token</code> is deprecated and will be removed in version v2.0.0. Set <code>tokenizer.pad_token</code> | |
| directly and pass it as <code>processing_class</code> to the trainer instead.</p> | |
| </deprecated>`,name:"pad_token"}]}]}}),yt=new pl({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/sft_trainer.md"}}),{c(){M=o("meta"),E=s(),F=o("p"),J=s(),p(j.$$.fragment),T=s(),p(C.$$.fragment),Qt=s(),H=o("p"),H.innerHTML=Dn,Rt=s(),p(V.$$.fragment),Lt=s(),X=o("p"),X.textContent=Kn,Wt=s(),A=o("p"),A.innerHTML=On,Et=s(),p(Y.$$.fragment),Ht=s(),P=o("p"),P.innerHTML=es,Vt=s(),p(D.$$.fragment),Xt=s(),k=o("iframe"),At=s(),p(K.$$.fragment),Yt=s(),O=o("p"),O.innerHTML=as,Pt=s(),p(ee.$$.fragment),Dt=s(),te=o("p"),te.innerHTML=ns,Kt=s(),p(ae.$$.fragment),Ot=s(),p(ne.$$.fragment),ea=s(),p(se.$$.fragment),ta=s(),le=o("p"),le.textContent=ss,aa=s(),oe=o("p"),oe.innerHTML=ls,na=s(),p(re.$$.fragment),sa=s(),ie=o("p"),ie.innerHTML=os,la=s(),p(pe.$$.fragment),oa=s(),ce=o("p"),ce.innerHTML=rs,ra=s(),S=o("p"),Cn=Gt("The loss used in SFT is the "),_t=o("strong"),_t.textContent=is,In=Gt(`, defined as: | |
| `),ia=new An(!1),pa=s(),$=o("p"),Fn=Gt("where "),ca=new An(!1),ma=Gt(" is the target token at timestep "),da=new An(!1),ha=Gt(", and the model is trained to predict the next token given the previous ones. In practice, padding tokens are masked out during loss computation."),ga=s(),G=o("blockquote"),G.innerHTML=ps,ua=s(),p(me.$$.fragment),fa=s(),de=o("p"),de.innerHTML=cs,Ta=s(),p(he.$$.fragment),ya=s(),ge=o("p"),ge.textContent=ms,_a=s(),ue=o("ul"),ue.innerHTML=ds,Ma=s(),p(fe.$$.fragment),wa=s(),p(Te.$$.fragment),ba=s(),ye=o("p"),ye.innerHTML=hs,va=s(),p(_e.$$.fragment),Ja=s(),Me=o("p"),Me.innerHTML=gs,ja=s(),p(we.$$.fragment),ka=s(),be=o("p"),be.innerHTML=us,Ua=s(),p(ve.$$.fragment),Ca=s(),Je=o("p"),Je.innerHTML=fs,Ia=s(),p(je.$$.fragment),Fa=s(),ke=o("p"),ke.innerHTML=Ts,$a=s(),p(Ue.$$.fragment),xa=s(),Ce=o("p"),Ce.innerHTML=ys,Na=s(),p(Ie.$$.fragment),Sa=s(),Fe=o("p"),Fe.innerHTML=_s,qa=s(),Z=o("blockquote"),Z.innerHTML=Ms,Ga=s(),p($e.$$.fragment),Za=s(),xe=o("p"),xe.innerHTML=ws,Ba=s(),p(Ne.$$.fragment),za=s(),Se=o("p"),Se.innerHTML=bs,Qa=s(),B=o("blockquote"),B.innerHTML=vs,Ra=s(),p(qe.$$.fragment),La=s(),Ge=o("p"),Ge.textContent=Js,Wa=s(),p(Ze.$$.fragment),Ea=s(),Be=o("p"),Be.innerHTML=js,Ha=s(),p(ze.$$.fragment),Va=s(),q=o("blockquote"),Mt=o("p"),Mt.textContent=ks,$n=s(),p(Qe.$$.fragment),Xa=s(),p(Re.$$.fragment),Aa=s(),Le=o("p"),Le.innerHTML=Us,Ya=s(),p(We.$$.fragment),Pa=s(),Ee=o("p"),Ee.innerHTML=Cs,Da=s(),p(He.$$.fragment),Ka=s(),Ve=o("p"),Ve.innerHTML=Is,Oa=s(),p(Xe.$$.fragment),en=s(),Ae=o("p"),Ae.innerHTML=Fs,tn=s(),Ye=o("ol"),Ye.innerHTML=$s,an=s(),Pe=o("p"),Pe.innerHTML=xs,nn=s(),p(De.$$.fragment),sn=s(),z=o("blockquote"),z.innerHTML=Ns,ln=s(),Ke=o("p"),Ke.textContent=Ss,on=s(),p(Oe.$$.fragment),rn=s(),et=o("p"),et.textContent=qs,pn=s(),p(tt.$$.fragment),cn=s(),p(at.$$.fragment),mn=s(),nt=o("p"),nt.innerHTML=Gs,dn=s(),st=o("ul"),st.innerHTML=Zs,hn=s(),lt=o("p"),lt.innerHTML=Bs,gn=s(),p(ot.$$.fragment),un=s(),rt=o("p"),rt.innerHTML=zs,fn=s(),p(it.$$.fragment),Tn=s(),I=o("blockquote"),wt=o("p"),wt.innerHTML=Qs,xn=s(),p(pt.$$.fragment),Nn=s(),bt=o("p"),bt.innerHTML=Rs,yn=s(),p(ct.$$.fragment),_n=s(),y=o("div"),p(mt.$$.fragment),Sn=s(),vt=o("p"),vt.textContent=Ls,qn=s(),Jt=o("p"),Jt.innerHTML=Ws,Gn=s(),p(Q.$$.fragment),Zn=s(),R=o("div"),p(dt.$$.fragment),Bn=s(),jt=o("p"),jt.textContent=Es,zn=s(),x=o("div"),p(ht.$$.fragment),Qn=s(),kt=o("p"),kt.innerHTML=Hs,Rn=s(),Ut=o("p"),Ut.textContent=Vs,Ln=s(),L=o("div"),p(gt.$$.fragment),Wn=s(),Ct=o("p"),Ct.innerHTML=Xs,Mn=s(),p(ut.$$.fragment),wn=s(),b=o("div"),p(ft.$$.fragment),En=s(),It=o("p"),It.innerHTML=As,Hn=s(),Ft=o("p"),Ft.innerHTML=Ys,Vn=s(),$t=o("p"),$t.innerHTML=Ps,Xn=s(),Tt=o("blockquote"),Tt.innerHTML=Ds,bn=s(),p(yt.$$.fragment),vn=s(),qt=o("p"),this.h()},l(e){const t=rl("svelte-u9bgzb",document.head);M=r(t,"META",{name:!0,content:!0}),t.forEach(a),E=l(e),F=r(e,"P",{}),U(F).forEach(a),J=l(e),c(j.$$.fragment,e),T=l(e),c(C.$$.fragment,e),Qt=l(e),H=r(e,"P",{"data-svelte-h":!0}),i(H)!=="svelte-17ndjcp"&&(H.innerHTML=Dn),Rt=l(e),c(V.$$.fragment,e),Lt=l(e),X=r(e,"P",{"data-svelte-h":!0}),i(X)!=="svelte-1fqzqtn"&&(X.textContent=Kn),Wt=l(e),A=r(e,"P",{"data-svelte-h":!0}),i(A)!=="svelte-a1ehbo"&&(A.innerHTML=On),Et=l(e),c(Y.$$.fragment,e),Ht=l(e),P=r(e,"P",{"data-svelte-h":!0}),i(P)!=="svelte-1f6s0km"&&(P.innerHTML=es),Vt=l(e),c(D.$$.fragment,e),Xt=l(e),k=r(e,"IFRAME",{src:!0,style:!0,height:!0,frameborder:!0}),U(k).forEach(a),At=l(e),c(K.$$.fragment,e),Yt=l(e),O=r(e,"P",{"data-svelte-h":!0}),i(O)!=="svelte-1pmqrc5"&&(O.innerHTML=as),Pt=l(e),c(ee.$$.fragment,e),Dt=l(e),te=r(e,"P",{"data-svelte-h":!0}),i(te)!=="svelte-i5vrs6"&&(te.innerHTML=ns),Kt=l(e),c(ae.$$.fragment,e),Ot=l(e),c(ne.$$.fragment,e),ea=l(e),c(se.$$.fragment,e),ta=l(e),le=r(e,"P",{"data-svelte-h":!0}),i(le)!=="svelte-1hfrl4z"&&(le.textContent=ss),aa=l(e),oe=r(e,"P",{"data-svelte-h":!0}),i(oe)!=="svelte-1b4ea4p"&&(oe.innerHTML=ls),na=l(e),c(re.$$.fragment,e),sa=l(e),ie=r(e,"P",{"data-svelte-h":!0}),i(ie)!=="svelte-1dpfixb"&&(ie.innerHTML=os),la=l(e),c(pe.$$.fragment,e),oa=l(e),ce=r(e,"P",{"data-svelte-h":!0}),i(ce)!=="svelte-1nyghd"&&(ce.innerHTML=rs),ra=l(e),S=r(e,"P",{});var W=U(S);Cn=Zt(W,"The loss used in SFT is the "),_t=r(W,"STRONG",{"data-svelte-h":!0}),i(_t)!=="svelte-ck6oz9"&&(_t.textContent=is),In=Zt(W,`, defined as: | |
| `),ia=Yn(W,!1),W.forEach(a),pa=l(e),$=r(e,"P",{});var xt=U($);Fn=Zt(xt,"where "),ca=Yn(xt,!1),ma=Zt(xt," is the target token at timestep "),da=Yn(xt,!1),ha=Zt(xt,", and the model is trained to predict the next token given the previous ones. In practice, padding tokens are masked out during loss computation."),xt.forEach(a),ga=l(e),G=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(G)!=="svelte-14irbdr"&&(G.innerHTML=ps),ua=l(e),c(me.$$.fragment,e),fa=l(e),de=r(e,"P",{"data-svelte-h":!0}),i(de)!=="svelte-v7q0m9"&&(de.innerHTML=cs),Ta=l(e),c(he.$$.fragment,e),ya=l(e),ge=r(e,"P",{"data-svelte-h":!0}),i(ge)!=="svelte-132s7j9"&&(ge.textContent=ms),_a=l(e),ue=r(e,"UL",{"data-svelte-h":!0}),i(ue)!=="svelte-57v9wg"&&(ue.innerHTML=ds),Ma=l(e),c(fe.$$.fragment,e),wa=l(e),c(Te.$$.fragment,e),ba=l(e),ye=r(e,"P",{"data-svelte-h":!0}),i(ye)!=="svelte-1lk8rt0"&&(ye.innerHTML=hs),va=l(e),c(_e.$$.fragment,e),Ja=l(e),Me=r(e,"P",{"data-svelte-h":!0}),i(Me)!=="svelte-b8opm0"&&(Me.innerHTML=gs),ja=l(e),c(we.$$.fragment,e),ka=l(e),be=r(e,"P",{"data-svelte-h":!0}),i(be)!=="svelte-1su7aih"&&(be.innerHTML=us),Ua=l(e),c(ve.$$.fragment,e),Ca=l(e),Je=r(e,"P",{"data-svelte-h":!0}),i(Je)!=="svelte-qvkcn6"&&(Je.innerHTML=fs),Ia=l(e),c(je.$$.fragment,e),Fa=l(e),ke=r(e,"P",{"data-svelte-h":!0}),i(ke)!=="svelte-aslsxg"&&(ke.innerHTML=Ts),$a=l(e),c(Ue.$$.fragment,e),xa=l(e),Ce=r(e,"P",{"data-svelte-h":!0}),i(Ce)!=="svelte-uu9yst"&&(Ce.innerHTML=ys),Na=l(e),c(Ie.$$.fragment,e),Sa=l(e),Fe=r(e,"P",{"data-svelte-h":!0}),i(Fe)!=="svelte-1mqpg03"&&(Fe.innerHTML=_s),qa=l(e),Z=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(Z)!=="svelte-1l23brm"&&(Z.innerHTML=Ms),Ga=l(e),c($e.$$.fragment,e),Za=l(e),xe=r(e,"P",{"data-svelte-h":!0}),i(xe)!=="svelte-14p7ad1"&&(xe.innerHTML=ws),Ba=l(e),c(Ne.$$.fragment,e),za=l(e),Se=r(e,"P",{"data-svelte-h":!0}),i(Se)!=="svelte-20raqr"&&(Se.innerHTML=bs),Qa=l(e),B=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(B)!=="svelte-33jrxz"&&(B.innerHTML=vs),Ra=l(e),c(qe.$$.fragment,e),La=l(e),Ge=r(e,"P",{"data-svelte-h":!0}),i(Ge)!=="svelte-t2zuq8"&&(Ge.textContent=Js),Wa=l(e),c(Ze.$$.fragment,e),Ea=l(e),Be=r(e,"P",{"data-svelte-h":!0}),i(Be)!=="svelte-17gxtzb"&&(Be.innerHTML=js),Ha=l(e),c(ze.$$.fragment,e),Va=l(e),q=r(e,"BLOCKQUOTE",{class:!0});var jn=U(q);Mt=r(jn,"P",{"data-svelte-h":!0}),i(Mt)!=="svelte-18fokcf"&&(Mt.textContent=ks),$n=l(jn),c(Qe.$$.fragment,jn),jn.forEach(a),Xa=l(e),c(Re.$$.fragment,e),Aa=l(e),Le=r(e,"P",{"data-svelte-h":!0}),i(Le)!=="svelte-1i5ejat"&&(Le.innerHTML=Us),Ya=l(e),c(We.$$.fragment,e),Pa=l(e),Ee=r(e,"P",{"data-svelte-h":!0}),i(Ee)!=="svelte-1sxpoxm"&&(Ee.innerHTML=Cs),Da=l(e),c(He.$$.fragment,e),Ka=l(e),Ve=r(e,"P",{"data-svelte-h":!0}),i(Ve)!=="svelte-15c0opu"&&(Ve.innerHTML=Is),Oa=l(e),c(Xe.$$.fragment,e),en=l(e),Ae=r(e,"P",{"data-svelte-h":!0}),i(Ae)!=="svelte-1qzdj9p"&&(Ae.innerHTML=Fs),tn=l(e),Ye=r(e,"OL",{"data-svelte-h":!0}),i(Ye)!=="svelte-17o324u"&&(Ye.innerHTML=$s),an=l(e),Pe=r(e,"P",{"data-svelte-h":!0}),i(Pe)!=="svelte-174o7gh"&&(Pe.innerHTML=xs),nn=l(e),c(De.$$.fragment,e),sn=l(e),z=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(z)!=="svelte-1hr8kfj"&&(z.innerHTML=Ns),ln=l(e),Ke=r(e,"P",{"data-svelte-h":!0}),i(Ke)!=="svelte-zaukez"&&(Ke.textContent=Ss),on=l(e),c(Oe.$$.fragment,e),rn=l(e),et=r(e,"P",{"data-svelte-h":!0}),i(et)!=="svelte-19zyhvl"&&(et.textContent=qs),pn=l(e),c(tt.$$.fragment,e),cn=l(e),c(at.$$.fragment,e),mn=l(e),nt=r(e,"P",{"data-svelte-h":!0}),i(nt)!=="svelte-3yd2l2"&&(nt.innerHTML=Gs),dn=l(e),st=r(e,"UL",{"data-svelte-h":!0}),i(st)!=="svelte-1vlmw2d"&&(st.innerHTML=Zs),hn=l(e),lt=r(e,"P",{"data-svelte-h":!0}),i(lt)!=="svelte-vl4ede"&&(lt.innerHTML=Bs),gn=l(e),c(ot.$$.fragment,e),un=l(e),rt=r(e,"P",{"data-svelte-h":!0}),i(rt)!=="svelte-3h0yn3"&&(rt.innerHTML=zs),fn=l(e),c(it.$$.fragment,e),Tn=l(e),I=r(e,"BLOCKQUOTE",{class:!0});var Nt=U(I);wt=r(Nt,"P",{"data-svelte-h":!0}),i(wt)!=="svelte-1gskers"&&(wt.innerHTML=Qs),xn=l(Nt),c(pt.$$.fragment,Nt),Nn=l(Nt),bt=r(Nt,"P",{"data-svelte-h":!0}),i(bt)!=="svelte-7mhisv"&&(bt.innerHTML=Rs),Nt.forEach(a),yn=l(e),c(ct.$$.fragment,e),_n=l(e),y=r(e,"DIV",{class:!0});var v=U(y);c(mt.$$.fragment,v),Sn=l(v),vt=r(v,"P",{"data-svelte-h":!0}),i(vt)!=="svelte-1pvil1z"&&(vt.textContent=Ls),qn=l(v),Jt=r(v,"P",{"data-svelte-h":!0}),i(Jt)!=="svelte-10vjtjm"&&(Jt.innerHTML=Ws),Gn=l(v),c(Q.$$.fragment,v),Zn=l(v),R=r(v,"DIV",{class:!0});var kn=U(R);c(dt.$$.fragment,kn),Bn=l(kn),jt=r(kn,"P",{"data-svelte-h":!0}),i(jt)!=="svelte-1cilnet"&&(jt.textContent=Es),kn.forEach(a),zn=l(v),x=r(v,"DIV",{class:!0});var St=U(x);c(ht.$$.fragment,St),Qn=l(St),kt=r(St,"P",{"data-svelte-h":!0}),i(kt)!=="svelte-r8h4ov"&&(kt.innerHTML=Hs),Rn=l(St),Ut=r(St,"P",{"data-svelte-h":!0}),i(Ut)!=="svelte-1e6bius"&&(Ut.textContent=Vs),St.forEach(a),Ln=l(v),L=r(v,"DIV",{class:!0});var Un=U(L);c(gt.$$.fragment,Un),Wn=l(Un),Ct=r(Un,"P",{"data-svelte-h":!0}),i(Ct)!=="svelte-8tudwd"&&(Ct.innerHTML=Xs),Un.forEach(a),v.forEach(a),Mn=l(e),c(ut.$$.fragment,e),wn=l(e),b=r(e,"DIV",{class:!0});var N=U(b);c(ft.$$.fragment,N),En=l(N),It=r(N,"P",{"data-svelte-h":!0}),i(It)!=="svelte-dqo6p5"&&(It.innerHTML=As),Hn=l(N),Ft=r(N,"P",{"data-svelte-h":!0}),i(Ft)!=="svelte-ay8xwh"&&(Ft.innerHTML=Ys),Vn=l(N),$t=r(N,"P",{"data-svelte-h":!0}),i($t)!=="svelte-ekuf1t"&&($t.innerHTML=Ps),Xn=l(N),Tt=r(N,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(Tt)!=="svelte-194v1o8"&&(Tt.innerHTML=Ds),N.forEach(a),bn=l(e),c(yt.$$.fragment,e),vn=l(e),qt=r(e,"P",{}),U(qt).forEach(a),this.h()},h(){w(M,"name","hf:doc:metadata"),w(M,"content",hl),al(k.src,ts="https://trl-lib-trackio.hf.space/?project=trl-documentation&metrics=train*&sidebar=hidden&runs=sft_qwen3-0.6B_capybara")||w(k,"src",ts),Pn(k,"width","100%"),Pn(k,"min-width","300px"),Pn(k,"max-width","800px"),w(k,"height","830"),w(k,"frameborder","0"),ia.a=null,ca.a=ma,da.a=ha,w(G,"class","tip"),w(Z,"class","warning"),w(B,"class","tip"),w(q,"class","tip"),w(z,"class","warning"),w(I,"class","tip"),w(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),w(Tt,"class","note"),w(b,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){u(document.head,M),n(e,E,t),n(e,F,t),n(e,J,t),m(j,e,t),n(e,T,t),m(C,e,t),n(e,Qt,t),n(e,H,t),n(e,Rt,t),m(V,e,t),n(e,Lt,t),n(e,X,t),n(e,Wt,t),n(e,A,t),n(e,Et,t),m(Y,e,t),n(e,Ht,t),n(e,P,t),n(e,Vt,t),m(D,e,t),n(e,Xt,t),n(e,k,t),n(e,At,t),m(K,e,t),n(e,Yt,t),n(e,O,t),n(e,Pt,t),m(ee,e,t),n(e,Dt,t),n(e,te,t),n(e,Kt,t),m(ae,e,t),n(e,Ot,t),m(ne,e,t),n(e,ea,t),m(se,e,t),n(e,ta,t),n(e,le,t),n(e,aa,t),n(e,oe,t),n(e,na,t),m(re,e,t),n(e,sa,t),n(e,ie,t),n(e,la,t),m(pe,e,t),n(e,oa,t),n(e,ce,t),n(e,ra,t),n(e,S,t),u(S,Cn),u(S,_t),u(S,In),ia.m(Ks,S),n(e,pa,t),n(e,$,t),u($,Fn),ca.m(Os,$),u($,ma),da.m(el,$),u($,ha),n(e,ga,t),n(e,G,t),n(e,ua,t),m(me,e,t),n(e,fa,t),n(e,de,t),n(e,Ta,t),m(he,e,t),n(e,ya,t),n(e,ge,t),n(e,_a,t),n(e,ue,t),n(e,Ma,t),m(fe,e,t),n(e,wa,t),m(Te,e,t),n(e,ba,t),n(e,ye,t),n(e,va,t),m(_e,e,t),n(e,Ja,t),n(e,Me,t),n(e,ja,t),m(we,e,t),n(e,ka,t),n(e,be,t),n(e,Ua,t),m(ve,e,t),n(e,Ca,t),n(e,Je,t),n(e,Ia,t),m(je,e,t),n(e,Fa,t),n(e,ke,t),n(e,$a,t),m(Ue,e,t),n(e,xa,t),n(e,Ce,t),n(e,Na,t),m(Ie,e,t),n(e,Sa,t),n(e,Fe,t),n(e,qa,t),n(e,Z,t),n(e,Ga,t),m($e,e,t),n(e,Za,t),n(e,xe,t),n(e,Ba,t),m(Ne,e,t),n(e,za,t),n(e,Se,t),n(e,Qa,t),n(e,B,t),n(e,Ra,t),m(qe,e,t),n(e,La,t),n(e,Ge,t),n(e,Wa,t),m(Ze,e,t),n(e,Ea,t),n(e,Be,t),n(e,Ha,t),m(ze,e,t),n(e,Va,t),n(e,q,t),u(q,Mt),u(q,$n),m(Qe,q,null),n(e,Xa,t),m(Re,e,t),n(e,Aa,t),n(e,Le,t),n(e,Ya,t),m(We,e,t),n(e,Pa,t),n(e,Ee,t),n(e,Da,t),m(He,e,t),n(e,Ka,t),n(e,Ve,t),n(e,Oa,t),m(Xe,e,t),n(e,en,t),n(e,Ae,t),n(e,tn,t),n(e,Ye,t),n(e,an,t),n(e,Pe,t),n(e,nn,t),m(De,e,t),n(e,sn,t),n(e,z,t),n(e,ln,t),n(e,Ke,t),n(e,on,t),m(Oe,e,t),n(e,rn,t),n(e,et,t),n(e,pn,t),m(tt,e,t),n(e,cn,t),m(at,e,t),n(e,mn,t),n(e,nt,t),n(e,dn,t),n(e,st,t),n(e,hn,t),n(e,lt,t),n(e,gn,t),m(ot,e,t),n(e,un,t),n(e,rt,t),n(e,fn,t),m(it,e,t),n(e,Tn,t),n(e,I,t),u(I,wt),u(I,xn),m(pt,I,null),u(I,Nn),u(I,bt),n(e,yn,t),m(ct,e,t),n(e,_n,t),n(e,y,t),m(mt,y,null),u(y,Sn),u(y,vt),u(y,qn),u(y,Jt),u(y,Gn),m(Q,y,null),u(y,Zn),u(y,R),m(dt,R,null),u(R,Bn),u(R,jt),u(y,zn),u(y,x),m(ht,x,null),u(x,Qn),u(x,kt),u(x,Rn),u(x,Ut),u(y,Ln),u(y,L),m(gt,L,null),u(L,Wn),u(L,Ct),n(e,Mn,t),m(ut,e,t),n(e,wn,t),n(e,b,t),m(ft,b,null),u(b,En),u(b,It),u(b,Hn),u(b,Ft),u(b,Vn),u(b,$t),u(b,Xn),u(b,Tt),n(e,bn,t),m(yt,e,t),n(e,vn,t),n(e,qt,t),Jn=!0},p(e,[t]){const W={};t&2&&(W.$$scope={dirty:t,ctx:e}),Q.$set(W)},i(e){Jn||(d(j.$$.fragment,e),d(C.$$.fragment,e),d(V.$$.fragment,e),d(Y.$$.fragment,e),d(D.$$.fragment,e),d(K.$$.fragment,e),d(ee.$$.fragment,e),d(ae.$$.fragment,e),d(ne.$$.fragment,e),d(se.$$.fragment,e),d(re.$$.fragment,e),d(pe.$$.fragment,e),d(me.$$.fragment,e),d(he.$$.fragment,e),d(fe.$$.fragment,e),d(Te.$$.fragment,e),d(_e.$$.fragment,e),d(we.$$.fragment,e),d(ve.$$.fragment,e),d(je.$$.fragment,e),d(Ue.$$.fragment,e),d(Ie.$$.fragment,e),d($e.$$.fragment,e),d(Ne.$$.fragment,e),d(qe.$$.fragment,e),d(Ze.$$.fragment,e),d(ze.$$.fragment,e),d(Qe.$$.fragment,e),d(Re.$$.fragment,e),d(We.$$.fragment,e),d(He.$$.fragment,e),d(Xe.$$.fragment,e),d(De.$$.fragment,e),d(Oe.$$.fragment,e),d(tt.$$.fragment,e),d(at.$$.fragment,e),d(ot.$$.fragment,e),d(it.$$.fragment,e),d(pt.$$.fragment,e),d(ct.$$.fragment,e),d(mt.$$.fragment,e),d(Q.$$.fragment,e),d(dt.$$.fragment,e),d(ht.$$.fragment,e),d(gt.$$.fragment,e),d(ut.$$.fragment,e),d(ft.$$.fragment,e),d(yt.$$.fragment,e),Jn=!0)},o(e){h(j.$$.fragment,e),h(C.$$.fragment,e),h(V.$$.fragment,e),h(Y.$$.fragment,e),h(D.$$.fragment,e),h(K.$$.fragment,e),h(ee.$$.fragment,e),h(ae.$$.fragment,e),h(ne.$$.fragment,e),h(se.$$.fragment,e),h(re.$$.fragment,e),h(pe.$$.fragment,e),h(me.$$.fragment,e),h(he.$$.fragment,e),h(fe.$$.fragment,e),h(Te.$$.fragment,e),h(_e.$$.fragment,e),h(we.$$.fragment,e),h(ve.$$.fragment,e),h(je.$$.fragment,e),h(Ue.$$.fragment,e),h(Ie.$$.fragment,e),h($e.$$.fragment,e),h(Ne.$$.fragment,e),h(qe.$$.fragment,e),h(Ze.$$.fragment,e),h(ze.$$.fragment,e),h(Qe.$$.fragment,e),h(Re.$$.fragment,e),h(We.$$.fragment,e),h(He.$$.fragment,e),h(Xe.$$.fragment,e),h(De.$$.fragment,e),h(Oe.$$.fragment,e),h(tt.$$.fragment,e),h(at.$$.fragment,e),h(ot.$$.fragment,e),h(it.$$.fragment,e),h(pt.$$.fragment,e),h(ct.$$.fragment,e),h(mt.$$.fragment,e),h(Q.$$.fragment,e),h(dt.$$.fragment,e),h(ht.$$.fragment,e),h(gt.$$.fragment,e),h(ut.$$.fragment,e),h(ft.$$.fragment,e),h(yt.$$.fragment,e),Jn=!1},d(e){e&&(a(E),a(F),a(J),a(T),a(Qt),a(H),a(Rt),a(Lt),a(X),a(Wt),a(A),a(Et),a(Ht),a(P),a(Vt),a(Xt),a(k),a(At),a(Yt),a(O),a(Pt),a(Dt),a(te),a(Kt),a(Ot),a(ea),a(ta),a(le),a(aa),a(oe),a(na),a(sa),a(ie),a(la),a(oa),a(ce),a(ra),a(S),a(pa),a($),a(ga),a(G),a(ua),a(fa),a(de),a(Ta),a(ya),a(ge),a(_a),a(ue),a(Ma),a(wa),a(ba),a(ye),a(va),a(Ja),a(Me),a(ja),a(ka),a(be),a(Ua),a(Ca),a(Je),a(Ia),a(Fa),a(ke),a($a),a(xa),a(Ce),a(Na),a(Sa),a(Fe),a(qa),a(Z),a(Ga),a(Za),a(xe),a(Ba),a(za),a(Se),a(Qa),a(B),a(Ra),a(La),a(Ge),a(Wa),a(Ea),a(Be),a(Ha),a(Va),a(q),a(Xa),a(Aa),a(Le),a(Ya),a(Pa),a(Ee),a(Da),a(Ka),a(Ve),a(Oa),a(en),a(Ae),a(tn),a(Ye),a(an),a(Pe),a(nn),a(sn),a(z),a(ln),a(Ke),a(on),a(rn),a(et),a(pn),a(cn),a(mn),a(nt),a(dn),a(st),a(hn),a(lt),a(gn),a(un),a(rt),a(fn),a(Tn),a(I),a(yn),a(_n),a(y),a(Mn),a(wn),a(b),a(bn),a(vn),a(qt)),a(M),g(j,e),g(C,e),g(V,e),g(Y,e),g(D,e),g(K,e),g(ee,e),g(ae,e),g(ne,e),g(se,e),g(re,e),g(pe,e),g(me,e),g(he,e),g(fe,e),g(Te,e),g(_e,e),g(we,e),g(ve,e),g(je,e),g(Ue,e),g(Ie,e),g($e,e),g(Ne,e),g(qe,e),g(Ze,e),g(ze,e),g(Qe),g(Re,e),g(We,e),g(He,e),g(Xe,e),g(De,e),g(Oe,e),g(tt,e),g(at,e),g(ot,e),g(it,e),g(pt),g(ct,e),g(mt),g(Q),g(dt),g(ht),g(gt),g(ut,e),g(ft),g(yt,e)}}}const hl='{"title":"SFT Trainer","local":"sft-trainer","sections":[{"title":"Overview","local":"overview","sections":[],"depth":2},{"title":"Quick start","local":"quick-start","sections":[],"depth":2},{"title":"Expected dataset type and format","local":"expected-dataset-type-and-format","sections":[],"depth":2},{"title":"Looking deeper into the SFT method","local":"looking-deeper-into-the-sft-method","sections":[{"title":"Preprocessing and tokenization","local":"preprocessing-and-tokenization","sections":[],"depth":3},{"title":"Computing the loss","local":"computing-the-loss","sections":[],"depth":3},{"title":"Label shifting and masking","local":"label-shifting-and-masking","sections":[],"depth":3}],"depth":2},{"title":"Logged metrics","local":"logged-metrics","sections":[],"depth":2},{"title":"Customization","local":"customization","sections":[{"title":"Model initialization","local":"model-initialization","sections":[],"depth":3},{"title":"Packing","local":"packing","sections":[],"depth":3},{"title":"Train on assistant messages only","local":"train-on-assistant-messages-only","sections":[],"depth":3},{"title":"Train on completion only","local":"train-on-completion-only","sections":[],"depth":3},{"title":"Train adapters with PEFT","local":"train-adapters-with-peft","sections":[],"depth":3},{"title":"Train with Liger Kernel","local":"train-with-liger-kernel","sections":[],"depth":3},{"title":"Rapid Experimentation for SFT","local":"rapid-experimentation-for-sft","sections":[],"depth":3},{"title":"Train with Unsloth","local":"train-with-unsloth","sections":[],"depth":3}],"depth":2},{"title":"Instruction tuning example","local":"instruction-tuning-example","sections":[],"depth":2},{"title":"Tool Calling with SFT","local":"tool-calling-with-sft","sections":[],"depth":2},{"title":"Training Vision Language Models","local":"training-vision-language-models","sections":[],"depth":2},{"title":"SFTTrainer","local":"trl.SFTTrainer","sections":[],"depth":2},{"title":"SFTConfig","local":"trl.SFTConfig","sections":[],"depth":2}],"depth":1}';function gl(zt){return nl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class wl extends ll{constructor(M){super(),ol(this,M,gl,dl,tl,{})}}export{wl as component}; | |
Xet Storage Details
- Size:
- 98.6 kB
- Xet hash:
- 5a7e15c4af2034f4000e681108e4499ede781d2cc288c7d93929e3488b35ee4f
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.