Buckets:

HuggingFaceDocBuilder's picture
download
raw
57.3 kB
import{s as Jn,n as Un,o as kn}from"../chunks/scheduler.7b731bd4.js";import{S as jn,i as Cn,e as s,s as l,c as m,h as Ln,a as r,d as n,b as a,f as k,g as d,j as i,k as M,l as g,m as o,n as c,t as p,o as u,p as _}from"../chunks/index.cc268345.js";import{C as Nn,H as y,E as $n}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js";import{D as Ge}from"../chunks/Docstring.03f7b462.js";import{C as we}from"../chunks/CodeBlock.169a125f.js";function Gn(Ht){let T,Fe,Ie,qe,j,Ze,C,Ee,L,Rt='<a href="https://huggingface.co/models?other=sft,gold" rel="nofollow"><img src="https://img.shields.io/badge/All_models-GOLD-blue" alt="All_models-GOLD-blue"/></a>',ze,N,We,$,Qt=`General Online Logit Distillation (GOLD) is an extension of Universal Logit Distillation (ULD) that supports
student/teacher pairs with different tokenizers. It aligns the textual spans produced by both tokenizers and merges the
associated logits so no completion tokens are dropped. This enables cross-tokenizer knowledge distillation, including
mixed model families (for example, LLaMA students with Qwen teachers).`,Oe,G,Xt="Key capabilities:",Ae,I,Vt='<li><strong>Cross-tokenizer alignment</strong> – GOLD incrementally decodes the student and teacher tokens, groups passages with the same visible text, and merges probabilities inside each group. This guarantees loss terms are computed over the full completion even when token boundaries differ.</li> <li><strong>Hybrid ULD loss</strong> – when <code>uld_use_hybrid_loss</code> is enabled, GOLD compares exact vocabulary matches directly and falls back to the original sorted-probability ULD loss for unmatched tokens. This improves stability for students whose vocabularies only partially overlap with the teacher.</li> <li><strong>Seamless integration with GKD</strong> – GOLD inherits the on-policy vs. off-policy scheduling from the <a href="/docs/trl/pr_5607/en/gkd_trainer#trl.experimental.gkd.GKDTrainer">experimental.gkd.GKDTrainer</a>, so you can combine sequence-level KD, generalized JSD, and cross-tokenizer distillation in a single training run.</li>',He,w,St="<p>GOLD is currently part of the <code>trl.experimental</code> namespace. APIs may change without notice while the feature is iterated on.</p>",Re,D,Qe,F,Bt=`The <code>GOLDTrainer</code> subclasses <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a> and accepts the same datasets as other TRL trainers (lists of ChatML style
messages). Important configuration flags on <code>GOLDConfig</code> include:`,Xe,q,Pt=`<li><code>use_uld_loss</code> – toggles Universal Logit Distillation. Set this to <code>True</code> for cross-tokenizer setups.</li> <li><code>teacher_tokenizer_name_or_path</code> – required when <code>use_uld_loss=True</code>; GOLD uses the teacher tokenizer to align tokens.</li> <li><code>uld_use_hybrid_loss</code>, <code>uld_hybrid_matched_weight</code>, <code>uld_hybrid_unmatched_weight</code> – enables and weights the hybrid
matched/unmatched loss.</li> <li><code>beta</code>, <code>lmbda</code>, <code>seq_kd</code> – inherited from <a href="/docs/trl/pr_5607/en/gkd_trainer#trl.experimental.gkd.GKDConfig">experimental.gkd.GKDConfig</a>, controlling the generalized JSD interpolation and on-policy
sampling ratio.</li> <li><code>num_generations</code>, <code>generation_batch_size</code> – control buffered rollout generation across gradient accumulation windows.
<code>generation_batch_size</code> is the number of unique prompts per worker per optimizer step.</li> <li><code>model_revision</code> – controls which student model revision GOLD loads for training and generation.</li>`,Ve,Z,Yt="A minimal end-to-end example:",Se,E,Be,z,Kt="For quick-start workflows you can rely on string identifiers as shown above—the trainer will load the model and tokenizer for you. Explicitly instantiating <code>AutoModelForCausalLM</code>, <code>AutoTokenizer</code>, or populating <code>GOLDConfig</code> is recommended only for advanced use cases where you need fine-grained control over initialization.",Pe,W,en="A more explicit setup might look like this when you need to customise model loading, tokenizer settings, or training arguments:",Ye,O,Ke,x,tn=`<p>GOLD buffers one full optimizer-window generation batch (<code>per_device_train_batch_size * gradient_accumulation_steps</code>)
and reuses it across accumulation steps. If the final batch is undersized, GOLD warns and drops that last batch
(<code>Dropping last batch due to unexpected batch size</code>). Set <code>dataloader_drop_last=True</code> to avoid this warning.</p>`,et,A,tt,H,nn='GOLD requires a <a href="dataset_formats#conversational">conversational</a> <a href="dataset_formats#language_modeling">language modeling</a> dataset, e.g.:',nt,R,ot,Q,on=`<code>GOLDTrainer</code> keeps the raw messages so the ChatML collator can construct prompts and completions with the correct
boundaries.`,lt,X,at,V,ln="When student and teacher use different tokenizers, the same text may be split differently:",st,S,an="<li><strong>Student</strong>: <code>&quot;Hugging Face&quot;</code> → 1 token</li> <li><strong>Teacher</strong>: <code>&quot;Hugging&quot;</code>, <code>&quot; Face&quot;</code> → 2 tokens</li>",rt,B,sn="GOLD aligns these sequences and merges the teacher’s multi-token probabilities into a single distribution that can be compared with the student’s single-token distribution.",it,P,mt,Y,rn="For a teacher sequence of tokens <code>[token₀, token₁, ..., tokenₖ]</code> that maps to a single student token, GOLD computes:",dt,K,ct,ee,mn="where:",pt,te,dn="<li><code>P(y | context)</code> is the marginal probability distribution over all vocabulary tokens at the first position</li> <li><code>P(tokenᵢ | ..., context)</code> are <strong>scalar</strong> conditional probabilities of the actual tokens that were generated</li>",ut,ne,cn="<strong>Key insight</strong>: Only the conditional probabilities of the <strong>actual continuation tokens</strong> are extracted as scalars. The full marginal distribution at the first position is then scaled by multiplying these scalar probabilities.",_t,oe,pn="This ensures:",gt,le,un="<li><strong>Correct joint probability</strong> for the actual generated sequence (by the chain rule)</li> <li><strong>Reasonable approximation</strong> for counterfactual tokens (scaled by the same continuation likelihood)</li> <li><strong>Unnormalized distributions</strong> that preserve the correct relative probabilities for ULD loss computation</li>",ht,ae,ft,se,_n="Given:",vt,re,bt,ie,gn="If tokens 0 and 1 are merged, and the actual sequence was <code>[&quot;HF&quot;, &quot;is&quot;]</code>:",Mt,me,yt,de,hn="The merged distribution is unnormalized (sums to 0.81), but this is intentional and correct for ULD loss computation, which uses sorting and L1 distance.",Tt,ce,wt,pe,fn='Use <a href="https://github.com/huggingface/trl/blob/main/trl/experimental/gold/gold.py" rel="nofollow"><code>trl/experimental/gold/gold.py</code></a> to launch GOLD training from the command line. The script supports full training and LoRA via the standard <code>ModelConfig</code> flags.',xt,ue,Jt,_e,Ut,h,ge,It,J,he,Dt,xe,vn="Main training entry point.",Ft,Je,fe,qt,v,ve,Zt,Ue,bn="Will save the model, so you can reload it using <code>from_pretrained()</code>.",Et,ke,Mn="Will only save from the main process.",zt,U,be,Wt,je,yn="Upload <code>self.model</code> and <code>self.processing_class</code> to the 🤗 model hub on the repo <code>self.args.hub_model_id</code>.",kt,Me,jt,f,ye,Ot,Ce,Tn="Configuration class for <code>GOLDTrainer</code>.",At,Le,wn=`This class includes only the parameters that are specific to GOLD training. For a full list of training arguments,
please refer to the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a> and <a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTConfig">SFTConfig</a> documentation.`,Ct,Te,Lt,De,Nt;return j=new Nn({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),C=new y({props:{title:"General Online Logit Distillation (GOLD) Trainer",local:"general-online-logit-distillation-gold-trainer",headingTag:"h1"}}),N=new y({props:{title:"Overview",local:"overview",headingTag:"h2"}}),D=new y({props:{title:"Usage tips",local:"usage-tips",headingTag:"h2"}}),E=new we({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZnJvbSUyMHRybC5leHBlcmltZW50YWwuZ29sZCUyMGltcG9ydCUyMEdPTERDb25maWclMkMlMjBHT0xEVHJhaW5lciUwQSUwQXRyYWluX2RhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTBBJTIwJTIwJTIwJTIwJTIySHVnZ2luZ0ZhY2VUQiUyRk9wZW5SMS1NYXRoLTIyMGstZGVmYXVsdC12ZXJpZmllZCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMmFsbCUyMiUyQyUwQSUyMCUyMCUyMCUyMHNwbGl0JTNEJTIydHJhaW4lNUIlM0ExMDI0JTVEJTIyJTJDJTBBKSUwQSUwQXRyYWluZXIlMjAlM0QlMjBHT0xEVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMm1ldGEtbGxhbWElMkZMbGFtYS0zLjItMUItSW5zdHJ1Y3QlMjIlMkMlMEElMjAlMjAlMjAlMjB0ZWFjaGVyX21vZGVsJTNEJTIyUXdlbiUyRlF3ZW4yLjUtMC41Qi1JbnN0cnVjdCUyMiUyQyUwQSUyMCUyMCUyMCUyMGFyZ3MlM0RHT0xEQ29uZmlnKG91dHB1dF9kaXIlM0QlMjJnb2xkLW1vZGVsJTIyJTJDJTIwdXNlX3VsZF9sb3NzJTNEVHJ1ZSUyQyUyMHRlYWNoZXJfdG9rZW5pemVyX25hbWVfb3JfcGF0aCUzRCUyMlF3ZW4lMkZRd2VuMi41LTAuNUItSW5zdHJ1Y3QlMjIpJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRHRyYWluX2RhdGFzZXQlMkMlMEEpJTBBdHJhaW5lci50cmFpbigp",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-keyword">from</span> trl.experimental.gold <span class="hljs-keyword">import</span> GOLDConfig, GOLDTrainer
train_dataset = load_dataset(
<span class="hljs-string">&quot;HuggingFaceTB/OpenR1-Math-220k-default-verified&quot;</span>,
<span class="hljs-string">&quot;all&quot;</span>,
split=<span class="hljs-string">&quot;train[:1024]&quot;</span>,
)
trainer = GOLDTrainer(
model=<span class="hljs-string">&quot;meta-llama/Llama-3.2-1B-Instruct&quot;</span>,
teacher_model=<span class="hljs-string">&quot;Qwen/Qwen2.5-0.5B-Instruct&quot;</span>,
args=GOLDConfig(output_dir=<span class="hljs-string">&quot;gold-model&quot;</span>, use_uld_loss=<span class="hljs-literal">True</span>, teacher_tokenizer_name_or_path=<span class="hljs-string">&quot;Qwen/Qwen2.5-0.5B-Instruct&quot;</span>),
train_dataset=train_dataset,
)
trainer.train()`,wrap:!1}}),O=new we({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZnJvbSUyMHRybCUyMGltcG9ydCUyMEdPTERDb25maWclMkMlMjBHT0xEVHJhaW5lciUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTSUyQyUyMEF1dG9Ub2tlbml6ZXIlMEElMEFzdHVkZW50X25hbWUlMjAlM0QlMjAlMjJtZXRhLWxsYW1hJTJGTGxhbWEtMy4yLTFCLUluc3RydWN0JTIyJTBBdGVhY2hlcl9uYW1lJTIwJTNEJTIwJTIyUXdlbiUyRlF3ZW4yLjUtMC41Qi1JbnN0cnVjdCUyMiUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKHN0dWRlbnRfbmFtZSklMEFpZiUyMHRva2VuaXplci5wYWRfdG9rZW4lMjBpcyUyME5vbmUlM0ElMEElMjAlMjAlMjAlMjB0b2tlbml6ZXIucGFkX3Rva2VuJTIwJTNEJTIwdG9rZW5pemVyLmVvc190b2tlbiUwQSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKHN0dWRlbnRfbmFtZSklMEF0ZWFjaGVyX21vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yQ2F1c2FsTE0uZnJvbV9wcmV0cmFpbmVkKHRlYWNoZXJfbmFtZSklMEElMEF0cmFpbl9kYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUwQSUyMCUyMCUyMCUyMCUyMkh1Z2dpbmdGYWNlVEIlMkZDb3VudGRvd24tVGFzay1HT0xEJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIydmVyaWZpZWRfUXdlbjIuNS0wLjVCLUluc3RydWN0JTIyJTJDJTBBJTIwJTIwJTIwJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiUyQyUwQSklMEElMEF0cmFpbmluZ19hcmdzJTIwJTNEJTIwR09MRENvbmZpZyglMEElMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEJTIyZ29sZC1tb2RlbCUyMiUyQyUwQSUyMCUyMCUyMCUyMHBlcl9kZXZpY2VfdHJhaW5fYmF0Y2hfc2l6ZSUzRDElMkMlMEElMjAlMjAlMjAlMjB0ZWFjaGVyX21vZGVsX25hbWVfb3JfcGF0aCUzRHRlYWNoZXJfbmFtZSUyQyUwQSUyMCUyMCUyMCUyMHRlYWNoZXJfdG9rZW5pemVyX25hbWVfb3JfcGF0aCUzRHRlYWNoZXJfbmFtZSUyQyUwQSUyMCUyMCUyMCUyMHVzZV91bGRfbG9zcyUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjB1bGRfdXNlX2h5YnJpZF9sb3NzJTNEVHJ1ZSUyQyUwQSklMEElMEF0cmFpbmVyJTIwJTNEJTIwR09MRFRyYWluZXIoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0Rtb2RlbCUyQyUwQSUyMCUyMCUyMCUyMHRlYWNoZXJfbW9kZWwlM0R0ZWFjaGVyX21vZGVsJTJDJTBBJTIwJTIwJTIwJTIwYXJncyUzRHRyYWluaW5nX2FyZ3MlMkMlMEElMjAlMjAlMjAlMjBwcm9jZXNzaW5nX2NsYXNzJTNEdG9rZW5pemVyJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRHRyYWluX2RhdGFzZXQlMkMlMEEpJTBBdHJhaW5lci50cmFpbigp",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> GOLDConfig, GOLDTrainer
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
student_name = <span class="hljs-string">&quot;meta-llama/Llama-3.2-1B-Instruct&quot;</span>
teacher_name = <span class="hljs-string">&quot;Qwen/Qwen2.5-0.5B-Instruct&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(student_name)
<span class="hljs-keyword">if</span> tokenizer.pad_token <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(student_name)
teacher_model = AutoModelForCausalLM.from_pretrained(teacher_name)
train_dataset = load_dataset(
<span class="hljs-string">&quot;HuggingFaceTB/Countdown-Task-GOLD&quot;</span>,
<span class="hljs-string">&quot;verified_Qwen2.5-0.5B-Instruct&quot;</span>,
split=<span class="hljs-string">&quot;train&quot;</span>,
)
training_args = GOLDConfig(
output_dir=<span class="hljs-string">&quot;gold-model&quot;</span>,
per_device_train_batch_size=<span class="hljs-number">1</span>,
teacher_model_name_or_path=teacher_name,
teacher_tokenizer_name_or_path=teacher_name,
use_uld_loss=<span class="hljs-literal">True</span>,
uld_use_hybrid_loss=<span class="hljs-literal">True</span>,
)
trainer = GOLDTrainer(
model=model,
teacher_model=teacher_model,
args=training_args,
processing_class=tokenizer,
train_dataset=train_dataset,
)
trainer.train()`,wrap:!1}}),A=new y({props:{title:"Expected dataset type",local:"expected-dataset-type",headingTag:"h3"}}),R=new we({props:{code:"JTdCJTIybWVzc2FnZXMlMjIlM0ElMjAlNUIlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJXaGF0JTIwY29sb3IlMjBpcyUyMHRoZSUyMHNreSUzRiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJhc3Npc3RhbnQlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIySXQlMjBpcyUyMGJsdWUuJTIyJTdEJTVEJTdE",highlighted:`{<span class="hljs-string">&quot;messages&quot;</span>: [{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What color is the sky?&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;It is blue.&quot;</span>}]}`,wrap:!1}}),X=new y({props:{title:"How Token Merging Works",local:"how-token-merging-works",headingTag:"h2"}}),P=new y({props:{title:"Probability Merging",local:"probability-merging",headingTag:"h3"}}),K=new we({props:{code:"UF9tZXJnZWQoeSklMjAlM0QlMjBQKHklMjAlN0MlMjBjb250ZXh0KSUyMCVDMyU5NyUyMFAodG9rZW4lRTIlODIlODElMjAlN0MlMjB0b2tlbiVFMiU4MiU4MCUyQyUyMGNvbnRleHQpJTIwJUMzJTk3JTIwLi4uJTIwJUMzJTk3JTIwUCh0b2tlbiVFMiU4MiU5NiUyMCU3QyUyMC4uLiUyQyUyMGNvbnRleHQp",highlighted:'<span class="hljs-constructor">P_merged(<span class="hljs-params">y</span>)</span> = <span class="hljs-constructor">P(<span class="hljs-params">y</span> | <span class="hljs-params">context</span>)</span> × <span class="hljs-constructor">P(<span class="hljs-params">token</span>₁ | <span class="hljs-params">token</span>₀, <span class="hljs-params">context</span>)</span> ×<span class="hljs-operator"> ... </span>× <span class="hljs-constructor">P(<span class="hljs-params">token</span>ₖ | <span class="hljs-operator">...</span>, <span class="hljs-params">context</span>)</span>',wrap:!1}}),ae=new y({props:{title:"Example",local:"example",headingTag:"h3"}}),re=new we({props:{code:"UCh4JUUyJTgyJTgwKSUzQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1QiUyMkhGJTIyJTNBJTIwMC42JTJDJTIwJTIwJTIyaXMlMjIlM0ElMjAwLjMlMkMlMjAlMjAlMjJjb29sJTIyJTNBJTIwMC4xJTVEJTBBUCh4JUUyJTgyJTgxJTIwJTdDJTIwJTIySEYlMjIpJTNBJTIwJTIwJTVCJTIySEYlMjIlM0ElMjAwLjA1JTJDJTIwJTIyaXMlMjIlM0ElMjAwLjklMkMlMjAlMjAlMjJjb29sJTIyJTNBJTIwMC4wNSU1RA==",highlighted:`<span class="hljs-function"><span class="hljs-title">P</span><span class="hljs-params">(x₀)</span></span>: <span class="hljs-selector-attr">[<span class="hljs-string">&quot;HF&quot;</span>: 0.6, <span class="hljs-string">&quot;is&quot;</span>: 0.3, <span class="hljs-string">&quot;cool&quot;</span>: 0.1]</span>
<span class="hljs-function"><span class="hljs-title">P</span><span class="hljs-params">(x₁ | <span class="hljs-string">&quot;HF&quot;</span>)</span></span>: <span class="hljs-selector-attr">[<span class="hljs-string">&quot;HF&quot;</span>: 0.05, <span class="hljs-string">&quot;is&quot;</span>: 0.9, <span class="hljs-string">&quot;cool&quot;</span>: 0.05]</span>`,wrap:!1}}),me=new we({props:{code:"UF9tZXJnZWQoJTIySEYlMjIpJTIwJTIwJTIwJTNEJTIwMC42JTIwJUMzJTk3JTIwMC45JTIwJTNEJTIwMC41NCUyMCUyMCVFMiU5QyU5MyUyMChjb3JyZWN0JTIwam9pbnQlMjBwcm9iYWJpbGl0eSklMEFQX21lcmdlZCglMjJpcyUyMiklMjAlMjAlMjAlM0QlMjAwLjMlMjAlQzMlOTclMjAwLjklMjAlM0QlMjAwLjI3JTBBUF9tZXJnZWQoJTIyY29vbCUyMiklMjAlM0QlMjAwLjElMjAlQzMlOTclMjAwLjklMjAlM0QlMjAwLjA5",highlighted:`<span class="hljs-attribute">P_merged</span>(<span class="hljs-string">&quot;HF&quot;</span>) = <span class="hljs-number">0</span>.<span class="hljs-number">6</span> × <span class="hljs-number">0</span>.<span class="hljs-number">9</span> = <span class="hljs-number">0</span>.<span class="hljs-number">54</span> ✓ (correct joint probability)
<span class="hljs-attribute">P_merged</span>(<span class="hljs-string">&quot;is&quot;</span>) = <span class="hljs-number">0</span>.<span class="hljs-number">3</span> × <span class="hljs-number">0</span>.<span class="hljs-number">9</span> = <span class="hljs-number">0</span>.<span class="hljs-number">27</span>
<span class="hljs-attribute">P_merged</span>(<span class="hljs-string">&quot;cool&quot;</span>) = <span class="hljs-number">0</span>.<span class="hljs-number">1</span> × <span class="hljs-number">0</span>.<span class="hljs-number">9</span> = <span class="hljs-number">0</span>.<span class="hljs-number">09</span>`,wrap:!1}}),ce=new y({props:{title:"Example script",local:"example-script",headingTag:"h2"}}),ue=new we({props:{code:"cHl0aG9uJTIwdHJsJTJGZXhwZXJpbWVudGFsJTJGZ29sZCUyRmdvbGQucHklMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1vZGVsX25hbWVfb3JfcGF0aCUyMG1ldGEtbGxhbWElMkZMbGFtYS0zLjItMUItSW5zdHJ1Y3QlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXRlYWNoZXJfbW9kZWxfbmFtZV9vcl9wYXRoJTIwUXdlbiUyRlF3ZW4yLTEuNUItSW5zdHJ1Y3QlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWRhdGFzZXRfbmFtZSUyMHRybC1saWIlMkZjaGF0Ym90X2FyZW5hX2NvbXBsZXRpb25zJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1sZWFybmluZ19yYXRlJTIwMmUtNSUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTIwNCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTIwOCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tb3V0cHV0X2RpciUyMGdvbGQtbW9kZWwlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW51bV90cmFpbl9lcG9jaHMlMjAxJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1wdXNoX3RvX2h1Yg==",highlighted:`python trl/experimental/gold/gold.py \\
--model_name_or_path meta-llama/Llama-3.2-1B-Instruct \\
--teacher_model_name_or_path Qwen/Qwen2-1.5B-Instruct \\
--dataset_name trl-lib/chatbot_arena_completions \\
--learning_rate 2e-5 \\
--per_device_train_batch_size 4 \\
--gradient_accumulation_steps 8 \\
--output_dir gold-model \\
--num_train_epochs 1 \\
--push_to_hub`,wrap:!1}}),_e=new y({props:{title:"GOLDTrainer",local:"trl.experimental.gold.GOLDTrainer",headingTag:"h2"}}),ge=new Ge({props:{name:"class trl.experimental.gold.GOLDTrainer",anchor:"trl.experimental.gold.GOLDTrainer",parameters:[{name:"model",val:": transformers.modeling_utils.PreTrainedModel | torch.nn.modules.module.Module | str | None = None"},{name:"teacher_model",val:": transformers.modeling_utils.PreTrainedModel | torch.nn.modules.module.Module | str = None"},{name:"args",val:": trl.experimental.gold.gold_config.GOLDConfig | None = None"},{name:"data_collator",val:": collections.abc.Callable[[list[typing.Any]], dict[str, typing.Any]] | None = None"},{name:"train_dataset",val:": datasets.arrow_dataset.Dataset | None = None"},{name:"eval_dataset",val:": datasets.arrow_dataset.Dataset | dict[str, datasets.arrow_dataset.Dataset] | None = None"},{name:"processing_class",val:": transformers.tokenization_utils_base.PreTrainedTokenizerBase | transformers.image_processing_utils.BaseImageProcessor | transformers.feature_extraction_utils.FeatureExtractionMixin | transformers.processing_utils.ProcessorMixin | None = None"},{name:"compute_metrics",val:": collections.abc.Callable[[transformers.trainer_utils.EvalPrediction], dict] | None = None"},{name:"callbacks",val:": list[transformers.trainer_callback.TrainerCallback] | None = None"},{name:"optimizers",val:": tuple = (None, None)"},{name:"preprocess_logits_for_metrics",val:": collections.abc.Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None"},{name:"peft_config",val:": typing.Optional[ForwardRef('PeftConfig')] = None"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/experimental/gold/gold_trainer.py#L739"}}),he=new Ge({props:{name:"train",anchor:"trl.experimental.gold.GOLDTrainer.train",parameters:[{name:"resume_from_checkpoint",val:": str | bool | None = None"},{name:"trial",val:": optuna.Trial | dict[str, Any] | None = None"},{name:"ignore_keys_for_eval",val:": list[str] | None = None"}],parametersDescription:[{anchor:"trl.experimental.gold.GOLDTrainer.train.resume_from_checkpoint",description:`<strong>resume_from_checkpoint</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) &#x2014;
If a <code>str</code>, local path to a saved checkpoint as saved by a previous instance of <code>Trainer</code>. If a
<code>bool</code> and equals <code>True</code>, load the last checkpoint in <em>args.output_dir</em> as saved by a previous instance
of <code>Trainer</code>. If present, training will resume from the model/optimizer/scheduler states loaded here.`,name:"resume_from_checkpoint"},{anchor:"trl.experimental.gold.GOLDTrainer.train.trial",description:`<strong>trial</strong> (<code>optuna.Trial</code> or <code>dict[str, Any]</code>, <em>optional</em>) &#x2014;
The trial run or the hyperparameter dictionary for hyperparameter search.`,name:"trial"},{anchor:"trl.experimental.gold.GOLDTrainer.train.ignore_keys_for_eval",description:`<strong>ignore_keys_for_eval</strong> (<code>list[str]</code>, <em>optional</em>) &#x2014;
A list of keys in the output of your model (if it is a dictionary) that should be ignored when
gathering predictions for evaluation during the training.`,name:"ignore_keys_for_eval"}],source:"https://github.com/huggingface/trl/blob/vr_5607/transformers/trainer.py#L1323",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>Object containing the global step count, training loss, and metrics.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>~trainer_utils.TrainOutput</code></p>
`}}),fe=new Ge({props:{name:"generate_on_policy_outputs",anchor:"trl.experimental.gold.GOLDTrainer.generate_on_policy_outputs",parameters:[{name:"model",val:""},{name:"inputs",val:""},{name:"generation_config",val:""},{name:"pad_token_id",val:" = None"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/experimental/gold/gold_trainer.py#L1870"}}),ve=new Ge({props:{name:"save_model",anchor:"trl.experimental.gold.GOLDTrainer.save_model",parameters:[{name:"output_dir",val:": str | None = None"},{name:"_internal_call",val:": bool = False"}],source:"https://github.com/huggingface/trl/blob/vr_5607/transformers/trainer.py#L3746"}}),be=new Ge({props:{name:"push_to_hub",anchor:"trl.experimental.gold.GOLDTrainer.push_to_hub",parameters:[{name:"commit_message",val:": str | None = 'End of training'"},{name:"blocking",val:": bool = True"},{name:"token",val:": str | None = None"},{name:"revision",val:": str | None = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"trl.experimental.gold.GOLDTrainer.push_to_hub.commit_message",description:`<strong>commit_message</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;End of training&quot;</code>) &#x2014;
Message to commit while pushing.`,name:"commit_message"},{anchor:"trl.experimental.gold.GOLDTrainer.push_to_hub.blocking",description:`<strong>blocking</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether the function should return only when the <code>git push</code> has finished.`,name:"blocking"},{anchor:"trl.experimental.gold.GOLDTrainer.push_to_hub.token",description:`<strong>token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014;
Token with write permission to overwrite Trainer&#x2019;s original args.`,name:"token"},{anchor:"trl.experimental.gold.GOLDTrainer.push_to_hub.revision",description:`<strong>revision</strong> (<code>str</code>, <em>optional</em>) &#x2014;
The git revision to commit from. Defaults to the head of the &#x201C;main&#x201D; branch.`,name:"revision"},{anchor:"trl.experimental.gold.GOLDTrainer.push_to_hub.kwargs",description:`<strong>kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>) &#x2014;
Additional keyword arguments passed along to <code>~Trainer.create_model_card</code>.`,name:"kwargs"}],source:"https://github.com/huggingface/trl/blob/vr_5607/transformers/trainer.py#L3993",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The URL of the repository where the model was pushed if <code>blocking=False</code>, or a <code>Future</code> object tracking the
progress of the commit if <code>blocking=True</code>.</p>
`}}),Me=new y({props:{title:"GOLDConfig",local:"trl.experimental.gold.GOLDConfig",headingTag:"h2"}}),ye=new Ge({props:{name:"class trl.experimental.gold.GOLDConfig",anchor:"trl.experimental.gold.GOLDConfig",parameters:[{name:"output_dir",val:": str | None = None"},{name:"per_device_train_batch_size",val:": int = 8"},{name:"num_train_epochs",val:": float = 3.0"},{name:"max_steps",val:": int = -1"},{name:"learning_rate",val:": float = 1e-07"},{name:"lr_scheduler_type",val:": transformers.trainer_utils.SchedulerType | str = 'linear'"},{name:"lr_scheduler_kwargs",val:": dict | str | None = None"},{name:"warmup_steps",val:": float = 0"},{name:"optim",val:": transformers.training_args.OptimizerNames | str = 'adamw_torch_fused'"},{name:"optim_args",val:": str | None = None"},{name:"weight_decay",val:": float = 0.0"},{name:"adam_beta1",val:": float = 0.9"},{name:"adam_beta2",val:": float = 0.999"},{name:"adam_epsilon",val:": float = 1e-08"},{name:"optim_target_modules",val:": None | str | list[str] = None"},{name:"gradient_accumulation_steps",val:": int = 1"},{name:"average_tokens_across_devices",val:": bool = True"},{name:"max_grad_norm",val:": float = 1.0"},{name:"label_smoothing_factor",val:": float = 0.0"},{name:"bf16",val:": bool | None = None"},{name:"fp16",val:": bool = False"},{name:"bf16_full_eval",val:": bool = False"},{name:"fp16_full_eval",val:": bool = False"},{name:"tf32",val:": bool | None = None"},{name:"gradient_checkpointing",val:": bool = True"},{name:"gradient_checkpointing_kwargs",val:": dict[str, typing.Any] | str | None = None"},{name:"torch_compile",val:": bool = False"},{name:"torch_compile_backend",val:": str | None = None"},{name:"torch_compile_mode",val:": str | None = None"},{name:"use_liger_kernel",val:": bool = False"},{name:"liger_kernel_config",val:": dict[str, bool] | None = None"},{name:"use_cache",val:": bool = False"},{name:"neftune_noise_alpha",val:": float | None = None"},{name:"torch_empty_cache_steps",val:": int | None = None"},{name:"auto_find_batch_size",val:": bool = False"},{name:"logging_strategy",val:": transformers.trainer_utils.IntervalStrategy | str = 'steps'"},{name:"logging_steps",val:": float = 10"},{name:"logging_first_step",val:": bool = False"},{name:"log_on_each_node",val:": bool = True"},{name:"logging_nan_inf_filter",val:": bool = True"},{name:"include_num_input_tokens_seen",val:": str | bool = 'no'"},{name:"log_level",val:": str = 'passive'"},{name:"log_level_replica",val:": str = 'warning'"},{name:"disable_tqdm",val:": bool | None = None"},{name:"report_to",val:": None | str | list[str] = 'none'"},{name:"run_name",val:": str | None = None"},{name:"project",val:": str = 'huggingface'"},{name:"trackio_space_id",val:": str | None = 'trackio'"},{name:"eval_strategy",val:": transformers.trainer_utils.IntervalStrategy | str = 'no'"},{name:"eval_steps",val:": float | None = None"},{name:"eval_delay",val:": float = 0"},{name:"per_device_eval_batch_size",val:": int = 8"},{name:"prediction_loss_only",val:": bool = False"},{name:"eval_on_start",val:": bool = False"},{name:"eval_do_concat_batches",val:": bool = True"},{name:"eval_use_gather_object",val:": bool = False"},{name:"eval_accumulation_steps",val:": int | None = None"},{name:"include_for_metrics",val:": list = <factory>"},{name:"batch_eval_metrics",val:": bool = False"},{name:"save_only_model",val:": bool = False"},{name:"save_strategy",val:": transformers.trainer_utils.SaveStrategy | str = 'steps'"},{name:"save_steps",val:": float = 500"},{name:"save_on_each_node",val:": bool = False"},{name:"save_total_limit",val:": int | None = None"},{name:"enable_jit_checkpoint",val:": bool = False"},{name:"push_to_hub",val:": bool = False"},{name:"hub_token",val:": str | None = None"},{name:"hub_private_repo",val:": bool | None = None"},{name:"hub_model_id",val:": str | None = None"},{name:"hub_strategy",val:": transformers.trainer_utils.HubStrategy | str = 'every_save'"},{name:"hub_always_push",val:": bool = False"},{name:"hub_revision",val:": str | None = None"},{name:"load_best_model_at_end",val:": bool = False"},{name:"metric_for_best_model",val:": str | None = None"},{name:"greater_is_better",val:": bool | None = None"},{name:"ignore_data_skip",val:": bool = False"},{name:"restore_callback_states_from_checkpoint",val:": bool = False"},{name:"full_determinism",val:": bool = False"},{name:"seed",val:": int = 42"},{name:"data_seed",val:": int | None = None"},{name:"use_cpu",val:": bool = False"},{name:"accelerator_config",val:": dict | str | None = None"},{name:"parallelism_config",val:": accelerate.parallelism_config.ParallelismConfig | None = None"},{name:"dataloader_drop_last",val:": bool = False"},{name:"dataloader_num_workers",val:": int = 0"},{name:"dataloader_pin_memory",val:": bool = True"},{name:"dataloader_persistent_workers",val:": bool = False"},{name:"dataloader_prefetch_factor",val:": int | None = None"},{name:"remove_unused_columns",val:": bool = True"},{name:"label_names",val:": list[str] | None = None"},{name:"train_sampling_strategy",val:": str = 'random'"},{name:"length_column_name",val:": str = 'length'"},{name:"ddp_find_unused_parameters",val:": bool | None = None"},{name:"ddp_bucket_cap_mb",val:": int | None = None"},{name:"ddp_broadcast_buffers",val:": bool | None = None"},{name:"ddp_backend",val:": str | None = None"},{name:"ddp_timeout",val:": int = 1800"},{name:"fsdp",val:": list[transformers.trainer_utils.FSDPOption] | str | None = None"},{name:"fsdp_config",val:": dict[str, typing.Any] | str | None = None"},{name:"deepspeed",val:": dict | str | None = None"},{name:"debug",val:": str | list[transformers.debug_utils.DebugOption] = ''"},{name:"skip_memory_metrics",val:": bool = True"},{name:"do_train",val:": bool = False"},{name:"do_eval",val:": bool = False"},{name:"do_predict",val:": bool = False"},{name:"resume_from_checkpoint",val:": str | None = None"},{name:"warmup_ratio",val:": float | None = None"},{name:"logging_dir",val:": str | None = None"},{name:"local_rank",val:": int = -1"},{name:"model_init_kwargs",val:": dict[str, typing.Any] | str | None = None"},{name:"chat_template_path",val:": str | None = None"},{name:"dataset_text_field",val:": str = 'text'"},{name:"dataset_kwargs",val:": dict[str, typing.Any] | None = None"},{name:"dataset_num_proc",val:": int | None = None"},{name:"eos_token",val:": str | None = None"},{name:"max_length",val:": int | None = 1024"},{name:"truncation_mode",val:": str = 'keep_start'"},{name:"shuffle_dataset",val:": bool = False"},{name:"packing",val:": bool = False"},{name:"packing_strategy",val:": str = 'bfd'"},{name:"padding_free",val:": bool = False"},{name:"pad_to_multiple_of",val:": int | None = None"},{name:"eval_packing",val:": bool | None = None"},{name:"completion_only_loss",val:": bool | None = None"},{name:"assistant_only_loss",val:": bool = False"},{name:"loss_type",val:": str = 'nll'"},{name:"activation_offloading",val:": bool = False"},{name:"pad_token",val:": str | None = None"},{name:"temperature",val:": float = 0.9"},{name:"top_p",val:": float = 0.95"},{name:"top_k",val:": int = 0"},{name:"lmbda",val:": float = 0.5"},{name:"beta",val:": float = 0.5"},{name:"max_completion_length",val:": int = 128"},{name:"teacher_model_name_or_path",val:": str | None = None"},{name:"teacher_model_revision",val:": str | None = None"},{name:"teacher_model_init_kwargs",val:": dict[str, typing.Any] | str | None = None"},{name:"teacher_tokenizer_name_or_path",val:": str | None = None"},{name:"disable_dropout",val:": bool = True"},{name:"seq_kd",val:": bool = False"},{name:"num_generations",val:": int = 1"},{name:"generation_batch_size",val:": int | None = None"},{name:"use_uld_loss",val:": bool = False"},{name:"use_extended_uld",val:": bool = True"},{name:"uld_use_hybrid_loss",val:": bool = False"},{name:"uld_hybrid_matched_weight",val:": float | None = None"},{name:"uld_hybrid_unmatched_weight",val:": float | None = None"},{name:"uld_crossentropy_weight",val:": float = 0.0"},{name:"uld_distillation_weight",val:": float = 1.0"},{name:"uld_student_temperature",val:": float = 1.0"},{name:"uld_teacher_temperature",val:": float = 1.0"},{name:"uld_skip_student_eos",val:": bool = True"},{name:"uld_skip_teacher_eos",val:": bool = True"},{name:"use_vllm",val:": bool = False"},{name:"vllm_mode",val:": str = 'colocate'"},{name:"vllm_server_base_url",val:": str | None = None"},{name:"vllm_server_host",val:": str = '0.0.0.0'"},{name:"vllm_server_port",val:": int = 8001"},{name:"vllm_server_timeout",val:": float = 240.0"},{name:"vllm_group_port",val:": int = 51216"},{name:"vllm_gpu_memory_utilization",val:": float = 0.9"},{name:"vllm_tensor_parallel_size",val:": int = 1"},{name:"vllm_max_model_length",val:": int | None = None"},{name:"vllm_model_impl",val:": str = 'vllm'"},{name:"vllm_structured_outputs_regex",val:": str | None = None"},{name:"vllm_sync_frequency",val:": int = 1"},{name:"vllm_enable_sleep_mode",val:": bool = False"},{name:"log_completions",val:": bool = False"},{name:"log_completions_steps",val:": int = 100"},{name:"num_completions_to_print",val:": int | None = None"},{name:"wandb_entity",val:": str | None = None"},{name:"wandb_project",val:": str | None = None"},{name:"wandb_run_group",val:": str | None = None"},{name:"wandb_log_unique_prompts",val:": bool = True"},{name:"callbacks",val:": list = <factory>"},{name:"hub_model_revision",val:": str | None = 'main'"},{name:"overwrite_hub_revision",val:": bool = False"},{name:"push_to_hub_revision",val:": bool = False"}],parametersDescription:[{anchor:"trl.experimental.gold.GOLDConfig.temperature",description:`<strong>temperature</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.9</code>) &#x2014;
Temperature for sampling. The higher the temperature, the more random the completions.`,name:"temperature"},{anchor:"trl.experimental.gold.GOLDConfig.lmbda",description:`<strong>lmbda</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.5</code>) &#x2014;
Lambda parameter that controls the student data fraction (i.e., the proportion of on-policy
student-generated outputs).`,name:"lmbda"},{anchor:"trl.experimental.gold.GOLDConfig.beta",description:`<strong>beta</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.5</code>) &#x2014;
Interpolation coefficient between <code>0.0</code> and <code>1.0</code> of the Generalized Jensen-Shannon Divergence loss. When
beta is <code>0.0</code>, the loss is the KL divergence. When beta is <code>1.0</code>, the loss is the Inverse KL Divergence.`,name:"beta"},{anchor:"trl.experimental.gold.GOLDConfig.max_completion_length",description:`<strong>max_completion_length</strong> (<code>int</code>, <em>optional</em>, defaults to <code>128</code>) &#x2014;
Maximum number of tokens to generate per completion.`,name:"max_completion_length"},{anchor:"trl.experimental.gold.GOLDConfig.teacher_model_name_or_path",description:`<strong>teacher_model_name_or_path</strong> (<code>str</code>, <em>optional</em>) &#x2014;
Model name or path of the teacher model. If <code>None</code>, the teacher model will be the same as the model being
trained.`,name:"teacher_model_name_or_path"},{anchor:"trl.experimental.gold.GOLDConfig.teacher_model_revision",description:`<strong>teacher_model_revision</strong> (<code>str</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014;
Model revision of the teacher model (e.g., branch name, tag, or commit hash). If <code>None</code>, the default
revision is used.`,name:"teacher_model_revision"},{anchor:"trl.experimental.gold.GOLDConfig.teacher_model_init_kwargs",description:`<strong>teacher_model_init_kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>) &#x2014;
Keyword arguments to pass to <code>AutoModelForCausalLM.from_pretrained</code> when instantiating the teacher model
from a string.`,name:"teacher_model_init_kwargs"},{anchor:"trl.experimental.gold.GOLDConfig.teacher_tokenizer_name_or_path",description:`<strong>teacher_tokenizer_name_or_path</strong> (<code>str</code>, <em>optional</em>) &#x2014;
Tokenizer name or path for the teacher model. If None when using ULD loss, will use the same tokenizer as
the student model (not recommended for cross-tokenizer distillation).`,name:"teacher_tokenizer_name_or_path"},{anchor:"trl.experimental.gold.GOLDConfig.disable_dropout",description:`<strong>disable_dropout</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to disable dropout in the model.`,name:"disable_dropout"},{anchor:"trl.experimental.gold.GOLDConfig.seq_kd",description:`<strong>seq_kd</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Seq_kd parameter that controls whether to perform Sequence-Level KD (can be viewed as supervised FT on
teacher-generated output).`,name:"seq_kd"},{anchor:"trl.experimental.gold.GOLDConfig.num_generations",description:`<strong>num_generations</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Number of generations per prompt. Each prompt is repeated this many times in the generation batch.`,name:"num_generations"},{anchor:"trl.experimental.gold.GOLDConfig.generation_batch_size",description:`<strong>generation_batch_size</strong> (<code>int</code> or <code>None</code>, <em>optional</em>, defaults to <code>None</code>) &#x2014;
Number of unique prompts per worker per optimizer step. If <code>None</code>, it is computed from
<code>(per_device_train_batch_size * gradient_accumulation_steps) // num_generations</code>.`,name:"generation_batch_size"},{anchor:"trl.experimental.gold.GOLDConfig.use_uld_loss",description:`<strong>use_uld_loss</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to use Universal Logit Distillation (ULD) loss instead of Generalized Jensen-Shannon Divergence
loss.`,name:"use_uld_loss"},{anchor:"trl.experimental.gold.GOLDConfig.uld_crossentropy_weight",description:`<strong>uld_crossentropy_weight</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.0</code>) &#x2014;
Weight for the cross-entropy loss component in ULD loss. If 0, only ULD distillation loss is used.`,name:"uld_crossentropy_weight"},{anchor:"trl.experimental.gold.GOLDConfig.uld_distillation_weight",description:`<strong>uld_distillation_weight</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) &#x2014;
Weight for the distillation loss component in ULD loss.`,name:"uld_distillation_weight"},{anchor:"trl.experimental.gold.GOLDConfig.uld_student_temperature",description:`<strong>uld_student_temperature</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) &#x2014;
Temperature for student logits in ULD loss computation.`,name:"uld_student_temperature"},{anchor:"trl.experimental.gold.GOLDConfig.uld_teacher_temperature",description:`<strong>uld_teacher_temperature</strong> (<code>float</code>, <em>optional</em>, defaults to <code>1.0</code>) &#x2014;
Temperature for teacher logits in ULD loss computation.`,name:"uld_teacher_temperature"},{anchor:"trl.experimental.gold.GOLDConfig.uld_skip_student_eos",description:`<strong>uld_skip_student_eos</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to skip EOS token for student in ULD loss computation.`,name:"uld_skip_student_eos"},{anchor:"trl.experimental.gold.GOLDConfig.uld_skip_teacher_eos",description:`<strong>uld_skip_teacher_eos</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to skip EOS token for teacher in ULD loss computation.`,name:"uld_skip_teacher_eos"},{anchor:"trl.experimental.gold.GOLDConfig.use_vllm",description:`<strong>use_vllm</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to use vLLM for generating completions from the student model. Requires <code>vllm</code> to be installed.`,name:"use_vllm"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_mode",description:`<strong>vllm_mode</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;colocate&quot;</code>) &#x2014;
Mode for student vLLM integration. Either <code>&quot;server&quot;</code> (connect to a running TRL vLLM server) or <code>&quot;colocate&quot;</code>
(run vLLM in the same process).`,name:"vllm_mode"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_server_host",description:`<strong>vllm_server_host</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;0.0.0.0&quot;</code>) &#x2014;
Host of the vLLM server for the student model (if <code>vllm_mode=&quot;server&quot;</code>).`,name:"vllm_server_host"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_server_port",description:`<strong>vllm_server_port</strong> (<code>int</code>, <em>optional</em>, defaults to <code>8001</code>) &#x2014;
Port of the vLLM server for the student model (if <code>vllm_mode=&quot;server&quot;</code>).`,name:"vllm_server_port"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_server_timeout",description:`<strong>vllm_server_timeout</strong> (<code>float</code>, <em>optional</em>, defaults to <code>240.0</code>) &#x2014;
Timeout for connecting to the student vLLM server (if <code>vllm_mode=&quot;server&quot;</code>).`,name:"vllm_server_timeout"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_gpu_memory_utilization",description:`<strong>vllm_gpu_memory_utilization</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.9</code>) &#x2014;
GPU memory utilization for the colocated student vLLM engine (if <code>vllm_mode=&quot;colocate&quot;</code>). It is recommended
to set this to a low value if the student and teacher models share the same GPU.`,name:"vllm_gpu_memory_utilization"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_tensor_parallel_size",description:`<strong>vllm_tensor_parallel_size</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Tensor parallel size for the colocated student vLLM engine (if <code>vllm_mode=&quot;colocate&quot;</code>).`,name:"vllm_tensor_parallel_size"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_structured_outputs_regex",description:`<strong>vllm_structured_outputs_regex</strong> (<code>str</code>, <em>optional</em>) &#x2014;
Regex for vLLM structured outputs for the student model.`,name:"vllm_structured_outputs_regex"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_server_base_url",description:`<strong>vllm_server_base_url</strong> (<code>str</code>, <em>optional</em>) &#x2014;
Base URL for the vLLM server (e.g., <code>&quot;http://localhost:8001&quot;</code>). If provided, <code>vllm_server_host</code> and
<code>vllm_server_port</code> are ignored.`,name:"vllm_server_base_url"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_group_port",description:`<strong>vllm_group_port</strong> (<code>int</code>, <em>optional</em>, defaults to <code>51216</code>) &#x2014;
Port for the vLLM weight-update group (NCCL communicator). Unless the port is occupied, there is no need to
change it.`,name:"vllm_group_port"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_max_model_length",description:`<strong>vllm_max_model_length</strong> (<code>int</code>, <em>optional</em>) &#x2014;
Maximum model sequence length for the colocated vLLM engine when <code>vllm_mode=&quot;colocate&quot;</code>. Defaults to the
model&#x2019;s maximum context length.`,name:"vllm_max_model_length"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_model_impl",description:`<strong>vllm_model_impl</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;vllm&quot;</code>) &#x2014;
Model implementation backend to use in vLLM. Use <code>&quot;vllm&quot;</code> (default) or <code>&quot;transformers&quot;</code>.`,name:"vllm_model_impl"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_sync_frequency",description:`<strong>vllm_sync_frequency</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1</code>) &#x2014;
Frequency (in training steps) to synchronize student model weights to vLLM engine. Set to 1 to sync after
every step.`,name:"vllm_sync_frequency"},{anchor:"trl.experimental.gold.GOLDConfig.vllm_enable_sleep_mode",description:`<strong>vllm_enable_sleep_mode</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Enable vLLM sleep mode to offload student weights/cache during the optimizer step. Keeps GPU memory usage
low, but waking the engine adds host&#x2013;device transfer latency.`,name:"vllm_enable_sleep_mode"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/experimental/gold/gold_config.py#L23"}}),Te=new $n({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/gold_trainer.md"}}),{c(){T=s("meta"),Fe=l(),Ie=s("p"),qe=l(),m(j.$$.fragment),Ze=l(),m(C.$$.fragment),Ee=l(),L=s("p"),L.innerHTML=Rt,ze=l(),m(N.$$.fragment),We=l(),$=s("p"),$.textContent=Qt,Oe=l(),G=s("p"),G.textContent=Xt,Ae=l(),I=s("ol"),I.innerHTML=Vt,He=l(),w=s("blockquote"),w.innerHTML=St,Re=l(),m(D.$$.fragment),Qe=l(),F=s("p"),F.innerHTML=Bt,Xe=l(),q=s("ul"),q.innerHTML=Pt,Ve=l(),Z=s("p"),Z.textContent=Yt,Se=l(),m(E.$$.fragment),Be=l(),z=s("p"),z.innerHTML=Kt,Pe=l(),W=s("p"),W.textContent=en,Ye=l(),m(O.$$.fragment),Ke=l(),x=s("blockquote"),x.innerHTML=tn,et=l(),m(A.$$.fragment),tt=l(),H=s("p"),H.innerHTML=nn,nt=l(),m(R.$$.fragment),ot=l(),Q=s("p"),Q.innerHTML=on,lt=l(),m(X.$$.fragment),at=l(),V=s("p"),V.textContent=ln,st=l(),S=s("ul"),S.innerHTML=an,rt=l(),B=s("p"),B.textContent=sn,it=l(),m(P.$$.fragment),mt=l(),Y=s("p"),Y.innerHTML=rn,dt=l(),m(K.$$.fragment),ct=l(),ee=s("p"),ee.textContent=mn,pt=l(),te=s("ul"),te.innerHTML=dn,ut=l(),ne=s("p"),ne.innerHTML=cn,_t=l(),oe=s("p"),oe.textContent=pn,gt=l(),le=s("ol"),le.innerHTML=un,ht=l(),m(ae.$$.fragment),ft=l(),se=s("p"),se.textContent=_n,vt=l(),m(re.$$.fragment),bt=l(),ie=s("p"),ie.innerHTML=gn,Mt=l(),m(me.$$.fragment),yt=l(),de=s("p"),de.textContent=hn,Tt=l(),m(ce.$$.fragment),wt=l(),pe=s("p"),pe.innerHTML=fn,xt=l(),m(ue.$$.fragment),Jt=l(),m(_e.$$.fragment),Ut=l(),h=s("div"),m(ge.$$.fragment),It=l(),J=s("div"),m(he.$$.fragment),Dt=l(),xe=s("p"),xe.textContent=vn,Ft=l(),Je=s("div"),m(fe.$$.fragment),qt=l(),v=s("div"),m(ve.$$.fragment),Zt=l(),Ue=s("p"),Ue.innerHTML=bn,Et=l(),ke=s("p"),ke.textContent=Mn,zt=l(),U=s("div"),m(be.$$.fragment),Wt=l(),je=s("p"),je.innerHTML=yn,kt=l(),m(Me.$$.fragment),jt=l(),f=s("div"),m(ye.$$.fragment),Ot=l(),Ce=s("p"),Ce.innerHTML=Tn,At=l(),Le=s("p"),Le.innerHTML=wn,Ct=l(),m(Te.$$.fragment),Lt=l(),De=s("p"),this.h()},l(e){const t=Ln("svelte-u9bgzb",document.head);T=r(t,"META",{name:!0,content:!0}),t.forEach(n),Fe=a(e),Ie=r(e,"P",{}),k(Ie).forEach(n),qe=a(e),d(j.$$.fragment,e),Ze=a(e),d(C.$$.fragment,e),Ee=a(e),L=r(e,"P",{"data-svelte-h":!0}),i(L)!=="svelte-1lmge6x"&&(L.innerHTML=Rt),ze=a(e),d(N.$$.fragment,e),We=a(e),$=r(e,"P",{"data-svelte-h":!0}),i($)!=="svelte-1byn2it"&&($.textContent=Qt),Oe=a(e),G=r(e,"P",{"data-svelte-h":!0}),i(G)!=="svelte-129mjhz"&&(G.textContent=Xt),Ae=a(e),I=r(e,"OL",{"data-svelte-h":!0}),i(I)!=="svelte-1tiwzqt"&&(I.innerHTML=Vt),He=a(e),w=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(w)!=="svelte-1j7qzx8"&&(w.innerHTML=St),Re=a(e),d(D.$$.fragment,e),Qe=a(e),F=r(e,"P",{"data-svelte-h":!0}),i(F)!=="svelte-2s3218"&&(F.innerHTML=Bt),Xe=a(e),q=r(e,"UL",{"data-svelte-h":!0}),i(q)!=="svelte-vnjtu4"&&(q.innerHTML=Pt),Ve=a(e),Z=r(e,"P",{"data-svelte-h":!0}),i(Z)!=="svelte-bz81qb"&&(Z.textContent=Yt),Se=a(e),d(E.$$.fragment,e),Be=a(e),z=r(e,"P",{"data-svelte-h":!0}),i(z)!=="svelte-14aioze"&&(z.innerHTML=Kt),Pe=a(e),W=r(e,"P",{"data-svelte-h":!0}),i(W)!=="svelte-ykxg06"&&(W.textContent=en),Ye=a(e),d(O.$$.fragment,e),Ke=a(e),x=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(x)!=="svelte-1gu07te"&&(x.innerHTML=tn),et=a(e),d(A.$$.fragment,e),tt=a(e),H=r(e,"P",{"data-svelte-h":!0}),i(H)!=="svelte-1xs7lec"&&(H.innerHTML=nn),nt=a(e),d(R.$$.fragment,e),ot=a(e),Q=r(e,"P",{"data-svelte-h":!0}),i(Q)!=="svelte-1pneewg"&&(Q.innerHTML=on),lt=a(e),d(X.$$.fragment,e),at=a(e),V=r(e,"P",{"data-svelte-h":!0}),i(V)!=="svelte-1c6ud0s"&&(V.textContent=ln),st=a(e),S=r(e,"UL",{"data-svelte-h":!0}),i(S)!=="svelte-77pkzc"&&(S.innerHTML=an),rt=a(e),B=r(e,"P",{"data-svelte-h":!0}),i(B)!=="svelte-1iri2dm"&&(B.textContent=sn),it=a(e),d(P.$$.fragment,e),mt=a(e),Y=r(e,"P",{"data-svelte-h":!0}),i(Y)!=="svelte-yrurej"&&(Y.innerHTML=rn),dt=a(e),d(K.$$.fragment,e),ct=a(e),ee=r(e,"P",{"data-svelte-h":!0}),i(ee)!=="svelte-uc2av9"&&(ee.textContent=mn),pt=a(e),te=r(e,"UL",{"data-svelte-h":!0}),i(te)!=="svelte-1cd1bgw"&&(te.innerHTML=dn),ut=a(e),ne=r(e,"P",{"data-svelte-h":!0}),i(ne)!=="svelte-1a0eolt"&&(ne.innerHTML=cn),_t=a(e),oe=r(e,"P",{"data-svelte-h":!0}),i(oe)!=="svelte-1ivwx0j"&&(oe.textContent=pn),gt=a(e),le=r(e,"OL",{"data-svelte-h":!0}),i(le)!=="svelte-bofz13"&&(le.innerHTML=un),ht=a(e),d(ae.$$.fragment,e),ft=a(e),se=r(e,"P",{"data-svelte-h":!0}),i(se)!=="svelte-wineyn"&&(se.textContent=_n),vt=a(e),d(re.$$.fragment,e),bt=a(e),ie=r(e,"P",{"data-svelte-h":!0}),i(ie)!=="svelte-m31ura"&&(ie.innerHTML=gn),Mt=a(e),d(me.$$.fragment,e),yt=a(e),de=r(e,"P",{"data-svelte-h":!0}),i(de)!=="svelte-1ge4qkv"&&(de.textContent=hn),Tt=a(e),d(ce.$$.fragment,e),wt=a(e),pe=r(e,"P",{"data-svelte-h":!0}),i(pe)!=="svelte-q6ewei"&&(pe.innerHTML=fn),xt=a(e),d(ue.$$.fragment,e),Jt=a(e),d(_e.$$.fragment,e),Ut=a(e),h=r(e,"DIV",{class:!0});var b=k(h);d(ge.$$.fragment,b),It=a(b),J=r(b,"DIV",{class:!0});var $t=k(J);d(he.$$.fragment,$t),Dt=a($t),xe=r($t,"P",{"data-svelte-h":!0}),i(xe)!=="svelte-1cilnet"&&(xe.textContent=vn),$t.forEach(n),Ft=a(b),Je=r(b,"DIV",{class:!0});var xn=k(Je);d(fe.$$.fragment,xn),xn.forEach(n),qt=a(b),v=r(b,"DIV",{class:!0});var Ne=k(v);d(ve.$$.fragment,Ne),Zt=a(Ne),Ue=r(Ne,"P",{"data-svelte-h":!0}),i(Ue)!=="svelte-r8h4ov"&&(Ue.innerHTML=bn),Et=a(Ne),ke=r(Ne,"P",{"data-svelte-h":!0}),i(ke)!=="svelte-1e6bius"&&(ke.textContent=Mn),Ne.forEach(n),zt=a(b),U=r(b,"DIV",{class:!0});var Gt=k(U);d(be.$$.fragment,Gt),Wt=a(Gt),je=r(Gt,"P",{"data-svelte-h":!0}),i(je)!=="svelte-8tudwd"&&(je.innerHTML=yn),Gt.forEach(n),b.forEach(n),kt=a(e),d(Me.$$.fragment,e),jt=a(e),f=r(e,"DIV",{class:!0});var $e=k(f);d(ye.$$.fragment,$e),Ot=a($e),Ce=r($e,"P",{"data-svelte-h":!0}),i(Ce)!=="svelte-1ccaush"&&(Ce.innerHTML=Tn),At=a($e),Le=r($e,"P",{"data-svelte-h":!0}),i(Le)!=="svelte-1a2zch3"&&(Le.innerHTML=wn),$e.forEach(n),Ct=a(e),d(Te.$$.fragment,e),Lt=a(e),De=r(e,"P",{}),k(De).forEach(n),this.h()},h(){M(T,"name","hf:doc:metadata"),M(T,"content",In),M(w,"class","note"),M(x,"class","note"),M(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),M(Je,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),M(v,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),M(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),M(h,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),M(f,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){g(document.head,T),o(e,Fe,t),o(e,Ie,t),o(e,qe,t),c(j,e,t),o(e,Ze,t),c(C,e,t),o(e,Ee,t),o(e,L,t),o(e,ze,t),c(N,e,t),o(e,We,t),o(e,$,t),o(e,Oe,t),o(e,G,t),o(e,Ae,t),o(e,I,t),o(e,He,t),o(e,w,t),o(e,Re,t),c(D,e,t),o(e,Qe,t),o(e,F,t),o(e,Xe,t),o(e,q,t),o(e,Ve,t),o(e,Z,t),o(e,Se,t),c(E,e,t),o(e,Be,t),o(e,z,t),o(e,Pe,t),o(e,W,t),o(e,Ye,t),c(O,e,t),o(e,Ke,t),o(e,x,t),o(e,et,t),c(A,e,t),o(e,tt,t),o(e,H,t),o(e,nt,t),c(R,e,t),o(e,ot,t),o(e,Q,t),o(e,lt,t),c(X,e,t),o(e,at,t),o(e,V,t),o(e,st,t),o(e,S,t),o(e,rt,t),o(e,B,t),o(e,it,t),c(P,e,t),o(e,mt,t),o(e,Y,t),o(e,dt,t),c(K,e,t),o(e,ct,t),o(e,ee,t),o(e,pt,t),o(e,te,t),o(e,ut,t),o(e,ne,t),o(e,_t,t),o(e,oe,t),o(e,gt,t),o(e,le,t),o(e,ht,t),c(ae,e,t),o(e,ft,t),o(e,se,t),o(e,vt,t),c(re,e,t),o(e,bt,t),o(e,ie,t),o(e,Mt,t),c(me,e,t),o(e,yt,t),o(e,de,t),o(e,Tt,t),c(ce,e,t),o(e,wt,t),o(e,pe,t),o(e,xt,t),c(ue,e,t),o(e,Jt,t),c(_e,e,t),o(e,Ut,t),o(e,h,t),c(ge,h,null),g(h,It),g(h,J),c(he,J,null),g(J,Dt),g(J,xe),g(h,Ft),g(h,Je),c(fe,Je,null),g(h,qt),g(h,v),c(ve,v,null),g(v,Zt),g(v,Ue),g(v,Et),g(v,ke),g(h,zt),g(h,U),c(be,U,null),g(U,Wt),g(U,je),o(e,kt,t),c(Me,e,t),o(e,jt,t),o(e,f,t),c(ye,f,null),g(f,Ot),g(f,Ce),g(f,At),g(f,Le),o(e,Ct,t),c(Te,e,t),o(e,Lt,t),o(e,De,t),Nt=!0},p:Un,i(e){Nt||(p(j.$$.fragment,e),p(C.$$.fragment,e),p(N.$$.fragment,e),p(D.$$.fragment,e),p(E.$$.fragment,e),p(O.$$.fragment,e),p(A.$$.fragment,e),p(R.$$.fragment,e),p(X.$$.fragment,e),p(P.$$.fragment,e),p(K.$$.fragment,e),p(ae.$$.fragment,e),p(re.$$.fragment,e),p(me.$$.fragment,e),p(ce.$$.fragment,e),p(ue.$$.fragment,e),p(_e.$$.fragment,e),p(ge.$$.fragment,e),p(he.$$.fragment,e),p(fe.$$.fragment,e),p(ve.$$.fragment,e),p(be.$$.fragment,e),p(Me.$$.fragment,e),p(ye.$$.fragment,e),p(Te.$$.fragment,e),Nt=!0)},o(e){u(j.$$.fragment,e),u(C.$$.fragment,e),u(N.$$.fragment,e),u(D.$$.fragment,e),u(E.$$.fragment,e),u(O.$$.fragment,e),u(A.$$.fragment,e),u(R.$$.fragment,e),u(X.$$.fragment,e),u(P.$$.fragment,e),u(K.$$.fragment,e),u(ae.$$.fragment,e),u(re.$$.fragment,e),u(me.$$.fragment,e),u(ce.$$.fragment,e),u(ue.$$.fragment,e),u(_e.$$.fragment,e),u(ge.$$.fragment,e),u(he.$$.fragment,e),u(fe.$$.fragment,e),u(ve.$$.fragment,e),u(be.$$.fragment,e),u(Me.$$.fragment,e),u(ye.$$.fragment,e),u(Te.$$.fragment,e),Nt=!1},d(e){e&&(n(Fe),n(Ie),n(qe),n(Ze),n(Ee),n(L),n(ze),n(We),n($),n(Oe),n(G),n(Ae),n(I),n(He),n(w),n(Re),n(Qe),n(F),n(Xe),n(q),n(Ve),n(Z),n(Se),n(Be),n(z),n(Pe),n(W),n(Ye),n(Ke),n(x),n(et),n(tt),n(H),n(nt),n(ot),n(Q),n(lt),n(at),n(V),n(st),n(S),n(rt),n(B),n(it),n(mt),n(Y),n(dt),n(ct),n(ee),n(pt),n(te),n(ut),n(ne),n(_t),n(oe),n(gt),n(le),n(ht),n(ft),n(se),n(vt),n(bt),n(ie),n(Mt),n(yt),n(de),n(Tt),n(wt),n(pe),n(xt),n(Jt),n(Ut),n(h),n(kt),n(jt),n(f),n(Ct),n(Lt),n(De)),n(T),_(j,e),_(C,e),_(N,e),_(D,e),_(E,e),_(O,e),_(A,e),_(R,e),_(X,e),_(P,e),_(K,e),_(ae,e),_(re,e),_(me,e),_(ce,e),_(ue,e),_(_e,e),_(ge),_(he),_(fe),_(ve),_(be),_(Me,e),_(ye),_(Te,e)}}}const In='{"title":"General Online Logit Distillation (GOLD) Trainer","local":"general-online-logit-distillation-gold-trainer","sections":[{"title":"Overview","local":"overview","sections":[],"depth":2},{"title":"Usage tips","local":"usage-tips","sections":[{"title":"Expected dataset type","local":"expected-dataset-type","sections":[],"depth":3}],"depth":2},{"title":"How Token Merging Works","local":"how-token-merging-works","sections":[{"title":"Probability Merging","local":"probability-merging","sections":[],"depth":3},{"title":"Example","local":"example","sections":[],"depth":3}],"depth":2},{"title":"Example script","local":"example-script","sections":[],"depth":2},{"title":"GOLDTrainer","local":"trl.experimental.gold.GOLDTrainer","sections":[],"depth":2},{"title":"GOLDConfig","local":"trl.experimental.gold.GOLDConfig","sections":[],"depth":2}],"depth":1}';function Dn(Ht){return kn(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Wn extends jn{constructor(T){super(),Cn(this,T,Dn,Gn,Jn,{})}}export{Wn as component};

Xet Storage Details

Size:
57.3 kB
·
Xet hash:
20f68024d146fd119b15a76c9760f27fde4939a7b73a9acab84684261b7ffacb

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.