Buckets:
| import{s as Ot,n as Ut,o as Zt}from"../chunks/scheduler.7b731bd4.js";import{S as It,i as Ft,e as l,s as o,c as d,h as Ht,a as i,d as a,b as r,f as oe,g as c,j as m,k as C,l as s,m as n,n as p,t as f,o as u,p as _}from"../chunks/index.cc268345.js";import{C as Wt,H as N,E as Pt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js";import{D as ge}from"../chunks/Docstring.03f7b462.js";import{C as Se}from"../chunks/CodeBlock.169a125f.js";function zt(mt){let y,he,ue,be,M,ve,$,Te,k,dt='<a href="https://huggingface.co/models?other=bco,trl" rel="nofollow"><img src="https://img.shields.io/badge/All_models-BCO-blue" alt="model badge"/></a>',ye,B,ct=`TRL supports the Binary Classifier Optimization (BCO). | |
| The <a href="https://huggingface.co/papers/2404.04656" rel="nofollow">BCO</a> authors train a binary classifier whose logit serves as a reward so that the classifier maps {prompt, chosen completion} pairs to 1 and {prompt, rejected completion} pairs to 0. | |
| For a full example have a look at <code>examples/scripts/bco.py</code>.`,xe,J,we,O,pt=`The <a href="/docs/trl/pr_5607/en/bco_trainer#trl.experimental.bco.BCOTrainer">experimental.bco.BCOTrainer</a> requires an <a href="dataset_formats#unpaired-preference">unpaired preference dataset</a>. | |
| The <a href="/docs/trl/pr_5607/en/bco_trainer#trl.experimental.bco.BCOTrainer">experimental.bco.BCOTrainer</a> supports both <a href="dataset_formats#conversational">conversational</a> and <a href="dataset_formats#standard">standard</a> dataset formats. When provided with a conversational dataset, the trainer will automatically apply the chat template to the dataset.`,Ce,U,Ne,Z,ft="The BCO trainer expects a model of <code>AutoModelForCausalLM</code>, compared to PPO that expects <code>AutoModelForCausalLMWithValueHead</code> for the value function.",Me,I,$e,F,ut="For a detailed example have a look at the <code>examples/scripts/bco.py</code> script. At a high level we need to initialize the <code>BCOTrainer</code> with a <code>model</code> we wish to train and a reference <code>ref_model</code> which we will use to calculate the implicit rewards of the preferred and rejected response.",ke,H,_t="The <code>beta</code> refers to the hyperparameter of the implicit reward, and the dataset contains the 3 entries listed above. Note that the <code>model</code> and <code>ref_model</code> need to have the same architecture (ie decoder only or encoder-decoder).",Be,W,Je,P,gt="After this one can then call:",Oe,z,Ue,E,Ze,L,ht=`In practical scenarios, the thumbs-up and thumbs-down datasets are likely to have divergent underlying distributions of prompts. | |
| Consider an LLM deployed for user feedback: if the model excels in writing tasks but underperforms in coding, the thumbs-up dataset will be dominated by writing-related prompts, while the thumbs-down dataset will contain mostly coding-related prompts.<br/> | |
| If the prompts in your desired and undesired datasets differ a lot, it is useful to enable UDM.`,Ie,V,bt="Choose an embedding model and tokenizer:",Fe,G,He,j,vt="Set <code>prompt_sample_size</code> to define how many prompts are selected to train the UDM classifier and start the training with the provided embedding function:",We,X,Pe,A,ze,D,Tt=`MOEs are the most efficient if the load is about equally distributed between experts.<br/> | |
| To ensure that we train MOEs similarly during preference-tuning, it is beneficial to add the auxiliary loss from the load balancer to the final loss.`,Ee,R,yt=`This option is enabled by setting <code>output_router_logits=True</code> in the model config (e.g. MixtralConfig).<br/> | |
| To scale how much the auxiliary loss contributes to the total loss, use the hyperparameter <code>router_aux_loss_coef=...</code> (default: 0.001).`,Le,Q,Ve,g,S,Ye,re,xt='Initialize BCOTrainer from <a href="https://huggingface.co/papers/2404.04656" rel="nofollow">BCO</a> paper.',qe,x,Y,Ke,le,wt="Main training entry point.",et,b,q,tt,ie,Ct="Will save the model, so you can reload it using <code>from_pretrained()</code>.",at,se,Nt="Will only save from the main process.",nt,w,K,ot,me,Mt="Upload <code>self.model</code> and <code>self.processing_class</code> to the 🤗 model hub on the repo <code>self.args.hub_model_id</code>.",Ge,ee,je,h,te,rt,de,$t='Configuration class for the <a href="/docs/trl/pr_5607/en/bco_trainer#trl.experimental.bco.BCOTrainer">experimental.bco.BCOTrainer</a>.',lt,ce,kt=`This class includes only the parameters that are specific to BCO training. For a full list of training arguments, | |
| please refer to the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a> documentation. Note that default values in this class may | |
| differ from those in <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a>.`,it,pe,Bt=`Using <a href="https://huggingface.co/docs/transformers/main/en/internal/trainer_utils#transformers.HfArgumentParser" rel="nofollow">HfArgumentParser</a> we can turn this class into | |
| <a href="https://docs.python.org/3/library/argparse#module-argparse" rel="nofollow">argparse</a> arguments that can be specified on the | |
| command line.`,st,ae,Jt='<p>These parameters have default values different from <a href="https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments" rel="nofollow">TrainingArguments</a>:</p> <ul><li><code>logging_steps</code>: Defaults to <code>10</code> instead of <code>500</code>.</li> <li><code>gradient_checkpointing</code>: Defaults to <code>True</code> instead of <code>False</code>.</li> <li><code>bf16</code>: Defaults to <code>True</code> if <code>fp16</code> is not set, instead of <code>False</code>.</li> <li><code>learning_rate</code>: Defaults to <code>5e-7</code> instead of <code>5e-5</code>.</li></ul>',Xe,ne,Ae,_e,De;return M=new Wt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),$=new N({props:{title:"BCO Trainer",local:"bco-trainer",headingTag:"h1"}}),J=new N({props:{title:"Expected dataset type",local:"expected-dataset-type",headingTag:"h2"}}),U=new N({props:{title:"Expected model format",local:"expected-model-format",headingTag:"h2"}}),I=new N({props:{title:"Using the BCOTrainer",local:"using-the-bcotrainer",headingTag:"h2"}}),W=new Se({props:{code:"ZnJvbSUyMHRybC5leHBlcmltZW50YWwuYmNvJTIwaW1wb3J0JTIwQkNPQ29uZmlnJTJDJTIwQkNPVHJhaW5lciUwQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBCQ09Db25maWcoJTBBJTIwJTIwJTIwJTIwYmV0YSUzRDAuMSUyQyUwQSklMEElMEFiY29fdHJhaW5lciUyMCUzRCUyMEJDT1RyYWluZXIoJTBBJTIwJTIwJTIwJTIwbW9kZWwlMkMlMEElMjAlMjAlMjAlMjBtb2RlbF9yZWYlMkMlMEElMjAlMjAlMjAlMjBhcmdzJTNEdHJhaW5pbmdfYXJncyUyQyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0R0cmFpbl9kYXRhc2V0JTJDJTBBJTIwJTIwJTIwJTIwcHJvY2Vzc2luZ19jbGFzcyUzRHRva2VuaXplciUyQyUwQSk=",highlighted:`<span class="hljs-keyword">from</span> trl.experimental.bco <span class="hljs-keyword">import</span> BCOConfig, BCOTrainer | |
| training_args = BCOConfig( | |
| beta=<span class="hljs-number">0.1</span>, | |
| ) | |
| bco_trainer = BCOTrainer( | |
| model, | |
| model_ref, | |
| args=training_args, | |
| train_dataset=train_dataset, | |
| processing_class=tokenizer, | |
| )`,wrap:!1}}),z=new Se({props:{code:"YmNvX3RyYWluZXIudHJhaW4oKQ==",highlighted:"bco_trainer.train()",wrap:!1}}),E=new N({props:{title:"Underlying Distribution matching (UDM)",local:"underlying-distribution-matching-udm",headingTag:"h2"}}),G=new Se({props:{code:"ZW1iZWRkaW5nX21vZGVsJTIwJTNEJTIwQXV0b01vZGVsLmZyb21fcHJldHJhaW5lZCh5b3VyX21vZGVsX2lkKSUwQWVtYmVkZGluZ190b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCh5b3VyX21vZGVsX2lkKSUwQSUwQSUyMyUyMGN1c3RvbWl6ZSUyMHRoaXMlMjBmdW5jdGlvbiUyMGRlcGVuZGluZyUyMG9uJTIweW91ciUyMGVtYmVkZGluZyUyMG1vZGVsJTBBZGVmJTIwZW1iZWRfcHJvbXB0KGlucHV0X2lkcyUyQyUyMGF0dGVudGlvbl9tYXNrJTJDJTIwbW9kZWwpJTNBJTBBJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMG1vZGVsKGlucHV0X2lkcyUzRGlucHV0X2lkcyUyQyUyMGF0dGVudGlvbl9tYXNrJTNEYXR0ZW50aW9uX21hc2spJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwb3V0cHV0cy5sYXN0X2hpZGRlbl9zdGF0ZS5tZWFuKGRpbSUzRDEpJTBBJTBBZW1iZWRkaW5nX21vZGVsJTIwJTNEJTIwQWNjZWxlcmF0b3IoKS5wcmVwYXJlX21vZGVsKHNlbGYuZW1iZWRkaW5nX21vZGVsKSUwQWVtYmVkZGluZ19mdW5jJTIwJTNEJTIwcGFydGlhbChlbWJlZF9wcm9tcHQlMkMlMjBtb2RlbCUzRGVtYmVkZGluZ19tb2RlbCk=",highlighted:`embedding_model = AutoModel.from_pretrained(your_model_id) | |
| embedding_tokenizer = AutoTokenizer.from_pretrained(your_model_id) | |
| <span class="hljs-comment"># customize this function depending on your embedding model</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">embed_prompt</span>(<span class="hljs-params">input_ids, attention_mask, model</span>): | |
| outputs = model(input_ids=input_ids, attention_mask=attention_mask) | |
| <span class="hljs-keyword">return</span> outputs.last_hidden_state.mean(dim=<span class="hljs-number">1</span>) | |
| embedding_model = Accelerator().prepare_model(self.embedding_model) | |
| embedding_func = partial(embed_prompt, model=embedding_model)`,wrap:!1}}),X=new Se({props:{code:"dHJhaW5pbmdfYXJncyUyMCUzRCUyMEJDT0NvbmZpZyglMEElMjAlMjAlMjAlMjBiZXRhJTNEMC4xJTJDJTBBJTIwJTIwJTIwJTIwcHJvbXB0X3NhbXBsZV9zaXplJTNENTEyJTJDJTBBKSUwQSUwQWJjb190cmFpbmVyJTIwJTNEJTIwQkNPVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUyQyUwQSUyMCUyMCUyMCUyMG1vZGVsX3JlZiUyQyUwQSUyMCUyMCUyMCUyMGFyZ3MlM0R0cmFpbmluZ19hcmdzJTJDJTBBJTIwJTIwJTIwJTIwdHJhaW5fZGF0YXNldCUzRHRyYWluX2RhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjBwcm9jZXNzaW5nX2NsYXNzJTNEdG9rZW5pemVyJTJDJTBBJTIwJTIwJTIwJTIwZW1iZWRkaW5nX2Z1bmMlM0RlbWJlZGRpbmdfZnVuYyUyQyUwQSUyMCUyMCUyMCUyMGVtYmVkZGluZ190b2tlbml6ZXIlM0RzZWxmLmVtYmVkZGluZ190b2tlbml6ZXIlMkMlMEEpJTBBJTBBYmNvX3RyYWluZXIudHJhaW4oKQ==",highlighted:`training_args = BCOConfig( | |
| beta=<span class="hljs-number">0.1</span>, | |
| prompt_sample_size=<span class="hljs-number">512</span>, | |
| ) | |
| bco_trainer = BCOTrainer( | |
| model, | |
| model_ref, | |
| args=training_args, | |
| train_dataset=train_dataset, | |
| processing_class=tokenizer, | |
| embedding_func=embedding_func, | |
| embedding_tokenizer=self.embedding_tokenizer, | |
| ) | |
| bco_trainer.train()`,wrap:!1}}),A=new N({props:{title:"For Mixture of Experts Models: Enabling the auxiliary loss",local:"for-mixture-of-experts-models-enabling-the-auxiliary-loss",headingTag:"h3"}}),Q=new N({props:{title:"BCOTrainer",local:"trl.experimental.bco.BCOTrainer",headingTag:"h2"}}),S=new ge({props:{name:"class trl.experimental.bco.BCOTrainer",anchor:"trl.experimental.bco.BCOTrainer",parameters:[{name:"model",val:": transformers.modeling_utils.PreTrainedModel | torch.nn.modules.module.Module | str = None"},{name:"ref_model",val:": transformers.modeling_utils.PreTrainedModel | torch.nn.modules.module.Module | str | None = None"},{name:"args",val:": BCOConfig = None"},{name:"train_dataset",val:": datasets.arrow_dataset.Dataset | None = None"},{name:"eval_dataset",val:": datasets.arrow_dataset.Dataset | dict[str, datasets.arrow_dataset.Dataset] | None = None"},{name:"processing_class",val:": transformers.tokenization_utils_base.PreTrainedTokenizerBase | transformers.image_processing_utils.BaseImageProcessor | transformers.feature_extraction_utils.FeatureExtractionMixin | transformers.processing_utils.ProcessorMixin | None = None"},{name:"data_collator",val:": collections.abc.Callable[[list[typing.Any]], dict[str, typing.Any]] | None = None"},{name:"model_init",val:": collections.abc.Callable[[], transformers.modeling_utils.PreTrainedModel] | None = None"},{name:"callbacks",val:": list[transformers.trainer_callback.TrainerCallback] | None = None"},{name:"optimizers",val:": tuple = (None, None)"},{name:"preprocess_logits_for_metrics",val:": collections.abc.Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | None = None"},{name:"peft_config",val:": dict | None = None"},{name:"compute_metrics",val:": collections.abc.Callable[[transformers.trainer_utils.EvalLoopOutput], dict] | None = None"},{name:"model_adapter_name",val:": str | None = None"},{name:"ref_adapter_name",val:": str | None = None"},{name:"embedding_func",val:": collections.abc.Callable | None = 
None"},{name:"embedding_tokenizer",val:": transformers.tokenization_utils_base.PreTrainedTokenizerBase | None = None"}],parametersDescription:[{anchor:"trl.experimental.bco.BCOTrainer.model",description:`<strong>model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — | |
| The model to train, preferably an <a href="https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForSequenceClassification" rel="nofollow">AutoModelForSequenceClassification</a>.`,name:"model"},{anchor:"trl.experimental.bco.BCOTrainer.ref_model",description:`<strong>ref_model</strong> (<a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel" rel="nofollow">PreTrainedModel</a>) — | |
| Hugging Face transformer model with a causal language modelling head. Used for implicit reward computation | |
| and loss. If no reference model is provided, the trainer will create a reference model with the same | |
| architecture as the model to be optimized.`,name:"ref_model"},{anchor:"trl.experimental.bco.BCOTrainer.args",description:`<strong>args</strong> (<a href="/docs/trl/pr_5607/en/bco_trainer#trl.experimental.bco.BCOConfig">experimental.bco.BCOConfig</a>) — | |
| The arguments to use for training.`,name:"args"},{anchor:"trl.experimental.bco.BCOTrainer.train_dataset",description:`<strong>train_dataset</strong> (<a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset" rel="nofollow">Dataset</a>) — | |
| The dataset to use for training.`,name:"train_dataset"},{anchor:"trl.experimental.bco.BCOTrainer.eval_dataset",description:`<strong>eval_dataset</strong> (<a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset" rel="nofollow">Dataset</a>) — | |
| The dataset to use for evaluation.`,name:"eval_dataset"},{anchor:"trl.experimental.bco.BCOTrainer.processing_class",description:`<strong>processing_class</strong> (<a href="https://huggingface.co/docs/transformers/main/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase" rel="nofollow">PreTrainedTokenizerBase</a>, <a href="https://huggingface.co/docs/transformers/main/en/main_classes/image_processor#transformers.BaseImageProcessor" rel="nofollow">BaseImageProcessor</a>, <a href="https://huggingface.co/docs/transformers/main/en/main_classes/feature_extractor#transformers.FeatureExtractionMixin" rel="nofollow">FeatureExtractionMixin</a> or <a href="https://huggingface.co/docs/transformers/main/en/main_classes/processors#transformers.ProcessorMixin" rel="nofollow">ProcessorMixin</a>, <em>optional</em>) — | |
| Processing class used to process the data. If provided, will be used to automatically process the inputs | |
| for the model, and it will be saved along the model to make it easier to rerun an interrupted training or | |
| reuse the fine-tuned model.`,name:"processing_class"},{anchor:"trl.experimental.bco.BCOTrainer.data_collator",description:`<strong>data_collator</strong> (<code>DataCollator</code>, <em>optional</em>) — | |
| The data collator to use for training. If None is specified, the default data collator | |
| (<code>experimental.utils.DPODataCollatorWithPadding</code>) will be used which will pad the sequences to the | |
| maximum length of the sequences in the batch, given a dataset of paired sequences.`,name:"data_collator"},{anchor:"trl.experimental.bco.BCOTrainer.model_init",description:`<strong>model_init</strong> (<code>Callable[[], transformers.PreTrainedModel]</code>) — | |
| The model initializer to use for training. If None is specified, the default model initializer will be | |
| used.`,name:"model_init"},{anchor:"trl.experimental.bco.BCOTrainer.callbacks",description:`<strong>callbacks</strong> (<code>list[transformers.TrainerCallback]</code>) — | |
| The callbacks to use for training.`,name:"callbacks"},{anchor:"trl.experimental.bco.BCOTrainer.optimizers",description:`<strong>optimizers</strong> (<code>tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]</code>) — | |
| The optimizer and scheduler to use for training.`,name:"optimizers"},{anchor:"trl.experimental.bco.BCOTrainer.preprocess_logits_for_metrics",description:`<strong>preprocess_logits_for_metrics</strong> (<code>Callable[[torch.Tensor, torch.Tensor], torch.Tensor]</code>) — | |
| The function to use to preprocess the logits before computing the metrics.`,name:"preprocess_logits_for_metrics"},{anchor:"trl.experimental.bco.BCOTrainer.peft_config",description:`<strong>peft_config</strong> (<code>dict</code>, defaults to <code>None</code>) — | |
| The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in | |
| a PEFT model.`,name:"peft_config"},{anchor:"trl.experimental.bco.BCOTrainer.compute_metrics",description:`<strong>compute_metrics</strong> (<code>Callable[[EvalPrediction], dict]</code>, <em>optional</em>) — | |
| The function to use to compute the metrics. Must take an <code>EvalPrediction</code> and return a dictionary mapping strings to | |
| metric values.`,name:"compute_metrics"},{anchor:"trl.experimental.bco.BCOTrainer.model_adapter_name",description:`<strong>model_adapter_name</strong> (<code>str</code>, defaults to <code>None</code>) — | |
| Name of the train target PEFT adapter, when using LoRA with multiple adapters.`,name:"model_adapter_name"},{anchor:"trl.experimental.bco.BCOTrainer.ref_adapter_name",description:`<strong>ref_adapter_name</strong> (<code>str</code>, defaults to <code>None</code>) — | |
| Name of the reference PEFT adapter, when using LoRA with multiple adapters.`,name:"ref_adapter_name"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/experimental/bco/bco_trainer.py#L349"}}),Y=new ge({props:{name:"train",anchor:"trl.experimental.bco.BCOTrainer.train",parameters:[{name:"resume_from_checkpoint",val:": str | bool | None = None"},{name:"trial",val:": optuna.Trial | dict[str, Any] | None = None"},{name:"ignore_keys_for_eval",val:": list[str] | None = None"}],parametersDescription:[{anchor:"trl.experimental.bco.BCOTrainer.train.resume_from_checkpoint",description:`<strong>resume_from_checkpoint</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) — | |
| If a <code>str</code>, local path to a saved checkpoint as saved by a previous instance of <code>Trainer</code>. If a | |
| <code>bool</code> and equals <code>True</code>, load the last checkpoint in <em>args.output_dir</em> as saved by a previous instance | |
| of <code>Trainer</code>. If present, training will resume from the model/optimizer/scheduler states loaded here.`,name:"resume_from_checkpoint"},{anchor:"trl.experimental.bco.BCOTrainer.train.trial",description:`<strong>trial</strong> (<code>optuna.Trial</code> or <code>dict[str, Any]</code>, <em>optional</em>) — | |
| The trial run or the hyperparameter dictionary for hyperparameter search.`,name:"trial"},{anchor:"trl.experimental.bco.BCOTrainer.train.ignore_keys_for_eval",description:`<strong>ignore_keys_for_eval</strong> (<code>list[str]</code>, <em>optional</em>) — | |
| A list of keys in the output of your model (if it is a dictionary) that should be ignored when | |
| gathering predictions for evaluation during the training.`,name:"ignore_keys_for_eval"}],source:"https://github.com/huggingface/trl/blob/vr_5607/transformers/trainer.py#L1323",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>Object containing the global step count, training loss, and metrics.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>~trainer_utils.TrainOutput</code></p> | |
| `}}),q=new ge({props:{name:"save_model",anchor:"trl.experimental.bco.BCOTrainer.save_model",parameters:[{name:"output_dir",val:": str | None = None"},{name:"_internal_call",val:": bool = False"}],source:"https://github.com/huggingface/trl/blob/vr_5607/transformers/trainer.py#L3746"}}),K=new ge({props:{name:"push_to_hub",anchor:"trl.experimental.bco.BCOTrainer.push_to_hub",parameters:[{name:"commit_message",val:": str | None = 'End of training'"},{name:"blocking",val:": bool = True"},{name:"token",val:": str | None = None"},{name:"revision",val:": str | None = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"trl.experimental.bco.BCOTrainer.push_to_hub.commit_message",description:`<strong>commit_message</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"End of training"</code>) — | |
| Message to commit while pushing.`,name:"commit_message"},{anchor:"trl.experimental.bco.BCOTrainer.push_to_hub.blocking",description:`<strong>blocking</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether the function should return only when the <code>git push</code> has finished.`,name:"blocking"},{anchor:"trl.experimental.bco.BCOTrainer.push_to_hub.token",description:`<strong>token</strong> (<code>str</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Token with write permission to overwrite Trainer’s original args.`,name:"token"},{anchor:"trl.experimental.bco.BCOTrainer.push_to_hub.revision",description:`<strong>revision</strong> (<code>str</code>, <em>optional</em>) — | |
| The git revision to commit from. Defaults to the head of the “main” branch.`,name:"revision"},{anchor:"trl.experimental.bco.BCOTrainer.push_to_hub.kwargs",description:`<strong>kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>) — | |
| Additional keyword arguments passed along to <code>~Trainer.create_model_card</code>.`,name:"kwargs"}],source:"https://github.com/huggingface/trl/blob/vr_5607/transformers/trainer.py#L3993",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The URL of the repository where the model was pushed if <code>blocking=True</code>, or a <code>Future</code> object tracking the | |
| progress of the commit if <code>blocking=False</code>.</p> | |
| `}}),ee=new N({props:{title:"BCOConfig",local:"trl.experimental.bco.BCOConfig",headingTag:"h2"}}),te=new ge({props:{name:"class trl.experimental.bco.BCOConfig",anchor:"trl.experimental.bco.BCOConfig",parameters:[{name:"output_dir",val:": str | None = None"},{name:"per_device_train_batch_size",val:": int = 8"},{name:"num_train_epochs",val:": float = 3.0"},{name:"max_steps",val:": int = -1"},{name:"learning_rate",val:": float = 5e-07"},{name:"lr_scheduler_type",val:": transformers.trainer_utils.SchedulerType | str = 'linear'"},{name:"lr_scheduler_kwargs",val:": dict | str | None = None"},{name:"warmup_steps",val:": float = 0"},{name:"optim",val:": transformers.training_args.OptimizerNames | str = 'adamw_torch_fused'"},{name:"optim_args",val:": str | None = None"},{name:"weight_decay",val:": float = 0.0"},{name:"adam_beta1",val:": float = 0.9"},{name:"adam_beta2",val:": float = 0.999"},{name:"adam_epsilon",val:": float = 1e-08"},{name:"optim_target_modules",val:": None | str | list[str] = None"},{name:"gradient_accumulation_steps",val:": int = 1"},{name:"average_tokens_across_devices",val:": bool = True"},{name:"max_grad_norm",val:": float = 1.0"},{name:"label_smoothing_factor",val:": float = 0.0"},{name:"bf16",val:": bool | None = None"},{name:"fp16",val:": bool = False"},{name:"bf16_full_eval",val:": bool = False"},{name:"fp16_full_eval",val:": bool = False"},{name:"tf32",val:": bool | None = None"},{name:"gradient_checkpointing",val:": bool = True"},{name:"gradient_checkpointing_kwargs",val:": dict[str, typing.Any] | str | None = None"},{name:"torch_compile",val:": bool = False"},{name:"torch_compile_backend",val:": str | None = None"},{name:"torch_compile_mode",val:": str | None = None"},{name:"use_liger_kernel",val:": bool = False"},{name:"liger_kernel_config",val:": dict[str, bool] | None = None"},{name:"use_cache",val:": bool = False"},{name:"neftune_noise_alpha",val:": float | None = None"},{name:"torch_empty_cache_steps",val:": int | None = 
None"},{name:"auto_find_batch_size",val:": bool = False"},{name:"logging_strategy",val:": transformers.trainer_utils.IntervalStrategy | str = 'steps'"},{name:"logging_steps",val:": float = 10"},{name:"logging_first_step",val:": bool = False"},{name:"log_on_each_node",val:": bool = True"},{name:"logging_nan_inf_filter",val:": bool = True"},{name:"include_num_input_tokens_seen",val:": str | bool = 'no'"},{name:"log_level",val:": str = 'passive'"},{name:"log_level_replica",val:": str = 'warning'"},{name:"disable_tqdm",val:": bool | None = None"},{name:"report_to",val:": None | str | list[str] = 'none'"},{name:"run_name",val:": str | None = None"},{name:"project",val:": str = 'huggingface'"},{name:"trackio_space_id",val:": str | None = 'trackio'"},{name:"eval_strategy",val:": transformers.trainer_utils.IntervalStrategy | str = 'no'"},{name:"eval_steps",val:": float | None = None"},{name:"eval_delay",val:": float = 0"},{name:"per_device_eval_batch_size",val:": int = 8"},{name:"prediction_loss_only",val:": bool = False"},{name:"eval_on_start",val:": bool = False"},{name:"eval_do_concat_batches",val:": bool = True"},{name:"eval_use_gather_object",val:": bool = False"},{name:"eval_accumulation_steps",val:": int | None = None"},{name:"include_for_metrics",val:": list = <factory>"},{name:"batch_eval_metrics",val:": bool = False"},{name:"save_only_model",val:": bool = False"},{name:"save_strategy",val:": transformers.trainer_utils.SaveStrategy | str = 'steps'"},{name:"save_steps",val:": float = 500"},{name:"save_on_each_node",val:": bool = False"},{name:"save_total_limit",val:": int | None = None"},{name:"enable_jit_checkpoint",val:": bool = False"},{name:"push_to_hub",val:": bool = False"},{name:"hub_token",val:": str | None = None"},{name:"hub_private_repo",val:": bool | None = None"},{name:"hub_model_id",val:": str | None = None"},{name:"hub_strategy",val:": transformers.trainer_utils.HubStrategy | str = 'every_save'"},{name:"hub_always_push",val:": bool = 
False"},{name:"hub_revision",val:": str | None = None"},{name:"load_best_model_at_end",val:": bool = False"},{name:"metric_for_best_model",val:": str | None = None"},{name:"greater_is_better",val:": bool | None = None"},{name:"ignore_data_skip",val:": bool = False"},{name:"restore_callback_states_from_checkpoint",val:": bool = False"},{name:"full_determinism",val:": bool = False"},{name:"seed",val:": int = 42"},{name:"data_seed",val:": int | None = None"},{name:"use_cpu",val:": bool = False"},{name:"accelerator_config",val:": dict | str | None = None"},{name:"parallelism_config",val:": accelerate.parallelism_config.ParallelismConfig | None = None"},{name:"dataloader_drop_last",val:": bool = False"},{name:"dataloader_num_workers",val:": int = 0"},{name:"dataloader_pin_memory",val:": bool = True"},{name:"dataloader_persistent_workers",val:": bool = False"},{name:"dataloader_prefetch_factor",val:": int | None = None"},{name:"remove_unused_columns",val:": bool = True"},{name:"label_names",val:": list[str] | None = None"},{name:"train_sampling_strategy",val:": str = 'random'"},{name:"length_column_name",val:": str = 'length'"},{name:"ddp_find_unused_parameters",val:": bool | None = None"},{name:"ddp_bucket_cap_mb",val:": int | None = None"},{name:"ddp_broadcast_buffers",val:": bool | None = None"},{name:"ddp_backend",val:": str | None = None"},{name:"ddp_timeout",val:": int = 1800"},{name:"fsdp",val:": list[transformers.trainer_utils.FSDPOption] | str | None = None"},{name:"fsdp_config",val:": dict[str, typing.Any] | str | None = None"},{name:"deepspeed",val:": dict | str | None = None"},{name:"debug",val:": str | list[transformers.debug_utils.DebugOption] = ''"},{name:"skip_memory_metrics",val:": bool = True"},{name:"do_train",val:": bool = False"},{name:"do_eval",val:": bool = False"},{name:"do_predict",val:": bool = False"},{name:"resume_from_checkpoint",val:": str | None = None"},{name:"warmup_ratio",val:": float | None = None"},{name:"logging_dir",val:": str | None 
= None"},{name:"local_rank",val:": int = -1"},{name:"max_length",val:": int | None = 1024"},{name:"max_completion_length",val:": int | None = None"},{name:"beta",val:": float = 0.1"},{name:"disable_dropout",val:": bool = True"},{name:"generate_during_eval",val:": bool = False"},{name:"is_encoder_decoder",val:": bool | None = None"},{name:"precompute_ref_log_probs",val:": bool = False"},{name:"model_init_kwargs",val:": dict[str, typing.Any] | str | None = None"},{name:"dataset_num_proc",val:": int | None = None"},{name:"prompt_sample_size",val:": int = 1024"},{name:"min_density_ratio",val:": float = 0.5"},{name:"max_density_ratio",val:": float = 10.0"}],parametersDescription:[{anchor:"trl.experimental.bco.BCOConfig.max_length",description:`<strong>max_length</strong> (<code>int</code> or <code>None</code>, <em>optional</em>, defaults to <code>1024</code>) — | |
| Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want | |
| to use the default data collator.`,name:"max_length"},{anchor:"trl.experimental.bco.BCOConfig.max_completion_length",description:`<strong>max_completion_length</strong> (<code>int</code>, <em>optional</em>) — | |
| Maximum length of the completion. This argument is required if you want to use the default data collator | |
| and your model is an encoder-decoder.`,name:"max_completion_length"},{anchor:"trl.experimental.bco.BCOConfig.beta",description:`<strong>beta</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.1</code>) — | |
| Parameter controlling the deviation from the reference model. Higher β means less deviation from the | |
| reference model.`,name:"beta"},{anchor:"trl.experimental.bco.BCOConfig.disable_dropout",description:`<strong>disable_dropout</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to disable dropout in the model and reference model.`,name:"disable_dropout"},{anchor:"trl.experimental.bco.BCOConfig.generate_during_eval",description:`<strong>generate_during_eval</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| If <code>True</code>, generates and logs completions from both the model and the reference model to W&B or Comet | |
| during evaluation.`,name:"generate_during_eval"},{anchor:"trl.experimental.bco.BCOConfig.is_encoder_decoder",description:`<strong>is_encoder_decoder</strong> (<code>bool</code>, <em>optional</em>) — | |
| When using the <code>model_init</code> argument (callable) to instantiate the model instead of the <code>model</code> argument, | |
| you need to specify if the model returned by the callable is an encoder-decoder model.`,name:"is_encoder_decoder"},{anchor:"trl.experimental.bco.BCOConfig.precompute_ref_log_probs",description:`<strong>precompute_ref_log_probs</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to precompute reference model log probabilities for training and evaluation datasets. This is | |
| useful when training without the reference model to reduce the total GPU memory needed.`,name:"precompute_ref_log_probs"},{anchor:"trl.experimental.bco.BCOConfig.model_init_kwargs",description:`<strong>model_init_kwargs</strong> (<code>dict[str, Any]</code>, <em>optional</em>) — | |
| Keyword arguments to pass to <code>AutoModelForCausalLM.from_pretrained</code> when instantiating the model and | |
| reference model from strings.`,name:"model_init_kwargs"},{anchor:"trl.experimental.bco.BCOConfig.dataset_num_proc",description:`<strong>dataset_num_proc</strong> (<code>int</code>, <em>optional</em>) — | |
| Number of processes to use for processing the dataset.`,name:"dataset_num_proc"},{anchor:"trl.experimental.bco.BCOConfig.prompt_sample_size",description:`<strong>prompt_sample_size</strong> (<code>int</code>, <em>optional</em>, defaults to <code>1024</code>) — | |
| Number of prompts that are fed to density ratio classifier.`,name:"prompt_sample_size"},{anchor:"trl.experimental.bco.BCOConfig.min_density_ratio",description:`<strong>min_density_ratio</strong> (<code>float</code>, <em>optional</em>, defaults to <code>0.5</code>) — | |
| Minimum value of the density ratio. The estimated density ratio is clamped to this value.`,name:"min_density_ratio"},{anchor:"trl.experimental.bco.BCOConfig.max_density_ratio",description:`<strong>max_density_ratio</strong> (<code>float</code>, <em>optional</em>, defaults to <code>10.0</code>) — | |
| Maximum value of the density ratio. The estimated density ratio is clamped to this value.`,name:"max_density_ratio"}],source:"https://github.com/huggingface/trl/blob/vr_5607/trl/experimental/bco/bco_config.py#L22"}}),ne=new Pt({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/bco_trainer.md"}}),{c(){y=l("meta"),he=o(),ue=l("p"),be=o(),d(M.$$.fragment),ve=o(),d($.$$.fragment),Te=o(),k=l("p"),k.innerHTML=dt,ye=o(),B=l("p"),B.innerHTML=ct,xe=o(),d(J.$$.fragment),we=o(),O=l("p"),O.innerHTML=pt,Ce=o(),d(U.$$.fragment),Ne=o(),Z=l("p"),Z.innerHTML=ft,Me=o(),d(I.$$.fragment),$e=o(),F=l("p"),F.innerHTML=ut,ke=o(),H=l("p"),H.innerHTML=_t,Be=o(),d(W.$$.fragment),Je=o(),P=l("p"),P.textContent=gt,Oe=o(),d(z.$$.fragment),Ue=o(),d(E.$$.fragment),Ze=o(),L=l("p"),L.innerHTML=ht,Ie=o(),V=l("p"),V.textContent=bt,Fe=o(),d(G.$$.fragment),He=o(),j=l("p"),j.innerHTML=vt,We=o(),d(X.$$.fragment),Pe=o(),d(A.$$.fragment),ze=o(),D=l("p"),D.innerHTML=Tt,Ee=o(),R=l("p"),R.innerHTML=yt,Le=o(),d(Q.$$.fragment),Ve=o(),g=l("div"),d(S.$$.fragment),Ye=o(),re=l("p"),re.innerHTML=xt,qe=o(),x=l("div"),d(Y.$$.fragment),Ke=o(),le=l("p"),le.textContent=wt,et=o(),b=l("div"),d(q.$$.fragment),tt=o(),ie=l("p"),ie.innerHTML=Ct,at=o(),se=l("p"),se.textContent=Nt,nt=o(),w=l("div"),d(K.$$.fragment),ot=o(),me=l("p"),me.innerHTML=Mt,Ge=o(),d(ee.$$.fragment),je=o(),h=l("div"),d(te.$$.fragment),rt=o(),de=l("p"),de.innerHTML=$t,lt=o(),ce=l("p"),ce.innerHTML=kt,it=o(),pe=l("p"),pe.innerHTML=Bt,st=o(),ae=l("blockquote"),ae.innerHTML=Jt,Xe=o(),d(ne.$$.fragment),Ae=o(),_e=l("p"),this.h()},l(e){const 
t=Ht("svelte-u9bgzb",document.head);y=i(t,"META",{name:!0,content:!0}),t.forEach(a),he=r(e),ue=i(e,"P",{}),oe(ue).forEach(a),be=r(e),c(M.$$.fragment,e),ve=r(e),c($.$$.fragment,e),Te=r(e),k=i(e,"P",{"data-svelte-h":!0}),m(k)!=="svelte-1fj387o"&&(k.innerHTML=dt),ye=r(e),B=i(e,"P",{"data-svelte-h":!0}),m(B)!=="svelte-r130jr"&&(B.innerHTML=ct),xe=r(e),c(J.$$.fragment,e),we=r(e),O=i(e,"P",{"data-svelte-h":!0}),m(O)!=="svelte-kx3wyv"&&(O.innerHTML=pt),Ce=r(e),c(U.$$.fragment,e),Ne=r(e),Z=i(e,"P",{"data-svelte-h":!0}),m(Z)!=="svelte-a13xh1"&&(Z.innerHTML=ft),Me=r(e),c(I.$$.fragment,e),$e=r(e),F=i(e,"P",{"data-svelte-h":!0}),m(F)!=="svelte-10dhm8g"&&(F.innerHTML=ut),ke=r(e),H=i(e,"P",{"data-svelte-h":!0}),m(H)!=="svelte-1eil2tw"&&(H.innerHTML=_t),Be=r(e),c(W.$$.fragment,e),Je=r(e),P=i(e,"P",{"data-svelte-h":!0}),m(P)!=="svelte-1lg6q1l"&&(P.textContent=gt),Oe=r(e),c(z.$$.fragment,e),Ue=r(e),c(E.$$.fragment,e),Ze=r(e),L=i(e,"P",{"data-svelte-h":!0}),m(L)!=="svelte-16kpfu0"&&(L.innerHTML=ht),Ie=r(e),V=i(e,"P",{"data-svelte-h":!0}),m(V)!=="svelte-1hlwkqm"&&(V.textContent=bt),Fe=r(e),c(G.$$.fragment,e),He=r(e),j=i(e,"P",{"data-svelte-h":!0}),m(j)!=="svelte-zt3y0f"&&(j.innerHTML=vt),We=r(e),c(X.$$.fragment,e),Pe=r(e),c(A.$$.fragment,e),ze=r(e),D=i(e,"P",{"data-svelte-h":!0}),m(D)!=="svelte-3kfva2"&&(D.innerHTML=Tt),Ee=r(e),R=i(e,"P",{"data-svelte-h":!0}),m(R)!=="svelte-1c01yo3"&&(R.innerHTML=yt),Le=r(e),c(Q.$$.fragment,e),Ve=r(e),g=i(e,"DIV",{class:!0});var v=oe(g);c(S.$$.fragment,v),Ye=r(v),re=i(v,"P",{"data-svelte-h":!0}),m(re)!=="svelte-13v1u5r"&&(re.innerHTML=xt),qe=r(v),x=i(v,"DIV",{class:!0});var Re=oe(x);c(Y.$$.fragment,Re),Ke=r(Re),le=i(Re,"P",{"data-svelte-h":!0}),m(le)!=="svelte-1cilnet"&&(le.textContent=wt),Re.forEach(a),et=r(v),b=i(v,"DIV",{class:!0});var 
fe=oe(b);c(q.$$.fragment,fe),tt=r(fe),ie=i(fe,"P",{"data-svelte-h":!0}),m(ie)!=="svelte-r8h4ov"&&(ie.innerHTML=Ct),at=r(fe),se=i(fe,"P",{"data-svelte-h":!0}),m(se)!=="svelte-1e6bius"&&(se.textContent=Nt),fe.forEach(a),nt=r(v),w=i(v,"DIV",{class:!0});var Qe=oe(w);c(K.$$.fragment,Qe),ot=r(Qe),me=i(Qe,"P",{"data-svelte-h":!0}),m(me)!=="svelte-8tudwd"&&(me.innerHTML=Mt),Qe.forEach(a),v.forEach(a),Ge=r(e),c(ee.$$.fragment,e),je=r(e),h=i(e,"DIV",{class:!0});var T=oe(h);c(te.$$.fragment,T),rt=r(T),de=i(T,"P",{"data-svelte-h":!0}),m(de)!=="svelte-8pg1iq"&&(de.innerHTML=$t),lt=r(T),ce=i(T,"P",{"data-svelte-h":!0}),m(ce)!=="svelte-1u5i008"&&(ce.innerHTML=kt),it=r(T),pe=i(T,"P",{"data-svelte-h":!0}),m(pe)!=="svelte-ekuf1t"&&(pe.innerHTML=Bt),st=r(T),ae=i(T,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),m(ae)!=="svelte-17fw90z"&&(ae.innerHTML=Jt),T.forEach(a),Xe=r(e),c(ne.$$.fragment,e),Ae=r(e),_e=i(e,"P",{}),oe(_e).forEach(a),this.h()},h(){C(y,"name","hf:doc:metadata"),C(y,"content",Et),C(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(b,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(w,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(g,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(ae,"class","note"),C(h,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,t){s(document.head,y),n(e,he,t),n(e,ue,t),n(e,be,t),p(M,e,t),n(e,ve,t),p($,e,t),n(e,Te,t),n(e,k,t),n(e,ye,t),n(e,B,t),n(e,xe,t),p(J,e,t),n(e,we,t),n(e,O,t),n(e,Ce,t),p(U,e,t),n(e,Ne,t),n(e,Z,t),n(e,Me,t),p(I,e,t),n(e,$e,t),n(e,F,t),n(e,ke,t),n(e,H,t),n(e,Be,t),p(W,e,t),n(e,Je,t),n(e,P,t),n(e,Oe,t),p(z,e,t),n(e,Ue,t),p(E,e,t),n(e,Ze,t),n(e,L,t),n(e,Ie,t),n(e,V,t),n(e,Fe,t),p(G,e,t),n(e,He,t),n(e,j,t),n(e,We,t),p(X,e,t),n(e,Pe,t),p(A,e,t),n(e,ze,t),n(e,D,t),n(e,Ee,t),n(e,R,t),n(e,Le,t),p(Q,e,t),n(e,Ve,t),n(e,g,t),p(S,g,null),s(g,Ye),s(g,re),s(g,qe),s(g,x),p(Y,x,null),s(x,Ke),s(x,le),s(g,et),s(g,b),p(q,b,null),s(b,tt),s(b,ie),s(b,at),s(b,se),s(g,nt),s(g,w),p(K,w,null),s(w,ot),s(w,me),n(e,Ge,t),p(ee,e,t),n(e,je,t),n(e,h,t),p(te,h,null),s(h,rt),s(h,de),s(h,lt),s(h,ce),s(h,it),s(h,pe),s(h,st),s(h,ae),n(e,Xe,t),p(ne,e,t),n(e,Ae,t),n(e,_e,t),De=!0},p:Ut,i(e){De||(f(M.$$.fragment,e),f($.$$.fragment,e),f(J.$$.fragment,e),f(U.$$.fragment,e),f(I.$$.fragment,e),f(W.$$.fragment,e),f(z.$$.fragment,e),f(E.$$.fragment,e),f(G.$$.fragment,e),f(X.$$.fragment,e),f(A.$$.fragment,e),f(Q.$$.fragment,e),f(S.$$.fragment,e),f(Y.$$.fragment,e),f(q.$$.fragment,e),f(K.$$.fragment,e),f(ee.$$.fragment,e),f(te.$$.fragment,e),f(ne.$$.fragment,e),De=!0)},o(e){u(M.$$.fragment,e),u($.$$.fragment,e),u(J.$$.fragment,e),u(U.$$.fragment,e),u(I.$$.fragment,e),u(W.$$.fragment,e),u(z.$$.fragment,e),u(E.$$.fragment,e),u(G.$$.fragment,e),u(X.$$.fragment,e),u(A.$$.fragment,e),u(Q.$$.fragment,e),u(S.$$.fragment,e),u(Y.$$.fragment,e),u(q.$$.fragment,e),u(K.$$.fragment,e),u(ee.$$.fragment,e),u(te.$$.fragment,e),u(ne.$$.fragment,e),De=!1},d(e){e&&(a(he),a(ue),a(be),a(ve),a(Te),a(k),a(ye),a(B),a(xe),a(we),a(O),a(Ce),a(Ne),a(Z),a(Me),a($e),a(F),a(ke),a(H),a(Be),a(Je),a(P),a(Oe),a(Ue),a(Ze),a(L),a(Ie),a(V),a(Fe),a(He),a(j),a(We),a(Pe),a(ze),a(D),a(Ee),a(R),a(Le),a(Ve),a(g),a(Ge),a(je),a(h),a(Xe),a(Ae),a(_e)),a(y),_(M,e),_($,e),_(J,e),_(U,e),_(I,e),_(W,e),_(z,e),_(E,e),_(G,e),_(X,e),_(A,e),_(Q,e),_(S),_(Y),_
(q),_(K),_(ee,e),_(te),_(ne,e)}}}/*
 * The text before this comment is the end of the fragment factory `zt`,
 * which begins earlier in the file.
 *
 * Et: JSON-encoded page metadata (title "BCO Trainer" plus a nested list of
 * sections, each with `title`, `local` anchor and `depth`). It is handed to
 * C(y,"content",Et) in zt's h() hook, where `y` is the <meta> element that
 * also receives name "hf:doc:metadata".
 */const Et='{"title":"BCO Trainer","local":"bco-trainer","sections":[{"title":"Expected dataset type","local":"expected-dataset-type","sections":[],"depth":2},{"title":"Expected model format","local":"expected-model-format","sections":[],"depth":2},{"title":"Using the BCOTrainer","local":"using-the-bcotrainer","sections":[],"depth":2},{"title":"Underlying Distribution matching (UDM)","local":"underlying-distribution-matching-udm","sections":[{"title":"For Mixture of Experts Models: Enabling the auxiliary loss","local":"for-mixture-of-experts-models-enabling-the-auxiliary-loss","sections":[],"depth":3}],"depth":2},{"title":"BCOTrainer","local":"trl.experimental.bco.BCOTrainer","sections":[],"depth":2},{"title":"BCOConfig","local":"trl.experimental.bco.BCOConfig","sections":[],"depth":2}],"depth":1}';/**
 * Instance function passed to Ft() by the component class below.
 *
 * Registers a callback through Zt (imported as `o` from the scheduler chunk;
 * presumably Svelte's onMount - confirm against that chunk). The callback
 * reads the `fw` query parameter from window.location.search and discards
 * the value, so it has no visible effect in this file.
 *
 * @param {*} mt - Unused in the body.
 * @returns {Array} Always an empty array.
 */function Lt(mt){return Zt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/**
 * Page component, exported under the name `component`.
 *
 * Extends It (imported as `S` from the index chunk; presumably
 * SvelteComponent - confirm). The constructor calls super() and then
 * Ft(this, y, Lt, zt, Ot, {}), wiring together the options object `y`,
 * the instance function Lt, the fragment factory zt and the helper Ot.
 *
 * NOTE(review): the trailing `| |` at the end of this line looks like table
 * residue from extraction rather than JavaScript - confirm before shipping.
 */class Dt extends It{constructor(y){super(),Ft(this,y,Lt,zt,Ot,{})}}export{Dt as component}; | |
Xet Storage Details
- Size: 40.5 kB
- Xet hash: 179321dcff4bf767cc6b0b796d495ce6fec5713cf1699c6a67a5f73615a49b13
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.