# "Getting started" group: index, installation, and quickstart pages.
- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
# "Conceptual Guides" group: chat templates, dataset formats, paper index.
- sections:
  - local: chat_templates
    title: Chat Templates
  - local: dataset_formats
    title: Dataset Formats
  - local: paper_index
    title: Paper Index
  title: Conceptual Guides
# "Trainers" group: one entry per trainer page, in alphabetical order.
- sections:
  - local: dpo_trainer
    title: DPO
  - local: grpo_trainer
    title: GRPO
  - local: reward_trainer
    title: Reward
  - local: rloo_trainer
    title: RLOO
  - local: sft_trainer
    title: SFT
  title: Trainers
# "How-to guides" group: task-oriented pages (CLI, jobs, customization,
# memory, speed, distribution, and using trained models).
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: jobs_training
    title: Training using Jobs
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides
# "Integrations" group: one entry per integration page, in alphabetical
# order by `local`.
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: kernels_hub
    title: Kernels Hub
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: openenv
    title: OpenEnv
  - local: peft_integration
    title: PEFT
  - local: ptt_integration
    title: Post Training Toolkit
  - local: rapidfire_integration
    title: RapidFire AI
  - local: trackio_integration
    title: Trackio
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations
# "Examples" group: example overview, community tutorials, and the
# "LoRA Without Regret" page.
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: lora_without_regret
    title: LoRA Without Regret
  title: Examples
# "API" group. Its first entry is itself a nested group ("Utilities")
# holding three utility pages; callbacks and reward functions follow as
# direct entries of the "API" group.
- sections:
  - sections:
    - local: chat_template_utils
      title: Chat Template Utilities
    - local: data_utils
      title: Data Utilities
    - local: script_utils
      title: Script Utilities
    title: Utilities
  - local: callbacks
    title: Callbacks
  - local: rewards
    title: Reward Functions
  title: API
# "Experimental" group: the overview page first, then one entry per
# experimental feature page.
- sections:
  - local: experimental_overview
    title: Experimental Overview
  - local: async_grpo_trainer
    title: Asynchronous GRPO
  - local: bema_for_reference_model
    title: BEMA for Reference Model
  - local: bco_trainer
    title: BCO
  - local: cpo_trainer
    title: CPO
  - local: distillation_trainer
    title: Distillation
  - local: gfpo
    title: GFPO
  - local: gkd_trainer
    title: GKD
  - local: gold_trainer
    title: GOLD
  - local: grpo_with_replay_buffer
    title: GRPO With Replay Buffer
  - local: gspo_token
    title: GSPO-token
  - local: kto_trainer
    title: KTO
  - local: merge_model_callback
    title: MergeModelCallback
  - local: minillm_trainer
    title: MiniLLM
  - local: nash_md_trainer
    title: Nash-MD
  - local: nemo_gym
    title: NeMo Gym
  - local: online_dpo_trainer
    title: Online DPO
  - local: orpo_trainer
    title: ORPO
  - local: papo_trainer
    title: PAPO
  - local: ppo_trainer
    title: PPO
  - local: prm_trainer
    title: PRM
  - local: sdft_trainer
    title: SDFT
  - local: sdpo_trainer
    title: SDPO
  - local: ssd_trainer
    title: SSD
  - local: tpo_trainer
    title: TPO
  - local: xpo_trainer
    title: XPO
  title: Experimental