# Navigation tree.
#
# The document is a list of groups. Each group has:
#   sections: ordered list of entries shown under the group
#   title:    display name of the group
# Each entry is either a leaf (`local` + `title`) or a nested group
# (`sections` + `title`).
# NOTE(review): `local` presumably names the page source file without its
# extension - confirm against the documentation build tool.

- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started

- sections:
  - local: chat_templates
    title: Chat Templates
  - local: dataset_formats
    title: Dataset Formats
  - local: paper_index
    title: Paper Index
  title: Conceptual Guides

# Keep the entries of this group sorted alphabetically (case-insensitive).
- sections:
  - local: dpo_trainer
    title: DPO
  - local: grpo_trainer
    title: GRPO
  - local: reward_trainer
    title: Reward
  - local: rloo_trainer
    title: RLOO
  - local: sft_trainer
    title: SFT
  title: Trainers

- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: jobs_training
    title: Training using Jobs
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides

- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: kernels_hub
    title: Kernels Hub
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: openenv
    title: OpenEnv
  - local: peft_integration
    title: PEFT
  - local: ptt_integration
    title: Post Training Toolkit
  - local: rapidfire_integration
    title: RapidFire AI
  - local: trackio_integration
    title: Trackio
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations

- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: lora_without_regret
    title: LoRA Without Regret
  title: Examples

- sections:
  # Nested group: utilities are collected under their own sub-heading.
  - sections:
    - local: chat_template_utils
      title: Chat Template Utilities
    - local: data_utils
      title: Data Utilities
    - local: script_utils
      title: Script Utilities
    title: Utilities
  - local: callbacks
    title: Callbacks
  - local: rewards
    title: Reward Functions
  title: API

- sections:
  # The overview entry stays first; it is not part of the sorted run below.
  - local: experimental_overview
    title: Experimental Overview
  # Keep everything from here down sorted alphabetically (case-insensitive).
  - local: async_grpo_trainer
    title: Asynchronous GRPO
  - local: bco_trainer
    title: BCO
  - local: bema_for_reference_model
    title: BEMA for Reference Model
  - local: cpo_trainer
    title: CPO
  - local: distillation_trainer
    title: Distillation
  - local: gfpo
    title: GFPO
  - local: gkd_trainer
    title: GKD
  - local: gold_trainer
    title: GOLD
  - local: grpo_with_replay_buffer
    title: GRPO With Replay Buffer
  - local: gspo_token
    title: GSPO-token
  - local: kto_trainer
    title: KTO
  - local: merge_model_callback
    title: MergeModelCallback
  - local: minillm_trainer
    title: MiniLLM
  - local: nash_md_trainer
    title: Nash-MD
  - local: nemo_gym
    title: NeMo Gym
  - local: online_dpo_trainer
    title: Online DPO
  - local: orpo_trainer
    title: ORPO
  - local: papo_trainer
    title: PAPO
  - local: ppo_trainer
    title: PPO
  - local: prm_trainer
    title: PRM
  - local: sdft_trainer
    title: SDFT
  - local: sdpo_trainer
    title: SDPO
  - local: ssd_trainer
    title: SSD
  - local: tpo_trainer
    title: TPO
  - local: xpo_trainer
    title: XPO
  title: Experimental