# trl-mcsd / docs / source / _toctree.yml
# ihbkaiser's picture
# Implement MCSD for experimental SDPO
# 1fa3c6c verified
# "Getting started" section: index, installation and quickstart entries.
- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
# "Conceptual Guides" section: chat templates, dataset formats and paper index.
- sections:
  - local: chat_templates
    title: Chat Templates
  - local: dataset_formats
    title: Dataset Formats
  - local: paper_index
    title: Paper Index
  title: Conceptual Guides
# "Trainers" section.
# Sorted alphabetically
- sections:
  - local: dpo_trainer
    title: DPO
  - local: grpo_trainer
    title: GRPO
  - local: reward_trainer
    title: Reward
  - local: rloo_trainer
    title: RLOO
  - local: sft_trainer
    title: SFT
  title: Trainers
# "How-to guides" section: CLI, jobs, customization, memory, speed,
# distribution and model-usage entries.
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: jobs_training
    title: Training using Jobs
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides
# "Integrations" section: one entry per third-party integration listed below.
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: kernels_hub
    title: Kernels Hub
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: openenv
    title: OpenEnv
  - local: peft_integration
    title: PEFT
  - local: ptt_integration
    title: Post Training Toolkit
  - local: rapidfire_integration
    title: RapidFire AI
  - local: trackio_integration
    title: Trackio
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations
# "Examples" section: example overview, community tutorials and the
# "LoRA Without Regret" entry.
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: lora_without_regret
    title: LoRA Without Regret
  title: Examples
# "API" section: a nested "Utilities" subsection followed by two direct
# entries (callbacks and reward functions).
- sections:
  # Nested subsection; its own `title` closes the inner list.
  - sections:
    - local: chat_template_utils
      title: Chat Template Utilities
    - local: data_utils
      title: Data Utilities
    - local: script_utils
      title: Script Utilities
    title: Utilities
  - local: callbacks
    title: Callbacks
  - local: rewards
    title: Reward Functions
  title: API
# "Experimental" section: the overview entry comes first; every entry after it
# is kept in alphabetical order (BCO therefore precedes BEMA).
- sections:
  - local: experimental_overview
    title: Experimental Overview
  - local: async_grpo_trainer  # Sorted alphabetically
    title: Asynchronous GRPO
  - local: bco_trainer
    title: BCO
  - local: bema_for_reference_model
    title: BEMA for Reference Model
  - local: cpo_trainer
    title: CPO
  - local: distillation_trainer
    title: Distillation
  - local: gfpo
    title: GFPO
  - local: gkd_trainer
    title: GKD
  - local: gold_trainer
    title: GOLD
  - local: grpo_with_replay_buffer
    title: GRPO With Replay Buffer
  - local: gspo_token
    title: GSPO-token
  - local: kto_trainer
    title: KTO
  - local: merge_model_callback
    title: MergeModelCallback
  - local: minillm_trainer
    title: MiniLLM
  - local: nash_md_trainer
    title: Nash-MD
  - local: nemo_gym
    title: NeMo Gym
  - local: online_dpo_trainer
    title: Online DPO
  - local: orpo_trainer
    title: ORPO
  - local: papo_trainer
    title: PAPO
  - local: ppo_trainer
    title: PPO
  - local: prm_trainer
    title: PRM
  - local: sdft_trainer
    title: SDFT
  - local: sdpo_trainer
    title: SDPO
  - local: ssd_trainer
    title: SSD
  - local: tpo_trainer
    title: TPO
  - local: xpo_trainer
    title: XPO
  title: Experimental