ihbkaiser
/

trl-mcsd

Model card Files Files and versions

trl-mcsd / docs / source / _toctree.yml

ihbkaiser's picture

Implement MCSD for experimental SDPO

1fa3c6c verified 26 days ago

history blame contribute delete

3.53 kB

	- sections:
	- local: index
	title: TRL
	- local: installation
	title: Installation
	- local: quickstart
	title: Quickstart
	title: Getting started
	- sections:
	- local: chat_templates
	title: Chat Templates
	- local: dataset_formats
	title: Dataset Formats
	- local: paper_index
	title: Paper Index
	title: Conceptual Guides
	- sections: # Sorted alphabetically
	- local: dpo_trainer
	title: DPO
	- local: grpo_trainer
	title: GRPO
	- local: reward_trainer
	title: Reward
	- local: rloo_trainer
	title: RLOO
	- local: sft_trainer
	title: SFT
	title: Trainers
	- sections:
	- local: clis
	title: Command Line Interface (CLI)
	- local: jobs_training
	title: Training using Jobs
	- local: customization
	title: Customizing the Training
	- local: reducing_memory_usage
	title: Reducing Memory Usage
	- local: speeding_up_training
	title: Speeding Up Training
	- local: distributing_training
	title: Distributing Training
	- local: use_model
	title: Using Trained Models
	title: How-to guides
	- sections:
	- local: deepspeed_integration
	title: DeepSpeed
	- local: kernels_hub
	title: Kernels Hub
	- local: liger_kernel_integration
	title: Liger Kernel
	- local: openenv
	title: OpenEnv
	- local: peft_integration
	title: PEFT
	- local: ptt_integration
	title: Post Training Toolkit
	- local: rapidfire_integration
	title: RapidFire AI
	- local: trackio_integration
	title: Trackio
	- local: unsloth_integration
	title: Unsloth
	- local: vllm_integration
	title: vLLM
	title: Integrations
	- sections:
	- local: example_overview
	title: Example Overview
	- local: community_tutorials
	title: Community Tutorials
	- local: lora_without_regret
	title: LoRA Without Regret
	title: Examples
	- sections:
	- sections:
	- local: chat_template_utils
	title: Chat Template Utilities
	- local: data_utils
	title: Data Utilities
	- local: script_utils
	title: Script Utilities
	title: Utilities
	- local: callbacks
	title: Callbacks
	- local: rewards
	title: Reward Functions
	title: API
	- sections:
	- local: experimental_overview
	title: Experimental Overview
	- local: async_grpo_trainer # Sorted alphabetically
	title: Asynchronous GRPO
	- local: bema_for_reference_model
	title: BEMA for Reference Model
	- local: bco_trainer
	title: BCO
	- local: cpo_trainer
	title: CPO
	- local: distillation_trainer
	title: Distillation
	- local: gfpo
	title: GFPO
	- local: gkd_trainer
	title: GKD
	- local: gold_trainer
	title: GOLD
	- local: grpo_with_replay_buffer
	title: GRPO With Replay Buffer
	- local: gspo_token
	title: GSPO-token
	- local: kto_trainer
	title: KTO
	- local: merge_model_callback
	title: MergeModelCallback
	- local: minillm_trainer
	title: MiniLLM
	- local: nash_md_trainer
	title: Nash-MD
	- local: nemo_gym
	title: NeMo Gym
	- local: online_dpo_trainer
	title: Online DPO
	- local: orpo_trainer
	title: ORPO
	- local: papo_trainer
	title: PAPO
	- local: ppo_trainer
	title: PPO
	- local: prm_trainer
	title: PRM
	- local: sdft_trainer
	title: SDFT
	- local: sdpo_trainer
	title: SDPO
	- local: ssd_trainer
	title: SSD
	- local: tpo_trainer
	title: TPO
	- local: xpo_trainer
	title: XPO
	title: Experimental