File size: 3,525 Bytes
# "Getting started" group: landing page, installation, and quickstart.
# NOTE(review): `local` looks like the page's source name and `title` its
# displayed label — confirm against the docs build tool.
- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
# "Conceptual Guides" group: chat templates, dataset formats, paper index.
- sections:
  - local: chat_templates
    title: Chat Templates
  - local: dataset_formats
    title: Dataset Formats
  - local: paper_index
    title: Paper Index
  title: Conceptual Guides
# "Trainers" group. Keep the entries below sorted alphabetically.
- sections:
  - local: dpo_trainer
    title: DPO
  - local: grpo_trainer
    title: GRPO
  - local: reward_trainer
    title: Reward
  - local: rloo_trainer
    title: RLOO
  - local: sft_trainer
    title: SFT
  title: Trainers
# "How-to guides" group: CLI, jobs, customization, memory, speed,
# distributed training, and using trained models.
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: jobs_training
    title: Training using Jobs
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides
# "Integrations" group: one page per integration.
# The entries are currently in alphabetical order by title.
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: kernels_hub
    title: Kernels Hub
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: openenv
    title: OpenEnv
  - local: peft_integration
    title: PEFT
  - local: ptt_integration
    title: Post Training Toolkit
  - local: rapidfire_integration
    title: RapidFire AI
  - local: trackio_integration
    title: Trackio
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations
# "Examples" group: overview, community tutorials, and the
# "LoRA Without Regret" page.
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: lora_without_regret
    title: LoRA Without Regret
  title: Examples
# "API" group. Its first child is itself a group ("Utilities") holding the
# three *_utils pages; callbacks and rewards sit beside it at the same level.
- sections:
  - sections:
    - local: chat_template_utils
      title: Chat Template Utilities
    - local: data_utils
      title: Data Utilities
    - local: script_utils
      title: Script Utilities
    title: Utilities
  - local: callbacks
    title: Callbacks
  - local: rewards
    title: Reward Functions
  title: API
# "Experimental" group.
# The overview page stays first; every entry after it is kept sorted
# alphabetically (BCO therefore precedes BEMA).
- sections:
  - local: experimental_overview
    title: Experimental Overview
  - local: async_grpo_trainer
    title: Asynchronous GRPO
  - local: bco_trainer
    title: BCO
  - local: bema_for_reference_model
    title: BEMA for Reference Model
  - local: cpo_trainer
    title: CPO
  - local: distillation_trainer
    title: Distillation
  - local: gfpo
    title: GFPO
  - local: gkd_trainer
    title: GKD
  - local: gold_trainer
    title: GOLD
  - local: grpo_with_replay_buffer
    title: GRPO With Replay Buffer
  - local: gspo_token
    title: GSPO-token
  - local: kto_trainer
    title: KTO
  - local: merge_model_callback
    title: MergeModelCallback
  - local: minillm_trainer
    title: MiniLLM
  - local: nash_md_trainer
    title: Nash-MD
  - local: nemo_gym
    title: NeMo Gym
  - local: online_dpo_trainer
    title: Online DPO
  - local: orpo_trainer
    title: ORPO
  - local: papo_trainer
    title: PAPO
  - local: ppo_trainer
    title: PPO
  - local: prm_trainer
    title: PRM
  - local: sdft_trainer
    title: SDFT
  - local: sdpo_trainer
    title: SDPO
  - local: ssd_trainer
    title: SSD
  - local: tpo_trainer
    title: TPO
  - local: xpo_trainer
    title: XPO
  title: Experimental
|