File size: 1,397 Bytes
fa4458a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Navigation group headed "Get started" (the group's own `title` key is the
# last line of this entry, after the `sections` list).
# Each item under `sections` pairs a `local` value with a human-readable
# `title`.
# NOTE(review): `local` presumably names a documentation page without its file
# extension -- confirm against the docs build tool that consumes this file.
- sections:
  - local: index
    title: TRL
  - local: quickstart
    title: Quickstart
  - local: installation
    title: Installation
  # NOTE(review): the title here does not mirror the `local` name
  # (`how_to_train` vs. "PPO Training FAQ"); confirm this is intended.
  - local: how_to_train
    title: PPO Training FAQ
  - local: use_model
    title: Use Trained Models
  - local: customization
    title: Customize the Training
  - local: logging
    title: Understanding Logs
  # Heading for the whole group above (sibling of `sections`, not an item).
  title: Get started
# Navigation group headed "API" (the group's own `title` key is the last line
# of this entry, after the `sections` list).
# Each item under `sections` pairs a `local` value with a human-readable
# `title`; most entries here are named after a trainer (`*_trainer`).
# NOTE(review): `local` presumably names a documentation page without its file
# extension -- confirm against the docs build tool that consumes this file.
- sections:
  - local: models
    title: Model Classes
  - local: trainer
    title: Trainer Classes
  - local: reward_trainer
    title: Reward Model Training
  - local: sft_trainer
    title: Supervised Fine-Tuning
  - local: ppo_trainer
    title: PPO Trainer
  - local: best_of_n
    title: Best of N Sampling
  - local: dpo_trainer
    title: DPO Trainer
  - local: ddpo_trainer
    title: Denoising Diffusion Policy Optimization
  - local: iterative_sft_trainer
    title: Iterative Supervised Fine-Tuning
  - local: text_environments
    title: Text Environments
  # Heading for the whole group above (sibling of `sections`, not an item).
  title: API
# Navigation group headed "Examples" (the group's own `title` key is the last
# line of this entry, after the `sections` list).
# Each item under `sections` pairs a `local` value with a human-readable
# `title`.
# NOTE(review): `local` presumably names a documentation page without its file
# extension -- confirm against the docs build tool that consumes this file.
- sections:
  - local: example_overview
    title: Example Overview
  - local: sentiment_tuning
    title: Sentiment Tuning
  - local: lora_tuning_peft
    title: Training with PEFT
  - local: detoxifying_a_lm
    title: Detoxifying a Language Model
  # NOTE(review): the title here does not mirror the `local` name
  # (`using_llama_models` vs. "Training StackLlama"); confirm this is intended.
  - local: using_llama_models
    title: Training StackLlama
  - local: learning_tools
    title: Learning to Use Tools
  - local: multi_adapter_rl
    title: Multi Adapter RLHF
  # Heading for the whole group above (sibling of `sections`, not an item).
  title: Examples