| _app | | | 366 items |
| _toctree.yml | 2.79 kB | | 2fbd9842 |
| alignprop_trainer.html | 25 kB | | 68eb8cf5 |
| bco_trainer.html | 178 kB | | 89d465ff |
| best_of_n.html | 15.7 kB | | 4d538907 |
| callbacks.html | 103 kB | | 07c0b6d1 |
| clis.html | 66.2 kB | | c62b80d8 |
| community_tutorials.html | 18.2 kB | | 589bd50d |
| cpo_trainer.html | 151 kB | | 7e0fedf7 |
| customization.html | 23.9 kB | | 60adb9b3 |
| data_utils.html | 158 kB | | 882bff74 |
| dataset_formats.html | 228 kB | | 5a667625 |
| ddpo_trainer.html | 144 kB | | 9471ff7d |
| deepspeed_integration.html | 13.8 kB | | 7d5b9ebc |
| detoxifying_a_lm.html | 46.3 kB | | 377ce448 |
| distributing_training.html | 15.8 kB | | 84b980af |
| dpo_trainer.html | 280 kB | | 224e021b |
| example_overview.html | 23.5 kB | | 0b17f0c8 |
| favicon.png | 1.57 kB | | 6e06dd7b |
| gkd_trainer.html | 101 kB | | 2692a404 |
| grpo_trainer.html | 424 kB | | 12d5a463 |
| how_to_train.html | 20.7 kB | | a3a78011 |
| index.html | 14.6 kB | | 42532155 |
| installation.html | 11.5 kB | | 924916cf |
| iterative_sft_trainer.html | 117 kB | | 838fbfbe |
| judges.html | 117 kB | | bcd09b5b |
| kto_trainer.html | 156 kB | | 8fe948d8 |
| learning_tools.html | 44.9 kB | | 8881885f |
| liger_kernel_integration.html | 8.91 kB | | 98d0826a |
| logging.html | 16.5 kB | | 1a8e4308 |
| lora_tuning_peft.html | 35.7 kB | | d575367a |
| model_utils.html | 36.4 kB | | c846a70c |
| models.html | 124 kB | | 6ef42396 |
| multi_adapter_rl.html | 21.9 kB | | 2616b7bb |
| nash_md_trainer.html | 128 kB | | 965489dc |
| objects.inv | 2.33 kB | | 40031142 |
| online_dpo_trainer.html | 165 kB | | 1a27970e |
| orpo_trainer.html | 136 kB | | 8ed1eab4 |
| others.html | 25.3 kB | | 2d7ea4fa |
| paper_index.html | 15.7 kB | | 04abf8b9 |
| peft_integration.html | 35.8 kB | | 015dd0ad |
| ppo_trainer.html | 128 kB | | 15e4653f |
| prm_trainer.html | 120 kB | | 7e023d50 |
| quickstart.html | 33 kB | | fadb84ec |
| reducing_memory_usage.html | 37.8 kB | | f47d06f9 |
| reward_trainer.html | 98.4 kB | | df36dfe1 |
| rloo_trainer.html | 132 kB | | b2309b0c |
| script_utils.html | 99.2 kB | | e0933a87 |
| sentiment_tuning.html | 11.1 kB | | 8acda2f6 |
| sft_trainer.html | 266 kB | | c8964b4e |
| speeding_up_training.html | 11.1 kB | | 83d280ef |
| text_environments.html | 104 kB | | e4b40eea |
| unsloth_integration.html | 9.68 kB | | c6343585 |
| use_model.html | 15.1 kB | | 38047d39 |
| using_llama_models.html | 30.3 kB | | d0138597 |
| vllm_integration.html | 50.2 kB | | 2f59c890 |
| xpo_trainer.html | 101 kB | | ebb476d5 |