| _app | | | 519 items |
| _toctree.yml | 2.97 kB | | d346ed79 |
| bco_trainer.html | 141 kB | | 8208f24c |
| bco_trainer.md | 27.6 kB | | 176337c4 |
| bema_for_reference_model.html | 10.1 kB | | 17d19cf5 |
| bema_for_reference_model.md | 1.08 kB | | 850342b8 |
| callbacks.html | 131 kB | | 21a18ee7 |
| callbacks.md | 15.7 kB | | 2231d2bb |
| clis.html | 45.7 kB | | 8e7cedae |
| clis.md | 9.37 kB | | 4f01e4f6 |
| community_tutorials.html | 27.1 kB | | 2635b18e |
| community_tutorials.md | 10.3 kB | | 068e8419 |
| cpo_trainer.html | 154 kB | | 98d61ed3 |
| cpo_trainer.md | 35.7 kB | | c3b52725 |
| customization.html | 26.4 kB | | 056773b6 |
| customization.md | 4.6 kB | | 51660c4f |
| data_utils.html | 177 kB | | 30472a08 |
| data_utils.md | 24.7 kB | | c4b74709 |
| dataset_formats.html | 218 kB | | 33c6cf56 |
| dataset_formats.md | 42.4 kB | | 90387303 |
| deepspeed_integration.html | 15.9 kB | | e52efa84 |
| deepspeed_integration.md | 1.63 kB | | 29c8b2bd |
| distributing_training.html | 43.2 kB | | 7c3737d1 |
| distributing_training.md | 9.82 kB | | 09ac1cc8 |
| dpo_trainer.html | 251 kB | | 364ff3dd |
| dpo_trainer.md | 61.3 kB | | 2a805aeb |
| example_overview.html | 30.1 kB | | f5a82215 |
| example_overview.md | 11.9 kB | | 7488c10a |
| experimental.html | 40.6 kB | | f6820619 |
| experimental.md | 6.53 kB | | 7e69ae86 |
| experimental_overview.html | 10.8 kB | | bee0e1ad |
| experimental_overview.md | 1.72 kB | | f844fc14 |
| favicon.png | 1.57 kB | | 6e06dd7b |
| gfpo.html | 11 kB | | 05bb900c |
| gfpo.md | 1.58 kB | | 4f0617e9 |
| gkd_trainer.html | 123 kB | | dfd408d1 |
| gkd_trainer.md | 27.2 kB | | e88a3b6e |
| gold_trainer.html | 144 kB | | 8cf6f6fe |
| gold_trainer.md | 28.8 kB | | 73ac44fd |
| grpo_trainer.html | 427 kB | | 8dbd4a85 |
| grpo_trainer.md | 76.6 kB | | 0318ce2c |
| grpo_with_replay_buffer.html | 11 kB | | efbe93be |
| grpo_with_replay_buffer.md | 1.39 kB | | 3ad1488d |
| gspo_token.html | 17.5 kB | | 0ca2363c |
| gspo_token.md | 946 Bytes | | 1a7ba23b |
| index.html | 28.9 kB | | e6f1b60e |
| index.md | 9.19 kB | | 52b4c245 |
| installation.html | 13.9 kB | | eb1c9b43 |
| installation.md | 924 Bytes | | a54d5614 |
| jobs_training.html | 32.9 kB | | 1cfff6cc |
| jobs_training.md | 7.24 kB | | 5e32c47d |
| judges.html | 119 kB | | 0fd41a3e |
| judges.md | 15.8 kB | | 3c0cfcab |
| kernels_hub.html | 26.8 kB | | 9d793c24 |
| kernels_hub.md | 4.79 kB | | aa7afb04 |
| kto_trainer.html | 158 kB | | b537f778 |
| kto_trainer.md | 34.2 kB | | e13aa6ae |
| liger_kernel_integration.html | 15 kB | | 8eaabf48 |
| liger_kernel_integration.md | 2.5 kB | | 661b48ae |
| llms-full.txt | 1.05 MB | | ccf8d052 |
| llms.txt | 4.47 kB | | 04a15aa7 |
| lora_without_regret.html | 52.3 kB | | e1c9be05 |
| lora_without_regret.md | 18.2 kB | | 661818a8 |
| model_utils.html | 39.4 kB | | 323ae854 |
| model_utils.md | 5.75 kB | | 667a80b4 |
| models.html | 128 kB | | 4e30a936 |
| models.md | 20.2 kB | | b87c7375 |
| multi_adapter_rl.html | 24.3 kB | | 2803d583 |
| multi_adapter_rl.md | 3.91 kB | | 4345e603 |
| nash_md_trainer.html | 134 kB | | a9717b3a |
| nash_md_trainer.md | 30.4 kB | | 0a0b8d42 |
| objects.inv | 2.29 kB | | 6a72271c |
| online_dpo_trainer.html | 193 kB | | 061403c7 |
| online_dpo_trainer.md | 45.4 kB | | 2f531b84 |
| openenv.html | 66 kB | | a4fb3495 |
| openenv.md | 17.5 kB | | ac67861f |
| orpo_trainer.html | 135 kB | | 30a1bba5 |
| orpo_trainer.md | 30.7 kB | | 70df507b |
| others.html | 27.7 kB | | c9f361b7 |
| others.md | 2.7 kB | | d7912429 |
| paper_index.html | 245 kB | | 1eaa0172 |
| paper_index.md | 27.7 kB | | cd7ea2ac |
| papo_trainer.html | 114 kB | | 37b8f399 |
| papo_trainer.md | 23.8 kB | | 081aabca |
| peft_integration.html | 56.1 kB | | d10107a2 |
| peft_integration.md | 8.14 kB | | 26830ac9 |
| ppo_trainer.html | 144 kB | | f95f07ea |
| ppo_trainer.md | 38.1 kB | | 59a9a51a |
| prm_trainer.html | 123 kB | | 5fd8f614 |
| prm_trainer.md | 25.8 kB | | c51a0c9e |
| quickstart.html | 38.1 kB | | 42dc109b |
| quickstart.md | 3.7 kB | | 8bee82e9 |
| rapidfire_integration.html | 81.3 kB | | 54675f79 |
| rapidfire_integration.md | 13 kB | | 80300287 |
| reducing_memory_usage.html | 44.7 kB | | 10cca545 |
| reducing_memory_usage.md | 10.7 kB | | 33523bfa |
| reward_trainer.html | 198 kB | | 257f1be9 |
| reward_trainer.md | 39.1 kB | | a3be6aad |
| rewards.html | 58.9 kB | | 8f000a77 |
| rewards.md | 5.62 kB | | 6570587f |
|