# Launch the `openrlhf.cli.train_dpo` module through the DeepSpeed launcher.
#
# Inputs/outputs named on the command line:
#   --pretrain   base model id: mistralai/Mistral-Nemo-Instruct-2407
#   --dataset    /mnt/nvme1/will/2_dpo (fields selected via --chosen_key / --rejected_key)
#   --save_path  /mnt/nvme1/will/3_dpo
#
# NOTE(review): the leading `!` looks like the IPython/Jupyter shell escape;
# drop it if this is ever run from a plain shell script.
# NOTE(review): exact flag semantics (e.g. what `--eval_steps -1` means, how
# `--train_batch_size` relates to `--micro_train_batch_size`) are defined by the
# OpenRLHF CLI, which is not visible here -- confirm against its documentation.
#
# Each `\` must be the very last character on its line so the shell treats the
# following line as a continuation; no trailing characters may follow it.
!deepspeed --module openrlhf.cli.train_dpo \
  --save_path '/mnt/nvme1/will/3_dpo' \
  --save_steps 1024 \
  --logging_steps 10 \
  --eval_steps -1 \
  --train_batch_size 512 \
  --micro_train_batch_size 2 \
  --pretrain 'mistralai/Mistral-Nemo-Instruct-2407' \
  --bf16 \
  --max_epochs 1 \
  --max_len 2048 \
  --zero_stage 3 \
  --learning_rate 5e-6 \
  --dataset '/mnt/nvme1/will/2_dpo' \
  --apply_chat_template \
  --chosen_key chosen \
  --rejected_key rejected \
  --flash_attn \
  --gradient_checkpointing