# 2_dpo / README.md

Created by zhenzhe ("Create README.md", commit 17bf88a, verified)
# Launch DPO (Direct Preference Optimization) fine-tuning of
# mistralai/Mistral-Nemo-Instruct-2407 with OpenRLHF's DeepSpeed trainer.
#
# Key settings:
#   --zero_stage 3           ZeRO stage-3 parameter/optimizer sharding
#   --train_batch_size 512   global batch size (micro batch 2 per device step,
#                            remainder covered by gradient accumulation)
#   --eval_steps -1          periodic evaluation disabled
#   --apply_chat_template    render chosen/rejected pairs with the model's
#                            chat template; preference keys are 'chosen' and
#                            'rejected' in the dataset at /mnt/nvme1/will/2_dpo
#   --bf16 / --flash_attn / --gradient_checkpointing
#                            memory- and throughput-oriented training options
#
# NOTE: the original command began with '!' (a Jupyter notebook shell-escape
# artifact). In a real shell script a leading '!' inverts the exit status,
# masking failures, so it has been removed.
deepspeed --module openrlhf.cli.train_dpo \
  --save_path '/mnt/nvme1/will/3_dpo' \
  --save_steps 1024 \
  --logging_steps 10 \
  --eval_steps -1 \
  --train_batch_size 512 \
  --micro_train_batch_size 2 \
  --pretrain 'mistralai/Mistral-Nemo-Instruct-2407' \
  --bf16 \
  --max_epochs 1 \
  --max_len 2048 \
  --zero_stage 3 \
  --learning_rate 5e-6 \
  --dataset '/mnt/nvme1/will/2_dpo' \
  --apply_chat_template \
  --chosen_key chosen \
  --rejected_key rejected \
  --flash_attn \
  --gradient_checkpointing