#!/usr/bin/env bash
#
# Launches one run of `python3 -m verl.trainer.main`, starting from
# config.yaml (resolved relative to the current working directory) and
# passing the key=value arguments listed below on the command line.
# NOTE(review): the key=value arguments presumably override entries of
# config.yaml -- confirm against the trainer's documentation.
#
# Edit MODEL_PATH and EXPERIMENT_NAME before launching a new run.

# -e: stop on the first failing command; -u: treat unset variables as errors;
# -x: trace every command so the fully expanded launch line lands in the log.
set -eux

# Keep Python's stdout/stderr unbuffered so log lines show up immediately.
export PYTHONUNBUFFERED=1

# Non-default Weights & Biases endpoint.
# NOTE(review): looks like a third-party mirror/proxy -- confirm it is trusted
# before sending run data or API keys through it.
export WANDB_BASE_URL=https://api.bandw.top

# Checkpoint the actor model is initialised from; replace it with your local
# file path.
MODEL_PATH=/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300

# Single source of truth for the run id: it is used both as the experiment
# name and as the last component of the checkpoint directory, so the two
# cannot drift apart.
EXPERIMENT_NAME=qwen2.5_7b_bio_06182042

# Every continued line must end in "<space>\" -- without the space the
# argument is glued to the one on the following line.
# NOTE(review): the "@train" / "@validation" suffixes presumably select a
# dataset split -- confirm against the data loader.
python3 -m verl.trainer.main \
  config=config.yaml \
  data.train_files=/nas/shared/kilab/wangyujia/rl_data/deeplocmulti@train \
  data.val_files=/nas/shared/kilab/wangyujia/rl_data/deeplocmulti@validation \
  data.max_prompt_length=4096 \
  data.max_response_length=16384 \
  worker.actor.model.model_path="${MODEL_PATH}" \
  worker.rollout.tensor_parallel_size=1 \
  worker.rollout.n=5 \
  trainer.experiment_name="${EXPERIMENT_NAME}" \
  trainer.n_gpus_per_node=8 \
  trainer.total_epochs=1 \
  trainer.save_checkpoint_path="/oss/wangyujia/BIO/rl/${EXPERIMENT_NAME}"