# Launch one PPO benchmark through benchmark/benchmark.py.
#
# Arguments:
#   $1 - the full training command line, forwarded as a single string via
#        --command.
#
# Every launch in this section uses the same seed, worker, and Slurm flags,
# so they are kept in one place here instead of being repeated per
# experiment.
# Returns: the exit status of the benchmark/benchmark.py invocation.
run_benchmark() {
  python benchmark/benchmark.py \
    --command "$1" \
    --num-seeds 3 \
    --start-seed 1 \
    --workers 10 \
    --slurm-nodes 1 \
    --slurm-gpus-per-task 1 \
    --slurm-ntasks 1 \
    --slurm-total-cpus 12 \
    --slurm-template-path benchmark/trl.slurm_template
}

## w/ and w/o gradient accumulation
# Only the gradient-accumulation variant is launched here
# (mini_batch_size 1, gradient_accumulation_steps 128).
run_benchmark "python examples/scripts/ppo.py --ppo_config.exp_name ppo_step_grad_accu --ppo_config.mini_batch_size 1 --ppo_config.gradient_accumulation_steps 128 --ppo_config.log_with wandb"

## w/ different models (gpt2, gpt2-xl, falcon, llama2)
# Only two runs are launched in this section: ppo_gpt2 and ppo_falcon_rw_1b.
# NOTE(review): ppo_gpt2 passes no --ppo_config.model_name, so it presumably
# relies on the default model of examples/scripts/ppo.py -- confirm.
run_benchmark "python examples/scripts/ppo.py --ppo_config.exp_name ppo_gpt2 --ppo_config.log_with wandb"
run_benchmark "python examples/scripts/ppo.py --ppo_config.exp_name ppo_falcon_rw_1b --ppo_config.model_name tiiuae/falcon-rw-1b --ppo_config.log_with wandb"

## w/ and w/o PEFT
# Only the PEFT-enabled variant (--use_peft) is launched here.
run_benchmark "python examples/scripts/ppo.py --ppo_config.exp_name ppo_peft --use_peft --ppo_config.log_with wandb"