|
|
SEED=1 |
|
|
if [ -z "$MODEL" ]; then |
|
|
|
|
|
|
|
|
|
|
|
MODEL=EleutherAI/pythia-410m-deduped |
|
|
fi |
|
|
LR=3e-6 |
|
|
REWARD_MODEL_PATH=models/$MODEL/reward_model_$SEED |
|
|
SFT_MODEL_PATH=models/$MODEL/sft_model_$SEED |
|
|
POLICY_MODEL_PATH=models/$MODEL/policy_model_$SEED |
|
|
|
|
|
|
|
|
local_rollout_forward_batch_size=2 |
|
|
gradient_accumulation_steps=64 |
|
|
local_micro_batch_size=1 |
|
|
local_eval_batch_size=1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Stage 1: supervised fine-tuning (SFT) of the base model.
# All expansions are quoted so paths with unusual characters survive intact.
poetry run accelerate launch --config_file deepspeed.yaml \
  summarize_from_feedback_details/sft.py \
  --base_model="$MODEL" \
  --lr="$LR" \
  --deepspeed \
  --track \
  --output_dir="$SFT_MODEL_PATH" \
  --push_to_hub \
  --run_eval \
  --seed="$SEED"
|
# Stage 2: train the reward model, initialized from the SFT checkpoint.
# All expansions are quoted so paths with unusual characters survive intact.
poetry run accelerate launch --config_file deepspeed.yaml \
  summarize_from_feedback_details/reward.py \
  --base_model="$MODEL" \
  --sft_model_path="$SFT_MODEL_PATH" \
  --lr="$LR" \
  --deepspeed \
  --run_eval \
  --track \
  --output_dir="$REWARD_MODEL_PATH" \
  --push_to_hub \
  --local_eval_batch_size="$local_eval_batch_size" \
  --seed="$SEED"
|
# Stage 3: PPO policy training against the trained reward model,
# starting from the SFT checkpoint.
# All expansions are quoted so paths with unusual characters survive intact.
poetry run accelerate launch --config_file deepspeed.yaml \
  summarize_from_feedback_details/ppo.py \
  --local_rollout_forward_batch_size="$local_rollout_forward_batch_size" \
  --gradient_accumulation_steps="$gradient_accumulation_steps" \
  --local_micro_batch_size="$local_micro_batch_size" \
  --base_model="$MODEL" \
  --sft_model_path="$SFT_MODEL_PATH" \
  --reward_model_path="$REWARD_MODEL_PATH" \
  --lr="$LR" \
  --deepspeed \
  --run_eval \
  --track \
  --output_dir="$POLICY_MODEL_PATH" \
  --push_to_hub \
  --seed="$SEED"