|
|
# SFT training across 8 processes using the legacy script (kept for
# comparison against the refactored version).
poetry run accelerate launch --num_processes=8 old/sft.py


# SFT training with the current script.
poetry run accelerate launch --num_processes=8 summarize_from_feedback_details/sft.py




# Evaluation-only SFT pass (zero training epochs) with tracking enabled.
poetry run accelerate launch --num_processes=8 summarize_from_feedback_details/sft.py --num_train_epochs=0 --run_eval --track
|
|
|
|
|
# Reward-model training across 8 processes.
poetry run accelerate launch --num_processes=8 summarize_from_feedback_details/reward.py




# Reward-model evaluation only (zero training epochs).
poetry run accelerate launch --num_processes=8 summarize_from_feedback_details/reward.py --num_train_epochs=0 --run_eval




# DPO evaluation only, run interactively (-i keeps the REPL open afterwards
# for inspection).
poetry run python -i summarize_from_feedback_details/dpo.py --num_train_epochs=0 --run_eval
|
|
|
|
|
|
|
|
# PPO training with the legacy script (kept for comparison).
poetry run accelerate launch --num_processes=8 old/ppo_left_padding.py


# PPO training with the current script.
poetry run accelerate launch --num_processes=8 summarize_from_feedback_details/ppo_left_padding.py




# PPO evaluation only (zero training epochs).
poetry run accelerate launch --num_processes=8 summarize_from_feedback_details/ppo_left_padding.py --num_train_epochs=0 --run_eval
|
|
|
|
|
|
|
|
|
|
|
# Full PPO training run for the 6.9B policy under DeepSpeed.
# NOTE(review): sft/reward paths point at the base pretrained model rather
# than fine-tuned checkpoints — presumably a smoke test; confirm before
# using this run for comparisons.
# Fix: blank lines inside a backslash continuation terminate the command
# early in bash, so the invocation is kept contiguous.
poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/ppo_left_padding.py \
    --local_rollout_forward_batch_size=2 \
    --gradient_accumulation_steps=32 \
    --local_micro_batch_size=2 \
    --base_model=EleutherAI/pythia-6.9b-deduped \
    --sft_model_path=EleutherAI/pythia-6.9b-deduped \
    --reward_model_path=EleutherAI/pythia-6.9b-deduped \
    --lr=3e-6 \
    --deepspeed \
    --run_eval \
    --push_to_hub \
    --track \
    --seed=6661
|
|
|
|
|
# Minimal PPO run for the 6.9B base model (no reward checkpoint, no
# tracking) — memory-fit test with a larger rollout batch and more gradient
# accumulation. Kept contiguous: blank lines inside a backslash
# continuation end the command early in bash.
poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/ppo_left_padding.py \
    --local_rollout_forward_batch_size=4 \
    --gradient_accumulation_steps=64 \
    --local_micro_batch_size=1 \
    --base_model=EleutherAI/pythia-6.9b-deduped \
    --sft_model_path=EleutherAI/pythia-6.9b-deduped \
    --deepspeed
|
|
|
|
|
# Interactive on-policy DPO run against the seed-44413 1B checkpoints.
poetry run python -i summarize_from_feedback_details/dpo_on_policy.py --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 --base_model=EleutherAI/pythia-1b-deduped
|
|
|
|
|
|
|
|
|
|
|
# Per-model batch-size overrides: the 1B model fits larger batches, so bump
# rollout/micro/eval sizes and shrink gradient accumulation accordingly.
case "$MODEL" in
  "EleutherAI/pythia-1b-deduped")
    local_rollout_forward_batch_size=64
    gradient_accumulation_steps=4
    local_micro_batch_size=16
    local_eval_batch_size=32
    ;;
esac
|
|
|
|
|
# Submit on-policy DPO training (1B model, seed-44413 checkpoints) through
# SLURM. Kept contiguous: blank lines inside a backslash continuation end
# the command early in bash.
sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
|
|
|
# Interactive (single-process, no DeepSpeed) on-policy DPO run scored with
# the 1B reward model. Kept contiguous: blank lines inside a backslash
# continuation end the command early in bash.
poetry run python -i \
    summarize_from_feedback_details/dpo_on_policy.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
# Same interactive on-policy DPO run but scored with the 6.9B reward model;
# batch sizes dropped from 32 to 8 to fit the larger scorer in memory.
# Kept contiguous: blank lines inside a backslash continuation end the
# command early in bash.
poetry run python -i \
    summarize_from_feedback_details/dpo_on_policy.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-6.9b-deduped/reward_model_44413 \
    --local_eval_batch_size=8 \
    --local_rollout_forward_batch_size=8 \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
|
|
|
|
|
|
# On-policy DPO episode-budget sweeps submitted through SLURM. Each
# invocation is kept contiguous: blank lines inside a backslash
# continuation end the command early in bash.

# episodes2: 1x episode budget, 1B reward model.
WANDB_TAGS="dpo_onpolicy_episodes2" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=117000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

# episodes2: 2x episode budget.
WANDB_TAGS="dpo_onpolicy_episodes2" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

# episodes2_larger_rm: 1x budget scored with the 6.9B reward model
# (batch sizes dropped to 8 to fit the larger scorer).
WANDB_TAGS="dpo_onpolicy_episodes2_larger_rm" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=117000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-6.9b-deduped/reward_model_44413 \
    --local_eval_batch_size=8 \
    --local_rollout_forward_batch_size=8 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

# episodes2_larger_rm: 2x budget with the 6.9B reward model.
WANDB_TAGS="dpo_onpolicy_episodes2_larger_rm" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-6.9b-deduped/reward_model_44413 \
    --local_eval_batch_size=8 \
    --local_rollout_forward_batch_size=8 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

# Manual-adjustments baseline using the original dpo_on_policy.py script.
WANDB_TAGS="dpo_onpolicy_episodes_manual_adjustments" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# NOTE(review): bare script path plus flags — this line only works when
# pasted after an interpreter/launcher (or if the file is executable with a
# shebang); it is not runnable as-is. Confirm how it was invoked.
/fsx/costa/summarize_from_feedback_details/summarize_from_feedback_details/dpo_on_policy.py --base_model=EleutherAI/pythia-1b-deduped --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 --local_eval_batch_size=32 --local_rollout_forward_batch_size=32 --deepspeed --run_eval --track --push_to_hub
|
|
|
|
|
# Epoch-count sweep for on-policy DPO (default / 1 / 2 / 4 train epochs),
# all on the seed-44413 1B checkpoints. Each invocation is kept contiguous:
# blank lines inside a backslash continuation end the command early in bash.

# Default number of train epochs (flag omitted).
WANDB_TAGS="dpo_onpolicy_works_epochs" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

# 1 train epoch.
WANDB_TAGS="dpo_onpolicy_works_epochs" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --num_train_epochs=1 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

# 2 train epochs.
WANDB_TAGS="dpo_onpolicy_works_epochs" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --num_train_epochs=2 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

# 4 train epochs.
WANDB_TAGS="dpo_onpolicy_works_epochs" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --num_train_epochs=4 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
# Interactive runs of the new on-policy DPO script. Kept contiguous: blank
# lines inside a backslash continuation end the command early in bash.

# Scored with the 1B reward model.
poetry run python -i \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --run_eval \
    --track \
    --push_to_hub

# Scored with the 6.9B reward model (smaller batches to fit memory).
poetry run python -i \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-6.9b-deduped/reward_model_44413 \
    --local_eval_batch_size=8 \
    --local_rollout_forward_batch_size=8 \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
|
|
|
|
|
|
# Beta sweep (0.1 / 0.2 / 0.4) for on-policy DPO at the 2x episode budget.
# Each invocation is kept contiguous: blank lines inside a backslash
# continuation end the command early in bash.

WANDB_TAGS="dpo_onpolicy_episodes2_betas" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --beta=0.1 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

WANDB_TAGS="dpo_onpolicy_episodes2_betas" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --beta=0.2 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

WANDB_TAGS="dpo_onpolicy_episodes2_betas" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --beta=0.4 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
|
|
|
# No-rejection-sampling variant at the 2x episode budget, over two seeds
# (44413 and 55513). Each invocation is kept contiguous: blank lines inside
# a backslash continuation end the command early in bash.

WANDB_TAGS="dpo_onpolicy_episodes3_no_rejection_sampling" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

WANDB_TAGS="dpo_onpolicy_episodes3_no_rejection_sampling" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_55513 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_55513 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
# episodes4 and episodes5_longer_generation sweeps, each over two seeds
# (44413 and 55513). Each invocation is kept contiguous: blank lines inside
# a backslash continuation end the command early in bash.

WANDB_TAGS="dpo_onpolicy_episodes4" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

WANDB_TAGS="dpo_onpolicy_episodes4" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_55513 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_55513 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

WANDB_TAGS="dpo_onpolicy_episodes5_longer_generation" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub

WANDB_TAGS="dpo_onpolicy_episodes5_longer_generation" sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/dpo_on_policy_new.py \
    --total_episodes=234000 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_55513 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_55513 \
    --local_eval_batch_size=32 \
    --local_rollout_forward_batch_size=32 \
    --deepspeed \
    --run_eval \
    --track \
    --push_to_hub
|
|
|
|
|
|
|
|
# Interactive PPO debugging with the variant script ppo_left_padding1.
python -i summarize_from_feedback_details/ppo_left_padding1.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped




# PPO with a deliberately odd rollout batch (5) and heavier gradient
# accumulation — presumably probing batch-divisibility handling; confirm.
python -i summarize_from_feedback_details/ppo_left_padding.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --gradient_accumulation_steps 16 --local_rollout_forward_batch_size 5







# Reward-model evaluation only (zero train epochs) against the seed-44413
# checkpoints.
python -i summarize_from_feedback_details/reward.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413 --num_train_epochs=0
|
|
|
|
|
|
|
|
# PPO (ppo_left_padding_d) on the base 1B model under two DeepSpeed
# configurations. Each invocation is kept contiguous: blank lines inside a
# backslash continuation end the command early in bash.
poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/ppo_left_padding_d.py \
    --local_rollout_forward_batch_size=16 \
    --gradient_accumulation_steps=16 \
    --local_micro_batch_size=4 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=EleutherAI/pythia-1b-deduped \
    --lr=3e-6 \
    --deepspeed

# Same run against the stage-3 config.
# NOTE(review): assumes the script defines a --deepspeed3 flag — confirm;
# otherwise this should be --deepspeed.
poetry run accelerate launch --config_file deepspeed3.yaml \
    summarize_from_feedback_details/ppo_left_padding_d.py \
    --local_rollout_forward_batch_size=16 \
    --gradient_accumulation_steps=16 \
    --local_micro_batch_size=4 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=EleutherAI/pythia-1b-deduped \
    --lr=3e-6 \
    --deepspeed3
|
|
|
|
|
# Interactive debugging runs of PPO script variants, all against the
# seed-44413 1B checkpoints.
python -i summarize_from_feedback_details/ppo_left_padding1.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413




# Baseline script for comparison against the variants below.
python -i summarize_from_feedback_details/ppo_left_padding.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413







python -i summarize_from_feedback_details/ppo_left_padding1.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413


python -i summarize_from_feedback_details/ppo_left_padding3.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413


python -i summarize_from_feedback_details/ppo_left_padding4.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413


python -i summarize_from_feedback_details/ppo_left_padding5.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413


python -i summarize_from_feedback_details/ppo_left_padding6.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413


python -i summarize_from_feedback_details/ppo_left_padding_new1.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413
|
|
|
|
|
# Full SLURM submissions of the PPO script variants (padding2 / padding4 /
# padding5 / padding_new), identical hyperparameters and seed 44413. Each
# invocation is kept contiguous: blank lines inside a backslash
# continuation end the command early in bash.

sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/ppo_left_padding2.py \
    --local_rollout_forward_batch_size=64 \
    --gradient_accumulation_steps=4 \
    --local_micro_batch_size=16 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --lr=3e-6 \
    --deepspeed \
    --run_eval \
    --push_to_hub \
    --track \
    --seed=44413

sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/ppo_left_padding4.py \
    --local_rollout_forward_batch_size=64 \
    --gradient_accumulation_steps=4 \
    --local_micro_batch_size=16 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --lr=3e-6 \
    --deepspeed \
    --run_eval \
    --push_to_hub \
    --track \
    --seed=44413

sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/ppo_left_padding5.py \
    --local_rollout_forward_batch_size=64 \
    --gradient_accumulation_steps=4 \
    --local_micro_batch_size=16 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --lr=3e-6 \
    --deepspeed \
    --run_eval \
    --push_to_hub \
    --track \
    --seed=44413

sbatch r.sbatch poetry run accelerate launch --config_file deepspeed.yaml \
    summarize_from_feedback_details/ppo_left_padding_new.py \
    --local_rollout_forward_batch_size=64 \
    --gradient_accumulation_steps=4 \
    --local_micro_batch_size=16 \
    --base_model=EleutherAI/pythia-1b-deduped \
    --sft_model_path=models/EleutherAI/pythia-1b-deduped/sft_model_44413 \
    --reward_model_path=models/EleutherAI/pythia-1b-deduped/reward_model_44413 \
    --lr=3e-6 \
    --deepspeed \
    --run_eval \
    --push_to_hub \
    --track \
    --seed=44413
|
|
|
|
|
|
|
|
|
|
|
# Environment setup for a SLURM array job: pick the (model, seed) pair for
# this task id from a 2-model x 4-seed grid via integer division / modulo.
module load cuda/12.2

export WANDB_TAGS="refactor-chosen-rejected3,no-tag-$(git rev-parse --short HEAD)"

MODELS=("EleutherAI/pythia-2.8b-deduped" "EleutherAI/pythia-1b-deduped")
SEEDS=(44413 55513 66613 77713)

# An unset SLURM_ARRAY_TASK_ID evaluates to 0 in arithmetic, selecting the
# first model/seed.
MODEL_INDEX=$((SLURM_ARRAY_TASK_ID / 4))
SEED_INDEX=$((SLURM_ARRAY_TASK_ID % 4))
MODEL="${MODELS[MODEL_INDEX]}"
SEED="${SEEDS[SEED_INDEX]}"

echo "Running task ${SLURM_ARRAY_TASK_ID} with SEED: ${SEED} and MODEL: ${MODEL}"
|
|
|
|
|
# Fall back to defaults when the variables were not provided (e.g. when run
# outside the SLURM array wrapper). ${var:=default} assigns exactly when
# the variable is unset or empty, matching the original [ -z ... ] checks.
: "${SEED:=66613}"
: "${MODEL:=EleutherAI/pythia-1b-deduped}"
: "${LR:=3e-6}"
|
|
|
|
|
# Derived checkpoint locations for the selected (model, seed) combination.
REWARD_MODEL_PATH="models/${MODEL}/reward_model_${SEED}"
SFT_MODEL_PATH="models/${MODEL}/sft_model_${SEED}"
POLICY_MODEL_PATH="models/${MODEL}/policy_model_${SEED}"
DPO_POLICY_MODEL_PATH="models/${MODEL}/dpo_policy_model_${SEED}"
|
|
|
|
|
# Evaluation-only reward-model run (zero train epochs) for the selected
# MODEL/SEED, logged under exp_name reward_eval.
# Fix 1: --local_eval_batch_size was passed twice (a hard-coded 32 and the
# per-model $local_eval_batch_size, which may be empty); a single flag is
# passed, defaulting to 32 when the per-model override is unset.
# Fix 2: blank lines inside a backslash continuation end the command early
# in bash, so the invocation is kept contiguous.
poetry run accelerate launch --num_processes 8 \
    summarize_from_feedback_details/reward.py \
    --num_train_epochs=0 \
    --run_eval \
    --base_model=$MODEL \
    --sft_model_path=$SFT_MODEL_PATH \
    --lr=$LR \
    --deepspeed \
    --exp_name reward_eval \
    --track \
    --output_dir=$REWARD_MODEL_PATH \
    --local_eval_batch_size="${local_eval_batch_size:-32}" \
    --seed=$SEED
|
|
|
|
|
# Submit token-visualization jobs for each trained 6.9B policy.
# NOTE(review): the redirection captures sbatch's own stdout (the
# "Submitted batch job N" message), not the job's output — confirm that
# r.sbatch routes the python output where expected.
sbatch r.sbatch python visualize_tokens.py > sft6.9b.txt


sbatch r.sbatch python visualize_tokens.py > ppo6.9b.txt


sbatch r.sbatch python visualize_tokens.py > dpo6.9b.txt
|
|
|
|
|
|
|
|
# Interactive PPO-LoRA debugging run on the 1B checkpoints.
python -i summarize_from_feedback_details/ppo_lora.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413

# PPO-LoRA on the 6.9B checkpoints under DeepSpeed; smaller rollout batch
# plus heavier gradient accumulation to fit memory.
accelerate launch --config_file deepspeed.yaml summarize_from_feedback_details/ppo_lora.py --sft_model_path models/EleutherAI/pythia-6.9b-deduped/sft_model_44413 --base_model EleutherAI/pythia-6.9b-deduped --reward_model_path models/EleutherAI/pythia-6.9b-deduped/reward_model_44413 --local_rollout_forward_batch_size=4 --gradient_accumulation_steps=64

# Tiny smoke run (64 episodes) that also pushes to the Hub.
# Fix: --push-to-hub -> --push_to_hub, matching the underscore spelling
# used by every other invocation in this file.
python -i summarize_from_feedback_details/ppo_lora.py --sft_model_path models/EleutherAI/pythia-1b-deduped/sft_model_44413 --base_model EleutherAI/pythia-1b-deduped --reward_model_path models/EleutherAI/pythia-1b-deduped/reward_model_44413 --push_to_hub --total_episodes 64