diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..50c9c64582b1f58bda46f16e48226fbaf6a75d74 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,363 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..7f860b59ca743ca8ef7912fbaa391698f943c126 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ccbee634a155d163937d876a11faa882625a779911a16b1b1b06b6c76220d6f +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6cd70b3462c418da7ff586a390236f32e35c8da5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4526eae493758fce89fb0854abd4515230163b755ce353a972a8016d25a830a +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..88af2e1e1f11583a7eb0146a5b2a48cef561902f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d864e9a2b41a8cd58d664c6d28e04f1d28191a77ae3dcc20403698324da3c3cc +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d0a41372ba190a0863ef1cbeb0b432972ec7cd9a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e713cc5ebf2950c96abd16251e318d886bcf51f9eaa1851f4c3af9bf9183c1ff +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5cbaae51144944377edc1e66528c8f2154059387 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfa6ec0ddecbffb08cb33bdbc7a1b23d9ffd299c6dc2e07eac407ccfc195b613 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..283a77440ec9fef4c85703f0d41776544c5b91b9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7656be8e0e7429b16c8b10886f29077e9980ae632e359ae2e8233abbc2e76b +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..28d767f5a980fb7b3be4baf31a4d58b9e634542d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d58777b076a2c6661097c63121b0a5899ab44446f389af1ca55b5b3a7c2ff6d4 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a6e63a3385aee78b92e6f667f248029b65830075 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7d63034e9a6cc1de38d4b32eda9f8f440d7ddb4f3f93526ea30db46d8443442 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e7c74edce9a40dc7882485a2a5e06bbff7764195 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbcd99f57845f9d766d30faa4f8cee03aad08fd73805b8718b36502fe584d786 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..64531a207e22f579c4c2d22ec9474fec945e3980 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52aedde8f61f4831bd01cf0923acf55413ec614c41e6e6fea2b1348ab20cc2ef +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2225a96c6c2fa0f6d8712d423efd0def9f9a52af --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc865706c8567aac68b28e12a424c60fd5efef014b02f1dc6b09ef9507d540b +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7ead272d9a349fa869ab19152ea75ea4adfba20f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b028a37f645739ee195721134d4c180b16c8cdecbaca1e48144f695b234e0a +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..620ec0b12a54583d0ff8b42d1794f231018a987b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e613580c913b5ea83d75726d44e9db07b14ed1fe276130b356aa323c82c2b8 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9d1fc7218f655dca4672d359fb380a57cf1f048f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:659b4adfd38a805984f533184fbafe5d67e84bac2b2a5c3216f27352c534f4f5 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1a7f02693a8e073ba48d35902da4d34f789088cf --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b8f79581dfd1fe8463165e7ec4a73eb9d8c66b69a439f03b953312ef7f8b16 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c9953489dfed7cecb01ee668a24ce65259c74e76 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab85cfb568ba9db5d02b846fb5642ff3c0a98574ac51c84f89a5e5474f2b483c +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b8b81a9b332b7ff31e62bc69776d452e639d0e6c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5993088d95e573804388aad9195984f4efbee295810e7d7c195430d782e25f41 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4a727763688924b0b28b6ac00785dd04c0de7c27 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bff9e1e1eff0aa54cbf9124f166bdb0f4b478509ccad824e3a0d0d658756b5d +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-100/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..f736bfe532d7e99bd98a6a32866d848a21a02d76 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67ac3c4038beb665d2cc4bd735b6f05977897a2757187e8c7c8e6b89fa4ad3d +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-100/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b46f01f4effba938049f318258b3987bffd06aa --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:219fc23b677e769ae4d4806c12e3df4ff2b78c28b311847bec2ecb99a35a51e6 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-100/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e5963a1697f39b58d0a25802e72eac30bae3e81 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a352b51cb68b5d8818a7a28f74f32dffb1095d7e281ff4022f0e365fe98a8ee0 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-100/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..a84335319e6e42b032d1d6da793e5377d412cf81 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9e8f9785d0d17d62f1ba8a141384aa7a6438a53ecad5f21d877594c31b45d0 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-100/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a0ed8248d472d9e71bbf034a061371cbb346355 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a770deea02d60eea9348a15bd08ac4f95c99d6b5b113eb31bc2fa7631dba1988 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-100/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..74a809578c3cc5e6c60f1c6d9e0defa9447cc719 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b60fb34596e1922ede5c5a2479b5b98033b4b988dc300df633c12f5e255755 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-100/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-100/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..84a106e46cc9752aacfad3281bd314777c83b985 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84274411027a8eb72ed1179f8209a875b6f4101ac7c2790eaef04102df49af52 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-100/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..10798d00a6ae647b8d8bdb74a97955808519e065 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bbc89ce90c5e7e9f362eb48f3be0a6f39aee82e598c876d2d126ef971bbbfdc +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-100/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..18a25b44ce07bc51cbcafce5586c7593482826a5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b84ab1237abc7bd4d31945126355c5b6d9e26cb338d88dae9fd60030b2e1fb3 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-100/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..651483587739aa33cfc62c2c25126912b8059f0b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-100/trainer_state.json @@ -0,0 +1,70 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.05, + "eval_steps": 100, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0079315433619456e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..cb46a9c25a98c9740ba7eaf2f109c9da0aaf1c42 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c30bb42b1d65a8a4098013d3bfdd02d244a6c567309aa4ef545b4183c6955b03 +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..bdd4c88fd6c051f82ff3222d572e9689a58861f3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b5d1b79711fae3720f449d0d40275c01b0e96b5a7fe864c31b4491a848ee6b +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9aa3d1954f935f96fcc3ea175013439a56a3a965 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:214c1ecb8e75a386ec77f48df13aca4c47a45262025d158dbd2220ad8abc8f11 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0473fb2000cc59837b0c32ede8ca6ceaede16df5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e1d6100902f7681c2f8a9c10708e9b4ecda868e68cedcf3028c60afc75d841 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..31acecac9e925b576e6bc313b56b3eb4223df088 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896b5e929556060b8d63030599ec5e3c635fc17a4887d4d91c8f31890aadae72 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..469ca1c5d871d4ed6e8730809702b6c775c64a98 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3930433a7e8ae485f9636b46bd8f3ab46b6403bfb254338b679a08039732944 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f1aab6105fcfb6cc94ad86808036ee134b20c360 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:320b31ed02c8e90d25d384480cc892c7a6ae9a9163d4c00bbb7c40a1ae2a5292 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b089dfcacb13620c8203e7c5cf4a1de6d1a72132 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233b1396deb616a8530e4a4d8f26a106a03c5769c19c417f63a5b21282749ba7 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ea51bd40bcf2de4dc2d8cd0469c7a6441ea2ef04 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd532f9534fb0ef99704c1ff9e49286ecbfcd50e34215b4469f718397611a0a6 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..37b05598f60e9e4b1dea4ec5134aa1e2aa44ec02 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a539d2277b8ebaeb5dadbef9cb81fa72d4d87b91c8bc3fa56111677171ad484 +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4b0bb500863e676a7943fdf4cdf4f00826d210e8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f39d2a3b4181383e2784d531ae06e1ce44271a310e961fbea2fa35b9d13c83 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..11f5b3faf74dd06e351e86764f30bb7582db46f9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a855285e55f2beb08fef520523d1cc65f091495c020bb92842ca3e1e0edb6312 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..613e23af2feb0b1cae2c774245247e60ce998d92 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:324f4939b1dfbb48f3709e2221a7a0f27181b660ddabb7135c1d663c310a1023 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..86e6214979b2bc234b7dc17fd6b7b2d5527a728a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0973568ccfef37957714f0cca7b19e5e1d3ceb18e1a4c289045b0d135672010d +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f7dae5c9d3370843032b7a8d26026c2cf58a6f19 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc61280e9dfdbb692eed27cd30078122922e015a9d4b2833040857b840010c81 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a50c2ac6e26d0f98a00f463b532b1391f49587a1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1973f8711360db6bac3875d8806bfaeb0491ed682abbe251dbaf3bf672026ced +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c5896a7c323a9f1613e515652046fda615321f41 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e903f27d773d961dfcfac1cc113ef3246b27bf78def4d4420f4ea2d95e4096a +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d97d4992f80b5a3e3ab80fbf54d46271facb6fe0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d5db9f0b5a2bc8e4fd095a167b68b803fd0817f7134809e76d9eb10cfad75d +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..d5af0d362a3113d75dda7637fde01a29169fb8ef --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cce6fd095e8164b6174af806d5b65f1592b912a16965a6ac33d77e523c8ae2a +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3452b4c6342c5827692e58dda66dc3088e599489 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89ca7da8c752e198c07a80618c28fafea39abe5f5e38d625a1d96b586893f6e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..404a7e06dec824b6e49e724be0a89e3a76291d21 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a64aa0a7bd0e9443e2c11a9e1b32b905f251349e940dd3776471dd51dc9441 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..501b011b332818b8f0fa85551fc7e8679c367117 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a594654588fc00a315de06ecb649724c8831626a965fe1794770a5720439d77 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd7be1f793b16cca6404cb79e494ff102b8c2ffe --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05c534760702d2b502b038f225706d0fc2398437c12ed59dc6afeacd0f91fdb +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..659abf7b203527bae8d883d7408581f8a26efaa1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5642030f3e712a115127de41444ed858a7a4cd47e591eb5d813c9053141d0ee8 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..502636b8c422c55e279d34385c87e89e89b2c774 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e95998bcb1c519354ca7d81b1fc52e904512e12dac25fcb5d083773e08e027ec +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..c1ec569e924cf8b598e112a5174e2f903d038527 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac0d9420ffc0691423064caa05e83bae45e9091902d0a644809c8e2535119b7 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e37d0618b489fc19085acd1bbe069567c7c8447d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b23d844ad7ae2eb6c7cbba3f70be2436823b11da6591df71ddcc7059f5593c4 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1000/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b22864c06299c67634671ed37781188cad00fa16 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1000/trainer_state.json @@ -0,0 +1,394 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5, + "eval_steps": 100, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0079315433619456e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..6730b8067daa498eda9275dba4cca608a19eecef --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e961ee18d779f0d3369e726cab756ca1b33449843b8d10d74d17e10cdc20c9c +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0bc9ee75296f8bdea32cbe89e012e7e428810340 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa83f50cdde2642fc04decc16a975a9a01394dbf693fbb7e56aa7764e31c8020 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7fd36a06f6ac7d0f345a487d271a5d2141fe705c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:240e7782db1bb1ce5e284e6e46047868ba361fb5c77726f185c3c79213d92ecf +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..93d0ae54b31c724abebddb98968db9abf5eff3a0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9c6142a3a35f9f3873fa40a56117d86ac5dc263882af8ad3e8b7081b07859f +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4c7c934f4e3996766df20b8dbe8cfbfde0182cff --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f12a71ab0ae463c7fffa75c6e78c700d04af7274abc800f016714ea79591a2ab +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..495624334c1072e5ef5c453fb71a776f3f3c56cc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7249a565e403809632054da5ec93190e4299b069784ffcce920e5cbc200280fe +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e83165f6c90257c5225ad67b80d681e83b3758b7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58c374f9df6ce75800331de3be35cbff64f3527db140ab02cf7309c643431c83 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fa352fee9438bd9518417d4b558ec01ddf7c4a5e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:094a787100266dd73136126370bc851b6a719b0c5cf5b95785570f8d1aa7fe5c +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0be8ec695a02452e922161264c5e8c840d4ae7c7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf83e13f758325db716e96b2e23a41086e43da22d3de782a496793e852556ef +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..4800e23f880b4a9c1211f9791d546970804e88e6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be04bd09fbd89d9aecf2a7b723f354bc3a02913d211778bd7013fd6ac151f8e +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..136f1119c19ddf5eec4cf576068d6ffcf81f905b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488eb9f9fd34380430625e55aa40e5ed3dd503818884b5feef66e04e896e0982 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..caa225111eec5774f1bb3ca9b5a125ac23719f87 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99f6e9b8c1c2f2e5d9d6e817a941c4d55dd9a60692074136bad102f0c8896491 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b2fcdccf099fe794a5b2d6a571d83580c13f07db --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf2bee27c925546913e39b7e7cc750be0ca3d6559ba4afaaf58339e7624f567 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..60043d04c28d619d3a36ce6fa57ab0a103a5159b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f0f2c74e23124164ede5836a2344e8479b52c39a9ad59b2c851116eac2f62f +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..14357453f897fe6164b0309ce60c58e3d841eed4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc13ed6e8baadd18d03d9e4784ad671fead787460d8c0556d3f46dabc029bbb +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..67c53413eb17000d70731515cf139a60ac1c1199 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e23d897b0d8f9df57c3895b893729a91de26ede169acc77eb94ba297930dbb +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..08fda036162df0b12c0b8a74191c9e7597ad1c49 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a9dbb6f7f0090fd7502702ffd79b91d1e46933d5ca3eb0c908466aac9e7c207 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..29762ad7b022d9054f4539ca8da14ac7cc51bcfb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2be14162884ec85301d6d7bdd0689bdf10cfa442da29cfe19a5fde9d234e5e2 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..228f1ad5a1b03d68979173cf35d1c2903949b4b0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10951c05f9fb192e43c36f7be898ee80966c186349da1034b098ec9159a5ec9b +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..079ab70e3eb64f8877a09c067ba6f6841daf2e84 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b525dff0f684213798d62adb1acac1209a73873811052adb7c1ab57cebef53 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..6987c37178156f21a931cf21098a254a4a2d339a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6ec805c024be58842b00ba146913cc39f31735ead84ce44b5bc8288671b8c3 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a5fb7aa094abdadccec49f4e3a0cb7cf7671110 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd93f2f6c415f784da2333f3cb1d21155a8fd7d1eec27a52408206334a2aee8 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..d995c17a722803f0fe0facd7d913119c6d0440a1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a6c06f83feb658a3a9dae7756b28bbc7f946e746dbbf609ae7b29aade1ab39 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe2ed8c7907681ecbd60f9ad528dfc8fdcc0dfe8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9848dabf5d790c0b63a0c06050b93ff06a12f036d5a22192fa1c0ea1eea577cd +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e2b6906b013eae0f780865ac0113031b64f0d3d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd49cf108d0c9d7f33194257dd81143851b474af0a01f8ac96dd71a1d515195 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf68cc860287179a359ac7d204b102b9ea9be48b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dfed987246f4749dbec2c7d8e35618cdf9dc4d6bb56c4a6fc8b18b57228705b +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..784ad2399b5919ecf3eccc367c9fa04fbfab4619 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2594e962e3980706571eb12f2ed27e8aed3b5e373484af50799e77cad68ebb48 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1100/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c0036f421cf4db667611e00b3e88d65d6c43cb29 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1100/trainer_state.json @@ -0,0 +1,430 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.55, + "eval_steps": 100, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1087246976981402e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..4186ff9dab8ac45c50322c94859b9a329adf0e68 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937e4bdc6ae3f56951409d86d9bb0b288d0ae873eda16b0525d0c8fd157b3c91 +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8d0988cbae1e9425ce1e2f9a3cd17488572f73da --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6931c8d098ebdc826e623d4de4f4ffb84704bf3a61daf9b7446c8dc53602cf57 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..bbe02e04238796e3372fe6bc36c269d4d7884be1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2287ec15b909a6af8345e97c6ccb8c9959e198a9af33cefa026f48f0754c94ea +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8de2cb53b787f6c2d13f68313277d2ecb49d4412 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c3392479c1474d10f033c706eeebc54f6f114454d337062f6817a35a4ffe44 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f0ab11a54ddb445a7b70c74bb46ac979f23e2138 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854da931e16222187206faf7e5bf19bb7c694c43a1662cc4032c710b9b959e75 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0b864ef965b29085f9789ef66ea1d928a602a206 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c4951462af865ecded007249b84bce4d9059af45a1969fefeabba13b457dccb +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..18b32a1fc6b4ccaf97fe35dc04a948e31eed1378 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dfa8ba65e986ba35244e3c6c6694756cc851fc043d43011660f46d2402241df +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..28083c101e58a7da5680b41dc266ca2c15bd1ab1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dcd1cee10b75b7bfa3a7dd1d6a488f3183fdf522c6e30be4b885b4b115437c0 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..60fe2f911a61b576e54fe91d34ad0748681e94b5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dedd2acb2c070d42af2cdb8a81d31b563f242f9b003cfca2534cfac0b98452f +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..a78a110e43bff5adf2e0502e9b5a329fc594eccf --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95727d1f4ecd17354d86153b341b29f1651bccaf2356b4792b20e5ec11792243 +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..30752cb2b5003ae94475c7f797ce7cf4f16e1f80 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb5db28cb492176d0715cb73e5a850494e1d459cfb93bed3bbc7e061181e080 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fed3db7e53c320c09584bf151386794443534609 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3efebc314663ad045a7830f3beeecdbdb2fc364cd7b41ef2e928016d21e4280 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..db7649d55ecf927f2c910a70e62bdba1c0cd3c90 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008d5d3cc8ab2f36fa3fd20beac8cc0042244799f99dac8656c958dbffd1c7b3 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..814653512aa15eb55d8bf6ec86c6912e18ec91a1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9f58361336db9cbd7c27ef966d666a477339b623ce4acec5124cc6514f2ed8 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e2743ca683a307bcb7b13c9eb98142ddf9c62c92 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a918c60f65d108da9aa6882f688b41cd11ca0c95a923d679688b812e71c8eead +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7840f12e858225139fa343981668762bd4a2a4d0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:978e15070cfd7b32725df5dac1638b32d76ebb5100191265f7ab76bc072d941b +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1599fcc80d2ec03059923a17adf5d61f02b4f3a8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c08c6a07079fa3600d9275f7626746c654bbab4630accbf7fb66a9d51f266d3 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7c54759bdfb62b062795841e2db519c8c1c88cb0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71b9a8f139fec38010238d52e7af4555d6e52225df7a564a6493fa24f7ca255 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f189e74ec3c2b32bdf53731d15eeabdb45c473c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84c26b2a8acc7af904f88833cb6aa2007f56b758cc2bb09f4af6a136dcf2254e +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7569c9da2848265f26dc13884b4d74d5c78a6d1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:158efb2c453739224cd1a02c979b41052987fa4da6c1ca00610e5d806809e0b8 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..96ddbce766334aeca24bedb3ad7523b5858cf5be --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e69bd054b5237b13d22eff3a3128acb49fbeda87aa873c3641f4221fa18abfb +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..bd11ab4ad1207e0e4d739bbf2e662fc3ae7ad75b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ca716fbae1882edf4fce6f1a9b9ee51f9bcdd08bceaec254a0906850aa5f3a +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..133fdff1da712ef7cbf2f6acd126fa45a65dfec5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baca997feb34a76ea1c234c3cba01504c1ec11987d3b0b60d72a72245855c8b5 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..c20e2ebff6f82adba59a56ee75a0f79d8f92695c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1105b7213ffc67ccbe98e30fca965f1bea43cb45c136d5c8d7bdddc87ef6cde +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..30c01500e59bca4e78787572747633ce30a0ff44 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4502eeb033ad80488d0ce1a43b8dcabb5e0552838a1afaa44f198f1fa8519580 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..296cab46339e243450eba8853d7e69a5d544069d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:364b0c128330c69d4648979aa306cf1cdc8e9a164f74b3a408e4fe68d4f6da7b +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb1b33cc62da996510524591d7252c6e3ea166f0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b015bc498e5b4ffb7ed88672aba64b1aba2e32e94c4926ce4107ea8baf36834c +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1200/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9b451eb35e6ab33b059d531c2bf4039f0b6c3056 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1200/trainer_state.json @@ -0,0 +1,466 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6, + "eval_steps": 100, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2095178520343347e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..2c4c49b8e0be9b9b017e7697c4eeb2b20c9e7639 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60828ad29d78b25c35a6d5d4d8d1f7a61935bdb5e7f4cb5dfe6450c9c88ecbea +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..189602a075b5ef7b266f8e8a678c3b29c0e71179 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdad6294be4413d4d4443c1ed757c2efcc406242f6086de882a3c0fe704b65cf +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d0e0ceb4a289eb213570b4e852d87b19955ad43c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ab3f483c8464941be5019785180e12ab8ca0864999740f728d6260d3a837ba +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f10f80b99f9c9a917e8434ceb4b4674b036de680 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4794c333aef9dd102a323c557dcc872c03020b5bc91c25d6c4815252a0fb936a +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b28502f7baafb04dcf1b9be82524f536ed8b7e33 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34096f9c79b235d8bdfe8cbbdfc61f1d57d8ade8a9df253f78dade1e064bd957 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..dab018b5732db4042991a932f5c3f3a0065cd0ee --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2886cb5291d4e6a45b75cce6b3c9a2fc877cc779ac4dc508530aab91a8a59dcc +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..082d89eeee7801647efb60c3d66ff245afc18a5b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71209ace5f6c94ed94372ff7dc35e0a5a733d70d53a807d83033a1f499896e4e +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7f263279d6b9de82981b16d127df16b0c0485d5b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ae4c3021c0def673e58cf51af2e8c1fd964542e54a6706f0457c48c27fda52 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fee483779d8ea5bb8ac7f51ea66a2953f2159169 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e10e59fa44faa2805fcd9e8fa10062d1027d14c6bdf6562cd854b931d2bd562 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..d382c22540e8e2ddb8226b29d408d9f267402343 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10bf4d0ce85e8dfad2480ae1d83a70b9c95aa3cb50c69c3dbaa9a9abddafa167 +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..48ac8eb1d3559aa243f62517524adb6c101d9233 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b803e503b0d313b5cc3cefed5b2f50bc9fb9648dc63577b8005aa6303c68a5 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7a9daed3992006fd5d135a87686eeff805a8da94 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee42161afdaca94b7c15af7a335a48a572fdb2a5e17e465f08d3737892af02b5 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d8f179ca9f7984097c3170102e5b6ac9f8e5c333 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132738e4be1048f5c85ab869546f791799791240d327e31e0ce6ab963333e3fb +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9d73ad8e2e24f10f2e20fa5c78fb011e98a6e233 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b6c23bf714eacee4baefd52dea32bdd480481d17452c90d5fccddab2a476c7 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1b9e1ef2a958b17e73996b3240c5721f71c90340 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc4b3f1f7d56c62036526e6661b3716240a0866dd7753b535aa6cd12e3e6a45 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8c20b0984ed7a7630e46ab9a481182dc586e637b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c94d669c52eb9038ec8a0dd0d5c5931e664b8246c71e1c98b924b81e10315ba +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7646e6a1355bb484927041507ad9e591c59d1c66 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc1c824fab582b4c2416106d47ed7cadd6bc6889581548a29b1e55ca4ed15e9 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..dcd7fbf2e43243e190b199e9e344c6e2a3f19cc4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31a5b41f6b07da8336f4ff2cb96cfbebb7034846c12f6bb4038bdfe3f00dd3b9 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..02683b94819a040ad40ce62df07cbf9f1df7ac07 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7fd89795021dd0160bb820ad9e658cd1b0d80e3405b507e1c81edf6001bc8ca +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f2d417794539025b8ef9394635570b28a840a69 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59c14448ee646c5307c2110c2dbd86e370f0560e1b3ea0772e54e8789b3405d6 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..83a36ac16625c9b64cfdbc6cad05b7f4c7fa3422 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5be62bd202108de11264c1be2eb9abd7dc33d1b2edb627ba15cd11e3cb6250d +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..efed9169b167ba01c0f8245575a19958f3143771 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f4f8d1e91666e894d50d7e2886591715ba36ed4a759c2ea2acd4a2145bc0a1 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..f6924c1ba4ca473f751b2d2c970bb6a03fba8a6e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41e84b003216499d66cd69ab0951adaa3bcbda1e67fd3962cc82600206da2c25 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..50fedbed6bae3628716cf7e83cf39732524b200b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07b16fc7185bf60589c6dc98ebb7edb5b3e9a7ecf3a0cb1a83bfbc60ed674c2 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..f76d98ab45c53e6a766641c614b055d3fd180f40 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45b6bda4561656d04ad5a1edb957a0fd798a9af4aac1623510b9407bc589f070 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a446cf921419ddb5cd7b23d29c622635cedce4a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:088bf86c6b6f9482925a6c46f7a5976920adeb27963828cdc042e3e4328e7bff +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72652d1ca1d20c565375320a634597caa64d6264 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7bd64925acbd9bc3dbd1a44a27b1aa523daf766a871a6ffb2ba33b7fc1ea02 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1300/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6d2be7b646724fab6e248f0f7a586052b1d1dbd3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1300/trainer_state.json @@ -0,0 +1,502 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.65, + "eval_steps": 100, + "global_step": 1300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + }, + { + "epoch": 0.6125, + "grad_norm": 0.1620441973209381, + "learning_rate": 0.00016821017608365264, + "loss": 0.6163, + "step": 1225 + }, + { + "epoch": 0.625, + "grad_norm": 0.17003227770328522, + "learning_rate": 0.00015885917197714112, + "loss": 0.6232, + "step": 1250 + }, + { + "epoch": 0.6375, + "grad_norm": 0.17415954172611237, + "learning_rate": 0.00014965301261957238, + "loss": 0.6991, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 0.1617245227098465, + "learning_rate": 0.00014060632881768558, + "loss": 0.599, + "step": 1300 + }, + { + "epoch": 0.65, + "eval_loss": 1.1370735168457031, + "eval_runtime": 320.7099, + "eval_samples_per_second": 3.421, + "eval_steps_per_second": 0.056, + "step": 1300 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3103110063705293e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..e4f87947e169f2dfc78300f468edc9398bca5035 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e444c13a172629eb6d7375d1add6c8d165754dc5c3f3d3066db29a0537b3ad3a +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a92db99ad569749e7e38ef932e8c0737296b9bc7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c957544052e0e3ac882917a184927b4b40cf91feb0d87e290bf3f36795a22eb3 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2631c9cd735bfa916e9fb399e780396f261ffab0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1a8e7415755e3e44c1637d7e6a13456d1cd664fa9e88d49f137869358f9254 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8f0257c22ef584d7feb8a058f8d5a240662f238a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a055a39ba32a4c79a1e446b8e5862ba7938d06b11ecea9297d3e730239ef192 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ca750073b37e4434cb502acce1575ac726392513 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6906121d3ddd2e44a66e3fbd6225f812964ad29e29d19ada41cc3f34e11d8c91 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ad0aa7e6c8dd0c1c8a6e04df8e7a2fa422a30611 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83faf9036260db858f84e779a95475c55744e44af203bdeb23e1e7d20f6e8762 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e42bd55322ac00162244348e3823cc733522a5e3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1533f56cea8e918cc6df4fa64b53fd23f4787ed6c2279d2873dd1c7ca43460e +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..04320f3ec6a7ab2cf6cfeaf68eaa190c97333bd3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3375b605ab9ea4cfda58dd4fb1c80f7616569c1a4f71dac3bf0794de7e0d145 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f2753eec61de1621e22603c74eed5714498da958 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5fb7623b5dbfd2d496dfa6dff61e23903464e5d902e949b5e7dc0c79435cd64 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..83a67b729bf3d481286bf6332538b35a0486e4b7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c9b00886a3ee8b337922da3cf7473c5136376242ed27827ab952db52642dec +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b8a558804f7098190c1e76984a870ade68e3cf52 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc217f86836cd443126e1011a1bdd1b479c02a29da6bdec26d9ad8db4aebbb8 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8ccfa9bafea7e54162b5362ad5abb43b6b41b536 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a82eb0f2111e6bed1c4309345693db579c8260be5c8182083bb8f0e74f2a655 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..67ac9931b243e4d4924029be40aebfa2b247ec8c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb2965a9a5c9970d4ff59d6bb36338918cbeedfa4cb783fc27385da7a915900 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6a651afb98bbcc8b2d4b5ad6ac92c80a058dc11e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae137eb785cde8409cc7e8fc8fe20b85d4469f0e111693e83e91cec9d8b1502 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4c6ba65dd441d74f7f89611823a0dba69f0e0d9a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee9f3942c1de2cd0953074339c8b2196000849cab6ef426eb99693ce01cdf2f1 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..40e1d81915819c250683e917566c9dce7cd1181e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4017632ef5a043e34ee0ad64b7d442960bdbfbc58e22b8101198ae244cfb943 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6b8e72485957b8285ba3f3d55fcd29bb37c5ec52 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:736d727b6d00e88297738d153e5dcbd57813beb90a5659251b544615113d881e +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ebcd24587b14b638925a2f728ce1aff899f184a4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228e12c54216e6000ebd9569f4030bab74003964f0c2421bde96f3fbaeed4084 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..227ec93c3e8ef56f4b4c2cca828cef131af39a71 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42a2eedba7a4091055a96ccd3dd4da2eaca6fbbe25f1c2e80ab817e8886a5de +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..de560c5e245753e21ecc7f31871114a4704ce933 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9179322cd15281e69c1b0d5a3f50657fc311319f087cacb8b2e5938e7eb07e9a +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b105f23b205018fb489a337ae2d5b3f7d705293 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575be61fed0627afbdff204b0816dae4e338f50bd0d2049ecaaf1655573b6da8 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6015106f13742c4787da603dabde3c77d68a3d8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78eba4696cda3ab2ac997a1652955234d9cd351ca320a61eebda0d811d802485 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..34a4632a882aa5d234a0ee74fc465997f9dc55d6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079ee4a128bab7511bd7c5d4c741b16adf9d7557be80143b70136278e26989b6 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ebabe22764141052f308a11d43efdb26f55c746 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af7a288fd74a41f90a133314e24ec753386013d31f41ff53c5be06f970265382 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..291457d8a66ca684b3e5b40da23d5d44f68da956 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86ea638f561758fd19b01eae63675ca617641f68ebc2017b5613bf3f2cf71ff +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..289a19856c5fa6f64baecb063315516f8ad2ca77 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19b9e9a5b7eb9ee987ef2641478c13a247070b27f40b93bd75415ecff0952a25 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0a26edbd6707caad9939b100160a29bf32569fa --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f00dc98dfb48cefd72fc26922748e64c046f326e2d0dc623af08b2bb7f66af78 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1400/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..688a6b99e360f784ae36efeeba74342a2178653b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1400/trainer_state.json @@ -0,0 +1,538 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7, + "eval_steps": 100, + "global_step": 1400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + }, + { + "epoch": 0.6125, + "grad_norm": 0.1620441973209381, + "learning_rate": 0.00016821017608365264, + "loss": 0.6163, + "step": 1225 + }, + { + "epoch": 0.625, + "grad_norm": 0.17003227770328522, + "learning_rate": 0.00015885917197714112, + "loss": 0.6232, + "step": 1250 + }, + { + "epoch": 0.6375, + "grad_norm": 0.17415954172611237, + "learning_rate": 0.00014965301261957238, + "loss": 0.6991, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 0.1617245227098465, + "learning_rate": 0.00014060632881768558, + "loss": 0.599, + "step": 1300 + }, + { + "epoch": 0.65, + "eval_loss": 1.1370735168457031, + "eval_runtime": 320.7099, + "eval_samples_per_second": 3.421, + "eval_steps_per_second": 0.056, + "step": 1300 + }, + { + "epoch": 0.6625, + "grad_norm": 0.1753346472978592, + "learning_rate": 0.00013173349793311424, + "loss": 0.6611, + "step": 1325 + }, + { + "epoch": 0.675, + "grad_norm": 0.17510834336280823, + "learning_rate": 0.0001230486210332916, + "loss": 0.6814, + "step": 1350 + }, + { + "epoch": 0.6875, + "grad_norm": 0.17805688083171844, + "learning_rate": 0.00011456550048145536, + "loss": 0.5757, + "step": 1375 + }, + { + "epoch": 0.7, + "grad_norm": 0.17829716205596924, + "learning_rate": 0.00010629761800136473, + "loss": 0.6646, + "step": 1400 + }, + { + "epoch": 0.7, + "eval_loss": 1.1476235389709473, + "eval_runtime": 321.7747, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 1400 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4111041607067238e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..4ca45811a630fd974cc314b244f2d2eb9060a677 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e687ff23f57834578de00de2a0b9d56d9dd44f8f4ea27ecab9c97fcfd383fff +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6fa831c441f356a8d71d3ba18748f5465ee97e87 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7829ca401fa7617ee7c4c526a26ca439a9dc4b0d88bde77c88f67d06364950c0 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..bfd18af3455f9602c6c243dfbf248a2fc50b55d4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30eba72e56d0fd68521c1f6f23bca3e70be8f974232febe623ecea52d2576cd7 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..13813815f8c2eab63d87033c5a0c022f820dd6a5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d42535f9ac292562cf39c8add5490a1f8a971b0419c8c1a7405bf898b5bc0989 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..698807348efbb0fabff8a4584c9f1132327533a0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607f6ff48312ecb8b423bfbdb87a50f3ef1bdff04ad25a7f59e718ce01388446 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2a292d1923e75d247747af47fa6cdc80cbfb9342 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc656f7193c33934c6495ef285f4d0c46f67392d8f2245d4b73ebb81a616424 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2327b04d113d74726b9b7f1200504b856df90d38 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d99f75188f0f5cf9894e1ad11ca491b9a678e5166a46c6f30f6f95329c80b4a +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8bd4b00f73e15d70a87905a7c67944b6284e97e9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:133638343a4a3b8b90f3f678f4a5a7a35d56f9fdfdc2cd34059266b1300ce6ed +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b456e3c348e195539442d435acfbd3565e9f062b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a22e657a9dff35185f9c74493334dba6bcf55018e9b5e6d76c3977f823d16304 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..b74068268bc811015e3900d381da447cbd98d881 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a52ec98ce594f8ce36f72d22d01bab092d7e2389e48e2bd89519c47bb4059bf +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0f533cca0e016ba8ff2fb718ec94ff08e1ce2d59 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1520feac99c6035938ac40353f816e06119192032cb40fe2becc9900457b0035 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..836a326434e0c7c545763c9b2df99d1dc17e9083 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a99575c10d1cf05b39725a324b49c62dc943bce3938a1cc0de2c88b6247965 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..07705bf9ce7dc0f8a05077161228ceb6be7a2fe0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd4c72cc2edc24dbe4f9b63788a6685b16255a9623e98bf18ef0d940b7ed3f0 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c1e3ef7961e1a4a3c95a500c4fbff7943a36ec7a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7108113694d88e9173b3f194d419c8927812bfa4017ccfa44344ef18d0e19d06 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fa856bd9a4fb750da2f232f58d879d8962cfe578 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a5f7edfcf7923d3d8743f4fe3beae978d82906dd11f84512ab42fbcc4a0410 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..afe1acfb3445b003b938c9e5d4a75242558cec71 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c32ff0589208668029ac54d2af445a9684dec878810c84218d08ddb1cec799 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7f3eb1060816f17c0be5f3f8ad78823439c3dae6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d767cacca300acc295d17378e2c24cb83247c1f1823274a6047dc7a7e1957af7 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4686cbae124337cf4b4a60333c9156f182395e30 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f988834670916a227e73ec3de1c4a952645cbb9a31b52d0c2d8f810bfd0d84 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e54be6e459745bf20a83cb622e1c86cc10ce8522 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4433e68a7ecfbf84d5b59193fed5be299b3c6bd9661c1b1b3d68a8ab696604cb +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..6777150f01befaef1d07adc26bb66fa963a27b17 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39875d8cadf9928a3b808d6ef72fe52aae4e255cd7cd4ad5e8e242f7fd2c7fc +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..b153c00669542a7713a55886f7e0b38fbfa8b6c3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cbd45a2db6acfac5caa9d80d63ece12503b0f63fbb7f6b3b0b69084bbef4738 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d30286c8d6304d83a82b86df629f6de262036a3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c38724293e33d297e0505ec90e3d2ef0c7a688bdc8ebac4eda63333054d3cf9 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..e63dc44d1acdf8dbd85132331ef3498347f00867 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa36cca0956be05ecb2b267f55f966b640b7231db368e3dd74d8f88fbc57f27a +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..81c44d72b83ef081e691da62e0cedc7da1972ce0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac3b399bca51abee30dd72b7fbd3555191fac9a8c694b9b8f6c0c1bd78ab9db +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..20d4883dd34f64393221674ff342513408662cdc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfbdf6345e32141c8b4bce93a6ff87c74295d078ebc83b6b99aa4e9a9d59619 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..d982129db3bcab3f2d3ffcae82308af24371c19a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e282b4b95e56ee90ebb39bc1215807db12589d83ab676f7523b66d0ddf085dee +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e0b92a890ced6e2b96196734a43486800291efc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e65cc13c66c2828cdb8f114dee592c488900c0a56fd072ff729fc38f989e26 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1500/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b0623b16e0a7b40c80cd7f1b85659d19b6ad2f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1500/trainer_state.json @@ -0,0 +1,574 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.75, + "eval_steps": 100, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + }, + { + "epoch": 0.6125, + "grad_norm": 0.1620441973209381, + "learning_rate": 0.00016821017608365264, + "loss": 0.6163, + "step": 1225 + }, + { + "epoch": 0.625, + "grad_norm": 0.17003227770328522, + "learning_rate": 0.00015885917197714112, + "loss": 0.6232, + "step": 1250 + }, + { + "epoch": 0.6375, + "grad_norm": 0.17415954172611237, + "learning_rate": 0.00014965301261957238, + "loss": 0.6991, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 0.1617245227098465, + "learning_rate": 0.00014060632881768558, + "loss": 0.599, + "step": 1300 + }, + { + "epoch": 0.65, + "eval_loss": 1.1370735168457031, + "eval_runtime": 320.7099, + "eval_samples_per_second": 3.421, + "eval_steps_per_second": 0.056, + "step": 1300 + }, + { + "epoch": 0.6625, + "grad_norm": 0.1753346472978592, + "learning_rate": 0.00013173349793311424, + "loss": 0.6611, + "step": 1325 + }, + { + "epoch": 0.675, + "grad_norm": 0.17510834336280823, + "learning_rate": 0.0001230486210332916, + "loss": 0.6814, + "step": 1350 + }, + { + "epoch": 0.6875, + "grad_norm": 0.17805688083171844, + "learning_rate": 0.00011456550048145536, + "loss": 0.5757, + "step": 1375 + }, + { + "epoch": 0.7, + "grad_norm": 0.17829716205596924, + "learning_rate": 0.00010629761800136473, + "loss": 0.6646, + "step": 1400 + }, + { + "epoch": 0.7, + "eval_loss": 1.1476235389709473, + "eval_runtime": 321.7747, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 1400 + }, + { + "epoch": 0.7125, + "grad_norm": 0.18059992790222168, + "learning_rate": 9.82581132515907e-05, + "loss": 0.6797, + "step": 1425 + }, + { + "epoch": 0.725, + "grad_norm": 0.15256117284297943, + "learning_rate": 9.045976294343145e-05, + "loss": 0.5596, + "step": 1450 + }, + { + "epoch": 0.7375, + "grad_norm": 0.17262427508831024, + "learning_rate": 8.291496053563699e-05, + "loss": 0.6905, + "step": 1475 + }, + { + "epoch": 0.75, + "grad_norm": 0.17382751405239105, + "learning_rate": 7.563569653821565e-05, + "loss": 0.6772, + "step": 1500 + }, + { + "epoch": 0.75, + "eval_loss": 1.1411069631576538, + "eval_runtime": 323.0865, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 1500 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5118973150429184e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..7c092ce96aed2df82c06d0807ba2ea8ad0201dc0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57ce85ee52b3160b4a5e042638a99171dc2a599e343d76592dc08a2a2ad2ae95 +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b113d500f8d9e9d52d031864ff63a925dcd6a108 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a16f8acdb8c06a135688d9f8d0c85a4971bce363673ee11145e9e9ff8872b4 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9980a195490f54dc84c53dc977dcb3609e9c74ae --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0339d35d524be87930469125006ef3f1478dae67f177181a34dafd1ee575ef +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0b7c96b270d659ddc4a93d26b6b8d4288148b8f5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9062360c72fe6beeae8d92dd11f176277b649ef4f0ac0741164f9bdc02ec3f47 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..59b828bdac8fc1d633c0831c4d1ccfc7f44489f8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7afcea35d94048bd3d7faa6b118d9dac5f5b76f5554fb11c32e7925f4e9166c8 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6a7a41bb92eb1e35f02caa20a9c9b234407a7b1a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7584364b82ee724efb05fefdc21eade214c47a2bf77d2b37d071d41ee312e6 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..faffc71c66b29112b642d4ea6a76f933cb37686f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf78fec42fb7bdad9a2d06933ac170c4a847e188bbb70ea3a54dcbed2760ee0 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..adfe36cd88ee044c32177aab5aa79d8ab4d35c44 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a974f0e9d5f37dc5845d1de23bcefbfd87c5a0dfb30772eeb8bdc5c30ae5682 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..873028bcb01de50d877e05511543f19851eeddd9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39f7ba7f51da9014db689090df1ef2be32399125c04bf6cd42ab532c1fed0be1 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..3aedadb685bdade9600b756644212e54737d4b3c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c596ef3964a0a00e9b17752e0cfbcfcde524da034d351f81d9193ca0be1acb41 +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..788fab27959b25d668b71239d8d8343c8496a393 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d214dfe75e70c83c1ffc618ad3bd4c93ee778c936dd9f55ad3f55acb46365e1a +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9d134d6f21618fe33bb11582f8577142de31ccb4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1324b62a67f581d61830ef9319081519aff02cc4a82773c3b0c40cd016d2b6c6 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..96cffcce796f333b40628dc60a41872316600352 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2642fb08bd63c9988f801112f3f838408d0435f6762cb6c1479a0970cd5ea8f2 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..42a296e915e782c72a049e543ce740542c0c42b7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e76ada9b00f3add6e04c7c84035aedbca2f30eea31a3a813a33e1741557a9904 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..77d35975cc25aa9a11a124da0c3f38f48b8f989e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cdaf591c76c8ef93261273dea0b540fb46a4a23cacc0e560e66400027ef3799 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ed5a603cac3f98b31ffce9be196d404fc9d2cd5e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15dfddbde4a2b7441ee36184e11b1b63e261d12de7bbe003eabf5a8d923d0dd8 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..046a513c1235e5db9981bd61f1fbbd48b8296a51 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768985e9f1a8dd701e008c03b01f63a77c638a8621df3a84d6e4010523b7960c +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b8e6e1a78d13cf12c536efc5323f10504e3ec13d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781eaca842b0cee3d7a90aad6ab7700ab355972b8169f61d7ccac16c18720750 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..17d1578450e189113f727a3a0632b3bc530af8ab --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5183506a2396df9160b89ce86aee871710b836d566fa74da4d65a9ae5ca85552 +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf041f9429507850844a465334e15bbc5f3d3e75 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faced7ec28436a2990a43a87872462ff8bcaa07f8aca5783ce1ef461c24a5279 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..22398f7f5735ca48f9ce9163659699f7d974599e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da04d74ce1ce614a1bfa4966f86cceb96723501a6333ccc0bb669f27e8c29bcf +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..ec5815d1a61ba4dacc884cd287e0b14f53a85485 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2da239fb7c10f993c363a19581ac96c62790624254fd05b4f9411a2c0f8280 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..c28c849918f4a4c68f2ba6112492313ad5813fe1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78bc4edab805c57c2ead6d40ffcfc64de8e97b59e297c2a921893b92f5e9296d +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..8a6f7f47b3f861e06a4b8b62a48916656b67a3bb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a3d9fee605e6a474ee8a412dc14b0ab906a07cb21430029959b91a634fc6d7 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..c5c29843a667219beeb22f4e97dd3ae688fa8fd2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4530cf8642eadbeef6e8e3bb17cc51305d0d35a97ce2a8a6457153b532ffd3 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..11a550e52cc0ee1bf4b1b0c8a4ec25843b59c9d9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f587f7177f6428865c1ea5f5ca96d5cd2d4fe9f7b6b5b251babc5a37b75a0a +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b1256b470e8c7334822cf94f43e0000f8a16dd9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d5f76e918fa0cbbbd298377811314b7fc9c9c89e747f720e3533cb0c69b09c +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1600/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4bc7f954a1377ee648181ae624da228b05d53bdc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1600/trainer_state.json @@ -0,0 +1,610 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8, + "eval_steps": 100, + "global_step": 1600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + }, + { + "epoch": 0.6125, + "grad_norm": 0.1620441973209381, + "learning_rate": 0.00016821017608365264, + "loss": 0.6163, + "step": 1225 + }, + { + "epoch": 0.625, + "grad_norm": 0.17003227770328522, + "learning_rate": 0.00015885917197714112, + "loss": 0.6232, + "step": 1250 + }, + { + "epoch": 0.6375, + "grad_norm": 0.17415954172611237, + "learning_rate": 0.00014965301261957238, + "loss": 0.6991, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 0.1617245227098465, + "learning_rate": 0.00014060632881768558, + "loss": 0.599, + "step": 1300 + }, + { + "epoch": 0.65, + "eval_loss": 1.1370735168457031, + "eval_runtime": 320.7099, + "eval_samples_per_second": 3.421, + "eval_steps_per_second": 0.056, + "step": 1300 + }, + { + "epoch": 0.6625, + "grad_norm": 0.1753346472978592, + "learning_rate": 0.00013173349793311424, + "loss": 0.6611, + "step": 1325 + }, + { + "epoch": 0.675, + "grad_norm": 0.17510834336280823, + "learning_rate": 0.0001230486210332916, + "loss": 0.6814, + "step": 1350 + }, + { + "epoch": 0.6875, + "grad_norm": 0.17805688083171844, + "learning_rate": 0.00011456550048145536, + "loss": 0.5757, + "step": 1375 + }, + { + "epoch": 0.7, + "grad_norm": 0.17829716205596924, + "learning_rate": 0.00010629761800136473, + "loss": 0.6646, + "step": 1400 + }, + { + "epoch": 0.7, + "eval_loss": 1.1476235389709473, + "eval_runtime": 321.7747, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 1400 + }, + { + "epoch": 0.7125, + "grad_norm": 0.18059992790222168, + "learning_rate": 9.82581132515907e-05, + "loss": 0.6797, + "step": 1425 + }, + { + "epoch": 0.725, + "grad_norm": 0.15256117284297943, + "learning_rate": 9.045976294343145e-05, + "loss": 0.5596, + "step": 1450 + }, + { + "epoch": 0.7375, + "grad_norm": 0.17262427508831024, + "learning_rate": 8.291496053563699e-05, + "loss": 0.6905, + "step": 1475 + }, + { + "epoch": 0.75, + "grad_norm": 0.17382751405239105, + "learning_rate": 7.563569653821565e-05, + "loss": 0.6772, + "step": 1500 + }, + { + "epoch": 0.75, + "eval_loss": 1.1411069631576538, + "eval_runtime": 323.0865, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 1500 + }, + { + "epoch": 0.7625, + "grad_norm": 0.15558482706546783, + "learning_rate": 6.863353945662288e-05, + "loss": 0.5172, + "step": 1525 + }, + { + "epoch": 0.775, + "grad_norm": 0.1950470507144928, + "learning_rate": 6.191961740661687e-05, + "loss": 0.7039, + "step": 1550 + }, + { + "epoch": 0.7875, + "grad_norm": 0.17224222421646118, + "learning_rate": 5.550460042899982e-05, + "loss": 0.6914, + "step": 1575 + }, + { + "epoch": 0.8, + "grad_norm": 0.1652670055627823, + "learning_rate": 4.9398683532350855e-05, + "loss": 0.488, + "step": 1600 + }, + { + "epoch": 0.8, + "eval_loss": 1.1589314937591553, + "eval_runtime": 321.6657, + "eval_samples_per_second": 3.41, + "eval_steps_per_second": 0.056, + "step": 1600 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.612690469379113e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..70cbfc270fc78d599e714f53d4d0c53a1be7f88f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:468bfd2df97f215e4c75e78afebf4cf679f2079511436ed58017c45b90b41bdb +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..175ff89400cd0b2850966a011370129eff823d29 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4dde31c3ab198c8ad808ae8b20093155b73b6aea3be61e98c850acd071dfb77 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1141482b923551eb113e3fefad699feac8739a8f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d52a6cefa87000b5c9612c0d487a5883666e0f0792425a87061db4825b0d15e8 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..706d51a06bcb0c242fae3f8bf54d1a484b66e3f0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc809616f1ff335a99fccb8ae89321efcec824da68861ff6f5c3ef7266ba0980 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6cc68e600928bde048285a00bfdf937408eed633 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb9da3dcc13ff751d71b9f106b79faf2eb89ee08f61ffc11d9adfaa200c424a8 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7714c6ffa142cc0be0fcbfe205652441c4dbc7db --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22491b2ba1fa7d9ec31495e4f237d6b4476d5d04d6c6af90a3e8f856289b26a6 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..88e2394108551fa9604908d676f7e75cef016149 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b368a0399dbde176bec50f5a2d06ba7eed8339cef1e73b95af81666bd4e373 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3f0f72ab6ffdc512fea740edcccadce70582fc6e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab1d03b87d9778e85b0a02d880ed7799d494d1385aa2e692346643eb91aa91aa +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5be9e4d9ff34a62280584e6cb46e9fb73510d274 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:891e5056e200ea3ade2acd66d0a6022940432183851161a42c6e2b7aafef58b6 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..ea6061d3751aca91c68e00f2b7a4a49f7f1ce68e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e38f9cf6990e965d5ccfcfa0a7d78ee9699bd3b4c3daae95b2168b8289168e4 +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..57a3f6a4658776f00fcc05279d5d5a6e520c05ea --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9581c47d2b51d456ce350f5d308ccf1115ca80de1f24d205b46a5180cacfd72d +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d97461ead66e969434ad465210e41567d01d3fc7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233ab9d73964cac2bf557a3bd405888cfe1497cd27288dcfe77fa20c91f9a8a8 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..724e6c64e38d5bc31ea9bc359a4678658754add0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524b06637a2827ec28dd51399212821005b3c11e3dd39fab52db82918aa8c270 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b6048695666be6f458b03867d545bfa65e4f2421 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8241b29695bbdb6199cc1540b9583c51cafaed942357d3800e6947b17ef9158 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1866bda3b5b2b41c28bcf42e9f3b433bd9391b15 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb0357e3ab36509bddb0b54fbda71852f8bf3d6a702a20fd47522ba728ddc69 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ce3c3eba8e249968cbe17e17755c248947cebda5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5f3922a63b865450c47e17d2cd8ab5084fe85cdcdc5c15464790ac7ce8bddfd +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..db261a935eede4ae0e486cab6b983834b95f79fb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b442276a8d50335e7638b4c12cd22cf2de2ab3d9d81276dc0e2a30908a149e +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6c6578c4210027b402bdf8c0ba1ebcbe7e896c46 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef95c017cc2f6c7f2a997734567e6fba24bcddcdc814075b9e3978c75cc1d089 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbbb93443d8ac74f5921b1db6a190e4dbf1cb152 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c368f2d5e142f8ec4d8e832f12c3a242f58f04943b01abc4f24753716227bc3 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1c8a9a67875949ca088e24b6ddb6d7abb8cbe1b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7495eab1626b234203500cb092a76fe0e8ee29326c4a214d406965f11ae00f1d +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..927034d062e94e6f7480eb76f009cc6f088f4eae --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e719f7df6c26476159f65277915dfa0fb3de15d19a7f51fb58f93a6e451f669a +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..d75d05aa7ee1eafd7e6c78c7d65a48fade012846 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8772b6e0f5613097d80d8a1b900a879f18269d411c7702f8553115591658d5d6 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..8af08ad4aea65a30a236b7fc47b84e7cd188dffb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:007aca66dfa0ad6f05dbd187566dd645c4fe201a60ffb68ffbbf0988506518ad +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..be0cec73b636df956aac221669db063e8e848c9b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82f8686622b3cb34cb432fe0331d1c0ead3a513a521e46bfc46b764a797f003 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..793ed4585e58a46c9096ceb7444786d7abbbb105 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:238bdc28b08c7e2c5690436b2d75c6c89bce8c123cb1195a3dcec4e4923064de +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..bafba19b5a76475813eaaac5f8c7d956bdb62319 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:823f8a6617c96025013f66bf901be2a45b4251e14fe49e4147b91854b5a49d0e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f32a2b2cd826add882328cd593354c5b68429a27 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddc22e089a0095581797df13adaa44992df016d73b873c38a5d6d0910baea06 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1700/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1b086cb9b1fa586c4d7f8bb56007c7df9e044ce8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1700/trainer_state.json @@ -0,0 +1,646 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.85, + "eval_steps": 100, + "global_step": 1700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + }, + { + "epoch": 0.6125, + "grad_norm": 0.1620441973209381, + "learning_rate": 0.00016821017608365264, + "loss": 0.6163, + "step": 1225 + }, + { + "epoch": 0.625, + "grad_norm": 0.17003227770328522, + "learning_rate": 0.00015885917197714112, + "loss": 0.6232, + "step": 1250 + }, + { + "epoch": 0.6375, + "grad_norm": 0.17415954172611237, + "learning_rate": 0.00014965301261957238, + "loss": 0.6991, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 0.1617245227098465, + "learning_rate": 0.00014060632881768558, + "loss": 0.599, + "step": 1300 + }, + { + "epoch": 0.65, + "eval_loss": 1.1370735168457031, + "eval_runtime": 320.7099, + "eval_samples_per_second": 3.421, + "eval_steps_per_second": 0.056, + "step": 1300 + }, + { + "epoch": 0.6625, + "grad_norm": 0.1753346472978592, + "learning_rate": 0.00013173349793311424, + "loss": 0.6611, + "step": 1325 + }, + { + "epoch": 0.675, + "grad_norm": 0.17510834336280823, + "learning_rate": 0.0001230486210332916, + "loss": 0.6814, + "step": 1350 + }, + { + "epoch": 0.6875, + "grad_norm": 0.17805688083171844, + "learning_rate": 0.00011456550048145536, + "loss": 0.5757, + "step": 1375 + }, + { + "epoch": 0.7, + "grad_norm": 0.17829716205596924, + "learning_rate": 0.00010629761800136473, + "loss": 0.6646, + "step": 1400 + }, + { + "epoch": 0.7, + "eval_loss": 1.1476235389709473, + "eval_runtime": 321.7747, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 1400 + }, + { + "epoch": 0.7125, + "grad_norm": 0.18059992790222168, + "learning_rate": 9.82581132515907e-05, + "loss": 0.6797, + "step": 1425 + }, + { + "epoch": 0.725, + "grad_norm": 0.15256117284297943, + "learning_rate": 9.045976294343145e-05, + "loss": 0.5596, + "step": 1450 + }, + { + "epoch": 0.7375, + "grad_norm": 0.17262427508831024, + "learning_rate": 8.291496053563699e-05, + "loss": 0.6905, + "step": 1475 + }, + { + "epoch": 0.75, + "grad_norm": 0.17382751405239105, + "learning_rate": 7.563569653821565e-05, + "loss": 0.6772, + "step": 1500 + }, + { + "epoch": 0.75, + "eval_loss": 1.1411069631576538, + "eval_runtime": 323.0865, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 1500 + }, + { + "epoch": 0.7625, + "grad_norm": 0.15558482706546783, + "learning_rate": 6.863353945662288e-05, + "loss": 0.5172, + "step": 1525 + }, + { + "epoch": 0.775, + "grad_norm": 0.1950470507144928, + "learning_rate": 6.191961740661687e-05, + "loss": 0.7039, + "step": 1550 + }, + { + "epoch": 0.7875, + "grad_norm": 0.17224222421646118, + "learning_rate": 5.550460042899982e-05, + "loss": 0.6914, + "step": 1575 + }, + { + "epoch": 0.8, + "grad_norm": 0.1652670055627823, + "learning_rate": 4.9398683532350855e-05, + "loss": 0.488, + "step": 1600 + }, + { + "epoch": 0.8, + "eval_loss": 1.1589314937591553, + "eval_runtime": 321.6657, + "eval_samples_per_second": 3.41, + "eval_steps_per_second": 0.056, + "step": 1600 + }, + { + "epoch": 0.8125, + "grad_norm": 0.1697286069393158, + "learning_rate": 4.3611570490698945e-05, + "loss": 0.7453, + "step": 1625 + }, + { + "epoch": 0.825, + "grad_norm": 0.16859295964241028, + "learning_rate": 3.815245842188697e-05, + "loss": 0.6625, + "step": 1650 + }, + { + "epoch": 0.8375, + "grad_norm": 0.16462524235248566, + "learning_rate": 3.30300231711339e-05, + "loss": 0.4719, + "step": 1675 + }, + { + "epoch": 0.85, + "grad_norm": 0.17534367740154266, + "learning_rate": 2.8252405523025106e-05, + "loss": 0.7468, + "step": 1700 + }, + { + "epoch": 0.85, + "eval_loss": 1.161086916923523, + "eval_runtime": 322.1848, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 1700 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7134836237153075e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..feeffe55b6f686277ffd6ea78212f664c54f2c90 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d8d672f6aa9cea422faca467f25fe74008167ecb94b3e3d8d183e1cabca035 +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d08927053913bd52cdaf525556e7b04587a35f20 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a488b3b856c5050e530a94a4528becec9d831f752432c216fe6973bb680a0ba5 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..150caef03495a6ac0de8aeb33f29a21775218664 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05988ed7b2b66d0e97b1d36fbf9b1d311f4f7b615576aa0c8e145988507c4652 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..beda446a2ded8881055d484d5bf8b14f2df985cf --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e66fa7127e41edc866473050fc42361b09e62b5e779449458e1a0383c4eedc +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9ac4e0697ed73bef262109242b840f5d0d6e3599 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8173b3bf9a6089934e38182010b57ae5389fed536e94cf4def027029452cf251 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fa9549de09e6db29913c1bff67d2fd64436b51e0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b6f1280ec9ccf66494378d56e39d7df4d8b5be826f9b01e4465b731c6b535a8 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a205a6c8284234b2d0a10695545f1f0ed73e09e1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e6485e5549374bf39fde8a34a6214b4e1bb764829499d3393218c5cc6a5e80 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e7e1e71d5bb1819e44edf7203c044327bb34de27 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f843ac60415c1e4b8464329ee14719759b7dd01bb3995433be198e2e6c381a6a +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1c5f855686ad8c9ff2f44382d1dd8a67e57a3840 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c1ee911612d01626e88528755deb19a2b56f722794a5ed510f5344d17d6178 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..8c12acb54d5013711d3e684ea3d4dc7a4bd17b4b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc6003462e4b36ff6f712ac8f13fe64d5af47da3982a7a7798d10b27f3efadb6 +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..24275f8083df8c3f6666f57701e90140e840bcab --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3492260e92a58f4455925d43d7c2ced9796554c6b85c3d87088b8599671af84d +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c5cfbf3cefbe952363bd59cd96a0136eced031e0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46f2aeed355e00f064b53dfe1adcf8c370a25f1c9d42e77e852f48b5e00f4f7 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b226877ae5bb2bdeab6ecf8ee6e669602d867101 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6733930135ff10fc8387920e5e207eb53f19d899cb651452d91c17ae1d6f12 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..861202c492c3fe7bf66a4c47cadff5bf70f36cbf --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04476990240ee5ec4efa66f91803a06753493c4e00f25a089bd6d49f79f92f51 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0d293e629ffbe09f5839af27a5472948d29472cf --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a8eab352ef6466a47cf47847e4ec56f63e81d8671b1e755a30448b51e2af56 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..121fa284088bb9ba4a51ac52e638b3b3c369dae6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73fbd93772952c45253b61dfe2b597d69a76995152ad12591ebd2cb99bb5c3c +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..42d915a3f03f14d3d4f0652b020d9ef4b464c0f2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e28a649cf91fd67782eda325d4b2af272f34a81024cee49bd4a6691ed681278 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6a2da0901273a4bc363c4c072f90f8e66443063b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb39e4607262decc6263c4a19dbd2d599b48085d0e317f6a2c28d04c6d3cf68 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..505e48e1c04b347b57874cd7f13152a3dc45a0fe --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e5cb1ddfe0c1c113a20f40807a9ad8a341a0f5f2da8d9dcb3174ddc5cf42a45 +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe5748fb3992b8bcbd1be2615a41e0f76c9cfda0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:257ec6a5e84a6f656652535f26794b006e614fc4cb0992ade63cffdc8b66bc3c +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e4391b1af62e2a91418f873b5f4ae861d1aad2a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b869fbc5f677cfa936b763dce05a4b33480318d270959b91a7b2f16349b67fb3 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..5af57fe4b8fa5a039585c0691bd55735f58d70a9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0bae2eff0323fac0bd9c07883bce61210db1d9f3805d41ab334ed75c8981f0e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..11da143134b0c23f06ce1046f65b5cf00acfac26 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70c37cdd018075af133d8de2fa13cffdff177f5300346385daa81e4e8106320 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..4a6eacc3ecbe6bb10f5324b2ac6e1aa7bcaf0c75 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c26e1c981b255bbc49483ee7b1834d16eb6d6ba193d8f8318f9403b16962c959 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..7f797500c9870775d0a99fe1ecb3bbb2e52e0c70 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f928fca3214ad4844bda0e7b59b964b307eacf836c78d92010134bcf2ae7b76 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..82692af4a6c1b7f536bf6f26495b604c264ea19a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997cceb19a4774c17f6fd70d3635ecb66a8a48b908144a94910112e19f13bba1 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..221f9ffd1eb177554f1e976499d655666e61abff --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d973c55cadccf6ff6ef9e217145fc1e52fea2fbcc1c6a26f6ebb6c7cd1ee47c +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1800/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8c0590fe03dc221b1a1ac086e1205bbd50d42b42 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1800/trainer_state.json @@ -0,0 +1,682 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9, + "eval_steps": 100, + "global_step": 1800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + }, + { + "epoch": 0.6125, + "grad_norm": 0.1620441973209381, + "learning_rate": 0.00016821017608365264, + "loss": 0.6163, + "step": 1225 + }, + { + "epoch": 0.625, + "grad_norm": 0.17003227770328522, + "learning_rate": 0.00015885917197714112, + "loss": 0.6232, + "step": 1250 + }, + { + "epoch": 0.6375, + "grad_norm": 0.17415954172611237, + "learning_rate": 0.00014965301261957238, + "loss": 0.6991, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 0.1617245227098465, + "learning_rate": 0.00014060632881768558, + "loss": 0.599, + "step": 1300 + }, + { + "epoch": 0.65, + "eval_loss": 1.1370735168457031, + "eval_runtime": 320.7099, + "eval_samples_per_second": 3.421, + "eval_steps_per_second": 0.056, + "step": 1300 + }, + { + "epoch": 0.6625, + "grad_norm": 0.1753346472978592, + "learning_rate": 0.00013173349793311424, + "loss": 0.6611, + "step": 1325 + }, + { + "epoch": 0.675, + "grad_norm": 0.17510834336280823, + "learning_rate": 0.0001230486210332916, + "loss": 0.6814, + "step": 1350 + }, + { + "epoch": 0.6875, + "grad_norm": 0.17805688083171844, + "learning_rate": 0.00011456550048145536, + "loss": 0.5757, + "step": 1375 + }, + { + "epoch": 0.7, + "grad_norm": 0.17829716205596924, + "learning_rate": 0.00010629761800136473, + "loss": 0.6646, + "step": 1400 + }, + { + "epoch": 0.7, + "eval_loss": 1.1476235389709473, + "eval_runtime": 321.7747, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 1400 + }, + { + "epoch": 0.7125, + "grad_norm": 0.18059992790222168, + "learning_rate": 9.82581132515907e-05, + "loss": 0.6797, + "step": 1425 + }, + { + "epoch": 0.725, + "grad_norm": 0.15256117284297943, + "learning_rate": 9.045976294343145e-05, + "loss": 0.5596, + "step": 1450 + }, + { + "epoch": 0.7375, + "grad_norm": 0.17262427508831024, + "learning_rate": 8.291496053563699e-05, + "loss": 0.6905, + "step": 1475 + }, + { + "epoch": 0.75, + "grad_norm": 0.17382751405239105, + "learning_rate": 7.563569653821565e-05, + "loss": 0.6772, + "step": 1500 + }, + { + "epoch": 0.75, + "eval_loss": 1.1411069631576538, + "eval_runtime": 323.0865, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 1500 + }, + { + "epoch": 0.7625, + "grad_norm": 0.15558482706546783, + "learning_rate": 6.863353945662288e-05, + "loss": 0.5172, + "step": 1525 + }, + { + "epoch": 0.775, + "grad_norm": 0.1950470507144928, + "learning_rate": 6.191961740661687e-05, + "loss": 0.7039, + "step": 1550 + }, + { + "epoch": 0.7875, + "grad_norm": 0.17224222421646118, + "learning_rate": 5.550460042899982e-05, + "loss": 0.6914, + "step": 1575 + }, + { + "epoch": 0.8, + "grad_norm": 0.1652670055627823, + "learning_rate": 4.9398683532350855e-05, + "loss": 0.488, + "step": 1600 + }, + { + "epoch": 0.8, + "eval_loss": 1.1589314937591553, + "eval_runtime": 321.6657, + "eval_samples_per_second": 3.41, + "eval_steps_per_second": 0.056, + "step": 1600 + }, + { + "epoch": 0.8125, + "grad_norm": 0.1697286069393158, + "learning_rate": 4.3611570490698945e-05, + "loss": 0.7453, + "step": 1625 + }, + { + "epoch": 0.825, + "grad_norm": 0.16859295964241028, + "learning_rate": 3.815245842188697e-05, + "loss": 0.6625, + "step": 1650 + }, + { + "epoch": 0.8375, + "grad_norm": 0.16462524235248566, + "learning_rate": 3.30300231711339e-05, + "loss": 0.4719, + "step": 1675 + }, + { + "epoch": 0.85, + "grad_norm": 0.17534367740154266, + "learning_rate": 2.8252405523025106e-05, + "loss": 0.7468, + "step": 1700 + }, + { + "epoch": 0.85, + "eval_loss": 1.161086916923523, + "eval_runtime": 322.1848, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 1700 + }, + { + "epoch": 0.8625, + "grad_norm": 0.16568109393119812, + "learning_rate": 2.3827198263843162e-05, + "loss": 0.709, + "step": 1725 + }, + { + "epoch": 0.875, + "grad_norm": 0.17694273591041565, + "learning_rate": 1.9761434114799497e-05, + "loss": 0.5756, + "step": 1750 + }, + { + "epoch": 0.8875, + "grad_norm": 0.16244478523731232, + "learning_rate": 1.606157455534535e-05, + "loss": 0.6545, + "step": 1775 + }, + { + "epoch": 0.9, + "grad_norm": 0.1651735007762909, + "learning_rate": 1.2733499554322708e-05, + "loss": 0.6352, + "step": 1800 + }, + { + "epoch": 0.9, + "eval_loss": 1.1563351154327393, + "eval_runtime": 320.396, + "eval_samples_per_second": 3.424, + "eval_steps_per_second": 0.056, + "step": 1800 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.814276778051502e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..ed0bb31c6ede19f401db2623d8c8681a33bfae2a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd43534290ab048c293c2ec29c1d9bb1dc029b4116a486fd4e7ff61da69c812 +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..cb2ac224e888ad6943c0297ab5b00e9872ff2622 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8f68e458659a7d24ab9ce501a82ab49c41d50dc9a5bc23c22a579413281ffa +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..01743f860913666fa3a2aefd55eeec526b1508d3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c216ee3cac7901e3332184c9d0942a685b699947eea664c26fa3cebb78e96ba1 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2f6851dab0fa56f4b1584b8794ac374ee30b8f49 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b9da1dd4cff55ca159914401d481dd7ee9a08f49835e2e7dbb32923067606e2 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e089c9c5c391253a4d001f186c79f404deacc715 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df837138c4df5f719ec4da1aaf01a8a9aa42379e0b941305ad11674b85dd2d85 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8677c4ff4bd5498729a1fae93864b135b5b33be3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8dd0efb22404af6d4ab8357b5c15a0a45677669619f0a4f008f7899053c489 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d1a6c1b21d45a3116fe0168adcf1d5b534223576 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d3027775ccd5ca2ba7ba2c3ea889e1c43d845875f83156ac1e0ab558277eea +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fd3e6c700d6446469b02d44bada2ebf5ae6dd35b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3864ca26510f109dd0fe35a903c8914e75c0d967b1d6f59303aec6a084965cc1 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1c888adc9be3318f543ba500bf9727ac62f4f6c1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c076c68a071c2ea375e4f6be1711edbfd07dc498a59ca4cf91b95e41638d42b1 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..ff6d22bc5a8c94776e43826398c62fc102649987 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237d408cf6d8ecd4cc6ce3d421893e895a385269cfb20c57d409faa9c73cd68d +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..71998751fafa5391dffe65a5ac56e6e98b0b1c95 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f512420d26c97f4c0e54a3b4090f99a21622c2990dc3753bf0f38c275f9dcdde +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5f0788f2611ceb89c4724ff7ac17755ee606949b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec17c857cf1742703fbc85a058c2b18310828bb881b66f7144ca70c4d1adbe24 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4dc02aea3798ed27fa4ceef55fc402a53bfe840e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb3434ba596c14167433be1971cb946c52e8259d5ae3979db4c0facb6120a3f +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d52ad71a304571325e17744d35389eb0236b1318 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092c38299094e7d3413bfc96759863dffbb6547ea9631c08f29e80a2e69afbac +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..10e0e938310c3e7dc957776873f44029530aa444 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d958d19cb102c41e9642b820c042a95a791da01ee9ae1203c7a62cf53a4f8b68 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..faad2d117c4b86c1ff69ea1c2d0893d5e2e8455b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e06855ac5edb6611214a01841a59b1356bf6631228180ab7961dde6015a4d1a +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3c5848c0dc3eaa6da85b050c243537a404c3cf10 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b80a615e2cc4f2fc66484bcfb91d79dd3c062e75c96faceeb581f8b5eb5a2ac +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0d01f21f17e317459b83db0224d5933c89335db1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac3b6ef892071ad4001cd561a520d28faaba15f0b78a9ae916658d2271c28ab6 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..93dc3bdd8b544c34e17bf9996dbcdab801ea0f97 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e409faa47bc79c0ad718c95292033fc1402bf7b7ab48bbec1e96ee3b980028 +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b8768e2aeb7dd408d7467dfd254c45c618b7390 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a55605f0f83843e4c91757a79dac5b24454a60f0028fc9599812d32f683d434a +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3b2f621de46c3576e38f4f41e43532d1b3f0e919 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c31c4f35ed9f40bf17f26f645bbb184ad6e9f7def045d9d0266215523c86b2 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..ceb10e858887091a0caae6cb0d452bb1d51f3314 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce24c14f7dabcc1e9c93031afb3e354d5cdf49d6eb3bef0dbaa358b0fad50527 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..36efcc64f5934958950738bafcdfe20b132230d5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2122ecd749d6091c466d968c7b005c78eaa2b9179e618afc8c409b6236761ef0 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..da8c4dcb813b72b75aabd47c95060126cb65216e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24d192fa9d3d7305db8099dc5198a4c974a1210b9e552f80c8ae30c88456dbfa +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..74358107123f2f8aa3277a1e65a7a052f9f66c82 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de15ef0c441e909cf4d389f94942f48bf79c0569709ff15dadb83798e850a4a1 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e56d9c206ef2e43663ba68c1476535cbc6551f5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c8b9b13c12a6e57a19e8cb21bc74781b7df61084f81874e75d9dfdbfd68ab0 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-1900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..88d6546af3495fc26241dfd833bbbb2f711934ac --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ea56670e55a6b485cbf0f38ec022a88b913275368ecee1cfea14b0bc438f1b +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-1900/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-1900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ad94bb53ce6e9430741e97b6e0b2f715d1fc39d4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-1900/trainer_state.json @@ -0,0 +1,718 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.95, + "eval_steps": 100, + "global_step": 1900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + }, + { + "epoch": 0.6125, + "grad_norm": 0.1620441973209381, + "learning_rate": 0.00016821017608365264, + "loss": 0.6163, + "step": 1225 + }, + { + "epoch": 0.625, + "grad_norm": 0.17003227770328522, + "learning_rate": 0.00015885917197714112, + "loss": 0.6232, + "step": 1250 + }, + { + "epoch": 0.6375, + "grad_norm": 0.17415954172611237, + "learning_rate": 0.00014965301261957238, + "loss": 0.6991, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 0.1617245227098465, + "learning_rate": 0.00014060632881768558, + "loss": 0.599, + "step": 1300 + }, + { + "epoch": 0.65, + "eval_loss": 1.1370735168457031, + "eval_runtime": 320.7099, + "eval_samples_per_second": 3.421, + "eval_steps_per_second": 0.056, + "step": 1300 + }, + { + "epoch": 0.6625, + "grad_norm": 0.1753346472978592, + "learning_rate": 0.00013173349793311424, + "loss": 0.6611, + "step": 1325 + }, + { + "epoch": 0.675, + "grad_norm": 0.17510834336280823, + "learning_rate": 0.0001230486210332916, + "loss": 0.6814, + "step": 1350 + }, + { + "epoch": 0.6875, + "grad_norm": 0.17805688083171844, + "learning_rate": 0.00011456550048145536, + "loss": 0.5757, + "step": 1375 + }, + { + "epoch": 0.7, + "grad_norm": 0.17829716205596924, + "learning_rate": 0.00010629761800136473, + "loss": 0.6646, + "step": 1400 + }, + { + "epoch": 0.7, + "eval_loss": 1.1476235389709473, + "eval_runtime": 321.7747, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 1400 + }, + { + "epoch": 0.7125, + "grad_norm": 0.18059992790222168, + "learning_rate": 9.82581132515907e-05, + "loss": 0.6797, + "step": 1425 + }, + { + "epoch": 0.725, + "grad_norm": 0.15256117284297943, + "learning_rate": 9.045976294343145e-05, + "loss": 0.5596, + "step": 1450 + }, + { + "epoch": 0.7375, + "grad_norm": 0.17262427508831024, + "learning_rate": 8.291496053563699e-05, + "loss": 0.6905, + "step": 1475 + }, + { + "epoch": 0.75, + "grad_norm": 0.17382751405239105, + "learning_rate": 7.563569653821565e-05, + "loss": 0.6772, + "step": 1500 + }, + { + "epoch": 0.75, + "eval_loss": 1.1411069631576538, + "eval_runtime": 323.0865, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 1500 + }, + { + "epoch": 0.7625, + "grad_norm": 0.15558482706546783, + "learning_rate": 6.863353945662288e-05, + "loss": 0.5172, + "step": 1525 + }, + { + "epoch": 0.775, + "grad_norm": 0.1950470507144928, + "learning_rate": 6.191961740661687e-05, + "loss": 0.7039, + "step": 1550 + }, + { + "epoch": 0.7875, + "grad_norm": 0.17224222421646118, + "learning_rate": 5.550460042899982e-05, + "loss": 0.6914, + "step": 1575 + }, + { + "epoch": 0.8, + "grad_norm": 0.1652670055627823, + "learning_rate": 4.9398683532350855e-05, + "loss": 0.488, + "step": 1600 + }, + { + "epoch": 0.8, + "eval_loss": 1.1589314937591553, + "eval_runtime": 321.6657, + "eval_samples_per_second": 3.41, + "eval_steps_per_second": 0.056, + "step": 1600 + }, + { + "epoch": 0.8125, + "grad_norm": 0.1697286069393158, + "learning_rate": 4.3611570490698945e-05, + "loss": 0.7453, + "step": 1625 + }, + { + "epoch": 0.825, + "grad_norm": 0.16859295964241028, + "learning_rate": 3.815245842188697e-05, + "loss": 0.6625, + "step": 1650 + }, + { + "epoch": 0.8375, + "grad_norm": 0.16462524235248566, + "learning_rate": 3.30300231711339e-05, + "loss": 0.4719, + "step": 1675 + }, + { + "epoch": 0.85, + "grad_norm": 0.17534367740154266, + "learning_rate": 2.8252405523025106e-05, + "loss": 0.7468, + "step": 1700 + }, + { + "epoch": 0.85, + "eval_loss": 1.161086916923523, + "eval_runtime": 322.1848, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 1700 + }, + { + "epoch": 0.8625, + "grad_norm": 0.16568109393119812, + "learning_rate": 2.3827198263843162e-05, + "loss": 0.709, + "step": 1725 + }, + { + "epoch": 0.875, + "grad_norm": 0.17694273591041565, + "learning_rate": 1.9761434114799497e-05, + "loss": 0.5756, + "step": 1750 + }, + { + "epoch": 0.8875, + "grad_norm": 0.16244478523731232, + "learning_rate": 1.606157455534535e-05, + "loss": 0.6545, + "step": 1775 + }, + { + "epoch": 0.9, + "grad_norm": 0.1651735007762909, + "learning_rate": 1.2733499554322708e-05, + "loss": 0.6352, + "step": 1800 + }, + { + "epoch": 0.9, + "eval_loss": 1.1563351154327393, + "eval_runtime": 320.396, + "eval_samples_per_second": 3.424, + "eval_steps_per_second": 0.056, + "step": 1800 + }, + { + "epoch": 0.9125, + "grad_norm": 0.17625375092029572, + "learning_rate": 9.782498225276437e-06, + "loss": 0.5515, + "step": 1825 + }, + { + "epoch": 0.925, + "grad_norm": 0.17532864212989807, + "learning_rate": 7.213260420777607e-06, + "loss": 0.686, + "step": 1850 + }, + { + "epoch": 0.9375, + "grad_norm": 0.15695761144161224, + "learning_rate": 5.029869279117167e-06, + "loss": 0.6296, + "step": 1875 + }, + { + "epoch": 0.95, + "grad_norm": 0.17587953805923462, + "learning_rate": 3.235794735214709e-06, + "loss": 0.569, + "step": 1900 + }, + { + "epoch": 0.95, + "eval_loss": 1.1628855466842651, + "eval_runtime": 324.1332, + "eval_samples_per_second": 3.384, + "eval_steps_per_second": 0.056, + "step": 1900 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.9150699323876966e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..a91f2633e7b0b6425919d3f5c21fcdbd22ae36b5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1579e268577c476a421e02acd25792db7207100f64d0594997403ca9556d65 +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..34e2e3fd732a4324b8d9bd0b94c5633b97a794e4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6207f7b58aefaef0545c6da0dc50a74ed3f233f281297b1948b9ed3d9b876129 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e73cba2efb4d5b4f187e8dce5225d302ff61110e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35227391133a1e51e2b1592a513c0da60ce827b38bbecf29921021c25c208b4b +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..debcf1692a59eb10951c6fc01946dc204027db2f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61e4a784828a36a39fbfa1928ce8b466056668952cd1ce1ffcaa241555e7175 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2d236ea61f8e0cfa4fac0f78d21d4a79a72776c0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05869c277afb1475ac9a71673ab05156911ad377287aaa667ee27162a88e0bd1 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..97651f600ad3024a75d3b2886dddf07299023796 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2e226746b1c4188c100149a1145fcffd2318af5b01ac5117392f8f72b632e2 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..142a8722d84dc306201b2bd315b4ebea6a573053 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8770c8854f832f78d2bf7cca75a5a3759b0a499b3b968e0054f17f2d001864 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c24e29fd610d0fc44b8e5b185f4f7f17b67c8c02 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d944876c7c1456074e57bd1b7452a3528f0eff74344ade28c5bd473e33bd69a9 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3af264aad924cef9bbcb8648aa778dc79be1e691 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a12ec5c72a915068b1a97a0aee33ffbae930996484086ac902d152c237a2ee +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..0c43e32ee9c84861cedc3e28e9d4838cbac2dd7a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab941193cf2a31ef10ce7f3a76975c304db31b9495331e16933de30f2f0a5b5e +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3d269df3c0791bac3f8697f674bd1c0026f479d6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e73a1d774d4063f2badce5f8bbd34a947a4dfb9b7b18c31b39f1e164206096 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c69d16cd81939144ff43287cc203bfba74a4b4e7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b821269cf2db17452705dc3a1dfe6b00d1a433de2507001d4971d25e6d853d +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d173f13987769f095366b210867f084a233f4330 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d04fb92ca35ae1f73eafb1f71124f1c6135219bfa7d8b916bd9286b907e267 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..95e6d0c73e372fb0a3b35fbcab635656be7271b0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ae045148586bf36ccb5a031f00a4964e138652e50949599ace95b26c079fcf +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6a908fa1073b23217af069d77624694238850541 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46c44b4601e4f1d063bbb762fe575f0bb1647a76c55ae48958c2f6acc58baf8 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7d9e186100348bbd2b0d1df263da53f8ab3186ae --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f502a70b7386e83a2f090f33290ad2c2db6a78db79d4135eea9f3d5df165865d +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..df9848760f8e8a87f56a24ccb42d3363fcd9f4e7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216cb53ee4f20fb8d1750a28c32133c87d498d28708954ea4eb9a9e6e311f464 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0361cd53e58aa1d2dee14e7fd80cfd3e6f904d91 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e1d9a25b9fd20f0fc406de49d027e23bdd1efb070d6d08798337a36209895c +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-200/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-200/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..16b69078618b6c5c44fe97eb4a33f5d7f2c2b6d1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0639e2524b3606de92cf704efe87f4f42e6b531536716338096cdcb997c8f523 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-200/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-200/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9c62011595a852aae341a8454b41adf3693e94d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf7f49d9db6183dc24e6704956551cd47c1f5a209075611fe04ca451437a895e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-200/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-200/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..46f7047665cf1a3970a3b63ed2c2a0c96af8ab3a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e46e59be97e565494bebb1430b0c9995dec568fb4b79287f2dff3dbf2730430a +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-200/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-200/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..d987862d727896ff71382bdfdae9b0bcdd01daf2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0bb1325f8952acda6b977e4e52f785ac1892d58ddb3f31f0d60ae566525666 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-200/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-200/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..8b36f99e0e9ebc014c97a490a15ccedf89960d6a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338387d0d962d6ea549d773166e09059d382f352f9b68a7f8f49f176fdb24478 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-200/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-200/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ce5464236860b95d6c73e13afc55f7c87f56249 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57875a0f71eaf92d41a7e95ed7a6061e2351c52735063fb8199f0a2528b42b27 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-200/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-200/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..9a5976d4ad4a4ee1c3a7350e5dc3e92d9d9b63a3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8634b4b2a740673155b568734eb1f609f0037798280a25133c4a979cfbf6c1c2 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-200/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-200/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..db7f177e1377fe2219ab10be57b5cdcd399980c8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab80e51cd15becf6304340fc463ec3aa562bffed9ca0ae82e20eacbd1641316e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-200/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..85540d266d7ef6a9fd6cc6c6a50cee279aee5c43 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1407dea8b779520dd0c3e208f8c82d3dffd12c0548e2e910a4f9aca30c2908c +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-200/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..62611d2a12aa1d888d5f9e96f84bb026b508c0cc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-200/trainer_state.json @@ -0,0 +1,106 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1, + "eval_steps": 100, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.015863086723891e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..e56dd3f44739ab710582d41c8fe988a315f1c47d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893798081ae27f9f8902e5de6a62756099eca4e71ecbdd3b941c2dbf2b481618 +size 869352 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c0191b084ce4b87880a74e2ec27ac1326599c873 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4494d00fa5724e6b393b7eb4937cb039c204a57d9b96acdad5a0e944c0d81aed +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6b60627d5555bd1dedaee62d3ef268c00311dc1e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfde03e72faed9ca00f9b287bc44b5eea3a32fa852fed54db611c6670444a631 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..694c2d44c92e64ca2e422f79de6ed481b2736a0f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3480ad95aa258540c6119dd6351e1abc4981bf7a522139bb31d3ddf00dc8254b +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b34904046f8cd791cb2b055b07823d5d32b5e830 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe059f0b8e477235732bc85addb716838759948c2468289c35c5fa384d15b0a2 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..06229ab2eeba6caec098111417388cf9bd60e0a6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bee5ef51223949b65c655059b7198edc1efade17f718c04d18383f2ad268140 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..866010f175810e042bc845bfe40600e152c318ac --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a61648170442364a3135d96dfc0090bf50159b1cb15d4cbd16b9ee5a476989a7 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9b9dd60aba5135f8cbaf3b5a83c896ba45555a4b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:214dd5d8d2408c976040b77c279aba5d3d662e0cb07e834e74eba04e70f51e46 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5dbe08d7868edfcd03d97d3af906b004cb3bc33c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f4ba64803b3a356a57f4dab499d21e4068134c4f15cb279ee6ef56023f2834 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..7339cbcd1f40c83ac47cf76b7134f32e34a6494b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842cdf8e0bfddeef6442529c8d997e11b900cbcdd36e5127ddf1aa605cb78728 +size 339842 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5851660cddaa6bced0a20b4f23d98b1b55662e5a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c357465b81b19b1717847280888994f24a4e030c96815490e33586dea619a7f +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0bc1b03db3c36847878c852550907f98f41f7e4d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55a30bec4c3a1875dc60ef7cb3020ea0b91c6e0bdbba5b636647e5c46cf3dc8 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2d8fcfbff7fa3ba91634cd9d4c07333784c5f288 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a750c2362635ce2f9cdd5d767624f242fa664ab896d73b33ff790b667ce34ca +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0e5360b69e284d564b5c6b79730f8573e4f503e6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5106aacfb3cae64ea824550c1fe4e273cc4bb37dab52a3618bb52db0ec572102 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0d180226ebb4ea4bed6a00bc611304f895635b74 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c623c62423f0f46255c3f6bf8232a72c32173585e396b539405b5a5af4a43f +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b6f8622abd963167cacf3f986d88ee6282550665 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:160e1fe5605c3319fa6dbd6a8fddb68505061f485316d5c6c165f23704d78599 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..53856020ee00f38f223081e74657f78bcb766033 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5df9c72e18f82162e5c774c099ca999eb55049a027dc6bdf67b1b358a6d323cb +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1b1d955ca05ee2592e0334eca88c5aadc61e89c4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf2209f50ae0f023ca48f646734c0e793d6123c264c2eaf31be984b0a68131c9 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5081f4643aed8e540561d0a4379dfa4490bb1ccd --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e13e042fb10504d07fab2f1ec3eb2fa0a34c92dc5fe441ac5ab9d5b42330a4 +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d88142f8c20d884c5128f5d1117de5044e0ffb0a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0b470305d099d6097f58f0dcefdc4f8a97dd53e726fb377a8b71b69d667b20 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..15b4b1ce3e2d63e38c45fd9e3b06ddb2c03994cb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d5c6e421cff6a3c78e8e1f97418a767aeb2642aa9dee41ebdfdc68feea96143 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..def6ee1890ef6f56f2e88d33438e382e3d66f4cc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d985edaa2c6b455c79c0e8fb434449314207f69f7896473ec6dcfc0512e694f +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e6466f40ab982262a66ddc2945524626eb48360 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f6638a61cecbe433054d715c778adb0e98d1216ee478eb335aa90e4b11324e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0f9a126eb2201b88083979892d33e4b39c747eb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10695be217f2cb83adfccd3704472700ad7d3848e568a35a138e892e94a3698e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf78c9946c999d5414f01fab17621ad41da200ca --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0e7dd56dc37eabbb37cfb18dc0a8b0c66fff54746ac0f68012da8409cf9f3d +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..12487fd469c9018e6ca5b05dd4ccf44219da6912 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a358eee0f2b44755165b449b7b02df94e167ea66869d2475f5cd8df3f5cd6c +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4daf7f0b80cf589d696b1c0cece5028868dfa3d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dc8c5c688c0f02c11f16f1d370c55a4c1e62ffdc212b9c6748e19f0536a865 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-2000/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2bba80e4dcf3e0b3f76ce190189af76670978bbd --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-2000/trainer_state.json @@ -0,0 +1,754 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + }, + { + "epoch": 0.4625, + "grad_norm": 0.16132202744483948, + "learning_rate": 0.00028615260794273236, + "loss": 0.7259, + "step": 925 + }, + { + "epoch": 0.475, + "grad_norm": 0.16069433093070984, + "learning_rate": 0.00027626427720662416, + "loss": 0.4996, + "step": 950 + }, + { + "epoch": 0.4875, + "grad_norm": 0.16033512353897095, + "learning_rate": 0.00026633420620195917, + "loss": 0.7768, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 0.16236689686775208, + "learning_rate": 0.00025637817620561263, + "loss": 0.7225, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 1.1207813024520874, + "eval_runtime": 322.5881, + "eval_samples_per_second": 3.401, + "eval_steps_per_second": 0.056, + "step": 1000 + }, + { + "epoch": 0.5125, + "grad_norm": 0.15189126133918762, + "learning_rate": 0.0002464120097495559, + "loss": 0.4932, + "step": 1025 + }, + { + "epoch": 0.525, + "grad_norm": 0.1648908108472824, + "learning_rate": 0.00023645154547503855, + "loss": 0.7902, + "step": 1050 + }, + { + "epoch": 0.5375, + "grad_norm": 0.16001655161380768, + "learning_rate": 0.00022651261296116894, + "loss": 0.7003, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 0.1633903682231903, + "learning_rate": 0.00021661100756789666, + "loss": 0.4709, + "step": 1100 + }, + { + "epoch": 0.55, + "eval_loss": 1.1372770071029663, + "eval_runtime": 323.5516, + "eval_samples_per_second": 3.39, + "eval_steps_per_second": 0.056, + "step": 1100 + }, + { + "epoch": 0.5625, + "grad_norm": 0.1652471274137497, + "learning_rate": 0.00020676246533337764, + "loss": 0.8076, + "step": 1125 + }, + { + "epoch": 0.575, + "grad_norm": 0.17891553044319153, + "learning_rate": 0.00019698263796561526, + "loss": 0.7156, + "step": 1150 + }, + { + "epoch": 0.5875, + "grad_norm": 0.16443009674549103, + "learning_rate": 0.00018728706796812333, + "loss": 0.6316, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 0.16446325182914734, + "learning_rate": 0.00017769116393914037, + "loss": 0.6956, + "step": 1200 + }, + { + "epoch": 0.6, + "eval_loss": 1.1236783266067505, + "eval_runtime": 320.7164, + "eval_samples_per_second": 3.42, + "eval_steps_per_second": 0.056, + "step": 1200 + }, + { + "epoch": 0.6125, + "grad_norm": 0.1620441973209381, + "learning_rate": 0.00016821017608365264, + "loss": 0.6163, + "step": 1225 + }, + { + "epoch": 0.625, + "grad_norm": 0.17003227770328522, + "learning_rate": 0.00015885917197714112, + "loss": 0.6232, + "step": 1250 + }, + { + "epoch": 0.6375, + "grad_norm": 0.17415954172611237, + "learning_rate": 0.00014965301261957238, + "loss": 0.6991, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 0.1617245227098465, + "learning_rate": 0.00014060632881768558, + "loss": 0.599, + "step": 1300 + }, + { + "epoch": 0.65, + "eval_loss": 1.1370735168457031, + "eval_runtime": 320.7099, + "eval_samples_per_second": 3.421, + "eval_steps_per_second": 0.056, + "step": 1300 + }, + { + "epoch": 0.6625, + "grad_norm": 0.1753346472978592, + "learning_rate": 0.00013173349793311424, + "loss": 0.6611, + "step": 1325 + }, + { + "epoch": 0.675, + "grad_norm": 0.17510834336280823, + "learning_rate": 0.0001230486210332916, + "loss": 0.6814, + "step": 1350 + }, + { + "epoch": 0.6875, + "grad_norm": 0.17805688083171844, + "learning_rate": 0.00011456550048145536, + "loss": 0.5757, + "step": 1375 + }, + { + "epoch": 0.7, + "grad_norm": 0.17829716205596924, + "learning_rate": 0.00010629761800136473, + "loss": 0.6646, + "step": 1400 + }, + { + "epoch": 0.7, + "eval_loss": 1.1476235389709473, + "eval_runtime": 321.7747, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 1400 + }, + { + "epoch": 0.7125, + "grad_norm": 0.18059992790222168, + "learning_rate": 9.82581132515907e-05, + "loss": 0.6797, + "step": 1425 + }, + { + "epoch": 0.725, + "grad_norm": 0.15256117284297943, + "learning_rate": 9.045976294343145e-05, + "loss": 0.5596, + "step": 1450 + }, + { + "epoch": 0.7375, + "grad_norm": 0.17262427508831024, + "learning_rate": 8.291496053563699e-05, + "loss": 0.6905, + "step": 1475 + }, + { + "epoch": 0.75, + "grad_norm": 0.17382751405239105, + "learning_rate": 7.563569653821565e-05, + "loss": 0.6772, + "step": 1500 + }, + { + "epoch": 0.75, + "eval_loss": 1.1411069631576538, + "eval_runtime": 323.0865, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 1500 + }, + { + "epoch": 0.7625, + "grad_norm": 0.15558482706546783, + "learning_rate": 6.863353945662288e-05, + "loss": 0.5172, + "step": 1525 + }, + { + "epoch": 0.775, + "grad_norm": 0.1950470507144928, + "learning_rate": 6.191961740661687e-05, + "loss": 0.7039, + "step": 1550 + }, + { + "epoch": 0.7875, + "grad_norm": 0.17224222421646118, + "learning_rate": 5.550460042899982e-05, + "loss": 0.6914, + "step": 1575 + }, + { + "epoch": 0.8, + "grad_norm": 0.1652670055627823, + "learning_rate": 4.9398683532350855e-05, + "loss": 0.488, + "step": 1600 + }, + { + "epoch": 0.8, + "eval_loss": 1.1589314937591553, + "eval_runtime": 321.6657, + "eval_samples_per_second": 3.41, + "eval_steps_per_second": 0.056, + "step": 1600 + }, + { + "epoch": 0.8125, + "grad_norm": 0.1697286069393158, + "learning_rate": 4.3611570490698945e-05, + "loss": 0.7453, + "step": 1625 + }, + { + "epoch": 0.825, + "grad_norm": 0.16859295964241028, + "learning_rate": 3.815245842188697e-05, + "loss": 0.6625, + "step": 1650 + }, + { + "epoch": 0.8375, + "grad_norm": 0.16462524235248566, + "learning_rate": 3.30300231711339e-05, + "loss": 0.4719, + "step": 1675 + }, + { + "epoch": 0.85, + "grad_norm": 0.17534367740154266, + "learning_rate": 2.8252405523025106e-05, + "loss": 0.7468, + "step": 1700 + }, + { + "epoch": 0.85, + "eval_loss": 1.161086916923523, + "eval_runtime": 322.1848, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 1700 + }, + { + "epoch": 0.8625, + "grad_norm": 0.16568109393119812, + "learning_rate": 2.3827198263843162e-05, + "loss": 0.709, + "step": 1725 + }, + { + "epoch": 0.875, + "grad_norm": 0.17694273591041565, + "learning_rate": 1.9761434114799497e-05, + "loss": 0.5756, + "step": 1750 + }, + { + "epoch": 0.8875, + "grad_norm": 0.16244478523731232, + "learning_rate": 1.606157455534535e-05, + "loss": 0.6545, + "step": 1775 + }, + { + "epoch": 0.9, + "grad_norm": 0.1651735007762909, + "learning_rate": 1.2733499554322708e-05, + "loss": 0.6352, + "step": 1800 + }, + { + "epoch": 0.9, + "eval_loss": 1.1563351154327393, + "eval_runtime": 320.396, + "eval_samples_per_second": 3.424, + "eval_steps_per_second": 0.056, + "step": 1800 + }, + { + "epoch": 0.9125, + "grad_norm": 0.17625375092029572, + "learning_rate": 9.782498225276437e-06, + "loss": 0.5515, + "step": 1825 + }, + { + "epoch": 0.925, + "grad_norm": 0.17532864212989807, + "learning_rate": 7.213260420777607e-06, + "loss": 0.686, + "step": 1850 + }, + { + "epoch": 0.9375, + "grad_norm": 0.15695761144161224, + "learning_rate": 5.029869279117167e-06, + "loss": 0.6296, + "step": 1875 + }, + { + "epoch": 0.95, + "grad_norm": 0.17587953805923462, + "learning_rate": 3.235794735214709e-06, + "loss": 0.569, + "step": 1900 + }, + { + "epoch": 0.95, + "eval_loss": 1.1628855466842651, + "eval_runtime": 324.1332, + "eval_samples_per_second": 3.384, + "eval_steps_per_second": 0.056, + "step": 1900 + }, + { + "epoch": 0.9625, + "grad_norm": 0.17684130370616913, + "learning_rate": 1.8338880060553287e-06, + "loss": 0.6861, + "step": 1925 + }, + { + "epoch": 0.975, + "grad_norm": 0.16606929898262024, + "learning_rate": 8.263770594185149e-07, + "loss": 0.6187, + "step": 1950 + }, + { + "epoch": 0.9875, + "grad_norm": 0.1606944501399994, + "learning_rate": 2.1486307310000787e-07, + "loss": 0.5915, + "step": 1975 + }, + { + "epoch": 1.0, + "grad_norm": 0.16651476919651031, + "learning_rate": 3.1789025450867925e-10, + "loss": 0.6817, + "step": 2000 + }, + { + "epoch": 1.0, + "eval_loss": 1.160001277923584, + "eval_runtime": 322.869, + "eval_samples_per_second": 3.398, + "eval_steps_per_second": 0.056, + "step": 2000 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.0158630867238912e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..cb08525831a74020e2c712065478d55475be5a65 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f1fd7780badfa0cb5ea703ffdd6417e0ae772759e5f0ddf3495606ee88a623f +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6f1498d113f062d28ed6daf5d753fcdc9a16878a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99fa44c8ee6da0c3dad679854cc9007d98a8fbab4995d24af614f8a02b187ac1 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c6087048047c4980ade298039d102ee1837f2d0f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:891faaf150bf71f1af30e38f6dbcd8445c51d3a724f4ab6420ff6340fe9bd34f +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..01decbed19876867d31aa4538e3157b91b763057 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9642f672a096286f3771b1ac97674706c5b329c601a67dde3e1b085f3acdf25 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..18db386cd7df977b8a125612186d74ddebadadf5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977cde61778a722e449c73fd3c84ea7f50143bdd665b08fe6d4d489167b97e0f +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..43d2309ac9eaff752a8dde2e6ae75993255ba2f4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9213dfe632ee36fd907598891040f1bd78a1f300df5c683ff7c2b193dd6c83 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..cda9b45ae2000b93e95c8d3dbf18be23918adf42 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe97076e72fbf65e062dc67803ab840ad7f4d2a6fc11f6c96deeb8f039014d2f +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..55b03ad0773e7db5d1e19d34467f99212e67aa80 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a067f7acd5448eec1de10d3818c09a36a4745fb93416645c0cc8da399fd1a8 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7670dcdb8129312d0cc1de3363f05a08c523e4c7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e945a5236d2ca712bf021178d634e035cbd1fc57b7172aabecfd847bda1fb15e +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..4a22f3fd1dfa47e6995153548b886c012f410d5a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7c1bd5322c53786a1f407a97c0446d15434268e873f86aafac490fab6c05e1 +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8163d7016f08724cf250c94a86c2036a2e2a9733 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb0bbe902830822c40d78361e04e8f0e7af0a3a1f6e4119c65e60b5ddbf2e5b +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..343feb99102d2c2767ece3ba142669fe1f426a33 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39225ef65fcf9315e6ccb4fea23851ac916f45ceb20a956d9118514a2e5f6e76 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8afb8f480c1fa53d0260642effe7f1f0a24de1b6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cafdde099f129e87f2ac57ba31b9522467d5cbbdc734e7bbe843dddb8378833c +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d9c6f548aa0829055eeed2634485e1e48bfaf3f6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7faf1a3f3d6c79a9eab0ce49631f23f48f9eb7d35c3f41dc63f2b808d64dfe +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7747ab18697d6ece5ec08b880608db0c165ab4df --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee38c70ae6c5277da4001be902279a661290776911693df02d8fcbd51cb33a40 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..331945b767e0ff1038c15fa5058a56c10727b3f8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a5e3689af8090645ddfb05b862a85e7f59868217868aa2ef6f8f5fae071c1f +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5a8387c5f8eced7899e56f910e4589fde6836ba7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49756f218ff0f37c8a4951f8d50afc892281fa9a9821a8390cb2c28ce55aab8a +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6851ff34e5dd19e5f579e37dce6fbe7a1f558f45 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab37f776e459dcd7a97eb70f49583f616123bbf7fd7c23658d3eba4fa79451b +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-300/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-300/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..44607170276938b9d9b86f56ebe7a61418d7dca6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78635405a75ff05020872a238d2d5fdc4ef85c11a5aa732aaf9e33f0fb8e6585 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-300/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-300/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..473886890d701a7a85e4c02958a316903a2765c9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7d526848276a402f7bc1beae10b45087e901b93154c2c78f477e274d809f9c3 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-300/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-300/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..1dc41557c2eca229a3d3952e02a32fe1fd75b3b0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5996a515b3450a0364280c9b9bb73f90207e512975241b5f61ef7321ae5cc30f +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-300/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-300/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e5e3074fc1fd124389bcdb4a23a0011c7746d3a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137e0d50809da2617d6e0ed6c57dd8d60eadf6c2b4ebd9ed853378846c8f6bf5 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-300/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-300/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..132e2a133b99fe66dab9093da203aa0f0e3bbdfd --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b268a58ef386239793155fd0899c5233e71fa56e6fd3e57ea0dd3bda7e952e1e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-300/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-300/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..af26fca67e87daa5f7dd8707e6996ff055ae6e92 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d11709cd2016c543dd6d19b0f04760f961a2ef1cbd3d0c35107ad9a94b95665 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-300/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-300/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..559f3f58001a0620b426704ba864225367e421f5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3097153c203c8aac2363aa681f5a0996960cf90714eb5b681f7757df49e9a4e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-300/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-300/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca7376cc0fb27fbffd6603b001029e46bf790277 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe6c7b052c669c6b10fcc44b899680eb1044abdbb7045fb7926eab1e7d098c4 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-300/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4a593887a192de425ef90f1dd945f6d4a63561b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8948ab0329c2a6866caa5cd565decc3046a34770df93f963374ee2edaa3e1fbf +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-300/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..045dc49cc32d2f16b870c283a442802ef0650eee --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-300/trainer_state.json @@ -0,0 +1,142 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.15, + "eval_steps": 100, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.023794630085837e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..565df4b9fbd8573a344d6496157e84be84299860 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1bb4c6f4540a61659dcdf119c5d10333227061decb4d673558d31a42ecbef94 +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..228e2d85411575ea6dfdb72908f15763e5b10be6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b1b581b573489ac04104e5a025f36a44c83ae8b48eb4c14c79b3abda1695e8 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4021cf505e2d67aba7af413c45b34fb7952d1c32 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09e1df5dd0f17cfbd929401a94af9f2f8e592d63b7212551dd070d1ecaea15d +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..79320d12dd6f1c244b9e6cd48ccba4eb2d28a893 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb12b609a2a0cad31d77990bc75b3e8bba3fdd9dcad44d591da416724c1e5c35 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2b9e41bce72c2d62d78810b5e9fd1e45c9c75edc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6352824e80c08cf38cc008db805085a7139373818df6c92f1bd8dc55ea7ef3b4 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e2223e78b8dec8dca09ba06045c05c8b769f0816 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ad312812560996855abf90b6cebad640ee8a95cc8158ea5e2462a656a5b9a7 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..97e68a82a30503e6558349fe254cb2c8c1b08322 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa0495d5f5f3554402749a9531128118174c82c2f37ed549a11c638f24096845 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..54ec55a777b1229158409f10f7f110b447785f32 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc4490d9b0615089cce850c4926fb8b6760b828769048f6db834e0a37f1758f2 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b4ba9cd55c2c26c4343fe2a2df7b61f6e468be02 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990b5bcbb63ae253985e32a9a54101b05c39d00b666e6ee1d2d2bc4c57b156a7 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..4f4ec83b946688928e9e4a468d99035c73b1fe95 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89de34247a056ccd1fa49f4dc4bfc4a1ba93012c275a3cc014b30f46bf5f552a +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e21184d7f8a3b9b2f1884e38bf13e45378098f6f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b21511c41413da29f9dbf67e39226086398dc7ad1935c873ec42d2c0448d7e +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..61a0f5dc7c5320c4dbc27c6382d1416b657bfbb0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30673ee6c34bd46188a23ca40a89ccbe2d403cf30f07aec5cd443fb08b366e0d +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..38f59d06e0cd3c97cb0eb2e7593e35eaea675103 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:871bc21c9356d79c758d50e6cea8e22a405dca457bb205ec00dca96bbf3a6b16 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4fc57a10dfaccddb731558454125ac955f4b7bbc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a3f653db87f137850d42d983867fd62f0dbb8aad851fb662fee211d03dedab +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e64c42620be5e9e82a23317d8ed173c2c245fdc9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ab6756f6ee6644772b7517a16a87e0b9512f8e53c0224391a19ebea8ed15aa +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a289cdcbcbf884037ca95d7cdb2434a54737a8d2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9529e95ddfb6403e5f87be43463b98567ea9ce90f3e6c689502f6a804f7b86b +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..33031d887f18b088e2cb5f466233a1ee39d812d4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b11035b2782d8d7e2cff17b8b47e7a9d3b1b8c9f364ad348bdec04abdc83eee0 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a5926e8fe9cf2f374afb9c1994e43345201ed435 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d8f8b2da817a73a551371242e8dcb95b8c24af61d159979939eab1805d7cc4 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-400/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-400/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..08ab431ed3d6023acb46afeb060b2bbc4e17e7d2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b8976a1d18f6b03f88945806f1ce2916bbfcae48e4b272dded3a6d29242251 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-400/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-400/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..30e500c3cb2e2ad2da307bf4f23bfe0292318b56 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098d6bac0efc38fc3b20a23d1fe696b2b1bd80001f27e26e9d2e80d6a9bc914d +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-400/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-400/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..cbc4f544882477314dc3c6e8813da4d593171e88 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757130cd10dd14fcc61c8e47b0423982e9f6232c009e802da8d3016005900192 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-400/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-400/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..7ed2ecae26799f84eababd04ff7802674b0035ea --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72943d33e58e0f9960fe52b4d961c4aa6906a68c34e17129e0d5333b787e1208 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-400/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-400/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..9efa819df6d036dbd8ded3420ebc7e7fa05234a2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d714ece876a944d0cb591472fd0e3d59fddad92b2c4898c12934c1b75a918bbf +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-400/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-400/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..0365c590a24c3ca2baac2143a3b28f34fbdacea2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e459ac04a35241d3f7e7c709e75d01bc47307c6667df5b234448443eecc478e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-400/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-400/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..351d3794dfa0fb7bb29c699429a91608befe4d83 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d2bff8d2d1aae1314c6be02255f8b63c4963be07c252a8777856f2fd3694c6 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-400/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-400/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b3127672c15442038bad73eb467448946889382 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9789da649a1c984e33a7981799e36bbe7bf52a545da2459c7682edf25c418cfb +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-400/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b235b58bb083cf95264c44d97a93c27205471afb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b56d2c6bd1c2aa83c05c7033b3af13e0206ee25c12092046a84dc057490c52d +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-400/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f39bc89984163f68dbd664e6aaf135b26f6175b5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-400/trainer_state.json @@ -0,0 +1,178 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2, + "eval_steps": 100, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.031726173447782e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..9c8be2bb17b4aa4de58551c8eaee3bf8d457d82a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc54caf2a00885bff038d676b50db5d14fb4ca423d407f0c8e0049281f716eea +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c66d904afe8a3e537985f2137e123ec9b5ced7a9 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2784cbc5e719e1a182fbce9b554fe06158e582c6bd030f8d19fca5762aa2149 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3abec405104c83d703d66f1fe37226b4ef238702 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abf9695b81f008afa3004e2ba86982ba798c02b0df10b9f28ddd02cf6a46eb1 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a9c7283a8170a066fd7f39e3c78f9a89dad8b176 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b7f1ddc00e96b719ce34e61892b5f6697e05462442c406fb722993078ba566 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a91446e13a1779d8359396ba5b1af425920c750b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d1b114ee3a7228e1462b4ce769acd9c87683157214f605372ad6f646d719ba +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..682bf476bbecd64e31b9f21527ca356d221d00b8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e64b7770f00885f276f4635daf41f28fc1c9a98c67f4496724979479de5369c +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8f061759ad50b716c828611eeb4c8c9b93756908 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44e052ce4ca7a9a1f9b8d07720275e90764ed11827091999e76116e612af27d +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7f8bda69daa98b42b99128a41bff272bd9554e0a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c17cf229295c690727a3b17d6505ef4ad19f1af8688ac62afaa1fb04cb72f7b7 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..55fa9c520ef59bf718bda902413ba353d2f8d461 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6692959dea4d97e0d7d7a0a6fa91f48abb500c635e8f95944b4f1fb84c3c4558 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..95678cdce851da43000046eb336c82e14a941813 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592195fbee3218deac34acfe1706ea09b71c03856b4f2fdda2c13fad262bcc18 +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..cb631a95d9cca441a29066e1c90e5b788464000a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346e3e2dac36d8e555a2d0b97baa8f38ce039593e6a732631b53ecb3df1dbca2 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d3880d6fa6d12fa718b750b015ea782a3237be40 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd04f44f125ad9bad8f1ed05efa0777fc4f17f6169df8e6b1c21009db960faff +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c4ee8fba2377886c02715394500229a6e9c58703 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6088caf6789a01df7b91cfd2038dc31ca08486d59c406868accc5d429dd2f456 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..24a077564f7cec5bcb3dee60aa74ea52f656e0e0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67eea4a080e2ce03fba1263b8df98afdf300bc097b349bedca49e44ca7cc2b8f +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f025e70b322ad94d67b62299f88dc5ff979b0233 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5529275aa9478a239fb8c61bda8a0bd4795047da809f4e539c2293431ac282c4 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..915231694e0836d4ab336551bffcd1717b4f9fad --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:563d87bc249e8f79149e38f1eec8b209bca6abe5375d31a7a2d6661474b51b62 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..25f914009458838a5efb082d9c519bca82b75f74 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d294071d86f31f87c909bd0de0480762da328cee16b2b0a9eccd1578a44a097c +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e194f44af420ad034246f60f75483a7c157bc6f5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be25c873ee93c0ffaa3389cd3ff91bcd715adcc5839dff93731890ac8018ced7 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-500/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..699403fdac20c1753b0c0ae0b94a6414158f3257 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f620b269378694edf449ca02b2bfeac974979b2e2447e87f07029a0fed826d7 +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-500/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..050d63a5485b5258fd9c948c66f3b4e3794c4d90 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9cb03eee71de2be3538288b2f1981537922a835272dad01c4069bfdca300bc +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-500/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-500/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..efa423c8fcfb306b8e66e50cb301a6c2ab76ffb7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83178132bb595ebe8b479ea8f105f64cce3a1f9d8ed04b481c05928607ca1513 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-500/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-500/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ac9ee3678df4eb7e0ca5c1ef371205c2d005cc5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd3e2d5b57a6bd25638c48f3b5b207d746b7faa0a195b8bbad1b546064dd8f7 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-500/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-500/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..34a9402a0db7efea3756caf4840667911b86a37b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202a4071f09999f050e9768c1a3a518b6b3bff189c23039fe733f9f24291210f +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-500/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-500/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..d598e109dca963639e8e4f2dba04edaf08d7b9e6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0212e64fa50a81a26672f3500b949ccc2f145f3fcf63b35bc77484ecf00c0c08 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-500/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-500/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..19e1f04b247c5fc37a394707f3f802eca220c93d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:879c494ee06aa75a54e0e810621a4cb24f07dfba633a1043ab54566bc9cd7870 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-500/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-500/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..4cbc6a61e7ad7add8a5412ad34a6085eb835717a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d230e0857cdd767606b7f9cc69f92c1f0b0c46e8929ac5cc39e3a522ede1c7bc +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-500/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a7a8fc1beb3f842bafa7113ae4a09e73a121df7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc95850e72613f7a4a1684465dd4f848b800fdca2b51b79a9ac67435988b218d +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-500/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b0fdd2db8ac66e69dccdf47807a34c52e4b8dbd7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-500/trainer_state.json @@ -0,0 +1,214 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.25, + "eval_steps": 100, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.039657716809728e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..28abf815fe816035f378db5d2e19404a5fd6a88a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc702245113cca6902cc312c834760c40e858ddda55eeebe7cdbdc5ee485667 +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..34357b1fd4fca6a45e3e45c3475ac84bffb4b2f3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5465fb401e93c9a9db99abcf0da28baf3f7965e6f5b3c33db66731f92d470381 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2e0f009d4472c9bf225d837810be1f12816f0bb8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c57f4bbd5d290e7feb0ffcae4dd4313f08ed001d65fa1b0b5b2c5ab056d8ee8 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b1dafccdf16b64637fea47931d28c6948fe6ace1 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d321c328cefc339044e6daa82d8e3d2bac1c11dfa96eee018d98d39b4804143 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d4fcbf55077efd7680920f4da82079fc9199e434 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8182d23f6d678dc56cf2de95e1ff6c38651897795778cb8475fe7dd5d543ebc2 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c88121709a1a07bdeaba2e8659d17183ae9efa8b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d069d09a52b37054a8be2faadb91b6a868af6e3f0ba601dcdca7ea1add4926 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ff12d70b571691fd970015c828c890c8a437ff8a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d9c9c1baa01f48499ad2a952f7b941aea6ebfe6ec0d680427ad1f6867936ac +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1d59db31972d47176e3968a919455d5759b4da30 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68affd03c61e6099e066ba27db0701add76f6bb27d451c5a1512c2935a3820ca +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4a2440fe73e997f75be9cef6e8b869f6970f8c65 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:533d2a287749e66dc7198a6d09a0c493cb40c2a93f0f04cab6fdc3d63fb5ccb6 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..83cfb231a74e275f811367e700fb241cb26f3feb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d95b1fa8347b44c9471392cdfdfd8ab37a7e7e4872aa723fb703e0d2d52cd4c +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ba2232e49f026c50e8a1b5deb9b77890254799c7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74d607e7f26d0332c67332d94fa4212edc4450714e5b0b53430dbf04e0ef997 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9f06d7f2eab5d0581ddfa5a20bd4970bbee3ca60 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7462fed97e180144bfea872e88cc6cf956b4812a467afa963301dbe9d1b1b0 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2790f8b52882a17e08398fdd0ca91ba3e5f14a2a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:147d72026c70302749d733d8e709ca4d48059ddaff9510c939a377593a5009f4 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ccadad3c853a85022cb42e1b24cb6acf467ab66f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a1710afbe637295ecdb697e90678cf0f078f7ff83dcdd422e96f3265cfb6e8 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..49c08b0cd93944df84b1f89d5e90236d796d98ff --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fdd9085ffdfbb31208c68127c3427b59b2a894db068d0a93954ea1138a361e0 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6ea547ba816afcec5815b9c3c28c1915871835a6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5f009928ebdc7c12625931d860b9efe15c32ff962c6fda0dab658010d886c6 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..01fc136b1c7de6e1435e607c0fc649d84712fa2e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03964c707130a2fe8036f961f900fc3ca09ba4d34c3a1feea51bae3b449a030a +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..95b8870e68fa9da1440dad05dccdaebd8528cd38 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa0818a010caf9dfcb68c02f6614f101c9c7a689f1b449ffbb7b39d05e04508 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-600/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-600/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6fd6e0a521df4fcbb775eee743abac77c33e99f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a51c34116e74976603507ba08705aca7a521f5063e0d5280afc2eab68c4bf36 +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-600/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-600/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..91b5f7b3616eba14f6616b60b318f0a38e42ad39 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0e94a8e4fb8632c1a2bbfafc9954dea009a47a89164eaa6b4e9ad4738a61866 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-600/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-600/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..f61758714f0af0a51664275e8c183b1723ee985c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:581c40d10e64ee3415fa1edb81e4df35bcb7d2cccace07218e909d4a0cd39670 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-600/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-600/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..49c24790df1059ce6d97cc85aa0bbafbbe5bef44 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5c5fbd89816b8ec2f69e0f019ea2ae70ee8c86981f8d64e3a701572708e36e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-600/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-600/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..caa359520023e2247cb8bb95d569e113ecb91360 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a729343aef347ba34536cc6a4fb56f898d13aad7a247e651a1c36796fef1e4 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-600/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-600/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..dec6b5929d6f20adfb88bf325d934067adb7ee8e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:889462d35b322a22d2431ed49ed60a340bdbfc709dabefd05137283df1487d2e +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-600/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-600/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..b819bbb64675781cd44fea373d7fa2b4890a7bd3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b701aac9cf3c38cff274fe22ba3612c50d5a982e2585585497f2ca2d0a4729 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-600/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-600/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..397efd8a009b165f197504e261e6eeb57f633301 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30f35a94dc3a8dc4b2ae30697f159aedf54d139373b80885849c908f85376243 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-600/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fa9750d60fe6a490c749218369d6d200b7b6ffc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0fdf508bbbc1f82a3989be901b295050953f0512d0841595d9dae3ad6c0a857 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-600/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..751bb3ef90e13021ef63b458974a878d55d1c935 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-600/trainer_state.json @@ -0,0 +1,250 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3, + "eval_steps": 100, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.047589260171674e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..a8bf981bc1e32209e82a4c648d8d1b750ad0c14a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28d3e6def3c44793342bde02334ee4e845a4533f4e2391fa3dce92153d7dfcb +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4a13385bdb54b0f0d0a4b3d60d41879d5df84d6e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c03467866bb81a7c7778b3b9898c1aefaf741370448b7ea12628e07257ea65d3 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..625f9b017e99d4ff2c535c34de157746acb6bbce --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c06ac6638c8c3ba1e1c73a31926097c2899c8d9e5ba911f5fb6d8a3b8bdf83 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..46080baf3bcec541cab919347fc8f9baf8783bc6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e2690a27603ebf38d44659825f5d4866f8ea90a919656f3703dd9457f15213 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3048d9ea4f2ae1c98d8fa2785f141b185bebc26f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa1cc7d6c9914306371d38d3d10e5372d844ac611ec2497ebef095dc21698f28 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c8eddf56e54de2ee3bb6241ca27ed60b30751582 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82406e0cbcf82ecb4f6c89141c4be060965f2bc43a36010f40a3812a4d81554b +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..834545aad2a44df289290a9d4a248188ef157a42 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812a75a721486b276124455f259e1b78aa4d337c1834417f45fd2994036024d8 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..0989564ecda99a2f5039a5fcaa5a6053dfc5e71f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1ceb4472bf7d5d69e726c6bf33415556ea09de76cd61c2760b474aaf615f89 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3fbbb878373b37016eba25503d185cb4ea0e81ad --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2260bf319cd04128aa02e64de28ded12172dd48084c1ddf096294637329e1c1c +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..802920a0c0e3128cf47637bad3df154c261b001c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8311228a66a4e37e7a12736370b4b5d4128a632844a3d0ccace9eb422c04ced +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8e63ca72dcb12859045affa9cbe81a294c6ee907 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f2e48f6c70876ddf6f2a86202800606cf3e4c42e4a14eecdb50f49a5dae8ac +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..257fe2065d4ec890b6f5d403b21161527371a634 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4df438da1be0272d775ac2eb5fe85c8b582e537425e1781fc434b536cfc9a06 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..87dd1f314f8c079cc085f87bab2dbff99fe59342 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67fc125bcf057e0591050cf475dc976a7f99aae2a1bf328e1baaca20e72b252 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..06974d78582aca3ac53fb0d5e1e3cecf2b431469 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378eb2652959e9df20a7ad9ee3212450520a0b9d186ddda4326e80b9c0bb1381 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..59b50d92ffc310ddc6c688d97043089b0762ec6d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb17c63907dfaf49fef2553522318afce87695fecce8a57c4b03831f135aed0f +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ad82aa5fbbd3e3df52aa6ffe4ef7e064599746ae --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82bf7e777115685c3d5d7d50e72b599374da050cb560d9b1fc90ece2bd76267 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d94cadb0d604294129e0f5bae87e7caa0051f1d6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:175e5d9b8cba81b002efb2bd4cb826f7cc60238c6577d5295d1ad39cbc459677 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..61f7ea7445755d8417e3313e76a9ff369fd9c93c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1504f42a2818f37b34fd5f2ca5b608d5870304ddef101eeabe86f65ee8872d79 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-700/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-700/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5de689d9fd1a61dc6709321a6aa331d6e005be36 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c87cd53dec4801e4dcb6d4bb9548ff38aa0045659cdd86a18577ca7fcd9a3e +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-700/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-700/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..48342dcb0785b7c96ce94a1e65ad7ee7a0b93dbc --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f6fcbe44fc6f1a64ef2e4229329192390d155590e99d4c4819c2d087a49fe7 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-700/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-700/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..4dc040c521ab500222e26dbc5e64f11d4660c409 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb1ee5c68161bb5c64f0b872e58bb8fc2ec4b387d82190a48748f4bb20aceb2 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-700/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-700/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..644a69426ebb1f4d81e7e519fff7817ec70f4c0c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5331cf0a58fb23632a83b360557d02d5fc89b8a8c2f4e8abeb139ab0cec5acb6 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-700/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-700/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5750a227925088c8490d2e426cc9b2ec8040e296 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3056627d13df37bbea46087f62d9c8e5eed572e8607868c71a241cfe13de1ac6 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-700/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-700/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..48a0d5f5be44dab6078d368b51b38930d64f1475 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d44298040b1507a5b96aef6d7f8aa7e27fb07ba806caeaf7d2308f4bbff608 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-700/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-700/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..1aa7e59b4e0f8ab340bac71838c6bd072bfa78ca --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc7c28fd2bad2438655e4c93594ac44c548ed1530b724512ffdda3a05c275d2 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-700/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-700/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..794e5a4bcac7ac5857121ab7fc543d81364d4cb2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54038d56bc152013057e4b82ee86ab0a30d67fd1bf3f57326f8342e3b27946bb +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-700/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cb42d4426a8299f4a6337041b5f31cab6f971c5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6345684fba5839b30835cdc28a18266062e5d7b4c59bdd0ceab08c6e41c7a958 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-700/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a603d795344a831b05fbbc5b9b775638237b28e7 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-700/trainer_state.json @@ -0,0 +1,286 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.35, + "eval_steps": 100, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.055520803533619e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..cfda10269af8a5a10aa9ea0a8ea58a5e655d331b --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f69d35289ec7c702d08671c81ca83e89800f0b8c2ee81a9560f5d708f6e915a1 +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ce2c9c188c7a9953d9535af40533a40a02bf7373 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3144a6911f270f7f5d12ec2644a5b12c27246ce34e9af2ccfbf88cc7ccae07b +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2370ba0bd628bab632059e8b1a3d42bad05be96a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb9a05aef4016c5300eb2bd9b6e2aa39df7880fd6a6951fc12b1db764bdb193 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c3ae66bfb7636fda17acafaa513b948e1e5464fe --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:613285ff15337c5ae5a68026fad4bb4aff6e3926cd62618180e554fd1c0890c0 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ae407f0d194bb8d3d884ee7e95c8e7b0f77a3fff --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc3bce60fe24e774bec3a0fbfe2244ea2484e78d232d2edfa3275a6d4321a2c0 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..66ee136f6cbae986b4b710d86ea4331fa58d23cd --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918fe08d547b45278a4dfcbf5795da06039c04bbd4fd7167768c5a5638110e87 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..ec2a6a5d75cb24e58d533301549b0cc6a152a02f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a40c0cad645c6bfad1bd2b54596fa0f6247c5b447cf11f5dac993bd8ec563d +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6bbed5afccd19d10ec483f8635629f3ee316d698 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0a27ba69beabeb78ef795e5afc48e502845f38eade05c92031846984dc9c09 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..41cbe8c76aaea175bb998f7e47a40c8e2fac4d49 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc56b668a70b41bf5b1598a92506d8bcf4d2e33a1627f1bbafb2e2322eb5f29 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..11b9c590fd1c652e033124a4806b64f333f5ae81 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f25ac76c3f289f3b6bea241733b79e3e28294c28f425e11caf9c567a648440 +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..311684cf2794431800cb8c0efabd30a9df5d67a6 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc3e85228a4ea2bfdf54244f84b9c3d62d9768c9881f0f4288be941c39ffe1d4 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1d6075ffaed570d36825582427b328c806c097ff --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467560295b9499264c885ea25c066a4a7fc6071dd344e95c866b52b8903ae1a4 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..789e64e14483dd0384db3b30d9056fe382fedb5e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3401e3077a0f071ad7c1670213d28c33a6977c6cc5f802dad7632c69fda7b39 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..40746ac2e20eef530d8472d45316a4d23f8ce94f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2cdcd3a5fc33f064bc420d8b1bd226e1ae1d6dd49ec67120cbde7599eb81d7 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4caba33b41dda3557eddeb4cc3aca17b686bae6a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf129f65396b75947fc48361e4e9f30ae3d2e69faf83ba9b804629d6b5056ad +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..64525f78aa93702d0fb618c4ec7df7135f2012b2 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c5f0f9f982c782e190c84a03402dc210da8a1e45e2b9891d18810db8700a6f +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b27078d5859d4350fde7c6713ea49addb484d5d4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b418774f0760251a9631e77c12f3c7425768e21223d5d310c8d33c22cce9e342 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9346ea351b60990aaff053bc076161fc9986f511 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07b97b4cc34474c4baf1d5f69ab407928db2db441860fd09fa9f4bf9c300c7e +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-800/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-800/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..60f6acd1c374668f2a5587e26990b9bd686873b3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da38903189d77ebca677952019ddffb7dc5e4dff9e4e4d1d2a62ff346e14cd1e +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-800/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-800/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..cad0c6517f208136366ea009ff0ec32fcfadb9cb --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d7e4cf0e0adc364a659de9420fa4d12fb279f876e679a4b79e7a01fb98c8580 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-800/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-800/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..dcbe0a0928a33931cc06580007b1dcebba9ea56f --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f02eefc8db7444d78f255ccaa290083f51941b3a85ff2920cad0bdeaf64ec20 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-800/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-800/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..61ef41217922d31e2e68b3deefa6b100ff7d69d3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b85bbe008ff0367b8be7c5b22ebec4e6c57a6c257812f2923dd9db1590d8fbb +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-800/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-800/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..33d1fcde67e6ebc100b7d8763871261985f2621a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d693a5e5e3cf8ad4afea3c62093fd15960c5ef4039e21b8699187772ed46244b +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-800/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-800/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7324bdd0fcc8b76ecacd006e8ebacb6783d10d3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e89d03c65eb3124c5f266341e1acfe9513d5e84d6a484888bcec59658a101d6 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-800/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-800/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..aee6d6df3a75f3d97cd7a9a0974b47206c729386 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bdfa8d967dfcbcd71a8bafeac0ad93d130db3f163ed82de33d0ae3e01f671d5 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-800/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-800/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..5fe000e2503db4be1cfcf332723aed922265e3e8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c691d6dc153751df913aa6b27aabff61d809c06b4ef74d0b96634fe71f71c1 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-800/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a08acd119a7aab21ab3547c87f67a43d79bfe92d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5707b44605991a4e2707814032fbd5e2ffa2f78529edb47686673bff4f2f267b +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-800/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c9fc1252d6fbc48c5e63c111d5d8cf6f8ef520db --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-800/trainer_state.json @@ -0,0 +1,322 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4, + "eval_steps": 100, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.063452346895565e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/.metadata b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..719fca8ad608e1991bc07af8710a65e7a5732962 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:641ba875f2330f1026187e27625501166e1ff1ac1de72680d04875f6441c42f2 +size 869351 diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fd4e97b9a6c4f2a514569075e0533a4246fb9cfd --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a37b08ab47022fef4e1f478705d34e435eadb99093ac827fd67ac7c5bfee85 +size 6008476 diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..61f42de2b33c25ed137cc62be054e604e7415826 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bad1a30fc78352096ee8a71c8c6a0c0d79d667fb2a3b02517140e21dfce97fe5 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2187fd89c27e83c056ac65ea294f782a2940e6e4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0171f82cdfc5a66653493772d49dfa4367d330083a5fc8ab91894d7354fb5261 +size 6041200 diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4311e9418015c05b18f62b8151c92d0c6e14f06c --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e82694f79a468a495dcce6bb1d3b51f3c46e3dbac06a781c8ef02d756d2b17 +size 6043476 diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fff86704a1c376c2a6525e6f9d926d474626bbe0 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5944250dc8fc10d8feeab4f750e3c042945df23ea9f09b20b1845e07d646f487 +size 6057364 diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..957a52b8c6828ab0787fa33d357daff3e0cb03f4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e77f8dbba26973d22db8cf4311b94bbe142440ee1b4eb156efa1236870a98cb +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a40cd176fd86aafab8ed1b2ced619936f07d4e08 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba83d10d4de5a6c8099d1ae211c4d07c9c23c045f3990850ac1e5e3a9596ec6 +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6dc35a23545df95ecf95e8db62056da9e93fdfa8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929a3c7da677e2e58dd8b0fe55318cd0b102e06751ae5f84ca6b15b2bc4687ba +size 6042612 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/.metadata b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..284f3434a6f97ca48c2cd9fe76fed072308e5140 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d68e5654e3aefff03149119db347e6af374ca6123083fc729edfb99e5caaf70 +size 339841 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__0_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1b42820b8b2c54b6cc716ed07893f46a4ce90fd5 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b959ad133f2756d7a70f09b0b373c5702f289d980947a4df94f80ceec92a45a1 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__1_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f87c89f4e029986bbfecd1a76b66856ba3d9a616 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee37bc4d7cd025b14fb78070380ab4edf3237c2995434b49d127d97f9371f4d0 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__2_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d438141f99e77cf9adb66f11ef634de533582faf --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc831d090af774647139a9e79f7fea6f9acbdbf3ad4e34723ca04b41cb4767ff +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__3_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..edba66445ec2e6cadf62240abef141821f6a6641 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57436da4fa385d6b9506fea373c562ad3466eb8828393a36ae6f431f3a3229b +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__4_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..486b48894b342ce69d3f3ee4d9431a9297fccf9a --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d453b06c066f4f29919ee7256bc4e7c1fd19627a45be81a9dc2cb4ad358cea +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__5_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3fd87d6208b2015e918ae2bb65b63c52730d628d --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7d9ea3dfb182d6052a0b9390eb8d98eff102a166a3bb4784103e0155e3bd30 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__6_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a844b136ca54af50e11a29228d5184f0f0091887 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21d7fb3c8391977785faa55e1b23ca27a6c16bfb2fa2ca9a14785dcf889caa17 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__7_0.distcp b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4d7f9fedb115520f4d1576ef56c6607a359c0da3 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6787b43bd76d15971a34b2d8ffe38370aa8181fa78ed91325f98c4bac2a562 +size 3003648 diff --git a/peft-starcoder-lora-a100/checkpoint-900/rng_state_0.pth b/peft-starcoder-lora-a100/checkpoint-900/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6f7212620384ca896ddbb8c23d9b0098e49cd3ff --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d13a36e628236695a972ee9fb76f88cc36c00608604d5bd263206cb5124dbd7d +size 15920 diff --git a/peft-starcoder-lora-a100/checkpoint-900/rng_state_1.pth b/peft-starcoder-lora-a100/checkpoint-900/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f150aed2990427a652f468bc7413a6d9482ff81 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f82f54b09d8f2dac91585825ecf36127e36b821dbab5ed25796ec90d0e9b04 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-900/rng_state_2.pth b/peft-starcoder-lora-a100/checkpoint-900/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..08cbb03242a69ade96d61402b2b294aa0f01232e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138709c99223895b208c586f871a225b0faf3972db74e752eb5533badae97672 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-900/rng_state_3.pth b/peft-starcoder-lora-a100/checkpoint-900/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd7a3edee7d843e261b8f2d0704079bf9aa33a96 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:356d53601f7cbb7bd3c2b24fa4b30c939a248284d345d26fe1b313b58687aea7 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-900/rng_state_4.pth b/peft-starcoder-lora-a100/checkpoint-900/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..8936b7750cd6a493f9a495f396ad3616522a5a25 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f832fb5ca9e51ef7d884042fbdebee0a99641db20294d78e0bbbd1eb8faca1 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-900/rng_state_5.pth b/peft-starcoder-lora-a100/checkpoint-900/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..9a4a53e31d212b6f2550bae6e5c38d65c6b72b5e --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:762540bbafcd78de93499144446247bf3b263115df9227761d32e784f965a8d5 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-900/rng_state_6.pth b/peft-starcoder-lora-a100/checkpoint-900/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..996605c16fe19ccb4add64768e7a35890431b5e4 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a06a2dfb2b1b635be1df9ea42d970b61333bd2b01e51d9e7dbf3873784b30e00 +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-900/rng_state_7.pth b/peft-starcoder-lora-a100/checkpoint-900/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..24fcd3eb7ec7ad8e89ae99d0e36b21b1fa6b28b8 --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2b8ee4118d412fb54eaa07e2a5280ba0272c643737804cc19a5ab5c30bc01b +size 15984 diff --git a/peft-starcoder-lora-a100/checkpoint-900/scheduler.pt b/peft-starcoder-lora-a100/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72a700f2d08b7ec27a4b1bf9093382c22731b5de --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b9752ff9fe4b64d860340a91f85fd2a22d00770203412a2f6bf01b73aa0f846 +size 1064 diff --git a/peft-starcoder-lora-a100/checkpoint-900/trainer_state.json b/peft-starcoder-lora-a100/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3edd6c7f89b2d9dfe530b1c9a7f6fc5997e644ee --- /dev/null +++ b/peft-starcoder-lora-a100/checkpoint-900/trainer_state.json @@ -0,0 +1,358 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.45, + "eval_steps": 100, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0125, + "grad_norm": 0.14092598855495453, + "learning_rate": 0.0004, + "loss": 1.1529, + "step": 25 + }, + { + "epoch": 0.025, + "grad_norm": 0.14281609654426575, + "learning_rate": 0.0004998852503731983, + "loss": 1.0472, + "step": 50 + }, + { + "epoch": 0.0375, + "grad_norm": 0.24040694534778595, + "learning_rate": 0.0004993848168027977, + "loss": 0.8532, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 0.14735238254070282, + "learning_rate": 0.0004984880506341147, + "loss": 0.9761, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.9448406100273132, + "eval_runtime": 322.186, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.056, + "step": 100 + }, + { + "epoch": 0.0625, + "grad_norm": 0.1359478235244751, + "learning_rate": 0.0004971963770447935, + "loss": 0.9568, + "step": 125 + }, + { + "epoch": 0.075, + "grad_norm": 0.17497147619724274, + "learning_rate": 0.0004955118488155782, + "loss": 0.7111, + "step": 150 + }, + { + "epoch": 0.0875, + "grad_norm": 0.14382271468639374, + "learning_rate": 0.0004934371430679492, + "loss": 0.9413, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 0.16079047322273254, + "learning_rate": 0.0004909755570095319, + "loss": 0.8981, + "step": 200 + }, + { + "epoch": 0.1, + "eval_loss": 0.9723050594329834, + "eval_runtime": 322.7576, + "eval_samples_per_second": 3.399, + "eval_steps_per_second": 0.056, + "step": 200 + }, + { + "epoch": 0.1125, + "grad_norm": 0.14729101955890656, + "learning_rate": 0.0004881310026940389, + "loss": 0.6378, + "step": 225 + }, + { + "epoch": 0.125, + "grad_norm": 0.15030288696289062, + "learning_rate": 0.0004849080008040734, + "loss": 0.9271, + "step": 250 + }, + { + "epoch": 0.1375, + "grad_norm": 0.1613348424434662, + "learning_rate": 0.00048131167346667446, + "loss": 0.8457, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 0.15532569587230682, + "learning_rate": 0.00047734773611302284, + "loss": 0.603, + "step": 300 + }, + { + "epoch": 0.15, + "eval_loss": 1.006589651107788, + "eval_runtime": 323.2105, + "eval_samples_per_second": 3.394, + "eval_steps_per_second": 0.056, + "step": 300 + }, + { + "epoch": 0.1625, + "grad_norm": 0.16015686094760895, + "learning_rate": 0.0004730224883952422, + "loss": 0.9036, + "step": 325 + }, + { + "epoch": 0.175, + "grad_norm": 0.15767253935337067, + "learning_rate": 0.0004683428041747334, + "loss": 0.8283, + "step": 350 + }, + { + "epoch": 0.1875, + "grad_norm": 0.17757417261600494, + "learning_rate": 0.0004633161205979517, + "loss": 0.5945, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 0.17248600721359253, + "learning_rate": 0.0004579504262769877, + "loss": 0.8655, + "step": 400 + }, + { + "epoch": 0.2, + "eval_loss": 1.0158599615097046, + "eval_runtime": 323.8034, + "eval_samples_per_second": 3.388, + "eval_steps_per_second": 0.056, + "step": 400 + }, + { + "epoch": 0.2125, + "grad_norm": 0.17826460301876068, + "learning_rate": 0.0004522542485937369, + "loss": 0.8079, + "step": 425 + }, + { + "epoch": 0.225, + "grad_norm": 0.19307631254196167, + "learning_rate": 0.00044623664014783386, + "loss": 0.5737, + "step": 450 + }, + { + "epoch": 0.2375, + "grad_norm": 0.1877959966659546, + "learning_rate": 0.00043990716436988924, + "loss": 0.8605, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 0.15268854796886444, + "learning_rate": 0.0004332758803228925, + "loss": 0.7674, + "step": 500 + }, + { + "epoch": 0.25, + "eval_loss": 1.0431231260299683, + "eval_runtime": 323.1376, + "eval_samples_per_second": 3.395, + "eval_steps_per_second": 0.056, + "step": 500 + }, + { + "epoch": 0.2625, + "grad_norm": 0.16773808002471924, + "learning_rate": 0.00042635332671593575, + "loss": 0.5884, + "step": 525 + }, + { + "epoch": 0.275, + "grad_norm": 0.15766142308712006, + "learning_rate": 0.00041915050515566445, + "loss": 0.8178, + "step": 550 + }, + { + "epoch": 0.2875, + "grad_norm": 0.16790153086185455, + "learning_rate": 0.00041167886266207167, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 0.15149210393428802, + "learning_rate": 0.0004039502734764241, + "loss": 0.7334, + "step": 600 + }, + { + "epoch": 0.3, + "eval_loss": 1.0655592679977417, + "eval_runtime": 321.6139, + "eval_samples_per_second": 3.411, + "eval_steps_per_second": 0.056, + "step": 600 + }, + { + "epoch": 0.3125, + "grad_norm": 0.15414434671401978, + "learning_rate": 0.0003959770201902294, + "loss": 0.744, + "step": 625 + }, + { + "epoch": 0.325, + "grad_norm": 0.1529635190963745, + "learning_rate": 0.0003877717742252371, + "loss": 0.6345, + "step": 650 + }, + { + "epoch": 0.3375, + "grad_norm": 0.16185611486434937, + "learning_rate": 0.00037934757569549495, + "loss": 0.7354, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 0.16656433045864105, + "learning_rate": 0.00037071781268346345, + "loss": 0.7455, + "step": 700 + }, + { + "epoch": 0.35, + "eval_loss": 1.0835301876068115, + "eval_runtime": 322.4593, + "eval_samples_per_second": 3.402, + "eval_steps_per_second": 0.056, + "step": 700 + }, + { + "epoch": 0.3625, + "grad_norm": 0.15590643882751465, + "learning_rate": 0.00036189619996312495, + "loss": 0.5972, + "step": 725 + }, + { + "epoch": 0.375, + "grad_norm": 0.1837926208972931, + "learning_rate": 0.00035289675720390174, + "loss": 0.7592, + "step": 750 + }, + { + "epoch": 0.3875, + "grad_norm": 0.1620703488588333, + "learning_rate": 0.00034373378669002105, + "loss": 0.736, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 0.16613048315048218, + "learning_rate": 0.00033442185059073706, + "loss": 0.564, + "step": 800 + }, + { + "epoch": 0.4, + "eval_loss": 1.096523642539978, + "eval_runtime": 321.7553, + "eval_samples_per_second": 3.409, + "eval_steps_per_second": 0.056, + "step": 800 + }, + { + "epoch": 0.4125, + "grad_norm": 0.1587529182434082, + "learning_rate": 0.00032497574781753367, + "loss": 0.7598, + "step": 825 + }, + { + "epoch": 0.425, + "grad_norm": 0.16457463800907135, + "learning_rate": 0.000315410490505086, + "loss": 0.7292, + "step": 850 + }, + { + "epoch": 0.4375, + "grad_norm": 0.16429653763771057, + "learning_rate": 0.0003057412801533589, + "loss": 0.5329, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 0.16424661874771118, + "learning_rate": 0.0002959834834687587, + "loss": 0.7785, + "step": 900 + }, + { + "epoch": 0.45, + "eval_loss": 1.0958806276321411, + "eval_runtime": 322.3489, + "eval_samples_per_second": 3.403, + "eval_steps_per_second": 0.056, + "step": 900 + } + ], + "logging_steps": 25, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.07138389025751e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}