Training in progress, epoch 1, checkpoint

Browse files

Files changed (13) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/global_step1200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
last-checkpoint/global_step1200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
last-checkpoint/global_step1200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
last-checkpoint/global_step1200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
last-checkpoint/global_step1200/mp_rank_00_model_states.pt +3 -0
last-checkpoint/latest +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a0762ec721b93d1a0e10ada578c7538ccb87f010928b297b4505a645b3aec697
 size 98088784

 version https://git-lfs.github.com/spec/v1
+oid sha256:01f2d5d0f7a2b7fede001e37991bd6985fc274f063f73ff62dc59d392b4e63a6
 size 98088784

last-checkpoint/global_step1200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fab8ffa94bdc27c1cc20ad5cc46550ded319ddd7deec8f5a4a8a5fe810936ac4
+size 73939813

last-checkpoint/global_step1200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0b05b9eadac0276f908244e2a084bbb3b6806cbe8f7998440b46725b88d99b2
+size 73939813

last-checkpoint/global_step1200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b84ae5befcf9368f35047b2404051cfe9aa23d0602a2b9b5e44ad1dc94e35bbf
+size 73939877

last-checkpoint/global_step1200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4d9cfbca81ff0d2f09b172c91877eee8aef7d78f584e23310e2f9d6aba5d9d0
+size 73939877

last-checkpoint/global_step1200/mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ab095ce3b82f509fc00d1719d519d97ecfb3a34cd304cff3cee56d691d7ae983
+size 564993061

last-checkpoint/latest CHANGED Viewed

	@@ -1 +1 @@
1	- ~~global_step1150~~


1	+ global_step1200

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32e2c783f044e208693875b6618820b4692ab8369227ed5fcfe75de8c98cb2f5
 size 15429

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb8226f3cefe922b522e2875b7ca4cafd422d0b379b34caed43be50f8a6af00c
 size 15429

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9a009ec584589b323bfde6fb332132397a948a68665dbf47ae6b13108a76ac8
 size 15429

 version https://git-lfs.github.com/spec/v1
+oid sha256:27e5beba2802aecc2c31190f0e1445fda449914542cb3a995952912264b92bf2
 size 15429

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9eba47f1f3f2aaeb1ee30212c3d28966395e9b15ce04d718f220251a1b885544
 size 15429

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7ccf030e1a7531894174f97468eb482cc1210a67efd80cadbf1d6b45c1e05c6
 size 15429

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:969e35a2eee24aa5d0640e276157b14ed3586e426e68f6139c80b9bdb3012f62
 size 15429

 version https://git-lfs.github.com/spec/v1
+oid sha256:b14d6df95725c0e3824b9ffbf675c3cdedc21103310c246d38cae48315d53791
 size 15429

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f599b3f2fdaee9f298de483bc342667a86479cffdd08dfb05aebfb998561b471
 size 1401

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8dd9ccd3b73af1b44ab373f6253ca88811f20b0e9b7b73611705899de6d0dbb
 size 1401

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "best_global_step": null,
-  "best_metric": 0.6847204566001892,
   "best_model_checkpoint": null,
-  "epoch": 1.127372933251684,
   "eval_steps": 50,
-  "global_step": 1150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1802,6 +1802,84 @@
       "eval_samples_per_second": 125.864,
       "eval_steps_per_second": 15.741,
       "step": 1150
     }
   ],
   "logging_steps": 5,
@@ -1830,7 +1908,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.936913961881436e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_global_step": null,
+  "best_metric": 0.6825479865074158,
   "best_model_checkpoint": null,
+  "epoch": 1.1763625229638701,
   "eval_steps": 50,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 125.864,
       "eval_steps_per_second": 15.741,
       "step": 1150
+    },
+    {
+      "epoch": 1.1322718922229027,
+      "grad_norm": 0.22210359573364258,
+      "learning_rate": 9.862231492251444e-05,
+      "loss": 0.6897,
+      "step": 1155
+    },
+    {
+      "epoch": 1.1371708511941212,
+      "grad_norm": 0.1984894573688507,
+      "learning_rate": 9.851101062895398e-05,
+      "loss": 0.7213,
+      "step": 1160
+    },
+    {
+      "epoch": 1.14206981016534,
+      "grad_norm": 0.2018108069896698,
+      "learning_rate": 9.839926328906811e-05,
+      "loss": 0.6896,
+      "step": 1165
+    },
+    {
+      "epoch": 1.1469687691365584,
+      "grad_norm": 0.19112059473991394,
+      "learning_rate": 9.828707405123364e-05,
+      "loss": 0.7003,
+      "step": 1170
+    },
+    {
+      "epoch": 1.1518677281077772,
+      "grad_norm": 0.2068580538034439,
+      "learning_rate": 9.817444406836856e-05,
+      "loss": 0.716,
+      "step": 1175
+    },
+    {
+      "epoch": 1.1567666870789957,
+      "grad_norm": 0.2238154113292694,
+      "learning_rate": 9.80613744979202e-05,
+      "loss": 0.7058,
+      "step": 1180
+    },
+    {
+      "epoch": 1.1616656460502144,
+      "grad_norm": 0.19843433797359467,
+      "learning_rate": 9.794786650185339e-05,
+      "loss": 0.6938,
+      "step": 1185
+    },
+    {
+      "epoch": 1.1665646050214329,
+      "grad_norm": 0.23146703839302063,
+      "learning_rate": 9.783392124663834e-05,
+      "loss": 0.6892,
+      "step": 1190
+    },
+    {
+      "epoch": 1.1714635639926516,
+      "grad_norm": 0.22127410769462585,
+      "learning_rate": 9.77195399032389e-05,
+      "loss": 0.6976,
+      "step": 1195
+    },
+    {
+      "epoch": 1.1763625229638701,
+      "grad_norm": 0.20067089796066284,
+      "learning_rate": 9.760472364710031e-05,
+      "loss": 0.7033,
+      "step": 1200
+    },
+    {
+      "epoch": 1.1763625229638701,
+      "eval_loss": 0.6825479865074158,
+      "eval_runtime": 15.459,
+      "eval_samples_per_second": 126.722,
+      "eval_steps_per_second": 15.848,
+      "step": 1200
     }
   ],
   "logging_steps": 5,
       "attributes": {}
     }
   },
+  "total_flos": 6.194548673033011e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null