Training in progress, step 25600, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/README.md +8 -0
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +58 -2

last-checkpoint/README.md CHANGED Viewed

@@ -1531,6 +1531,14 @@ You can finetune this model on your own dataset.
 | 0.4435 | 25100 | 0.3773        |
 | 0.4444 | 25150 | 0.3372        |
 | 0.4453 | 25200 | 0.3178        |
 </details>

 | 0.4435 | 25100 | 0.3773        |
 | 0.4444 | 25150 | 0.3372        |
 | 0.4453 | 25200 | 0.3178        |
+| 0.4462 | 25250 | 0.2745        |
+| 0.4471 | 25300 | 0.2773        |
+| 0.4480 | 25350 | 0.3822        |
+| 0.4488 | 25400 | 0.3851        |
+| 0.4497 | 25450 | 0.3805        |
+| 0.4506 | 25500 | 0.3245        |
+| 0.4515 | 25550 | 0.2978        |
+| 0.4524 | 25600 | 0.3397        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:841fdf3c58c8f29d1dbc4b6aa518d6d4f0e4c702d0d79eae99a7a6f05440afb8
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b4c41f05edc2d7e0e5b6aa96c7280a269723970b3d9f5db97a908d698c18e46
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d042b648040ac616e95ada7c18231cfcd360f4f2b7dea533dbe8440f0bfa84b
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ad0f634d22e8d985415b64b93836e5380971e245fef96c6b35c06a87043b680
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3ad48efbb9bf93a84390a5aea5643acbd41bf262a8aa17ab3278f0f314a581a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3d11ea4dc06732960ed6abb1d44f0f58d98e38680260f859b6e949eb007dff1
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8f8fa593fe1292958817d4226b917c242a5dd7ed49104de560771ecc5cb6968
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:42f87f293944d5e456973842209f35aaec89127a12621bc86fb62b420000afe5
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8139c38e3bba457a10055977964d7aab9920cd7bb0ccf9d0c0ca174f5b19226
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0157ec5a2cd87b323072c019eeb626da0770a47c34b4436db6020558b163b6ea
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.44530048947712536,
   "eval_steps": 500,
-  "global_step": 25200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3536,6 +3536,62 @@
       "learning_rate": 3.082601951660089e-05,
       "loss": 0.3178,
       "step": 25200
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4523687512148575,
   "eval_steps": 500,
+  "global_step": 25600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.082601951660089e-05,
       "loss": 0.3178,
       "step": 25200
+    },
+    {
+      "epoch": 0.44618402219434183,
+      "grad_norm": 1.6131466627120972,
+      "learning_rate": 3.077693349826236e-05,
+      "loss": 0.2745,
+      "step": 25250
+    },
+    {
+      "epoch": 0.44706755491155836,
+      "grad_norm": 1.5419201850891113,
+      "learning_rate": 3.072784747992382e-05,
+      "loss": 0.2773,
+      "step": 25300
+    },
+    {
+      "epoch": 0.4479510876287749,
+      "grad_norm": 1.6418931484222412,
+      "learning_rate": 3.067876146158528e-05,
+      "loss": 0.3822,
+      "step": 25350
+    },
+    {
+      "epoch": 0.4488346203459914,
+      "grad_norm": 1.288121223449707,
+      "learning_rate": 3.0629675443246745e-05,
+      "loss": 0.3851,
+      "step": 25400
+    },
+    {
+      "epoch": 0.44971815306320795,
+      "grad_norm": 1.9523035287857056,
+      "learning_rate": 3.058058942490821e-05,
+      "loss": 0.3805,
+      "step": 25450
+    },
+    {
+      "epoch": 0.4506016857804245,
+      "grad_norm": 3.3735404014587402,
+      "learning_rate": 3.0531503406569674e-05,
+      "loss": 0.3245,
+      "step": 25500
+    },
+    {
+      "epoch": 0.45148521849764095,
+      "grad_norm": 1.4013001918792725,
+      "learning_rate": 3.048241738823114e-05,
+      "loss": 0.2978,
+      "step": 25550
+    },
+    {
+      "epoch": 0.4523687512148575,
+      "grad_norm": 1.9055225849151611,
+      "learning_rate": 3.0433331369892604e-05,
+      "loss": 0.3397,
+      "step": 25600
     }
   ],
   "logging_steps": 50,