Training in progress, step 22600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1475,6 +1475,10 @@ You can finetune this model on your own dataset.
 | 0.3941 | 22300 | 0.3211        |
 | 0.3949 | 22350 | 0.2505        |
 | 0.3958 | 22400 | 0.3824        |
 </details>

 | 0.3941 | 22300 | 0.3211        |
 | 0.3949 | 22350 | 0.2505        |
 | 0.3958 | 22400 | 0.3824        |
+| 0.3967 | 22450 | 0.331         |
+| 0.3976 | 22500 | 0.3136        |
+| 0.3985 | 22550 | 0.297         |
+| 0.3994 | 22600 | 0.2933        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f1b271b327c67d5e5cbcfe8f4ced2720b67a2ad4b86d7e49064afada335f989
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:497b7a6acdb3a5a1604b3f8ab3267058baa92f80bee7efa6e237ea075655a4d8
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26d8d52c8f794240dfe200811cae13f3270be5d7cc20f4372d66fcdeb02f2c84
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:e163c7782d125cbae55d87a883f0afe29090c268911a4edc6e746df10bc1f663
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d33d0eca02823fd38f9f6cbfa8199ab5481b75e21905025d31754984da79697e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6edf8471c36d7596b23118388dc68d9d67f46cc4a6b1356e1a12264545ade3b8
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9af5256c96c40ab41bd91dd8bfffa192e31a7c4ee253a2eb604282e3fbc22ea6
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9f128257bebc4cb3f00ccba35e93cf76368736113fc2faf057244a587c9ee62
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1c0f9c08fc5a654c07e97009bca360a3a7487ba6dd7aed26e14bcde767a3c57
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8b0e2d20704f293e6229a166caadb9f1aabe09be5b221f331974406b383c826
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3958226573130003,
   "eval_steps": 500,
-  "global_step": 22400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3144,6 +3144,34 @@
       "learning_rate": 3.357287310282539e-05,
       "loss": 0.3824,
       "step": 22400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3993567881818664,
   "eval_steps": 500,
+  "global_step": 22600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.357287310282539e-05,
       "loss": 0.3824,
       "step": 22400
+    },
+    {
+      "epoch": 0.39670619003021684,
+      "grad_norm": 2.3490004539489746,
+      "learning_rate": 3.352378708448686e-05,
+      "loss": 0.331,
+      "step": 22450
+    },
+    {
+      "epoch": 0.3975897227474333,
+      "grad_norm": 1.5686146020889282,
+      "learning_rate": 3.3474701066148316e-05,
+      "loss": 0.3136,
+      "step": 22500
+    },
+    {
+      "epoch": 0.39847325546464984,
+      "grad_norm": 1.5068285465240479,
+      "learning_rate": 3.342561504780978e-05,
+      "loss": 0.297,
+      "step": 22550
+    },
+    {
+      "epoch": 0.3993567881818664,
+      "grad_norm": 1.81602942943573,
+      "learning_rate": 3.337652902947125e-05,
+      "loss": 0.2933,
+      "step": 22600
     }
   ],
   "logging_steps": 50,