Training in progress, step 26000, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1543,6 +1543,10 @@ You can finetune this model on your own dataset.
 | 0.4541 | 25700 | 0.2669        |
 | 0.4550 | 25750 | 0.3316        |
 | 0.4559 | 25800 | 0.3395        |
 </details>

 | 0.4541 | 25700 | 0.2669        |
 | 0.4550 | 25750 | 0.3316        |
 | 0.4559 | 25800 | 0.3395        |
+| 0.4568 | 25850 | 0.3631        |
+| 0.4577 | 25900 | 0.3418        |
+| 0.4586 | 25950 | 0.3351        |
+| 0.4594 | 26000 | 0.3117        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd62d182588cf8dcdefa578689850921060508dd744c2b3d5b8b216b2f913c93
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d9b275662690960867e4c5004de25c023e37f1f7c4ebe3e55950090be859db5
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c66245bb4fd1461fe3283febbf948bf75039365f0ad45b5077d2a510d710e8d9
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccf58858fe88830e35307ea10b8a5b6180e3bcc733d316d0262f05ad1036e132
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a2d25e27ed0772b40c719933dfce361fb32d9594edb2389a78ead3d7c0f02d3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa2ed85cbcfc26a558906734eea48312aeda57b3baa4220e882e04b5dde95173
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ed2d3227321b546366709c7d41896c93e3aed6b33762555946b5ad3ddd21662
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5a1824f2390986b90c617282d9a88df5bdc60e3f12c5d84551697fb36352fa3
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2352e4a13b9010953b7c8382f227a4c5da82a9b4a73a8c91fe1b7aa1258dd370
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:67c6214bd46d06bb31b87e2550447bf08cbff7452cc6c1e25562bc8a18251786
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.45590288208372354,
   "eval_steps": 500,
-  "global_step": 25800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3620,6 +3620,34 @@
       "learning_rate": 3.0236987296538455e-05,
       "loss": 0.3395,
       "step": 25800
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.45943701295258965,
   "eval_steps": 500,
+  "global_step": 26000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.0236987296538455e-05,
       "loss": 0.3395,
       "step": 25800
+    },
+    {
+      "epoch": 0.45678641480094007,
+      "grad_norm": 2.8250796794891357,
+      "learning_rate": 3.018790127819992e-05,
+      "loss": 0.3631,
+      "step": 25850
+    },
+    {
+      "epoch": 0.4576699475181566,
+      "grad_norm": 1.1532173156738281,
+      "learning_rate": 3.013881525986138e-05,
+      "loss": 0.3418,
+      "step": 25900
+    },
+    {
+      "epoch": 0.4585534802353731,
+      "grad_norm": 1.687465786933899,
+      "learning_rate": 3.0089729241522846e-05,
+      "loss": 0.3351,
+      "step": 25950
+    },
+    {
+      "epoch": 0.45943701295258965,
+      "grad_norm": 4.05789852142334,
+      "learning_rate": 3.004064322318431e-05,
+      "loss": 0.3117,
+      "step": 26000
     }
   ],
   "logging_steps": 50,