Training in progress, step 23000, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1483,6 +1483,10 @@ You can finetune this model on your own dataset.
 | 0.4011 | 22700 | 0.3567        |
 | 0.4020 | 22750 | 0.3711        |
 | 0.4029 | 22800 | 0.378         |
 </details>

 | 0.4011 | 22700 | 0.3567        |
 | 0.4020 | 22750 | 0.3711        |
 | 0.4029 | 22800 | 0.378         |
+| 0.4038 | 22850 | 0.355         |
+| 0.4047 | 22900 | 0.3044        |
+| 0.4055 | 22950 | 0.357         |
+| 0.4064 | 23000 | 0.3616        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0d300e6229f3196e296d216f86e3dd9be751afa58c003b8627129e48bb13ede
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:98ed5a687dc4715c9a1f73849a4f0d4d7c62e130aa33342cba96e08102f6c698
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12655e53ef206d715a3442591f73e41f1140d40b95f274af76ba0a35f493edb8
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5b742ce9948974967852b654f7a2cac82bec18f14050c7f0ca588c15abe7d89
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d13ae5493c4db5b0f6cf3aad8b771f4e49fde2a92cb50952dbcb5b64ecf9ff46
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:064c7e0e17301c143c7c07f874d0616a425160e061b611df1d69f8d935c1df1b
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1df82080c2918595026373e489d29f4cedf15be12e39f7a209676e04ceaf08e
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:7bbcf9b03f6f3eaf6d9b52b58335c3ada2b9905606e34981c29f16b7f619afd0
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e0f6905bb46dec1582dbcee904a68ad035cd8e9d18fe2b7fb6877bd69d63c7d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc8e908f008b7878a7add6eeb64232fb20b49cfb0dadbfe98582c9d55cae1621
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.40289091905073243,
   "eval_steps": 500,
-  "global_step": 22800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3200,6 +3200,34 @@
       "learning_rate": 3.31801849561171e-05,
       "loss": 0.378,
       "step": 22800
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.40642504991959855,
   "eval_steps": 500,
+  "global_step": 23000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.31801849561171e-05,
       "loss": 0.378,
       "step": 22800
+    },
+    {
+      "epoch": 0.40377445176794896,
+      "grad_norm": 2.0981173515319824,
+      "learning_rate": 3.313109893777857e-05,
+      "loss": 0.355,
+      "step": 22850
+    },
+    {
+      "epoch": 0.4046579844851655,
+      "grad_norm": 1.6996448040008545,
+      "learning_rate": 3.308201291944003e-05,
+      "loss": 0.3044,
+      "step": 22900
+    },
+    {
+      "epoch": 0.405541517202382,
+      "grad_norm": 1.3511463403701782,
+      "learning_rate": 3.303292690110149e-05,
+      "loss": 0.357,
+      "step": 22950
+    },
+    {
+      "epoch": 0.40642504991959855,
+      "grad_norm": 1.7596737146377563,
+      "learning_rate": 3.2983840882762956e-05,
+      "loss": 0.3616,
+      "step": 23000
     }
   ],
   "logging_steps": 50,