Training in progress, step 23200, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1487,6 +1487,10 @@ You can finetune this model on your own dataset.
 | 0.4047 | 22900 | 0.3044        |
 | 0.4055 | 22950 | 0.357         |
 | 0.4064 | 23000 | 0.3616        |
 </details>

 | 0.4047 | 22900 | 0.3044        |
 | 0.4055 | 22950 | 0.357         |
 | 0.4064 | 23000 | 0.3616        |
+| 0.4073 | 23050 | 0.3139        |
+| 0.4082 | 23100 | 0.3474        |
+| 0.4091 | 23150 | 0.3208        |
+| 0.4100 | 23200 | 0.3798        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98ed5a687dc4715c9a1f73849a4f0d4d7c62e130aa33342cba96e08102f6c698
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:7cfe587849d1306f54e0ca75ee4b8dc42ffa4c0050923c00408ab072955907d3
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5b742ce9948974967852b654f7a2cac82bec18f14050c7f0ca588c15abe7d89
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c016c7f7476d35ba0914e4807cd567e2323f8abd2649d40533bb1edf8afea2d
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:064c7e0e17301c143c7c07f874d0616a425160e061b611df1d69f8d935c1df1b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7301644e101f87025474e0abd0c4e21251cc4c43a5173ce57ba0318fade3400
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bbcf9b03f6f3eaf6d9b52b58335c3ada2b9905606e34981c29f16b7f619afd0
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8bb4812f9196d1a220df2036c293c5cb5d81dc224d96c15decb25fee077dd8a
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc8e908f008b7878a7add6eeb64232fb20b49cfb0dadbfe98582c9d55cae1621
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:55ce77f59b929ccf856f258ba2d8bdee259c33a00d44ab9b7d2ff7d9ff4f481c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.40642504991959855,
   "eval_steps": 500,
-  "global_step": 23000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3228,6 +3228,34 @@
       "learning_rate": 3.2983840882762956e-05,
       "loss": 0.3616,
       "step": 23000
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4099591807884646,
   "eval_steps": 500,
+  "global_step": 23200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.2983840882762956e-05,
       "loss": 0.3616,
       "step": 23000
+    },
+    {
+      "epoch": 0.407308582636815,
+      "grad_norm": 2.8382747173309326,
+      "learning_rate": 3.2934754864424413e-05,
+      "loss": 0.3139,
+      "step": 23050
+    },
+    {
+      "epoch": 0.40819211535403155,
+      "grad_norm": 3.052281618118286,
+      "learning_rate": 3.2885668846085885e-05,
+      "loss": 0.3474,
+      "step": 23100
+    },
+    {
+      "epoch": 0.4090756480712481,
+      "grad_norm": 1.373552680015564,
+      "learning_rate": 3.283756454811412e-05,
+      "loss": 0.3208,
+      "step": 23150
+    },
+    {
+      "epoch": 0.4099591807884646,
+      "grad_norm": 1.6797386407852173,
+      "learning_rate": 3.278847852977558e-05,
+      "loss": 0.3798,
+      "step": 23200
     }
   ],
   "logging_steps": 50,