Training in progress, step 22400, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1471,6 +1471,10 @@ You can finetune this model on your own dataset.
 | 0.3905 | 22100 | 0.4232        |
 | 0.3914 | 22150 | 0.266         |
 | 0.3923 | 22200 | 0.3767        |
 </details>

 | 0.3905 | 22100 | 0.4232        |
 | 0.3914 | 22150 | 0.266         |
 | 0.3923 | 22200 | 0.3767        |
+| 0.3932 | 22250 | 0.366         |
+| 0.3941 | 22300 | 0.3211        |
+| 0.3949 | 22350 | 0.2505        |
+| 0.3958 | 22400 | 0.3824        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03c8968fca47e48633874837fddb47eb63f3f1f5e2be0fe1b553d5aa0a6701f6
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f1b271b327c67d5e5cbcfe8f4ced2720b67a2ad4b86d7e49064afada335f989
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f4f75680b753beac3dd7a2a07e701a0454ccd7703bd9a6386af2d91cda8e1a0
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:26d8d52c8f794240dfe200811cae13f3270be5d7cc20f4372d66fcdeb02f2c84
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de7c8f475ae53bd43a56e2cee3897dc4f63fdcbe385245382e65191e77948087
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d33d0eca02823fd38f9f6cbfa8199ab5481b75e21905025d31754984da79697e
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96bcea01a349704fb7ccf988b63e7772fe6dbee601549f7bef592655d9ef9498
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:9af5256c96c40ab41bd91dd8bfffa192e31a7c4ee253a2eb604282e3fbc22ea6
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ecedb650d6c7c528ffb3f5b6952960cedfd2e3e0172bdfac9efd808168f5cb27
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1c0f9c08fc5a654c07e97009bca360a3a7487ba6dd7aed26e14bcde767a3c57
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3922885264441342,
   "eval_steps": 500,
-  "global_step": 22200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3116,6 +3116,34 @@
       "learning_rate": 3.376921717617954e-05,
       "loss": 0.3767,
       "step": 22200
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3958226573130003,
   "eval_steps": 500,
+  "global_step": 22400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.376921717617954e-05,
       "loss": 0.3767,
       "step": 22200
+    },
+    {
+      "epoch": 0.39317205916135073,
+      "grad_norm": 1.7168455123901367,
+      "learning_rate": 3.3720131157841e-05,
+      "loss": 0.366,
+      "step": 22250
+    },
+    {
+      "epoch": 0.39405559187856726,
+      "grad_norm": 3.360104560852051,
+      "learning_rate": 3.3671045139502464e-05,
+      "loss": 0.3211,
+      "step": 22300
+    },
+    {
+      "epoch": 0.3949391245957838,
+      "grad_norm": 1.527031660079956,
+      "learning_rate": 3.3621959121163935e-05,
+      "loss": 0.2505,
+      "step": 22350
+    },
+    {
+      "epoch": 0.3958226573130003,
+      "grad_norm": 1.7586029767990112,
+      "learning_rate": 3.357287310282539e-05,
+      "loss": 0.3824,
+      "step": 22400
     }
   ],
   "logging_steps": 50,