Training in progress, step 21800, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1459,6 +1459,10 @@ You can finetune this model on your own dataset.
 | 0.3799 | 21500 | 0.2605        |
 | 0.3808 | 21550 | 0.3658        |
 | 0.3817 | 21600 | 0.3274        |
 </details>

 | 0.3799 | 21500 | 0.2605        |
 | 0.3808 | 21550 | 0.3658        |
 | 0.3817 | 21600 | 0.3274        |
+| 0.3826 | 21650 | 0.3094        |
+| 0.3835 | 21700 | 0.3556        |
+| 0.3843 | 21750 | 0.2877        |
+| 0.3852 | 21800 | 0.3203        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c445c6520c6e5f38ea82be358a6aa5ed9b820190229b0b058633fcc33d08a87d
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:71536271e303b359bc8885941a3b51c93842494cf3d4dd02a6e2f6ca81dff99c
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b19db173fc766529b02b99cfb2249ee2f6c33d285fe8b221a72a08efcd3e58d8
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7f26a81bbdd86a74215227f787a393193d8868c1c4b4f7123d8f3e9f866ce49
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b87c442444260c01d6f6ddc3b986a278c1c2c567a0e9dfe7a512d3828e95a3e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c3344300d9b776b16db056048b915b6ad2ce1e2919d2dcc21729cfbaff84521
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c7f4b6f777ecf36ed08c5fe9d684383c44178ad62db77048c5678684b40b9a1
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:512e3b81dd92786746b027cf126ada5052ba205d7c4f2693485fe5d762f3208f
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00d6dd904f2193bc75011c53daf8f9374a62437ce21a5da2951c5f768935bcd3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:50c93e9773694165737861e364ae6ffe7000e5f2e5bc790ae63fee35e5d02bb7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.381686133837536,
   "eval_steps": 500,
-  "global_step": 21600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3032,6 +3032,34 @@
       "learning_rate": 3.435824939624198e-05,
       "loss": 0.3274,
       "step": 21600
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3852202647064021,
   "eval_steps": 500,
+  "global_step": 21800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.435824939624198e-05,
       "loss": 0.3274,
       "step": 21600
+    },
+    {
+      "epoch": 0.3825696665547525,
+      "grad_norm": 1.5224860906600952,
+      "learning_rate": 3.430916337790344e-05,
+      "loss": 0.3094,
+      "step": 21650
+    },
+    {
+      "epoch": 0.383453199271969,
+      "grad_norm": 3.36433482170105,
+      "learning_rate": 3.42600773595649e-05,
+      "loss": 0.3556,
+      "step": 21700
+    },
+    {
+      "epoch": 0.38433673198918555,
+      "grad_norm": 1.9824773073196411,
+      "learning_rate": 3.4210991341226366e-05,
+      "loss": 0.2877,
+      "step": 21750
+    },
+    {
+      "epoch": 0.3852202647064021,
+      "grad_norm": 1.5103614330291748,
+      "learning_rate": 3.416190532288783e-05,
+      "loss": 0.3203,
+      "step": 21800
     }
   ],
   "logging_steps": 50,