Training in progress, step 15000, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1323,6 +1323,10 @@ You can finetune this model on your own dataset.
 | 0.2598 | 14700 | 0.3622        |
 | 0.2606 | 14750 | 0.2782        |
 | 0.2615 | 14800 | 0.36          |
 </details>

 | 0.2598 | 14700 | 0.3622        |
 | 0.2606 | 14750 | 0.2782        |
 | 0.2615 | 14800 | 0.36          |
+| 0.2624 | 14850 | 0.486         |
+| 0.2633 | 14900 | 0.406         |
+| 0.2642 | 14950 | 0.357         |
+| 0.2651 | 15000 | 0.2855        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8e3fa7de06aa25ef9e92bb424c00fdc808b42dd23a9e9bb43d0a3c2c3136371
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:62a5255fda70735f530277b068143dfdac403e8121852574084f116cd8a54ad2
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:092bf8b9d76f58d80effc72b6148feaeea3102b809911377eb0d572cb5bf7575
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:6061f93c451e2da3513d0e7ebb36b42a40ee13e63c2294272c5bca7237bb74f6
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97edb58b8967751d3dda12e8eb802aa7613cd23fb975e45f9749685794078420
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:58d6a61e95480d63adba518dcc060210f3fb21899a87b9a86415b16500e0e3bf
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f13f9cc3a0ef2dfdf6369b4685eb176c05f57535fd4a80cceb72b81393a5eb1b
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:b117d26681ce36f6295204f3c0c1542a5dbd8eba633804a8fd8e040b3e8c6050
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29d3d14fe806d58e2a7b675de49d429f0f4e1b8b1cfd250c3c74b902fbff4430
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4647cbe535a4ae95aadfbebe85aa748dcd49df21d2f520f23e9e1a18d456097d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.26152568429608947,
   "eval_steps": 500,
-  "global_step": 14800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2080,6 +2080,34 @@
       "learning_rate": 4.103002100881585e-05,
       "loss": 0.36,
       "step": 14800
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2650598151649556,
   "eval_steps": 500,
+  "global_step": 15000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.103002100881585e-05,
       "loss": 0.36,
       "step": 14800
+    },
+    {
+      "epoch": 0.262409217013306,
+      "grad_norm": 1.5133942365646362,
+      "learning_rate": 4.098093499047732e-05,
+      "loss": 0.486,
+      "step": 14850
+    },
+    {
+      "epoch": 0.2632927497305225,
+      "grad_norm": 1.848177194595337,
+      "learning_rate": 4.0932830692505546e-05,
+      "loss": 0.406,
+      "step": 14900
+    },
+    {
+      "epoch": 0.26417628244773905,
+      "grad_norm": 3.320469379425049,
+      "learning_rate": 4.088374467416701e-05,
+      "loss": 0.357,
+      "step": 14950
+    },
+    {
+      "epoch": 0.2650598151649556,
+      "grad_norm": 1.417015790939331,
+      "learning_rate": 4.0834658655828475e-05,
+      "loss": 0.2855,
+      "step": 15000
     }
   ],
   "logging_steps": 50,