Training in progress, step 15800, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1339,6 +1339,10 @@ You can finetune this model on your own dataset.
 | 0.2739 | 15500 | 0.3889        |
 | 0.2748 | 15550 | 0.3618        |
 | 0.2757 | 15600 | 0.4126        |
 </details>

 | 0.2739 | 15500 | 0.3889        |
 | 0.2748 | 15550 | 0.3618        |
 | 0.2757 | 15600 | 0.4126        |
+| 0.2765 | 15650 | 0.3771        |
+| 0.2774 | 15700 | 0.4377        |
+| 0.2783 | 15750 | 0.4041        |
+| 0.2792 | 15800 | 0.375         |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36f741855bfd9ab9855541a5740f7ebf89eb5ff81a18f782d3409b6a6441f247
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3d55e45825a3c654f0b7eaf0ef140162fe612ee219ff9374e5295592d1444b9
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f354c3d85cd72229183ba1225666edf02dbe0f02e60066712ba24c7167587d87
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3a74062f2a6f56b8b0e799cc086809df8bd6dad796e1d4cdc42ce905c2701af
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ece4d2ab35a1b635eb36bee8c77e304be4a94349bd773a556811f31851337605
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d06578a61ae44cb9a0de8333b92d3619c8147705d79a090a8054c07810546369
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab0c87524374314917bf4b3aa26c95868e23e32a6021e3f7a331e9d46fadb1d1
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e95ff6f2b0a235804a0c3d7872a59d6fa2f6701ff13d24c3cdd11982303ea58
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7630ebf9456fffd04ad85b9111cf3e42b6fca916321363c383a4caf1724f287
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:adfca4433908d3fab5c9a1fa33bf6028dd834c726d10f65fd18b1dbca2367df8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.27566220777155376,
   "eval_steps": 500,
-  "global_step": 15600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2192,6 +2192,34 @@
       "learning_rate": 4.024562643576604e-05,
       "loss": 0.4126,
       "step": 15600
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.27919633864041987,
   "eval_steps": 500,
+  "global_step": 15800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.024562643576604e-05,
       "loss": 0.4126,
       "step": 15600
+    },
+    {
+      "epoch": 0.2765457404887703,
+      "grad_norm": 1.5262032747268677,
+      "learning_rate": 4.01965404174275e-05,
+      "loss": 0.3771,
+      "step": 15650
+    },
+    {
+      "epoch": 0.2774292732059868,
+      "grad_norm": 1.8245854377746582,
+      "learning_rate": 4.014745439908896e-05,
+      "loss": 0.4377,
+      "step": 15700
+    },
+    {
+      "epoch": 0.27831280592320334,
+      "grad_norm": 2.8566267490386963,
+      "learning_rate": 4.0098368380750425e-05,
+      "loss": 0.4041,
+      "step": 15750
+    },
+    {
+      "epoch": 0.27919633864041987,
+      "grad_norm": 2.0167641639709473,
+      "learning_rate": 4.00492823624119e-05,
+      "loss": 0.375,
+      "step": 15800
     }
   ],
   "logging_steps": 50,