Training in progress, step 18600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1395,6 +1395,10 @@ You can finetune this model on your own dataset.
 | 0.3234 | 18300 | 0.31          |
 | 0.3243 | 18350 | 0.306         |
 | 0.3251 | 18400 | 0.3426        |
 </details>

 | 0.3234 | 18300 | 0.31          |
 | 0.3243 | 18350 | 0.306         |
 | 0.3251 | 18400 | 0.3426        |
+| 0.3260 | 18450 | 0.2807        |
+| 0.3269 | 18500 | 0.3856        |
+| 0.3278 | 18550 | 0.3575        |
+| 0.3287 | 18600 | 0.347         |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90f49bf378de383189a093b0a2bf919799fdcc55993cf62d4aee6a7a981b2f6f
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:c659f7470485136b016f37853007297ca9974233845b53c7106a151f1185c5ff
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06e4ba3df85c6e099d0bc980346fc8364af443b293c36414cd30caab13dab2b5
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:5134fd47d1ddef6b8fafbfb0e5b5ac6fce8d4885f9aa6d68d2550fe5fe73399a
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdb07bb76d154c255f7a6a65b33edbee34264ab63583d144665a60ece83d7919
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8876eb9659d218313ffcb704a4ab5032bff1b5e2e75c2dbc7a464331691d0ba1
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:356d49be14a74ca1fba66e6f1f5cf686b54ed1e4a1626358e76328c151c43051
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:346a0b991711eadeb589f86e15866b208715d0ef237bad5b888484a4e3892901
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15d8f9a4e8d57a5144e44194422a5d35ad70c28c3e656d9069d30d3e3c869476
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d4a93a47d6ad046cc4b29ddb3b4c48d6d603705a4f414700fa29b5fa5270c50
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3251400399356788,
   "eval_steps": 500,
-  "global_step": 18400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2584,6 +2584,34 @@
       "learning_rate": 3.749779112917477e-05,
       "loss": 0.3426,
       "step": 18400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3286741708045449,
   "eval_steps": 500,
+  "global_step": 18600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.749779112917477e-05,
       "loss": 0.3426,
       "step": 18400
+    },
+    {
+      "epoch": 0.32602357265289533,
+      "grad_norm": 3.006504535675049,
+      "learning_rate": 3.744870511083623e-05,
+      "loss": 0.2807,
+      "step": 18450
+    },
+    {
+      "epoch": 0.32690710537011186,
+      "grad_norm": 1.5666753053665161,
+      "learning_rate": 3.73996190924977e-05,
+      "loss": 0.3856,
+      "step": 18500
+    },
+    {
+      "epoch": 0.3277906380873284,
+      "grad_norm": 1.9692752361297607,
+      "learning_rate": 3.735053307415916e-05,
+      "loss": 0.3575,
+      "step": 18550
+    },
+    {
+      "epoch": 0.3286741708045449,
+      "grad_norm": 3.517622232437134,
+      "learning_rate": 3.730144705582062e-05,
+      "loss": 0.347,
+      "step": 18600
     }
   ],
   "logging_steps": 50,