Training in progress, step 18400, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1391,6 +1391,10 @@ You can finetune this model on your own dataset.
 | 0.3198 | 18100 | 0.3279        |
 | 0.3207 | 18150 | 0.3062        |
 | 0.3216 | 18200 | 0.2973        |
 </details>

 | 0.3198 | 18100 | 0.3279        |
 | 0.3207 | 18150 | 0.3062        |
 | 0.3216 | 18200 | 0.2973        |
+| 0.3225 | 18250 | 0.4078        |
+| 0.3234 | 18300 | 0.31          |
+| 0.3243 | 18350 | 0.306         |
+| 0.3251 | 18400 | 0.3426        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c561d22f3f5062bc767250ffdb3fa4a0f7bd3dbdb65e4c11cfceaa01995c64c
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:90f49bf378de383189a093b0a2bf919799fdcc55993cf62d4aee6a7a981b2f6f
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:979b402a40bd2a435e70fee699ca07b55766750d192cbf2268122098c8ea3e92
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:06e4ba3df85c6e099d0bc980346fc8364af443b293c36414cd30caab13dab2b5
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28d8ccebbb7f7b52e625ef2554e1cd3690dd81aea7ece9b35eeb250cf32f7566
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdb07bb76d154c255f7a6a65b33edbee34264ab63583d144665a60ece83d7919
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1747e186a8b719e713dac067421a4083615d1c151147e0b1a41977c8731e3e98
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:356d49be14a74ca1fba66e6f1f5cf686b54ed1e4a1626358e76328c151c43051
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:896e9fd82393923d4baed79b5528ea09727379e27a1318af2376e0aaf0f43d15
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:15d8f9a4e8d57a5144e44194422a5d35ad70c28c3e656d9069d30d3e3c869476
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.32160590906681275,
   "eval_steps": 500,
-  "global_step": 18200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2556,6 +2556,34 @@
       "learning_rate": 3.7694135202528916e-05,
       "loss": 0.2973,
       "step": 18200
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3251400399356788,
   "eval_steps": 500,
+  "global_step": 18400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.7694135202528916e-05,
       "loss": 0.2973,
       "step": 18200
+    },
+    {
+      "epoch": 0.3224894417840293,
+      "grad_norm": 2.819748640060425,
+      "learning_rate": 3.764504918419038e-05,
+      "loss": 0.4078,
+      "step": 18250
+    },
+    {
+      "epoch": 0.3233729745012458,
+      "grad_norm": 1.5743447542190552,
+      "learning_rate": 3.759596316585184e-05,
+      "loss": 0.31,
+      "step": 18300
+    },
+    {
+      "epoch": 0.3242565072184623,
+      "grad_norm": 1.8966853618621826,
+      "learning_rate": 3.7546877147513303e-05,
+      "loss": 0.306,
+      "step": 18350
+    },
+    {
+      "epoch": 0.3251400399356788,
+      "grad_norm": 2.7652056217193604,
+      "learning_rate": 3.749779112917477e-05,
+      "loss": 0.3426,
+      "step": 18400
     }
   ],
   "logging_steps": 50,