Training in progress, step 20400, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1431,6 +1431,10 @@ You can finetune this model on your own dataset.
 | 0.3552 | 20100 | 0.3256        |
 | 0.3561 | 20150 | 0.3179        |
 | 0.3569 | 20200 | 0.4226        |
 </details>

 | 0.3552 | 20100 | 0.3256        |
 | 0.3561 | 20150 | 0.3179        |
 | 0.3569 | 20200 | 0.4226        |
+| 0.3578 | 20250 | 0.4196        |
+| 0.3587 | 20300 | 0.3618        |
+| 0.3596 | 20350 | 0.4093        |
+| 0.3605 | 20400 | 0.3051        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ef29b19104cdd2fee93b8217bc742ea19781fda755240150a391d0a52137790
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e24285b7fee77e14610cf519cac1441a781f70c15e6815c8a668c74a1781441
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b84ead54747767096d47dd620e6c1d4484392809c929ebd262e713e54fdf126a
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1038bc1f6238eff236fd9c4b9cc4d1b074566fa9976e684c92c8e7bc2cf6ef1
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67d7c2b4ab7e82df68b4a7c174bebe88de85db56b4421a6d44a9a14dfc374b05
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2529eb4186b974d86a3033c917ecbb272c298c66ef6254d4ce1dc8e6139bb050
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75cb2a5d4e61624cc82e548180286aa5f7b7073a2bd43f3a7494227c84590e41
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ade052dc4070f716b1bebceb9d5e60fb9f374ab26eb8ae1c89ae4e4e0acc23c
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a28e3c14c246edc1f658e8b3ed3e4804f86c97afddb1e9e8b6cfd065ec104723
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:884f056648270af6936f71ef50e4c4799a892dfb36fffd709576ecbe5d41efd5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3569472177554735,
   "eval_steps": 500,
-  "global_step": 20200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2836,6 +2836,34 @@
       "learning_rate": 3.5731676189354225e-05,
       "loss": 0.4226,
       "step": 20200
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.36048134862433956,
   "eval_steps": 500,
+  "global_step": 20400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.5731676189354225e-05,
       "loss": 0.4226,
       "step": 20200
+    },
+    {
+      "epoch": 0.35783075047269003,
+      "grad_norm": 1.3192092180252075,
+      "learning_rate": 3.568259017101569e-05,
+      "loss": 0.4196,
+      "step": 20250
+    },
+    {
+      "epoch": 0.3587142831899065,
+      "grad_norm": 1.421736717224121,
+      "learning_rate": 3.5633504152677155e-05,
+      "loss": 0.3618,
+      "step": 20300
+    },
+    {
+      "epoch": 0.35959781590712303,
+      "grad_norm": 2.0631330013275146,
+      "learning_rate": 3.558441813433861e-05,
+      "loss": 0.4093,
+      "step": 20350
+    },
+    {
+      "epoch": 0.36048134862433956,
+      "grad_norm": 1.6250920295715332,
+      "learning_rate": 3.5535332116000084e-05,
+      "loss": 0.3051,
+      "step": 20400
     }
   ],
   "logging_steps": 50,