Training in progress, step 19000, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1403,6 +1403,10 @@ You can finetune this model on your own dataset.
 | 0.3304 | 18700 | 0.3543        |
 | 0.3313 | 18750 | 0.3642        |
 | 0.3322 | 18800 | 0.3415        |
 </details>

 | 0.3304 | 18700 | 0.3543        |
 | 0.3313 | 18750 | 0.3642        |
 | 0.3322 | 18800 | 0.3415        |
+| 0.3331 | 18850 | 0.3069        |
+| 0.3340 | 18900 | 0.2962        |
+| 0.3349 | 18950 | 0.3156        |
+| 0.3357 | 19000 | 0.3791        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6283a957a66c184eb3662df7aa262cd154fb1dcee802f75857776d94240bc13
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:123a3c7e068975f2d73a559688da89498d96c9f5f3906b2f60a8f55923c134f5
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78292edbaee380ac230c50b2f8b68c5dd37e7df2ddfa15b2c43a017c23edfbeb
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e186b88037a18eebc7b613039df85cfc79c02be57538ce16444dea0485fdf79
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83eb2f002d2d9da0dc4755fcda636e68d5b4a41e8c2e5a2b8c82d014a65d58ff
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:03af62e0682690ebbb7904f13bcb54bda8192bd3193a8746111a4da158c48b8c
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c934cf056ab14aad09651705c56e19636a65538c7a4a6a0401dc0dfa92a2b37
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:10840146a6e19998f1fdbf028a41f37469dd8a3b8988531196e63b1251245096
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0b05eb17181bcecd1e03b83742ebacffcd8b0f6bb6d68e2982a23c3f445018e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:23958946bfe31c250bee11672fccb9ed52902b5ac707ac8090942157c16267c1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.332208301673411,
   "eval_steps": 500,
-  "global_step": 18800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2640,6 +2640,34 @@
       "learning_rate": 3.710510298246648e-05,
       "loss": 0.3415,
       "step": 18800
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.33574243254227704,
   "eval_steps": 500,
+  "global_step": 19000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.710510298246648e-05,
       "loss": 0.3415,
       "step": 18800
+    },
+    {
+      "epoch": 0.3330918343906275,
+      "grad_norm": 1.4388914108276367,
+      "learning_rate": 3.7056016964127937e-05,
+      "loss": 0.3069,
+      "step": 18850
+    },
+    {
+      "epoch": 0.333975367107844,
+      "grad_norm": 1.5527664422988892,
+      "learning_rate": 3.70069309457894e-05,
+      "loss": 0.2962,
+      "step": 18900
+    },
+    {
+      "epoch": 0.3348588998250605,
+      "grad_norm": 1.6680736541748047,
+      "learning_rate": 3.6957844927450866e-05,
+      "loss": 0.3156,
+      "step": 18950
+    },
+    {
+      "epoch": 0.33574243254227704,
+      "grad_norm": 2.266108274459839,
+      "learning_rate": 3.69097406294791e-05,
+      "loss": 0.3791,
+      "step": 19000
     }
   ],
   "logging_steps": 50,