Training in progress, step 22800, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1479,6 +1479,10 @@ You can finetune this model on your own dataset.
 | 0.3976 | 22500 | 0.3136        |
 | 0.3985 | 22550 | 0.297         |
 | 0.3994 | 22600 | 0.2933        |
 </details>

 | 0.3976 | 22500 | 0.3136        |
 | 0.3985 | 22550 | 0.297         |
 | 0.3994 | 22600 | 0.2933        |
+| 0.4002 | 22650 | 0.4026        |
+| 0.4011 | 22700 | 0.3567        |
+| 0.4020 | 22750 | 0.3711        |
+| 0.4029 | 22800 | 0.378         |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:497b7a6acdb3a5a1604b3f8ab3267058baa92f80bee7efa6e237ea075655a4d8
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0d300e6229f3196e296d216f86e3dd9be751afa58c003b8627129e48bb13ede
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e163c7782d125cbae55d87a883f0afe29090c268911a4edc6e746df10bc1f663
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:12655e53ef206d715a3442591f73e41f1140d40b95f274af76ba0a35f493edb8
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6edf8471c36d7596b23118388dc68d9d67f46cc4a6b1356e1a12264545ade3b8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d13ae5493c4db5b0f6cf3aad8b771f4e49fde2a92cb50952dbcb5b64ecf9ff46
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9f128257bebc4cb3f00ccba35e93cf76368736113fc2faf057244a587c9ee62
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1df82080c2918595026373e489d29f4cedf15be12e39f7a209676e04ceaf08e
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8b0e2d20704f293e6229a166caadb9f1aabe09be5b221f331974406b383c826
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e0f6905bb46dec1582dbcee904a68ad035cd8e9d18fe2b7fb6877bd69d63c7d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3993567881818664,
   "eval_steps": 500,
-  "global_step": 22600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3172,6 +3172,34 @@
       "learning_rate": 3.337652902947125e-05,
       "loss": 0.2933,
       "step": 22600
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.40289091905073243,
   "eval_steps": 500,
+  "global_step": 22800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.337652902947125e-05,
       "loss": 0.2933,
       "step": 22600
+    },
+    {
+      "epoch": 0.4002403208990829,
+      "grad_norm": 3.4516189098358154,
+      "learning_rate": 3.332744301113271e-05,
+      "loss": 0.4026,
+      "step": 22650
+    },
+    {
+      "epoch": 0.40112385361629943,
+      "grad_norm": 1.5759230852127075,
+      "learning_rate": 3.3278356992794174e-05,
+      "loss": 0.3567,
+      "step": 22700
+    },
+    {
+      "epoch": 0.4020073863335159,
+      "grad_norm": 1.9385254383087158,
+      "learning_rate": 3.322927097445564e-05,
+      "loss": 0.3711,
+      "step": 22750
+    },
+    {
+      "epoch": 0.40289091905073243,
+      "grad_norm": 1.6334116458892822,
+      "learning_rate": 3.31801849561171e-05,
+      "loss": 0.378,
+      "step": 22800
     }
   ],
   "logging_steps": 50,