Training in progress, step 14600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1315,6 +1315,10 @@ You can finetune this model on your own dataset.
 | 0.2527 | 14300 | 0.4161        |
 | 0.2536 | 14350 | 0.3431        |
 | 0.2545 | 14400 | 0.3576        |
 </details>

 | 0.2527 | 14300 | 0.4161        |
 | 0.2536 | 14350 | 0.3431        |
 | 0.2545 | 14400 | 0.3576        |
+| 0.2553 | 14450 | 0.3563        |
+| 0.2562 | 14500 | 0.3406        |
+| 0.2571 | 14550 | 0.4397        |
+| 0.2580 | 14600 | 0.411         |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c70d1baeacb306a6e3e4e7da1daf6668ef9a846b36b36844f97182da49b7189d
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:861228ee1e7303d55d206ed18a0e9a9f72cd4fceb74ae8597ddd8227ba4baaa6
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c7cc231dc6a04e0cb1fde6371ac1117217c084c2ff5861f99992f6271b0b2ae
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:01e800f5f76e35b4a2c15cb36bcf0bae4a36cc21e80874b3eade1a26b448ee14
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1575cd53f4b8b110d92e8c23aa79a7bcf9dc9c336882843030d5971034763624
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ffe6d2e694b6c2093052ae77ba6c6348d6c889151265b032757dc965de9eabd
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97511a6060aacb39a58a4067ef60a5d731c4117afaf40a314802e695ac048d50
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d1981a845c6e1942265cf518562623d63fa9e701ce29d4ce24d1e25e2691cb
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:696c7475f37eb1308f8246ec63f84866eb3d506203673b38e04a231cb4eacd6d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:71ef9de9a97c35ae1f236f3cf953b678aeb5931825d667a15b405ae0448fa416
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.25445742255835735,
   "eval_steps": 500,
-  "global_step": 14400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2024,6 +2024,34 @@
       "learning_rate": 4.142270915552414e-05,
       "loss": 0.3576,
       "step": 14400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2579915534272234,
   "eval_steps": 500,
+  "global_step": 14600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.142270915552414e-05,
       "loss": 0.3576,
       "step": 14400
+    },
+    {
+      "epoch": 0.2553409552755739,
+      "grad_norm": 1.3540493249893188,
+      "learning_rate": 4.137362313718561e-05,
+      "loss": 0.3563,
+      "step": 14450
+    },
+    {
+      "epoch": 0.25622448799279035,
+      "grad_norm": 1.7373064756393433,
+      "learning_rate": 4.1324537118847066e-05,
+      "loss": 0.3406,
+      "step": 14500
+    },
+    {
+      "epoch": 0.2571080207100069,
+      "grad_norm": 2.6311392784118652,
+      "learning_rate": 4.127545110050853e-05,
+      "loss": 0.4397,
+      "step": 14550
+    },
+    {
+      "epoch": 0.2579915534272234,
+      "grad_norm": 1.845186471939087,
+      "learning_rate": 4.122636508217e-05,
+      "loss": 0.411,
+      "step": 14600
     }
   ],
   "logging_steps": 50,