Training in progress, step 41600, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1347,6 +1347,10 @@ You can finetune this model on your own dataset.
 | 0.7298 | 41300 | 0.3289        |
 | 0.7307 | 41350 | 0.3119        |
 | 0.7316 | 41400 | 0.3007        |
 </details>

 | 0.7298 | 41300 | 0.3289        |
 | 0.7307 | 41350 | 0.3119        |
 | 0.7316 | 41400 | 0.3007        |
+| 0.7324 | 41450 | 0.3499        |
+| 0.7333 | 41500 | 0.3016        |
+| 0.7342 | 41550 | 0.3232        |
+| 0.7351 | 41600 | 0.2577        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f43c6066a3ed8a797462ed1304e6dd3a538745bae56d08936296e98af5636e68
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:5968fa9233e27ceab0b50d025526b14878df9c3a5cc0dd8ab8e3bfb4e8731159
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1672d02aa4d93aaa27f89d6d65e2a20859a6cb3dd9c4432eeb82f6752f1dfc2
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f9b287a89c14a672aa169e1144107274fac0f234f450dc15728e411970cfec4
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7960809e3d7046461ba4b9405b6f71ec0619b99775d0184c422fcc4823ed73f8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:29c7015785925a4c92109508d00189934d2658c038f9a9b3126f34a88d70d109
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:590d2263ee28ffeb3fc18603d4b5a73198cf1c208cbf77110c99f455e29bc8ec
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd530e72b22c1ce77a70b73b0e2553d90cdc69cb660d5df5b7ea28fe660cd767
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b3482c7848cfcb119d950c3c5d13bc2dc1e7095a6ad4e7fc2a9ca4e5c264983
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:36751f2c6aca47874fa4d1553b1d00dffae813bb24cca7967068d98d45a147e3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7315650898552774,
   "eval_steps": 500,
-  "global_step": 41400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5804,6 +5804,34 @@
       "learning_rate": 1.4929021617482478e-05,
       "loss": 0.3007,
       "step": 41400
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7350992207241435,
   "eval_steps": 500,
+  "global_step": 41600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.4929021617482478e-05,
       "loss": 0.3007,
       "step": 41400
+    },
+    {
+      "epoch": 0.7324486225724939,
+      "grad_norm": 2.910383939743042,
+      "learning_rate": 1.4879935599143941e-05,
+      "loss": 0.3499,
+      "step": 41450
+    },
+    {
+      "epoch": 0.7333321552897104,
+      "grad_norm": 1.0191997289657593,
+      "learning_rate": 1.4830849580805404e-05,
+      "loss": 0.3016,
+      "step": 41500
+    },
+    {
+      "epoch": 0.7342156880069269,
+      "grad_norm": 0.9859305620193481,
+      "learning_rate": 1.4781763562466869e-05,
+      "loss": 0.3232,
+      "step": 41550
+    },
+    {
+      "epoch": 0.7350992207241435,
+      "grad_norm": 1.4275975227355957,
+      "learning_rate": 1.4732677544128332e-05,
+      "loss": 0.2577,
+      "step": 41600
     }
   ],
   "logging_steps": 50,