Training in progress, step 9200, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1205,6 +1205,10 @@ You can finetune this model on your own dataset.
 | 0.1573 | 8900 | 0.4081        |
 | 0.1582 | 8950 | 0.4648        |
 | 0.1590 | 9000 | 0.4321        |
 ### Framework Versions

 | 0.1573 | 8900 | 0.4081        |
 | 0.1582 | 8950 | 0.4648        |
 | 0.1590 | 9000 | 0.4321        |
+| 0.1599 | 9050 | 0.4226        |
+| 0.1608 | 9100 | 0.3634        |
+| 0.1617 | 9150 | 0.4252        |
+| 0.1626 | 9200 | 0.3899        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6833db3941c48530d63f16a4641cb58d29681db52bfc4aaba9787486862f28a0
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:880e624eb7279ea2c15d3d6797440086ddb9b6116f0e9bd2fea229b00f3eafcc
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc8f323339f8042019b8128089add42f66d2a56c4dfd5f02e68b4884b9f08cad
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8dea7d6f8279c47cc3d797dc40c5bd4cbb324255364a56bfc64a2536ad6b318
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8b31b0e1d81266033020b3fd3836c1b6a14642adb57f047a1bc520e453a6aba
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:beb5381b8cfab2c43d778d817204fe174c7b328e5f3861902f2432aa4cc89685
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0a063eb0473b3b545a1a6d08a6917737011cdcb54e4175440233b96c566f83c
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:df2589efb63b68f20dc42380f13074262282edb3a23f326bcb9e05c733236848
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f78894f5196d0952d3fc803e041442456b12fd7839191717af3aecab8279af9f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:aeb87116cd3b0b49516d866a0e3b848d38858f80863640384bfc6aab2a1d3a1d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.15903588909897334,
   "eval_steps": 500,
-  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1268,6 +1268,34 @@
       "learning_rate": 4.672203569535254e-05,
       "loss": 0.4321,
       "step": 9000
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1625700199678394,
   "eval_steps": 500,
+  "global_step": 9200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.672203569535254e-05,
       "loss": 0.4321,
       "step": 9000
+    },
+    {
+      "epoch": 0.15991942181618984,
+      "grad_norm": 1.8130481243133545,
+      "learning_rate": 4.6672949677014e-05,
+      "loss": 0.4226,
+      "step": 9050
+    },
+    {
+      "epoch": 0.16080295453340637,
+      "grad_norm": 2.4127371311187744,
+      "learning_rate": 4.6623863658675464e-05,
+      "loss": 0.3634,
+      "step": 9100
+    },
+    {
+      "epoch": 0.1616864872506229,
+      "grad_norm": 2.362494707107544,
+      "learning_rate": 4.657477764033693e-05,
+      "loss": 0.4252,
+      "step": 9150
+    },
+    {
+      "epoch": 0.1625700199678394,
+      "grad_norm": 1.855000615119934,
+      "learning_rate": 4.6525691621998393e-05,
+      "loss": 0.3899,
+      "step": 9200
     }
   ],
   "logging_steps": 50,