Training in progress, step 23600, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/README.md +8 -0
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +58 -2

last-checkpoint/README.md CHANGED Viewed

@@ -1491,6 +1491,14 @@ You can finetune this model on your own dataset.
 | 0.4082 | 23100 | 0.3474        |
 | 0.4091 | 23150 | 0.3208        |
 | 0.4100 | 23200 | 0.3798        |
 </details>

 | 0.4082 | 23100 | 0.3474        |
 | 0.4091 | 23150 | 0.3208        |
 | 0.4100 | 23200 | 0.3798        |
+| 0.4108 | 23250 | 0.3282        |
+| 0.4117 | 23300 | 0.3302        |
+| 0.4126 | 23350 | 0.3599        |
+| 0.4135 | 23400 | 0.3608        |
+| 0.4144 | 23450 | 0.3387        |
+| 0.4153 | 23500 | 0.3987        |
+| 0.4161 | 23550 | 0.3387        |
+| 0.4170 | 23600 | 0.2989        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cfe587849d1306f54e0ca75ee4b8dc42ffa4c0050923c00408ab072955907d3
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:1617eb2ae4888507c4f4075423705e736487e0fd06011313c271b8a67d2121e7
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c016c7f7476d35ba0914e4807cd567e2323f8abd2649d40533bb1edf8afea2d
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:8082e636e89c0305931d4fed9e511d53d0c861249cb9eb1baa51ec94b573d123
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7301644e101f87025474e0abd0c4e21251cc4c43a5173ce57ba0318fade3400
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7c3077f3b0e21db426cf04aaf6706b3f8e724b43a1c804482891604f1539c3f
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8bb4812f9196d1a220df2036c293c5cb5d81dc224d96c15decb25fee077dd8a
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2a1548d715b309492a66002f720121ae6b58979a558a4ea26d5d559620bd59b
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55ce77f59b929ccf856f258ba2d8bdee259c33a00d44ab9b7d2ff7d9ff4f481c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c549b0e10abd21bebaa5ec4fd4b6a6e95036a423d8901ec4f127ce499a3bb98
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4099591807884646,
   "eval_steps": 500,
-  "global_step": 23200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3256,6 +3256,62 @@
       "learning_rate": 3.278847852977558e-05,
       "loss": 0.3798,
       "step": 23200
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4170274425261967,
   "eval_steps": 500,
+  "global_step": 23600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.278847852977558e-05,
       "loss": 0.3798,
       "step": 23200
+    },
+    {
+      "epoch": 0.41084271350568113,
+      "grad_norm": 1.8930203914642334,
+      "learning_rate": 3.273939251143704e-05,
+      "loss": 0.3282,
+      "step": 23250
+    },
+    {
+      "epoch": 0.4117262462228976,
+      "grad_norm": 1.256135106086731,
+      "learning_rate": 3.2690306493098507e-05,
+      "loss": 0.3302,
+      "step": 23300
+    },
+    {
+      "epoch": 0.41260977894011414,
+      "grad_norm": 1.952988862991333,
+      "learning_rate": 3.264122047475997e-05,
+      "loss": 0.3599,
+      "step": 23350
+    },
+    {
+      "epoch": 0.41349331165733066,
+      "grad_norm": 1.3686082363128662,
+      "learning_rate": 3.2592134456421436e-05,
+      "loss": 0.3608,
+      "step": 23400
+    },
+    {
+      "epoch": 0.4143768443745472,
+      "grad_norm": 1.56107759475708,
+      "learning_rate": 3.2543048438082894e-05,
+      "loss": 0.3387,
+      "step": 23450
+    },
+    {
+      "epoch": 0.4152603770917637,
+      "grad_norm": 1.823240876197815,
+      "learning_rate": 3.249396241974436e-05,
+      "loss": 0.3987,
+      "step": 23500
+    },
+    {
+      "epoch": 0.41614390980898025,
+      "grad_norm": 1.2912514209747314,
+      "learning_rate": 3.244487640140583e-05,
+      "loss": 0.3387,
+      "step": 23550
+    },
+    {
+      "epoch": 0.4170274425261967,
+      "grad_norm": 1.5520604848861694,
+      "learning_rate": 3.239579038306729e-05,
+      "loss": 0.2989,
+      "step": 23600
     }
   ],
   "logging_steps": 50,