Training in progress, step 240, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +284 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fbd2e60526489e6f6f39cf288ada5ee34355bdd563beb7e5399d0ac622a5c3e
 size 373077376

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d5cb32555f036cc29e510981db920df70d9e90bc6775cbdddef9c9bc689ca68
 size 373077376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f6e8dff52c08eae75b95c752c0f6f01bfbb1ed09c5bce28a0e4593cda5e5c80
 size 422377867

 version https://git-lfs.github.com/spec/v1
+oid sha256:d96a6c94d5ea65063709fd1b3a2c97e499172bb263cf2e84c9c5a02acf2a0620
 size 422377867

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39a79e2827280868096ff650d0ee6e4723dddc824192c37a937a656d54903350
 size 1401

 version https://git-lfs.github.com/spec/v1
+oid sha256:37ec1b5270d425b99d0e1cc607e50e995f87916d2f0845b877477bb69e081603
 size 1401

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8333333333333334,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1422,6 +1422,286 @@
       "eval_samples_per_second": 13.356,
       "eval_steps_per_second": 0.835,
       "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1436,12 +1716,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.018894554759168e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 100,
+  "global_step": 240,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.356,
       "eval_steps_per_second": 0.835,
       "step": 200
+    },
+    {
+      "epoch": 0.8375,
+      "grad_norm": 0.326171875,
+      "learning_rate": 7.404029558083653e-05,
+      "loss": 5.6536,
+      "step": 201
+    },
+    {
+      "epoch": 0.8416666666666667,
+      "grad_norm": 0.27734375,
+      "learning_rate": 7.047302281505735e-05,
+      "loss": 5.6053,
+      "step": 202
+    },
+    {
+      "epoch": 0.8458333333333333,
+      "grad_norm": 0.275390625,
+      "learning_rate": 6.698729810778065e-05,
+      "loss": 5.6449,
+      "step": 203
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 0.29296875,
+      "learning_rate": 6.358378324300929e-05,
+      "loss": 5.6066,
+      "step": 204
+    },
+    {
+      "epoch": 0.8541666666666666,
+      "grad_norm": 0.26953125,
+      "learning_rate": 6.026312439675552e-05,
+      "loss": 5.6563,
+      "step": 205
+    },
+    {
+      "epoch": 0.8583333333333333,
+      "grad_norm": 0.234375,
+      "learning_rate": 5.7025952014361004e-05,
+      "loss": 5.6946,
+      "step": 206
+    },
+    {
+      "epoch": 0.8625,
+      "grad_norm": 0.259765625,
+      "learning_rate": 5.387288069080298e-05,
+      "loss": 5.6036,
+      "step": 207
+    },
+    {
+      "epoch": 0.8666666666666667,
+      "grad_norm": 0.2373046875,
+      "learning_rate": 5.080450905401057e-05,
+      "loss": 5.6453,
+      "step": 208
+    },
+    {
+      "epoch": 0.8708333333333333,
+      "grad_norm": 0.296875,
+      "learning_rate": 4.7821419651211284e-05,
+      "loss": 5.4334,
+      "step": 209
+    },
+    {
+      "epoch": 0.875,
+      "grad_norm": 0.283203125,
+      "learning_rate": 4.492417883833155e-05,
+      "loss": 5.5169,
+      "step": 210
+    },
+    {
+      "epoch": 0.8791666666666667,
+      "grad_norm": 0.30078125,
+      "learning_rate": 4.211333667247125e-05,
+      "loss": 5.4803,
+      "step": 211
+    },
+    {
+      "epoch": 0.8833333333333333,
+      "grad_norm": 0.279296875,
+      "learning_rate": 3.938942680747176e-05,
+      "loss": 5.5033,
+      "step": 212
+    },
+    {
+      "epoch": 0.8875,
+      "grad_norm": 0.23828125,
+      "learning_rate": 3.675296639259912e-05,
+      "loss": 5.7133,
+      "step": 213
+    },
+    {
+      "epoch": 0.8916666666666667,
+      "grad_norm": 0.232421875,
+      "learning_rate": 3.420445597436056e-05,
+      "loss": 5.5607,
+      "step": 214
+    },
+    {
+      "epoch": 0.8958333333333334,
+      "grad_norm": 0.26171875,
+      "learning_rate": 3.174437940147268e-05,
+      "loss": 5.4607,
+      "step": 215
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 0.28125,
+      "learning_rate": 2.9373203733000232e-05,
+      "loss": 5.4973,
+      "step": 216
+    },
+    {
+      "epoch": 0.9041666666666667,
+      "grad_norm": 0.25390625,
+      "learning_rate": 2.709137914968268e-05,
+      "loss": 5.5641,
+      "step": 217
+    },
+    {
+      "epoch": 0.9083333333333333,
+      "grad_norm": 0.2294921875,
+      "learning_rate": 2.4899338868464407e-05,
+      "loss": 5.6054,
+      "step": 218
+    },
+    {
+      "epoch": 0.9125,
+      "grad_norm": 0.251953125,
+      "learning_rate": 2.2797499060246252e-05,
+      "loss": 5.4569,
+      "step": 219
+    },
+    {
+      "epoch": 0.9166666666666666,
+      "grad_norm": 0.279296875,
+      "learning_rate": 2.0786258770873646e-05,
+      "loss": 5.5549,
+      "step": 220
+    },
+    {
+      "epoch": 0.9208333333333333,
+      "grad_norm": 0.306640625,
+      "learning_rate": 1.886599984537479e-05,
+      "loss": 5.5161,
+      "step": 221
+    },
+    {
+      "epoch": 0.925,
+      "grad_norm": 0.259765625,
+      "learning_rate": 1.70370868554659e-05,
+      "loss": 5.509,
+      "step": 222
+    },
+    {
+      "epoch": 0.9291666666666667,
+      "grad_norm": 0.31640625,
+      "learning_rate": 1.5299867030334813e-05,
+      "loss": 5.3626,
+      "step": 223
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 0.265625,
+      "learning_rate": 1.3654670190718033e-05,
+      "loss": 5.6124,
+      "step": 224
+    },
+    {
+      "epoch": 0.9375,
+      "grad_norm": 0.29296875,
+      "learning_rate": 1.210180868628219e-05,
+      "loss": 5.5415,
+      "step": 225
+    },
+    {
+      "epoch": 0.9416666666666667,
+      "grad_norm": 0.271484375,
+      "learning_rate": 1.064157733632276e-05,
+      "loss": 5.7405,
+      "step": 226
+    },
+    {
+      "epoch": 0.9458333333333333,
+      "grad_norm": 0.2451171875,
+      "learning_rate": 9.274253373791064e-06,
+      "loss": 5.6103,
+      "step": 227
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 0.294921875,
+      "learning_rate": 8.000096392660028e-06,
+      "loss": 5.5475,
+      "step": 228
+    },
+    {
+      "epoch": 0.9541666666666667,
+      "grad_norm": 0.29296875,
+      "learning_rate": 6.819348298638839e-06,
+      "loss": 5.493,
+      "step": 229
+    },
+    {
+      "epoch": 0.9583333333333334,
+      "grad_norm": 0.2890625,
+      "learning_rate": 5.732233263245845e-06,
+      "loss": 5.5532,
+      "step": 230
+    },
+    {
+      "epoch": 0.9625,
+      "grad_norm": 0.27734375,
+      "learning_rate": 4.738957681248379e-06,
+      "loss": 5.495,
+      "step": 231
+    },
+    {
+      "epoch": 0.9666666666666667,
+      "grad_norm": 0.298828125,
+      "learning_rate": 3.839710131477492e-06,
+      "loss": 5.4654,
+      "step": 232
+    },
+    {
+      "epoch": 0.9708333333333333,
+      "grad_norm": 0.26171875,
+      "learning_rate": 3.034661341025258e-06,
+      "loss": 5.3983,
+      "step": 233
+    },
+    {
+      "epoch": 0.975,
+      "grad_norm": 0.26171875,
+      "learning_rate": 2.323964152831426e-06,
+      "loss": 5.5638,
+      "step": 234
+    },
+    {
+      "epoch": 0.9791666666666666,
+      "grad_norm": 0.2216796875,
+      "learning_rate": 1.7077534966650766e-06,
+      "loss": 5.6353,
+      "step": 235
+    },
+    {
+      "epoch": 0.9833333333333333,
+      "grad_norm": 0.322265625,
+      "learning_rate": 1.1861463635077786e-06,
+      "loss": 5.513,
+      "step": 236
+    },
+    {
+      "epoch": 0.9875,
+      "grad_norm": 0.25,
+      "learning_rate": 7.592417833419129e-07,
+      "loss": 5.5575,
+      "step": 237
+    },
+    {
+      "epoch": 0.9916666666666667,
+      "grad_norm": 0.265625,
+      "learning_rate": 4.2712080634949023e-07,
+      "loss": 5.5291,
+      "step": 238
+    },
+    {
+      "epoch": 0.9958333333333333,
+      "grad_norm": 0.25390625,
+      "learning_rate": 1.8984648752429223e-07,
+      "loss": 5.6134,
+      "step": 239
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.271484375,
+      "learning_rate": 4.746387470044855e-08,
+      "loss": 5.5174,
+      "step": 240
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2226734657110016e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null