Training in progress, step 4325, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4433c2138519f63d9d5aa7ff598665f22cd57b538e906f9544c0f9720c57fab2
 size 791869518
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9062e822dc347a524608daf51b672d8000ac0bfa81cd6464bba773f938a940fc
 size 2375752250
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:46d4edc1ba983c008c39d87f9a0be72b701bdd2dc74240b405a1e67990d5bd14
 size 1000
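Each of the three entries above is a Git LFS pointer: the repository tracks only the version/oid/size triple, while the actual blob lives in LFS storage. A minimal sketch for checking a downloaded blob against its pointer, assuming the checkpoint has been pulled locally and using the model.safetensors digest and size from the diff above:

import hashlib
import os

# Expected values, copied from the updated LFS pointer above.
EXPECTED_OID = "4433c2138519f63d9d5aa7ff598665f22cd57b538e906f9544c0f9720c57fab2"
EXPECTED_SIZE = 791869518

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256 so multi-GB checkpoints fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

path = "last-checkpoint/model.safetensors"  # hypothetical local path
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch with LFS pointer"
assert sha256_of(path) == EXPECTED_OID, "sha256 mismatch with LFS pointer"
print("pointer and blob agree")

The same check applies to optimizer.pt and scheduler.pt with their respective digests.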
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9998338426417576,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 4325,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5671,6 +5671,461 @@
       "eval_samples_per_second": 607.208,
       "eval_steps_per_second": 37.951,
       "step": 4000
+    },
+    {
+      "epoch": 0.9258576970590148,
+      "grad_norm": 155.25,
+      "learning_rate": 8.221993833504626e-07,
+      "loss": 68.988,
+      "step": 4005
+    },
+    {
+      "epoch": 0.9270135743337451,
+      "grad_norm": 146.5,
+      "learning_rate": 8.093525179856115e-07,
+      "loss": 67.9789,
+      "step": 4010
+    },
+    {
+      "epoch": 0.9281694516084754,
+      "grad_norm": 147.625,
+      "learning_rate": 7.965056526207606e-07,
+      "loss": 68.1106,
+      "step": 4015
+    },
+    {
+      "epoch": 0.9293253288832058,
+      "grad_norm": 141.5,
+      "learning_rate": 7.836587872559097e-07,
+      "loss": 68.5058,
+      "step": 4020
+    },
+    {
+      "epoch": 0.9304812061579362,
+      "grad_norm": 136.875,
+      "learning_rate": 7.708119218910587e-07,
+      "loss": 66.9191,
+      "step": 4025
+    },
+    {
+      "epoch": 0.9316370834326665,
+      "grad_norm": 154.375,
+      "learning_rate": 7.579650565262076e-07,
+      "loss": 68.1875,
+      "step": 4030
+    },
+    {
+      "epoch": 0.9327929607073969,
+      "grad_norm": 149.625,
+      "learning_rate": 7.451181911613567e-07,
+      "loss": 69.5334,
+      "step": 4035
+    },
+    {
+      "epoch": 0.9339488379821272,
+      "grad_norm": 147.25,
+      "learning_rate": 7.322713257965057e-07,
+      "loss": 68.1886,
+      "step": 4040
+    },
+    {
+      "epoch": 0.9351047152568576,
+      "grad_norm": 160.875,
+      "learning_rate": 7.194244604316547e-07,
+      "loss": 68.1861,
+      "step": 4045
+    },
+    {
+      "epoch": 0.936260592531588,
+      "grad_norm": 156.375,
+      "learning_rate": 7.065775950668037e-07,
+      "loss": 66.4576,
+      "step": 4050
+    },
+    {
+      "epoch": 0.9374164698063183,
+      "grad_norm": 141.625,
+      "learning_rate": 6.937307297019528e-07,
+      "loss": 67.8205,
+      "step": 4055
+    },
+    {
+      "epoch": 0.9385723470810486,
+      "grad_norm": 156.25,
+      "learning_rate": 6.808838643371019e-07,
+      "loss": 68.9202,
+      "step": 4060
+    },
+    {
+      "epoch": 0.9397282243557791,
+      "grad_norm": 148.5,
+      "learning_rate": 6.680369989722508e-07,
+      "loss": 68.9684,
+      "step": 4065
+    },
+    {
+      "epoch": 0.9408841016305094,
+      "grad_norm": 143.5,
+      "learning_rate": 6.551901336073999e-07,
+      "loss": 68.7812,
+      "step": 4070
+    },
+    {
+      "epoch": 0.9420399789052397,
+      "grad_norm": 149.75,
+      "learning_rate": 6.423432682425489e-07,
+      "loss": 68.0078,
+      "step": 4075
+    },
+    {
+      "epoch": 0.94319585617997,
+      "grad_norm": 139.5,
+      "learning_rate": 6.294964028776979e-07,
+      "loss": 67.9145,
+      "step": 4080
+    },
+    {
+      "epoch": 0.9443517334547005,
+      "grad_norm": 159.625,
+      "learning_rate": 6.16649537512847e-07,
+      "loss": 68.2476,
+      "step": 4085
+    },
+    {
+      "epoch": 0.9455076107294308,
+      "grad_norm": 151.0,
+      "learning_rate": 6.038026721479959e-07,
+      "loss": 68.8499,
+      "step": 4090
+    },
+    {
+      "epoch": 0.9466634880041611,
+      "grad_norm": 148.25,
+      "learning_rate": 5.90955806783145e-07,
+      "loss": 66.8455,
+      "step": 4095
+    },
+    {
+      "epoch": 0.9478193652788915,
+      "grad_norm": 150.375,
+      "learning_rate": 5.78108941418294e-07,
+      "loss": 69.0889,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9489752425536219,
+      "grad_norm": 145.625,
+      "learning_rate": 5.65262076053443e-07,
+      "loss": 68.773,
+      "step": 4105
+    },
+    {
+      "epoch": 0.9501311198283522,
+      "grad_norm": 145.875,
+      "learning_rate": 5.524152106885921e-07,
+      "loss": 68.0571,
+      "step": 4110
+    },
+    {
+      "epoch": 0.9512869971030826,
+      "grad_norm": 137.75,
+      "learning_rate": 5.39568345323741e-07,
+      "loss": 68.6678,
+      "step": 4115
+    },
+    {
+      "epoch": 0.9524428743778129,
+      "grad_norm": 152.75,
+      "learning_rate": 5.267214799588901e-07,
+      "loss": 70.0085,
+      "step": 4120
+    },
+    {
+      "epoch": 0.9535987516525433,
+      "grad_norm": 157.75,
+      "learning_rate": 5.138746145940391e-07,
+      "loss": 67.1869,
+      "step": 4125
+    },
+    {
+      "epoch": 0.9547546289272737,
+      "grad_norm": 154.75,
+      "learning_rate": 5.010277492291881e-07,
+      "loss": 69.5982,
+      "step": 4130
+    },
+    {
+      "epoch": 0.955910506202004,
+      "grad_norm": 153.5,
+      "learning_rate": 4.881808838643371e-07,
+      "loss": 68.2536,
+      "step": 4135
+    },
+    {
+      "epoch": 0.9570663834767343,
+      "grad_norm": 142.125,
+      "learning_rate": 4.753340184994862e-07,
+      "loss": 67.8511,
+      "step": 4140
+    },
+    {
+      "epoch": 0.9582222607514647,
+      "grad_norm": 152.25,
+      "learning_rate": 4.624871531346352e-07,
+      "loss": 68.5388,
+      "step": 4145
+    },
+    {
+      "epoch": 0.9593781380261951,
+      "grad_norm": 142.625,
+      "learning_rate": 4.496402877697842e-07,
+      "loss": 69.31,
+      "step": 4150
+    },
+    {
+      "epoch": 0.9605340153009254,
+      "grad_norm": 151.125,
+      "learning_rate": 4.3679342240493327e-07,
+      "loss": 68.8235,
+      "step": 4155
+    },
+    {
+      "epoch": 0.9616898925756557,
+      "grad_norm": 137.625,
+      "learning_rate": 4.2394655704008227e-07,
+      "loss": 67.4872,
+      "step": 4160
+    },
+    {
+      "epoch": 0.9628457698503862,
+      "grad_norm": 148.625,
+      "learning_rate": 4.110996916752313e-07,
+      "loss": 68.9608,
+      "step": 4165
+    },
+    {
+      "epoch": 0.9640016471251165,
+      "grad_norm": 134.875,
+      "learning_rate": 3.982528263103803e-07,
+      "loss": 68.4842,
+      "step": 4170
+    },
+    {
+      "epoch": 0.9651575243998468,
+      "grad_norm": 151.5,
+      "learning_rate": 3.8540596094552934e-07,
+      "loss": 68.2285,
+      "step": 4175
+    },
+    {
+      "epoch": 0.9663134016745772,
+      "grad_norm": 152.625,
+      "learning_rate": 3.7255909558067835e-07,
+      "loss": 67.0058,
+      "step": 4180
+    },
+    {
+      "epoch": 0.9674692789493076,
+      "grad_norm": 141.625,
+      "learning_rate": 3.5971223021582736e-07,
+      "loss": 67.6228,
+      "step": 4185
+    },
+    {
+      "epoch": 0.9686251562240379,
+      "grad_norm": 139.125,
+      "learning_rate": 3.468653648509764e-07,
+      "loss": 67.6947,
+      "step": 4190
+    },
+    {
+      "epoch": 0.9697810334987683,
+      "grad_norm": 152.75,
+      "learning_rate": 3.340184994861254e-07,
+      "loss": 67.1925,
+      "step": 4195
+    },
+    {
+      "epoch": 0.9709369107734986,
+      "grad_norm": 156.5,
+      "learning_rate": 3.2117163412127443e-07,
+      "loss": 67.7996,
+      "step": 4200
+    },
+    {
+      "epoch": 0.972092788048229,
+      "grad_norm": 152.75,
+      "learning_rate": 3.083247687564235e-07,
+      "loss": 67.6101,
+      "step": 4205
+    },
+    {
+      "epoch": 0.9732486653229593,
+      "grad_norm": 143.75,
+      "learning_rate": 2.954779033915725e-07,
+      "loss": 67.7158,
+      "step": 4210
+    },
+    {
+      "epoch": 0.9744045425976897,
+      "grad_norm": 149.125,
+      "learning_rate": 2.826310380267215e-07,
+      "loss": 67.5787,
+      "step": 4215
+    },
+    {
+      "epoch": 0.97556041987242,
+      "grad_norm": 157.625,
+      "learning_rate": 2.697841726618705e-07,
+      "loss": 68.4845,
+      "step": 4220
+    },
+    {
+      "epoch": 0.9767162971471504,
+      "grad_norm": 134.125,
+      "learning_rate": 2.5693730729701956e-07,
+      "loss": 69.1448,
+      "step": 4225
+    },
+    {
+      "epoch": 0.9778721744218808,
+      "grad_norm": 139.875,
+      "learning_rate": 2.4409044193216857e-07,
+      "loss": 68.03,
+      "step": 4230
+    },
+    {
+      "epoch": 0.9790280516966111,
+      "grad_norm": 147.25,
+      "learning_rate": 2.312435765673176e-07,
+      "loss": 68.5574,
+      "step": 4235
+    },
+    {
+      "epoch": 0.9801839289713414,
+      "grad_norm": 154.25,
+      "learning_rate": 2.1839671120246663e-07,
+      "loss": 67.5709,
+      "step": 4240
+    },
+    {
+      "epoch": 0.9813398062460719,
+      "grad_norm": 149.875,
+      "learning_rate": 2.0554984583761564e-07,
+      "loss": 68.7623,
+      "step": 4245
+    },
+    {
+      "epoch": 0.9824956835208022,
+      "grad_norm": 141.625,
+      "learning_rate": 1.9270298047276467e-07,
+      "loss": 68.6813,
+      "step": 4250
+    },
+    {
+      "epoch": 0.9836515607955325,
+      "grad_norm": 156.0,
+      "learning_rate": 1.7985611510791368e-07,
+      "loss": 68.6549,
+      "step": 4255
+    },
+    {
+      "epoch": 0.9848074380702629,
+      "grad_norm": 147.25,
+      "learning_rate": 1.670092497430627e-07,
+      "loss": 68.4055,
+      "step": 4260
+    },
+    {
+      "epoch": 0.9859633153449933,
+      "grad_norm": 143.875,
+      "learning_rate": 1.5416238437821174e-07,
+      "loss": 68.2161,
+      "step": 4265
+    },
+    {
+      "epoch": 0.9871191926197236,
+      "grad_norm": 155.875,
+      "learning_rate": 1.4131551901336075e-07,
+      "loss": 68.8521,
+      "step": 4270
+    },
+    {
+      "epoch": 0.9882750698944539,
+      "grad_norm": 152.375,
+      "learning_rate": 1.2846865364850978e-07,
+      "loss": 68.8391,
+      "step": 4275
+    },
+    {
+      "epoch": 0.9894309471691843,
+      "grad_norm": 151.125,
+      "learning_rate": 1.156217882836588e-07,
+      "loss": 67.5471,
+      "step": 4280
+    },
+    {
+      "epoch": 0.9905868244439147,
+      "grad_norm": 153.875,
+      "learning_rate": 1.0277492291880782e-07,
+      "loss": 68.6625,
+      "step": 4285
+    },
+    {
+      "epoch": 0.991742701718645,
+      "grad_norm": 139.375,
+      "learning_rate": 8.992805755395684e-08,
+      "loss": 68.1066,
+      "step": 4290
+    },
+    {
+      "epoch": 0.9928985789933754,
+      "grad_norm": 156.375,
+      "learning_rate": 7.708119218910587e-08,
+      "loss": 68.376,
+      "step": 4295
+    },
+    {
+      "epoch": 0.9940544562681057,
+      "grad_norm": 149.375,
+      "learning_rate": 6.423432682425489e-08,
+      "loss": 68.4129,
+      "step": 4300
+    },
+    {
+      "epoch": 0.9952103335428361,
+      "grad_norm": 139.0,
+      "learning_rate": 5.138746145940391e-08,
+      "loss": 67.743,
+      "step": 4305
+    },
+    {
+      "epoch": 0.9963662108175665,
+      "grad_norm": 137.375,
+      "learning_rate": 3.8540596094552936e-08,
+      "loss": 68.2336,
+      "step": 4310
+    },
+    {
+      "epoch": 0.9975220880922968,
+      "grad_norm": 147.125,
+      "learning_rate": 2.5693730729701955e-08,
+      "loss": 67.8423,
+      "step": 4315
+    },
+    {
+      "epoch": 0.9986779653670271,
+      "grad_norm": 148.875,
+      "learning_rate": 1.2846865364850977e-08,
+      "loss": 68.4695,
+      "step": 4320
+    },
+    {
+      "epoch": 0.9998338426417576,
+      "grad_norm": 151.25,
+      "learning_rate": 0.0,
+      "loss": 67.3571,
+      "step": 4325
     }
   ],
   "logging_steps": 5,
@@ -5685,12 +6140,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.8737448318811505e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
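The records added above are the tail of the Trainer's log_history: one entry every 5 steps (matching "logging_steps": 5), with the learning rate stepping down linearly to 0.0 at the final step 4325 and "should_training_stop": true marking the end of the run. A minimal sketch for inspecting the saved state, assuming the checkpoint directory has been downloaded locally as last-checkpoint/:

import json

# Load the trainer state committed above.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # 4325 0.9998338426417576

# Training records carry a "loss" key; eval records (e.g. step 4000) do not.
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

print(steps[-1], losses[-1], lrs[-1])  # 4325 67.3571 0.0 (LR decayed to zero)

Training can be resumed from such a directory via trainer.train(resume_from_checkpoint="last-checkpoint"), though with "should_training_stop" set to true the run shown here is complete.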