Training in progress, step 237, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +403 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f57fed67a0a24032810adbe0e03ab8a7bf6dee246be2e490def9aa34d56def6
 size 25192496

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1c7840385f12fee7e74973749b9ba9470366178c241f0416b8d447a2249977c
 size 25192496

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:388f822a50fd19459e4ab02c6bc13e74177d0578716f88456b8df9bfde914d13
 size 13005178

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d186c5b450df85102e6f0d49946a662cbe5a139e38bac5221450562139d428d
 size 13005178

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f29e05bcbde75ce27a9713f89d1d62332544e44f262dd85c2f7f43afaa9c494
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fa75adfc30a7af2ef9a87d78b6749bb4f19e86bd670abdfb90a4c932cb27ed2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0950e2dffdf70f6969e672506b8287212d20b088ff729b93c9b723972fc5a09f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e6ec607bcb824b32dbf9532bd2490e6acd122f00ac7ed381d94679fcb4d357f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7619047619047619,
   "eval_steps": 60,
-  "global_step": 180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1299,6 +1299,405 @@
       "eval_samples_per_second": 68.347,
       "eval_steps_per_second": 34.173,
       "step": 180
     }
   ],
   "logging_steps": 1,
@@ -1313,12 +1712,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5374302643814400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0031746031746032,
   "eval_steps": 60,
+  "global_step": 237,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 68.347,
       "eval_steps_per_second": 34.173,
       "step": 180
+    },
+    {
+      "epoch": 0.7661375661375661,
+      "grad_norm": 7.957937240600586,
+      "learning_rate": 2.855918772175522e-05,
+      "loss": 3.4826,
+      "step": 181
+    },
+    {
+      "epoch": 0.7703703703703704,
+      "grad_norm": 6.855284690856934,
+      "learning_rate": 2.7597661864045233e-05,
+      "loss": 3.4164,
+      "step": 182
+    },
+    {
+      "epoch": 0.7746031746031746,
+      "grad_norm": 5.783848285675049,
+      "learning_rate": 2.6650003363154963e-05,
+      "loss": 2.3278,
+      "step": 183
+    },
+    {
+      "epoch": 0.7788359788359789,
+      "grad_norm": 15.872426986694336,
+      "learning_rate": 2.5716393725910215e-05,
+      "loss": 3.0732,
+      "step": 184
+    },
+    {
+      "epoch": 0.783068783068783,
+      "grad_norm": 6.1523871421813965,
+      "learning_rate": 2.47970117683313e-05,
+      "loss": 2.9557,
+      "step": 185
+    },
+    {
+      "epoch": 0.7873015873015873,
+      "grad_norm": 8.40322494506836,
+      "learning_rate": 2.389203358138419e-05,
+      "loss": 3.7876,
+      "step": 186
+    },
+    {
+      "epoch": 0.7915343915343915,
+      "grad_norm": 6.189878463745117,
+      "learning_rate": 2.3001632497253424e-05,
+      "loss": 2.9313,
+      "step": 187
+    },
+    {
+      "epoch": 0.7957671957671958,
+      "grad_norm": 4.903008460998535,
+      "learning_rate": 2.2125979056143364e-05,
+      "loss": 1.6503,
+      "step": 188
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 8.661479949951172,
+      "learning_rate": 2.1265240973614486e-05,
+      "loss": 3.0821,
+      "step": 189
+    },
+    {
+      "epoch": 0.8042328042328042,
+      "grad_norm": 5.910584926605225,
+      "learning_rate": 2.0419583108460418e-05,
+      "loss": 2.6028,
+      "step": 190
+    },
+    {
+      "epoch": 0.8084656084656084,
+      "grad_norm": 6.254149436950684,
+      "learning_rate": 1.958916743113214e-05,
+      "loss": 2.7991,
+      "step": 191
+    },
+    {
+      "epoch": 0.8126984126984127,
+      "grad_norm": 7.573146820068359,
+      "learning_rate": 1.877415299271561e-05,
+      "loss": 2.5724,
+      "step": 192
+    },
+    {
+      "epoch": 0.816931216931217,
+      "grad_norm": 5.690131187438965,
+      "learning_rate": 1.7974695894468384e-05,
+      "loss": 2.2762,
+      "step": 193
+    },
+    {
+      "epoch": 0.8211640211640212,
+      "grad_norm": 5.798933982849121,
+      "learning_rate": 1.7190949257921196e-05,
+      "loss": 2.1385,
+      "step": 194
+    },
+    {
+      "epoch": 0.8253968253968254,
+      "grad_norm": 7.621517658233643,
+      "learning_rate": 1.642306319555027e-05,
+      "loss": 2.5064,
+      "step": 195
+    },
+    {
+      "epoch": 0.8296296296296296,
+      "grad_norm": 7.2165350914001465,
+      "learning_rate": 1.5671184782026106e-05,
+      "loss": 2.7742,
+      "step": 196
+    },
+    {
+      "epoch": 0.8338624338624339,
+      "grad_norm": 6.8323211669921875,
+      "learning_rate": 1.4935458026043959e-05,
+      "loss": 2.869,
+      "step": 197
+    },
+    {
+      "epoch": 0.8380952380952381,
+      "grad_norm": 6.338479042053223,
+      "learning_rate": 1.4216023842741455e-05,
+      "loss": 2.9435,
+      "step": 198
+    },
+    {
+      "epoch": 0.8423280423280424,
+      "grad_norm": 6.278661727905273,
+      "learning_rate": 1.3513020026709023e-05,
+      "loss": 2.7868,
+      "step": 199
+    },
+    {
+      "epoch": 0.8465608465608465,
+      "grad_norm": 5.375467300415039,
+      "learning_rate": 1.2826581225597767e-05,
+      "loss": 2.6017,
+      "step": 200
+    },
+    {
+      "epoch": 0.8507936507936508,
+      "grad_norm": 7.244228839874268,
+      "learning_rate": 1.2156838914330072e-05,
+      "loss": 3.1561,
+      "step": 201
+    },
+    {
+      "epoch": 0.855026455026455,
+      "grad_norm": 6.201519012451172,
+      "learning_rate": 1.1503921369918091e-05,
+      "loss": 2.5623,
+      "step": 202
+    },
+    {
+      "epoch": 0.8592592592592593,
+      "grad_norm": 5.793484210968018,
+      "learning_rate": 1.0867953646894525e-05,
+      "loss": 2.8732,
+      "step": 203
+    },
+    {
+      "epoch": 0.8634920634920635,
+      "grad_norm": 7.211161136627197,
+      "learning_rate": 1.0249057553360742e-05,
+      "loss": 3.4671,
+      "step": 204
+    },
+    {
+      "epoch": 0.8677248677248677,
+      "grad_norm": 6.088332176208496,
+      "learning_rate": 9.647351627656543e-06,
+      "loss": 1.7759,
+      "step": 205
+    },
+    {
+      "epoch": 0.8719576719576719,
+      "grad_norm": 6.6968488693237305,
+      "learning_rate": 9.062951115656403e-06,
+      "loss": 3.3001,
+      "step": 206
+    },
+    {
+      "epoch": 0.8761904761904762,
+      "grad_norm": 5.636354446411133,
+      "learning_rate": 8.495967948696192e-06,
+      "loss": 2.7173,
+      "step": 207
+    },
+    {
+      "epoch": 0.8804232804232804,
+      "grad_norm": 5.944347858428955,
+      "learning_rate": 7.946510722134692e-06,
+      "loss": 2.454,
+      "step": 208
+    },
+    {
+      "epoch": 0.8846560846560847,
+      "grad_norm": 6.995573997497559,
+      "learning_rate": 7.4146846745541506e-06,
+      "loss": 3.2652,
+      "step": 209
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 7.945988178253174,
+      "learning_rate": 6.900591667603751e-06,
+      "loss": 3.5859,
+      "step": 210
+    },
+    {
+      "epoch": 0.8931216931216931,
+      "grad_norm": 5.948593616485596,
+      "learning_rate": 6.40433016648988e-06,
+      "loss": 2.3406,
+      "step": 211
+    },
+    {
+      "epoch": 0.8973544973544973,
+      "grad_norm": 6.893688201904297,
+      "learning_rate": 5.925995221116853e-06,
+      "loss": 2.5966,
+      "step": 212
+    },
+    {
+      "epoch": 0.9015873015873016,
+      "grad_norm": 6.056822776794434,
+      "learning_rate": 5.465678447881828e-06,
+      "loss": 3.1498,
+      "step": 213
+    },
+    {
+      "epoch": 0.9058201058201059,
+      "grad_norm": 5.484859943389893,
+      "learning_rate": 5.023468012127364e-06,
+      "loss": 2.3254,
+      "step": 214
+    },
+    {
+      "epoch": 0.91005291005291,
+      "grad_norm": 5.663562774658203,
+      "learning_rate": 4.599448611254964e-06,
+      "loss": 2.4197,
+      "step": 215
+    },
+    {
+      "epoch": 0.9142857142857143,
+      "grad_norm": 7.122875690460205,
+      "learning_rate": 4.193701458502807e-06,
+      "loss": 3.4631,
+      "step": 216
+    },
+    {
+      "epoch": 0.9185185185185185,
+      "grad_norm": 5.062466621398926,
+      "learning_rate": 3.80630426739077e-06,
+      "loss": 2.0242,
+      "step": 217
+    },
+    {
+      "epoch": 0.9227513227513228,
+      "grad_norm": 6.852725505828857,
+      "learning_rate": 3.4373312368358944e-06,
+      "loss": 2.3947,
+      "step": 218
+    },
+    {
+      "epoch": 0.926984126984127,
+      "grad_norm": 6.722269058227539,
+      "learning_rate": 3.086853036940862e-06,
+      "loss": 2.9278,
+      "step": 219
+    },
+    {
+      "epoch": 0.9312169312169312,
+      "grad_norm": 7.214760780334473,
+      "learning_rate": 2.754936795458485e-06,
+      "loss": 2.5268,
+      "step": 220
+    },
+    {
+      "epoch": 0.9354497354497354,
+      "grad_norm": 7.218380451202393,
+      "learning_rate": 2.4416460849345123e-06,
+      "loss": 2.9904,
+      "step": 221
+    },
+    {
+      "epoch": 0.9396825396825397,
+      "grad_norm": 6.950320720672607,
+      "learning_rate": 2.1470409105315283e-06,
+      "loss": 2.7091,
+      "step": 222
+    },
+    {
+      "epoch": 0.9439153439153439,
+      "grad_norm": 5.87589168548584,
+      "learning_rate": 1.8711776985360308e-06,
+      "loss": 2.4052,
+      "step": 223
+    },
+    {
+      "epoch": 0.9481481481481482,
+      "grad_norm": 5.747050762176514,
+      "learning_rate": 1.61410928555098e-06,
+      "loss": 2.5603,
+      "step": 224
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 6.162868976593018,
+      "learning_rate": 1.3758849083759352e-06,
+      "loss": 2.5383,
+      "step": 225
+    },
+    {
+      "epoch": 0.9566137566137566,
+      "grad_norm": 6.223538875579834,
+      "learning_rate": 1.1565501945766222e-06,
+      "loss": 2.7093,
+      "step": 226
+    },
+    {
+      "epoch": 0.9608465608465608,
+      "grad_norm": 6.424678802490234,
+      "learning_rate": 9.56147153745779e-07,
+      "loss": 2.2974,
+      "step": 227
+    },
+    {
+      "epoch": 0.9650793650793651,
+      "grad_norm": 8.89910888671875,
+      "learning_rate": 7.747141694570026e-07,
+      "loss": 3.2458,
+      "step": 228
+    },
+    {
+      "epoch": 0.9693121693121693,
+      "grad_norm": 5.710629463195801,
+      "learning_rate": 6.122859919130974e-07,
+      "loss": 3.1255,
+      "step": 229
+    },
+    {
+      "epoch": 0.9735449735449735,
+      "grad_norm": 5.598289489746094,
+      "learning_rate": 4.6889373129022085e-07,
+      "loss": 2.3627,
+      "step": 230
+    },
+    {
+      "epoch": 0.9777777777777777,
+      "grad_norm": 6.710612773895264,
+      "learning_rate": 3.445648517793942e-07,
+      "loss": 2.4085,
+      "step": 231
+    },
+    {
+      "epoch": 0.982010582010582,
+      "grad_norm": 6.431200981140137,
+      "learning_rate": 2.3932316632614416e-07,
+      "loss": 2.8684,
+      "step": 232
+    },
+    {
+      "epoch": 0.9862433862433863,
+      "grad_norm": 6.007854461669922,
+      "learning_rate": 1.5318883206962842e-07,
+      "loss": 2.7014,
+      "step": 233
+    },
+    {
+      "epoch": 0.9904761904761905,
+      "grad_norm": 5.230172634124756,
+      "learning_rate": 8.617834648185774e-08,
+      "loss": 2.6608,
+      "step": 234
+    },
+    {
+      "epoch": 0.9947089947089947,
+      "grad_norm": 6.711563587188721,
+      "learning_rate": 3.8304544207945495e-08,
+      "loss": 2.612,
+      "step": 235
+    },
+    {
+      "epoch": 0.9989417989417989,
+      "grad_norm": 5.968123912811279,
+      "learning_rate": 9.576594607807465e-09,
+      "loss": 2.2378,
+      "step": 236
+    },
+    {
+      "epoch": 1.0031746031746032,
+      "grad_norm": 6.985171318054199,
+      "learning_rate": 0.0,
+      "loss": 2.7958,
+      "step": 237
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7072432993075200.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null