ErrorAI committed
Commit f3a8fc4 · verified · 1 Parent(s): a529840

Training in progress, step 301, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65dc7c94f10d1fd8453ca692791a5b0828673812ffffb21a0d1de89e54ec7d6b
+oid sha256:c59e4672ef7cc36f9f5487ce55f2ccfa0a52617841525b4bb1ac622d5b4aa80f
 size 80013120
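
These are Git LFS pointer files, so each diff touches only metadata: the sha256 oid changes with every new checkpoint while the payload itself lives in LFS storage. A minimal sketch of checking a pulled file against its pointer, assuming the checkpoint directory is checked out locally (the path and expected digest below are taken from the diff above):

import hashlib

# Expected digest, copied from the new LFS pointer above.
EXPECTED_OID = "c59e4672ef7cc36f9f5487ce55f2ccfa0a52617841525b4bb1ac622d5b4aa80f"

def sha256_of(path, chunk_size=1 << 20):
    # Stream in chunks so an 80 MB (or larger) checkpoint never has to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

assert sha256_of("last-checkpoint/adapter_model.safetensors") == EXPECTED_OID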
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8315c06588c0eadc24b6c01f05d7044bc9877ba97394bcc91e35c44e4df5291d
-size 41119636
+oid sha256:00570a8c2ef93f07116ec0c50f9481945aab0c0c66ec1e76db067833c5ce46f6
+size 41120084
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d4268bb260bb0810b614917f265d2383e6575486808e3cbab30ba6a3e94dab5
+oid sha256:5d35b3a9dd0a1f8f9486755fe2563ec30d796d72743863e4be5bb2ab47e364b5
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ad89cd045fe380c9e3c8629b5b884bcbf8e3c6af3cb83ad82730d6d0ed22b56
+oid sha256:207ebf8b22c8abb89ae63b0d6979b5f27d625d140e3311a16b44653eec5ce343
 size 1064
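
Together, optimizer.pt, scheduler.pt and rng_state.pth carry the state needed to resume the run deterministically from step 301. A minimal sketch, not a drop-in script:

# `trainer` is assumed to be a transformers.Trainer constructed with the same
# model, dataset and TrainingArguments as the run that produced this checkpoint.
# This call restores optimizer, LR scheduler and RNG state from the files in
# the checkpoint directory before continuing the training loop.
trainer.train(resume_from_checkpoint="last-checkpoint")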
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7587354409317804,
+  "epoch": 1.0016638935108153,
   "eval_steps": 500,
-  "global_step": 228,
+  "global_step": 301,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1603,6 +1603,525 @@
       "learning_rate": 1.427141579677374e-05,
       "loss": 0.5648,
       "step": 228
+    },
+    {
+      "epoch": 0.762063227953411,
+      "grad_norm": 0.5607278943061829,
+      "learning_rate": 1.3902195302273779e-05,
+      "loss": 0.3263,
+      "step": 229
+    },
+    {
+      "epoch": 0.7653910149750416,
+      "grad_norm": 0.5801076889038086,
+      "learning_rate": 1.3537041048046695e-05,
+      "loss": 0.4707,
+      "step": 230
+    },
+    {
+      "epoch": 0.7687188019966722,
+      "grad_norm": 0.7465184926986694,
+      "learning_rate": 1.3175994166924394e-05,
+      "loss": 0.4633,
+      "step": 231
+    },
+    {
+      "epoch": 0.7720465890183028,
+      "grad_norm": 0.893689751625061,
+      "learning_rate": 1.2819095329063469e-05,
+      "loss": 0.4621,
+      "step": 232
+    },
+    {
+      "epoch": 0.7753743760399334,
+      "grad_norm": 0.6686207056045532,
+      "learning_rate": 1.246638473736378e-05,
+      "loss": 0.5619,
+      "step": 233
+    },
+    {
+      "epoch": 0.778702163061564,
+      "grad_norm": 0.8450759053230286,
+      "learning_rate": 1.2117902122939861e-05,
+      "loss": 0.578,
+      "step": 234
+    },
+    {
+      "epoch": 0.7820299500831946,
+      "grad_norm": 0.7605635523796082,
+      "learning_rate": 1.1773686740645384e-05,
+      "loss": 0.6534,
+      "step": 235
+    },
+    {
+      "epoch": 0.7853577371048253,
+      "grad_norm": 0.9870882630348206,
+      "learning_rate": 1.1433777364651271e-05,
+      "loss": 0.8853,
+      "step": 236
+    },
+    {
+      "epoch": 0.7886855241264559,
+      "grad_norm": 0.5999244451522827,
+      "learning_rate": 1.1098212284078036e-05,
+      "loss": 0.3816,
+      "step": 237
+    },
+    {
+      "epoch": 0.7920133111480865,
+      "grad_norm": 0.8197599053382874,
+      "learning_rate": 1.076702929868264e-05,
+      "loss": 0.8135,
+      "step": 238
+    },
+    {
+      "epoch": 0.7953410981697171,
+      "grad_norm": 0.7237629294395447,
+      "learning_rate": 1.0440265714600572e-05,
+      "loss": 0.6637,
+      "step": 239
+    },
+    {
+      "epoch": 0.7986688851913477,
+      "grad_norm": 0.8603866696357727,
+      "learning_rate": 1.0117958340143507e-05,
+      "loss": 0.8529,
+      "step": 240
+    },
+    {
+      "epoch": 0.8019966722129783,
+      "grad_norm": 0.7199721336364746,
+      "learning_rate": 9.800143481652979e-06,
+      "loss": 0.8565,
+      "step": 241
+    },
+    {
+      "epoch": 0.8053244592346089,
+      "grad_norm": 0.6758659482002258,
+      "learning_rate": 9.48685693941067e-06,
+      "loss": 0.794,
+      "step": 242
+    },
+    {
+      "epoch": 0.8086522462562395,
+      "grad_norm": 0.7372802495956421,
+      "learning_rate": 9.17813400360572e-06,
+      "loss": 0.8753,
+      "step": 243
+    },
+    {
+      "epoch": 0.8119800332778702,
+      "grad_norm": 0.6816331148147583,
+      "learning_rate": 8.874009450359427e-06,
+      "loss": 0.7685,
+      "step": 244
+    },
+    {
+      "epoch": 0.8153078202995009,
+      "grad_norm": 0.6845401525497437,
+      "learning_rate": 8.574517537807897e-06,
+      "loss": 0.7908,
+      "step": 245
+    },
+    {
+      "epoch": 0.8186356073211315,
+      "grad_norm": 0.7325373291969299,
+      "learning_rate": 8.279692002243027e-06,
+      "loss": 0.9694,
+      "step": 246
+    },
+    {
+      "epoch": 0.8219633943427621,
+      "grad_norm": 0.7457829713821411,
+      "learning_rate": 7.989566054312287e-06,
+      "loss": 0.6347,
+      "step": 247
+    },
+    {
+      "epoch": 0.8252911813643927,
+      "grad_norm": 0.7450778484344482,
+      "learning_rate": 7.704172375277691e-06,
+      "loss": 0.8447,
+      "step": 248
+    },
+    {
+      "epoch": 0.8286189683860233,
+      "grad_norm": 0.7165175676345825,
+      "learning_rate": 7.423543113334436e-06,
+      "loss": 0.786,
+      "step": 249
+    },
+    {
+      "epoch": 0.831946755407654,
+      "grad_norm": 1.0344496965408325,
+      "learning_rate": 7.14770987998954e-06,
+      "loss": 0.8206,
+      "step": 250
+    },
+    {
+      "epoch": 0.8352745424292846,
+      "grad_norm": 0.6772722005844116,
+      "learning_rate": 6.876703746500984e-06,
+      "loss": 0.4412,
+      "step": 251
+    },
+    {
+      "epoch": 0.8386023294509152,
+      "grad_norm": 0.6309685111045837,
+      "learning_rate": 6.610555240377652e-06,
+      "loss": 0.4023,
+      "step": 252
+    },
+    {
+      "epoch": 0.8419301164725458,
+      "grad_norm": 0.8043044805526733,
+      "learning_rate": 6.349294341940593e-06,
+      "loss": 0.4782,
+      "step": 253
+    },
+    {
+      "epoch": 0.8452579034941764,
+      "grad_norm": 0.6318528056144714,
+      "learning_rate": 6.092950480945897e-06,
+      "loss": 0.4712,
+      "step": 254
+    },
+    {
+      "epoch": 0.848585690515807,
+      "grad_norm": 0.5549326539039612,
+      "learning_rate": 5.841552533269534e-06,
+      "loss": 0.3927,
+      "step": 255
+    },
+    {
+      "epoch": 0.8519134775374376,
+      "grad_norm": 0.5588014721870422,
+      "learning_rate": 5.595128817654638e-06,
+      "loss": 0.3666,
+      "step": 256
+    },
+    {
+      "epoch": 0.8552412645590682,
+      "grad_norm": 0.6045800447463989,
+      "learning_rate": 5.353707092521582e-06,
+      "loss": 0.5723,
+      "step": 257
+    },
+    {
+      "epoch": 0.8585690515806988,
+      "grad_norm": 0.6416806578636169,
+      "learning_rate": 5.117314552841052e-06,
+      "loss": 0.4304,
+      "step": 258
+    },
+    {
+      "epoch": 0.8618968386023295,
+      "grad_norm": 0.5945389866828918,
+      "learning_rate": 4.885977827070748e-06,
+      "loss": 0.343,
+      "step": 259
+    },
+    {
+      "epoch": 0.8652246256239601,
+      "grad_norm": 0.6209045052528381,
+      "learning_rate": 4.659722974155767e-06,
+      "loss": 0.3872,
+      "step": 260
+    },
+    {
+      "epoch": 0.8685524126455907,
+      "grad_norm": 0.627796471118927,
+      "learning_rate": 4.43857548059321e-06,
+      "loss": 0.3544,
+      "step": 261
+    },
+    {
+      "epoch": 0.8718801996672213,
+      "grad_norm": 0.5913266539573669,
+      "learning_rate": 4.2225602575612755e-06,
+      "loss": 0.4773,
+      "step": 262
+    },
+    {
+      "epoch": 0.8752079866888519,
+      "grad_norm": 0.6416683793067932,
+      "learning_rate": 4.011701638113063e-06,
+      "loss": 0.412,
+      "step": 263
+    },
+    {
+      "epoch": 0.8785357737104825,
+      "grad_norm": 0.8090274930000305,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.5336,
+      "step": 264
+    },
+    {
+      "epoch": 0.8818635607321131,
+      "grad_norm": 0.5056114792823792,
+      "learning_rate": 3.605548635174533e-06,
+      "loss": 0.2786,
+      "step": 265
+    },
+    {
+      "epoch": 0.8851913477537438,
+      "grad_norm": 0.566896915435791,
+      "learning_rate": 3.410300002823691e-06,
+      "loss": 0.3718,
+      "step": 266
+    },
+    {
+      "epoch": 0.8885191347753744,
+      "grad_norm": 0.5743393898010254,
+      "learning_rate": 3.220299471181898e-06,
+      "loss": 0.4044,
+      "step": 267
+    },
+    {
+      "epoch": 0.891846921797005,
+      "grad_norm": 0.634122908115387,
+      "learning_rate": 3.035568442875136e-06,
+      "loss": 0.6042,
+      "step": 268
+    },
+    {
+      "epoch": 0.8951747088186356,
+      "grad_norm": 0.39157751202583313,
+      "learning_rate": 2.85612772694579e-06,
+      "loss": 0.153,
+      "step": 269
+    },
+    {
+      "epoch": 0.8985024958402662,
+      "grad_norm": 0.5736538171768188,
+      "learning_rate": 2.6819975365085237e-06,
+      "loss": 0.3097,
+      "step": 270
+    },
+    {
+      "epoch": 0.9018302828618968,
+      "grad_norm": 0.5841122269630432,
+      "learning_rate": 2.5131974864734066e-06,
+      "loss": 0.4648,
+      "step": 271
+    },
+    {
+      "epoch": 0.9051580698835274,
+      "grad_norm": 0.5904918313026428,
+      "learning_rate": 2.349746591336405e-06,
+      "loss": 0.5159,
+      "step": 272
+    },
+    {
+      "epoch": 0.908485856905158,
+      "grad_norm": 0.5497831702232361,
+      "learning_rate": 2.191663263037458e-06,
+      "loss": 0.3491,
+      "step": 273
+    },
+    {
+      "epoch": 0.9118136439267887,
+      "grad_norm": 0.6325455904006958,
+      "learning_rate": 2.0389653088865036e-06,
+      "loss": 0.4999,
+      "step": 274
+    },
+    {
+      "epoch": 0.9151414309484193,
+      "grad_norm": 0.5889260172843933,
+      "learning_rate": 1.8916699295575324e-06,
+      "loss": 0.4644,
+      "step": 275
+    },
+    {
+      "epoch": 0.9184692179700499,
+      "grad_norm": 0.5664623379707336,
+      "learning_rate": 1.7497937171510547e-06,
+      "loss": 0.3335,
+      "step": 276
+    },
+    {
+      "epoch": 0.9217970049916805,
+      "grad_norm": 0.49772658944129944,
+      "learning_rate": 1.6133526533250565e-06,
+      "loss": 0.3306,
+      "step": 277
+    },
+    {
+      "epoch": 0.9251247920133111,
+      "grad_norm": 0.4992808699607849,
+      "learning_rate": 1.4823621074947503e-06,
+      "loss": 0.354,
+      "step": 278
+    },
+    {
+      "epoch": 0.9284525790349417,
+      "grad_norm": 0.4994637966156006,
+      "learning_rate": 1.3568368351012717e-06,
+      "loss": 0.3448,
+      "step": 279
+    },
+    {
+      "epoch": 0.9317803660565723,
+      "grad_norm": 0.48505714535713196,
+      "learning_rate": 1.236790975949592e-06,
+      "loss": 0.3501,
+      "step": 280
+    },
+    {
+      "epoch": 0.9351081530782029,
+      "grad_norm": 1.016489028930664,
+      "learning_rate": 1.1222380526156928e-06,
+      "loss": 0.4423,
+      "step": 281
+    },
+    {
+      "epoch": 0.9384359400998337,
+      "grad_norm": 0.7452896237373352,
+      "learning_rate": 1.0131909689233442e-06,
+      "loss": 0.6174,
+      "step": 282
+    },
+    {
+      "epoch": 0.9417637271214643,
+      "grad_norm": 1.0553852319717407,
+      "learning_rate": 9.096620084905472e-07,
+      "loss": 0.7014,
+      "step": 283
+    },
+    {
+      "epoch": 0.9450915141430949,
+      "grad_norm": 0.7587735652923584,
+      "learning_rate": 8.11662833345822e-07,
+      "loss": 0.6485,
+      "step": 284
+    },
+    {
+      "epoch": 0.9484193011647255,
+      "grad_norm": 0.7494546175003052,
+      "learning_rate": 7.192044826145771e-07,
+      "loss": 0.7379,
+      "step": 285
+    },
+    {
+      "epoch": 0.9517470881863561,
+      "grad_norm": 0.8453697562217712,
+      "learning_rate": 6.322973712755697e-07,
+      "loss": 0.8027,
+      "step": 286
+    },
+    {
+      "epoch": 0.9550748752079867,
+      "grad_norm": 0.6284394860267639,
+      "learning_rate": 5.509512889877333e-07,
+      "loss": 0.5964,
+      "step": 287
+    },
+    {
+      "epoch": 0.9584026622296173,
+      "grad_norm": 0.8004696369171143,
+      "learning_rate": 4.7517539898741524e-07,
+      "loss": 0.6745,
+      "step": 288
+    },
+    {
+      "epoch": 0.961730449251248,
+      "grad_norm": 0.7253815531730652,
+      "learning_rate": 4.049782370561583e-07,
+      "loss": 0.7672,
+      "step": 289
+    },
+    {
+      "epoch": 0.9650582362728786,
+      "grad_norm": 0.7120758891105652,
+      "learning_rate": 3.4036771055923066e-07,
+      "loss": 0.6682,
+      "step": 290
+    },
+    {
+      "epoch": 0.9683860232945092,
+      "grad_norm": 0.7899070978164673,
+      "learning_rate": 2.813510975548772e-07,
+      "loss": 0.7882,
+      "step": 291
+    },
+    {
+      "epoch": 0.9717138103161398,
+      "grad_norm": 0.8241245150566101,
+      "learning_rate": 2.2793504597447002e-07,
+      "loss": 0.8099,
+      "step": 292
+    },
+    {
+      "epoch": 0.9750415973377704,
+      "grad_norm": 0.7875584363937378,
+      "learning_rate": 1.8012557287367392e-07,
+      "loss": 1.0536,
+      "step": 293
+    },
+    {
+      "epoch": 0.978369384359401,
+      "grad_norm": 0.8683494329452515,
+      "learning_rate": 1.379280637546443e-07,
+      "loss": 0.8868,
+      "step": 294
+    },
+    {
+      "epoch": 0.9816971713810316,
+      "grad_norm": 0.7692094445228577,
+      "learning_rate": 1.0134727195937333e-07,
+      "loss": 0.7747,
+      "step": 295
+    },
+    {
+      "epoch": 0.9850249584026622,
+      "grad_norm": 0.7598888278007507,
+      "learning_rate": 7.038731813426291e-08,
+      "loss": 0.7562,
+      "step": 296
+    },
+    {
+      "epoch": 0.9883527454242929,
+      "grad_norm": 0.759955108165741,
+      "learning_rate": 4.5051689765929214e-08,
+      "loss": 0.775,
+      "step": 297
+    },
+    {
+      "epoch": 0.9916805324459235,
+      "grad_norm": 0.764261782169342,
+      "learning_rate": 2.534324078837802e-08,
+      "loss": 0.6648,
+      "step": 298
+    },
+    {
+      "epoch": 0.9950083194675541,
+      "grad_norm": 0.8436802625656128,
+      "learning_rate": 1.1264191261528557e-08,
+      "loss": 0.848,
+      "step": 299
+    },
+    {
+      "epoch": 0.9983361064891847,
+      "grad_norm": 0.9775305390357971,
+      "learning_rate": 2.8161271211024633e-09,
+      "loss": 0.7154,
+      "step": 300
+    },
+    {
+      "epoch": 0.9983361064891847,
+      "eval_loss": 0.49492859840393066,
+      "eval_runtime": 26.7842,
+      "eval_samples_per_second": 9.446,
+      "eval_steps_per_second": 2.389,
+      "step": 300
+    },
+    {
+      "epoch": 1.0016638935108153,
+      "grad_norm": 0.9796708226203918,
+      "learning_rate": 0.0,
+      "loss": 0.4979,
+      "step": 301
     }
   ],
   "logging_steps": 1,
@@ -1617,12 +2136,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.9872743929767526e+17,
+  "total_flos": 3.93702389745451e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null