Training in progress, step 440, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +782 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:622986e1420dc3e7ee7885ea8da70861d0cf96bdea007127fbd180958f052a07
 size 36981072

 version https://git-lfs.github.com/spec/v1
+oid sha256:13cce9bb0e996351713314d3f8524bbdb49c9ae98bbc5d38e88243b1df518f08
 size 36981072

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:935c98ae461b3b8ed892a66114875574ebb0e183dd718956b230f0941978eab4
 size 19859524

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb676f03323a195f09bb3fb1a56057c8e39c831ecf332633428cccdf75a1efdb
 size 19859524

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d71f7343cca5753750cd88e4abed19d43187403cccb3b5aa9f782268b78a61c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e99cd4efdc8ae360618f7c31cdaf24ebec835e26008dd443222ea8c2ad794ce3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91f6f24b84240d20fbf3f3b9ae432352426752db5e8618bd928fe6a5ad410144
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3aa6d4cd0a1d119d88746df8d17b061da99249879d9cb64d05543ac4d112a2c5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7504263786242183,
   "eval_steps": 500,
-  "global_step": 330,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2317,6 +2317,784 @@
       "learning_rate": 1.4965269896332885e-05,
       "loss": 0.6016,
       "step": 330
     }
   ],
   "logging_steps": 1,
@@ -2331,12 +3109,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.410215207043072e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0017055144968732,
   "eval_steps": 500,
+  "global_step": 440,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.4965269896332885e-05,
       "loss": 0.6016,
       "step": 330
+    },
+    {
+      "epoch": 0.7527003979533826,
+      "grad_norm": 3.569581985473633,
+      "learning_rate": 1.4708553057981355e-05,
+      "loss": 0.4036,
+      "step": 331
+    },
+    {
+      "epoch": 0.7549744172825469,
+      "grad_norm": 3.5513603687286377,
+      "learning_rate": 1.4453676944196476e-05,
+      "loss": 0.3642,
+      "step": 332
+    },
+    {
+      "epoch": 0.7572484366117112,
+      "grad_norm": 3.9503798484802246,
+      "learning_rate": 1.4200654848757994e-05,
+      "loss": 0.3798,
+      "step": 333
+    },
+    {
+      "epoch": 0.7595224559408755,
+      "grad_norm": 3.2657759189605713,
+      "learning_rate": 1.3949499968744206e-05,
+      "loss": 0.3445,
+      "step": 334
+    },
+    {
+      "epoch": 0.7617964752700398,
+      "grad_norm": 1.5095345973968506,
+      "learning_rate": 1.3700225403843469e-05,
+      "loss": 0.1506,
+      "step": 335
+    },
+    {
+      "epoch": 0.7640704945992041,
+      "grad_norm": 3.1756370067596436,
+      "learning_rate": 1.3452844155671052e-05,
+      "loss": 0.3452,
+      "step": 336
+    },
+    {
+      "epoch": 0.7663445139283684,
+      "grad_norm": 2.36387038230896,
+      "learning_rate": 1.3207369127090985e-05,
+      "loss": 0.2018,
+      "step": 337
+    },
+    {
+      "epoch": 0.7686185332575327,
+      "grad_norm": 1.7574656009674072,
+      "learning_rate": 1.296381312154305e-05,
+      "loss": 0.1288,
+      "step": 338
+    },
+    {
+      "epoch": 0.770892552586697,
+      "grad_norm": 3.010063409805298,
+      "learning_rate": 1.2722188842374966e-05,
+      "loss": 0.2819,
+      "step": 339
+    },
+    {
+      "epoch": 0.7731665719158612,
+      "grad_norm": 1.5432311296463013,
+      "learning_rate": 1.2482508892179884e-05,
+      "loss": 0.0887,
+      "step": 340
+    },
+    {
+      "epoch": 0.7754405912450256,
+      "grad_norm": 2.749730110168457,
+      "learning_rate": 1.2244785772138972e-05,
+      "loss": 0.1669,
+      "step": 341
+    },
+    {
+      "epoch": 0.7777146105741899,
+      "grad_norm": 4.618091583251953,
+      "learning_rate": 1.2009031881369431e-05,
+      "loss": 0.227,
+      "step": 342
+    },
+    {
+      "epoch": 0.7799886299033542,
+      "grad_norm": 1.5743058919906616,
+      "learning_rate": 1.177525951627781e-05,
+      "loss": 0.1983,
+      "step": 343
+    },
+    {
+      "epoch": 0.7822626492325184,
+      "grad_norm": 1.480060338973999,
+      "learning_rate": 1.1543480869918555e-05,
+      "loss": 0.0891,
+      "step": 344
+    },
+    {
+      "epoch": 0.7845366685616828,
+      "grad_norm": 1.6388338804244995,
+      "learning_rate": 1.1313708031358183e-05,
+      "loss": 0.0913,
+      "step": 345
+    },
+    {
+      "epoch": 0.7868106878908471,
+      "grad_norm": 0.9587397575378418,
+      "learning_rate": 1.1085952985044634e-05,
+      "loss": 0.041,
+      "step": 346
+    },
+    {
+      "epoch": 0.7890847072200113,
+      "grad_norm": 1.376420497894287,
+      "learning_rate": 1.0860227610182222e-05,
+      "loss": 0.071,
+      "step": 347
+    },
+    {
+      "epoch": 0.7913587265491757,
+      "grad_norm": 1.479590892791748,
+      "learning_rate": 1.0636543680112044e-05,
+      "loss": 0.0905,
+      "step": 348
+    },
+    {
+      "epoch": 0.79363274587834,
+      "grad_norm": 1.486275553703308,
+      "learning_rate": 1.04149128616979e-05,
+      "loss": 0.1012,
+      "step": 349
+    },
+    {
+      "epoch": 0.7959067652075043,
+      "grad_norm": 0.9402182698249817,
+      "learning_rate": 1.0195346714717813e-05,
+      "loss": 0.1013,
+      "step": 350
+    },
+    {
+      "epoch": 0.7981807845366685,
+      "grad_norm": 4.111584186553955,
+      "learning_rate": 9.977856691261057e-06,
+      "loss": 0.6617,
+      "step": 351
+    },
+    {
+      "epoch": 0.8004548038658329,
+      "grad_norm": 4.183617115020752,
+      "learning_rate": 9.762454135130828e-06,
+      "loss": 0.5664,
+      "step": 352
+    },
+    {
+      "epoch": 0.8027288231949972,
+      "grad_norm": 3.7775447368621826,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.4659,
+      "step": 353
+    },
+    {
+      "epoch": 0.8050028425241614,
+      "grad_norm": 3.4957637786865234,
+      "learning_rate": 9.337956255088237e-06,
+      "loss": 0.344,
+      "step": 354
+    },
+    {
+      "epoch": 0.8072768618533257,
+      "grad_norm": 3.0935065746307373,
+      "learning_rate": 9.12888307205541e-06,
+      "loss": 0.3546,
+      "step": 355
+    },
+    {
+      "epoch": 0.8095508811824901,
+      "grad_norm": 4.095314025878906,
+      "learning_rate": 8.921941636953435e-06,
+      "loss": 0.4984,
+      "step": 356
+    },
+    {
+      "epoch": 0.8118249005116543,
+      "grad_norm": 3.417351007461548,
+      "learning_rate": 8.717142743394236e-06,
+      "loss": 0.3108,
+      "step": 357
+    },
+    {
+      "epoch": 0.8140989198408186,
+      "grad_norm": 2.5139520168304443,
+      "learning_rate": 8.514497073239491e-06,
+      "loss": 0.3556,
+      "step": 358
+    },
+    {
+      "epoch": 0.816372939169983,
+      "grad_norm": 4.973466873168945,
+      "learning_rate": 8.3140151960435e-06,
+      "loss": 0.5424,
+      "step": 359
+    },
+    {
+      "epoch": 0.8186469584991473,
+      "grad_norm": 4.196943759918213,
+      "learning_rate": 8.115707568501768e-06,
+      "loss": 0.4552,
+      "step": 360
+    },
+    {
+      "epoch": 0.8209209778283115,
+      "grad_norm": 4.142265796661377,
+      "learning_rate": 7.919584533905777e-06,
+      "loss": 0.3666,
+      "step": 361
+    },
+    {
+      "epoch": 0.8231949971574758,
+      "grad_norm": 3.281536102294922,
+      "learning_rate": 7.725656321603413e-06,
+      "loss": 0.3572,
+      "step": 362
+    },
+    {
+      "epoch": 0.8254690164866402,
+      "grad_norm": 5.057455539703369,
+      "learning_rate": 7.533933046465419e-06,
+      "loss": 0.5856,
+      "step": 363
+    },
+    {
+      "epoch": 0.8277430358158044,
+      "grad_norm": 5.18012809753418,
+      "learning_rate": 7.344424708357867e-06,
+      "loss": 0.4198,
+      "step": 364
+    },
+    {
+      "epoch": 0.8300170551449687,
+      "grad_norm": 2.427621841430664,
+      "learning_rate": 7.157141191620548e-06,
+      "loss": 0.1815,
+      "step": 365
+    },
+    {
+      "epoch": 0.832291074474133,
+      "grad_norm": 0.5157700777053833,
+      "learning_rate": 6.972092264551438e-06,
+      "loss": 0.0464,
+      "step": 366
+    },
+    {
+      "epoch": 0.8345650938032974,
+      "grad_norm": 0.9035636782646179,
+      "learning_rate": 6.789287578897252e-06,
+      "loss": 0.0749,
+      "step": 367
+    },
+    {
+      "epoch": 0.8368391131324616,
+      "grad_norm": 0.434658020734787,
+      "learning_rate": 6.6087366693499295e-06,
+      "loss": 0.0233,
+      "step": 368
+    },
+    {
+      "epoch": 0.8391131324616259,
+      "grad_norm": 0.6567199230194092,
+      "learning_rate": 6.430448953049434e-06,
+      "loss": 0.017,
+      "step": 369
+    },
+    {
+      "epoch": 0.8413871517907903,
+      "grad_norm": 0.537125289440155,
+      "learning_rate": 6.2544337290925185e-06,
+      "loss": 0.0145,
+      "step": 370
+    },
+    {
+      "epoch": 0.8436611711199545,
+      "grad_norm": 0.5989580750465393,
+      "learning_rate": 6.080700178047688e-06,
+      "loss": 0.0196,
+      "step": 371
+    },
+    {
+      "epoch": 0.8459351904491188,
+      "grad_norm": 0.7875169515609741,
+      "learning_rate": 5.909257361476405e-06,
+      "loss": 0.0282,
+      "step": 372
+    },
+    {
+      "epoch": 0.8482092097782831,
+      "grad_norm": 0.6391351819038391,
+      "learning_rate": 5.740114221460424e-06,
+      "loss": 0.0167,
+      "step": 373
+    },
+    {
+      "epoch": 0.8504832291074474,
+      "grad_norm": 0.4413357377052307,
+      "learning_rate": 5.573279580135438e-06,
+      "loss": 0.0425,
+      "step": 374
+    },
+    {
+      "epoch": 0.8527572484366117,
+      "grad_norm": 0.4121001958847046,
+      "learning_rate": 5.408762139230888e-06,
+      "loss": 0.0165,
+      "step": 375
+    },
+    {
+      "epoch": 0.855031267765776,
+      "grad_norm": 0.6832776665687561,
+      "learning_rate": 5.246570479616103e-06,
+      "loss": 0.0272,
+      "step": 376
+    },
+    {
+      "epoch": 0.8573052870949404,
+      "grad_norm": 3.090912342071533,
+      "learning_rate": 5.086713060852788e-06,
+      "loss": 0.176,
+      "step": 377
+    },
+    {
+      "epoch": 0.8595793064241046,
+      "grad_norm": 2.9185822010040283,
+      "learning_rate": 4.929198220753722e-06,
+      "loss": 0.2692,
+      "step": 378
+    },
+    {
+      "epoch": 0.8618533257532689,
+      "grad_norm": 5.094744682312012,
+      "learning_rate": 4.774034174947922e-06,
+      "loss": 0.4768,
+      "step": 379
+    },
+    {
+      "epoch": 0.8641273450824332,
+      "grad_norm": 4.020772933959961,
+      "learning_rate": 4.621229016452156e-06,
+      "loss": 0.332,
+      "step": 380
+    },
+    {
+      "epoch": 0.8664013644115975,
+      "grad_norm": 4.474671840667725,
+      "learning_rate": 4.4707907152487405e-06,
+      "loss": 0.3265,
+      "step": 381
+    },
+    {
+      "epoch": 0.8686753837407618,
+      "grad_norm": 3.18890643119812,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.2424,
+      "step": 382
+    },
+    {
+      "epoch": 0.8709494030699261,
+      "grad_norm": 4.199413299560547,
+      "learning_rate": 4.1770459469887005e-06,
+      "loss": 0.2787,
+      "step": 383
+    },
+    {
+      "epoch": 0.8732234223990903,
+      "grad_norm": 2.4939513206481934,
+      "learning_rate": 4.033754801015732e-06,
+      "loss": 0.1588,
+      "step": 384
+    },
+    {
+      "epoch": 0.8754974417282547,
+      "grad_norm": 3.3816030025482178,
+      "learning_rate": 3.892861153703342e-06,
+      "loss": 0.2011,
+      "step": 385
+    },
+    {
+      "epoch": 0.877771461057419,
+      "grad_norm": 2.142763376235962,
+      "learning_rate": 3.7543723537555585e-06,
+      "loss": 0.1491,
+      "step": 386
+    },
+    {
+      "epoch": 0.8800454803865833,
+      "grad_norm": 2.4426450729370117,
+      "learning_rate": 3.6182956244448117e-06,
+      "loss": 0.1654,
+      "step": 387
+    },
+    {
+      "epoch": 0.8823194997157476,
+      "grad_norm": 1.4727312326431274,
+      "learning_rate": 3.4846380632352458e-06,
+      "loss": 0.0619,
+      "step": 388
+    },
+    {
+      "epoch": 0.8845935190449119,
+      "grad_norm": 1.7243990898132324,
+      "learning_rate": 3.35340664141246e-06,
+      "loss": 0.0671,
+      "step": 389
+    },
+    {
+      "epoch": 0.8868675383740762,
+      "grad_norm": 1.6698848009109497,
+      "learning_rate": 3.2246082037199532e-06,
+      "loss": 0.0581,
+      "step": 390
+    },
+    {
+      "epoch": 0.8891415577032404,
+      "grad_norm": 2.760230302810669,
+      "learning_rate": 3.0982494680021177e-06,
+      "loss": 0.1919,
+      "step": 391
+    },
+    {
+      "epoch": 0.8914155770324048,
+      "grad_norm": 4.160801410675049,
+      "learning_rate": 2.9743370248538017e-06,
+      "loss": 0.3573,
+      "step": 392
+    },
+    {
+      "epoch": 0.8936895963615691,
+      "grad_norm": 1.2745391130447388,
+      "learning_rate": 2.8528773372766216e-06,
+      "loss": 0.0565,
+      "step": 393
+    },
+    {
+      "epoch": 0.8959636156907334,
+      "grad_norm": 1.1233683824539185,
+      "learning_rate": 2.7338767403418287e-06,
+      "loss": 0.0371,
+      "step": 394
+    },
+    {
+      "epoch": 0.8982376350198976,
+      "grad_norm": 1.4613378047943115,
+      "learning_rate": 2.6173414408598827e-06,
+      "loss": 0.0638,
+      "step": 395
+    },
+    {
+      "epoch": 0.900511654349062,
+      "grad_norm": 1.0064411163330078,
+      "learning_rate": 2.503277517056729e-06,
+      "loss": 0.0226,
+      "step": 396
+    },
+    {
+      "epoch": 0.9027856736782263,
+      "grad_norm": 1.1050655841827393,
+      "learning_rate": 2.3916909182567782e-06,
+      "loss": 0.0482,
+      "step": 397
+    },
+    {
+      "epoch": 0.9050596930073905,
+      "grad_norm": 1.100659966468811,
+      "learning_rate": 2.282587464572594e-06,
+      "loss": 0.0531,
+      "step": 398
+    },
+    {
+      "epoch": 0.9073337123365549,
+      "grad_norm": 0.8102996945381165,
+      "learning_rate": 2.175972846601343e-06,
+      "loss": 0.0399,
+      "step": 399
+    },
+    {
+      "epoch": 0.9096077316657192,
+      "grad_norm": 2.706613779067993,
+      "learning_rate": 2.0718526251279346e-06,
+      "loss": 0.2256,
+      "step": 400
+    },
+    {
+      "epoch": 0.9118817509948834,
+      "grad_norm": 4.959632396697998,
+      "learning_rate": 1.9702322308350674e-06,
+      "loss": 0.6795,
+      "step": 401
+    },
+    {
+      "epoch": 0.9141557703240477,
+      "grad_norm": 4.623620510101318,
+      "learning_rate": 1.8711169640198977e-06,
+      "loss": 0.3954,
+      "step": 402
+    },
+    {
+      "epoch": 0.9164297896532121,
+      "grad_norm": 3.527909517288208,
+      "learning_rate": 1.774511994317629e-06,
+      "loss": 0.4934,
+      "step": 403
+    },
+    {
+      "epoch": 0.9187038089823764,
+      "grad_norm": 4.126560688018799,
+      "learning_rate": 1.6804223604318825e-06,
+      "loss": 0.2839,
+      "step": 404
+    },
+    {
+      "epoch": 0.9209778283115406,
+      "grad_norm": 2.926298141479492,
+      "learning_rate": 1.5888529698718346e-06,
+      "loss": 0.2572,
+      "step": 405
+    },
+    {
+      "epoch": 0.9232518476407049,
+      "grad_norm": 4.207761764526367,
+      "learning_rate": 1.4998085986963283e-06,
+      "loss": 0.4188,
+      "step": 406
+    },
+    {
+      "epoch": 0.9255258669698693,
+      "grad_norm": 4.310345649719238,
+      "learning_rate": 1.413293891264722e-06,
+      "loss": 0.3598,
+      "step": 407
+    },
+    {
+      "epoch": 0.9277998862990335,
+      "grad_norm": 2.979948043823242,
+      "learning_rate": 1.3293133599946329e-06,
+      "loss": 0.2512,
+      "step": 408
+    },
+    {
+      "epoch": 0.9300739056281978,
+      "grad_norm": 2.9833507537841797,
+      "learning_rate": 1.2478713851266088e-06,
+      "loss": 0.305,
+      "step": 409
+    },
+    {
+      "epoch": 0.9323479249573622,
+      "grad_norm": 3.2704060077667236,
+      "learning_rate": 1.1689722144956671e-06,
+      "loss": 0.431,
+      "step": 410
+    },
+    {
+      "epoch": 0.9346219442865265,
+      "grad_norm": 3.7926714420318604,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.3448,
+      "step": 411
+    },
+    {
+      "epoch": 0.9368959636156907,
+      "grad_norm": 3.6115946769714355,
+      "learning_rate": 1.0188186139349354e-06,
+      "loss": 0.4043,
+      "step": 412
+    },
+    {
+      "epoch": 0.939169982944855,
+      "grad_norm": 1.762302041053772,
+      "learning_rate": 9.475720156880419e-07,
+      "loss": 0.099,
+      "step": 413
+    },
+    {
+      "epoch": 0.9414440022740194,
+      "grad_norm": 0.5029296278953552,
+      "learning_rate": 8.788838846355341e-07,
+      "loss": 0.0218,
+      "step": 414
+    },
+    {
+      "epoch": 0.9437180216031836,
+      "grad_norm": 0.4769535958766937,
+      "learning_rate": 8.127578033998662e-07,
+      "loss": 0.0152,
+      "step": 415
+    },
+    {
+      "epoch": 0.9459920409323479,
+      "grad_norm": 0.8436768651008606,
+      "learning_rate": 7.491972209725806e-07,
+      "loss": 0.0319,
+      "step": 416
+    },
+    {
+      "epoch": 0.9482660602615123,
+      "grad_norm": 0.6843982934951782,
+      "learning_rate": 6.88205452534435e-07,
+      "loss": 0.038,
+      "step": 417
+    },
+    {
+      "epoch": 0.9505400795906765,
+      "grad_norm": 0.6065624356269836,
+      "learning_rate": 6.297856792824741e-07,
+      "loss": 0.0266,
+      "step": 418
+    },
+    {
+      "epoch": 0.9528140989198408,
+      "grad_norm": 1.784542202949524,
+      "learning_rate": 5.739409482640956e-07,
+      "loss": 0.041,
+      "step": 419
+    },
+    {
+      "epoch": 0.9550881182490051,
+      "grad_norm": 0.4885413646697998,
+      "learning_rate": 5.206741722181386e-07,
+      "loss": 0.0198,
+      "step": 420
+    },
+    {
+      "epoch": 0.9573621375781695,
+      "grad_norm": 0.47082552313804626,
+      "learning_rate": 4.699881294229602e-07,
+      "loss": 0.0176,
+      "step": 421
+    },
+    {
+      "epoch": 0.9596361569073337,
+      "grad_norm": 0.7649410963058472,
+      "learning_rate": 4.2188546355153013e-07,
+      "loss": 0.0433,
+      "step": 422
+    },
+    {
+      "epoch": 0.961910176236498,
+      "grad_norm": 2.893561840057373,
+      "learning_rate": 3.763686835335345e-07,
+      "loss": 0.2227,
+      "step": 423
+    },
+    {
+      "epoch": 0.9641841955656623,
+      "grad_norm": 2.788997173309326,
+      "learning_rate": 3.334401634245032e-07,
+      "loss": 0.226,
+      "step": 424
+    },
+    {
+      "epoch": 0.9664582148948266,
+      "grad_norm": 5.2662739753723145,
+      "learning_rate": 2.9310214228202013e-07,
+      "loss": 0.5212,
+      "step": 425
+    },
+    {
+      "epoch": 0.9687322342239909,
+      "grad_norm": 2.4451019763946533,
+      "learning_rate": 2.553567240489052e-07,
+      "loss": 0.235,
+      "step": 426
+    },
+    {
+      "epoch": 0.9710062535531552,
+      "grad_norm": 3.208662748336792,
+      "learning_rate": 2.202058774434912e-07,
+      "loss": 0.2381,
+      "step": 427
+    },
+    {
+      "epoch": 0.9732802728823196,
+      "grad_norm": 3.0693438053131104,
+      "learning_rate": 1.8765143585693922e-07,
+      "loss": 0.2596,
+      "step": 428
+    },
+    {
+      "epoch": 0.9755542922114838,
+      "grad_norm": 4.01469612121582,
+      "learning_rate": 1.5769509725760966e-07,
+      "loss": 0.2541,
+      "step": 429
+    },
+    {
+      "epoch": 0.9778283115406481,
+      "grad_norm": 2.0251407623291016,
+      "learning_rate": 1.3033842410251075e-07,
+      "loss": 0.1398,
+      "step": 430
+    },
+    {
+      "epoch": 0.9801023308698124,
+      "grad_norm": 1.4239128828048706,
+      "learning_rate": 1.0558284325578038e-07,
+      "loss": 0.0793,
+      "step": 431
+    },
+    {
+      "epoch": 0.9823763501989767,
+      "grad_norm": 1.6692121028900146,
+      "learning_rate": 8.342964591430136e-08,
+      "loss": 0.1298,
+      "step": 432
+    },
+    {
+      "epoch": 0.984650369528141,
+      "grad_norm": 1.8359993696212769,
+      "learning_rate": 6.38799875403051e-08,
+      "loss": 0.0778,
+      "step": 433
+    },
+    {
+      "epoch": 0.9869243888573053,
+      "grad_norm": 1.6211203336715698,
+      "learning_rate": 4.6934887801164396e-08,
+      "loss": 0.148,
+      "step": 434
+    },
+    {
+      "epoch": 0.9891984081864695,
+      "grad_norm": 0.7776852250099182,
+      "learning_rate": 3.259523051615254e-08,
+      "loss": 0.0437,
+      "step": 435
+    },
+    {
+      "epoch": 0.9914724275156339,
+      "grad_norm": 0.6490265727043152,
+      "learning_rate": 2.086176361038583e-08,
+      "loss": 0.0175,
+      "step": 436
+    },
+    {
+      "epoch": 0.9937464468447982,
+      "grad_norm": 1.8342341184616089,
+      "learning_rate": 1.173509907579362e-08,
+      "loss": 0.0702,
+      "step": 437
+    },
+    {
+      "epoch": 0.9960204661739624,
+      "grad_norm": 1.4983391761779785,
+      "learning_rate": 5.215712939210526e-09,
+      "loss": 0.0609,
+      "step": 438
+    },
+    {
+      "epoch": 0.9982944855031268,
+      "grad_norm": 0.8002070784568787,
+      "learning_rate": 1.3039452375351868e-09,
+      "loss": 0.0323,
+      "step": 439
+    },
+    {
+      "epoch": 0.9982944855031268,
+      "eval_loss": 0.180589497089386,
+      "eval_runtime": 13.3573,
+      "eval_samples_per_second": 27.775,
+      "eval_steps_per_second": 6.962,
+      "step": 439
+    },
+    {
+      "epoch": 1.0017055144968732,
+      "grad_norm": 3.280466079711914,
+      "learning_rate": 0.0,
+      "loss": 0.4975,
+      "step": 440
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9.712732842491904e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null