Training in progress, step 570, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +991 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d796c5d280566587a8ef2b2ab38733ede8d9e5b67c2e12612a3cec8eb5a83f3d
 size 81576

 version https://git-lfs.github.com/spec/v1
+oid sha256:51e98f67765a2eb2368c2f2e215e5056e6877b63a99d2c749fa289541026b705
 size 81576

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48f1265ac635ad13e4f6c1a2becc081b202cbe5b61e836c335c4bdf0be18801c
 size 173094

 version https://git-lfs.github.com/spec/v1
+oid sha256:2cfe11ad55a96cad8bdac1448ebc4e55b90395474681435aab23be456282687f
 size 173094

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a286108953ad03a08dcce39ded4bd4d34d44de383cc996117af0f2b57e23a641
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2b1b4d4fc5af089dfffe964a368b7be036a5a5e60d94d3627c20a7cd7efe1e0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20b6399ed1f4a4e925c86dd58b4b559fa96fc0370b46ab280a68ad58e4333e47
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3efe9a7dcecfc294170eaf9d37d88929a06a263232a2f5ff76c24580b1ffbbf
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7526315789473684,
   "eval_steps": 143,
-  "global_step": 429,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3042,6 +3042,993 @@
       "eval_samples_per_second": 317.257,
       "eval_steps_per_second": 158.629,
       "step": 429
     }
   ],
   "logging_steps": 1,
@@ -3056,12 +4043,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 42819438772224.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 143,
+  "global_step": 570,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 317.257,
       "eval_steps_per_second": 158.629,
       "step": 429
+    },
+    {
+      "epoch": 0.7543859649122807,
+      "grad_norm": 0.29503875970840454,
+      "learning_rate": 2.9289321881345254e-05,
+      "loss": 10.691,
+      "step": 430
+    },
+    {
+      "epoch": 0.756140350877193,
+      "grad_norm": 0.2280651479959488,
+      "learning_rate": 2.8893750684111975e-05,
+      "loss": 10.7043,
+      "step": 431
+    },
+    {
+      "epoch": 0.7578947368421053,
+      "grad_norm": 0.2564852237701416,
+      "learning_rate": 2.850041733353247e-05,
+      "loss": 10.6683,
+      "step": 432
+    },
+    {
+      "epoch": 0.7596491228070176,
+      "grad_norm": 0.19316798448562622,
+      "learning_rate": 2.8109334208542993e-05,
+      "loss": 10.6773,
+      "step": 433
+    },
+    {
+      "epoch": 0.7614035087719299,
+      "grad_norm": 0.18743948638439178,
+      "learning_rate": 2.7720513617260856e-05,
+      "loss": 10.6676,
+      "step": 434
+    },
+    {
+      "epoch": 0.7631578947368421,
+      "grad_norm": 0.21178874373435974,
+      "learning_rate": 2.7333967796597315e-05,
+      "loss": 10.7064,
+      "step": 435
+    },
+    {
+      "epoch": 0.7649122807017544,
+      "grad_norm": 0.19119100272655487,
+      "learning_rate": 2.694970891187225e-05,
+      "loss": 10.6512,
+      "step": 436
+    },
+    {
+      "epoch": 0.7666666666666667,
+      "grad_norm": 0.25586599111557007,
+      "learning_rate": 2.6567749056431467e-05,
+      "loss": 10.6905,
+      "step": 437
+    },
+    {
+      "epoch": 0.7684210526315789,
+      "grad_norm": 0.17666222155094147,
+      "learning_rate": 2.6188100251265945e-05,
+      "loss": 10.6544,
+      "step": 438
+    },
+    {
+      "epoch": 0.7701754385964912,
+      "grad_norm": 0.1568339616060257,
+      "learning_rate": 2.5810774444633644e-05,
+      "loss": 10.6804,
+      "step": 439
+    },
+    {
+      "epoch": 0.7719298245614035,
+      "grad_norm": 0.2483753114938736,
+      "learning_rate": 2.5435783511683443e-05,
+      "loss": 10.6641,
+      "step": 440
+    },
+    {
+      "epoch": 0.7736842105263158,
+      "grad_norm": 0.22110183537006378,
+      "learning_rate": 2.506313925408127e-05,
+      "loss": 10.6885,
+      "step": 441
+    },
+    {
+      "epoch": 0.775438596491228,
+      "grad_norm": 0.1530926376581192,
+      "learning_rate": 2.4692853399638917e-05,
+      "loss": 10.6533,
+      "step": 442
+    },
+    {
+      "epoch": 0.7771929824561403,
+      "grad_norm": 0.23491761088371277,
+      "learning_rate": 2.4324937601944685e-05,
+      "loss": 10.6488,
+      "step": 443
+    },
+    {
+      "epoch": 0.7789473684210526,
+      "grad_norm": 0.21756233274936676,
+      "learning_rate": 2.3959403439996907e-05,
+      "loss": 10.6742,
+      "step": 444
+    },
+    {
+      "epoch": 0.7807017543859649,
+      "grad_norm": 0.18125437200069427,
+      "learning_rate": 2.3596262417839255e-05,
+      "loss": 10.6726,
+      "step": 445
+    },
+    {
+      "epoch": 0.7824561403508772,
+      "grad_norm": 0.23887260258197784,
+      "learning_rate": 2.323552596419889e-05,
+      "loss": 10.6596,
+      "step": 446
+    },
+    {
+      "epoch": 0.7842105263157895,
+      "grad_norm": 0.18337543308734894,
+      "learning_rate": 2.2877205432126657e-05,
+      "loss": 10.6511,
+      "step": 447
+    },
+    {
+      "epoch": 0.7859649122807018,
+      "grad_norm": 0.2415776550769806,
+      "learning_rate": 2.2521312098639914e-05,
+      "loss": 10.6739,
+      "step": 448
+    },
+    {
+      "epoch": 0.787719298245614,
+      "grad_norm": 0.18577896058559418,
+      "learning_rate": 2.2167857164367422e-05,
+      "loss": 10.6954,
+      "step": 449
+    },
+    {
+      "epoch": 0.7894736842105263,
+      "grad_norm": 0.2739926278591156,
+      "learning_rate": 2.181685175319702e-05,
+      "loss": 10.6477,
+      "step": 450
+    },
+    {
+      "epoch": 0.7912280701754386,
+      "grad_norm": 0.2336394488811493,
+      "learning_rate": 2.146830691192553e-05,
+      "loss": 10.6947,
+      "step": 451
+    },
+    {
+      "epoch": 0.7929824561403509,
+      "grad_norm": 0.22242297232151031,
+      "learning_rate": 2.11222336099109e-05,
+      "loss": 10.6602,
+      "step": 452
+    },
+    {
+      "epoch": 0.7947368421052632,
+      "grad_norm": 0.18758748471736908,
+      "learning_rate": 2.0778642738727272e-05,
+      "loss": 10.6843,
+      "step": 453
+    },
+    {
+      "epoch": 0.7964912280701755,
+      "grad_norm": 0.1851445883512497,
+      "learning_rate": 2.043754511182191e-05,
+      "loss": 10.6776,
+      "step": 454
+    },
+    {
+      "epoch": 0.7982456140350878,
+      "grad_norm": 0.1795864701271057,
+      "learning_rate": 2.009895146417512e-05,
+      "loss": 10.6947,
+      "step": 455
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.21275727450847626,
+      "learning_rate": 1.9762872451962212e-05,
+      "loss": 10.69,
+      "step": 456
+    },
+    {
+      "epoch": 0.8017543859649123,
+      "grad_norm": 0.1958458423614502,
+      "learning_rate": 1.9429318652218276e-05,
+      "loss": 10.6889,
+      "step": 457
+    },
+    {
+      "epoch": 0.8035087719298246,
+      "grad_norm": 0.3333258032798767,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 10.652,
+      "step": 458
+    },
+    {
+      "epoch": 0.8052631578947368,
+      "grad_norm": 0.22521115839481354,
+      "learning_rate": 1.8769828600581496e-05,
+      "loss": 10.6767,
+      "step": 459
+    },
+    {
+      "epoch": 0.8070175438596491,
+      "grad_norm": 0.22420458495616913,
+      "learning_rate": 1.8443913104073983e-05,
+      "loss": 10.6827,
+      "step": 460
+    },
+    {
+      "epoch": 0.8087719298245614,
+      "grad_norm": 0.28353458642959595,
+      "learning_rate": 1.8120564330152923e-05,
+      "loss": 10.6371,
+      "step": 461
+    },
+    {
+      "epoch": 0.8105263157894737,
+      "grad_norm": 0.27955901622772217,
+      "learning_rate": 1.7799792455209018e-05,
+      "loss": 10.6391,
+      "step": 462
+    },
+    {
+      "epoch": 0.8122807017543859,
+      "grad_norm": 0.24275584518909454,
+      "learning_rate": 1.7481607574533044e-05,
+      "loss": 10.669,
+      "step": 463
+    },
+    {
+      "epoch": 0.8140350877192982,
+      "grad_norm": 0.21551206707954407,
+      "learning_rate": 1.716601970199836e-05,
+      "loss": 10.6878,
+      "step": 464
+    },
+    {
+      "epoch": 0.8157894736842105,
+      "grad_norm": 0.20878919959068298,
+      "learning_rate": 1.6853038769745467e-05,
+      "loss": 10.6937,
+      "step": 465
+    },
+    {
+      "epoch": 0.8175438596491228,
+      "grad_norm": 0.21660566329956055,
+      "learning_rate": 1.6542674627869737e-05,
+      "loss": 10.6919,
+      "step": 466
+    },
+    {
+      "epoch": 0.8192982456140351,
+      "grad_norm": 0.2458791881799698,
+      "learning_rate": 1.6234937044111152e-05,
+      "loss": 10.6738,
+      "step": 467
+    },
+    {
+      "epoch": 0.8210526315789474,
+      "grad_norm": 0.22026145458221436,
+      "learning_rate": 1.5929835703546993e-05,
+      "loss": 10.6797,
+      "step": 468
+    },
+    {
+      "epoch": 0.8228070175438597,
+      "grad_norm": 0.21497632563114166,
+      "learning_rate": 1.5627380208287114e-05,
+      "loss": 10.6807,
+      "step": 469
+    },
+    {
+      "epoch": 0.8245614035087719,
+      "grad_norm": 0.24622896313667297,
+      "learning_rate": 1.5327580077171587e-05,
+      "loss": 10.6612,
+      "step": 470
+    },
+    {
+      "epoch": 0.8263157894736842,
+      "grad_norm": 0.2639427185058594,
+      "learning_rate": 1.5030444745471294e-05,
+      "loss": 10.6531,
+      "step": 471
+    },
+    {
+      "epoch": 0.8280701754385965,
+      "grad_norm": 0.21809430420398712,
+      "learning_rate": 1.4735983564590783e-05,
+      "loss": 10.6821,
+      "step": 472
+    },
+    {
+      "epoch": 0.8298245614035088,
+      "grad_norm": 0.2837026119232178,
+      "learning_rate": 1.4444205801774202e-05,
+      "loss": 10.6852,
+      "step": 473
+    },
+    {
+      "epoch": 0.8315789473684211,
+      "grad_norm": 0.2315063625574112,
+      "learning_rate": 1.415512063981339e-05,
+      "loss": 10.6723,
+      "step": 474
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 0.2142920196056366,
+      "learning_rate": 1.3868737176759106e-05,
+      "loss": 10.6347,
+      "step": 475
+    },
+    {
+      "epoch": 0.8350877192982457,
+      "grad_norm": 0.22398516535758972,
+      "learning_rate": 1.3585064425634542e-05,
+      "loss": 10.6954,
+      "step": 476
+    },
+    {
+      "epoch": 0.8368421052631579,
+      "grad_norm": 0.18427734076976776,
+      "learning_rate": 1.330411131415169e-05,
+      "loss": 10.6796,
+      "step": 477
+    },
+    {
+      "epoch": 0.8385964912280702,
+      "grad_norm": 0.20222590863704681,
+      "learning_rate": 1.3025886684430467e-05,
+      "loss": 10.6647,
+      "step": 478
+    },
+    {
+      "epoch": 0.8403508771929824,
+      "grad_norm": 0.19547833502292633,
+      "learning_rate": 1.2750399292720283e-05,
+      "loss": 10.6801,
+      "step": 479
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 0.23069126904010773,
+      "learning_rate": 1.2477657809124631e-05,
+      "loss": 10.6665,
+      "step": 480
+    },
+    {
+      "epoch": 0.843859649122807,
+      "grad_norm": 0.17966169118881226,
+      "learning_rate": 1.2207670817328066e-05,
+      "loss": 10.6755,
+      "step": 481
+    },
+    {
+      "epoch": 0.8456140350877193,
+      "grad_norm": 0.2698460817337036,
+      "learning_rate": 1.19404468143262e-05,
+      "loss": 10.6617,
+      "step": 482
+    },
+    {
+      "epoch": 0.8473684210526315,
+      "grad_norm": 0.1589714139699936,
+      "learning_rate": 1.1675994210158181e-05,
+      "loss": 10.6625,
+      "step": 483
+    },
+    {
+      "epoch": 0.8491228070175438,
+      "grad_norm": 0.23473794758319855,
+      "learning_rate": 1.141432132764202e-05,
+      "loss": 10.6698,
+      "step": 484
+    },
+    {
+      "epoch": 0.8508771929824561,
+      "grad_norm": 0.25006183981895447,
+      "learning_rate": 1.1155436402112785e-05,
+      "loss": 10.6668,
+      "step": 485
+    },
+    {
+      "epoch": 0.8526315789473684,
+      "grad_norm": 0.200173020362854,
+      "learning_rate": 1.0899347581163221e-05,
+      "loss": 10.6611,
+      "step": 486
+    },
+    {
+      "epoch": 0.8543859649122807,
+      "grad_norm": 0.18156000971794128,
+      "learning_rate": 1.0646062924387512e-05,
+      "loss": 10.6593,
+      "step": 487
+    },
+    {
+      "epoch": 0.856140350877193,
+      "grad_norm": 0.2163092941045761,
+      "learning_rate": 1.0395590403127486e-05,
+      "loss": 10.6704,
+      "step": 488
+    },
+    {
+      "epoch": 0.8578947368421053,
+      "grad_norm": 0.29484280943870544,
+      "learning_rate": 1.0147937900221883e-05,
+      "loss": 10.6809,
+      "step": 489
+    },
+    {
+      "epoch": 0.8596491228070176,
+      "grad_norm": 0.2818056046962738,
+      "learning_rate": 9.903113209758096e-06,
+      "loss": 10.6291,
+      "step": 490
+    },
+    {
+      "epoch": 0.8614035087719298,
+      "grad_norm": 0.19995582103729248,
+      "learning_rate": 9.661124036827063e-06,
+      "loss": 10.6645,
+      "step": 491
+    },
+    {
+      "epoch": 0.8631578947368421,
+      "grad_norm": 0.25295835733413696,
+      "learning_rate": 9.421977997280596e-06,
+      "loss": 10.6527,
+      "step": 492
+    },
+    {
+      "epoch": 0.8649122807017544,
+      "grad_norm": 0.1981017291545868,
+      "learning_rate": 9.185682617491863e-06,
+      "loss": 10.6562,
+      "step": 493
+    },
+    {
+      "epoch": 0.8666666666666667,
+      "grad_norm": 0.16144883632659912,
+      "learning_rate": 8.952245334118414e-06,
+      "loss": 10.6626,
+      "step": 494
+    },
+    {
+      "epoch": 0.868421052631579,
+      "grad_norm": 0.23267914354801178,
+      "learning_rate": 8.72167349386811e-06,
+      "loss": 10.6941,
+      "step": 495
+    },
+    {
+      "epoch": 0.8701754385964913,
+      "grad_norm": 0.20202142000198364,
+      "learning_rate": 8.493974353268019e-06,
+      "loss": 10.6771,
+      "step": 496
+    },
+    {
+      "epoch": 0.8719298245614036,
+      "grad_norm": 0.34231188893318176,
+      "learning_rate": 8.269155078435931e-06,
+      "loss": 10.7252,
+      "step": 497
+    },
+    {
+      "epoch": 0.8736842105263158,
+      "grad_norm": 0.17752091586589813,
+      "learning_rate": 8.047222744854943e-06,
+      "loss": 10.6724,
+      "step": 498
+    },
+    {
+      "epoch": 0.875438596491228,
+      "grad_norm": 0.26747509837150574,
+      "learning_rate": 7.828184337150613e-06,
+      "loss": 10.7056,
+      "step": 499
+    },
+    {
+      "epoch": 0.8771929824561403,
+      "grad_norm": 0.26659560203552246,
+      "learning_rate": 7.612046748871327e-06,
+      "loss": 10.7047,
+      "step": 500
+    },
+    {
+      "epoch": 0.8789473684210526,
+      "grad_norm": 0.19917453825473785,
+      "learning_rate": 7.398816782271223e-06,
+      "loss": 10.704,
+      "step": 501
+    },
+    {
+      "epoch": 0.8807017543859649,
+      "grad_norm": 0.23498232662677765,
+      "learning_rate": 7.1885011480961164e-06,
+      "loss": 10.6675,
+      "step": 502
+    },
+    {
+      "epoch": 0.8824561403508772,
+      "grad_norm": 0.21307386457920074,
+      "learning_rate": 6.981106465372389e-06,
+      "loss": 10.6562,
+      "step": 503
+    },
+    {
+      "epoch": 0.8842105263157894,
+      "grad_norm": 0.3048202097415924,
+      "learning_rate": 6.776639261198581e-06,
+      "loss": 10.6504,
+      "step": 504
+    },
+    {
+      "epoch": 0.8859649122807017,
+      "grad_norm": 0.221920445561409,
+      "learning_rate": 6.5751059705400295e-06,
+      "loss": 10.6859,
+      "step": 505
+    },
+    {
+      "epoch": 0.887719298245614,
+      "grad_norm": 0.19181109964847565,
+      "learning_rate": 6.37651293602628e-06,
+      "loss": 10.6674,
+      "step": 506
+    },
+    {
+      "epoch": 0.8894736842105263,
+      "grad_norm": 0.23929765820503235,
+      "learning_rate": 6.180866407751595e-06,
+      "loss": 10.6818,
+      "step": 507
+    },
+    {
+      "epoch": 0.8912280701754386,
+      "grad_norm": 0.28008386492729187,
+      "learning_rate": 5.988172543078097e-06,
+      "loss": 10.7264,
+      "step": 508
+    },
+    {
+      "epoch": 0.8929824561403509,
+      "grad_norm": 0.31902721524238586,
+      "learning_rate": 5.7984374064421035e-06,
+      "loss": 10.6314,
+      "step": 509
+    },
+    {
+      "epoch": 0.8947368421052632,
+      "grad_norm": 0.22529898583889008,
+      "learning_rate": 5.611666969163243e-06,
+      "loss": 10.6476,
+      "step": 510
+    },
+    {
+      "epoch": 0.8964912280701754,
+      "grad_norm": 0.18052135407924652,
+      "learning_rate": 5.427867109256457e-06,
+      "loss": 10.6507,
+      "step": 511
+    },
+    {
+      "epoch": 0.8982456140350877,
+      "grad_norm": 0.20663322508335114,
+      "learning_rate": 5.247043611247127e-06,
+      "loss": 10.6917,
+      "step": 512
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 0.19402727484703064,
+      "learning_rate": 5.0692021659888735e-06,
+      "loss": 10.6435,
+      "step": 513
+    },
+    {
+      "epoch": 0.9017543859649123,
+      "grad_norm": 0.18608134984970093,
+      "learning_rate": 4.8943483704846475e-06,
+      "loss": 10.6597,
+      "step": 514
+    },
+    {
+      "epoch": 0.9035087719298246,
+      "grad_norm": 0.23060499131679535,
+      "learning_rate": 4.722487727710368e-06,
+      "loss": 10.673,
+      "step": 515
+    },
+    {
+      "epoch": 0.9052631578947369,
+      "grad_norm": 0.2104937583208084,
+      "learning_rate": 4.553625646441928e-06,
+      "loss": 10.6911,
+      "step": 516
+    },
+    {
+      "epoch": 0.9070175438596492,
+      "grad_norm": 0.17810438573360443,
+      "learning_rate": 4.3877674410848e-06,
+      "loss": 10.6848,
+      "step": 517
+    },
+    {
+      "epoch": 0.9087719298245615,
+      "grad_norm": 0.25825297832489014,
+      "learning_rate": 4.224918331506955e-06,
+      "loss": 10.6735,
+      "step": 518
+    },
+    {
+      "epoch": 0.9105263157894737,
+      "grad_norm": 0.16333693265914917,
+      "learning_rate": 4.065083442874418e-06,
+      "loss": 10.6632,
+      "step": 519
+    },
+    {
+      "epoch": 0.9122807017543859,
+      "grad_norm": 0.21222874522209167,
+      "learning_rate": 3.908267805490051e-06,
+      "loss": 10.6984,
+      "step": 520
+    },
+    {
+      "epoch": 0.9140350877192982,
+      "grad_norm": 0.20313438773155212,
+      "learning_rate": 3.7544763546352834e-06,
+      "loss": 10.6889,
+      "step": 521
+    },
+    {
+      "epoch": 0.9157894736842105,
+      "grad_norm": 0.201041117310524,
+      "learning_rate": 3.6037139304146762e-06,
+      "loss": 10.6284,
+      "step": 522
+    },
+    {
+      "epoch": 0.9175438596491228,
+      "grad_norm": 0.2828705906867981,
+      "learning_rate": 3.455985277603713e-06,
+      "loss": 10.6829,
+      "step": 523
+    },
+    {
+      "epoch": 0.9192982456140351,
+      "grad_norm": 0.2506016492843628,
+      "learning_rate": 3.311295045499363e-06,
+      "loss": 10.6884,
+      "step": 524
+    },
+    {
+      "epoch": 0.9210526315789473,
+      "grad_norm": 0.42757734656333923,
+      "learning_rate": 3.169647787773866e-06,
+      "loss": 10.6356,
+      "step": 525
+    },
+    {
+      "epoch": 0.9228070175438596,
+      "grad_norm": 0.2824893295764923,
+      "learning_rate": 3.0310479623313127e-06,
+      "loss": 10.7104,
+      "step": 526
+    },
+    {
+      "epoch": 0.9245614035087719,
+      "grad_norm": 0.2309618890285492,
+      "learning_rate": 2.8954999311674558e-06,
+      "loss": 10.68,
+      "step": 527
+    },
+    {
+      "epoch": 0.9263157894736842,
+      "grad_norm": 0.23892293870449066,
+      "learning_rate": 2.7630079602323442e-06,
+      "loss": 10.6244,
+      "step": 528
+    },
+    {
+      "epoch": 0.9280701754385965,
+      "grad_norm": 0.21768365800380707,
+      "learning_rate": 2.6335762192960743e-06,
+      "loss": 10.6715,
+      "step": 529
+    },
+    {
+      "epoch": 0.9298245614035088,
+      "grad_norm": 0.22950419783592224,
+      "learning_rate": 2.5072087818176382e-06,
+      "loss": 10.6401,
+      "step": 530
+    },
+    {
+      "epoch": 0.9315789473684211,
+      "grad_norm": 0.20661982893943787,
+      "learning_rate": 2.383909624816616e-06,
+      "loss": 10.6522,
+      "step": 531
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 0.18806463479995728,
+      "learning_rate": 2.2636826287480873e-06,
+      "loss": 10.6811,
+      "step": 532
+    },
+    {
+      "epoch": 0.9350877192982456,
+      "grad_norm": 0.3133368194103241,
+      "learning_rate": 2.1465315773804616e-06,
+      "loss": 10.6325,
+      "step": 533
+    },
+    {
+      "epoch": 0.9368421052631579,
+      "grad_norm": 0.23937764763832092,
+      "learning_rate": 2.032460157676452e-06,
+      "loss": 10.669,
+      "step": 534
+    },
+    {
+      "epoch": 0.9385964912280702,
+      "grad_norm": 0.21383170783519745,
+      "learning_rate": 1.921471959676957e-06,
+      "loss": 10.6751,
+      "step": 535
+    },
+    {
+      "epoch": 0.9403508771929825,
+      "grad_norm": 0.3915383219718933,
+      "learning_rate": 1.81357047638816e-06,
+      "loss": 10.6673,
+      "step": 536
+    },
+    {
+      "epoch": 0.9421052631578948,
+      "grad_norm": 0.21916410326957703,
+      "learning_rate": 1.7087591036715534e-06,
+      "loss": 10.7053,
+      "step": 537
+    },
+    {
+      "epoch": 0.9438596491228071,
+      "grad_norm": 0.26692306995391846,
+      "learning_rate": 1.6070411401370334e-06,
+      "loss": 10.6689,
+      "step": 538
+    },
+    {
+      "epoch": 0.9456140350877194,
+      "grad_norm": 0.19968879222869873,
+      "learning_rate": 1.5084197870391837e-06,
+      "loss": 10.7208,
+      "step": 539
+    },
+    {
+      "epoch": 0.9473684210526315,
+      "grad_norm": 0.21784716844558716,
+      "learning_rate": 1.4128981481764115e-06,
+      "loss": 10.6356,
+      "step": 540
+    },
+    {
+      "epoch": 0.9491228070175438,
+      "grad_norm": 0.1975242644548416,
+      "learning_rate": 1.3204792297933588e-06,
+      "loss": 10.7024,
+      "step": 541
+    },
+    {
+      "epoch": 0.9508771929824561,
+      "grad_norm": 0.2712066173553467,
+      "learning_rate": 1.231165940486234e-06,
+      "loss": 10.6415,
+      "step": 542
+    },
+    {
+      "epoch": 0.9526315789473684,
+      "grad_norm": 0.19951169192790985,
+      "learning_rate": 1.1449610911112741e-06,
+      "loss": 10.6615,
+      "step": 543
+    },
+    {
+      "epoch": 0.9543859649122807,
+      "grad_norm": 0.21636833250522614,
+      "learning_rate": 1.0618673946963365e-06,
+      "loss": 10.67,
+      "step": 544
+    },
+    {
+      "epoch": 0.956140350877193,
+      "grad_norm": 0.24349869787693024,
+      "learning_rate": 9.818874663554357e-07,
+      "loss": 10.658,
+      "step": 545
+    },
+    {
+      "epoch": 0.9578947368421052,
+      "grad_norm": 0.22371384501457214,
+      "learning_rate": 9.0502382320653e-07,
+      "loss": 10.6703,
+      "step": 546
+    },
+    {
+      "epoch": 0.9596491228070175,
+      "grad_norm": 0.2083524912595749,
+      "learning_rate": 8.31278884292186e-07,
+      "loss": 10.6766,
+      "step": 547
+    },
+    {
+      "epoch": 0.9614035087719298,
+      "grad_norm": 0.2264506220817566,
+      "learning_rate": 7.606549705035937e-07,
+      "loss": 10.672,
+      "step": 548
+    },
+    {
+      "epoch": 0.9631578947368421,
+      "grad_norm": 0.21471793949604034,
+      "learning_rate": 6.931543045073708e-07,
+      "loss": 10.7041,
+      "step": 549
+    },
+    {
+      "epoch": 0.9649122807017544,
+      "grad_norm": 0.20497629046440125,
+      "learning_rate": 6.287790106757396e-07,
+      "loss": 10.6923,
+      "step": 550
+    },
+    {
+      "epoch": 0.9666666666666667,
+      "grad_norm": 0.18738481402397156,
+      "learning_rate": 5.675311150195928e-07,
+      "loss": 10.6622,
+      "step": 551
+    },
+    {
+      "epoch": 0.968421052631579,
+      "grad_norm": 0.24018672108650208,
+      "learning_rate": 5.094125451247655e-07,
+      "loss": 10.629,
+      "step": 552
+    },
+    {
+      "epoch": 0.9701754385964912,
+      "grad_norm": 0.23290672898292542,
+      "learning_rate": 4.544251300913405e-07,
+      "loss": 10.6888,
+      "step": 553
+    },
+    {
+      "epoch": 0.9719298245614035,
+      "grad_norm": 0.3501298129558563,
+      "learning_rate": 4.025706004760932e-07,
+      "loss": 10.6527,
+      "step": 554
+    },
+    {
+      "epoch": 0.9736842105263158,
+      "grad_norm": 0.2251330465078354,
+      "learning_rate": 3.5385058823809156e-07,
+      "loss": 10.696,
+      "step": 555
+    },
+    {
+      "epoch": 0.9754385964912281,
+      "grad_norm": 0.24375322461128235,
+      "learning_rate": 3.0826662668720364e-07,
+      "loss": 10.7063,
+      "step": 556
+    },
+    {
+      "epoch": 0.9771929824561404,
+      "grad_norm": 0.2920641303062439,
+      "learning_rate": 2.658201504359803e-07,
+      "loss": 10.645,
+      "step": 557
+    },
+    {
+      "epoch": 0.9789473684210527,
+      "grad_norm": 0.29870396852493286,
+      "learning_rate": 2.265124953543918e-07,
+      "loss": 10.6955,
+      "step": 558
+    },
+    {
+      "epoch": 0.980701754385965,
+      "grad_norm": 0.2032945305109024,
+      "learning_rate": 1.9034489852787218e-07,
+      "loss": 10.7028,
+      "step": 559
+    },
+    {
+      "epoch": 0.9824561403508771,
+      "grad_norm": 0.16992153227329254,
+      "learning_rate": 1.5731849821833954e-07,
+      "loss": 10.6516,
+      "step": 560
+    },
+    {
+      "epoch": 0.9842105263157894,
+      "grad_norm": 0.19095556437969208,
+      "learning_rate": 1.274343338283801e-07,
+      "loss": 10.6713,
+      "step": 561
+    },
+    {
+      "epoch": 0.9859649122807017,
+      "grad_norm": 0.2375078797340393,
+      "learning_rate": 1.0069334586854107e-07,
+      "loss": 10.6812,
+      "step": 562
+    },
+    {
+      "epoch": 0.987719298245614,
+      "grad_norm": 0.1917848289012909,
+      "learning_rate": 7.709637592770991e-08,
+      "loss": 10.6814,
+      "step": 563
+    },
+    {
+      "epoch": 0.9894736842105263,
+      "grad_norm": 0.2119506448507309,
+      "learning_rate": 5.6644166646668826e-08,
+      "loss": 10.7086,
+      "step": 564
+    },
+    {
+      "epoch": 0.9912280701754386,
+      "grad_norm": 0.21006697416305542,
+      "learning_rate": 3.933736169471347e-08,
+      "loss": 10.6638,
+      "step": 565
+    },
+    {
+      "epoch": 0.9929824561403509,
+      "grad_norm": 0.19843615591526031,
+      "learning_rate": 2.5176505749346936e-08,
+      "loss": 10.6587,
+      "step": 566
+    },
+    {
+      "epoch": 0.9947368421052631,
+      "grad_norm": 0.292624831199646,
+      "learning_rate": 1.4162044479182346e-08,
+      "loss": 10.6584,
+      "step": 567
+    },
+    {
+      "epoch": 0.9964912280701754,
+      "grad_norm": 0.14934279024600983,
+      "learning_rate": 6.294324529942941e-09,
+      "loss": 10.6876,
+      "step": 568
+    },
+    {
+      "epoch": 0.9982456140350877,
+      "grad_norm": 0.24475471675395966,
+      "learning_rate": 1.5735935134708613e-09,
+      "loss": 10.6859,
+      "step": 569
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.17199726402759552,
+      "learning_rate": 0.0,
+      "loss": 10.7093,
+      "step": 570
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 56846496301056.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null