Training in progress, step 570, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +991 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95f9ee5321bf184cccf6bd1768794472ea0dab0c045ac19a3e73094ef57ab439
 size 80013120

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba7f260598f9dfc78b063cf2291dafcd9b997ea4c973ea9b07cc51602e495ab9
 size 80013120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e51d1808ca4450a035ddae04d6c29f94fd7739dbaad6d0281e13d8ef19e86dc
 size 41120084

 version https://git-lfs.github.com/spec/v1
+oid sha256:671d4c3971b26a85c5e792fea63ce43be94833ab0aec2967ea53390f68b186a9
 size 41120084

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed6575e00bcb98b4d07e384c21771313f1fae9b22686c7f08a765d9d0109e4e2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b9fa5acaeecc23f2ae7c39a001e08deaa87f6cd3040e3d7ee614526b4af35df
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20b6399ed1f4a4e925c86dd58b4b559fa96fc0370b46ab280a68ad58e4333e47
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3efe9a7dcecfc294170eaf9d37d88929a06a263232a2f5ff76c24580b1ffbbf
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7526315789473684,
   "eval_steps": 143,
-  "global_step": 429,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3042,6 +3042,993 @@
       "eval_samples_per_second": 17.652,
       "eval_steps_per_second": 8.826,
       "step": 429
     }
   ],
   "logging_steps": 1,
@@ -3056,12 +4043,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.465694981896274e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 143,
+  "global_step": 570,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.652,
       "eval_steps_per_second": 8.826,
       "step": 429
+    },
+    {
+      "epoch": 0.7543859649122807,
+      "grad_norm": 0.21846020221710205,
+      "learning_rate": 2.9289321881345254e-05,
+      "loss": 1.4571,
+      "step": 430
+    },
+    {
+      "epoch": 0.756140350877193,
+      "grad_norm": 0.22741371393203735,
+      "learning_rate": 2.8893750684111975e-05,
+      "loss": 1.1949,
+      "step": 431
+    },
+    {
+      "epoch": 0.7578947368421053,
+      "grad_norm": 0.23230372369289398,
+      "learning_rate": 2.850041733353247e-05,
+      "loss": 1.2444,
+      "step": 432
+    },
+    {
+      "epoch": 0.7596491228070176,
+      "grad_norm": 0.20818418264389038,
+      "learning_rate": 2.8109334208542993e-05,
+      "loss": 0.899,
+      "step": 433
+    },
+    {
+      "epoch": 0.7614035087719299,
+      "grad_norm": 0.18375477194786072,
+      "learning_rate": 2.7720513617260856e-05,
+      "loss": 1.089,
+      "step": 434
+    },
+    {
+      "epoch": 0.7631578947368421,
+      "grad_norm": 0.21327602863311768,
+      "learning_rate": 2.7333967796597315e-05,
+      "loss": 1.2009,
+      "step": 435
+    },
+    {
+      "epoch": 0.7649122807017544,
+      "grad_norm": 0.2226010113954544,
+      "learning_rate": 2.694970891187225e-05,
+      "loss": 1.3526,
+      "step": 436
+    },
+    {
+      "epoch": 0.7666666666666667,
+      "grad_norm": 0.18763844668865204,
+      "learning_rate": 2.6567749056431467e-05,
+      "loss": 1.0013,
+      "step": 437
+    },
+    {
+      "epoch": 0.7684210526315789,
+      "grad_norm": 0.18595463037490845,
+      "learning_rate": 2.6188100251265945e-05,
+      "loss": 0.8139,
+      "step": 438
+    },
+    {
+      "epoch": 0.7701754385964912,
+      "grad_norm": 0.226368248462677,
+      "learning_rate": 2.5810774444633644e-05,
+      "loss": 1.1663,
+      "step": 439
+    },
+    {
+      "epoch": 0.7719298245614035,
+      "grad_norm": 0.1991311013698578,
+      "learning_rate": 2.5435783511683443e-05,
+      "loss": 1.0207,
+      "step": 440
+    },
+    {
+      "epoch": 0.7736842105263158,
+      "grad_norm": 0.226077601313591,
+      "learning_rate": 2.506313925408127e-05,
+      "loss": 1.3249,
+      "step": 441
+    },
+    {
+      "epoch": 0.775438596491228,
+      "grad_norm": 0.23103055357933044,
+      "learning_rate": 2.4692853399638917e-05,
+      "loss": 1.0387,
+      "step": 442
+    },
+    {
+      "epoch": 0.7771929824561403,
+      "grad_norm": 0.195194810628891,
+      "learning_rate": 2.4324937601944685e-05,
+      "loss": 1.1331,
+      "step": 443
+    },
+    {
+      "epoch": 0.7789473684210526,
+      "grad_norm": 0.23875974118709564,
+      "learning_rate": 2.3959403439996907e-05,
+      "loss": 1.2292,
+      "step": 444
+    },
+    {
+      "epoch": 0.7807017543859649,
+      "grad_norm": 0.18718209862709045,
+      "learning_rate": 2.3596262417839255e-05,
+      "loss": 1.1075,
+      "step": 445
+    },
+    {
+      "epoch": 0.7824561403508772,
+      "grad_norm": 0.17584474384784698,
+      "learning_rate": 2.323552596419889e-05,
+      "loss": 0.9849,
+      "step": 446
+    },
+    {
+      "epoch": 0.7842105263157895,
+      "grad_norm": 0.16845519840717316,
+      "learning_rate": 2.2877205432126657e-05,
+      "loss": 0.9737,
+      "step": 447
+    },
+    {
+      "epoch": 0.7859649122807018,
+      "grad_norm": 0.19468963146209717,
+      "learning_rate": 2.2521312098639914e-05,
+      "loss": 1.0789,
+      "step": 448
+    },
+    {
+      "epoch": 0.787719298245614,
+      "grad_norm": 0.19649626314640045,
+      "learning_rate": 2.2167857164367422e-05,
+      "loss": 0.9901,
+      "step": 449
+    },
+    {
+      "epoch": 0.7894736842105263,
+      "grad_norm": 0.19049738347530365,
+      "learning_rate": 2.181685175319702e-05,
+      "loss": 0.9139,
+      "step": 450
+    },
+    {
+      "epoch": 0.7912280701754386,
+      "grad_norm": 0.20821937918663025,
+      "learning_rate": 2.146830691192553e-05,
+      "loss": 1.2032,
+      "step": 451
+    },
+    {
+      "epoch": 0.7929824561403509,
+      "grad_norm": 0.21067368984222412,
+      "learning_rate": 2.11222336099109e-05,
+      "loss": 0.8502,
+      "step": 452
+    },
+    {
+      "epoch": 0.7947368421052632,
+      "grad_norm": 0.2066434919834137,
+      "learning_rate": 2.0778642738727272e-05,
+      "loss": 1.0896,
+      "step": 453
+    },
+    {
+      "epoch": 0.7964912280701755,
+      "grad_norm": 0.19079746305942535,
+      "learning_rate": 2.043754511182191e-05,
+      "loss": 0.9596,
+      "step": 454
+    },
+    {
+      "epoch": 0.7982456140350878,
+      "grad_norm": 0.17518748342990875,
+      "learning_rate": 2.009895146417512e-05,
+      "loss": 0.9326,
+      "step": 455
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.20753763616085052,
+      "learning_rate": 1.9762872451962212e-05,
+      "loss": 1.0991,
+      "step": 456
+    },
+    {
+      "epoch": 0.8017543859649123,
+      "grad_norm": 0.20544378459453583,
+      "learning_rate": 1.9429318652218276e-05,
+      "loss": 1.2205,
+      "step": 457
+    },
+    {
+      "epoch": 0.8035087719298246,
+      "grad_norm": 0.20627406239509583,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 0.8546,
+      "step": 458
+    },
+    {
+      "epoch": 0.8052631578947368,
+      "grad_norm": 0.20179207623004913,
+      "learning_rate": 1.8769828600581496e-05,
+      "loss": 1.0499,
+      "step": 459
+    },
+    {
+      "epoch": 0.8070175438596491,
+      "grad_norm": 0.21462082862854004,
+      "learning_rate": 1.8443913104073983e-05,
+      "loss": 1.2047,
+      "step": 460
+    },
+    {
+      "epoch": 0.8087719298245614,
+      "grad_norm": 0.20801608264446259,
+      "learning_rate": 1.8120564330152923e-05,
+      "loss": 1.1157,
+      "step": 461
+    },
+    {
+      "epoch": 0.8105263157894737,
+      "grad_norm": 0.22594526410102844,
+      "learning_rate": 1.7799792455209018e-05,
+      "loss": 1.1778,
+      "step": 462
+    },
+    {
+      "epoch": 0.8122807017543859,
+      "grad_norm": 0.18215923011302948,
+      "learning_rate": 1.7481607574533044e-05,
+      "loss": 1.0573,
+      "step": 463
+    },
+    {
+      "epoch": 0.8140350877192982,
+      "grad_norm": 0.19557355344295502,
+      "learning_rate": 1.716601970199836e-05,
+      "loss": 1.1006,
+      "step": 464
+    },
+    {
+      "epoch": 0.8157894736842105,
+      "grad_norm": 0.23207271099090576,
+      "learning_rate": 1.6853038769745467e-05,
+      "loss": 1.1726,
+      "step": 465
+    },
+    {
+      "epoch": 0.8175438596491228,
+      "grad_norm": 0.1910407841205597,
+      "learning_rate": 1.6542674627869737e-05,
+      "loss": 1.1038,
+      "step": 466
+    },
+    {
+      "epoch": 0.8192982456140351,
+      "grad_norm": 0.1775582730770111,
+      "learning_rate": 1.6234937044111152e-05,
+      "loss": 0.8225,
+      "step": 467
+    },
+    {
+      "epoch": 0.8210526315789474,
+      "grad_norm": 0.21295365691184998,
+      "learning_rate": 1.5929835703546993e-05,
+      "loss": 1.1882,
+      "step": 468
+    },
+    {
+      "epoch": 0.8228070175438597,
+      "grad_norm": 0.23779192566871643,
+      "learning_rate": 1.5627380208287114e-05,
+      "loss": 0.9332,
+      "step": 469
+    },
+    {
+      "epoch": 0.8245614035087719,
+      "grad_norm": 0.21569402515888214,
+      "learning_rate": 1.5327580077171587e-05,
+      "loss": 0.9752,
+      "step": 470
+    },
+    {
+      "epoch": 0.8263157894736842,
+      "grad_norm": 0.20171040296554565,
+      "learning_rate": 1.5030444745471294e-05,
+      "loss": 0.8921,
+      "step": 471
+    },
+    {
+      "epoch": 0.8280701754385965,
+      "grad_norm": 0.2244572937488556,
+      "learning_rate": 1.4735983564590783e-05,
+      "loss": 1.2602,
+      "step": 472
+    },
+    {
+      "epoch": 0.8298245614035088,
+      "grad_norm": 0.21642790734767914,
+      "learning_rate": 1.4444205801774202e-05,
+      "loss": 1.41,
+      "step": 473
+    },
+    {
+      "epoch": 0.8315789473684211,
+      "grad_norm": 0.20672833919525146,
+      "learning_rate": 1.415512063981339e-05,
+      "loss": 1.066,
+      "step": 474
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 0.23660632967948914,
+      "learning_rate": 1.3868737176759106e-05,
+      "loss": 1.247,
+      "step": 475
+    },
+    {
+      "epoch": 0.8350877192982457,
+      "grad_norm": 0.19157983362674713,
+      "learning_rate": 1.3585064425634542e-05,
+      "loss": 0.8976,
+      "step": 476
+    },
+    {
+      "epoch": 0.8368421052631579,
+      "grad_norm": 0.21549120545387268,
+      "learning_rate": 1.330411131415169e-05,
+      "loss": 1.0631,
+      "step": 477
+    },
+    {
+      "epoch": 0.8385964912280702,
+      "grad_norm": 0.21641142666339874,
+      "learning_rate": 1.3025886684430467e-05,
+      "loss": 1.327,
+      "step": 478
+    },
+    {
+      "epoch": 0.8403508771929824,
+      "grad_norm": 0.2355552315711975,
+      "learning_rate": 1.2750399292720283e-05,
+      "loss": 1.3794,
+      "step": 479
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 0.21204498410224915,
+      "learning_rate": 1.2477657809124631e-05,
+      "loss": 1.0638,
+      "step": 480
+    },
+    {
+      "epoch": 0.843859649122807,
+      "grad_norm": 0.19920291006565094,
+      "learning_rate": 1.2207670817328066e-05,
+      "loss": 0.8266,
+      "step": 481
+    },
+    {
+      "epoch": 0.8456140350877193,
+      "grad_norm": 0.20642384886741638,
+      "learning_rate": 1.19404468143262e-05,
+      "loss": 1.1196,
+      "step": 482
+    },
+    {
+      "epoch": 0.8473684210526315,
+      "grad_norm": 0.1852245032787323,
+      "learning_rate": 1.1675994210158181e-05,
+      "loss": 0.9718,
+      "step": 483
+    },
+    {
+      "epoch": 0.8491228070175438,
+      "grad_norm": 0.2029811590909958,
+      "learning_rate": 1.141432132764202e-05,
+      "loss": 0.9548,
+      "step": 484
+    },
+    {
+      "epoch": 0.8508771929824561,
+      "grad_norm": 0.23442597687244415,
+      "learning_rate": 1.1155436402112785e-05,
+      "loss": 1.3182,
+      "step": 485
+    },
+    {
+      "epoch": 0.8526315789473684,
+      "grad_norm": 0.2029426246881485,
+      "learning_rate": 1.0899347581163221e-05,
+      "loss": 1.0673,
+      "step": 486
+    },
+    {
+      "epoch": 0.8543859649122807,
+      "grad_norm": 0.2036561220884323,
+      "learning_rate": 1.0646062924387512e-05,
+      "loss": 1.1552,
+      "step": 487
+    },
+    {
+      "epoch": 0.856140350877193,
+      "grad_norm": 0.1849490851163864,
+      "learning_rate": 1.0395590403127486e-05,
+      "loss": 0.7817,
+      "step": 488
+    },
+    {
+      "epoch": 0.8578947368421053,
+      "grad_norm": 0.196690633893013,
+      "learning_rate": 1.0147937900221883e-05,
+      "loss": 1.0408,
+      "step": 489
+    },
+    {
+      "epoch": 0.8596491228070176,
+      "grad_norm": 0.21207116544246674,
+      "learning_rate": 9.903113209758096e-06,
+      "loss": 1.1144,
+      "step": 490
+    },
+    {
+      "epoch": 0.8614035087719298,
+      "grad_norm": 0.2588272988796234,
+      "learning_rate": 9.661124036827063e-06,
+      "loss": 1.0656,
+      "step": 491
+    },
+    {
+      "epoch": 0.8631578947368421,
+      "grad_norm": 0.2087583690881729,
+      "learning_rate": 9.421977997280596e-06,
+      "loss": 0.9854,
+      "step": 492
+    },
+    {
+      "epoch": 0.8649122807017544,
+      "grad_norm": 0.2295677214860916,
+      "learning_rate": 9.185682617491863e-06,
+      "loss": 1.1204,
+      "step": 493
+    },
+    {
+      "epoch": 0.8666666666666667,
+      "grad_norm": 0.23894089460372925,
+      "learning_rate": 8.952245334118414e-06,
+      "loss": 1.171,
+      "step": 494
+    },
+    {
+      "epoch": 0.868421052631579,
+      "grad_norm": 0.22390300035476685,
+      "learning_rate": 8.72167349386811e-06,
+      "loss": 1.2307,
+      "step": 495
+    },
+    {
+      "epoch": 0.8701754385964913,
+      "grad_norm": 0.19924871623516083,
+      "learning_rate": 8.493974353268019e-06,
+      "loss": 1.0831,
+      "step": 496
+    },
+    {
+      "epoch": 0.8719298245614036,
+      "grad_norm": 0.23712003231048584,
+      "learning_rate": 8.269155078435931e-06,
+      "loss": 1.2277,
+      "step": 497
+    },
+    {
+      "epoch": 0.8736842105263158,
+      "grad_norm": 0.19795198738574982,
+      "learning_rate": 8.047222744854943e-06,
+      "loss": 0.9915,
+      "step": 498
+    },
+    {
+      "epoch": 0.875438596491228,
+      "grad_norm": 0.246077299118042,
+      "learning_rate": 7.828184337150613e-06,
+      "loss": 1.334,
+      "step": 499
+    },
+    {
+      "epoch": 0.8771929824561403,
+      "grad_norm": 0.16609066724777222,
+      "learning_rate": 7.612046748871327e-06,
+      "loss": 0.8738,
+      "step": 500
+    },
+    {
+      "epoch": 0.8789473684210526,
+      "grad_norm": 0.212895929813385,
+      "learning_rate": 7.398816782271223e-06,
+      "loss": 1.1872,
+      "step": 501
+    },
+    {
+      "epoch": 0.8807017543859649,
+      "grad_norm": 0.21837033331394196,
+      "learning_rate": 7.1885011480961164e-06,
+      "loss": 0.8359,
+      "step": 502
+    },
+    {
+      "epoch": 0.8824561403508772,
+      "grad_norm": 0.20409606397151947,
+      "learning_rate": 6.981106465372389e-06,
+      "loss": 0.9836,
+      "step": 503
+    },
+    {
+      "epoch": 0.8842105263157894,
+      "grad_norm": 0.23097316920757294,
+      "learning_rate": 6.776639261198581e-06,
+      "loss": 1.1037,
+      "step": 504
+    },
+    {
+      "epoch": 0.8859649122807017,
+      "grad_norm": 0.2189425528049469,
+      "learning_rate": 6.5751059705400295e-06,
+      "loss": 1.104,
+      "step": 505
+    },
+    {
+      "epoch": 0.887719298245614,
+      "grad_norm": 0.1863974928855896,
+      "learning_rate": 6.37651293602628e-06,
+      "loss": 1.0283,
+      "step": 506
+    },
+    {
+      "epoch": 0.8894736842105263,
+      "grad_norm": 0.19999191164970398,
+      "learning_rate": 6.180866407751595e-06,
+      "loss": 1.1252,
+      "step": 507
+    },
+    {
+      "epoch": 0.8912280701754386,
+      "grad_norm": 0.2026892900466919,
+      "learning_rate": 5.988172543078097e-06,
+      "loss": 0.9766,
+      "step": 508
+    },
+    {
+      "epoch": 0.8929824561403509,
+      "grad_norm": 0.2486003041267395,
+      "learning_rate": 5.7984374064421035e-06,
+      "loss": 1.2422,
+      "step": 509
+    },
+    {
+      "epoch": 0.8947368421052632,
+      "grad_norm": 0.23102660477161407,
+      "learning_rate": 5.611666969163243e-06,
+      "loss": 0.9549,
+      "step": 510
+    },
+    {
+      "epoch": 0.8964912280701754,
+      "grad_norm": 0.20781514048576355,
+      "learning_rate": 5.427867109256457e-06,
+      "loss": 0.9169,
+      "step": 511
+    },
+    {
+      "epoch": 0.8982456140350877,
+      "grad_norm": 0.24224388599395752,
+      "learning_rate": 5.247043611247127e-06,
+      "loss": 1.1469,
+      "step": 512
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 0.20646372437477112,
+      "learning_rate": 5.0692021659888735e-06,
+      "loss": 1.0301,
+      "step": 513
+    },
+    {
+      "epoch": 0.9017543859649123,
+      "grad_norm": 0.19209584593772888,
+      "learning_rate": 4.8943483704846475e-06,
+      "loss": 0.8421,
+      "step": 514
+    },
+    {
+      "epoch": 0.9035087719298246,
+      "grad_norm": 0.19765536487102509,
+      "learning_rate": 4.722487727710368e-06,
+      "loss": 0.7961,
+      "step": 515
+    },
+    {
+      "epoch": 0.9052631578947369,
+      "grad_norm": 0.19135533273220062,
+      "learning_rate": 4.553625646441928e-06,
+      "loss": 0.9,
+      "step": 516
+    },
+    {
+      "epoch": 0.9070175438596492,
+      "grad_norm": 0.2040165811777115,
+      "learning_rate": 4.3877674410848e-06,
+      "loss": 1.0021,
+      "step": 517
+    },
+    {
+      "epoch": 0.9087719298245615,
+      "grad_norm": 0.2542368769645691,
+      "learning_rate": 4.224918331506955e-06,
+      "loss": 1.1327,
+      "step": 518
+    },
+    {
+      "epoch": 0.9105263157894737,
+      "grad_norm": 0.22248725593090057,
+      "learning_rate": 4.065083442874418e-06,
+      "loss": 1.0234,
+      "step": 519
+    },
+    {
+      "epoch": 0.9122807017543859,
+      "grad_norm": 0.2187497317790985,
+      "learning_rate": 3.908267805490051e-06,
+      "loss": 1.1397,
+      "step": 520
+    },
+    {
+      "epoch": 0.9140350877192982,
+      "grad_norm": 0.22156085073947906,
+      "learning_rate": 3.7544763546352834e-06,
+      "loss": 1.1903,
+      "step": 521
+    },
+    {
+      "epoch": 0.9157894736842105,
+      "grad_norm": 0.18726098537445068,
+      "learning_rate": 3.6037139304146762e-06,
+      "loss": 0.8838,
+      "step": 522
+    },
+    {
+      "epoch": 0.9175438596491228,
+      "grad_norm": 0.24216888844966888,
+      "learning_rate": 3.455985277603713e-06,
+      "loss": 1.1866,
+      "step": 523
+    },
+    {
+      "epoch": 0.9192982456140351,
+      "grad_norm": 0.22994445264339447,
+      "learning_rate": 3.311295045499363e-06,
+      "loss": 1.2912,
+      "step": 524
+    },
+    {
+      "epoch": 0.9210526315789473,
+      "grad_norm": 0.2445104867219925,
+      "learning_rate": 3.169647787773866e-06,
+      "loss": 1.2097,
+      "step": 525
+    },
+    {
+      "epoch": 0.9228070175438596,
+      "grad_norm": 0.2259332537651062,
+      "learning_rate": 3.0310479623313127e-06,
+      "loss": 1.3334,
+      "step": 526
+    },
+    {
+      "epoch": 0.9245614035087719,
+      "grad_norm": 0.21811023354530334,
+      "learning_rate": 2.8954999311674558e-06,
+      "loss": 0.9423,
+      "step": 527
+    },
+    {
+      "epoch": 0.9263157894736842,
+      "grad_norm": 0.20350219309329987,
+      "learning_rate": 2.7630079602323442e-06,
+      "loss": 1.2072,
+      "step": 528
+    },
+    {
+      "epoch": 0.9280701754385965,
+      "grad_norm": 0.2146158218383789,
+      "learning_rate": 2.6335762192960743e-06,
+      "loss": 1.0334,
+      "step": 529
+    },
+    {
+      "epoch": 0.9298245614035088,
+      "grad_norm": 0.24354608356952667,
+      "learning_rate": 2.5072087818176382e-06,
+      "loss": 0.9747,
+      "step": 530
+    },
+    {
+      "epoch": 0.9315789473684211,
+      "grad_norm": 0.20511949062347412,
+      "learning_rate": 2.383909624816616e-06,
+      "loss": 1.1739,
+      "step": 531
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 0.21031689643859863,
+      "learning_rate": 2.2636826287480873e-06,
+      "loss": 0.8473,
+      "step": 532
+    },
+    {
+      "epoch": 0.9350877192982456,
+      "grad_norm": 0.19460001587867737,
+      "learning_rate": 2.1465315773804616e-06,
+      "loss": 1.1534,
+      "step": 533
+    },
+    {
+      "epoch": 0.9368421052631579,
+      "grad_norm": 0.23352767527103424,
+      "learning_rate": 2.032460157676452e-06,
+      "loss": 1.1901,
+      "step": 534
+    },
+    {
+      "epoch": 0.9385964912280702,
+      "grad_norm": 0.2155674397945404,
+      "learning_rate": 1.921471959676957e-06,
+      "loss": 1.214,
+      "step": 535
+    },
+    {
+      "epoch": 0.9403508771929825,
+      "grad_norm": 0.19896399974822998,
+      "learning_rate": 1.81357047638816e-06,
+      "loss": 0.9388,
+      "step": 536
+    },
+    {
+      "epoch": 0.9421052631578948,
+      "grad_norm": 0.2220386415719986,
+      "learning_rate": 1.7087591036715534e-06,
+      "loss": 1.1168,
+      "step": 537
+    },
+    {
+      "epoch": 0.9438596491228071,
+      "grad_norm": 0.1958460658788681,
+      "learning_rate": 1.6070411401370334e-06,
+      "loss": 0.8961,
+      "step": 538
+    },
+    {
+      "epoch": 0.9456140350877194,
+      "grad_norm": 0.23341479897499084,
+      "learning_rate": 1.5084197870391837e-06,
+      "loss": 1.0279,
+      "step": 539
+    },
+    {
+      "epoch": 0.9473684210526315,
+      "grad_norm": 0.21169790625572205,
+      "learning_rate": 1.4128981481764115e-06,
+      "loss": 1.1875,
+      "step": 540
+    },
+    {
+      "epoch": 0.9491228070175438,
+      "grad_norm": 0.18905304372310638,
+      "learning_rate": 1.3204792297933588e-06,
+      "loss": 1.0736,
+      "step": 541
+    },
+    {
+      "epoch": 0.9508771929824561,
+      "grad_norm": 0.22275826334953308,
+      "learning_rate": 1.231165940486234e-06,
+      "loss": 1.1324,
+      "step": 542
+    },
+    {
+      "epoch": 0.9526315789473684,
+      "grad_norm": 0.22516103088855743,
+      "learning_rate": 1.1449610911112741e-06,
+      "loss": 1.1521,
+      "step": 543
+    },
+    {
+      "epoch": 0.9543859649122807,
+      "grad_norm": 0.18578921258449554,
+      "learning_rate": 1.0618673946963365e-06,
+      "loss": 0.9419,
+      "step": 544
+    },
+    {
+      "epoch": 0.956140350877193,
+      "grad_norm": 0.19390179216861725,
+      "learning_rate": 9.818874663554357e-07,
+      "loss": 1.0505,
+      "step": 545
+    },
+    {
+      "epoch": 0.9578947368421052,
+      "grad_norm": 0.2348068505525589,
+      "learning_rate": 9.0502382320653e-07,
+      "loss": 1.0465,
+      "step": 546
+    },
+    {
+      "epoch": 0.9596491228070175,
+      "grad_norm": 0.20014607906341553,
+      "learning_rate": 8.31278884292186e-07,
+      "loss": 0.9987,
+      "step": 547
+    },
+    {
+      "epoch": 0.9614035087719298,
+      "grad_norm": 0.22346830368041992,
+      "learning_rate": 7.606549705035937e-07,
+      "loss": 1.2411,
+      "step": 548
+    },
+    {
+      "epoch": 0.9631578947368421,
+      "grad_norm": 0.22480875253677368,
+      "learning_rate": 6.931543045073708e-07,
+      "loss": 1.2412,
+      "step": 549
+    },
+    {
+      "epoch": 0.9649122807017544,
+      "grad_norm": 0.22292938828468323,
+      "learning_rate": 6.287790106757396e-07,
+      "loss": 0.9878,
+      "step": 550
+    },
+    {
+      "epoch": 0.9666666666666667,
+      "grad_norm": 0.20601260662078857,
+      "learning_rate": 5.675311150195928e-07,
+      "loss": 1.1647,
+      "step": 551
+    },
+    {
+      "epoch": 0.968421052631579,
+      "grad_norm": 0.21411311626434326,
+      "learning_rate": 5.094125451247655e-07,
+      "loss": 1.1315,
+      "step": 552
+    },
+    {
+      "epoch": 0.9701754385964912,
+      "grad_norm": 0.21345672011375427,
+      "learning_rate": 4.544251300913405e-07,
+      "loss": 1.0022,
+      "step": 553
+    },
+    {
+      "epoch": 0.9719298245614035,
+      "grad_norm": 0.18871097266674042,
+      "learning_rate": 4.025706004760932e-07,
+      "loss": 0.9219,
+      "step": 554
+    },
+    {
+      "epoch": 0.9736842105263158,
+      "grad_norm": 0.2253376692533493,
+      "learning_rate": 3.5385058823809156e-07,
+      "loss": 1.065,
+      "step": 555
+    },
+    {
+      "epoch": 0.9754385964912281,
+      "grad_norm": 0.20358805358409882,
+      "learning_rate": 3.0826662668720364e-07,
+      "loss": 1.0352,
+      "step": 556
+    },
+    {
+      "epoch": 0.9771929824561404,
+      "grad_norm": 0.22318677604198456,
+      "learning_rate": 2.658201504359803e-07,
+      "loss": 1.0816,
+      "step": 557
+    },
+    {
+      "epoch": 0.9789473684210527,
+      "grad_norm": 0.2127983570098877,
+      "learning_rate": 2.265124953543918e-07,
+      "loss": 1.0055,
+      "step": 558
+    },
+    {
+      "epoch": 0.980701754385965,
+      "grad_norm": 0.18631552159786224,
+      "learning_rate": 1.9034489852787218e-07,
+      "loss": 0.9969,
+      "step": 559
+    },
+    {
+      "epoch": 0.9824561403508771,
+      "grad_norm": 0.19769078493118286,
+      "learning_rate": 1.5731849821833954e-07,
+      "loss": 0.9535,
+      "step": 560
+    },
+    {
+      "epoch": 0.9842105263157894,
+      "grad_norm": 0.18470965325832367,
+      "learning_rate": 1.274343338283801e-07,
+      "loss": 0.8852,
+      "step": 561
+    },
+    {
+      "epoch": 0.9859649122807017,
+      "grad_norm": 0.2604463994503021,
+      "learning_rate": 1.0069334586854107e-07,
+      "loss": 1.0838,
+      "step": 562
+    },
+    {
+      "epoch": 0.987719298245614,
+      "grad_norm": 0.22031089663505554,
+      "learning_rate": 7.709637592770991e-08,
+      "loss": 0.9439,
+      "step": 563
+    },
+    {
+      "epoch": 0.9894736842105263,
+      "grad_norm": 0.20898091793060303,
+      "learning_rate": 5.6644166646668826e-08,
+      "loss": 1.0976,
+      "step": 564
+    },
+    {
+      "epoch": 0.9912280701754386,
+      "grad_norm": 0.22906498610973358,
+      "learning_rate": 3.933736169471347e-08,
+      "loss": 1.2958,
+      "step": 565
+    },
+    {
+      "epoch": 0.9929824561403509,
+      "grad_norm": 0.2085491120815277,
+      "learning_rate": 2.5176505749346936e-08,
+      "loss": 0.8897,
+      "step": 566
+    },
+    {
+      "epoch": 0.9947368421052631,
+      "grad_norm": 0.17532259225845337,
+      "learning_rate": 1.4162044479182346e-08,
+      "loss": 0.8908,
+      "step": 567
+    },
+    {
+      "epoch": 0.9964912280701754,
+      "grad_norm": 0.23514819145202637,
+      "learning_rate": 6.294324529942941e-09,
+      "loss": 1.0965,
+      "step": 568
+    },
+    {
+      "epoch": 0.9982456140350877,
+      "grad_norm": 0.19628407061100006,
+      "learning_rate": 1.5735935134708613e-09,
+      "loss": 0.8857,
+      "step": 569
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.21578288078308105,
+      "learning_rate": 0.0,
+      "loss": 0.9811,
+      "step": 570
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.9503234629291213e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null