baseline_2D_move_15000 / trainer_state.json
qjuu's picture
Upload folder using huggingface_hub
73d3a37 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.48780487804878,
"eval_steps": 500,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02032520325203252,
"grad_norm": 13.226722717285156,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.9641,
"step": 10
},
{
"epoch": 0.04065040650406504,
"grad_norm": 6.338687896728516,
"learning_rate": 2.5333333333333334e-06,
"loss": 0.855,
"step": 20
},
{
"epoch": 0.06097560975609756,
"grad_norm": 2.033618211746216,
"learning_rate": 3.866666666666667e-06,
"loss": 0.5057,
"step": 30
},
{
"epoch": 0.08130081300813008,
"grad_norm": 1.127258539199829,
"learning_rate": 5.2e-06,
"loss": 0.3473,
"step": 40
},
{
"epoch": 0.1016260162601626,
"grad_norm": 1.834140419960022,
"learning_rate": 6.533333333333333e-06,
"loss": 0.2456,
"step": 50
},
{
"epoch": 0.12195121951219512,
"grad_norm": 2.0608911514282227,
"learning_rate": 7.866666666666667e-06,
"loss": 0.1905,
"step": 60
},
{
"epoch": 0.14227642276422764,
"grad_norm": 1.4218802452087402,
"learning_rate": 9.2e-06,
"loss": 0.1439,
"step": 70
},
{
"epoch": 0.16260162601626016,
"grad_norm": 0.7227293848991394,
"learning_rate": 1.0533333333333335e-05,
"loss": 0.1313,
"step": 80
},
{
"epoch": 0.18292682926829268,
"grad_norm": 0.9034302830696106,
"learning_rate": 1.1866666666666668e-05,
"loss": 0.1073,
"step": 90
},
{
"epoch": 0.2032520325203252,
"grad_norm": 1.4675464630126953,
"learning_rate": 1.32e-05,
"loss": 0.1265,
"step": 100
},
{
"epoch": 0.22357723577235772,
"grad_norm": 1.0163527727127075,
"learning_rate": 1.4533333333333335e-05,
"loss": 0.1173,
"step": 110
},
{
"epoch": 0.24390243902439024,
"grad_norm": 0.5881352424621582,
"learning_rate": 1.586666666666667e-05,
"loss": 0.0945,
"step": 120
},
{
"epoch": 0.26422764227642276,
"grad_norm": 1.0253976583480835,
"learning_rate": 1.7199999999999998e-05,
"loss": 0.0968,
"step": 130
},
{
"epoch": 0.2845528455284553,
"grad_norm": 0.696062445640564,
"learning_rate": 1.8533333333333334e-05,
"loss": 0.0893,
"step": 140
},
{
"epoch": 0.3048780487804878,
"grad_norm": 0.6593964099884033,
"learning_rate": 1.9866666666666667e-05,
"loss": 0.0863,
"step": 150
},
{
"epoch": 0.3252032520325203,
"grad_norm": 0.9947993159294128,
"learning_rate": 2.12e-05,
"loss": 0.0906,
"step": 160
},
{
"epoch": 0.34552845528455284,
"grad_norm": 1.0011829137802124,
"learning_rate": 2.2533333333333333e-05,
"loss": 0.0846,
"step": 170
},
{
"epoch": 0.36585365853658536,
"grad_norm": 0.9370313286781311,
"learning_rate": 2.3866666666666666e-05,
"loss": 0.0927,
"step": 180
},
{
"epoch": 0.3861788617886179,
"grad_norm": 0.5689650774002075,
"learning_rate": 2.5200000000000003e-05,
"loss": 0.0768,
"step": 190
},
{
"epoch": 0.4065040650406504,
"grad_norm": 0.4828910529613495,
"learning_rate": 2.6533333333333332e-05,
"loss": 0.0811,
"step": 200
},
{
"epoch": 0.4268292682926829,
"grad_norm": 0.62357097864151,
"learning_rate": 2.786666666666667e-05,
"loss": 0.0885,
"step": 210
},
{
"epoch": 0.44715447154471544,
"grad_norm": 0.5117200016975403,
"learning_rate": 2.9199999999999998e-05,
"loss": 0.0866,
"step": 220
},
{
"epoch": 0.46747967479674796,
"grad_norm": 0.791079580783844,
"learning_rate": 3.0533333333333335e-05,
"loss": 0.0827,
"step": 230
},
{
"epoch": 0.4878048780487805,
"grad_norm": 0.5069907307624817,
"learning_rate": 3.1866666666666664e-05,
"loss": 0.0843,
"step": 240
},
{
"epoch": 0.508130081300813,
"grad_norm": 0.7022382616996765,
"learning_rate": 3.32e-05,
"loss": 0.0837,
"step": 250
},
{
"epoch": 0.5284552845528455,
"grad_norm": 0.7299018502235413,
"learning_rate": 3.453333333333334e-05,
"loss": 0.0773,
"step": 260
},
{
"epoch": 0.5487804878048781,
"grad_norm": 0.532425582408905,
"learning_rate": 3.586666666666667e-05,
"loss": 0.0881,
"step": 270
},
{
"epoch": 0.5691056910569106,
"grad_norm": 1.002967357635498,
"learning_rate": 3.72e-05,
"loss": 0.0768,
"step": 280
},
{
"epoch": 0.5894308943089431,
"grad_norm": 0.891633152961731,
"learning_rate": 3.853333333333334e-05,
"loss": 0.0913,
"step": 290
},
{
"epoch": 0.6097560975609756,
"grad_norm": 1.0027371644973755,
"learning_rate": 3.986666666666667e-05,
"loss": 0.0776,
"step": 300
},
{
"epoch": 0.6300813008130082,
"grad_norm": 0.8202036619186401,
"learning_rate": 4.12e-05,
"loss": 0.0849,
"step": 310
},
{
"epoch": 0.6504065040650406,
"grad_norm": 0.7221282720565796,
"learning_rate": 4.2533333333333335e-05,
"loss": 0.0787,
"step": 320
},
{
"epoch": 0.6707317073170732,
"grad_norm": 0.5890057682991028,
"learning_rate": 4.3866666666666665e-05,
"loss": 0.0836,
"step": 330
},
{
"epoch": 0.6910569105691057,
"grad_norm": 0.43137913942337036,
"learning_rate": 4.52e-05,
"loss": 0.0726,
"step": 340
},
{
"epoch": 0.7113821138211383,
"grad_norm": 0.59128737449646,
"learning_rate": 4.653333333333334e-05,
"loss": 0.0673,
"step": 350
},
{
"epoch": 0.7317073170731707,
"grad_norm": 0.5243335366249084,
"learning_rate": 4.7866666666666674e-05,
"loss": 0.0737,
"step": 360
},
{
"epoch": 0.7520325203252033,
"grad_norm": 0.4521591067314148,
"learning_rate": 4.92e-05,
"loss": 0.0712,
"step": 370
},
{
"epoch": 0.7723577235772358,
"grad_norm": 0.5582333207130432,
"learning_rate": 5.053333333333333e-05,
"loss": 0.0797,
"step": 380
},
{
"epoch": 0.7926829268292683,
"grad_norm": 0.4571887254714966,
"learning_rate": 5.1866666666666676e-05,
"loss": 0.0698,
"step": 390
},
{
"epoch": 0.8130081300813008,
"grad_norm": 0.8901530504226685,
"learning_rate": 5.3200000000000006e-05,
"loss": 0.0715,
"step": 400
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.7452099919319153,
"learning_rate": 5.4533333333333335e-05,
"loss": 0.0775,
"step": 410
},
{
"epoch": 0.8536585365853658,
"grad_norm": 0.5491011142730713,
"learning_rate": 5.5866666666666665e-05,
"loss": 0.0753,
"step": 420
},
{
"epoch": 0.8739837398373984,
"grad_norm": 0.6520193219184875,
"learning_rate": 5.72e-05,
"loss": 0.0757,
"step": 430
},
{
"epoch": 0.8943089430894309,
"grad_norm": 0.6066526770591736,
"learning_rate": 5.853333333333334e-05,
"loss": 0.0733,
"step": 440
},
{
"epoch": 0.9146341463414634,
"grad_norm": 0.633453905582428,
"learning_rate": 5.9866666666666674e-05,
"loss": 0.072,
"step": 450
},
{
"epoch": 0.9349593495934959,
"grad_norm": 0.5940499901771545,
"learning_rate": 6.12e-05,
"loss": 0.0808,
"step": 460
},
{
"epoch": 0.9552845528455285,
"grad_norm": 0.6593416929244995,
"learning_rate": 6.253333333333333e-05,
"loss": 0.0752,
"step": 470
},
{
"epoch": 0.975609756097561,
"grad_norm": 0.8113526701927185,
"learning_rate": 6.386666666666667e-05,
"loss": 0.072,
"step": 480
},
{
"epoch": 0.9959349593495935,
"grad_norm": 0.6880797147750854,
"learning_rate": 6.52e-05,
"loss": 0.0733,
"step": 490
},
{
"epoch": 1.016260162601626,
"grad_norm": 0.6124709248542786,
"learning_rate": 6.653333333333334e-05,
"loss": 0.0682,
"step": 500
},
{
"epoch": 1.0365853658536586,
"grad_norm": 0.6620608568191528,
"learning_rate": 6.786666666666667e-05,
"loss": 0.075,
"step": 510
},
{
"epoch": 1.056910569105691,
"grad_norm": 0.44583699107170105,
"learning_rate": 6.92e-05,
"loss": 0.0649,
"step": 520
},
{
"epoch": 1.0772357723577235,
"grad_norm": 0.5196840763092041,
"learning_rate": 7.053333333333334e-05,
"loss": 0.0698,
"step": 530
},
{
"epoch": 1.0975609756097562,
"grad_norm": 0.43396124243736267,
"learning_rate": 7.186666666666667e-05,
"loss": 0.0643,
"step": 540
},
{
"epoch": 1.1178861788617886,
"grad_norm": 0.6352965831756592,
"learning_rate": 7.32e-05,
"loss": 0.0656,
"step": 550
},
{
"epoch": 1.1382113821138211,
"grad_norm": 0.6376074552536011,
"learning_rate": 7.453333333333333e-05,
"loss": 0.0639,
"step": 560
},
{
"epoch": 1.1585365853658536,
"grad_norm": 0.7378827929496765,
"learning_rate": 7.586666666666668e-05,
"loss": 0.0707,
"step": 570
},
{
"epoch": 1.1788617886178863,
"grad_norm": 0.7607502341270447,
"learning_rate": 7.72e-05,
"loss": 0.0667,
"step": 580
},
{
"epoch": 1.1991869918699187,
"grad_norm": 0.4571978747844696,
"learning_rate": 7.853333333333334e-05,
"loss": 0.0727,
"step": 590
},
{
"epoch": 1.2195121951219512,
"grad_norm": 0.5919597148895264,
"learning_rate": 7.986666666666667e-05,
"loss": 0.0673,
"step": 600
},
{
"epoch": 1.2398373983739837,
"grad_norm": 0.46557170152664185,
"learning_rate": 8.120000000000001e-05,
"loss": 0.0738,
"step": 610
},
{
"epoch": 1.2601626016260163,
"grad_norm": 0.5049855709075928,
"learning_rate": 8.253333333333334e-05,
"loss": 0.062,
"step": 620
},
{
"epoch": 1.2804878048780488,
"grad_norm": 0.5219482779502869,
"learning_rate": 8.386666666666667e-05,
"loss": 0.0609,
"step": 630
},
{
"epoch": 1.3008130081300813,
"grad_norm": 0.5961984395980835,
"learning_rate": 8.52e-05,
"loss": 0.062,
"step": 640
},
{
"epoch": 1.321138211382114,
"grad_norm": 0.5225047469139099,
"learning_rate": 8.653333333333333e-05,
"loss": 0.0712,
"step": 650
},
{
"epoch": 1.3414634146341464,
"grad_norm": 0.5839872360229492,
"learning_rate": 8.786666666666667e-05,
"loss": 0.0625,
"step": 660
},
{
"epoch": 1.3617886178861789,
"grad_norm": 0.71927809715271,
"learning_rate": 8.92e-05,
"loss": 0.0755,
"step": 670
},
{
"epoch": 1.3821138211382114,
"grad_norm": 0.4162643551826477,
"learning_rate": 9.053333333333334e-05,
"loss": 0.0643,
"step": 680
},
{
"epoch": 1.4024390243902438,
"grad_norm": 0.514099657535553,
"learning_rate": 9.186666666666667e-05,
"loss": 0.0622,
"step": 690
},
{
"epoch": 1.4227642276422765,
"grad_norm": 0.5861640572547913,
"learning_rate": 9.320000000000002e-05,
"loss": 0.0682,
"step": 700
},
{
"epoch": 1.443089430894309,
"grad_norm": 0.484695702791214,
"learning_rate": 9.453333333333335e-05,
"loss": 0.0753,
"step": 710
},
{
"epoch": 1.4634146341463414,
"grad_norm": 0.3829363286495209,
"learning_rate": 9.586666666666667e-05,
"loss": 0.0615,
"step": 720
},
{
"epoch": 1.4837398373983741,
"grad_norm": 0.5547038912773132,
"learning_rate": 9.72e-05,
"loss": 0.0717,
"step": 730
},
{
"epoch": 1.5040650406504064,
"grad_norm": 0.4040966033935547,
"learning_rate": 9.853333333333333e-05,
"loss": 0.063,
"step": 740
},
{
"epoch": 1.524390243902439,
"grad_norm": 0.36058497428894043,
"learning_rate": 9.986666666666668e-05,
"loss": 0.0648,
"step": 750
},
{
"epoch": 1.5447154471544715,
"grad_norm": 0.5361801981925964,
"learning_rate": 9.999990157738453e-05,
"loss": 0.0653,
"step": 760
},
{
"epoch": 1.565040650406504,
"grad_norm": 0.4126802980899811,
"learning_rate": 9.999956135155687e-05,
"loss": 0.0608,
"step": 770
},
{
"epoch": 1.5853658536585367,
"grad_norm": 0.47281134128570557,
"learning_rate": 9.99989781090763e-05,
"loss": 0.0641,
"step": 780
},
{
"epoch": 1.6056910569105691,
"grad_norm": 0.682732105255127,
"learning_rate": 9.999815185277755e-05,
"loss": 0.0621,
"step": 790
},
{
"epoch": 1.6260162601626016,
"grad_norm": 0.5980079770088196,
"learning_rate": 9.999708258667652e-05,
"loss": 0.0718,
"step": 800
},
{
"epoch": 1.6463414634146343,
"grad_norm": 0.5243200659751892,
"learning_rate": 9.999577031597029e-05,
"loss": 0.0672,
"step": 810
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.7068093419075012,
"learning_rate": 9.999421504703696e-05,
"loss": 0.0758,
"step": 820
},
{
"epoch": 1.6869918699186992,
"grad_norm": 0.3850698173046112,
"learning_rate": 9.999241678743574e-05,
"loss": 0.0663,
"step": 830
},
{
"epoch": 1.7073170731707317,
"grad_norm": 0.5258147716522217,
"learning_rate": 9.999037554590683e-05,
"loss": 0.0567,
"step": 840
},
{
"epoch": 1.7276422764227641,
"grad_norm": 0.6071597933769226,
"learning_rate": 9.998809133237143e-05,
"loss": 0.0707,
"step": 850
},
{
"epoch": 1.7479674796747968,
"grad_norm": 0.48662713170051575,
"learning_rate": 9.998556415793169e-05,
"loss": 0.0627,
"step": 860
},
{
"epoch": 1.7682926829268293,
"grad_norm": 0.47866326570510864,
"learning_rate": 9.998279403487062e-05,
"loss": 0.0683,
"step": 870
},
{
"epoch": 1.7886178861788617,
"grad_norm": 0.5103737115859985,
"learning_rate": 9.997978097665205e-05,
"loss": 0.0599,
"step": 880
},
{
"epoch": 1.8089430894308944,
"grad_norm": 0.5845749378204346,
"learning_rate": 9.99765249979206e-05,
"loss": 0.06,
"step": 890
},
{
"epoch": 1.8292682926829267,
"grad_norm": 0.28062498569488525,
"learning_rate": 9.997302611450154e-05,
"loss": 0.065,
"step": 900
},
{
"epoch": 1.8495934959349594,
"grad_norm": 0.5082411766052246,
"learning_rate": 9.996928434340073e-05,
"loss": 0.0527,
"step": 910
},
{
"epoch": 1.8699186991869918,
"grad_norm": 0.3810727298259735,
"learning_rate": 9.996529970280462e-05,
"loss": 0.0579,
"step": 920
},
{
"epoch": 1.8902439024390243,
"grad_norm": 0.6368547677993774,
"learning_rate": 9.996107221208004e-05,
"loss": 0.0584,
"step": 930
},
{
"epoch": 1.910569105691057,
"grad_norm": 0.3547976613044739,
"learning_rate": 9.995660189177419e-05,
"loss": 0.0534,
"step": 940
},
{
"epoch": 1.9308943089430894,
"grad_norm": 0.40976643562316895,
"learning_rate": 9.995188876361451e-05,
"loss": 0.0594,
"step": 950
},
{
"epoch": 1.951219512195122,
"grad_norm": 0.33262038230895996,
"learning_rate": 9.994693285050857e-05,
"loss": 0.0499,
"step": 960
},
{
"epoch": 1.9715447154471546,
"grad_norm": 0.3865192234516144,
"learning_rate": 9.994173417654395e-05,
"loss": 0.0549,
"step": 970
},
{
"epoch": 1.9918699186991868,
"grad_norm": 0.36267194151878357,
"learning_rate": 9.993629276698821e-05,
"loss": 0.0508,
"step": 980
},
{
"epoch": 2.0121951219512195,
"grad_norm": 0.53200763463974,
"learning_rate": 9.993060864828858e-05,
"loss": 0.058,
"step": 990
},
{
"epoch": 2.032520325203252,
"grad_norm": 0.3534744381904602,
"learning_rate": 9.992468184807206e-05,
"loss": 0.0555,
"step": 1000
},
{
"epoch": 2.0528455284552845,
"grad_norm": 0.4305236041545868,
"learning_rate": 9.991851239514511e-05,
"loss": 0.0554,
"step": 1010
},
{
"epoch": 2.073170731707317,
"grad_norm": 0.45501708984375,
"learning_rate": 9.991210031949359e-05,
"loss": 0.0574,
"step": 1020
},
{
"epoch": 2.0934959349593494,
"grad_norm": 0.38146859407424927,
"learning_rate": 9.990544565228259e-05,
"loss": 0.0522,
"step": 1030
},
{
"epoch": 2.113821138211382,
"grad_norm": 0.3539182245731354,
"learning_rate": 9.989854842585631e-05,
"loss": 0.0557,
"step": 1040
},
{
"epoch": 2.1341463414634148,
"grad_norm": 0.3897205889225006,
"learning_rate": 9.989140867373783e-05,
"loss": 0.0534,
"step": 1050
},
{
"epoch": 2.154471544715447,
"grad_norm": 0.44654178619384766,
"learning_rate": 9.988402643062907e-05,
"loss": 0.0626,
"step": 1060
},
{
"epoch": 2.1747967479674797,
"grad_norm": 0.43859946727752686,
"learning_rate": 9.987640173241046e-05,
"loss": 0.0554,
"step": 1070
},
{
"epoch": 2.1951219512195124,
"grad_norm": 0.299344003200531,
"learning_rate": 9.986853461614093e-05,
"loss": 0.051,
"step": 1080
},
{
"epoch": 2.2154471544715446,
"grad_norm": 0.28412163257598877,
"learning_rate": 9.986042512005763e-05,
"loss": 0.0537,
"step": 1090
},
{
"epoch": 2.2357723577235773,
"grad_norm": 0.501313328742981,
"learning_rate": 9.985207328357573e-05,
"loss": 0.056,
"step": 1100
},
{
"epoch": 2.2560975609756095,
"grad_norm": 0.37945660948753357,
"learning_rate": 9.984347914728829e-05,
"loss": 0.052,
"step": 1110
},
{
"epoch": 2.2764227642276422,
"grad_norm": 0.5372085571289062,
"learning_rate": 9.983464275296605e-05,
"loss": 0.0528,
"step": 1120
},
{
"epoch": 2.296747967479675,
"grad_norm": 0.33128976821899414,
"learning_rate": 9.982556414355724e-05,
"loss": 0.0565,
"step": 1130
},
{
"epoch": 2.317073170731707,
"grad_norm": 0.2715194523334503,
"learning_rate": 9.981624336318726e-05,
"loss": 0.0573,
"step": 1140
},
{
"epoch": 2.33739837398374,
"grad_norm": 0.48618754744529724,
"learning_rate": 9.980668045715864e-05,
"loss": 0.0541,
"step": 1150
},
{
"epoch": 2.3577235772357725,
"grad_norm": 0.4522000253200531,
"learning_rate": 9.979687547195066e-05,
"loss": 0.0554,
"step": 1160
},
{
"epoch": 2.3780487804878048,
"grad_norm": 0.4867287576198578,
"learning_rate": 9.978682845521927e-05,
"loss": 0.053,
"step": 1170
},
{
"epoch": 2.3983739837398375,
"grad_norm": 0.35228079557418823,
"learning_rate": 9.977653945579673e-05,
"loss": 0.05,
"step": 1180
},
{
"epoch": 2.41869918699187,
"grad_norm": 0.35814642906188965,
"learning_rate": 9.976600852369144e-05,
"loss": 0.0534,
"step": 1190
},
{
"epoch": 2.4390243902439024,
"grad_norm": 0.5843226909637451,
"learning_rate": 9.975523571008769e-05,
"loss": 0.0524,
"step": 1200
},
{
"epoch": 2.459349593495935,
"grad_norm": 0.5159884691238403,
"learning_rate": 9.97442210673454e-05,
"loss": 0.05,
"step": 1210
},
{
"epoch": 2.4796747967479673,
"grad_norm": 0.5046380162239075,
"learning_rate": 9.973296464899988e-05,
"loss": 0.0486,
"step": 1220
},
{
"epoch": 2.5,
"grad_norm": 0.3927498757839203,
"learning_rate": 9.972146650976154e-05,
"loss": 0.0521,
"step": 1230
},
{
"epoch": 2.5203252032520327,
"grad_norm": 0.36474692821502686,
"learning_rate": 9.970972670551566e-05,
"loss": 0.0549,
"step": 1240
},
{
"epoch": 2.540650406504065,
"grad_norm": 0.35434460639953613,
"learning_rate": 9.969774529332212e-05,
"loss": 0.0604,
"step": 1250
},
{
"epoch": 2.5609756097560976,
"grad_norm": 0.3699084520339966,
"learning_rate": 9.968552233141504e-05,
"loss": 0.0547,
"step": 1260
},
{
"epoch": 2.58130081300813,
"grad_norm": 0.3049577474594116,
"learning_rate": 9.967305787920264e-05,
"loss": 0.0543,
"step": 1270
},
{
"epoch": 2.6016260162601625,
"grad_norm": 0.3516198992729187,
"learning_rate": 9.966035199726684e-05,
"loss": 0.0508,
"step": 1280
},
{
"epoch": 2.6219512195121952,
"grad_norm": 0.37977465987205505,
"learning_rate": 9.9647404747363e-05,
"loss": 0.0558,
"step": 1290
},
{
"epoch": 2.642276422764228,
"grad_norm": 0.4862557053565979,
"learning_rate": 9.96342161924196e-05,
"loss": 0.0527,
"step": 1300
},
{
"epoch": 2.66260162601626,
"grad_norm": 0.4220629334449768,
"learning_rate": 9.962078639653797e-05,
"loss": 0.0537,
"step": 1310
},
{
"epoch": 2.682926829268293,
"grad_norm": 0.29033005237579346,
"learning_rate": 9.960711542499202e-05,
"loss": 0.0542,
"step": 1320
},
{
"epoch": 2.703252032520325,
"grad_norm": 0.29148346185684204,
"learning_rate": 9.959320334422772e-05,
"loss": 0.046,
"step": 1330
},
{
"epoch": 2.7235772357723578,
"grad_norm": 0.260502427816391,
"learning_rate": 9.957905022186309e-05,
"loss": 0.0527,
"step": 1340
},
{
"epoch": 2.7439024390243905,
"grad_norm": 0.4924025535583496,
"learning_rate": 9.956465612668757e-05,
"loss": 0.0483,
"step": 1350
},
{
"epoch": 2.7642276422764227,
"grad_norm": 0.4993506968021393,
"learning_rate": 9.95500211286619e-05,
"loss": 0.0508,
"step": 1360
},
{
"epoch": 2.7845528455284554,
"grad_norm": 0.4157140552997589,
"learning_rate": 9.953514529891763e-05,
"loss": 0.0448,
"step": 1370
},
{
"epoch": 2.8048780487804876,
"grad_norm": 0.2626568377017975,
"learning_rate": 9.952002870975693e-05,
"loss": 0.0527,
"step": 1380
},
{
"epoch": 2.8252032520325203,
"grad_norm": 0.3102031946182251,
"learning_rate": 9.950467143465207e-05,
"loss": 0.0416,
"step": 1390
},
{
"epoch": 2.845528455284553,
"grad_norm": 0.32706791162490845,
"learning_rate": 9.94890735482452e-05,
"loss": 0.0511,
"step": 1400
},
{
"epoch": 2.8658536585365852,
"grad_norm": 0.2708079516887665,
"learning_rate": 9.947323512634788e-05,
"loss": 0.0486,
"step": 1410
},
{
"epoch": 2.886178861788618,
"grad_norm": 0.3637838363647461,
"learning_rate": 9.945715624594081e-05,
"loss": 0.0488,
"step": 1420
},
{
"epoch": 2.90650406504065,
"grad_norm": 0.3679792582988739,
"learning_rate": 9.944083698517339e-05,
"loss": 0.0476,
"step": 1430
},
{
"epoch": 2.926829268292683,
"grad_norm": 0.29563894867897034,
"learning_rate": 9.942427742336334e-05,
"loss": 0.0481,
"step": 1440
},
{
"epoch": 2.9471544715447155,
"grad_norm": 0.2425679713487625,
"learning_rate": 9.940747764099638e-05,
"loss": 0.0456,
"step": 1450
},
{
"epoch": 2.9674796747967482,
"grad_norm": 0.404369980096817,
"learning_rate": 9.939043771972574e-05,
"loss": 0.0463,
"step": 1460
},
{
"epoch": 2.9878048780487805,
"grad_norm": 0.3223326802253723,
"learning_rate": 9.937315774237186e-05,
"loss": 0.0468,
"step": 1470
},
{
"epoch": 3.008130081300813,
"grad_norm": 0.35848402976989746,
"learning_rate": 9.93556377929219e-05,
"loss": 0.0518,
"step": 1480
},
{
"epoch": 3.0284552845528454,
"grad_norm": 0.2589901089668274,
"learning_rate": 9.933787795652942e-05,
"loss": 0.0437,
"step": 1490
},
{
"epoch": 3.048780487804878,
"grad_norm": 0.4505438506603241,
"learning_rate": 9.931987831951386e-05,
"loss": 0.054,
"step": 1500
},
{
"epoch": 3.069105691056911,
"grad_norm": 0.41765499114990234,
"learning_rate": 9.930163896936027e-05,
"loss": 0.0515,
"step": 1510
},
{
"epoch": 3.089430894308943,
"grad_norm": 0.33510351181030273,
"learning_rate": 9.92831599947187e-05,
"loss": 0.0502,
"step": 1520
},
{
"epoch": 3.1097560975609757,
"grad_norm": 0.35163936018943787,
"learning_rate": 9.926444148540393e-05,
"loss": 0.0458,
"step": 1530
},
{
"epoch": 3.130081300813008,
"grad_norm": 0.393381267786026,
"learning_rate": 9.924548353239495e-05,
"loss": 0.053,
"step": 1540
},
{
"epoch": 3.1504065040650406,
"grad_norm": 0.27485236525535583,
"learning_rate": 9.922628622783451e-05,
"loss": 0.0479,
"step": 1550
},
{
"epoch": 3.1707317073170733,
"grad_norm": 0.23032088577747345,
"learning_rate": 9.920684966502878e-05,
"loss": 0.0465,
"step": 1560
},
{
"epoch": 3.1910569105691056,
"grad_norm": 0.36205798387527466,
"learning_rate": 9.918717393844669e-05,
"loss": 0.046,
"step": 1570
},
{
"epoch": 3.2113821138211383,
"grad_norm": 0.30545753240585327,
"learning_rate": 9.916725914371969e-05,
"loss": 0.0484,
"step": 1580
},
{
"epoch": 3.231707317073171,
"grad_norm": 0.3029427230358124,
"learning_rate": 9.914710537764117e-05,
"loss": 0.049,
"step": 1590
},
{
"epoch": 3.252032520325203,
"grad_norm": 0.21751540899276733,
"learning_rate": 9.912671273816601e-05,
"loss": 0.0437,
"step": 1600
},
{
"epoch": 3.272357723577236,
"grad_norm": 0.4366188943386078,
"learning_rate": 9.910608132441008e-05,
"loss": 0.0442,
"step": 1610
},
{
"epoch": 3.292682926829268,
"grad_norm": 0.25756415724754333,
"learning_rate": 9.908521123664981e-05,
"loss": 0.0398,
"step": 1620
},
{
"epoch": 3.313008130081301,
"grad_norm": 0.34685221314430237,
"learning_rate": 9.906410257632168e-05,
"loss": 0.0453,
"step": 1630
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.3353132903575897,
"learning_rate": 9.904275544602169e-05,
"loss": 0.0468,
"step": 1640
},
{
"epoch": 3.3536585365853657,
"grad_norm": 0.3785768747329712,
"learning_rate": 9.902116994950493e-05,
"loss": 0.0468,
"step": 1650
},
{
"epoch": 3.3739837398373984,
"grad_norm": 0.23636843264102936,
"learning_rate": 9.899934619168501e-05,
"loss": 0.0434,
"step": 1660
},
{
"epoch": 3.394308943089431,
"grad_norm": 0.25570765137672424,
"learning_rate": 9.89772842786336e-05,
"loss": 0.0476,
"step": 1670
},
{
"epoch": 3.4146341463414633,
"grad_norm": 0.389314740896225,
"learning_rate": 9.895498431757989e-05,
"loss": 0.0536,
"step": 1680
},
{
"epoch": 3.434959349593496,
"grad_norm": 0.35203060507774353,
"learning_rate": 9.893244641691006e-05,
"loss": 0.05,
"step": 1690
},
{
"epoch": 3.4552845528455283,
"grad_norm": 0.33974689245224,
"learning_rate": 9.890967068616677e-05,
"loss": 0.0479,
"step": 1700
},
{
"epoch": 3.475609756097561,
"grad_norm": 0.3649257719516754,
"learning_rate": 9.888665723604864e-05,
"loss": 0.0493,
"step": 1710
},
{
"epoch": 3.4959349593495936,
"grad_norm": 0.3207753002643585,
"learning_rate": 9.886340617840968e-05,
"loss": 0.0451,
"step": 1720
},
{
"epoch": 3.516260162601626,
"grad_norm": 0.40109390020370483,
"learning_rate": 9.883991762625876e-05,
"loss": 0.052,
"step": 1730
},
{
"epoch": 3.5365853658536586,
"grad_norm": 0.3837567865848541,
"learning_rate": 9.881619169375908e-05,
"loss": 0.0455,
"step": 1740
},
{
"epoch": 3.556910569105691,
"grad_norm": 0.327057421207428,
"learning_rate": 9.879222849622758e-05,
"loss": 0.0487,
"step": 1750
},
{
"epoch": 3.5772357723577235,
"grad_norm": 0.34808802604675293,
"learning_rate": 9.876802815013439e-05,
"loss": 0.049,
"step": 1760
},
{
"epoch": 3.597560975609756,
"grad_norm": 0.28107383847236633,
"learning_rate": 9.87435907731023e-05,
"loss": 0.0449,
"step": 1770
},
{
"epoch": 3.617886178861789,
"grad_norm": 0.41478756070137024,
"learning_rate": 9.871891648390614e-05,
"loss": 0.0465,
"step": 1780
},
{
"epoch": 3.638211382113821,
"grad_norm": 0.40074285864830017,
"learning_rate": 9.869400540247223e-05,
"loss": 0.0507,
"step": 1790
},
{
"epoch": 3.658536585365854,
"grad_norm": 0.45639923214912415,
"learning_rate": 9.866885764987776e-05,
"loss": 0.0483,
"step": 1800
},
{
"epoch": 3.678861788617886,
"grad_norm": 0.36190950870513916,
"learning_rate": 9.86434733483503e-05,
"loss": 0.0479,
"step": 1810
},
{
"epoch": 3.6991869918699187,
"grad_norm": 0.24315151572227478,
"learning_rate": 9.861785262126705e-05,
"loss": 0.0432,
"step": 1820
},
{
"epoch": 3.7195121951219514,
"grad_norm": 0.391323983669281,
"learning_rate": 9.85919955931544e-05,
"loss": 0.0439,
"step": 1830
},
{
"epoch": 3.7398373983739837,
"grad_norm": 0.3237808346748352,
"learning_rate": 9.856590238968721e-05,
"loss": 0.0488,
"step": 1840
},
{
"epoch": 3.7601626016260163,
"grad_norm": 0.3073022663593292,
"learning_rate": 9.853957313768824e-05,
"loss": 0.0442,
"step": 1850
},
{
"epoch": 3.7804878048780486,
"grad_norm": 0.30783456563949585,
"learning_rate": 9.851300796512755e-05,
"loss": 0.0515,
"step": 1860
},
{
"epoch": 3.8008130081300813,
"grad_norm": 0.2766580879688263,
"learning_rate": 9.848620700112188e-05,
"loss": 0.0484,
"step": 1870
},
{
"epoch": 3.821138211382114,
"grad_norm": 0.4244973063468933,
"learning_rate": 9.845917037593396e-05,
"loss": 0.0405,
"step": 1880
},
{
"epoch": 3.841463414634146,
"grad_norm": 0.29562023282051086,
"learning_rate": 9.843189822097196e-05,
"loss": 0.039,
"step": 1890
},
{
"epoch": 3.861788617886179,
"grad_norm": 0.29623448848724365,
"learning_rate": 9.84043906687888e-05,
"loss": 0.044,
"step": 1900
},
{
"epoch": 3.882113821138211,
"grad_norm": 0.3283941149711609,
"learning_rate": 9.837664785308149e-05,
"loss": 0.0449,
"step": 1910
},
{
"epoch": 3.902439024390244,
"grad_norm": 0.32358860969543457,
"learning_rate": 9.834866990869059e-05,
"loss": 0.0447,
"step": 1920
},
{
"epoch": 3.9227642276422765,
"grad_norm": 0.18848779797554016,
"learning_rate": 9.832045697159938e-05,
"loss": 0.0466,
"step": 1930
},
{
"epoch": 3.943089430894309,
"grad_norm": 0.38725459575653076,
"learning_rate": 9.829200917893334e-05,
"loss": 0.0446,
"step": 1940
},
{
"epoch": 3.9634146341463414,
"grad_norm": 0.3182893991470337,
"learning_rate": 9.826332666895944e-05,
"loss": 0.0422,
"step": 1950
},
{
"epoch": 3.983739837398374,
"grad_norm": 0.27023324370384216,
"learning_rate": 9.823440958108545e-05,
"loss": 0.0374,
"step": 1960
},
{
"epoch": 4.004065040650406,
"grad_norm": 0.47869518399238586,
"learning_rate": 9.820525805585927e-05,
"loss": 0.0466,
"step": 1970
},
{
"epoch": 4.024390243902439,
"grad_norm": 0.3792267441749573,
"learning_rate": 9.81758722349683e-05,
"loss": 0.0469,
"step": 1980
},
{
"epoch": 4.044715447154472,
"grad_norm": 0.3714332580566406,
"learning_rate": 9.814625226123862e-05,
"loss": 0.0398,
"step": 1990
},
{
"epoch": 4.065040650406504,
"grad_norm": 0.35444778203964233,
"learning_rate": 9.811639827863449e-05,
"loss": 0.0406,
"step": 2000
},
{
"epoch": 4.085365853658536,
"grad_norm": 0.23252159357070923,
"learning_rate": 9.808631043225741e-05,
"loss": 0.0378,
"step": 2010
},
{
"epoch": 4.105691056910569,
"grad_norm": 0.3677350580692291,
"learning_rate": 9.805598886834567e-05,
"loss": 0.041,
"step": 2020
},
{
"epoch": 4.126016260162602,
"grad_norm": 0.30878278613090515,
"learning_rate": 9.802543373427344e-05,
"loss": 0.0427,
"step": 2030
},
{
"epoch": 4.146341463414634,
"grad_norm": 0.24736954271793365,
"learning_rate": 9.799464517855018e-05,
"loss": 0.0475,
"step": 2040
},
{
"epoch": 4.166666666666667,
"grad_norm": 0.3511386215686798,
"learning_rate": 9.79636233508198e-05,
"loss": 0.045,
"step": 2050
},
{
"epoch": 4.186991869918699,
"grad_norm": 0.41636860370635986,
"learning_rate": 9.793236840186005e-05,
"loss": 0.0392,
"step": 2060
},
{
"epoch": 4.2073170731707314,
"grad_norm": 0.4612429141998291,
"learning_rate": 9.790088048358175e-05,
"loss": 0.0462,
"step": 2070
},
{
"epoch": 4.227642276422764,
"grad_norm": 0.30272865295410156,
"learning_rate": 9.786915974902798e-05,
"loss": 0.0436,
"step": 2080
},
{
"epoch": 4.247967479674797,
"grad_norm": 0.39128997921943665,
"learning_rate": 9.783720635237343e-05,
"loss": 0.0366,
"step": 2090
},
{
"epoch": 4.2682926829268295,
"grad_norm": 0.2508282959461212,
"learning_rate": 9.780502044892362e-05,
"loss": 0.0383,
"step": 2100
},
{
"epoch": 4.288617886178862,
"grad_norm": 0.39258867502212524,
"learning_rate": 9.777260219511415e-05,
"loss": 0.0387,
"step": 2110
},
{
"epoch": 4.308943089430894,
"grad_norm": 0.25197938084602356,
"learning_rate": 9.773995174850989e-05,
"loss": 0.0361,
"step": 2120
},
{
"epoch": 4.329268292682927,
"grad_norm": 0.32829299569129944,
"learning_rate": 9.770706926780428e-05,
"loss": 0.0406,
"step": 2130
},
{
"epoch": 4.349593495934959,
"grad_norm": 0.33921363949775696,
"learning_rate": 9.767395491281855e-05,
"loss": 0.0389,
"step": 2140
},
{
"epoch": 4.369918699186992,
"grad_norm": 0.23240751028060913,
"learning_rate": 9.764060884450086e-05,
"loss": 0.0471,
"step": 2150
},
{
"epoch": 4.390243902439025,
"grad_norm": 0.3543383777141571,
"learning_rate": 9.76070312249257e-05,
"loss": 0.0397,
"step": 2160
},
{
"epoch": 4.4105691056910565,
"grad_norm": 0.2758557200431824,
"learning_rate": 9.757322221729283e-05,
"loss": 0.0361,
"step": 2170
},
{
"epoch": 4.430894308943089,
"grad_norm": 0.37993648648262024,
"learning_rate": 9.753918198592682e-05,
"loss": 0.0417,
"step": 2180
},
{
"epoch": 4.451219512195122,
"grad_norm": 0.2048022300004959,
"learning_rate": 9.750491069627593e-05,
"loss": 0.0374,
"step": 2190
},
{
"epoch": 4.471544715447155,
"grad_norm": 0.3218154013156891,
"learning_rate": 9.747040851491149e-05,
"loss": 0.037,
"step": 2200
},
{
"epoch": 4.491869918699187,
"grad_norm": 0.20155727863311768,
"learning_rate": 9.743567560952711e-05,
"loss": 0.0388,
"step": 2210
},
{
"epoch": 4.512195121951219,
"grad_norm": 0.2601895034313202,
"learning_rate": 9.740071214893773e-05,
"loss": 0.038,
"step": 2220
},
{
"epoch": 4.532520325203252,
"grad_norm": 0.18895843625068665,
"learning_rate": 9.736551830307892e-05,
"loss": 0.0402,
"step": 2230
},
{
"epoch": 4.5528455284552845,
"grad_norm": 0.25968900322914124,
"learning_rate": 9.733009424300597e-05,
"loss": 0.036,
"step": 2240
},
{
"epoch": 4.573170731707317,
"grad_norm": 0.22050248086452484,
"learning_rate": 9.729444014089314e-05,
"loss": 0.0399,
"step": 2250
},
{
"epoch": 4.59349593495935,
"grad_norm": 0.3891421854496002,
"learning_rate": 9.725855617003275e-05,
"loss": 0.0418,
"step": 2260
},
{
"epoch": 4.613821138211382,
"grad_norm": 0.2956850528717041,
"learning_rate": 9.72224425048344e-05,
"loss": 0.0378,
"step": 2270
},
{
"epoch": 4.634146341463414,
"grad_norm": 0.2368791252374649,
"learning_rate": 9.718609932082405e-05,
"loss": 0.041,
"step": 2280
},
{
"epoch": 4.654471544715447,
"grad_norm": 0.3309593200683594,
"learning_rate": 9.714952679464323e-05,
"loss": 0.0397,
"step": 2290
},
{
"epoch": 4.67479674796748,
"grad_norm": 0.32733872532844543,
"learning_rate": 9.711272510404816e-05,
"loss": 0.0382,
"step": 2300
},
{
"epoch": 4.695121951219512,
"grad_norm": 0.30426713824272156,
"learning_rate": 9.70756944279089e-05,
"loss": 0.0443,
"step": 2310
},
{
"epoch": 4.715447154471545,
"grad_norm": 0.23480118811130524,
"learning_rate": 9.70384349462084e-05,
"loss": 0.0346,
"step": 2320
},
{
"epoch": 4.735772357723577,
"grad_norm": 0.45616215467453003,
"learning_rate": 9.700094684004182e-05,
"loss": 0.0452,
"step": 2330
},
{
"epoch": 4.7560975609756095,
"grad_norm": 0.3060383200645447,
"learning_rate": 9.696323029161535e-05,
"loss": 0.0399,
"step": 2340
},
{
"epoch": 4.776422764227642,
"grad_norm": 0.410624235868454,
"learning_rate": 9.692528548424567e-05,
"loss": 0.0443,
"step": 2350
},
{
"epoch": 4.796747967479675,
"grad_norm": 0.38253292441368103,
"learning_rate": 9.688711260235872e-05,
"loss": 0.0377,
"step": 2360
},
{
"epoch": 4.817073170731708,
"grad_norm": 0.2503706216812134,
"learning_rate": 9.684871183148912e-05,
"loss": 0.0356,
"step": 2370
},
{
"epoch": 4.83739837398374,
"grad_norm": 0.2619319260120392,
"learning_rate": 9.681008335827898e-05,
"loss": 0.0361,
"step": 2380
},
{
"epoch": 4.857723577235772,
"grad_norm": 0.18718576431274414,
"learning_rate": 9.677122737047724e-05,
"loss": 0.0363,
"step": 2390
},
{
"epoch": 4.878048780487805,
"grad_norm": 0.27881988883018494,
"learning_rate": 9.673214405693857e-05,
"loss": 0.0361,
"step": 2400
},
{
"epoch": 4.8983739837398375,
"grad_norm": 0.22834157943725586,
"learning_rate": 9.669283360762258e-05,
"loss": 0.0303,
"step": 2410
},
{
"epoch": 4.91869918699187,
"grad_norm": 0.23578988015651703,
"learning_rate": 9.66532962135928e-05,
"loss": 0.0348,
"step": 2420
},
{
"epoch": 4.939024390243903,
"grad_norm": 0.3880244195461273,
"learning_rate": 9.661353206701582e-05,
"loss": 0.0328,
"step": 2430
},
{
"epoch": 4.959349593495935,
"grad_norm": 0.26335278153419495,
"learning_rate": 9.657354136116035e-05,
"loss": 0.0358,
"step": 2440
},
{
"epoch": 4.979674796747967,
"grad_norm": 0.37027862668037415,
"learning_rate": 9.653332429039625e-05,
"loss": 0.039,
"step": 2450
},
{
"epoch": 5.0,
"grad_norm": 0.29535719752311707,
"learning_rate": 9.649288105019356e-05,
"loss": 0.0394,
"step": 2460
},
{
"epoch": 5.020325203252033,
"grad_norm": 0.32351961731910706,
"learning_rate": 9.645221183712165e-05,
"loss": 0.0358,
"step": 2470
},
{
"epoch": 5.040650406504065,
"grad_norm": 0.2990739345550537,
"learning_rate": 9.641131684884817e-05,
"loss": 0.0349,
"step": 2480
},
{
"epoch": 5.060975609756097,
"grad_norm": 0.4529745876789093,
"learning_rate": 9.637019628413813e-05,
"loss": 0.0351,
"step": 2490
},
{
"epoch": 5.08130081300813,
"grad_norm": 0.2561455965042114,
"learning_rate": 9.632885034285291e-05,
"loss": 0.0368,
"step": 2500
},
{
"epoch": 5.1016260162601625,
"grad_norm": 0.3451431691646576,
"learning_rate": 9.628727922594931e-05,
"loss": 0.0353,
"step": 2510
},
{
"epoch": 5.121951219512195,
"grad_norm": 0.25922685861587524,
"learning_rate": 9.624548313547862e-05,
"loss": 0.0351,
"step": 2520
},
{
"epoch": 5.142276422764228,
"grad_norm": 0.29846322536468506,
"learning_rate": 9.620346227458547e-05,
"loss": 0.0327,
"step": 2530
},
{
"epoch": 5.16260162601626,
"grad_norm": 0.2221139669418335,
"learning_rate": 9.616121684750712e-05,
"loss": 0.0381,
"step": 2540
},
{
"epoch": 5.182926829268292,
"grad_norm": 0.26518428325653076,
"learning_rate": 9.611874705957215e-05,
"loss": 0.0381,
"step": 2550
},
{
"epoch": 5.203252032520325,
"grad_norm": 0.20282354950904846,
"learning_rate": 9.607605311719972e-05,
"loss": 0.0381,
"step": 2560
},
{
"epoch": 5.223577235772358,
"grad_norm": 0.25701841711997986,
"learning_rate": 9.603313522789841e-05,
"loss": 0.0355,
"step": 2570
},
{
"epoch": 5.2439024390243905,
"grad_norm": 0.33082619309425354,
"learning_rate": 9.598999360026529e-05,
"loss": 0.036,
"step": 2580
},
{
"epoch": 5.264227642276423,
"grad_norm": 0.23188990354537964,
"learning_rate": 9.59466284439849e-05,
"loss": 0.0368,
"step": 2590
},
{
"epoch": 5.284552845528455,
"grad_norm": 0.3070012927055359,
"learning_rate": 9.590303996982815e-05,
"loss": 0.0318,
"step": 2600
},
{
"epoch": 5.304878048780488,
"grad_norm": 0.2157573103904724,
"learning_rate": 9.585922838965145e-05,
"loss": 0.0311,
"step": 2610
},
{
"epoch": 5.32520325203252,
"grad_norm": 0.16244249045848846,
"learning_rate": 9.581519391639549e-05,
"loss": 0.0335,
"step": 2620
},
{
"epoch": 5.345528455284553,
"grad_norm": 0.23334026336669922,
"learning_rate": 9.577093676408439e-05,
"loss": 0.0293,
"step": 2630
},
{
"epoch": 5.365853658536586,
"grad_norm": 0.27338385581970215,
"learning_rate": 9.572645714782453e-05,
"loss": 0.0307,
"step": 2640
},
{
"epoch": 5.3861788617886175,
"grad_norm": 0.2784765958786011,
"learning_rate": 9.568175528380354e-05,
"loss": 0.0366,
"step": 2650
},
{
"epoch": 5.40650406504065,
"grad_norm": 0.3815922439098358,
"learning_rate": 9.56368313892893e-05,
"loss": 0.0393,
"step": 2660
},
{
"epoch": 5.426829268292683,
"grad_norm": 0.431367427110672,
"learning_rate": 9.55916856826288e-05,
"loss": 0.0372,
"step": 2670
},
{
"epoch": 5.4471544715447155,
"grad_norm": 0.3358304798603058,
"learning_rate": 9.554631838324713e-05,
"loss": 0.0398,
"step": 2680
},
{
"epoch": 5.467479674796748,
"grad_norm": 0.24542434513568878,
"learning_rate": 9.55007297116464e-05,
"loss": 0.0346,
"step": 2690
},
{
"epoch": 5.487804878048781,
"grad_norm": 0.35996758937835693,
"learning_rate": 9.545491988940472e-05,
"loss": 0.0389,
"step": 2700
},
{
"epoch": 5.508130081300813,
"grad_norm": 0.34098583459854126,
"learning_rate": 9.540888913917501e-05,
"loss": 0.0399,
"step": 2710
},
{
"epoch": 5.528455284552845,
"grad_norm": 0.4673711955547333,
"learning_rate": 9.536263768468401e-05,
"loss": 0.033,
"step": 2720
},
{
"epoch": 5.548780487804878,
"grad_norm": 0.24956433475017548,
"learning_rate": 9.531616575073117e-05,
"loss": 0.0364,
"step": 2730
},
{
"epoch": 5.569105691056911,
"grad_norm": 0.3244880139827728,
"learning_rate": 9.526947356318754e-05,
"loss": 0.0365,
"step": 2740
},
{
"epoch": 5.5894308943089435,
"grad_norm": 0.28384968638420105,
"learning_rate": 9.52225613489947e-05,
"loss": 0.0331,
"step": 2750
},
{
"epoch": 5.609756097560975,
"grad_norm": 0.20069807767868042,
"learning_rate": 9.517542933616365e-05,
"loss": 0.0335,
"step": 2760
},
{
"epoch": 5.630081300813008,
"grad_norm": 0.28787484765052795,
"learning_rate": 9.512807775377366e-05,
"loss": 0.0376,
"step": 2770
},
{
"epoch": 5.650406504065041,
"grad_norm": 0.3660148084163666,
"learning_rate": 9.508050683197121e-05,
"loss": 0.0411,
"step": 2780
},
{
"epoch": 5.670731707317073,
"grad_norm": 0.2562386095523834,
"learning_rate": 9.503271680196888e-05,
"loss": 0.0394,
"step": 2790
},
{
"epoch": 5.691056910569106,
"grad_norm": 0.3705272674560547,
"learning_rate": 9.498470789604413e-05,
"loss": 0.0393,
"step": 2800
},
{
"epoch": 5.711382113821138,
"grad_norm": 0.23900169134140015,
"learning_rate": 9.49364803475383e-05,
"loss": 0.0362,
"step": 2810
},
{
"epoch": 5.7317073170731705,
"grad_norm": 0.3208228349685669,
"learning_rate": 9.48880343908554e-05,
"loss": 0.0334,
"step": 2820
},
{
"epoch": 5.752032520325203,
"grad_norm": 0.22293923795223236,
"learning_rate": 9.4839370261461e-05,
"loss": 0.0334,
"step": 2830
},
{
"epoch": 5.772357723577236,
"grad_norm": 0.24014882743358612,
"learning_rate": 9.479048819588098e-05,
"loss": 0.0375,
"step": 2840
},
{
"epoch": 5.7926829268292686,
"grad_norm": 0.2373969554901123,
"learning_rate": 9.474138843170063e-05,
"loss": 0.0361,
"step": 2850
},
{
"epoch": 5.8130081300813,
"grad_norm": 0.22191618382930756,
"learning_rate": 9.46920712075632e-05,
"loss": 0.0327,
"step": 2860
},
{
"epoch": 5.833333333333333,
"grad_norm": 0.21625275909900665,
"learning_rate": 9.464253676316893e-05,
"loss": 0.03,
"step": 2870
},
{
"epoch": 5.853658536585366,
"grad_norm": 0.21678130328655243,
"learning_rate": 9.459278533927384e-05,
"loss": 0.0314,
"step": 2880
},
{
"epoch": 5.873983739837398,
"grad_norm": 0.26047956943511963,
"learning_rate": 9.454281717768854e-05,
"loss": 0.0329,
"step": 2890
},
{
"epoch": 5.894308943089431,
"grad_norm": 0.24275684356689453,
"learning_rate": 9.449263252127708e-05,
"loss": 0.0352,
"step": 2900
},
{
"epoch": 5.914634146341464,
"grad_norm": 0.3038444519042969,
"learning_rate": 9.444223161395573e-05,
"loss": 0.0405,
"step": 2910
},
{
"epoch": 5.934959349593496,
"grad_norm": 0.21918287873268127,
"learning_rate": 9.439161470069184e-05,
"loss": 0.0322,
"step": 2920
},
{
"epoch": 5.955284552845528,
"grad_norm": 0.3447254002094269,
"learning_rate": 9.43407820275026e-05,
"loss": 0.0302,
"step": 2930
},
{
"epoch": 5.975609756097561,
"grad_norm": 0.23503296077251434,
"learning_rate": 9.428973384145396e-05,
"loss": 0.0322,
"step": 2940
},
{
"epoch": 5.995934959349594,
"grad_norm": 0.21576885879039764,
"learning_rate": 9.423847039065922e-05,
"loss": 0.0353,
"step": 2950
},
{
"epoch": 6.016260162601626,
"grad_norm": 0.23675313591957092,
"learning_rate": 9.418699192427805e-05,
"loss": 0.0378,
"step": 2960
},
{
"epoch": 6.036585365853658,
"grad_norm": 0.16742359101772308,
"learning_rate": 9.41352986925151e-05,
"loss": 0.0372,
"step": 2970
},
{
"epoch": 6.056910569105691,
"grad_norm": 0.21645894646644592,
"learning_rate": 9.408339094661895e-05,
"loss": 0.0329,
"step": 2980
},
{
"epoch": 6.0772357723577235,
"grad_norm": 0.2730005383491516,
"learning_rate": 9.40312689388807e-05,
"loss": 0.0348,
"step": 2990
},
{
"epoch": 6.097560975609756,
"grad_norm": 0.2974849343299866,
"learning_rate": 9.397893292263292e-05,
"loss": 0.0351,
"step": 3000
},
{
"epoch": 6.117886178861789,
"grad_norm": 0.2710169851779938,
"learning_rate": 9.392638315224829e-05,
"loss": 0.0342,
"step": 3010
},
{
"epoch": 6.138211382113822,
"grad_norm": 0.2089296132326126,
"learning_rate": 9.387361988313846e-05,
"loss": 0.0388,
"step": 3020
},
{
"epoch": 6.158536585365853,
"grad_norm": 0.2190776765346527,
"learning_rate": 9.38206433717527e-05,
"loss": 0.0313,
"step": 3030
},
{
"epoch": 6.178861788617886,
"grad_norm": 0.29918304085731506,
"learning_rate": 9.376745387557681e-05,
"loss": 0.0289,
"step": 3040
},
{
"epoch": 6.199186991869919,
"grad_norm": 0.39586132764816284,
"learning_rate": 9.371405165313169e-05,
"loss": 0.0392,
"step": 3050
},
{
"epoch": 6.219512195121951,
"grad_norm": 0.2960090935230255,
"learning_rate": 9.366043696397222e-05,
"loss": 0.0374,
"step": 3060
},
{
"epoch": 6.239837398373984,
"grad_norm": 0.2631392478942871,
"learning_rate": 9.360661006868592e-05,
"loss": 0.0336,
"step": 3070
},
{
"epoch": 6.260162601626016,
"grad_norm": 0.2667069137096405,
"learning_rate": 9.355257122889173e-05,
"loss": 0.0308,
"step": 3080
},
{
"epoch": 6.280487804878049,
"grad_norm": 0.1516672670841217,
"learning_rate": 9.349832070723871e-05,
"loss": 0.0316,
"step": 3090
},
{
"epoch": 6.300813008130081,
"grad_norm": 0.22945436835289001,
"learning_rate": 9.34438587674048e-05,
"loss": 0.0329,
"step": 3100
},
{
"epoch": 6.321138211382114,
"grad_norm": 0.19599388539791107,
"learning_rate": 9.338918567409545e-05,
"loss": 0.0349,
"step": 3110
},
{
"epoch": 6.341463414634147,
"grad_norm": 0.19596216082572937,
"learning_rate": 9.333430169304247e-05,
"loss": 0.0285,
"step": 3120
},
{
"epoch": 6.361788617886178,
"grad_norm": 0.3172271251678467,
"learning_rate": 9.327920709100259e-05,
"loss": 0.035,
"step": 3130
},
{
"epoch": 6.382113821138211,
"grad_norm": 0.29143592715263367,
"learning_rate": 9.322390213575631e-05,
"loss": 0.0339,
"step": 3140
},
{
"epoch": 6.402439024390244,
"grad_norm": 0.21700842678546906,
"learning_rate": 9.316838709610648e-05,
"loss": 0.0307,
"step": 3150
},
{
"epoch": 6.4227642276422765,
"grad_norm": 0.33240920305252075,
"learning_rate": 9.311266224187706e-05,
"loss": 0.0317,
"step": 3160
},
{
"epoch": 6.443089430894309,
"grad_norm": 0.25290894508361816,
"learning_rate": 9.305672784391175e-05,
"loss": 0.0333,
"step": 3170
},
{
"epoch": 6.463414634146342,
"grad_norm": 0.30824029445648193,
"learning_rate": 9.300058417407276e-05,
"loss": 0.0284,
"step": 3180
},
{
"epoch": 6.483739837398374,
"grad_norm": 0.24259643256664276,
"learning_rate": 9.29442315052394e-05,
"loss": 0.0306,
"step": 3190
},
{
"epoch": 6.504065040650406,
"grad_norm": 0.2715694010257721,
"learning_rate": 9.288767011130684e-05,
"loss": 0.0372,
"step": 3200
},
{
"epoch": 6.524390243902439,
"grad_norm": 0.22602279484272003,
"learning_rate": 9.283090026718466e-05,
"loss": 0.0332,
"step": 3210
},
{
"epoch": 6.544715447154472,
"grad_norm": 0.24035237729549408,
"learning_rate": 9.277392224879568e-05,
"loss": 0.0301,
"step": 3220
},
{
"epoch": 6.565040650406504,
"grad_norm": 0.2930043637752533,
"learning_rate": 9.271673633307445e-05,
"loss": 0.0343,
"step": 3230
},
{
"epoch": 6.585365853658536,
"grad_norm": 0.27956822514533997,
"learning_rate": 9.265934279796602e-05,
"loss": 0.0266,
"step": 3240
},
{
"epoch": 6.605691056910569,
"grad_norm": 0.2060442715883255,
"learning_rate": 9.260174192242453e-05,
"loss": 0.0282,
"step": 3250
},
{
"epoch": 6.626016260162602,
"grad_norm": 0.18642476201057434,
"learning_rate": 9.254393398641185e-05,
"loss": 0.0384,
"step": 3260
},
{
"epoch": 6.646341463414634,
"grad_norm": 0.28357410430908203,
"learning_rate": 9.248591927089628e-05,
"loss": 0.0362,
"step": 3270
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.23892942070960999,
"learning_rate": 9.242769805785115e-05,
"loss": 0.032,
"step": 3280
},
{
"epoch": 6.6869918699187,
"grad_norm": 0.3044309616088867,
"learning_rate": 9.236927063025342e-05,
"loss": 0.0327,
"step": 3290
},
{
"epoch": 6.7073170731707314,
"grad_norm": 0.300341933965683,
"learning_rate": 9.231063727208234e-05,
"loss": 0.03,
"step": 3300
},
{
"epoch": 6.727642276422764,
"grad_norm": 0.32395273447036743,
"learning_rate": 9.225179826831807e-05,
"loss": 0.0282,
"step": 3310
},
{
"epoch": 6.747967479674797,
"grad_norm": 0.2567685842514038,
"learning_rate": 9.219275390494024e-05,
"loss": 0.0302,
"step": 3320
},
{
"epoch": 6.7682926829268295,
"grad_norm": 0.2030506134033203,
"learning_rate": 9.213350446892668e-05,
"loss": 0.0318,
"step": 3330
},
{
"epoch": 6.788617886178862,
"grad_norm": 0.22847704589366913,
"learning_rate": 9.207405024825186e-05,
"loss": 0.0311,
"step": 3340
},
{
"epoch": 6.808943089430894,
"grad_norm": 0.18336626887321472,
"learning_rate": 9.201439153188569e-05,
"loss": 0.032,
"step": 3350
},
{
"epoch": 6.829268292682927,
"grad_norm": 0.21650265157222748,
"learning_rate": 9.19545286097919e-05,
"loss": 0.0271,
"step": 3360
},
{
"epoch": 6.849593495934959,
"grad_norm": 0.2715393304824829,
"learning_rate": 9.189446177292679e-05,
"loss": 0.042,
"step": 3370
},
{
"epoch": 6.869918699186992,
"grad_norm": 0.3490676283836365,
"learning_rate": 9.183419131323778e-05,
"loss": 0.0332,
"step": 3380
},
{
"epoch": 6.890243902439025,
"grad_norm": 0.22679398953914642,
"learning_rate": 9.177371752366191e-05,
"loss": 0.0354,
"step": 3390
},
{
"epoch": 6.9105691056910565,
"grad_norm": 0.22772999107837677,
"learning_rate": 9.171304069812454e-05,
"loss": 0.0338,
"step": 3400
},
{
"epoch": 6.930894308943089,
"grad_norm": 0.21342279016971588,
"learning_rate": 9.165216113153782e-05,
"loss": 0.0342,
"step": 3410
},
{
"epoch": 6.951219512195122,
"grad_norm": 0.22726966440677643,
"learning_rate": 9.159107911979936e-05,
"loss": 0.0315,
"step": 3420
},
{
"epoch": 6.971544715447155,
"grad_norm": 0.2902209758758545,
"learning_rate": 9.152979495979063e-05,
"loss": 0.0303,
"step": 3430
},
{
"epoch": 6.991869918699187,
"grad_norm": 0.24953265488147736,
"learning_rate": 9.146830894937571e-05,
"loss": 0.0315,
"step": 3440
},
{
"epoch": 7.012195121951219,
"grad_norm": 0.21846653521060944,
"learning_rate": 9.140662138739969e-05,
"loss": 0.0263,
"step": 3450
},
{
"epoch": 7.032520325203252,
"grad_norm": 0.212110698223114,
"learning_rate": 9.134473257368732e-05,
"loss": 0.0301,
"step": 3460
},
{
"epoch": 7.0528455284552845,
"grad_norm": 0.2702498733997345,
"learning_rate": 9.128264280904145e-05,
"loss": 0.0255,
"step": 3470
},
{
"epoch": 7.073170731707317,
"grad_norm": 0.30093133449554443,
"learning_rate": 9.122035239524169e-05,
"loss": 0.0349,
"step": 3480
},
{
"epoch": 7.09349593495935,
"grad_norm": 0.2671796977519989,
"learning_rate": 9.115786163504285e-05,
"loss": 0.026,
"step": 3490
},
{
"epoch": 7.1138211382113825,
"grad_norm": 0.25653183460235596,
"learning_rate": 9.10951708321735e-05,
"loss": 0.0346,
"step": 3500
},
{
"epoch": 7.134146341463414,
"grad_norm": 0.3087276518344879,
"learning_rate": 9.10322802913345e-05,
"loss": 0.032,
"step": 3510
},
{
"epoch": 7.154471544715447,
"grad_norm": 0.21461734175682068,
"learning_rate": 9.096919031819751e-05,
"loss": 0.0317,
"step": 3520
},
{
"epoch": 7.17479674796748,
"grad_norm": 0.18722882866859436,
"learning_rate": 9.090590121940348e-05,
"loss": 0.0262,
"step": 3530
},
{
"epoch": 7.195121951219512,
"grad_norm": 0.2623171806335449,
"learning_rate": 9.084241330256121e-05,
"loss": 0.0329,
"step": 3540
},
{
"epoch": 7.215447154471545,
"grad_norm": 0.15801429748535156,
"learning_rate": 9.077872687624586e-05,
"loss": 0.0314,
"step": 3550
},
{
"epoch": 7.235772357723577,
"grad_norm": 0.3193058371543884,
"learning_rate": 9.071484224999735e-05,
"loss": 0.0336,
"step": 3560
},
{
"epoch": 7.2560975609756095,
"grad_norm": 0.22909541428089142,
"learning_rate": 9.0650759734319e-05,
"loss": 0.0324,
"step": 3570
},
{
"epoch": 7.276422764227642,
"grad_norm": 0.2797752320766449,
"learning_rate": 9.05864796406759e-05,
"loss": 0.0322,
"step": 3580
},
{
"epoch": 7.296747967479675,
"grad_norm": 0.19688351452350616,
"learning_rate": 9.052200228149343e-05,
"loss": 0.0272,
"step": 3590
},
{
"epoch": 7.317073170731708,
"grad_norm": 0.30110496282577515,
"learning_rate": 9.04573279701558e-05,
"loss": 0.0308,
"step": 3600
},
{
"epoch": 7.33739837398374,
"grad_norm": 0.19201114773750305,
"learning_rate": 9.039245702100448e-05,
"loss": 0.0257,
"step": 3610
},
{
"epoch": 7.357723577235772,
"grad_norm": 0.2802717387676239,
"learning_rate": 9.032738974933664e-05,
"loss": 0.0289,
"step": 3620
},
{
"epoch": 7.378048780487805,
"grad_norm": 0.2547508180141449,
"learning_rate": 9.026212647140365e-05,
"loss": 0.0285,
"step": 3630
},
{
"epoch": 7.3983739837398375,
"grad_norm": 0.34926745295524597,
"learning_rate": 9.019666750440956e-05,
"loss": 0.0263,
"step": 3640
},
{
"epoch": 7.41869918699187,
"grad_norm": 0.1898447424173355,
"learning_rate": 9.013101316650956e-05,
"loss": 0.0351,
"step": 3650
},
{
"epoch": 7.439024390243903,
"grad_norm": 0.33788618445396423,
"learning_rate": 9.00651637768084e-05,
"loss": 0.0243,
"step": 3660
},
{
"epoch": 7.459349593495935,
"grad_norm": 0.2711202800273895,
"learning_rate": 8.999911965535885e-05,
"loss": 0.0289,
"step": 3670
},
{
"epoch": 7.479674796747967,
"grad_norm": 0.22120773792266846,
"learning_rate": 8.993288112316014e-05,
"loss": 0.0313,
"step": 3680
},
{
"epoch": 7.5,
"grad_norm": 0.17247223854064941,
"learning_rate": 8.986644850215644e-05,
"loss": 0.0317,
"step": 3690
},
{
"epoch": 7.520325203252033,
"grad_norm": 0.2290961742401123,
"learning_rate": 8.979982211523523e-05,
"loss": 0.0308,
"step": 3700
},
{
"epoch": 7.540650406504065,
"grad_norm": 0.2302488088607788,
"learning_rate": 8.97330022862258e-05,
"loss": 0.0375,
"step": 3710
},
{
"epoch": 7.560975609756097,
"grad_norm": 0.2837287485599518,
"learning_rate": 8.96659893398976e-05,
"loss": 0.0364,
"step": 3720
},
{
"epoch": 7.58130081300813,
"grad_norm": 0.19260552525520325,
"learning_rate": 8.959878360195876e-05,
"loss": 0.0254,
"step": 3730
},
{
"epoch": 7.6016260162601625,
"grad_norm": 0.1930834949016571,
"learning_rate": 8.953138539905438e-05,
"loss": 0.0306,
"step": 3740
},
{
"epoch": 7.621951219512195,
"grad_norm": 0.2278723120689392,
"learning_rate": 8.946379505876506e-05,
"loss": 0.0325,
"step": 3750
},
{
"epoch": 7.642276422764228,
"grad_norm": 0.3849963843822479,
"learning_rate": 8.939601290960527e-05,
"loss": 0.0296,
"step": 3760
},
{
"epoch": 7.66260162601626,
"grad_norm": 0.34938275814056396,
"learning_rate": 8.932803928102167e-05,
"loss": 0.0415,
"step": 3770
},
{
"epoch": 7.682926829268292,
"grad_norm": 0.2133302241563797,
"learning_rate": 8.925987450339168e-05,
"loss": 0.0309,
"step": 3780
},
{
"epoch": 7.703252032520325,
"grad_norm": 0.1982925832271576,
"learning_rate": 8.919151890802172e-05,
"loss": 0.0261,
"step": 3790
},
{
"epoch": 7.723577235772358,
"grad_norm": 0.21407748758792877,
"learning_rate": 8.912297282714564e-05,
"loss": 0.0315,
"step": 3800
},
{
"epoch": 7.7439024390243905,
"grad_norm": 0.2122614085674286,
"learning_rate": 8.905423659392316e-05,
"loss": 0.0268,
"step": 3810
},
{
"epoch": 7.764227642276423,
"grad_norm": 0.2770064175128937,
"learning_rate": 8.898531054243822e-05,
"loss": 0.0331,
"step": 3820
},
{
"epoch": 7.784552845528455,
"grad_norm": 0.25917738676071167,
"learning_rate": 8.891619500769729e-05,
"loss": 0.0339,
"step": 3830
},
{
"epoch": 7.804878048780488,
"grad_norm": 0.20752471685409546,
"learning_rate": 8.884689032562785e-05,
"loss": 0.0282,
"step": 3840
},
{
"epoch": 7.82520325203252,
"grad_norm": 0.1928926408290863,
"learning_rate": 8.87773968330767e-05,
"loss": 0.0257,
"step": 3850
},
{
"epoch": 7.845528455284553,
"grad_norm": 0.18641775846481323,
"learning_rate": 8.870771486780832e-05,
"loss": 0.0288,
"step": 3860
},
{
"epoch": 7.865853658536586,
"grad_norm": 0.2024519294500351,
"learning_rate": 8.863784476850322e-05,
"loss": 0.0264,
"step": 3870
},
{
"epoch": 7.886178861788618,
"grad_norm": 0.37722542881965637,
"learning_rate": 8.856778687475635e-05,
"loss": 0.0265,
"step": 3880
},
{
"epoch": 7.90650406504065,
"grad_norm": 0.20428268611431122,
"learning_rate": 8.849754152707541e-05,
"loss": 0.0269,
"step": 3890
},
{
"epoch": 7.926829268292683,
"grad_norm": 0.20872460305690765,
"learning_rate": 8.842710906687916e-05,
"loss": 0.0294,
"step": 3900
},
{
"epoch": 7.9471544715447155,
"grad_norm": 0.24264350533485413,
"learning_rate": 8.83564898364958e-05,
"loss": 0.0302,
"step": 3910
},
{
"epoch": 7.967479674796748,
"grad_norm": 0.3859758973121643,
"learning_rate": 8.828568417916136e-05,
"loss": 0.0294,
"step": 3920
},
{
"epoch": 7.987804878048781,
"grad_norm": 0.4600236415863037,
"learning_rate": 8.821469243901794e-05,
"loss": 0.0316,
"step": 3930
},
{
"epoch": 8.008130081300813,
"grad_norm": 0.30919063091278076,
"learning_rate": 8.814351496111201e-05,
"loss": 0.0335,
"step": 3940
},
{
"epoch": 8.028455284552846,
"grad_norm": 0.22191697359085083,
"learning_rate": 8.807215209139293e-05,
"loss": 0.0315,
"step": 3950
},
{
"epoch": 8.048780487804878,
"grad_norm": 0.30846142768859863,
"learning_rate": 8.8000604176711e-05,
"loss": 0.0311,
"step": 3960
},
{
"epoch": 8.06910569105691,
"grad_norm": 0.195384681224823,
"learning_rate": 8.792887156481598e-05,
"loss": 0.0313,
"step": 3970
},
{
"epoch": 8.089430894308943,
"grad_norm": 0.19313912093639374,
"learning_rate": 8.785695460435534e-05,
"loss": 0.0263,
"step": 3980
},
{
"epoch": 8.109756097560975,
"grad_norm": 0.17431680858135223,
"learning_rate": 8.778485364487248e-05,
"loss": 0.0247,
"step": 3990
},
{
"epoch": 8.130081300813009,
"grad_norm": 0.3186069428920746,
"learning_rate": 8.771256903680519e-05,
"loss": 0.031,
"step": 4000
},
{
"epoch": 8.15040650406504,
"grad_norm": 0.31058987975120544,
"learning_rate": 8.764010113148382e-05,
"loss": 0.0306,
"step": 4010
},
{
"epoch": 8.170731707317072,
"grad_norm": 0.18163661658763885,
"learning_rate": 8.756745028112959e-05,
"loss": 0.0281,
"step": 4020
},
{
"epoch": 8.191056910569106,
"grad_norm": 0.22458802163600922,
"learning_rate": 8.749461683885296e-05,
"loss": 0.0312,
"step": 4030
},
{
"epoch": 8.211382113821138,
"grad_norm": 0.2205217480659485,
"learning_rate": 8.742160115865179e-05,
"loss": 0.0318,
"step": 4040
},
{
"epoch": 8.231707317073171,
"grad_norm": 0.23504704236984253,
"learning_rate": 8.734840359540974e-05,
"loss": 0.0298,
"step": 4050
},
{
"epoch": 8.252032520325203,
"grad_norm": 0.2698604166507721,
"learning_rate": 8.727502450489446e-05,
"loss": 0.0279,
"step": 4060
},
{
"epoch": 8.272357723577235,
"grad_norm": 0.25933247804641724,
"learning_rate": 8.720146424375591e-05,
"loss": 0.0292,
"step": 4070
},
{
"epoch": 8.292682926829269,
"grad_norm": 0.2248280793428421,
"learning_rate": 8.712772316952458e-05,
"loss": 0.021,
"step": 4080
},
{
"epoch": 8.3130081300813,
"grad_norm": 0.14027050137519836,
"learning_rate": 8.705380164060982e-05,
"loss": 0.0257,
"step": 4090
},
{
"epoch": 8.333333333333334,
"grad_norm": 0.17492854595184326,
"learning_rate": 8.697970001629799e-05,
"loss": 0.0309,
"step": 4100
},
{
"epoch": 8.353658536585366,
"grad_norm": 0.20115551352500916,
"learning_rate": 8.690541865675084e-05,
"loss": 0.0272,
"step": 4110
},
{
"epoch": 8.373983739837398,
"grad_norm": 0.2150316685438156,
"learning_rate": 8.68309579230037e-05,
"loss": 0.0267,
"step": 4120
},
{
"epoch": 8.394308943089431,
"grad_norm": 0.26918819546699524,
"learning_rate": 8.675631817696372e-05,
"loss": 0.0248,
"step": 4130
},
{
"epoch": 8.414634146341463,
"grad_norm": 0.26900121569633484,
"learning_rate": 8.668149978140808e-05,
"loss": 0.03,
"step": 4140
},
{
"epoch": 8.434959349593496,
"grad_norm": 0.2637103199958801,
"learning_rate": 8.66065030999823e-05,
"loss": 0.0251,
"step": 4150
},
{
"epoch": 8.455284552845528,
"grad_norm": 0.19074967503547668,
"learning_rate": 8.653132849719845e-05,
"loss": 0.0231,
"step": 4160
},
{
"epoch": 8.475609756097562,
"grad_norm": 0.2522426247596741,
"learning_rate": 8.64559763384333e-05,
"loss": 0.0297,
"step": 4170
},
{
"epoch": 8.495934959349594,
"grad_norm": 0.2497992217540741,
"learning_rate": 8.638044698992669e-05,
"loss": 0.0277,
"step": 4180
},
{
"epoch": 8.516260162601625,
"grad_norm": 0.2424316704273224,
"learning_rate": 8.630474081877959e-05,
"loss": 0.0313,
"step": 4190
},
{
"epoch": 8.536585365853659,
"grad_norm": 0.23851478099822998,
"learning_rate": 8.62288581929525e-05,
"loss": 0.0267,
"step": 4200
},
{
"epoch": 8.55691056910569,
"grad_norm": 0.2154664844274521,
"learning_rate": 8.615279948126343e-05,
"loss": 0.028,
"step": 4210
},
{
"epoch": 8.577235772357724,
"grad_norm": 0.19693239033222198,
"learning_rate": 8.60765650533863e-05,
"loss": 0.0257,
"step": 4220
},
{
"epoch": 8.597560975609756,
"grad_norm": 0.2548753321170807,
"learning_rate": 8.60001552798491e-05,
"loss": 0.023,
"step": 4230
},
{
"epoch": 8.617886178861788,
"grad_norm": 0.23749999701976776,
"learning_rate": 8.592357053203202e-05,
"loss": 0.0273,
"step": 4240
},
{
"epoch": 8.638211382113822,
"grad_norm": 0.23216712474822998,
"learning_rate": 8.58468111821657e-05,
"loss": 0.0284,
"step": 4250
},
{
"epoch": 8.658536585365853,
"grad_norm": 0.17603376507759094,
"learning_rate": 8.576987760332943e-05,
"loss": 0.0296,
"step": 4260
},
{
"epoch": 8.678861788617887,
"grad_norm": 0.21435929834842682,
"learning_rate": 8.56927701694493e-05,
"loss": 0.0269,
"step": 4270
},
{
"epoch": 8.699186991869919,
"grad_norm": 0.20830167829990387,
"learning_rate": 8.561548925529643e-05,
"loss": 0.0257,
"step": 4280
},
{
"epoch": 8.71951219512195,
"grad_norm": 0.17705325782299042,
"learning_rate": 8.553803523648506e-05,
"loss": 0.0241,
"step": 4290
},
{
"epoch": 8.739837398373984,
"grad_norm": 0.2524889409542084,
"learning_rate": 8.546040848947086e-05,
"loss": 0.0264,
"step": 4300
},
{
"epoch": 8.760162601626016,
"grad_norm": 0.27627524733543396,
"learning_rate": 8.538260939154894e-05,
"loss": 0.0268,
"step": 4310
},
{
"epoch": 8.78048780487805,
"grad_norm": 0.20441976189613342,
"learning_rate": 8.530463832085218e-05,
"loss": 0.0255,
"step": 4320
},
{
"epoch": 8.800813008130081,
"grad_norm": 0.2210913449525833,
"learning_rate": 8.522649565634927e-05,
"loss": 0.0242,
"step": 4330
},
{
"epoch": 8.821138211382113,
"grad_norm": 0.2106960266828537,
"learning_rate": 8.51481817778429e-05,
"loss": 0.0224,
"step": 4340
},
{
"epoch": 8.841463414634147,
"grad_norm": 0.17626696825027466,
"learning_rate": 8.506969706596797e-05,
"loss": 0.0232,
"step": 4350
},
{
"epoch": 8.861788617886178,
"grad_norm": 0.17255190014839172,
"learning_rate": 8.499104190218964e-05,
"loss": 0.0238,
"step": 4360
},
{
"epoch": 8.882113821138212,
"grad_norm": 0.22408510744571686,
"learning_rate": 8.49122166688016e-05,
"loss": 0.0233,
"step": 4370
},
{
"epoch": 8.902439024390244,
"grad_norm": 0.14295317232608795,
"learning_rate": 8.483322174892404e-05,
"loss": 0.0228,
"step": 4380
},
{
"epoch": 8.922764227642276,
"grad_norm": 0.20528960227966309,
"learning_rate": 8.475405752650199e-05,
"loss": 0.0301,
"step": 4390
},
{
"epoch": 8.94308943089431,
"grad_norm": 0.2917337417602539,
"learning_rate": 8.467472438630328e-05,
"loss": 0.0309,
"step": 4400
},
{
"epoch": 8.963414634146341,
"grad_norm": 0.2525673806667328,
"learning_rate": 8.459522271391682e-05,
"loss": 0.0248,
"step": 4410
},
{
"epoch": 8.983739837398375,
"grad_norm": 0.2192648947238922,
"learning_rate": 8.451555289575057e-05,
"loss": 0.0293,
"step": 4420
},
{
"epoch": 9.004065040650406,
"grad_norm": 0.2079612761735916,
"learning_rate": 8.443571531902981e-05,
"loss": 0.0287,
"step": 4430
},
{
"epoch": 9.024390243902438,
"grad_norm": 0.27692246437072754,
"learning_rate": 8.435571037179512e-05,
"loss": 0.0329,
"step": 4440
},
{
"epoch": 9.044715447154472,
"grad_norm": 0.21819686889648438,
"learning_rate": 8.427553844290062e-05,
"loss": 0.0248,
"step": 4450
},
{
"epoch": 9.065040650406504,
"grad_norm": 0.14863276481628418,
"learning_rate": 8.419519992201201e-05,
"loss": 0.027,
"step": 4460
},
{
"epoch": 9.085365853658537,
"grad_norm": 0.269767701625824,
"learning_rate": 8.411469519960469e-05,
"loss": 0.0246,
"step": 4470
},
{
"epoch": 9.105691056910569,
"grad_norm": 0.1592557430267334,
"learning_rate": 8.403402466696182e-05,
"loss": 0.0242,
"step": 4480
},
{
"epoch": 9.126016260162602,
"grad_norm": 0.21700291335582733,
"learning_rate": 8.395318871617255e-05,
"loss": 0.0245,
"step": 4490
},
{
"epoch": 9.146341463414634,
"grad_norm": 0.16219979524612427,
"learning_rate": 8.387218774012992e-05,
"loss": 0.0231,
"step": 4500
},
{
"epoch": 9.166666666666666,
"grad_norm": 0.19992409646511078,
"learning_rate": 8.379102213252915e-05,
"loss": 0.0294,
"step": 4510
},
{
"epoch": 9.1869918699187,
"grad_norm": 0.22747237980365753,
"learning_rate": 8.370969228786556e-05,
"loss": 0.024,
"step": 4520
},
{
"epoch": 9.207317073170731,
"grad_norm": 0.1960316151380539,
"learning_rate": 8.362819860143275e-05,
"loss": 0.0292,
"step": 4530
},
{
"epoch": 9.227642276422765,
"grad_norm": 0.20597191154956818,
"learning_rate": 8.354654146932066e-05,
"loss": 0.0258,
"step": 4540
},
{
"epoch": 9.247967479674797,
"grad_norm": 0.2013852894306183,
"learning_rate": 8.346472128841364e-05,
"loss": 0.0211,
"step": 4550
},
{
"epoch": 9.268292682926829,
"grad_norm": 0.20590724050998688,
"learning_rate": 8.338273845638848e-05,
"loss": 0.0244,
"step": 4560
},
{
"epoch": 9.288617886178862,
"grad_norm": 0.2510227859020233,
"learning_rate": 8.330059337171258e-05,
"loss": 0.0228,
"step": 4570
},
{
"epoch": 9.308943089430894,
"grad_norm": 0.22839580476284027,
"learning_rate": 8.32182864336419e-05,
"loss": 0.0247,
"step": 4580
},
{
"epoch": 9.329268292682928,
"grad_norm": 0.2431192696094513,
"learning_rate": 8.313581804221908e-05,
"loss": 0.0246,
"step": 4590
},
{
"epoch": 9.34959349593496,
"grad_norm": 0.17871959507465363,
"learning_rate": 8.305318859827147e-05,
"loss": 0.0218,
"step": 4600
},
{
"epoch": 9.369918699186991,
"grad_norm": 0.18869948387145996,
"learning_rate": 8.297039850340923e-05,
"loss": 0.0286,
"step": 4610
},
{
"epoch": 9.390243902439025,
"grad_norm": 0.24030964076519012,
"learning_rate": 8.288744816002331e-05,
"loss": 0.0264,
"step": 4620
},
{
"epoch": 9.410569105691057,
"grad_norm": 0.25945621728897095,
"learning_rate": 8.280433797128357e-05,
"loss": 0.0258,
"step": 4630
},
{
"epoch": 9.43089430894309,
"grad_norm": 0.28410178422927856,
"learning_rate": 8.272106834113674e-05,
"loss": 0.0264,
"step": 4640
},
{
"epoch": 9.451219512195122,
"grad_norm": 0.26565882563591003,
"learning_rate": 8.26376396743045e-05,
"loss": 0.0262,
"step": 4650
},
{
"epoch": 9.471544715447154,
"grad_norm": 0.27305731177330017,
"learning_rate": 8.25540523762815e-05,
"loss": 0.0337,
"step": 4660
},
{
"epoch": 9.491869918699187,
"grad_norm": 0.25521767139434814,
"learning_rate": 8.247030685333346e-05,
"loss": 0.0273,
"step": 4670
},
{
"epoch": 9.512195121951219,
"grad_norm": 0.22723345458507538,
"learning_rate": 8.238640351249503e-05,
"loss": 0.0209,
"step": 4680
},
{
"epoch": 9.532520325203253,
"grad_norm": 0.23032833635807037,
"learning_rate": 8.2302342761568e-05,
"loss": 0.0259,
"step": 4690
},
{
"epoch": 9.552845528455284,
"grad_norm": 0.26375725865364075,
"learning_rate": 8.221812500911919e-05,
"loss": 0.0295,
"step": 4700
},
{
"epoch": 9.573170731707316,
"grad_norm": 0.1927800178527832,
"learning_rate": 8.213375066447853e-05,
"loss": 0.0251,
"step": 4710
},
{
"epoch": 9.59349593495935,
"grad_norm": 0.2849136292934418,
"learning_rate": 8.204922013773702e-05,
"loss": 0.0258,
"step": 4720
},
{
"epoch": 9.613821138211382,
"grad_norm": 0.287185937166214,
"learning_rate": 8.196453383974478e-05,
"loss": 0.0265,
"step": 4730
},
{
"epoch": 9.634146341463415,
"grad_norm": 0.20117615163326263,
"learning_rate": 8.187969218210904e-05,
"loss": 0.0245,
"step": 4740
},
{
"epoch": 9.654471544715447,
"grad_norm": 0.2523585557937622,
"learning_rate": 8.179469557719213e-05,
"loss": 0.0265,
"step": 4750
},
{
"epoch": 9.67479674796748,
"grad_norm": 0.17117203772068024,
"learning_rate": 8.170954443810948e-05,
"loss": 0.0264,
"step": 4760
},
{
"epoch": 9.695121951219512,
"grad_norm": 0.17058758437633514,
"learning_rate": 8.162423917872764e-05,
"loss": 0.0277,
"step": 4770
},
{
"epoch": 9.715447154471544,
"grad_norm": 0.19005297124385834,
"learning_rate": 8.153878021366217e-05,
"loss": 0.0243,
"step": 4780
},
{
"epoch": 9.735772357723578,
"grad_norm": 0.2171137034893036,
"learning_rate": 8.14531679582758e-05,
"loss": 0.025,
"step": 4790
},
{
"epoch": 9.75609756097561,
"grad_norm": 0.21392473578453064,
"learning_rate": 8.136740282867621e-05,
"loss": 0.0296,
"step": 4800
},
{
"epoch": 9.776422764227643,
"grad_norm": 0.2003464698791504,
"learning_rate": 8.128148524171418e-05,
"loss": 0.0252,
"step": 4810
},
{
"epoch": 9.796747967479675,
"grad_norm": 0.17530317604541779,
"learning_rate": 8.119541561498146e-05,
"loss": 0.0221,
"step": 4820
},
{
"epoch": 9.817073170731707,
"grad_norm": 0.16956830024719238,
"learning_rate": 8.110919436680877e-05,
"loss": 0.0233,
"step": 4830
},
{
"epoch": 9.83739837398374,
"grad_norm": 0.1363949030637741,
"learning_rate": 8.102282191626378e-05,
"loss": 0.0305,
"step": 4840
},
{
"epoch": 9.857723577235772,
"grad_norm": 0.1618584543466568,
"learning_rate": 8.0936298683149e-05,
"loss": 0.0225,
"step": 4850
},
{
"epoch": 9.878048780487806,
"grad_norm": 0.18399667739868164,
"learning_rate": 8.084962508799991e-05,
"loss": 0.0234,
"step": 4860
},
{
"epoch": 9.898373983739837,
"grad_norm": 0.24996976554393768,
"learning_rate": 8.076280155208273e-05,
"loss": 0.021,
"step": 4870
},
{
"epoch": 9.91869918699187,
"grad_norm": 0.25863030552864075,
"learning_rate": 8.067582849739245e-05,
"loss": 0.0288,
"step": 4880
},
{
"epoch": 9.939024390243903,
"grad_norm": 0.22144946455955505,
"learning_rate": 8.058870634665079e-05,
"loss": 0.0232,
"step": 4890
},
{
"epoch": 9.959349593495935,
"grad_norm": 0.16196279227733612,
"learning_rate": 8.050143552330414e-05,
"loss": 0.021,
"step": 4900
},
{
"epoch": 9.979674796747968,
"grad_norm": 0.2325410395860672,
"learning_rate": 8.041401645152151e-05,
"loss": 0.0248,
"step": 4910
},
{
"epoch": 10.0,
"grad_norm": 0.20593298971652985,
"learning_rate": 8.032644955619239e-05,
"loss": 0.0229,
"step": 4920
},
{
"epoch": 10.020325203252032,
"grad_norm": 0.19367890059947968,
"learning_rate": 8.023873526292483e-05,
"loss": 0.0256,
"step": 4930
},
{
"epoch": 10.040650406504065,
"grad_norm": 0.2022327035665512,
"learning_rate": 8.015087399804322e-05,
"loss": 0.0227,
"step": 4940
},
{
"epoch": 10.060975609756097,
"grad_norm": 0.17880947887897491,
"learning_rate": 8.006286618858635e-05,
"loss": 0.0278,
"step": 4950
},
{
"epoch": 10.08130081300813,
"grad_norm": 0.1883138120174408,
"learning_rate": 7.99747122623052e-05,
"loss": 0.0208,
"step": 4960
},
{
"epoch": 10.101626016260163,
"grad_norm": 0.22545097768306732,
"learning_rate": 7.988641264766097e-05,
"loss": 0.0255,
"step": 4970
},
{
"epoch": 10.121951219512194,
"grad_norm": 0.14307765662670135,
"learning_rate": 7.9797967773823e-05,
"loss": 0.0299,
"step": 4980
},
{
"epoch": 10.142276422764228,
"grad_norm": 0.1591491848230362,
"learning_rate": 7.970937807066659e-05,
"loss": 0.0255,
"step": 4990
},
{
"epoch": 10.16260162601626,
"grad_norm": 0.26083746552467346,
"learning_rate": 7.962064396877098e-05,
"loss": 0.0259,
"step": 5000
},
{
"epoch": 10.182926829268293,
"grad_norm": 0.22072488069534302,
"learning_rate": 7.953176589941722e-05,
"loss": 0.0228,
"step": 5010
},
{
"epoch": 10.203252032520325,
"grad_norm": 0.2204706072807312,
"learning_rate": 7.944274429458614e-05,
"loss": 0.0237,
"step": 5020
},
{
"epoch": 10.223577235772357,
"grad_norm": 0.3005303740501404,
"learning_rate": 7.93535795869562e-05,
"loss": 0.0243,
"step": 5030
},
{
"epoch": 10.24390243902439,
"grad_norm": 0.15468287467956543,
"learning_rate": 7.926427220990134e-05,
"loss": 0.0236,
"step": 5040
},
{
"epoch": 10.264227642276422,
"grad_norm": 0.23432765901088715,
"learning_rate": 7.9174822597489e-05,
"loss": 0.0231,
"step": 5050
},
{
"epoch": 10.284552845528456,
"grad_norm": 0.212994784116745,
"learning_rate": 7.908523118447789e-05,
"loss": 0.0213,
"step": 5060
},
{
"epoch": 10.304878048780488,
"grad_norm": 0.2055157721042633,
"learning_rate": 7.89954984063159e-05,
"loss": 0.0194,
"step": 5070
},
{
"epoch": 10.32520325203252,
"grad_norm": 0.1862349510192871,
"learning_rate": 7.890562469913811e-05,
"loss": 0.0251,
"step": 5080
},
{
"epoch": 10.345528455284553,
"grad_norm": 0.18946106731891632,
"learning_rate": 7.881561049976447e-05,
"loss": 0.025,
"step": 5090
},
{
"epoch": 10.365853658536585,
"grad_norm": 0.12222699075937271,
"learning_rate": 7.872545624569779e-05,
"loss": 0.0273,
"step": 5100
},
{
"epoch": 10.386178861788618,
"grad_norm": 0.19618262350559235,
"learning_rate": 7.863516237512164e-05,
"loss": 0.0224,
"step": 5110
},
{
"epoch": 10.40650406504065,
"grad_norm": 0.2092956155538559,
"learning_rate": 7.854472932689815e-05,
"loss": 0.0269,
"step": 5120
},
{
"epoch": 10.426829268292684,
"grad_norm": 0.14048688113689423,
"learning_rate": 7.845415754056591e-05,
"loss": 0.0195,
"step": 5130
},
{
"epoch": 10.447154471544716,
"grad_norm": 0.24474139511585236,
"learning_rate": 7.836344745633783e-05,
"loss": 0.021,
"step": 5140
},
{
"epoch": 10.467479674796747,
"grad_norm": 0.20247799158096313,
"learning_rate": 7.8272599515099e-05,
"loss": 0.0202,
"step": 5150
},
{
"epoch": 10.487804878048781,
"grad_norm": 0.1718481332063675,
"learning_rate": 7.818161415840453e-05,
"loss": 0.0247,
"step": 5160
},
{
"epoch": 10.508130081300813,
"grad_norm": 0.1697309911251068,
"learning_rate": 7.809049182847745e-05,
"loss": 0.0182,
"step": 5170
},
{
"epoch": 10.528455284552846,
"grad_norm": 0.23599614202976227,
"learning_rate": 7.799923296820653e-05,
"loss": 0.0222,
"step": 5180
},
{
"epoch": 10.548780487804878,
"grad_norm": 0.15558257699012756,
"learning_rate": 7.790783802114408e-05,
"loss": 0.0262,
"step": 5190
},
{
"epoch": 10.56910569105691,
"grad_norm": 0.21534393727779388,
"learning_rate": 7.781630743150392e-05,
"loss": 0.028,
"step": 5200
},
{
"epoch": 10.589430894308943,
"grad_norm": 0.22029508650302887,
"learning_rate": 7.772464164415907e-05,
"loss": 0.0233,
"step": 5210
},
{
"epoch": 10.609756097560975,
"grad_norm": 0.1564619094133377,
"learning_rate": 7.763284110463973e-05,
"loss": 0.0221,
"step": 5220
},
{
"epoch": 10.630081300813009,
"grad_norm": 0.17775817215442657,
"learning_rate": 7.754090625913099e-05,
"loss": 0.0218,
"step": 5230
},
{
"epoch": 10.65040650406504,
"grad_norm": 0.3057042360305786,
"learning_rate": 7.744883755447075e-05,
"loss": 0.0235,
"step": 5240
},
{
"epoch": 10.670731707317072,
"grad_norm": 0.2097449004650116,
"learning_rate": 7.735663543814749e-05,
"loss": 0.0229,
"step": 5250
},
{
"epoch": 10.691056910569106,
"grad_norm": 0.2134266197681427,
"learning_rate": 7.726430035829813e-05,
"loss": 0.0239,
"step": 5260
},
{
"epoch": 10.711382113821138,
"grad_norm": 0.16942216455936432,
"learning_rate": 7.717183276370586e-05,
"loss": 0.0247,
"step": 5270
},
{
"epoch": 10.731707317073171,
"grad_norm": 0.20789335668087006,
"learning_rate": 7.707923310379794e-05,
"loss": 0.022,
"step": 5280
},
{
"epoch": 10.752032520325203,
"grad_norm": 0.17640726268291473,
"learning_rate": 7.698650182864351e-05,
"loss": 0.0219,
"step": 5290
},
{
"epoch": 10.772357723577235,
"grad_norm": 0.16752611100673676,
"learning_rate": 7.689363938895138e-05,
"loss": 0.0259,
"step": 5300
},
{
"epoch": 10.792682926829269,
"grad_norm": 0.2684347331523895,
"learning_rate": 7.680064623606791e-05,
"loss": 0.021,
"step": 5310
},
{
"epoch": 10.8130081300813,
"grad_norm": 0.238786518573761,
"learning_rate": 7.670752282197476e-05,
"loss": 0.0237,
"step": 5320
},
{
"epoch": 10.833333333333334,
"grad_norm": 0.21161924302577972,
"learning_rate": 7.66142695992867e-05,
"loss": 0.0195,
"step": 5330
},
{
"epoch": 10.853658536585366,
"grad_norm": 0.2526039183139801,
"learning_rate": 7.652088702124944e-05,
"loss": 0.0245,
"step": 5340
},
{
"epoch": 10.8739837398374,
"grad_norm": 0.25313472747802734,
"learning_rate": 7.64273755417374e-05,
"loss": 0.0195,
"step": 5350
},
{
"epoch": 10.894308943089431,
"grad_norm": 0.19629882276058197,
"learning_rate": 7.633373561525148e-05,
"loss": 0.0224,
"step": 5360
},
{
"epoch": 10.914634146341463,
"grad_norm": 0.3054946959018707,
"learning_rate": 7.623996769691691e-05,
"loss": 0.0238,
"step": 5370
},
{
"epoch": 10.934959349593496,
"grad_norm": 0.19822648167610168,
"learning_rate": 7.614607224248103e-05,
"loss": 0.0209,
"step": 5380
},
{
"epoch": 10.955284552845528,
"grad_norm": 0.15420231223106384,
"learning_rate": 7.605204970831096e-05,
"loss": 0.0197,
"step": 5390
},
{
"epoch": 10.975609756097562,
"grad_norm": 0.18817901611328125,
"learning_rate": 7.595790055139163e-05,
"loss": 0.0215,
"step": 5400
},
{
"epoch": 10.995934959349594,
"grad_norm": 0.20940722525119781,
"learning_rate": 7.586362522932323e-05,
"loss": 0.0225,
"step": 5410
},
{
"epoch": 11.016260162601625,
"grad_norm": 0.1898982971906662,
"learning_rate": 7.576922420031929e-05,
"loss": 0.0186,
"step": 5420
},
{
"epoch": 11.036585365853659,
"grad_norm": 0.28592678904533386,
"learning_rate": 7.567469792320428e-05,
"loss": 0.0238,
"step": 5430
},
{
"epoch": 11.05691056910569,
"grad_norm": 0.2053905427455902,
"learning_rate": 7.558004685741137e-05,
"loss": 0.0207,
"step": 5440
},
{
"epoch": 11.077235772357724,
"grad_norm": 0.2409668266773224,
"learning_rate": 7.548527146298036e-05,
"loss": 0.0267,
"step": 5450
},
{
"epoch": 11.097560975609756,
"grad_norm": 0.16952195763587952,
"learning_rate": 7.539037220055527e-05,
"loss": 0.0165,
"step": 5460
},
{
"epoch": 11.117886178861788,
"grad_norm": 0.1960296481847763,
"learning_rate": 7.529534953138213e-05,
"loss": 0.0225,
"step": 5470
},
{
"epoch": 11.138211382113822,
"grad_norm": 0.17874296009540558,
"learning_rate": 7.520020391730684e-05,
"loss": 0.0212,
"step": 5480
},
{
"epoch": 11.158536585365853,
"grad_norm": 0.16215310990810394,
"learning_rate": 7.510493582077281e-05,
"loss": 0.0197,
"step": 5490
},
{
"epoch": 11.178861788617887,
"grad_norm": 0.1989363729953766,
"learning_rate": 7.500954570481882e-05,
"loss": 0.0176,
"step": 5500
},
{
"epoch": 11.199186991869919,
"grad_norm": 0.16380657255649567,
"learning_rate": 7.491403403307662e-05,
"loss": 0.0199,
"step": 5510
},
{
"epoch": 11.21951219512195,
"grad_norm": 0.25316646695137024,
"learning_rate": 7.481840126976885e-05,
"loss": 0.0187,
"step": 5520
},
{
"epoch": 11.239837398373984,
"grad_norm": 0.19226357340812683,
"learning_rate": 7.472264787970666e-05,
"loss": 0.0297,
"step": 5530
},
{
"epoch": 11.260162601626016,
"grad_norm": 0.16172873973846436,
"learning_rate": 7.462677432828751e-05,
"loss": 0.0178,
"step": 5540
},
{
"epoch": 11.28048780487805,
"grad_norm": 0.217548206448555,
"learning_rate": 7.453078108149287e-05,
"loss": 0.0191,
"step": 5550
},
{
"epoch": 11.300813008130081,
"grad_norm": 0.1852748543024063,
"learning_rate": 7.443466860588599e-05,
"loss": 0.0207,
"step": 5560
},
{
"epoch": 11.321138211382113,
"grad_norm": 0.212760791182518,
"learning_rate": 7.43384373686096e-05,
"loss": 0.018,
"step": 5570
},
{
"epoch": 11.341463414634147,
"grad_norm": 0.146515354514122,
"learning_rate": 7.424208783738367e-05,
"loss": 0.0219,
"step": 5580
},
{
"epoch": 11.361788617886178,
"grad_norm": 0.18900711834430695,
"learning_rate": 7.414562048050315e-05,
"loss": 0.0216,
"step": 5590
},
{
"epoch": 11.382113821138212,
"grad_norm": 0.2096913754940033,
"learning_rate": 7.404903576683559e-05,
"loss": 0.0242,
"step": 5600
},
{
"epoch": 11.402439024390244,
"grad_norm": 0.17401063442230225,
"learning_rate": 7.3952334165819e-05,
"loss": 0.0219,
"step": 5610
},
{
"epoch": 11.422764227642276,
"grad_norm": 0.18080481886863708,
"learning_rate": 7.385551614745952e-05,
"loss": 0.0211,
"step": 5620
},
{
"epoch": 11.44308943089431,
"grad_norm": 0.20653265714645386,
"learning_rate": 7.375858218232905e-05,
"loss": 0.0212,
"step": 5630
},
{
"epoch": 11.463414634146341,
"grad_norm": 0.24207787215709686,
"learning_rate": 7.366153274156312e-05,
"loss": 0.0262,
"step": 5640
},
{
"epoch": 11.483739837398375,
"grad_norm": 0.2646133303642273,
"learning_rate": 7.356436829685844e-05,
"loss": 0.0159,
"step": 5650
},
{
"epoch": 11.504065040650406,
"grad_norm": 0.1713922768831253,
"learning_rate": 7.346708932047074e-05,
"loss": 0.0215,
"step": 5660
},
{
"epoch": 11.524390243902438,
"grad_norm": 0.18752823770046234,
"learning_rate": 7.336969628521237e-05,
"loss": 0.0221,
"step": 5670
},
{
"epoch": 11.544715447154472,
"grad_norm": 0.1929113268852234,
"learning_rate": 7.32721896644501e-05,
"loss": 0.0219,
"step": 5680
},
{
"epoch": 11.565040650406504,
"grad_norm": 0.17113937437534332,
"learning_rate": 7.317456993210272e-05,
"loss": 0.0271,
"step": 5690
},
{
"epoch": 11.585365853658537,
"grad_norm": 0.21283072233200073,
"learning_rate": 7.307683756263881e-05,
"loss": 0.0176,
"step": 5700
},
{
"epoch": 11.605691056910569,
"grad_norm": 0.19880950450897217,
"learning_rate": 7.297899303107441e-05,
"loss": 0.0214,
"step": 5710
},
{
"epoch": 11.6260162601626,
"grad_norm": 0.14940781891345978,
"learning_rate": 7.288103681297068e-05,
"loss": 0.0231,
"step": 5720
},
{
"epoch": 11.646341463414634,
"grad_norm": 0.16071899235248566,
"learning_rate": 7.278296938443166e-05,
"loss": 0.026,
"step": 5730
},
{
"epoch": 11.666666666666666,
"grad_norm": 0.11081783473491669,
"learning_rate": 7.26847912221019e-05,
"loss": 0.0202,
"step": 5740
},
{
"epoch": 11.6869918699187,
"grad_norm": 0.1508558690547943,
"learning_rate": 7.258650280316415e-05,
"loss": 0.0207,
"step": 5750
},
{
"epoch": 11.707317073170731,
"grad_norm": 0.15217100083827972,
"learning_rate": 7.248810460533706e-05,
"loss": 0.0205,
"step": 5760
},
{
"epoch": 11.727642276422765,
"grad_norm": 0.20388838648796082,
"learning_rate": 7.238959710687282e-05,
"loss": 0.0206,
"step": 5770
},
{
"epoch": 11.747967479674797,
"grad_norm": 0.16872060298919678,
"learning_rate": 7.229098078655489e-05,
"loss": 0.0228,
"step": 5780
},
{
"epoch": 11.768292682926829,
"grad_norm": 0.16713246703147888,
"learning_rate": 7.219225612369565e-05,
"loss": 0.0242,
"step": 5790
},
{
"epoch": 11.788617886178862,
"grad_norm": 0.14406579732894897,
"learning_rate": 7.209342359813404e-05,
"loss": 0.0195,
"step": 5800
},
{
"epoch": 11.808943089430894,
"grad_norm": 0.1978255659341812,
"learning_rate": 7.199448369023327e-05,
"loss": 0.0193,
"step": 5810
},
{
"epoch": 11.829268292682928,
"grad_norm": 0.24132715165615082,
"learning_rate": 7.189543688087845e-05,
"loss": 0.0195,
"step": 5820
},
{
"epoch": 11.84959349593496,
"grad_norm": 0.2294216752052307,
"learning_rate": 7.17962836514743e-05,
"loss": 0.0219,
"step": 5830
},
{
"epoch": 11.869918699186991,
"grad_norm": 0.2080947607755661,
"learning_rate": 7.169702448394279e-05,
"loss": 0.0203,
"step": 5840
},
{
"epoch": 11.890243902439025,
"grad_norm": 0.16323475539684296,
"learning_rate": 7.159765986072071e-05,
"loss": 0.0277,
"step": 5850
},
{
"epoch": 11.910569105691057,
"grad_norm": 0.2618698179721832,
"learning_rate": 7.149819026475751e-05,
"loss": 0.0214,
"step": 5860
},
{
"epoch": 11.93089430894309,
"grad_norm": 0.22444747388362885,
"learning_rate": 7.139861617951275e-05,
"loss": 0.0197,
"step": 5870
},
{
"epoch": 11.951219512195122,
"grad_norm": 0.22842451930046082,
"learning_rate": 7.129893808895395e-05,
"loss": 0.0279,
"step": 5880
},
{
"epoch": 11.971544715447154,
"grad_norm": 0.1826316863298416,
"learning_rate": 7.119915647755404e-05,
"loss": 0.0195,
"step": 5890
},
{
"epoch": 11.991869918699187,
"grad_norm": 0.2494143843650818,
"learning_rate": 7.109927183028914e-05,
"loss": 0.0222,
"step": 5900
},
{
"epoch": 12.012195121951219,
"grad_norm": 0.13060876727104187,
"learning_rate": 7.099928463263619e-05,
"loss": 0.0215,
"step": 5910
},
{
"epoch": 12.032520325203253,
"grad_norm": 0.13381272554397583,
"learning_rate": 7.08991953705705e-05,
"loss": 0.0205,
"step": 5920
},
{
"epoch": 12.052845528455284,
"grad_norm": 0.23280222713947296,
"learning_rate": 7.07990045305635e-05,
"loss": 0.0183,
"step": 5930
},
{
"epoch": 12.073170731707316,
"grad_norm": 0.13017290830612183,
"learning_rate": 7.069871259958034e-05,
"loss": 0.0192,
"step": 5940
},
{
"epoch": 12.09349593495935,
"grad_norm": 0.17966489493846893,
"learning_rate": 7.059832006507745e-05,
"loss": 0.0191,
"step": 5950
},
{
"epoch": 12.113821138211382,
"grad_norm": 0.15518441796302795,
"learning_rate": 7.049782741500028e-05,
"loss": 0.0193,
"step": 5960
},
{
"epoch": 12.134146341463415,
"grad_norm": 0.13540403544902802,
"learning_rate": 7.039723513778087e-05,
"loss": 0.0169,
"step": 5970
},
{
"epoch": 12.154471544715447,
"grad_norm": 0.19922709465026855,
"learning_rate": 7.029654372233544e-05,
"loss": 0.0206,
"step": 5980
},
{
"epoch": 12.17479674796748,
"grad_norm": 0.23210512101650238,
"learning_rate": 7.019575365806215e-05,
"loss": 0.0205,
"step": 5990
},
{
"epoch": 12.195121951219512,
"grad_norm": 0.12691457569599152,
"learning_rate": 7.009486543483858e-05,
"loss": 0.0214,
"step": 6000
},
{
"epoch": 12.215447154471544,
"grad_norm": 0.1701710820198059,
"learning_rate": 6.999387954301934e-05,
"loss": 0.0204,
"step": 6010
},
{
"epoch": 12.235772357723578,
"grad_norm": 0.15799103677272797,
"learning_rate": 6.989279647343388e-05,
"loss": 0.0228,
"step": 6020
},
{
"epoch": 12.25609756097561,
"grad_norm": 0.14731904864311218,
"learning_rate": 6.979161671738382e-05,
"loss": 0.0166,
"step": 6030
},
{
"epoch": 12.276422764227643,
"grad_norm": 0.1335950791835785,
"learning_rate": 6.969034076664085e-05,
"loss": 0.0188,
"step": 6040
},
{
"epoch": 12.296747967479675,
"grad_norm": 0.18221591413021088,
"learning_rate": 6.958896911344411e-05,
"loss": 0.0146,
"step": 6050
},
{
"epoch": 12.317073170731707,
"grad_norm": 0.18732091784477234,
"learning_rate": 6.948750225049791e-05,
"loss": 0.0183,
"step": 6060
},
{
"epoch": 12.33739837398374,
"grad_norm": 0.1735702008008957,
"learning_rate": 6.938594067096936e-05,
"loss": 0.0174,
"step": 6070
},
{
"epoch": 12.357723577235772,
"grad_norm": 0.14985719323158264,
"learning_rate": 6.928428486848587e-05,
"loss": 0.0228,
"step": 6080
},
{
"epoch": 12.378048780487806,
"grad_norm": 0.14891484379768372,
"learning_rate": 6.918253533713282e-05,
"loss": 0.0183,
"step": 6090
},
{
"epoch": 12.398373983739837,
"grad_norm": 0.18387916684150696,
"learning_rate": 6.908069257145118e-05,
"loss": 0.0163,
"step": 6100
},
{
"epoch": 12.41869918699187,
"grad_norm": 0.15331916511058807,
"learning_rate": 6.897875706643506e-05,
"loss": 0.0196,
"step": 6110
},
{
"epoch": 12.439024390243903,
"grad_norm": 0.22543849050998688,
"learning_rate": 6.887672931752927e-05,
"loss": 0.0201,
"step": 6120
},
{
"epoch": 12.459349593495935,
"grad_norm": 0.15449005365371704,
"learning_rate": 6.877460982062706e-05,
"loss": 0.0192,
"step": 6130
},
{
"epoch": 12.479674796747968,
"grad_norm": 0.12795570492744446,
"learning_rate": 6.86723990720675e-05,
"loss": 0.0152,
"step": 6140
},
{
"epoch": 12.5,
"grad_norm": 0.14221183955669403,
"learning_rate": 6.857009756863326e-05,
"loss": 0.0167,
"step": 6150
},
{
"epoch": 12.520325203252032,
"grad_norm": 0.17970189452171326,
"learning_rate": 6.846770580754807e-05,
"loss": 0.0236,
"step": 6160
},
{
"epoch": 12.540650406504065,
"grad_norm": 0.2241237312555313,
"learning_rate": 6.836522428647438e-05,
"loss": 0.0195,
"step": 6170
},
{
"epoch": 12.560975609756097,
"grad_norm": 0.23520494997501373,
"learning_rate": 6.826265350351083e-05,
"loss": 0.021,
"step": 6180
},
{
"epoch": 12.58130081300813,
"grad_norm": 0.22116564214229584,
"learning_rate": 6.815999395719e-05,
"loss": 0.0214,
"step": 6190
},
{
"epoch": 12.601626016260163,
"grad_norm": 0.212169349193573,
"learning_rate": 6.805724614647586e-05,
"loss": 0.0194,
"step": 6200
},
{
"epoch": 12.621951219512194,
"grad_norm": 0.2118973284959793,
"learning_rate": 6.795441057076136e-05,
"loss": 0.0207,
"step": 6210
},
{
"epoch": 12.642276422764228,
"grad_norm": 0.21665440499782562,
"learning_rate": 6.785148772986603e-05,
"loss": 0.0191,
"step": 6220
},
{
"epoch": 12.66260162601626,
"grad_norm": 0.1934552788734436,
"learning_rate": 6.774847812403355e-05,
"loss": 0.0194,
"step": 6230
},
{
"epoch": 12.682926829268293,
"grad_norm": 0.22638703882694244,
"learning_rate": 6.76453822539293e-05,
"loss": 0.0219,
"step": 6240
},
{
"epoch": 12.703252032520325,
"grad_norm": 0.20078861713409424,
"learning_rate": 6.754220062063793e-05,
"loss": 0.0213,
"step": 6250
},
{
"epoch": 12.723577235772357,
"grad_norm": 0.18275423347949982,
"learning_rate": 6.743893372566099e-05,
"loss": 0.0218,
"step": 6260
},
{
"epoch": 12.74390243902439,
"grad_norm": 0.15998774766921997,
"learning_rate": 6.733558207091434e-05,
"loss": 0.0194,
"step": 6270
},
{
"epoch": 12.764227642276422,
"grad_norm": 0.14701031148433685,
"learning_rate": 6.723214615872585e-05,
"loss": 0.0221,
"step": 6280
},
{
"epoch": 12.784552845528456,
"grad_norm": 0.310570627450943,
"learning_rate": 6.712862649183295e-05,
"loss": 0.0223,
"step": 6290
},
{
"epoch": 12.804878048780488,
"grad_norm": 0.1784200817346573,
"learning_rate": 6.70250235733801e-05,
"loss": 0.0194,
"step": 6300
},
{
"epoch": 12.82520325203252,
"grad_norm": 0.2551976144313812,
"learning_rate": 6.692133790691639e-05,
"loss": 0.024,
"step": 6310
},
{
"epoch": 12.845528455284553,
"grad_norm": 0.14004677534103394,
"learning_rate": 6.681756999639311e-05,
"loss": 0.0202,
"step": 6320
},
{
"epoch": 12.865853658536585,
"grad_norm": 0.16402773559093475,
"learning_rate": 6.671372034616132e-05,
"loss": 0.0165,
"step": 6330
},
{
"epoch": 12.886178861788618,
"grad_norm": 0.18575695157051086,
"learning_rate": 6.660978946096933e-05,
"loss": 0.0192,
"step": 6340
},
{
"epoch": 12.90650406504065,
"grad_norm": 0.1559947431087494,
"learning_rate": 6.650577784596026e-05,
"loss": 0.0181,
"step": 6350
},
{
"epoch": 12.926829268292684,
"grad_norm": 0.21637220680713654,
"learning_rate": 6.640168600666967e-05,
"loss": 0.0215,
"step": 6360
},
{
"epoch": 12.947154471544716,
"grad_norm": 0.17190884053707123,
"learning_rate": 6.629751444902299e-05,
"loss": 0.0194,
"step": 6370
},
{
"epoch": 12.967479674796747,
"grad_norm": 0.1417016088962555,
"learning_rate": 6.619326367933312e-05,
"loss": 0.0189,
"step": 6380
},
{
"epoch": 12.987804878048781,
"grad_norm": 0.19975803792476654,
"learning_rate": 6.608893420429798e-05,
"loss": 0.0208,
"step": 6390
},
{
"epoch": 13.008130081300813,
"grad_norm": 0.1410190463066101,
"learning_rate": 6.598452653099803e-05,
"loss": 0.0206,
"step": 6400
},
{
"epoch": 13.028455284552846,
"grad_norm": 0.12187732011079788,
"learning_rate": 6.588004116689375e-05,
"loss": 0.0189,
"step": 6410
},
{
"epoch": 13.048780487804878,
"grad_norm": 0.19220778346061707,
"learning_rate": 6.57754786198233e-05,
"loss": 0.0199,
"step": 6420
},
{
"epoch": 13.06910569105691,
"grad_norm": 0.11318925768136978,
"learning_rate": 6.567083939799992e-05,
"loss": 0.0164,
"step": 6430
},
{
"epoch": 13.089430894308943,
"grad_norm": 0.20521433651447296,
"learning_rate": 6.556612401000954e-05,
"loss": 0.0192,
"step": 6440
},
{
"epoch": 13.109756097560975,
"grad_norm": 0.17373771965503693,
"learning_rate": 6.54613329648083e-05,
"loss": 0.0151,
"step": 6450
},
{
"epoch": 13.130081300813009,
"grad_norm": 0.13978348672389984,
"learning_rate": 6.535646677172005e-05,
"loss": 0.0175,
"step": 6460
},
{
"epoch": 13.15040650406504,
"grad_norm": 0.1641884446144104,
"learning_rate": 6.52515259404339e-05,
"loss": 0.0211,
"step": 6470
},
{
"epoch": 13.170731707317072,
"grad_norm": 0.25421059131622314,
"learning_rate": 6.514651098100167e-05,
"loss": 0.0169,
"step": 6480
},
{
"epoch": 13.191056910569106,
"grad_norm": 0.18321838974952698,
"learning_rate": 6.504142240383555e-05,
"loss": 0.017,
"step": 6490
},
{
"epoch": 13.211382113821138,
"grad_norm": 0.24143821001052856,
"learning_rate": 6.493626071970549e-05,
"loss": 0.0192,
"step": 6500
},
{
"epoch": 13.231707317073171,
"grad_norm": 0.18711033463478088,
"learning_rate": 6.483102643973682e-05,
"loss": 0.0208,
"step": 6510
},
{
"epoch": 13.252032520325203,
"grad_norm": 0.17849287390708923,
"learning_rate": 6.472572007540764e-05,
"loss": 0.0233,
"step": 6520
},
{
"epoch": 13.272357723577235,
"grad_norm": 0.21863636374473572,
"learning_rate": 6.462034213854645e-05,
"loss": 0.0207,
"step": 6530
},
{
"epoch": 13.292682926829269,
"grad_norm": 0.12835566699504852,
"learning_rate": 6.451489314132962e-05,
"loss": 0.0159,
"step": 6540
},
{
"epoch": 13.3130081300813,
"grad_norm": 0.24241842329502106,
"learning_rate": 6.440937359627893e-05,
"loss": 0.0198,
"step": 6550
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.1560855656862259,
"learning_rate": 6.430378401625894e-05,
"loss": 0.0187,
"step": 6560
},
{
"epoch": 13.353658536585366,
"grad_norm": 0.17956890165805817,
"learning_rate": 6.419812491447472e-05,
"loss": 0.0224,
"step": 6570
},
{
"epoch": 13.373983739837398,
"grad_norm": 0.23864564299583435,
"learning_rate": 6.409239680446919e-05,
"loss": 0.024,
"step": 6580
},
{
"epoch": 13.394308943089431,
"grad_norm": 0.19811518490314484,
"learning_rate": 6.398660020012072e-05,
"loss": 0.0175,
"step": 6590
},
{
"epoch": 13.414634146341463,
"grad_norm": 0.2658018469810486,
"learning_rate": 6.38807356156405e-05,
"loss": 0.0195,
"step": 6600
},
{
"epoch": 13.434959349593496,
"grad_norm": 0.20414608716964722,
"learning_rate": 6.377480356557022e-05,
"loss": 0.019,
"step": 6610
},
{
"epoch": 13.455284552845528,
"grad_norm": 0.2046128660440445,
"learning_rate": 6.366880456477942e-05,
"loss": 0.0168,
"step": 6620
},
{
"epoch": 13.475609756097562,
"grad_norm": 0.19554445147514343,
"learning_rate": 6.356273912846312e-05,
"loss": 0.0197,
"step": 6630
},
{
"epoch": 13.495934959349594,
"grad_norm": 0.2673018276691437,
"learning_rate": 6.34566077721391e-05,
"loss": 0.0194,
"step": 6640
},
{
"epoch": 13.516260162601625,
"grad_norm": 0.17518429458141327,
"learning_rate": 6.335041101164569e-05,
"loss": 0.0185,
"step": 6650
},
{
"epoch": 13.536585365853659,
"grad_norm": 0.19595539569854736,
"learning_rate": 6.324414936313904e-05,
"loss": 0.0193,
"step": 6660
},
{
"epoch": 13.55691056910569,
"grad_norm": 0.19745945930480957,
"learning_rate": 6.313782334309066e-05,
"loss": 0.0178,
"step": 6670
},
{
"epoch": 13.577235772357724,
"grad_norm": 0.11477528512477875,
"learning_rate": 6.303143346828499e-05,
"loss": 0.0205,
"step": 6680
},
{
"epoch": 13.597560975609756,
"grad_norm": 0.18739905953407288,
"learning_rate": 6.292498025581674e-05,
"loss": 0.0189,
"step": 6690
},
{
"epoch": 13.617886178861788,
"grad_norm": 0.22458316385746002,
"learning_rate": 6.281846422308857e-05,
"loss": 0.0186,
"step": 6700
},
{
"epoch": 13.638211382113822,
"grad_norm": 0.11343209445476532,
"learning_rate": 6.271188588780839e-05,
"loss": 0.0173,
"step": 6710
},
{
"epoch": 13.658536585365853,
"grad_norm": 0.2310679703950882,
"learning_rate": 6.260524576798694e-05,
"loss": 0.0188,
"step": 6720
},
{
"epoch": 13.678861788617887,
"grad_norm": 0.2177576720714569,
"learning_rate": 6.249854438193528e-05,
"loss": 0.0218,
"step": 6730
},
{
"epoch": 13.699186991869919,
"grad_norm": 0.18544794619083405,
"learning_rate": 6.239178224826224e-05,
"loss": 0.0223,
"step": 6740
},
{
"epoch": 13.71951219512195,
"grad_norm": 0.22832602262496948,
"learning_rate": 6.228495988587188e-05,
"loss": 0.0159,
"step": 6750
},
{
"epoch": 13.739837398373984,
"grad_norm": 0.24159882962703705,
"learning_rate": 6.217807781396106e-05,
"loss": 0.0197,
"step": 6760
},
{
"epoch": 13.760162601626016,
"grad_norm": 0.22287015616893768,
"learning_rate": 6.207113655201676e-05,
"loss": 0.0163,
"step": 6770
},
{
"epoch": 13.78048780487805,
"grad_norm": 0.1738264560699463,
"learning_rate": 6.196413661981368e-05,
"loss": 0.0171,
"step": 6780
},
{
"epoch": 13.800813008130081,
"grad_norm": 0.22115349769592285,
"learning_rate": 6.185707853741175e-05,
"loss": 0.018,
"step": 6790
},
{
"epoch": 13.821138211382113,
"grad_norm": 0.18646171689033508,
"learning_rate": 6.174996282515344e-05,
"loss": 0.024,
"step": 6800
},
{
"epoch": 13.841463414634147,
"grad_norm": 0.18444843590259552,
"learning_rate": 6.164279000366131e-05,
"loss": 0.0146,
"step": 6810
},
{
"epoch": 13.861788617886178,
"grad_norm": 0.14431391656398773,
"learning_rate": 6.153556059383561e-05,
"loss": 0.015,
"step": 6820
},
{
"epoch": 13.882113821138212,
"grad_norm": 0.15836864709854126,
"learning_rate": 6.142827511685152e-05,
"loss": 0.0143,
"step": 6830
},
{
"epoch": 13.902439024390244,
"grad_norm": 0.24831633269786835,
"learning_rate": 6.132093409415678e-05,
"loss": 0.018,
"step": 6840
},
{
"epoch": 13.922764227642276,
"grad_norm": 0.17718598246574402,
"learning_rate": 6.121353804746907e-05,
"loss": 0.0157,
"step": 6850
},
{
"epoch": 13.94308943089431,
"grad_norm": 0.19644102454185486,
"learning_rate": 6.110608749877352e-05,
"loss": 0.0195,
"step": 6860
},
{
"epoch": 13.963414634146341,
"grad_norm": 0.16622968018054962,
"learning_rate": 6.0998582970320205e-05,
"loss": 0.0198,
"step": 6870
},
{
"epoch": 13.983739837398375,
"grad_norm": 0.2506231367588043,
"learning_rate": 6.0891024984621506e-05,
"loss": 0.0247,
"step": 6880
},
{
"epoch": 14.004065040650406,
"grad_norm": 0.21900992095470428,
"learning_rate": 6.078341406444961e-05,
"loss": 0.02,
"step": 6890
},
{
"epoch": 14.024390243902438,
"grad_norm": 0.1864655762910843,
"learning_rate": 6.067575073283405e-05,
"loss": 0.0191,
"step": 6900
},
{
"epoch": 14.044715447154472,
"grad_norm": 0.22156104445457458,
"learning_rate": 6.0568035513059073e-05,
"loss": 0.0198,
"step": 6910
},
{
"epoch": 14.065040650406504,
"grad_norm": 0.19245333969593048,
"learning_rate": 6.046026892866109e-05,
"loss": 0.0268,
"step": 6920
},
{
"epoch": 14.085365853658537,
"grad_norm": 0.16246770322322845,
"learning_rate": 6.0352451503426214e-05,
"loss": 0.0163,
"step": 6930
},
{
"epoch": 14.105691056910569,
"grad_norm": 0.1798103153705597,
"learning_rate": 6.024458376138762e-05,
"loss": 0.015,
"step": 6940
},
{
"epoch": 14.126016260162602,
"grad_norm": 0.14961481094360352,
"learning_rate": 6.013666622682306e-05,
"loss": 0.0201,
"step": 6950
},
{
"epoch": 14.146341463414634,
"grad_norm": 0.17163357138633728,
"learning_rate": 6.002869942425231e-05,
"loss": 0.0173,
"step": 6960
},
{
"epoch": 14.166666666666666,
"grad_norm": 0.22165465354919434,
"learning_rate": 5.992068387843459e-05,
"loss": 0.0174,
"step": 6970
},
{
"epoch": 14.1869918699187,
"grad_norm": 0.15428420901298523,
"learning_rate": 5.981262011436603e-05,
"loss": 0.0175,
"step": 6980
},
{
"epoch": 14.207317073170731,
"grad_norm": 0.16303707659244537,
"learning_rate": 5.970450865727712e-05,
"loss": 0.0196,
"step": 6990
},
{
"epoch": 14.227642276422765,
"grad_norm": 0.1545059233903885,
"learning_rate": 5.9596350032630156e-05,
"loss": 0.0182,
"step": 7000
},
{
"epoch": 14.247967479674797,
"grad_norm": 0.20550492405891418,
"learning_rate": 5.9488144766116714e-05,
"loss": 0.0185,
"step": 7010
},
{
"epoch": 14.268292682926829,
"grad_norm": 0.1110968366265297,
"learning_rate": 5.9379893383655006e-05,
"loss": 0.0155,
"step": 7020
},
{
"epoch": 14.288617886178862,
"grad_norm": 0.12301554530858994,
"learning_rate": 5.927159641138744e-05,
"loss": 0.018,
"step": 7030
},
{
"epoch": 14.308943089430894,
"grad_norm": 0.12619097530841827,
"learning_rate": 5.916325437567799e-05,
"loss": 0.0183,
"step": 7040
},
{
"epoch": 14.329268292682928,
"grad_norm": 0.1619870513677597,
"learning_rate": 5.905486780310966e-05,
"loss": 0.0222,
"step": 7050
},
{
"epoch": 14.34959349593496,
"grad_norm": 0.20483283698558807,
"learning_rate": 5.8946437220481887e-05,
"loss": 0.0149,
"step": 7060
},
{
"epoch": 14.369918699186991,
"grad_norm": 0.11128581315279007,
"learning_rate": 5.883796315480805e-05,
"loss": 0.0204,
"step": 7070
},
{
"epoch": 14.390243902439025,
"grad_norm": 0.12385226041078568,
"learning_rate": 5.872944613331288e-05,
"loss": 0.0137,
"step": 7080
},
{
"epoch": 14.410569105691057,
"grad_norm": 0.12931764125823975,
"learning_rate": 5.862088668342986e-05,
"loss": 0.0141,
"step": 7090
},
{
"epoch": 14.43089430894309,
"grad_norm": 0.1460898071527481,
"learning_rate": 5.8512285332798714e-05,
"loss": 0.0172,
"step": 7100
},
{
"epoch": 14.451219512195122,
"grad_norm": 0.1483217179775238,
"learning_rate": 5.840364260926277e-05,
"loss": 0.0166,
"step": 7110
},
{
"epoch": 14.471544715447154,
"grad_norm": 0.19570297002792358,
"learning_rate": 5.8294959040866505e-05,
"loss": 0.0181,
"step": 7120
},
{
"epoch": 14.491869918699187,
"grad_norm": 0.2213359773159027,
"learning_rate": 5.818623515585292e-05,
"loss": 0.017,
"step": 7130
},
{
"epoch": 14.512195121951219,
"grad_norm": 0.16189566254615784,
"learning_rate": 5.8077471482660896e-05,
"loss": 0.0206,
"step": 7140
},
{
"epoch": 14.532520325203253,
"grad_norm": 0.1307971179485321,
"learning_rate": 5.796866854992276e-05,
"loss": 0.0188,
"step": 7150
},
{
"epoch": 14.552845528455284,
"grad_norm": 0.12432897835969925,
"learning_rate": 5.7859826886461676e-05,
"loss": 0.0218,
"step": 7160
},
{
"epoch": 14.573170731707316,
"grad_norm": 0.20361848175525665,
"learning_rate": 5.775094702128899e-05,
"loss": 0.0177,
"step": 7170
},
{
"epoch": 14.59349593495935,
"grad_norm": 0.18532606959342957,
"learning_rate": 5.7642029483601746e-05,
"loss": 0.0187,
"step": 7180
},
{
"epoch": 14.613821138211382,
"grad_norm": 0.15062542259693146,
"learning_rate": 5.753307480278012e-05,
"loss": 0.0231,
"step": 7190
},
{
"epoch": 14.634146341463415,
"grad_norm": 0.17582647502422333,
"learning_rate": 5.742408350838478e-05,
"loss": 0.0192,
"step": 7200
},
{
"epoch": 14.654471544715447,
"grad_norm": 0.14718493819236755,
"learning_rate": 5.7315056130154374e-05,
"loss": 0.017,
"step": 7210
},
{
"epoch": 14.67479674796748,
"grad_norm": 0.25164714455604553,
"learning_rate": 5.720599319800292e-05,
"loss": 0.0262,
"step": 7220
},
{
"epoch": 14.695121951219512,
"grad_norm": 0.17039872705936432,
"learning_rate": 5.709689524201722e-05,
"loss": 0.0203,
"step": 7230
},
{
"epoch": 14.715447154471544,
"grad_norm": 0.21887938678264618,
"learning_rate": 5.698776279245437e-05,
"loss": 0.0201,
"step": 7240
},
{
"epoch": 14.735772357723578,
"grad_norm": 0.19460436701774597,
"learning_rate": 5.6878596379739036e-05,
"loss": 0.0249,
"step": 7250
},
{
"epoch": 14.75609756097561,
"grad_norm": 0.25877246260643005,
"learning_rate": 5.676939653446103e-05,
"loss": 0.0208,
"step": 7260
},
{
"epoch": 14.776422764227643,
"grad_norm": 0.22199095785617828,
"learning_rate": 5.666016378737261e-05,
"loss": 0.022,
"step": 7270
},
{
"epoch": 14.796747967479675,
"grad_norm": 0.14779351651668549,
"learning_rate": 5.655089866938596e-05,
"loss": 0.0168,
"step": 7280
},
{
"epoch": 14.817073170731707,
"grad_norm": 0.2230490893125534,
"learning_rate": 5.6441601711570615e-05,
"loss": 0.0189,
"step": 7290
},
{
"epoch": 14.83739837398374,
"grad_norm": 0.17648757994174957,
"learning_rate": 5.633227344515085e-05,
"loss": 0.0204,
"step": 7300
},
{
"epoch": 14.857723577235772,
"grad_norm": 0.1408594250679016,
"learning_rate": 5.6222914401503116e-05,
"loss": 0.0192,
"step": 7310
},
{
"epoch": 14.878048780487806,
"grad_norm": 0.14772988855838776,
"learning_rate": 5.611352511215343e-05,
"loss": 0.0176,
"step": 7320
},
{
"epoch": 14.898373983739837,
"grad_norm": 0.14210304617881775,
"learning_rate": 5.600410610877488e-05,
"loss": 0.019,
"step": 7330
},
{
"epoch": 14.91869918699187,
"grad_norm": 0.16473866999149323,
"learning_rate": 5.58946579231849e-05,
"loss": 0.0183,
"step": 7340
},
{
"epoch": 14.939024390243903,
"grad_norm": 0.15072952210903168,
"learning_rate": 5.578518108734279e-05,
"loss": 0.0149,
"step": 7350
},
{
"epoch": 14.959349593495935,
"grad_norm": 0.17104724049568176,
"learning_rate": 5.5675676133347096e-05,
"loss": 0.0169,
"step": 7360
},
{
"epoch": 14.979674796747968,
"grad_norm": 0.09811348468065262,
"learning_rate": 5.556614359343307e-05,
"loss": 0.0168,
"step": 7370
},
{
"epoch": 15.0,
"grad_norm": 0.18304497003555298,
"learning_rate": 5.545658399996999e-05,
"loss": 0.0158,
"step": 7380
},
{
"epoch": 15.020325203252032,
"grad_norm": 0.1639220118522644,
"learning_rate": 5.534699788545862e-05,
"loss": 0.0183,
"step": 7390
},
{
"epoch": 15.040650406504065,
"grad_norm": 0.13093116879463196,
"learning_rate": 5.523738578252867e-05,
"loss": 0.0193,
"step": 7400
},
{
"epoch": 15.060975609756097,
"grad_norm": 0.1492796391248703,
"learning_rate": 5.512774822393614e-05,
"loss": 0.0148,
"step": 7410
},
{
"epoch": 15.08130081300813,
"grad_norm": 0.23351886868476868,
"learning_rate": 5.5018085742560744e-05,
"loss": 0.0191,
"step": 7420
},
{
"epoch": 15.101626016260163,
"grad_norm": 0.19075217843055725,
"learning_rate": 5.4908398871403365e-05,
"loss": 0.0168,
"step": 7430
},
{
"epoch": 15.121951219512194,
"grad_norm": 0.1332959085702896,
"learning_rate": 5.4798688143583375e-05,
"loss": 0.0173,
"step": 7440
},
{
"epoch": 15.142276422764228,
"grad_norm": 0.11717317998409271,
"learning_rate": 5.468895409233615e-05,
"loss": 0.0143,
"step": 7450
},
{
"epoch": 15.16260162601626,
"grad_norm": 0.17847301065921783,
"learning_rate": 5.4579197251010414e-05,
"loss": 0.0172,
"step": 7460
},
{
"epoch": 15.182926829268293,
"grad_norm": 0.18227623403072357,
"learning_rate": 5.446941815306563e-05,
"loss": 0.0152,
"step": 7470
},
{
"epoch": 15.203252032520325,
"grad_norm": 0.1648562103509903,
"learning_rate": 5.435961733206947e-05,
"loss": 0.0184,
"step": 7480
},
{
"epoch": 15.223577235772357,
"grad_norm": 0.21375487744808197,
"learning_rate": 5.424979532169516e-05,
"loss": 0.0179,
"step": 7490
},
{
"epoch": 15.24390243902439,
"grad_norm": 0.16193552315235138,
"learning_rate": 5.413995265571895e-05,
"loss": 0.0156,
"step": 7500
},
{
"epoch": 15.264227642276422,
"grad_norm": 0.20847506821155548,
"learning_rate": 5.403008986801746e-05,
"loss": 0.013,
"step": 7510
},
{
"epoch": 15.284552845528456,
"grad_norm": 0.11255639046430588,
"learning_rate": 5.3920207492565114e-05,
"loss": 0.0177,
"step": 7520
},
{
"epoch": 15.304878048780488,
"grad_norm": 0.221723273396492,
"learning_rate": 5.381030606343154e-05,
"loss": 0.0192,
"step": 7530
},
{
"epoch": 15.32520325203252,
"grad_norm": 0.24689878523349762,
"learning_rate": 5.370038611477894e-05,
"loss": 0.0164,
"step": 7540
},
{
"epoch": 15.345528455284553,
"grad_norm": 0.17949624359607697,
"learning_rate": 5.359044818085963e-05,
"loss": 0.0166,
"step": 7550
},
{
"epoch": 15.365853658536585,
"grad_norm": 0.23291277885437012,
"learning_rate": 5.3480492796013214e-05,
"loss": 0.0168,
"step": 7560
},
{
"epoch": 15.386178861788618,
"grad_norm": 0.20025502145290375,
"learning_rate": 5.33705204946642e-05,
"loss": 0.0164,
"step": 7570
},
{
"epoch": 15.40650406504065,
"grad_norm": 0.23018495738506317,
"learning_rate": 5.326053181131927e-05,
"loss": 0.0199,
"step": 7580
},
{
"epoch": 15.426829268292684,
"grad_norm": 0.1864101141691208,
"learning_rate": 5.3150527280564776e-05,
"loss": 0.0171,
"step": 7590
},
{
"epoch": 15.447154471544716,
"grad_norm": 0.14866892993450165,
"learning_rate": 5.3040507437064034e-05,
"loss": 0.0171,
"step": 7600
},
{
"epoch": 15.467479674796747,
"grad_norm": 0.16208836436271667,
"learning_rate": 5.293047281555482e-05,
"loss": 0.0141,
"step": 7610
},
{
"epoch": 15.487804878048781,
"grad_norm": 0.12760621309280396,
"learning_rate": 5.2820423950846765e-05,
"loss": 0.0154,
"step": 7620
},
{
"epoch": 15.508130081300813,
"grad_norm": 0.17664365470409393,
"learning_rate": 5.2710361377818696e-05,
"loss": 0.0165,
"step": 7630
},
{
"epoch": 15.528455284552846,
"grad_norm": 0.17898857593536377,
"learning_rate": 5.2600285631416026e-05,
"loss": 0.016,
"step": 7640
},
{
"epoch": 15.548780487804878,
"grad_norm": 0.09963621944189072,
"learning_rate": 5.249019724664826e-05,
"loss": 0.0189,
"step": 7650
},
{
"epoch": 15.56910569105691,
"grad_norm": 0.15128383040428162,
"learning_rate": 5.2380096758586315e-05,
"loss": 0.0158,
"step": 7660
},
{
"epoch": 15.589430894308943,
"grad_norm": 0.12781989574432373,
"learning_rate": 5.226998470235993e-05,
"loss": 0.0158,
"step": 7670
},
{
"epoch": 15.609756097560975,
"grad_norm": 0.17681658267974854,
"learning_rate": 5.215986161315507e-05,
"loss": 0.0141,
"step": 7680
},
{
"epoch": 15.630081300813009,
"grad_norm": 0.1291031688451767,
"learning_rate": 5.20497280262113e-05,
"loss": 0.0154,
"step": 7690
},
{
"epoch": 15.65040650406504,
"grad_norm": 0.1151236891746521,
"learning_rate": 5.193958447681924e-05,
"loss": 0.0142,
"step": 7700
},
{
"epoch": 15.670731707317072,
"grad_norm": 0.16700609028339386,
"learning_rate": 5.182943150031793e-05,
"loss": 0.0156,
"step": 7710
},
{
"epoch": 15.691056910569106,
"grad_norm": 0.22444726526737213,
"learning_rate": 5.1719269632092204e-05,
"loss": 0.0161,
"step": 7720
},
{
"epoch": 15.711382113821138,
"grad_norm": 0.1340688318014145,
"learning_rate": 5.160909940757015e-05,
"loss": 0.0193,
"step": 7730
},
{
"epoch": 15.731707317073171,
"grad_norm": 0.10712575912475586,
"learning_rate": 5.149892136222043e-05,
"loss": 0.0133,
"step": 7740
},
{
"epoch": 15.752032520325203,
"grad_norm": 0.1398649662733078,
"learning_rate": 5.1388736031549744e-05,
"loss": 0.0149,
"step": 7750
},
{
"epoch": 15.772357723577235,
"grad_norm": 0.1141858771443367,
"learning_rate": 5.127854395110021e-05,
"loss": 0.0155,
"step": 7760
},
{
"epoch": 15.792682926829269,
"grad_norm": 0.16649234294891357,
"learning_rate": 5.116834565644671e-05,
"loss": 0.0122,
"step": 7770
},
{
"epoch": 15.8130081300813,
"grad_norm": 0.14712487161159515,
"learning_rate": 5.10581416831944e-05,
"loss": 0.0169,
"step": 7780
},
{
"epoch": 15.833333333333334,
"grad_norm": 0.1849357783794403,
"learning_rate": 5.094793256697593e-05,
"loss": 0.0141,
"step": 7790
},
{
"epoch": 15.853658536585366,
"grad_norm": 0.20242075622081757,
"learning_rate": 5.0837718843449075e-05,
"loss": 0.0151,
"step": 7800
},
{
"epoch": 15.8739837398374,
"grad_norm": 0.16380542516708374,
"learning_rate": 5.07275010482939e-05,
"loss": 0.0143,
"step": 7810
},
{
"epoch": 15.894308943089431,
"grad_norm": 0.20087213814258575,
"learning_rate": 5.061727971721032e-05,
"loss": 0.0179,
"step": 7820
},
{
"epoch": 15.914634146341463,
"grad_norm": 0.12002623826265335,
"learning_rate": 5.050705538591538e-05,
"loss": 0.0213,
"step": 7830
},
{
"epoch": 15.934959349593496,
"grad_norm": 0.16061867773532867,
"learning_rate": 5.0396828590140785e-05,
"loss": 0.0159,
"step": 7840
},
{
"epoch": 15.955284552845528,
"grad_norm": 0.2045939415693283,
"learning_rate": 5.0286599865630157e-05,
"loss": 0.0136,
"step": 7850
},
{
"epoch": 15.975609756097562,
"grad_norm": 0.11727229505777359,
"learning_rate": 5.017636974813649e-05,
"loss": 0.0125,
"step": 7860
},
{
"epoch": 15.995934959349594,
"grad_norm": 0.1615324318408966,
"learning_rate": 5.006613877341959e-05,
"loss": 0.0141,
"step": 7870
},
{
"epoch": 16.016260162601625,
"grad_norm": 0.17539259791374207,
"learning_rate": 4.99559074772434e-05,
"loss": 0.0166,
"step": 7880
},
{
"epoch": 16.036585365853657,
"grad_norm": 0.1632055640220642,
"learning_rate": 4.9845676395373455e-05,
"loss": 0.0163,
"step": 7890
},
{
"epoch": 16.056910569105693,
"grad_norm": 0.1939256489276886,
"learning_rate": 4.9735446063574184e-05,
"loss": 0.0148,
"step": 7900
},
{
"epoch": 16.077235772357724,
"grad_norm": 0.22934892773628235,
"learning_rate": 4.962521701760645e-05,
"loss": 0.0199,
"step": 7910
},
{
"epoch": 16.097560975609756,
"grad_norm": 0.09549865126609802,
"learning_rate": 4.951498979322482e-05,
"loss": 0.0146,
"step": 7920
},
{
"epoch": 16.117886178861788,
"grad_norm": 0.12365490198135376,
"learning_rate": 4.9404764926174996e-05,
"loss": 0.019,
"step": 7930
},
{
"epoch": 16.13821138211382,
"grad_norm": 0.1805732101202011,
"learning_rate": 4.929454295219127e-05,
"loss": 0.0163,
"step": 7940
},
{
"epoch": 16.158536585365855,
"grad_norm": 0.11624112725257874,
"learning_rate": 4.9184324406993844e-05,
"loss": 0.0203,
"step": 7950
},
{
"epoch": 16.178861788617887,
"grad_norm": 0.11672668159008026,
"learning_rate": 4.907410982628623e-05,
"loss": 0.0174,
"step": 7960
},
{
"epoch": 16.19918699186992,
"grad_norm": 0.2546161115169525,
"learning_rate": 4.896389974575273e-05,
"loss": 0.0174,
"step": 7970
},
{
"epoch": 16.21951219512195,
"grad_norm": 0.16103267669677734,
"learning_rate": 4.885369470105571e-05,
"loss": 0.0231,
"step": 7980
},
{
"epoch": 16.239837398373982,
"grad_norm": 0.14676164090633392,
"learning_rate": 4.874349522783313e-05,
"loss": 0.0167,
"step": 7990
},
{
"epoch": 16.260162601626018,
"grad_norm": 0.1342374086380005,
"learning_rate": 4.863330186169581e-05,
"loss": 0.0135,
"step": 8000
},
{
"epoch": 16.28048780487805,
"grad_norm": 0.1830085813999176,
"learning_rate": 4.8523115138224885e-05,
"loss": 0.0159,
"step": 8010
},
{
"epoch": 16.30081300813008,
"grad_norm": 0.12788920104503632,
"learning_rate": 4.841293559296928e-05,
"loss": 0.0133,
"step": 8020
},
{
"epoch": 16.321138211382113,
"grad_norm": 0.13414596021175385,
"learning_rate": 4.830276376144295e-05,
"loss": 0.0157,
"step": 8030
},
{
"epoch": 16.341463414634145,
"grad_norm": 0.15788240730762482,
"learning_rate": 4.819260017912237e-05,
"loss": 0.0128,
"step": 8040
},
{
"epoch": 16.36178861788618,
"grad_norm": 0.20657338201999664,
"learning_rate": 4.808244538144396e-05,
"loss": 0.0147,
"step": 8050
},
{
"epoch": 16.382113821138212,
"grad_norm": 0.1241159588098526,
"learning_rate": 4.797229990380142e-05,
"loss": 0.014,
"step": 8060
},
{
"epoch": 16.402439024390244,
"grad_norm": 0.14420954883098602,
"learning_rate": 4.786216428154317e-05,
"loss": 0.0139,
"step": 8070
},
{
"epoch": 16.422764227642276,
"grad_norm": 0.15971283614635468,
"learning_rate": 4.7752039049969685e-05,
"loss": 0.0148,
"step": 8080
},
{
"epoch": 16.443089430894307,
"grad_norm": 0.11482447385787964,
"learning_rate": 4.7641924744330956e-05,
"loss": 0.0133,
"step": 8090
},
{
"epoch": 16.463414634146343,
"grad_norm": 0.17538700997829437,
"learning_rate": 4.7531821899823925e-05,
"loss": 0.0122,
"step": 8100
},
{
"epoch": 16.483739837398375,
"grad_norm": 0.13359621167182922,
"learning_rate": 4.742173105158973e-05,
"loss": 0.0141,
"step": 8110
},
{
"epoch": 16.504065040650406,
"grad_norm": 0.13688933849334717,
"learning_rate": 4.731165273471129e-05,
"loss": 0.0146,
"step": 8120
},
{
"epoch": 16.524390243902438,
"grad_norm": 0.11756113171577454,
"learning_rate": 4.720158748421057e-05,
"loss": 0.011,
"step": 8130
},
{
"epoch": 16.54471544715447,
"grad_norm": 0.1685679405927658,
"learning_rate": 4.709153583504602e-05,
"loss": 0.0172,
"step": 8140
},
{
"epoch": 16.565040650406505,
"grad_norm": 0.12983818352222443,
"learning_rate": 4.6981498322110027e-05,
"loss": 0.0157,
"step": 8150
},
{
"epoch": 16.585365853658537,
"grad_norm": 0.13751214742660522,
"learning_rate": 4.6871475480226256e-05,
"loss": 0.0135,
"step": 8160
},
{
"epoch": 16.60569105691057,
"grad_norm": 0.14502060413360596,
"learning_rate": 4.6761467844147004e-05,
"loss": 0.0126,
"step": 8170
},
{
"epoch": 16.6260162601626,
"grad_norm": 0.14088159799575806,
"learning_rate": 4.665147594855076e-05,
"loss": 0.0158,
"step": 8180
},
{
"epoch": 16.646341463414632,
"grad_norm": 0.12108345329761505,
"learning_rate": 4.654150032803943e-05,
"loss": 0.0142,
"step": 8190
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.12457619607448578,
"learning_rate": 4.643154151713588e-05,
"loss": 0.0162,
"step": 8200
},
{
"epoch": 16.6869918699187,
"grad_norm": 0.19529499113559723,
"learning_rate": 4.6321600050281225e-05,
"loss": 0.0155,
"step": 8210
},
{
"epoch": 16.70731707317073,
"grad_norm": 0.10389215499162674,
"learning_rate": 4.6211676461832264e-05,
"loss": 0.016,
"step": 8220
},
{
"epoch": 16.727642276422763,
"grad_norm": 0.18743805587291718,
"learning_rate": 4.610177128605899e-05,
"loss": 0.0162,
"step": 8230
},
{
"epoch": 16.747967479674795,
"grad_norm": 0.14482690393924713,
"learning_rate": 4.599188505714184e-05,
"loss": 0.0142,
"step": 8240
},
{
"epoch": 16.76829268292683,
"grad_norm": 0.08358287066221237,
"learning_rate": 4.588201830916912e-05,
"loss": 0.0142,
"step": 8250
},
{
"epoch": 16.788617886178862,
"grad_norm": 0.11201161891222,
"learning_rate": 4.577217157613456e-05,
"loss": 0.0125,
"step": 8260
},
{
"epoch": 16.808943089430894,
"grad_norm": 0.18672020733356476,
"learning_rate": 4.566234539193452e-05,
"loss": 0.021,
"step": 8270
},
{
"epoch": 16.829268292682926,
"grad_norm": 0.11721731722354889,
"learning_rate": 4.555254029036555e-05,
"loss": 0.013,
"step": 8280
},
{
"epoch": 16.84959349593496,
"grad_norm": 0.16724960505962372,
"learning_rate": 4.544275680512165e-05,
"loss": 0.0143,
"step": 8290
},
{
"epoch": 16.869918699186993,
"grad_norm": 0.17072100937366486,
"learning_rate": 4.5332995469791836e-05,
"loss": 0.0125,
"step": 8300
},
{
"epoch": 16.890243902439025,
"grad_norm": 0.16732802987098694,
"learning_rate": 4.522325681785744e-05,
"loss": 0.0161,
"step": 8310
},
{
"epoch": 16.910569105691057,
"grad_norm": 0.15675050020217896,
"learning_rate": 4.511354138268952e-05,
"loss": 0.0147,
"step": 8320
},
{
"epoch": 16.93089430894309,
"grad_norm": 0.1608588695526123,
"learning_rate": 4.50038496975463e-05,
"loss": 0.0145,
"step": 8330
},
{
"epoch": 16.951219512195124,
"grad_norm": 0.1279238760471344,
"learning_rate": 4.489418229557063e-05,
"loss": 0.0113,
"step": 8340
},
{
"epoch": 16.971544715447155,
"grad_norm": 0.11838237196207047,
"learning_rate": 4.478453970978722e-05,
"loss": 0.0162,
"step": 8350
},
{
"epoch": 16.991869918699187,
"grad_norm": 0.11877836287021637,
"learning_rate": 4.4674922473100286e-05,
"loss": 0.0139,
"step": 8360
},
{
"epoch": 17.01219512195122,
"grad_norm": 0.20165173709392548,
"learning_rate": 4.4565331118290756e-05,
"loss": 0.0113,
"step": 8370
},
{
"epoch": 17.03252032520325,
"grad_norm": 0.09803508222103119,
"learning_rate": 4.4455766178013775e-05,
"loss": 0.0139,
"step": 8380
},
{
"epoch": 17.052845528455286,
"grad_norm": 0.15660974383354187,
"learning_rate": 4.434622818479615e-05,
"loss": 0.0155,
"step": 8390
},
{
"epoch": 17.073170731707318,
"grad_norm": 0.1324063241481781,
"learning_rate": 4.4236717671033646e-05,
"loss": 0.0134,
"step": 8400
},
{
"epoch": 17.09349593495935,
"grad_norm": 0.15221671760082245,
"learning_rate": 4.412723516898853e-05,
"loss": 0.0124,
"step": 8410
},
{
"epoch": 17.11382113821138,
"grad_norm": 0.10603677481412888,
"learning_rate": 4.40177812107869e-05,
"loss": 0.0131,
"step": 8420
},
{
"epoch": 17.134146341463413,
"grad_norm": 0.12352833151817322,
"learning_rate": 4.390835632841606e-05,
"loss": 0.0121,
"step": 8430
},
{
"epoch": 17.15447154471545,
"grad_norm": 0.16352899372577667,
"learning_rate": 4.3798961053722115e-05,
"loss": 0.0128,
"step": 8440
},
{
"epoch": 17.17479674796748,
"grad_norm": 0.128566175699234,
"learning_rate": 4.368959591840718e-05,
"loss": 0.0116,
"step": 8450
},
{
"epoch": 17.195121951219512,
"grad_norm": 0.13584142923355103,
"learning_rate": 4.3580261454026865e-05,
"loss": 0.0117,
"step": 8460
},
{
"epoch": 17.215447154471544,
"grad_norm": 0.17808644473552704,
"learning_rate": 4.3470958191987786e-05,
"loss": 0.0125,
"step": 8470
},
{
"epoch": 17.235772357723576,
"grad_norm": 0.13361839950084686,
"learning_rate": 4.336168666354484e-05,
"loss": 0.019,
"step": 8480
},
{
"epoch": 17.25609756097561,
"grad_norm": 0.13176748156547546,
"learning_rate": 4.325244739979873e-05,
"loss": 0.0153,
"step": 8490
},
{
"epoch": 17.276422764227643,
"grad_norm": 0.135588139295578,
"learning_rate": 4.314324093169332e-05,
"loss": 0.0118,
"step": 8500
},
{
"epoch": 17.296747967479675,
"grad_norm": 0.1549520641565323,
"learning_rate": 4.303406779001302e-05,
"loss": 0.0133,
"step": 8510
},
{
"epoch": 17.317073170731707,
"grad_norm": 0.16039688885211945,
"learning_rate": 4.292492850538038e-05,
"loss": 0.0145,
"step": 8520
},
{
"epoch": 17.33739837398374,
"grad_norm": 0.1547240912914276,
"learning_rate": 4.28158236082533e-05,
"loss": 0.0116,
"step": 8530
},
{
"epoch": 17.357723577235774,
"grad_norm": 0.15895530581474304,
"learning_rate": 4.270675362892256e-05,
"loss": 0.0119,
"step": 8540
},
{
"epoch": 17.378048780487806,
"grad_norm": 0.10659079998731613,
"learning_rate": 4.2597719097509246e-05,
"loss": 0.0147,
"step": 8550
},
{
"epoch": 17.398373983739837,
"grad_norm": 0.1645200252532959,
"learning_rate": 4.2488720543962146e-05,
"loss": 0.0118,
"step": 8560
},
{
"epoch": 17.41869918699187,
"grad_norm": 0.14108268916606903,
"learning_rate": 4.23797584980552e-05,
"loss": 0.0158,
"step": 8570
},
{
"epoch": 17.4390243902439,
"grad_norm": 0.1322506219148636,
"learning_rate": 4.227083348938486e-05,
"loss": 0.012,
"step": 8580
},
{
"epoch": 17.459349593495936,
"grad_norm": 0.1606353223323822,
"learning_rate": 4.2161946047367586e-05,
"loss": 0.0131,
"step": 8590
},
{
"epoch": 17.479674796747968,
"grad_norm": 0.20726698637008667,
"learning_rate": 4.2053096701237294e-05,
"loss": 0.0168,
"step": 8600
},
{
"epoch": 17.5,
"grad_norm": 0.18475304543972015,
"learning_rate": 4.1944285980042656e-05,
"loss": 0.0144,
"step": 8610
},
{
"epoch": 17.520325203252032,
"grad_norm": 0.13266246020793915,
"learning_rate": 4.183551441264469e-05,
"loss": 0.0153,
"step": 8620
},
{
"epoch": 17.540650406504064,
"grad_norm": 0.1916080266237259,
"learning_rate": 4.172678252771408e-05,
"loss": 0.015,
"step": 8630
},
{
"epoch": 17.5609756097561,
"grad_norm": 0.1388619840145111,
"learning_rate": 4.16180908537286e-05,
"loss": 0.0122,
"step": 8640
},
{
"epoch": 17.58130081300813,
"grad_norm": 0.19394637644290924,
"learning_rate": 4.150943991897065e-05,
"loss": 0.0169,
"step": 8650
},
{
"epoch": 17.601626016260163,
"grad_norm": 0.1618029922246933,
"learning_rate": 4.1400830251524605e-05,
"loss": 0.0116,
"step": 8660
},
{
"epoch": 17.621951219512194,
"grad_norm": 0.11678081750869751,
"learning_rate": 4.1292262379274215e-05,
"loss": 0.0144,
"step": 8670
},
{
"epoch": 17.642276422764226,
"grad_norm": 0.11671017855405807,
"learning_rate": 4.118373682990016e-05,
"loss": 0.0159,
"step": 8680
},
{
"epoch": 17.66260162601626,
"grad_norm": 0.1848084032535553,
"learning_rate": 4.107525413087737e-05,
"loss": 0.0139,
"step": 8690
},
{
"epoch": 17.682926829268293,
"grad_norm": 0.17534123361110687,
"learning_rate": 4.096681480947252e-05,
"loss": 0.0143,
"step": 8700
},
{
"epoch": 17.703252032520325,
"grad_norm": 0.12786678969860077,
"learning_rate": 4.085841939274146e-05,
"loss": 0.0109,
"step": 8710
},
{
"epoch": 17.723577235772357,
"grad_norm": 0.1237667053937912,
"learning_rate": 4.075006840752662e-05,
"loss": 0.0139,
"step": 8720
},
{
"epoch": 17.74390243902439,
"grad_norm": 0.15702199935913086,
"learning_rate": 4.0641762380454515e-05,
"loss": 0.0132,
"step": 8730
},
{
"epoch": 17.764227642276424,
"grad_norm": 0.10557420551776886,
"learning_rate": 4.0533501837933134e-05,
"loss": 0.0132,
"step": 8740
},
{
"epoch": 17.784552845528456,
"grad_norm": 0.17406034469604492,
"learning_rate": 4.042528730614936e-05,
"loss": 0.0151,
"step": 8750
},
{
"epoch": 17.804878048780488,
"grad_norm": 0.13269108533859253,
"learning_rate": 4.0317119311066486e-05,
"loss": 0.0159,
"step": 8760
},
{
"epoch": 17.82520325203252,
"grad_norm": 0.12828195095062256,
"learning_rate": 4.02089983784216e-05,
"loss": 0.0132,
"step": 8770
},
{
"epoch": 17.84552845528455,
"grad_norm": 0.17932254076004028,
"learning_rate": 4.010092503372309e-05,
"loss": 0.0152,
"step": 8780
},
{
"epoch": 17.865853658536587,
"grad_norm": 0.2130482792854309,
"learning_rate": 3.999289980224797e-05,
"loss": 0.0145,
"step": 8790
},
{
"epoch": 17.88617886178862,
"grad_norm": 0.20235665142536163,
"learning_rate": 3.9884923209039455e-05,
"loss": 0.0128,
"step": 8800
},
{
"epoch": 17.90650406504065,
"grad_norm": 0.09971121698617935,
"learning_rate": 3.977699577890439e-05,
"loss": 0.0148,
"step": 8810
},
{
"epoch": 17.926829268292682,
"grad_norm": 0.14858904480934143,
"learning_rate": 3.96691180364106e-05,
"loss": 0.0158,
"step": 8820
},
{
"epoch": 17.947154471544714,
"grad_norm": 0.15000762045383453,
"learning_rate": 3.956129050588446e-05,
"loss": 0.0132,
"step": 8830
},
{
"epoch": 17.96747967479675,
"grad_norm": 0.15528957545757294,
"learning_rate": 3.9453513711408275e-05,
"loss": 0.0127,
"step": 8840
},
{
"epoch": 17.98780487804878,
"grad_norm": 0.162289097905159,
"learning_rate": 3.934578817681774e-05,
"loss": 0.0136,
"step": 8850
},
{
"epoch": 18.008130081300813,
"grad_norm": 0.16899584233760834,
"learning_rate": 3.9238114425699465e-05,
"loss": 0.0139,
"step": 8860
},
{
"epoch": 18.028455284552845,
"grad_norm": 0.13523997366428375,
"learning_rate": 3.91304929813883e-05,
"loss": 0.0115,
"step": 8870
},
{
"epoch": 18.048780487804876,
"grad_norm": 0.15300580859184265,
"learning_rate": 3.902292436696489e-05,
"loss": 0.0159,
"step": 8880
},
{
"epoch": 18.06910569105691,
"grad_norm": 0.16981197893619537,
"learning_rate": 3.891540910525316e-05,
"loss": 0.0115,
"step": 8890
},
{
"epoch": 18.089430894308943,
"grad_norm": 0.15623369812965393,
"learning_rate": 3.8807947718817624e-05,
"loss": 0.0138,
"step": 8900
},
{
"epoch": 18.109756097560975,
"grad_norm": 0.16787086427211761,
"learning_rate": 3.870054072996103e-05,
"loss": 0.012,
"step": 8910
},
{
"epoch": 18.130081300813007,
"grad_norm": 0.12681931257247925,
"learning_rate": 3.859318866072168e-05,
"loss": 0.0115,
"step": 8920
},
{
"epoch": 18.150406504065042,
"grad_norm": 0.12948468327522278,
"learning_rate": 3.8485892032870965e-05,
"loss": 0.0111,
"step": 8930
},
{
"epoch": 18.170731707317074,
"grad_norm": 0.11079384386539459,
"learning_rate": 3.83786513679108e-05,
"loss": 0.0119,
"step": 8940
},
{
"epoch": 18.191056910569106,
"grad_norm": 0.1111801415681839,
"learning_rate": 3.8271467187071134e-05,
"loss": 0.0123,
"step": 8950
},
{
"epoch": 18.211382113821138,
"grad_norm": 0.1817050576210022,
"learning_rate": 3.816434001130732e-05,
"loss": 0.015,
"step": 8960
},
{
"epoch": 18.23170731707317,
"grad_norm": 0.16134339570999146,
"learning_rate": 3.8057270361297706e-05,
"loss": 0.0119,
"step": 8970
},
{
"epoch": 18.252032520325205,
"grad_norm": 0.07816947251558304,
"learning_rate": 3.7950258757440985e-05,
"loss": 0.015,
"step": 8980
},
{
"epoch": 18.272357723577237,
"grad_norm": 0.13936541974544525,
"learning_rate": 3.78433057198538e-05,
"loss": 0.0129,
"step": 8990
},
{
"epoch": 18.29268292682927,
"grad_norm": 0.10490652918815613,
"learning_rate": 3.773641176836807e-05,
"loss": 0.015,
"step": 9000
},
{
"epoch": 18.3130081300813,
"grad_norm": 0.0925993099808693,
"learning_rate": 3.7629577422528555e-05,
"loss": 0.0118,
"step": 9010
},
{
"epoch": 18.333333333333332,
"grad_norm": 0.15427745878696442,
"learning_rate": 3.7522803201590325e-05,
"loss": 0.0093,
"step": 9020
},
{
"epoch": 18.353658536585368,
"grad_norm": 0.07753149420022964,
"learning_rate": 3.741608962451621e-05,
"loss": 0.0103,
"step": 9030
},
{
"epoch": 18.3739837398374,
"grad_norm": 0.08675969392061234,
"learning_rate": 3.730943720997427e-05,
"loss": 0.0117,
"step": 9040
},
{
"epoch": 18.39430894308943,
"grad_norm": 0.06992247700691223,
"learning_rate": 3.720284647633532e-05,
"loss": 0.0108,
"step": 9050
},
{
"epoch": 18.414634146341463,
"grad_norm": 0.10455995798110962,
"learning_rate": 3.7096317941670365e-05,
"loss": 0.0145,
"step": 9060
},
{
"epoch": 18.434959349593495,
"grad_norm": 0.13448111712932587,
"learning_rate": 3.698985212374814e-05,
"loss": 0.0127,
"step": 9070
},
{
"epoch": 18.45528455284553,
"grad_norm": 0.13202343881130219,
"learning_rate": 3.6883449540032477e-05,
"loss": 0.012,
"step": 9080
},
{
"epoch": 18.475609756097562,
"grad_norm": 0.1117219552397728,
"learning_rate": 3.6777110707679905e-05,
"loss": 0.0119,
"step": 9090
},
{
"epoch": 18.495934959349594,
"grad_norm": 0.12106060236692429,
"learning_rate": 3.667083614353715e-05,
"loss": 0.012,
"step": 9100
},
{
"epoch": 18.516260162601625,
"grad_norm": 0.14695453643798828,
"learning_rate": 3.6564626364138465e-05,
"loss": 0.0169,
"step": 9110
},
{
"epoch": 18.536585365853657,
"grad_norm": 0.12978288531303406,
"learning_rate": 3.645848188570331e-05,
"loss": 0.0145,
"step": 9120
},
{
"epoch": 18.556910569105693,
"grad_norm": 0.10005785524845123,
"learning_rate": 3.635240322413374e-05,
"loss": 0.0095,
"step": 9130
},
{
"epoch": 18.577235772357724,
"grad_norm": 0.13557754456996918,
"learning_rate": 3.624639089501187e-05,
"loss": 0.0115,
"step": 9140
},
{
"epoch": 18.597560975609756,
"grad_norm": 0.12689892947673798,
"learning_rate": 3.614044541359749e-05,
"loss": 0.0152,
"step": 9150
},
{
"epoch": 18.617886178861788,
"grad_norm": 0.10084543377161026,
"learning_rate": 3.603456729482541e-05,
"loss": 0.0133,
"step": 9160
},
{
"epoch": 18.63821138211382,
"grad_norm": 0.15835894644260406,
"learning_rate": 3.5928757053303055e-05,
"loss": 0.0163,
"step": 9170
},
{
"epoch": 18.658536585365855,
"grad_norm": 0.1506408303976059,
"learning_rate": 3.5823015203308e-05,
"loss": 0.0146,
"step": 9180
},
{
"epoch": 18.678861788617887,
"grad_norm": 0.10163554549217224,
"learning_rate": 3.57173422587853e-05,
"loss": 0.0123,
"step": 9190
},
{
"epoch": 18.69918699186992,
"grad_norm": 0.09243937581777573,
"learning_rate": 3.561173873334522e-05,
"loss": 0.0145,
"step": 9200
},
{
"epoch": 18.71951219512195,
"grad_norm": 0.128352090716362,
"learning_rate": 3.550620514026056e-05,
"loss": 0.0111,
"step": 9210
},
{
"epoch": 18.739837398373982,
"grad_norm": 0.14999257028102875,
"learning_rate": 3.54007419924642e-05,
"loss": 0.0156,
"step": 9220
},
{
"epoch": 18.760162601626018,
"grad_norm": 0.18344224989414215,
"learning_rate": 3.52953498025467e-05,
"loss": 0.0135,
"step": 9230
},
{
"epoch": 18.78048780487805,
"grad_norm": 0.12680011987686157,
"learning_rate": 3.519002908275368e-05,
"loss": 0.0107,
"step": 9240
},
{
"epoch": 18.80081300813008,
"grad_norm": 0.20007681846618652,
"learning_rate": 3.508478034498339e-05,
"loss": 0.0158,
"step": 9250
},
{
"epoch": 18.821138211382113,
"grad_norm": 0.11284324526786804,
"learning_rate": 3.497960410078427e-05,
"loss": 0.0114,
"step": 9260
},
{
"epoch": 18.841463414634145,
"grad_norm": 0.17493268847465515,
"learning_rate": 3.487450086135236e-05,
"loss": 0.0148,
"step": 9270
},
{
"epoch": 18.86178861788618,
"grad_norm": 0.1810353547334671,
"learning_rate": 3.476947113752891e-05,
"loss": 0.0096,
"step": 9280
},
{
"epoch": 18.882113821138212,
"grad_norm": 0.18835191428661346,
"learning_rate": 3.4664515439797823e-05,
"loss": 0.0114,
"step": 9290
},
{
"epoch": 18.902439024390244,
"grad_norm": 0.1354275792837143,
"learning_rate": 3.45596342782832e-05,
"loss": 0.0146,
"step": 9300
},
{
"epoch": 18.922764227642276,
"grad_norm": 0.12778376042842865,
"learning_rate": 3.4454828162746936e-05,
"loss": 0.0122,
"step": 9310
},
{
"epoch": 18.943089430894307,
"grad_norm": 0.13828104734420776,
"learning_rate": 3.435009760258608e-05,
"loss": 0.0123,
"step": 9320
},
{
"epoch": 18.963414634146343,
"grad_norm": 0.1193520799279213,
"learning_rate": 3.424544310683057e-05,
"loss": 0.0133,
"step": 9330
},
{
"epoch": 18.983739837398375,
"grad_norm": 0.12786108255386353,
"learning_rate": 3.41408651841405e-05,
"loss": 0.0126,
"step": 9340
},
{
"epoch": 19.004065040650406,
"grad_norm": 0.0895795002579689,
"learning_rate": 3.403636434280388e-05,
"loss": 0.0096,
"step": 9350
},
{
"epoch": 19.024390243902438,
"grad_norm": 0.14265698194503784,
"learning_rate": 3.393194109073411e-05,
"loss": 0.0134,
"step": 9360
},
{
"epoch": 19.04471544715447,
"grad_norm": 0.1201973482966423,
"learning_rate": 3.3827595935467376e-05,
"loss": 0.0148,
"step": 9370
},
{
"epoch": 19.065040650406505,
"grad_norm": 0.14567726850509644,
"learning_rate": 3.3723329384160344e-05,
"loss": 0.0149,
"step": 9380
},
{
"epoch": 19.085365853658537,
"grad_norm": 0.13860636949539185,
"learning_rate": 3.3619141943587646e-05,
"loss": 0.0141,
"step": 9390
},
{
"epoch": 19.10569105691057,
"grad_norm": 0.09485989809036255,
"learning_rate": 3.351503412013935e-05,
"loss": 0.0087,
"step": 9400
},
{
"epoch": 19.1260162601626,
"grad_norm": 0.14823415875434875,
"learning_rate": 3.341100641981863e-05,
"loss": 0.0115,
"step": 9410
},
{
"epoch": 19.146341463414632,
"grad_norm": 0.17353856563568115,
"learning_rate": 3.330705934823919e-05,
"loss": 0.0124,
"step": 9420
},
{
"epoch": 19.166666666666668,
"grad_norm": 0.1345943808555603,
"learning_rate": 3.3203193410622804e-05,
"loss": 0.0121,
"step": 9430
},
{
"epoch": 19.1869918699187,
"grad_norm": 0.19979223608970642,
"learning_rate": 3.309940911179701e-05,
"loss": 0.0138,
"step": 9440
},
{
"epoch": 19.20731707317073,
"grad_norm": 0.15768824517726898,
"learning_rate": 3.2995706956192465e-05,
"loss": 0.0124,
"step": 9450
},
{
"epoch": 19.227642276422763,
"grad_norm": 0.15080216526985168,
"learning_rate": 3.289208744784059e-05,
"loss": 0.0145,
"step": 9460
},
{
"epoch": 19.247967479674795,
"grad_norm": 0.11168956756591797,
"learning_rate": 3.2788551090371164e-05,
"loss": 0.012,
"step": 9470
},
{
"epoch": 19.26829268292683,
"grad_norm": 0.10030027478933334,
"learning_rate": 3.268509838700974e-05,
"loss": 0.0109,
"step": 9480
},
{
"epoch": 19.288617886178862,
"grad_norm": 0.15326887369155884,
"learning_rate": 3.258172984057535e-05,
"loss": 0.0137,
"step": 9490
},
{
"epoch": 19.308943089430894,
"grad_norm": 0.16193649172782898,
"learning_rate": 3.247844595347798e-05,
"loss": 0.013,
"step": 9500
},
{
"epoch": 19.329268292682926,
"grad_norm": 0.1177881509065628,
"learning_rate": 3.2375247227716077e-05,
"loss": 0.0122,
"step": 9510
},
{
"epoch": 19.34959349593496,
"grad_norm": 0.11959271878004074,
"learning_rate": 3.2272134164874264e-05,
"loss": 0.0141,
"step": 9520
},
{
"epoch": 19.369918699186993,
"grad_norm": 0.13897369801998138,
"learning_rate": 3.216910726612073e-05,
"loss": 0.012,
"step": 9530
},
{
"epoch": 19.390243902439025,
"grad_norm": 0.13143090903759003,
"learning_rate": 3.2066167032204956e-05,
"loss": 0.014,
"step": 9540
},
{
"epoch": 19.410569105691057,
"grad_norm": 0.15241993963718414,
"learning_rate": 3.196331396345512e-05,
"loss": 0.0145,
"step": 9550
},
{
"epoch": 19.43089430894309,
"grad_norm": 0.17542560398578644,
"learning_rate": 3.186054855977577e-05,
"loss": 0.0144,
"step": 9560
},
{
"epoch": 19.451219512195124,
"grad_norm": 0.1463475078344345,
"learning_rate": 3.175787132064542e-05,
"loss": 0.014,
"step": 9570
},
{
"epoch": 19.471544715447155,
"grad_norm": 0.12420643121004105,
"learning_rate": 3.165528274511397e-05,
"loss": 0.0125,
"step": 9580
},
{
"epoch": 19.491869918699187,
"grad_norm": 0.11292680352926254,
"learning_rate": 3.155278333180047e-05,
"loss": 0.0094,
"step": 9590
},
{
"epoch": 19.51219512195122,
"grad_norm": 0.11575955152511597,
"learning_rate": 3.14503735788906e-05,
"loss": 0.0124,
"step": 9600
},
{
"epoch": 19.53252032520325,
"grad_norm": 0.1290699690580368,
"learning_rate": 3.134805398413419e-05,
"loss": 0.0135,
"step": 9610
},
{
"epoch": 19.552845528455286,
"grad_norm": 0.12234389036893845,
"learning_rate": 3.1245825044842954e-05,
"loss": 0.0136,
"step": 9620
},
{
"epoch": 19.573170731707318,
"grad_norm": 0.13453805446624756,
"learning_rate": 3.114368725788791e-05,
"loss": 0.0132,
"step": 9630
},
{
"epoch": 19.59349593495935,
"grad_norm": 0.15979017317295074,
"learning_rate": 3.1041641119697075e-05,
"loss": 0.0112,
"step": 9640
},
{
"epoch": 19.61382113821138,
"grad_norm": 0.12622177600860596,
"learning_rate": 3.093968712625306e-05,
"loss": 0.0137,
"step": 9650
},
{
"epoch": 19.634146341463413,
"grad_norm": 0.1539682000875473,
"learning_rate": 3.0837825773090535e-05,
"loss": 0.0115,
"step": 9660
},
{
"epoch": 19.65447154471545,
"grad_norm": 0.11947452276945114,
"learning_rate": 3.073605755529395e-05,
"loss": 0.0141,
"step": 9670
},
{
"epoch": 19.67479674796748,
"grad_norm": 0.14298219978809357,
"learning_rate": 3.063438296749511e-05,
"loss": 0.014,
"step": 9680
},
{
"epoch": 19.695121951219512,
"grad_norm": 0.11294319480657578,
"learning_rate": 3.053280250387067e-05,
"loss": 0.0096,
"step": 9690
},
{
"epoch": 19.715447154471544,
"grad_norm": 0.13358741998672485,
"learning_rate": 3.043131665813988e-05,
"loss": 0.0103,
"step": 9700
},
{
"epoch": 19.735772357723576,
"grad_norm": 0.08656350523233414,
"learning_rate": 3.0329925923562073e-05,
"loss": 0.0132,
"step": 9710
},
{
"epoch": 19.75609756097561,
"grad_norm": 0.2658238410949707,
"learning_rate": 3.0228630792934277e-05,
"loss": 0.0129,
"step": 9720
},
{
"epoch": 19.776422764227643,
"grad_norm": 0.1336117833852768,
"learning_rate": 3.0127431758588918e-05,
"loss": 0.0142,
"step": 9730
},
{
"epoch": 19.796747967479675,
"grad_norm": 0.15740816295146942,
"learning_rate": 3.002632931239133e-05,
"loss": 0.0113,
"step": 9740
},
{
"epoch": 19.817073170731707,
"grad_norm": 0.14007125794887543,
"learning_rate": 2.992532394573735e-05,
"loss": 0.0107,
"step": 9750
},
{
"epoch": 19.83739837398374,
"grad_norm": 0.11406854540109634,
"learning_rate": 2.982441614955105e-05,
"loss": 0.01,
"step": 9760
},
{
"epoch": 19.857723577235774,
"grad_norm": 0.07972941547632217,
"learning_rate": 2.972360641428218e-05,
"loss": 0.01,
"step": 9770
},
{
"epoch": 19.878048780487806,
"grad_norm": 0.11841104924678802,
"learning_rate": 2.9622895229903973e-05,
"loss": 0.0094,
"step": 9780
},
{
"epoch": 19.898373983739837,
"grad_norm": 0.12949933111667633,
"learning_rate": 2.9522283085910612e-05,
"loss": 0.0124,
"step": 9790
},
{
"epoch": 19.91869918699187,
"grad_norm": 0.1191362589597702,
"learning_rate": 2.942177047131489e-05,
"loss": 0.0093,
"step": 9800
},
{
"epoch": 19.9390243902439,
"grad_norm": 0.11803624778985977,
"learning_rate": 2.9321357874645905e-05,
"loss": 0.0129,
"step": 9810
},
{
"epoch": 19.959349593495936,
"grad_norm": 0.13236430287361145,
"learning_rate": 2.9221045783946577e-05,
"loss": 0.0092,
"step": 9820
},
{
"epoch": 19.979674796747968,
"grad_norm": 0.09027262032032013,
"learning_rate": 2.9120834686771394e-05,
"loss": 0.0138,
"step": 9830
},
{
"epoch": 20.0,
"grad_norm": 0.1077914610505104,
"learning_rate": 2.902072507018392e-05,
"loss": 0.0153,
"step": 9840
},
{
"epoch": 20.020325203252032,
"grad_norm": 0.1104595884680748,
"learning_rate": 2.892071742075446e-05,
"loss": 0.0124,
"step": 9850
},
{
"epoch": 20.040650406504064,
"grad_norm": 0.1770085245370865,
"learning_rate": 2.8820812224557812e-05,
"loss": 0.0138,
"step": 9860
},
{
"epoch": 20.0609756097561,
"grad_norm": 0.16476783156394958,
"learning_rate": 2.8721009967170764e-05,
"loss": 0.0106,
"step": 9870
},
{
"epoch": 20.08130081300813,
"grad_norm": 0.1422470360994339,
"learning_rate": 2.8621311133669748e-05,
"loss": 0.0127,
"step": 9880
},
{
"epoch": 20.101626016260163,
"grad_norm": 0.14497612416744232,
"learning_rate": 2.8521716208628595e-05,
"loss": 0.0155,
"step": 9890
},
{
"epoch": 20.121951219512194,
"grad_norm": 0.1829124540090561,
"learning_rate": 2.8422225676116015e-05,
"loss": 0.0135,
"step": 9900
},
{
"epoch": 20.142276422764226,
"grad_norm": 0.1418256163597107,
"learning_rate": 2.832284001969342e-05,
"loss": 0.0122,
"step": 9910
},
{
"epoch": 20.16260162601626,
"grad_norm": 0.14839938282966614,
"learning_rate": 2.8223559722412408e-05,
"loss": 0.0144,
"step": 9920
},
{
"epoch": 20.182926829268293,
"grad_norm": 0.16532327234745026,
"learning_rate": 2.8124385266812516e-05,
"loss": 0.0085,
"step": 9930
},
{
"epoch": 20.203252032520325,
"grad_norm": 0.09330819547176361,
"learning_rate": 2.802531713491886e-05,
"loss": 0.0151,
"step": 9940
},
{
"epoch": 20.223577235772357,
"grad_norm": 0.17031468451023102,
"learning_rate": 2.7926355808239822e-05,
"loss": 0.0146,
"step": 9950
},
{
"epoch": 20.24390243902439,
"grad_norm": 0.11071918159723282,
"learning_rate": 2.782750176776458e-05,
"loss": 0.0112,
"step": 9960
},
{
"epoch": 20.264227642276424,
"grad_norm": 0.13747528195381165,
"learning_rate": 2.7728755493960946e-05,
"loss": 0.0122,
"step": 9970
},
{
"epoch": 20.284552845528456,
"grad_norm": 0.11134276539087296,
"learning_rate": 2.7630117466772876e-05,
"loss": 0.0098,
"step": 9980
},
{
"epoch": 20.304878048780488,
"grad_norm": 0.11619052290916443,
"learning_rate": 2.7531588165618278e-05,
"loss": 0.0119,
"step": 9990
},
{
"epoch": 20.32520325203252,
"grad_norm": 0.1301652193069458,
"learning_rate": 2.7433168069386533e-05,
"loss": 0.0116,
"step": 10000
},
{
"epoch": 20.34552845528455,
"grad_norm": 0.1226324513554573,
"learning_rate": 2.7334857656436308e-05,
"loss": 0.0116,
"step": 10010
},
{
"epoch": 20.365853658536587,
"grad_norm": 0.11929962784051895,
"learning_rate": 2.7236657404593157e-05,
"loss": 0.0107,
"step": 10020
},
{
"epoch": 20.38617886178862,
"grad_norm": 0.11318682879209518,
"learning_rate": 2.713856779114716e-05,
"loss": 0.011,
"step": 10030
},
{
"epoch": 20.40650406504065,
"grad_norm": 0.18702766299247742,
"learning_rate": 2.704058929285074e-05,
"loss": 0.0138,
"step": 10040
},
{
"epoch": 20.426829268292682,
"grad_norm": 0.11912211030721664,
"learning_rate": 2.6942722385916175e-05,
"loss": 0.0109,
"step": 10050
},
{
"epoch": 20.447154471544714,
"grad_norm": 0.12498341500759125,
"learning_rate": 2.6844967546013394e-05,
"loss": 0.0131,
"step": 10060
},
{
"epoch": 20.46747967479675,
"grad_norm": 0.13249894976615906,
"learning_rate": 2.6747325248267673e-05,
"loss": 0.0126,
"step": 10070
},
{
"epoch": 20.48780487804878,
"grad_norm": 0.09908751398324966,
"learning_rate": 2.664979596725724e-05,
"loss": 0.0121,
"step": 10080
},
{
"epoch": 20.508130081300813,
"grad_norm": 0.12584403157234192,
"learning_rate": 2.655238017701105e-05,
"loss": 0.011,
"step": 10090
},
{
"epoch": 20.528455284552845,
"grad_norm": 0.14452055096626282,
"learning_rate": 2.6455078351006455e-05,
"loss": 0.0105,
"step": 10100
},
{
"epoch": 20.548780487804876,
"grad_norm": 0.10302133113145828,
"learning_rate": 2.6357890962166866e-05,
"loss": 0.0125,
"step": 10110
},
{
"epoch": 20.56910569105691,
"grad_norm": 0.12645643949508667,
"learning_rate": 2.6260818482859534e-05,
"loss": 0.0108,
"step": 10120
},
{
"epoch": 20.589430894308943,
"grad_norm": 0.08891498297452927,
"learning_rate": 2.6163861384893156e-05,
"loss": 0.0096,
"step": 10130
},
{
"epoch": 20.609756097560975,
"grad_norm": 0.12570464611053467,
"learning_rate": 2.606702013951564e-05,
"loss": 0.0087,
"step": 10140
},
{
"epoch": 20.630081300813007,
"grad_norm": 0.07609464228153229,
"learning_rate": 2.5970295217411844e-05,
"loss": 0.0094,
"step": 10150
},
{
"epoch": 20.65040650406504,
"grad_norm": 0.10837171971797943,
"learning_rate": 2.5873687088701236e-05,
"loss": 0.0107,
"step": 10160
},
{
"epoch": 20.670731707317074,
"grad_norm": 0.12332095205783844,
"learning_rate": 2.5777196222935596e-05,
"loss": 0.0137,
"step": 10170
},
{
"epoch": 20.691056910569106,
"grad_norm": 0.11973092705011368,
"learning_rate": 2.5680823089096807e-05,
"loss": 0.0122,
"step": 10180
},
{
"epoch": 20.711382113821138,
"grad_norm": 0.07781854271888733,
"learning_rate": 2.558456815559448e-05,
"loss": 0.0107,
"step": 10190
},
{
"epoch": 20.73170731707317,
"grad_norm": 0.08621246367692947,
"learning_rate": 2.548843189026378e-05,
"loss": 0.0109,
"step": 10200
},
{
"epoch": 20.752032520325205,
"grad_norm": 0.10767322778701782,
"learning_rate": 2.5392414760363048e-05,
"loss": 0.0117,
"step": 10210
},
{
"epoch": 20.772357723577237,
"grad_norm": 0.15958262979984283,
"learning_rate": 2.529651723257162e-05,
"loss": 0.0136,
"step": 10220
},
{
"epoch": 20.79268292682927,
"grad_norm": 0.08153461664915085,
"learning_rate": 2.5200739772987537e-05,
"loss": 0.0118,
"step": 10230
},
{
"epoch": 20.8130081300813,
"grad_norm": 0.10004610568284988,
"learning_rate": 2.5105082847125184e-05,
"loss": 0.009,
"step": 10240
},
{
"epoch": 20.833333333333332,
"grad_norm": 0.13535667955875397,
"learning_rate": 2.5009546919913218e-05,
"loss": 0.0097,
"step": 10250
},
{
"epoch": 20.853658536585368,
"grad_norm": 0.11622301489114761,
"learning_rate": 2.4914132455692098e-05,
"loss": 0.0105,
"step": 10260
},
{
"epoch": 20.8739837398374,
"grad_norm": 0.12948839366436005,
"learning_rate": 2.4818839918211962e-05,
"loss": 0.009,
"step": 10270
},
{
"epoch": 20.89430894308943,
"grad_norm": 0.15754957497119904,
"learning_rate": 2.4723669770630376e-05,
"loss": 0.0143,
"step": 10280
},
{
"epoch": 20.914634146341463,
"grad_norm": 0.14372849464416504,
"learning_rate": 2.4628622475509972e-05,
"loss": 0.012,
"step": 10290
},
{
"epoch": 20.934959349593495,
"grad_norm": 0.1215457022190094,
"learning_rate": 2.4533698494816342e-05,
"loss": 0.0095,
"step": 10300
},
{
"epoch": 20.95528455284553,
"grad_norm": 0.08143052458763123,
"learning_rate": 2.44388982899157e-05,
"loss": 0.0102,
"step": 10310
},
{
"epoch": 20.975609756097562,
"grad_norm": 0.08094421029090881,
"learning_rate": 2.4344222321572636e-05,
"loss": 0.0157,
"step": 10320
},
{
"epoch": 20.995934959349594,
"grad_norm": 0.11086557060480118,
"learning_rate": 2.4249671049947954e-05,
"loss": 0.0096,
"step": 10330
},
{
"epoch": 21.016260162601625,
"grad_norm": 0.09536684304475784,
"learning_rate": 2.4155244934596333e-05,
"loss": 0.0084,
"step": 10340
},
{
"epoch": 21.036585365853657,
"grad_norm": 0.10263389348983765,
"learning_rate": 2.406094443446416e-05,
"loss": 0.0096,
"step": 10350
},
{
"epoch": 21.056910569105693,
"grad_norm": 0.12646539509296417,
"learning_rate": 2.3966770007887317e-05,
"loss": 0.0078,
"step": 10360
},
{
"epoch": 21.077235772357724,
"grad_norm": 0.15264266729354858,
"learning_rate": 2.3872722112588903e-05,
"loss": 0.0118,
"step": 10370
},
{
"epoch": 21.097560975609756,
"grad_norm": 0.16815584897994995,
"learning_rate": 2.3778801205676997e-05,
"loss": 0.0106,
"step": 10380
},
{
"epoch": 21.117886178861788,
"grad_norm": 0.1217648908495903,
"learning_rate": 2.3685007743642524e-05,
"loss": 0.0081,
"step": 10390
},
{
"epoch": 21.13821138211382,
"grad_norm": 0.11896563321352005,
"learning_rate": 2.3591342182356914e-05,
"loss": 0.0125,
"step": 10400
},
{
"epoch": 21.158536585365855,
"grad_norm": 0.11437961459159851,
"learning_rate": 2.3497804977070016e-05,
"loss": 0.0102,
"step": 10410
},
{
"epoch": 21.178861788617887,
"grad_norm": 0.10344325751066208,
"learning_rate": 2.3404396582407777e-05,
"loss": 0.0094,
"step": 10420
},
{
"epoch": 21.19918699186992,
"grad_norm": 0.1586674153804779,
"learning_rate": 2.331111745237007e-05,
"loss": 0.0105,
"step": 10430
},
{
"epoch": 21.21951219512195,
"grad_norm": 0.15335488319396973,
"learning_rate": 2.3217968040328526e-05,
"loss": 0.0126,
"step": 10440
},
{
"epoch": 21.239837398373982,
"grad_norm": 0.16755497455596924,
"learning_rate": 2.3124948799024286e-05,
"loss": 0.0125,
"step": 10450
},
{
"epoch": 21.260162601626018,
"grad_norm": 0.1016865074634552,
"learning_rate": 2.3032060180565828e-05,
"loss": 0.0094,
"step": 10460
},
{
"epoch": 21.28048780487805,
"grad_norm": 0.13073071837425232,
"learning_rate": 2.2939302636426724e-05,
"loss": 0.0105,
"step": 10470
},
{
"epoch": 21.30081300813008,
"grad_norm": 0.13936924934387207,
"learning_rate": 2.2846676617443458e-05,
"loss": 0.0096,
"step": 10480
},
{
"epoch": 21.321138211382113,
"grad_norm": 0.1346622258424759,
"learning_rate": 2.275418257381332e-05,
"loss": 0.0102,
"step": 10490
},
{
"epoch": 21.341463414634145,
"grad_norm": 0.13846242427825928,
"learning_rate": 2.2661820955092083e-05,
"loss": 0.009,
"step": 10500
},
{
"epoch": 21.36178861788618,
"grad_norm": 0.10490627586841583,
"learning_rate": 2.256959221019193e-05,
"loss": 0.0136,
"step": 10510
},
{
"epoch": 21.382113821138212,
"grad_norm": 0.159734308719635,
"learning_rate": 2.2477496787379227e-05,
"loss": 0.0127,
"step": 10520
},
{
"epoch": 21.402439024390244,
"grad_norm": 0.10712343454360962,
"learning_rate": 2.238553513427229e-05,
"loss": 0.0116,
"step": 10530
},
{
"epoch": 21.422764227642276,
"grad_norm": 0.1423291116952896,
"learning_rate": 2.2293707697839344e-05,
"loss": 0.0104,
"step": 10540
},
{
"epoch": 21.443089430894307,
"grad_norm": 0.11537821590900421,
"learning_rate": 2.2202014924396214e-05,
"loss": 0.0088,
"step": 10550
},
{
"epoch": 21.463414634146343,
"grad_norm": 0.10486509650945663,
"learning_rate": 2.21104572596042e-05,
"loss": 0.0091,
"step": 10560
},
{
"epoch": 21.483739837398375,
"grad_norm": 0.0840308740735054,
"learning_rate": 2.2019035148468e-05,
"loss": 0.0097,
"step": 10570
},
{
"epoch": 21.504065040650406,
"grad_norm": 0.1811951994895935,
"learning_rate": 2.1927749035333374e-05,
"loss": 0.0095,
"step": 10580
},
{
"epoch": 21.524390243902438,
"grad_norm": 0.13960686326026917,
"learning_rate": 2.1836599363885152e-05,
"loss": 0.0093,
"step": 10590
},
{
"epoch": 21.54471544715447,
"grad_norm": 0.09911748021841049,
"learning_rate": 2.1745586577144993e-05,
"loss": 0.0109,
"step": 10600
},
{
"epoch": 21.565040650406505,
"grad_norm": 0.1075323298573494,
"learning_rate": 2.1654711117469207e-05,
"loss": 0.0092,
"step": 10610
},
{
"epoch": 21.585365853658537,
"grad_norm": 0.08886104822158813,
"learning_rate": 2.1563973426546702e-05,
"loss": 0.0092,
"step": 10620
},
{
"epoch": 21.60569105691057,
"grad_norm": 0.09648662060499191,
"learning_rate": 2.1473373945396728e-05,
"loss": 0.0096,
"step": 10630
},
{
"epoch": 21.6260162601626,
"grad_norm": 0.1114833727478981,
"learning_rate": 2.138291311436679e-05,
"loss": 0.0086,
"step": 10640
},
{
"epoch": 21.646341463414632,
"grad_norm": 0.07100456207990646,
"learning_rate": 2.1292591373130518e-05,
"loss": 0.0082,
"step": 10650
},
{
"epoch": 21.666666666666668,
"grad_norm": 0.08392198383808136,
"learning_rate": 2.1202409160685528e-05,
"loss": 0.0113,
"step": 10660
},
{
"epoch": 21.6869918699187,
"grad_norm": 0.12464640289545059,
"learning_rate": 2.1112366915351228e-05,
"loss": 0.0098,
"step": 10670
},
{
"epoch": 21.70731707317073,
"grad_norm": 0.0857594907283783,
"learning_rate": 2.102246507476679e-05,
"loss": 0.0112,
"step": 10680
},
{
"epoch": 21.727642276422763,
"grad_norm": 0.12353429198265076,
"learning_rate": 2.09327040758889e-05,
"loss": 0.0096,
"step": 10690
},
{
"epoch": 21.747967479674795,
"grad_norm": 0.14459079504013062,
"learning_rate": 2.0843084354989767e-05,
"loss": 0.0084,
"step": 10700
},
{
"epoch": 21.76829268292683,
"grad_norm": 0.2441745102405548,
"learning_rate": 2.0753606347654892e-05,
"loss": 0.0137,
"step": 10710
},
{
"epoch": 21.788617886178862,
"grad_norm": 0.09381800144910812,
"learning_rate": 2.0664270488780985e-05,
"loss": 0.0074,
"step": 10720
},
{
"epoch": 21.808943089430894,
"grad_norm": 0.085075244307518,
"learning_rate": 2.0575077212573905e-05,
"loss": 0.0081,
"step": 10730
},
{
"epoch": 21.829268292682926,
"grad_norm": 0.1668519526720047,
"learning_rate": 2.0486026952546484e-05,
"loss": 0.0116,
"step": 10740
},
{
"epoch": 21.84959349593496,
"grad_norm": 0.10299837589263916,
"learning_rate": 2.0397120141516457e-05,
"loss": 0.0094,
"step": 10750
},
{
"epoch": 21.869918699186993,
"grad_norm": 0.09836846590042114,
"learning_rate": 2.0308357211604313e-05,
"loss": 0.0097,
"step": 10760
},
{
"epoch": 21.890243902439025,
"grad_norm": 0.10448900610208511,
"learning_rate": 2.0219738594231224e-05,
"loss": 0.0092,
"step": 10770
},
{
"epoch": 21.910569105691057,
"grad_norm": 0.0666520744562149,
"learning_rate": 2.0131264720116993e-05,
"loss": 0.0079,
"step": 10780
},
{
"epoch": 21.93089430894309,
"grad_norm": 0.08662780374288559,
"learning_rate": 2.0042936019277853e-05,
"loss": 0.0117,
"step": 10790
},
{
"epoch": 21.951219512195124,
"grad_norm": 0.11221914738416672,
"learning_rate": 1.99547529210245e-05,
"loss": 0.0088,
"step": 10800
},
{
"epoch": 21.971544715447155,
"grad_norm": 0.08087283372879028,
"learning_rate": 1.9866715853959934e-05,
"loss": 0.0097,
"step": 10810
},
{
"epoch": 21.991869918699187,
"grad_norm": 0.12656255066394806,
"learning_rate": 1.977882524597734e-05,
"loss": 0.0131,
"step": 10820
},
{
"epoch": 22.01219512195122,
"grad_norm": 0.12034684419631958,
"learning_rate": 1.969108152425813e-05,
"loss": 0.0102,
"step": 10830
},
{
"epoch": 22.03252032520325,
"grad_norm": 0.08809592574834824,
"learning_rate": 1.9603485115269744e-05,
"loss": 0.0113,
"step": 10840
},
{
"epoch": 22.052845528455286,
"grad_norm": 0.07460938394069672,
"learning_rate": 1.9516036444763613e-05,
"loss": 0.0102,
"step": 10850
},
{
"epoch": 22.073170731707318,
"grad_norm": 0.08779294788837433,
"learning_rate": 1.9428735937773173e-05,
"loss": 0.0109,
"step": 10860
},
{
"epoch": 22.09349593495935,
"grad_norm": 0.09611085057258606,
"learning_rate": 1.9341584018611646e-05,
"loss": 0.0084,
"step": 10870
},
{
"epoch": 22.11382113821138,
"grad_norm": 0.1048993468284607,
"learning_rate": 1.9254581110870123e-05,
"loss": 0.009,
"step": 10880
},
{
"epoch": 22.134146341463413,
"grad_norm": 0.11739665269851685,
"learning_rate": 1.916772763741544e-05,
"loss": 0.0095,
"step": 10890
},
{
"epoch": 22.15447154471545,
"grad_norm": 0.1125992089509964,
"learning_rate": 1.908102402038807e-05,
"loss": 0.0095,
"step": 10900
},
{
"epoch": 22.17479674796748,
"grad_norm": 0.10314033180475235,
"learning_rate": 1.8994470681200204e-05,
"loss": 0.0086,
"step": 10910
},
{
"epoch": 22.195121951219512,
"grad_norm": 0.07227867096662521,
"learning_rate": 1.8908068040533578e-05,
"loss": 0.0075,
"step": 10920
},
{
"epoch": 22.215447154471544,
"grad_norm": 0.0778256207704544,
"learning_rate": 1.8821816518337455e-05,
"loss": 0.0113,
"step": 10930
},
{
"epoch": 22.235772357723576,
"grad_norm": 0.11512289941310883,
"learning_rate": 1.8735716533826663e-05,
"loss": 0.0071,
"step": 10940
},
{
"epoch": 22.25609756097561,
"grad_norm": 0.1081552803516388,
"learning_rate": 1.8649768505479476e-05,
"loss": 0.0089,
"step": 10950
},
{
"epoch": 22.276422764227643,
"grad_norm": 0.12834273278713226,
"learning_rate": 1.8563972851035616e-05,
"loss": 0.0082,
"step": 10960
},
{
"epoch": 22.296747967479675,
"grad_norm": 0.11064130812883377,
"learning_rate": 1.847832998749418e-05,
"loss": 0.0081,
"step": 10970
},
{
"epoch": 22.317073170731707,
"grad_norm": 0.11904542148113251,
"learning_rate": 1.8392840331111644e-05,
"loss": 0.0084,
"step": 10980
},
{
"epoch": 22.33739837398374,
"grad_norm": 0.08828697353601456,
"learning_rate": 1.830750429739989e-05,
"loss": 0.0107,
"step": 10990
},
{
"epoch": 22.357723577235774,
"grad_norm": 0.13146309554576874,
"learning_rate": 1.822232230112409e-05,
"loss": 0.0135,
"step": 11000
},
{
"epoch": 22.378048780487806,
"grad_norm": 0.12254566699266434,
"learning_rate": 1.813729475630071e-05,
"loss": 0.0078,
"step": 11010
},
{
"epoch": 22.398373983739837,
"grad_norm": 0.10865466296672821,
"learning_rate": 1.8052422076195635e-05,
"loss": 0.0084,
"step": 11020
},
{
"epoch": 22.41869918699187,
"grad_norm": 0.1604381948709488,
"learning_rate": 1.7967704673321918e-05,
"loss": 0.0127,
"step": 11030
},
{
"epoch": 22.4390243902439,
"grad_norm": 0.12828870117664337,
"learning_rate": 1.7883142959438004e-05,
"loss": 0.0071,
"step": 11040
},
{
"epoch": 22.459349593495936,
"grad_norm": 0.1228393092751503,
"learning_rate": 1.779873734554558e-05,
"loss": 0.0101,
"step": 11050
},
{
"epoch": 22.479674796747968,
"grad_norm": 0.07193803787231445,
"learning_rate": 1.771448824188761e-05,
"loss": 0.0073,
"step": 11060
},
{
"epoch": 22.5,
"grad_norm": 0.10907436907291412,
"learning_rate": 1.763039605794644e-05,
"loss": 0.0092,
"step": 11070
},
{
"epoch": 22.520325203252032,
"grad_norm": 0.10717453062534332,
"learning_rate": 1.754646120244164e-05,
"loss": 0.0097,
"step": 11080
},
{
"epoch": 22.540650406504064,
"grad_norm": 0.09861317276954651,
"learning_rate": 1.7462684083328144e-05,
"loss": 0.0093,
"step": 11090
},
{
"epoch": 22.5609756097561,
"grad_norm": 0.13518258929252625,
"learning_rate": 1.7379065107794262e-05,
"loss": 0.0106,
"step": 11100
},
{
"epoch": 22.58130081300813,
"grad_norm": 0.08812890946865082,
"learning_rate": 1.7295604682259586e-05,
"loss": 0.0128,
"step": 11110
},
{
"epoch": 22.601626016260163,
"grad_norm": 0.11440128833055496,
"learning_rate": 1.7212303212373175e-05,
"loss": 0.0091,
"step": 11120
},
{
"epoch": 22.621951219512194,
"grad_norm": 0.11984211951494217,
"learning_rate": 1.712916110301146e-05,
"loss": 0.0098,
"step": 11130
},
{
"epoch": 22.642276422764226,
"grad_norm": 0.11255427449941635,
"learning_rate": 1.7046178758276298e-05,
"loss": 0.0142,
"step": 11140
},
{
"epoch": 22.66260162601626,
"grad_norm": 0.1485426425933838,
"learning_rate": 1.696335658149309e-05,
"loss": 0.0153,
"step": 11150
},
{
"epoch": 22.682926829268293,
"grad_norm": 0.1576770395040512,
"learning_rate": 1.6880694975208727e-05,
"loss": 0.0115,
"step": 11160
},
{
"epoch": 22.703252032520325,
"grad_norm": 0.07304835319519043,
"learning_rate": 1.6798194341189687e-05,
"loss": 0.0096,
"step": 11170
},
{
"epoch": 22.723577235772357,
"grad_norm": 0.0978945642709732,
"learning_rate": 1.671585508042003e-05,
"loss": 0.0087,
"step": 11180
},
{
"epoch": 22.74390243902439,
"grad_norm": 0.11587736010551453,
"learning_rate": 1.6633677593099483e-05,
"loss": 0.0142,
"step": 11190
},
{
"epoch": 22.764227642276424,
"grad_norm": 0.08535769581794739,
"learning_rate": 1.655166227864154e-05,
"loss": 0.0121,
"step": 11200
},
{
"epoch": 22.784552845528456,
"grad_norm": 0.07661209255456924,
"learning_rate": 1.6469809535671426e-05,
"loss": 0.0136,
"step": 11210
},
{
"epoch": 22.804878048780488,
"grad_norm": 0.10255829989910126,
"learning_rate": 1.638811976202421e-05,
"loss": 0.0094,
"step": 11220
},
{
"epoch": 22.82520325203252,
"grad_norm": 0.11505745351314545,
"learning_rate": 1.6306593354742895e-05,
"loss": 0.0131,
"step": 11230
},
{
"epoch": 22.84552845528455,
"grad_norm": 0.07341789454221725,
"learning_rate": 1.6225230710076455e-05,
"loss": 0.008,
"step": 11240
},
{
"epoch": 22.865853658536587,
"grad_norm": 0.11690469831228256,
"learning_rate": 1.6144032223477924e-05,
"loss": 0.0125,
"step": 11250
},
{
"epoch": 22.88617886178862,
"grad_norm": 0.1037013903260231,
"learning_rate": 1.606299828960243e-05,
"loss": 0.008,
"step": 11260
},
{
"epoch": 22.90650406504065,
"grad_norm": 0.06866220384836197,
"learning_rate": 1.5982129302305337e-05,
"loss": 0.008,
"step": 11270
},
{
"epoch": 22.926829268292682,
"grad_norm": 0.06975048035383224,
"learning_rate": 1.590142565464032e-05,
"loss": 0.0085,
"step": 11280
},
{
"epoch": 22.947154471544714,
"grad_norm": 0.13513271510601044,
"learning_rate": 1.5820887738857408e-05,
"loss": 0.0086,
"step": 11290
},
{
"epoch": 22.96747967479675,
"grad_norm": 0.08723549544811249,
"learning_rate": 1.5740515946401134e-05,
"loss": 0.0102,
"step": 11300
},
{
"epoch": 22.98780487804878,
"grad_norm": 0.09078790247440338,
"learning_rate": 1.5660310667908634e-05,
"loss": 0.0093,
"step": 11310
},
{
"epoch": 23.008130081300813,
"grad_norm": 0.10816732048988342,
"learning_rate": 1.5580272293207655e-05,
"loss": 0.009,
"step": 11320
},
{
"epoch": 23.028455284552845,
"grad_norm": 0.09973511844873428,
"learning_rate": 1.5500401211314796e-05,
"loss": 0.0122,
"step": 11330
},
{
"epoch": 23.048780487804876,
"grad_norm": 0.13049659132957458,
"learning_rate": 1.542069781043351e-05,
"loss": 0.0129,
"step": 11340
},
{
"epoch": 23.06910569105691,
"grad_norm": 0.11349806189537048,
"learning_rate": 1.534116247795226e-05,
"loss": 0.0102,
"step": 11350
},
{
"epoch": 23.089430894308943,
"grad_norm": 0.09644364565610886,
"learning_rate": 1.526179560044267e-05,
"loss": 0.0073,
"step": 11360
},
{
"epoch": 23.109756097560975,
"grad_norm": 0.0995858833193779,
"learning_rate": 1.5182597563657552e-05,
"loss": 0.0122,
"step": 11370
},
{
"epoch": 23.130081300813007,
"grad_norm": 0.07347644865512848,
"learning_rate": 1.5103568752529135e-05,
"loss": 0.0081,
"step": 11380
},
{
"epoch": 23.150406504065042,
"grad_norm": 0.07966236770153046,
"learning_rate": 1.5024709551167142e-05,
"loss": 0.0072,
"step": 11390
},
{
"epoch": 23.170731707317074,
"grad_norm": 0.08882099390029907,
"learning_rate": 1.4946020342856898e-05,
"loss": 0.0063,
"step": 11400
},
{
"epoch": 23.191056910569106,
"grad_norm": 0.11498501151800156,
"learning_rate": 1.4867501510057546e-05,
"loss": 0.0089,
"step": 11410
},
{
"epoch": 23.211382113821138,
"grad_norm": 0.09351756423711777,
"learning_rate": 1.4789153434400094e-05,
"loss": 0.0122,
"step": 11420
},
{
"epoch": 23.23170731707317,
"grad_norm": 0.13294996321201324,
"learning_rate": 1.4710976496685614e-05,
"loss": 0.0097,
"step": 11430
},
{
"epoch": 23.252032520325205,
"grad_norm": 0.11491288244724274,
"learning_rate": 1.4632971076883406e-05,
"loss": 0.0084,
"step": 11440
},
{
"epoch": 23.272357723577237,
"grad_norm": 0.08280199766159058,
"learning_rate": 1.4555137554129117e-05,
"loss": 0.0108,
"step": 11450
},
{
"epoch": 23.29268292682927,
"grad_norm": 0.13369178771972656,
"learning_rate": 1.4477476306722925e-05,
"loss": 0.0121,
"step": 11460
},
{
"epoch": 23.3130081300813,
"grad_norm": 0.06917362660169601,
"learning_rate": 1.439998771212766e-05,
"loss": 0.0101,
"step": 11470
},
{
"epoch": 23.333333333333332,
"grad_norm": 0.06669972836971283,
"learning_rate": 1.4322672146966982e-05,
"loss": 0.009,
"step": 11480
},
{
"epoch": 23.353658536585368,
"grad_norm": 0.10002262890338898,
"learning_rate": 1.4245529987023621e-05,
"loss": 0.0068,
"step": 11490
},
{
"epoch": 23.3739837398374,
"grad_norm": 0.09954614192247391,
"learning_rate": 1.4168561607237436e-05,
"loss": 0.0094,
"step": 11500
},
{
"epoch": 23.39430894308943,
"grad_norm": 0.11286766827106476,
"learning_rate": 1.4091767381703657e-05,
"loss": 0.0078,
"step": 11510
},
{
"epoch": 23.414634146341463,
"grad_norm": 0.06297627091407776,
"learning_rate": 1.4015147683671087e-05,
"loss": 0.0112,
"step": 11520
},
{
"epoch": 23.434959349593495,
"grad_norm": 0.09290836751461029,
"learning_rate": 1.3938702885540239e-05,
"loss": 0.0101,
"step": 11530
},
{
"epoch": 23.45528455284553,
"grad_norm": 0.09421167522668839,
"learning_rate": 1.3862433358861576e-05,
"loss": 0.008,
"step": 11540
},
{
"epoch": 23.475609756097562,
"grad_norm": 0.10376714915037155,
"learning_rate": 1.3786339474333636e-05,
"loss": 0.0102,
"step": 11550
},
{
"epoch": 23.495934959349594,
"grad_norm": 0.11214353144168854,
"learning_rate": 1.3710421601801265e-05,
"loss": 0.0089,
"step": 11560
},
{
"epoch": 23.516260162601625,
"grad_norm": 0.05647756904363632,
"learning_rate": 1.3634680110253883e-05,
"loss": 0.0085,
"step": 11570
},
{
"epoch": 23.536585365853657,
"grad_norm": 0.08150362223386765,
"learning_rate": 1.3559115367823556e-05,
"loss": 0.0075,
"step": 11580
},
{
"epoch": 23.556910569105693,
"grad_norm": 0.07136379182338715,
"learning_rate": 1.3483727741783342e-05,
"loss": 0.0102,
"step": 11590
},
{
"epoch": 23.577235772357724,
"grad_norm": 0.11355695873498917,
"learning_rate": 1.3408517598545444e-05,
"loss": 0.0083,
"step": 11600
},
{
"epoch": 23.597560975609756,
"grad_norm": 0.0908714309334755,
"learning_rate": 1.3333485303659381e-05,
"loss": 0.0091,
"step": 11610
},
{
"epoch": 23.617886178861788,
"grad_norm": 0.10554931312799454,
"learning_rate": 1.3258631221810331e-05,
"loss": 0.0116,
"step": 11620
},
{
"epoch": 23.63821138211382,
"grad_norm": 0.10275840759277344,
"learning_rate": 1.3183955716817232e-05,
"loss": 0.009,
"step": 11630
},
{
"epoch": 23.658536585365855,
"grad_norm": 0.09203638881444931,
"learning_rate": 1.3109459151631076e-05,
"loss": 0.0106,
"step": 11640
},
{
"epoch": 23.678861788617887,
"grad_norm": 0.08367224037647247,
"learning_rate": 1.3035141888333202e-05,
"loss": 0.0081,
"step": 11650
},
{
"epoch": 23.69918699186992,
"grad_norm": 0.09575346112251282,
"learning_rate": 1.2961004288133388e-05,
"loss": 0.0069,
"step": 11660
},
{
"epoch": 23.71951219512195,
"grad_norm": 0.134243905544281,
"learning_rate": 1.2887046711368245e-05,
"loss": 0.0091,
"step": 11670
},
{
"epoch": 23.739837398373982,
"grad_norm": 0.111112080514431,
"learning_rate": 1.2813269517499399e-05,
"loss": 0.0076,
"step": 11680
},
{
"epoch": 23.760162601626018,
"grad_norm": 0.12154891341924667,
"learning_rate": 1.273967306511169e-05,
"loss": 0.0089,
"step": 11690
},
{
"epoch": 23.78048780487805,
"grad_norm": 0.15236243605613708,
"learning_rate": 1.2666257711911566e-05,
"loss": 0.0094,
"step": 11700
},
{
"epoch": 23.80081300813008,
"grad_norm": 0.0687737986445427,
"learning_rate": 1.2593023814725214e-05,
"loss": 0.0127,
"step": 11710
},
{
"epoch": 23.821138211382113,
"grad_norm": 0.10326932370662689,
"learning_rate": 1.251997172949686e-05,
"loss": 0.007,
"step": 11720
},
{
"epoch": 23.841463414634145,
"grad_norm": 0.1114373728632927,
"learning_rate": 1.2447101811287109e-05,
"loss": 0.0091,
"step": 11730
},
{
"epoch": 23.86178861788618,
"grad_norm": 0.08497825264930725,
"learning_rate": 1.237441441427114e-05,
"loss": 0.0082,
"step": 11740
},
{
"epoch": 23.882113821138212,
"grad_norm": 0.10188353061676025,
"learning_rate": 1.2301909891737018e-05,
"loss": 0.0067,
"step": 11750
},
{
"epoch": 23.902439024390244,
"grad_norm": 0.12289886176586151,
"learning_rate": 1.2229588596083957e-05,
"loss": 0.0089,
"step": 11760
},
{
"epoch": 23.922764227642276,
"grad_norm": 0.11247570812702179,
"learning_rate": 1.2157450878820608e-05,
"loss": 0.0064,
"step": 11770
},
{
"epoch": 23.943089430894307,
"grad_norm": 0.11704237014055252,
"learning_rate": 1.2085497090563407e-05,
"loss": 0.0066,
"step": 11780
},
{
"epoch": 23.963414634146343,
"grad_norm": 0.055815473198890686,
"learning_rate": 1.2013727581034783e-05,
"loss": 0.0084,
"step": 11790
},
{
"epoch": 23.983739837398375,
"grad_norm": 0.07490572333335876,
"learning_rate": 1.1942142699061498e-05,
"loss": 0.0075,
"step": 11800
},
{
"epoch": 24.004065040650406,
"grad_norm": 0.10884711891412735,
"learning_rate": 1.1870742792572992e-05,
"loss": 0.0079,
"step": 11810
},
{
"epoch": 24.024390243902438,
"grad_norm": 0.0868370532989502,
"learning_rate": 1.1799528208599637e-05,
"loss": 0.0114,
"step": 11820
},
{
"epoch": 24.04471544715447,
"grad_norm": 0.09366288781166077,
"learning_rate": 1.1728499293271079e-05,
"loss": 0.0092,
"step": 11830
},
{
"epoch": 24.065040650406505,
"grad_norm": 0.10993952304124832,
"learning_rate": 1.1657656391814509e-05,
"loss": 0.0071,
"step": 11840
},
{
"epoch": 24.085365853658537,
"grad_norm": 0.06420900672674179,
"learning_rate": 1.1586999848553043e-05,
"loss": 0.0108,
"step": 11850
},
{
"epoch": 24.10569105691057,
"grad_norm": 0.10133112967014313,
"learning_rate": 1.1516530006904053e-05,
"loss": 0.012,
"step": 11860
},
{
"epoch": 24.1260162601626,
"grad_norm": 0.15871313214302063,
"learning_rate": 1.1446247209377403e-05,
"loss": 0.0099,
"step": 11870
},
{
"epoch": 24.146341463414632,
"grad_norm": 0.12604136765003204,
"learning_rate": 1.1376151797573925e-05,
"loss": 0.0062,
"step": 11880
},
{
"epoch": 24.166666666666668,
"grad_norm": 0.13042505085468292,
"learning_rate": 1.1306244112183662e-05,
"loss": 0.0112,
"step": 11890
},
{
"epoch": 24.1869918699187,
"grad_norm": 0.1565147340297699,
"learning_rate": 1.1236524492984203e-05,
"loss": 0.0113,
"step": 11900
},
{
"epoch": 24.20731707317073,
"grad_norm": 0.09882476925849915,
"learning_rate": 1.116699327883911e-05,
"loss": 0.0099,
"step": 11910
},
{
"epoch": 24.227642276422763,
"grad_norm": 0.1394849717617035,
"learning_rate": 1.1097650807696209e-05,
"loss": 0.0073,
"step": 11920
},
{
"epoch": 24.247967479674795,
"grad_norm": 0.07821723818778992,
"learning_rate": 1.1028497416585931e-05,
"loss": 0.006,
"step": 11930
},
{
"epoch": 24.26829268292683,
"grad_norm": 0.06742383539676666,
"learning_rate": 1.0959533441619762e-05,
"loss": 0.009,
"step": 11940
},
{
"epoch": 24.288617886178862,
"grad_norm": 0.06799096614122391,
"learning_rate": 1.0890759217988527e-05,
"loss": 0.007,
"step": 11950
},
{
"epoch": 24.308943089430894,
"grad_norm": 0.08466274291276932,
"learning_rate": 1.0822175079960806e-05,
"loss": 0.0078,
"step": 11960
},
{
"epoch": 24.329268292682926,
"grad_norm": 0.07107431441545486,
"learning_rate": 1.0753781360881265e-05,
"loss": 0.0111,
"step": 11970
},
{
"epoch": 24.34959349593496,
"grad_norm": 0.09816905856132507,
"learning_rate": 1.0685578393169055e-05,
"loss": 0.0075,
"step": 11980
},
{
"epoch": 24.369918699186993,
"grad_norm": 0.100242480635643,
"learning_rate": 1.061756650831625e-05,
"loss": 0.0081,
"step": 11990
},
{
"epoch": 24.390243902439025,
"grad_norm": 0.07489926367998123,
"learning_rate": 1.054974603688616e-05,
"loss": 0.007,
"step": 12000
},
{
"epoch": 24.410569105691057,
"grad_norm": 0.10150320082902908,
"learning_rate": 1.048211730851173e-05,
"loss": 0.0087,
"step": 12010
},
{
"epoch": 24.43089430894309,
"grad_norm": 0.11625931411981583,
"learning_rate": 1.0414680651894004e-05,
"loss": 0.0111,
"step": 12020
},
{
"epoch": 24.451219512195124,
"grad_norm": 0.11474552005529404,
"learning_rate": 1.034743639480047e-05,
"loss": 0.0093,
"step": 12030
},
{
"epoch": 24.471544715447155,
"grad_norm": 0.0563468337059021,
"learning_rate": 1.0280384864063497e-05,
"loss": 0.0061,
"step": 12040
},
{
"epoch": 24.491869918699187,
"grad_norm": 0.08342014253139496,
"learning_rate": 1.0213526385578704e-05,
"loss": 0.0085,
"step": 12050
},
{
"epoch": 24.51219512195122,
"grad_norm": 0.1682083010673523,
"learning_rate": 1.0146861284303394e-05,
"loss": 0.0103,
"step": 12060
},
{
"epoch": 24.53252032520325,
"grad_norm": 0.11061355471611023,
"learning_rate": 1.0080389884255037e-05,
"loss": 0.0071,
"step": 12070
},
{
"epoch": 24.552845528455286,
"grad_norm": 0.11372610926628113,
"learning_rate": 1.0014112508509588e-05,
"loss": 0.0068,
"step": 12080
},
{
"epoch": 24.573170731707318,
"grad_norm": 0.06454111635684967,
"learning_rate": 9.948029479199994e-06,
"loss": 0.0068,
"step": 12090
},
{
"epoch": 24.59349593495935,
"grad_norm": 0.1143406480550766,
"learning_rate": 9.882141117514632e-06,
"loss": 0.0092,
"step": 12100
},
{
"epoch": 24.61382113821138,
"grad_norm": 0.09725458174943924,
"learning_rate": 9.816447743695656e-06,
"loss": 0.0064,
"step": 12110
},
{
"epoch": 24.634146341463413,
"grad_norm": 0.10150446742773056,
"learning_rate": 9.75094967703758e-06,
"loss": 0.0082,
"step": 12120
},
{
"epoch": 24.65447154471545,
"grad_norm": 0.07894540578126907,
"learning_rate": 9.685647235885597e-06,
"loss": 0.0077,
"step": 12130
},
{
"epoch": 24.67479674796748,
"grad_norm": 0.07765673100948334,
"learning_rate": 9.620540737634087e-06,
"loss": 0.0082,
"step": 12140
},
{
"epoch": 24.695121951219512,
"grad_norm": 0.1181579977273941,
"learning_rate": 9.555630498725133e-06,
"loss": 0.009,
"step": 12150
},
{
"epoch": 24.715447154471544,
"grad_norm": 0.09069491177797318,
"learning_rate": 9.49091683464684e-06,
"loss": 0.0123,
"step": 12160
},
{
"epoch": 24.735772357723576,
"grad_norm": 0.12418906390666962,
"learning_rate": 9.426400059931955e-06,
"loss": 0.0072,
"step": 12170
},
{
"epoch": 24.75609756097561,
"grad_norm": 0.09817170351743698,
"learning_rate": 9.362080488156245e-06,
"loss": 0.0101,
"step": 12180
},
{
"epoch": 24.776422764227643,
"grad_norm": 0.15471002459526062,
"learning_rate": 9.29795843193697e-06,
"loss": 0.0098,
"step": 12190
},
{
"epoch": 24.796747967479675,
"grad_norm": 0.10904843360185623,
"learning_rate": 9.234034202931447e-06,
"loss": 0.0052,
"step": 12200
},
{
"epoch": 24.817073170731707,
"grad_norm": 0.1243189200758934,
"learning_rate": 9.170308111835418e-06,
"loss": 0.0103,
"step": 12210
},
{
"epoch": 24.83739837398374,
"grad_norm": 0.18022632598876953,
"learning_rate": 9.106780468381631e-06,
"loss": 0.0085,
"step": 12220
},
{
"epoch": 24.857723577235774,
"grad_norm": 0.12732474505901337,
"learning_rate": 9.043451581338302e-06,
"loss": 0.0107,
"step": 12230
},
{
"epoch": 24.878048780487806,
"grad_norm": 0.09132570028305054,
"learning_rate": 8.980321758507615e-06,
"loss": 0.0067,
"step": 12240
},
{
"epoch": 24.898373983739837,
"grad_norm": 0.08355541527271271,
"learning_rate": 8.91739130672425e-06,
"loss": 0.0084,
"step": 12250
},
{
"epoch": 24.91869918699187,
"grad_norm": 0.11257751286029816,
"learning_rate": 8.85466053185382e-06,
"loss": 0.0082,
"step": 12260
},
{
"epoch": 24.9390243902439,
"grad_norm": 0.09904129058122635,
"learning_rate": 8.792129738791455e-06,
"loss": 0.0087,
"step": 12270
},
{
"epoch": 24.959349593495936,
"grad_norm": 0.13384602963924408,
"learning_rate": 8.729799231460318e-06,
"loss": 0.008,
"step": 12280
},
{
"epoch": 24.979674796747968,
"grad_norm": 0.0904664397239685,
"learning_rate": 8.66766931281009e-06,
"loss": 0.0072,
"step": 12290
},
{
"epoch": 25.0,
"grad_norm": 0.09332982450723648,
"learning_rate": 8.6057402848155e-06,
"loss": 0.0071,
"step": 12300
},
{
"epoch": 25.020325203252032,
"grad_norm": 0.10007297992706299,
"learning_rate": 8.544012448474904e-06,
"loss": 0.008,
"step": 12310
},
{
"epoch": 25.040650406504064,
"grad_norm": 0.08375833183526993,
"learning_rate": 8.482486103808779e-06,
"loss": 0.0075,
"step": 12320
},
{
"epoch": 25.0609756097561,
"grad_norm": 0.08920438587665558,
"learning_rate": 8.42116154985828e-06,
"loss": 0.0067,
"step": 12330
},
{
"epoch": 25.08130081300813,
"grad_norm": 0.08444127440452576,
"learning_rate": 8.360039084683779e-06,
"loss": 0.0071,
"step": 12340
},
{
"epoch": 25.101626016260163,
"grad_norm": 0.09630052745342255,
"learning_rate": 8.299119005363404e-06,
"loss": 0.0071,
"step": 12350
},
{
"epoch": 25.121951219512194,
"grad_norm": 0.06598120927810669,
"learning_rate": 8.238401607991647e-06,
"loss": 0.0066,
"step": 12360
},
{
"epoch": 25.142276422764226,
"grad_norm": 0.0607173927128315,
"learning_rate": 8.177887187677847e-06,
"loss": 0.0058,
"step": 12370
},
{
"epoch": 25.16260162601626,
"grad_norm": 0.09175746887922287,
"learning_rate": 8.117576038544838e-06,
"loss": 0.0104,
"step": 12380
},
{
"epoch": 25.182926829268293,
"grad_norm": 0.07160919904708862,
"learning_rate": 8.057468453727479e-06,
"loss": 0.0052,
"step": 12390
},
{
"epoch": 25.203252032520325,
"grad_norm": 0.08971722424030304,
"learning_rate": 7.997564725371182e-06,
"loss": 0.0089,
"step": 12400
},
{
"epoch": 25.223577235772357,
"grad_norm": 0.15288154780864716,
"learning_rate": 7.937865144630601e-06,
"loss": 0.0095,
"step": 12410
},
{
"epoch": 25.24390243902439,
"grad_norm": 0.0708981305360794,
"learning_rate": 7.878370001668116e-06,
"loss": 0.0091,
"step": 12420
},
{
"epoch": 25.264227642276424,
"grad_norm": 0.11280938237905502,
"learning_rate": 7.819079585652461e-06,
"loss": 0.0083,
"step": 12430
},
{
"epoch": 25.284552845528456,
"grad_norm": 0.0850948765873909,
"learning_rate": 7.759994184757358e-06,
"loss": 0.0073,
"step": 12440
},
{
"epoch": 25.304878048780488,
"grad_norm": 0.0783085823059082,
"learning_rate": 7.701114086160027e-06,
"loss": 0.0062,
"step": 12450
},
{
"epoch": 25.32520325203252,
"grad_norm": 0.12934930622577667,
"learning_rate": 7.642439576039884e-06,
"loss": 0.0102,
"step": 12460
},
{
"epoch": 25.34552845528455,
"grad_norm": 0.11071004718542099,
"learning_rate": 7.583970939577101e-06,
"loss": 0.0074,
"step": 12470
},
{
"epoch": 25.365853658536587,
"grad_norm": 0.11486160755157471,
"learning_rate": 7.525708460951197e-06,
"loss": 0.009,
"step": 12480
},
{
"epoch": 25.38617886178862,
"grad_norm": 0.10318905860185623,
"learning_rate": 7.467652423339733e-06,
"loss": 0.0079,
"step": 12490
},
{
"epoch": 25.40650406504065,
"grad_norm": 0.13283970952033997,
"learning_rate": 7.409803108916841e-06,
"loss": 0.0072,
"step": 12500
},
{
"epoch": 25.426829268292682,
"grad_norm": 0.09182985872030258,
"learning_rate": 7.35216079885192e-06,
"loss": 0.0062,
"step": 12510
},
{
"epoch": 25.447154471544714,
"grad_norm": 0.12830130755901337,
"learning_rate": 7.29472577330827e-06,
"loss": 0.0078,
"step": 12520
},
{
"epoch": 25.46747967479675,
"grad_norm": 0.06168400123715401,
"learning_rate": 7.237498311441676e-06,
"loss": 0.007,
"step": 12530
},
{
"epoch": 25.48780487804878,
"grad_norm": 0.10279642045497894,
"learning_rate": 7.180478691399134e-06,
"loss": 0.0064,
"step": 12540
},
{
"epoch": 25.508130081300813,
"grad_norm": 0.138838529586792,
"learning_rate": 7.123667190317396e-06,
"loss": 0.0092,
"step": 12550
},
{
"epoch": 25.528455284552845,
"grad_norm": 0.07453944534063339,
"learning_rate": 7.06706408432169e-06,
"loss": 0.0055,
"step": 12560
},
{
"epoch": 25.548780487804876,
"grad_norm": 0.14314241707324982,
"learning_rate": 7.010669648524404e-06,
"loss": 0.0094,
"step": 12570
},
{
"epoch": 25.56910569105691,
"grad_norm": 0.12263060361146927,
"learning_rate": 6.954484157023661e-06,
"loss": 0.007,
"step": 12580
},
{
"epoch": 25.589430894308943,
"grad_norm": 0.047071073204278946,
"learning_rate": 6.898507882902078e-06,
"loss": 0.0057,
"step": 12590
},
{
"epoch": 25.609756097560975,
"grad_norm": 0.12850365042686462,
"learning_rate": 6.842741098225358e-06,
"loss": 0.0095,
"step": 12600
},
{
"epoch": 25.630081300813007,
"grad_norm": 0.06848274171352386,
"learning_rate": 6.787184074041031e-06,
"loss": 0.0072,
"step": 12610
},
{
"epoch": 25.65040650406504,
"grad_norm": 0.06645460426807404,
"learning_rate": 6.731837080377129e-06,
"loss": 0.0064,
"step": 12620
},
{
"epoch": 25.670731707317074,
"grad_norm": 0.08997969329357147,
"learning_rate": 6.676700386240814e-06,
"loss": 0.0094,
"step": 12630
},
{
"epoch": 25.691056910569106,
"grad_norm": 0.06717319041490555,
"learning_rate": 6.621774259617125e-06,
"loss": 0.006,
"step": 12640
},
{
"epoch": 25.711382113821138,
"grad_norm": 0.08344772458076477,
"learning_rate": 6.567058967467704e-06,
"loss": 0.0064,
"step": 12650
},
{
"epoch": 25.73170731707317,
"grad_norm": 0.11553539335727692,
"learning_rate": 6.51255477572939e-06,
"loss": 0.0083,
"step": 12660
},
{
"epoch": 25.752032520325205,
"grad_norm": 0.12962360680103302,
"learning_rate": 6.45826194931306e-06,
"loss": 0.0079,
"step": 12670
},
{
"epoch": 25.772357723577237,
"grad_norm": 0.10802538692951202,
"learning_rate": 6.4041807521022454e-06,
"loss": 0.006,
"step": 12680
},
{
"epoch": 25.79268292682927,
"grad_norm": 0.08937724679708481,
"learning_rate": 6.350311446951868e-06,
"loss": 0.01,
"step": 12690
},
{
"epoch": 25.8130081300813,
"grad_norm": 0.09280110895633698,
"learning_rate": 6.29665429568701e-06,
"loss": 0.0091,
"step": 12700
},
{
"epoch": 25.833333333333332,
"grad_norm": 0.055351488292217255,
"learning_rate": 6.2432095591015705e-06,
"loss": 0.0058,
"step": 12710
},
{
"epoch": 25.853658536585368,
"grad_norm": 0.09830985963344574,
"learning_rate": 6.1899774969570444e-06,
"loss": 0.0083,
"step": 12720
},
{
"epoch": 25.8739837398374,
"grad_norm": 0.06478270888328552,
"learning_rate": 6.136958367981272e-06,
"loss": 0.0061,
"step": 12730
},
{
"epoch": 25.89430894308943,
"grad_norm": 0.05733104050159454,
"learning_rate": 6.084152429867113e-06,
"loss": 0.009,
"step": 12740
},
{
"epoch": 25.914634146341463,
"grad_norm": 0.08511028438806534,
"learning_rate": 6.0315599392712865e-06,
"loss": 0.0062,
"step": 12750
},
{
"epoch": 25.934959349593495,
"grad_norm": 0.09071079641580582,
"learning_rate": 5.979181151813057e-06,
"loss": 0.0068,
"step": 12760
},
{
"epoch": 25.95528455284553,
"grad_norm": 0.06143470108509064,
"learning_rate": 5.927016322072992e-06,
"loss": 0.0058,
"step": 12770
},
{
"epoch": 25.975609756097562,
"grad_norm": 0.06430324912071228,
"learning_rate": 5.875065703591787e-06,
"loss": 0.0113,
"step": 12780
},
{
"epoch": 25.995934959349594,
"grad_norm": 0.10490266978740692,
"learning_rate": 5.823329548868939e-06,
"loss": 0.0108,
"step": 12790
},
{
"epoch": 26.016260162601625,
"grad_norm": 0.05621851235628128,
"learning_rate": 5.77180810936162e-06,
"loss": 0.0091,
"step": 12800
},
{
"epoch": 26.036585365853657,
"grad_norm": 0.10055341571569443,
"learning_rate": 5.720501635483366e-06,
"loss": 0.0096,
"step": 12810
},
{
"epoch": 26.056910569105693,
"grad_norm": 0.06854842603206635,
"learning_rate": 5.669410376602918e-06,
"loss": 0.0085,
"step": 12820
},
{
"epoch": 26.077235772357724,
"grad_norm": 0.13524769246578217,
"learning_rate": 5.618534581043011e-06,
"loss": 0.0077,
"step": 12830
},
{
"epoch": 26.097560975609756,
"grad_norm": 0.09193708002567291,
"learning_rate": 5.5678744960791005e-06,
"loss": 0.0073,
"step": 12840
},
{
"epoch": 26.117886178861788,
"grad_norm": 0.08699709177017212,
"learning_rate": 5.517430367938237e-06,
"loss": 0.0051,
"step": 12850
},
{
"epoch": 26.13821138211382,
"grad_norm": 0.11282419413328171,
"learning_rate": 5.467202441797842e-06,
"loss": 0.0065,
"step": 12860
},
{
"epoch": 26.158536585365855,
"grad_norm": 0.053357355296611786,
"learning_rate": 5.417190961784497e-06,
"loss": 0.008,
"step": 12870
},
{
"epoch": 26.178861788617887,
"grad_norm": 0.125547856092453,
"learning_rate": 5.3673961709727885e-06,
"loss": 0.0077,
"step": 12880
},
{
"epoch": 26.19918699186992,
"grad_norm": 0.076795294880867,
"learning_rate": 5.317818311384115e-06,
"loss": 0.0069,
"step": 12890
},
{
"epoch": 26.21951219512195,
"grad_norm": 0.05759301036596298,
"learning_rate": 5.2684576239854895e-06,
"loss": 0.0071,
"step": 12900
},
{
"epoch": 26.239837398373982,
"grad_norm": 0.08413347601890564,
"learning_rate": 5.219314348688414e-06,
"loss": 0.0073,
"step": 12910
},
{
"epoch": 26.260162601626018,
"grad_norm": 0.10227837413549423,
"learning_rate": 5.170388724347658e-06,
"loss": 0.0087,
"step": 12920
},
{
"epoch": 26.28048780487805,
"grad_norm": 0.08494935929775238,
"learning_rate": 5.1216809887601245e-06,
"loss": 0.0066,
"step": 12930
},
{
"epoch": 26.30081300813008,
"grad_norm": 0.10262715071439743,
"learning_rate": 5.073191378663733e-06,
"loss": 0.0046,
"step": 12940
},
{
"epoch": 26.321138211382113,
"grad_norm": 0.09187835454940796,
"learning_rate": 5.024920129736188e-06,
"loss": 0.0063,
"step": 12950
},
{
"epoch": 26.341463414634145,
"grad_norm": 0.1465609073638916,
"learning_rate": 4.976867476593894e-06,
"loss": 0.008,
"step": 12960
},
{
"epoch": 26.36178861788618,
"grad_norm": 0.08398236334323883,
"learning_rate": 4.929033652790821e-06,
"loss": 0.0084,
"step": 12970
},
{
"epoch": 26.382113821138212,
"grad_norm": 0.08007702231407166,
"learning_rate": 4.881418890817296e-06,
"loss": 0.0093,
"step": 12980
},
{
"epoch": 26.402439024390244,
"grad_norm": 0.13101662695407867,
"learning_rate": 4.834023422098971e-06,
"loss": 0.0075,
"step": 12990
},
{
"epoch": 26.422764227642276,
"grad_norm": 0.07166078686714172,
"learning_rate": 4.7868474769956266e-06,
"loss": 0.0094,
"step": 13000
},
{
"epoch": 26.443089430894307,
"grad_norm": 0.0517122708261013,
"learning_rate": 4.7398912848000636e-06,
"loss": 0.0098,
"step": 13010
},
{
"epoch": 26.463414634146343,
"grad_norm": 0.06409566849470139,
"learning_rate": 4.6931550737370264e-06,
"loss": 0.0082,
"step": 13020
},
{
"epoch": 26.483739837398375,
"grad_norm": 0.07286877185106277,
"learning_rate": 4.646639070962067e-06,
"loss": 0.0072,
"step": 13030
},
{
"epoch": 26.504065040650406,
"grad_norm": 0.05960990488529205,
"learning_rate": 4.600343502560439e-06,
"loss": 0.0064,
"step": 13040
},
{
"epoch": 26.524390243902438,
"grad_norm": 0.07247988879680634,
"learning_rate": 4.55426859354599e-06,
"loss": 0.0084,
"step": 13050
},
{
"epoch": 26.54471544715447,
"grad_norm": 0.06973347067832947,
"learning_rate": 4.5084145678600805e-06,
"loss": 0.0081,
"step": 13060
},
{
"epoch": 26.565040650406505,
"grad_norm": 0.1406828910112381,
"learning_rate": 4.462781648370518e-06,
"loss": 0.0055,
"step": 13070
},
{
"epoch": 26.585365853658537,
"grad_norm": 0.08069416135549545,
"learning_rate": 4.417370056870418e-06,
"loss": 0.006,
"step": 13080
},
{
"epoch": 26.60569105691057,
"grad_norm": 0.10259624570608139,
"learning_rate": 4.372180014077193e-06,
"loss": 0.0091,
"step": 13090
},
{
"epoch": 26.6260162601626,
"grad_norm": 0.08780638128519058,
"learning_rate": 4.327211739631415e-06,
"loss": 0.0067,
"step": 13100
},
{
"epoch": 26.646341463414632,
"grad_norm": 0.0880453810095787,
"learning_rate": 4.282465452095802e-06,
"loss": 0.0071,
"step": 13110
},
{
"epoch": 26.666666666666668,
"grad_norm": 0.09189548343420029,
"learning_rate": 4.237941368954124e-06,
"loss": 0.0094,
"step": 13120
},
{
"epoch": 26.6869918699187,
"grad_norm": 0.05421117693185806,
"learning_rate": 4.193639706610147e-06,
"loss": 0.0092,
"step": 13130
},
{
"epoch": 26.70731707317073,
"grad_norm": 0.10190868377685547,
"learning_rate": 4.149560680386588e-06,
"loss": 0.0074,
"step": 13140
},
{
"epoch": 26.727642276422763,
"grad_norm": 0.11536475270986557,
"learning_rate": 4.105704504524094e-06,
"loss": 0.0068,
"step": 13150
},
{
"epoch": 26.747967479674795,
"grad_norm": 0.08702490478754044,
"learning_rate": 4.0620713921801334e-06,
"loss": 0.0047,
"step": 13160
},
{
"epoch": 26.76829268292683,
"grad_norm": 0.0775069072842598,
"learning_rate": 4.0186615554280385e-06,
"loss": 0.0084,
"step": 13170
},
{
"epoch": 26.788617886178862,
"grad_norm": 0.06977381557226181,
"learning_rate": 3.975475205255929e-06,
"loss": 0.0067,
"step": 13180
},
{
"epoch": 26.808943089430894,
"grad_norm": 0.05994332954287529,
"learning_rate": 3.932512551565676e-06,
"loss": 0.0051,
"step": 13190
},
{
"epoch": 26.829268292682926,
"grad_norm": 0.06960119307041168,
"learning_rate": 3.889773803171936e-06,
"loss": 0.0105,
"step": 13200
},
{
"epoch": 26.84959349593496,
"grad_norm": 0.05765342339873314,
"learning_rate": 3.847259167801076e-06,
"loss": 0.0066,
"step": 13210
},
{
"epoch": 26.869918699186993,
"grad_norm": 0.11678333580493927,
"learning_rate": 3.804968852090185e-06,
"loss": 0.0074,
"step": 13220
},
{
"epoch": 26.890243902439025,
"grad_norm": 0.06083038076758385,
"learning_rate": 3.762903061586104e-06,
"loss": 0.0072,
"step": 13230
},
{
"epoch": 26.910569105691057,
"grad_norm": 0.04766000062227249,
"learning_rate": 3.721062000744363e-06,
"loss": 0.0098,
"step": 13240
},
{
"epoch": 26.93089430894309,
"grad_norm": 0.06560375541448593,
"learning_rate": 3.679445872928244e-06,
"loss": 0.0094,
"step": 13250
},
{
"epoch": 26.951219512195124,
"grad_norm": 0.08765765279531479,
"learning_rate": 3.6380548804077707e-06,
"loss": 0.0093,
"step": 13260
},
{
"epoch": 26.971544715447155,
"grad_norm": 0.12069438397884369,
"learning_rate": 3.5968892243587016e-06,
"loss": 0.0091,
"step": 13270
},
{
"epoch": 26.991869918699187,
"grad_norm": 0.07621457427740097,
"learning_rate": 3.555949104861611e-06,
"loss": 0.0077,
"step": 13280
},
{
"epoch": 27.01219512195122,
"grad_norm": 0.1240905299782753,
"learning_rate": 3.5152347209008394e-06,
"loss": 0.0087,
"step": 13290
},
{
"epoch": 27.03252032520325,
"grad_norm": 0.167019784450531,
"learning_rate": 3.4747462703636104e-06,
"loss": 0.0093,
"step": 13300
},
{
"epoch": 27.052845528455286,
"grad_norm": 0.06322633475065231,
"learning_rate": 3.434483950038986e-06,
"loss": 0.006,
"step": 13310
},
{
"epoch": 27.073170731707318,
"grad_norm": 0.05990873649716377,
"learning_rate": 3.3944479556169806e-06,
"loss": 0.0076,
"step": 13320
},
{
"epoch": 27.09349593495935,
"grad_norm": 0.057164546102285385,
"learning_rate": 3.3546384816875665e-06,
"loss": 0.0078,
"step": 13330
},
{
"epoch": 27.11382113821138,
"grad_norm": 0.08408369868993759,
"learning_rate": 3.315055721739746e-06,
"loss": 0.007,
"step": 13340
},
{
"epoch": 27.134146341463413,
"grad_norm": 0.05896256864070892,
"learning_rate": 3.275699868160592e-06,
"loss": 0.005,
"step": 13350
},
{
"epoch": 27.15447154471545,
"grad_norm": 0.09455864876508713,
"learning_rate": 3.23657111223436e-06,
"loss": 0.0072,
"step": 13360
},
{
"epoch": 27.17479674796748,
"grad_norm": 0.13212212920188904,
"learning_rate": 3.1976696441414764e-06,
"loss": 0.0069,
"step": 13370
},
{
"epoch": 27.195121951219512,
"grad_norm": 0.04661380499601364,
"learning_rate": 3.158995652957719e-06,
"loss": 0.0072,
"step": 13380
},
{
"epoch": 27.215447154471544,
"grad_norm": 0.06701108068227768,
"learning_rate": 3.1205493266531937e-06,
"loss": 0.0092,
"step": 13390
},
{
"epoch": 27.235772357723576,
"grad_norm": 0.07119470834732056,
"learning_rate": 3.082330852091497e-06,
"loss": 0.0062,
"step": 13400
},
{
"epoch": 27.25609756097561,
"grad_norm": 0.11395665258169174,
"learning_rate": 3.0443404150287847e-06,
"loss": 0.0065,
"step": 13410
},
{
"epoch": 27.276422764227643,
"grad_norm": 0.08765079081058502,
"learning_rate": 3.0065782001128475e-06,
"loss": 0.0064,
"step": 13420
},
{
"epoch": 27.296747967479675,
"grad_norm": 0.05511057376861572,
"learning_rate": 2.9690443908822252e-06,
"loss": 0.007,
"step": 13430
},
{
"epoch": 27.317073170731707,
"grad_norm": 0.04316573217511177,
"learning_rate": 2.9317391697653518e-06,
"loss": 0.0098,
"step": 13440
},
{
"epoch": 27.33739837398374,
"grad_norm": 0.0791875571012497,
"learning_rate": 2.8946627180795936e-06,
"loss": 0.0054,
"step": 13450
},
{
"epoch": 27.357723577235774,
"grad_norm": 0.07902947813272476,
"learning_rate": 2.8578152160304573e-06,
"loss": 0.0079,
"step": 13460
},
{
"epoch": 27.378048780487806,
"grad_norm": 0.06220165267586708,
"learning_rate": 2.821196842710638e-06,
"loss": 0.0086,
"step": 13470
},
{
"epoch": 27.398373983739837,
"grad_norm": 0.09437467157840729,
"learning_rate": 2.7848077760991853e-06,
"loss": 0.0061,
"step": 13480
},
{
"epoch": 27.41869918699187,
"grad_norm": 0.06286446005105972,
"learning_rate": 2.7486481930606434e-06,
"loss": 0.0127,
"step": 13490
},
{
"epoch": 27.4390243902439,
"grad_norm": 0.05507444962859154,
"learning_rate": 2.712718269344161e-06,
"loss": 0.0099,
"step": 13500
},
{
"epoch": 27.459349593495936,
"grad_norm": 0.09279751032590866,
"learning_rate": 2.677018179582669e-06,
"loss": 0.0116,
"step": 13510
},
{
"epoch": 27.479674796747968,
"grad_norm": 0.06787155568599701,
"learning_rate": 2.641548097292024e-06,
"loss": 0.005,
"step": 13520
},
{
"epoch": 27.5,
"grad_norm": 0.061754241585731506,
"learning_rate": 2.606308194870133e-06,
"loss": 0.007,
"step": 13530
},
{
"epoch": 27.520325203252032,
"grad_norm": 0.10742611438035965,
"learning_rate": 2.5712986435961707e-06,
"loss": 0.0124,
"step": 13540
},
{
"epoch": 27.540650406504064,
"grad_norm": 0.12080876529216766,
"learning_rate": 2.536519613629723e-06,
"loss": 0.0086,
"step": 13550
},
{
"epoch": 27.5609756097561,
"grad_norm": 0.10064790397882462,
"learning_rate": 2.501971274009923e-06,
"loss": 0.0088,
"step": 13560
},
{
"epoch": 27.58130081300813,
"grad_norm": 0.07646507024765015,
"learning_rate": 2.467653792654695e-06,
"loss": 0.0049,
"step": 13570
},
{
"epoch": 27.601626016260163,
"grad_norm": 0.06533602625131607,
"learning_rate": 2.4335673363598822e-06,
"loss": 0.0092,
"step": 13580
},
{
"epoch": 27.621951219512194,
"grad_norm": 0.10205673426389694,
"learning_rate": 2.399712070798471e-06,
"loss": 0.0086,
"step": 13590
},
{
"epoch": 27.642276422764226,
"grad_norm": 0.05475059896707535,
"learning_rate": 2.3660881605197694e-06,
"loss": 0.0089,
"step": 13600
},
{
"epoch": 27.66260162601626,
"grad_norm": 0.06472466886043549,
"learning_rate": 2.332695768948617e-06,
"loss": 0.0087,
"step": 13610
},
{
"epoch": 27.682926829268293,
"grad_norm": 0.07751797884702682,
"learning_rate": 2.299535058384583e-06,
"loss": 0.0069,
"step": 13620
},
{
"epoch": 27.703252032520325,
"grad_norm": 0.06442529708147049,
"learning_rate": 2.266606190001186e-06,
"loss": 0.0074,
"step": 13630
},
{
"epoch": 27.723577235772357,
"grad_norm": 0.06449927389621735,
"learning_rate": 2.2339093238450737e-06,
"loss": 0.0085,
"step": 13640
},
{
"epoch": 27.74390243902439,
"grad_norm": 0.08560163527727127,
"learning_rate": 2.20144461883533e-06,
"loss": 0.0063,
"step": 13650
},
{
"epoch": 27.764227642276424,
"grad_norm": 0.06769910454750061,
"learning_rate": 2.1692122327625908e-06,
"loss": 0.0096,
"step": 13660
},
{
"epoch": 27.784552845528456,
"grad_norm": 0.03535304218530655,
"learning_rate": 2.137212322288379e-06,
"loss": 0.0043,
"step": 13670
},
{
"epoch": 27.804878048780488,
"grad_norm": 0.09477891027927399,
"learning_rate": 2.105445042944282e-06,
"loss": 0.0109,
"step": 13680
},
{
"epoch": 27.82520325203252,
"grad_norm": 0.3004438281059265,
"learning_rate": 2.0739105491312027e-06,
"loss": 0.0066,
"step": 13690
},
{
"epoch": 27.84552845528455,
"grad_norm": 0.05074850842356682,
"learning_rate": 2.0426089941186443e-06,
"loss": 0.0064,
"step": 13700
},
{
"epoch": 27.865853658536587,
"grad_norm": 0.06132441386580467,
"learning_rate": 2.0115405300439093e-06,
"loss": 0.009,
"step": 13710
},
{
"epoch": 27.88617886178862,
"grad_norm": 0.09876362234354019,
"learning_rate": 1.9807053079114013e-06,
"loss": 0.0057,
"step": 13720
},
{
"epoch": 27.90650406504065,
"grad_norm": 0.13496215641498566,
"learning_rate": 1.9501034775919024e-06,
"loss": 0.0068,
"step": 13730
},
{
"epoch": 27.926829268292682,
"grad_norm": 0.07970809936523438,
"learning_rate": 1.9197351878217917e-06,
"loss": 0.0058,
"step": 13740
},
{
"epoch": 27.947154471544714,
"grad_norm": 0.10741535574197769,
"learning_rate": 1.8896005862023669e-06,
"loss": 0.0093,
"step": 13750
},
{
"epoch": 27.96747967479675,
"grad_norm": 0.09508837759494781,
"learning_rate": 1.8596998191991288e-06,
"loss": 0.007,
"step": 13760
},
{
"epoch": 27.98780487804878,
"grad_norm": 0.07169695198535919,
"learning_rate": 1.8300330321410208e-06,
"loss": 0.0052,
"step": 13770
},
{
"epoch": 28.008130081300813,
"grad_norm": 0.07175235450267792,
"learning_rate": 1.8006003692197794e-06,
"loss": 0.0064,
"step": 13780
},
{
"epoch": 28.028455284552845,
"grad_norm": 0.12354432791471481,
"learning_rate": 1.7714019734892062e-06,
"loss": 0.0077,
"step": 13790
},
{
"epoch": 28.048780487804876,
"grad_norm": 0.10336112231016159,
"learning_rate": 1.7424379868644759e-06,
"loss": 0.0082,
"step": 13800
},
{
"epoch": 28.06910569105691,
"grad_norm": 0.08332820981740952,
"learning_rate": 1.71370855012144e-06,
"loss": 0.0097,
"step": 13810
},
{
"epoch": 28.089430894308943,
"grad_norm": 0.047899335622787476,
"learning_rate": 1.6852138028959574e-06,
"loss": 0.0055,
"step": 13820
},
{
"epoch": 28.109756097560975,
"grad_norm": 0.13389968872070312,
"learning_rate": 1.6569538836832044e-06,
"loss": 0.0097,
"step": 13830
},
{
"epoch": 28.130081300813007,
"grad_norm": 0.09461624920368195,
"learning_rate": 1.6289289298370147e-06,
"loss": 0.0071,
"step": 13840
},
{
"epoch": 28.150406504065042,
"grad_norm": 0.046392664313316345,
"learning_rate": 1.6011390775691748e-06,
"loss": 0.0062,
"step": 13850
},
{
"epoch": 28.170731707317074,
"grad_norm": 0.09244752675294876,
"learning_rate": 1.5735844619488238e-06,
"loss": 0.0089,
"step": 13860
},
{
"epoch": 28.191056910569106,
"grad_norm": 0.08714314550161362,
"learning_rate": 1.5462652169017322e-06,
"loss": 0.0044,
"step": 13870
},
{
"epoch": 28.211382113821138,
"grad_norm": 0.06532461196184158,
"learning_rate": 1.5191814752097023e-06,
"loss": 0.0065,
"step": 13880
},
{
"epoch": 28.23170731707317,
"grad_norm": 0.07722274214029312,
"learning_rate": 1.492333368509896e-06,
"loss": 0.0066,
"step": 13890
},
{
"epoch": 28.252032520325205,
"grad_norm": 0.1046539843082428,
"learning_rate": 1.4657210272941923e-06,
"loss": 0.0084,
"step": 13900
},
{
"epoch": 28.272357723577237,
"grad_norm": 0.05809272080659866,
"learning_rate": 1.4393445809085748e-06,
"loss": 0.0052,
"step": 13910
},
{
"epoch": 28.29268292682927,
"grad_norm": 0.06860511004924774,
"learning_rate": 1.4132041575524834e-06,
"loss": 0.0078,
"step": 13920
},
{
"epoch": 28.3130081300813,
"grad_norm": 0.051914114505052567,
"learning_rate": 1.387299884278187e-06,
"loss": 0.0062,
"step": 13930
},
{
"epoch": 28.333333333333332,
"grad_norm": 0.07846979796886444,
"learning_rate": 1.3616318869901945e-06,
"loss": 0.0059,
"step": 13940
},
{
"epoch": 28.353658536585368,
"grad_norm": 0.08878767490386963,
"learning_rate": 1.336200290444606e-06,
"loss": 0.0091,
"step": 13950
},
{
"epoch": 28.3739837398374,
"grad_norm": 0.04697742313146591,
"learning_rate": 1.3110052182485454e-06,
"loss": 0.0071,
"step": 13960
},
{
"epoch": 28.39430894308943,
"grad_norm": 0.05483054742217064,
"learning_rate": 1.2860467928595298e-06,
"loss": 0.0048,
"step": 13970
},
{
"epoch": 28.414634146341463,
"grad_norm": 0.04687541350722313,
"learning_rate": 1.2613251355848732e-06,
"loss": 0.0065,
"step": 13980
},
{
"epoch": 28.434959349593495,
"grad_norm": 0.06474806368350983,
"learning_rate": 1.2368403665811324e-06,
"loss": 0.0064,
"step": 13990
},
{
"epoch": 28.45528455284553,
"grad_norm": 0.053517088294029236,
"learning_rate": 1.2125926048534686e-06,
"loss": 0.006,
"step": 14000
},
{
"epoch": 28.475609756097562,
"grad_norm": 0.0952361673116684,
"learning_rate": 1.1885819682551259e-06,
"loss": 0.0075,
"step": 14010
},
{
"epoch": 28.495934959349594,
"grad_norm": 0.11012918502092361,
"learning_rate": 1.164808573486814e-06,
"loss": 0.0079,
"step": 14020
},
{
"epoch": 28.516260162601625,
"grad_norm": 0.06327813118696213,
"learning_rate": 1.1412725360961608e-06,
"loss": 0.0062,
"step": 14030
},
{
"epoch": 28.536585365853657,
"grad_norm": 0.13205966353416443,
"learning_rate": 1.1179739704771486e-06,
"loss": 0.0064,
"step": 14040
},
{
"epoch": 28.556910569105693,
"grad_norm": 0.06284066289663315,
"learning_rate": 1.0949129898695675e-06,
"loss": 0.006,
"step": 14050
},
{
"epoch": 28.577235772357724,
"grad_norm": 0.10052363574504852,
"learning_rate": 1.0720897063584423e-06,
"loss": 0.0066,
"step": 14060
},
{
"epoch": 28.597560975609756,
"grad_norm": 0.09994717687368393,
"learning_rate": 1.0495042308735103e-06,
"loss": 0.0072,
"step": 14070
},
{
"epoch": 28.617886178861788,
"grad_norm": 0.06892990320920944,
"learning_rate": 1.0271566731886617e-06,
"loss": 0.0058,
"step": 14080
},
{
"epoch": 28.63821138211382,
"grad_norm": 0.061333317309617996,
"learning_rate": 1.005047141921428e-06,
"loss": 0.0054,
"step": 14090
},
{
"epoch": 28.658536585365855,
"grad_norm": 0.1620459407567978,
"learning_rate": 9.831757445324274e-07,
"loss": 0.0084,
"step": 14100
},
{
"epoch": 28.678861788617887,
"grad_norm": 0.09121565520763397,
"learning_rate": 9.615425873248761e-07,
"loss": 0.0053,
"step": 14110
},
{
"epoch": 28.69918699186992,
"grad_norm": 0.08609441667795181,
"learning_rate": 9.401477754440502e-07,
"loss": 0.0058,
"step": 14120
},
{
"epoch": 28.71951219512195,
"grad_norm": 0.11729393154382706,
"learning_rate": 9.189914128767684e-07,
"loss": 0.0086,
"step": 14130
},
{
"epoch": 28.739837398373982,
"grad_norm": 0.063324473798275,
"learning_rate": 8.980736024508996e-07,
"loss": 0.0097,
"step": 14140
},
{
"epoch": 28.760162601626018,
"grad_norm": 0.07590149343013763,
"learning_rate": 8.77394445834867e-07,
"loss": 0.006,
"step": 14150
},
{
"epoch": 28.78048780487805,
"grad_norm": 0.09050539880990982,
"learning_rate": 8.569540435371281e-07,
"loss": 0.0063,
"step": 14160
},
{
"epoch": 28.80081300813008,
"grad_norm": 0.06578807532787323,
"learning_rate": 8.367524949057348e-07,
"loss": 0.0095,
"step": 14170
},
{
"epoch": 28.821138211382113,
"grad_norm": 0.11855498701334,
"learning_rate": 8.167898981277844e-07,
"loss": 0.007,
"step": 14180
},
{
"epoch": 28.841463414634145,
"grad_norm": 0.059526070952415466,
"learning_rate": 7.970663502290143e-07,
"loss": 0.0053,
"step": 14190
},
{
"epoch": 28.86178861788618,
"grad_norm": 0.09736321866512299,
"learning_rate": 7.775819470732692e-07,
"loss": 0.0053,
"step": 14200
},
{
"epoch": 28.882113821138212,
"grad_norm": 0.13699515163898468,
"learning_rate": 7.583367833620681e-07,
"loss": 0.0079,
"step": 14210
},
{
"epoch": 28.902439024390244,
"grad_norm": 0.08840407431125641,
"learning_rate": 7.39330952634143e-07,
"loss": 0.0081,
"step": 14220
},
{
"epoch": 28.922764227642276,
"grad_norm": 0.040457312017679214,
"learning_rate": 7.205645472649681e-07,
"loss": 0.0074,
"step": 14230
},
{
"epoch": 28.943089430894307,
"grad_norm": 0.1015058159828186,
"learning_rate": 7.020376584663202e-07,
"loss": 0.0058,
"step": 14240
},
{
"epoch": 28.963414634146343,
"grad_norm": 0.0492464154958725,
"learning_rate": 6.83750376285841e-07,
"loss": 0.0066,
"step": 14250
},
{
"epoch": 28.983739837398375,
"grad_norm": 0.07046113163232803,
"learning_rate": 6.657027896065982e-07,
"loss": 0.0075,
"step": 14260
},
{
"epoch": 29.004065040650406,
"grad_norm": 0.08203638345003128,
"learning_rate": 6.478949861466355e-07,
"loss": 0.0057,
"step": 14270
},
{
"epoch": 29.024390243902438,
"grad_norm": 0.07042611390352249,
"learning_rate": 6.303270524585736e-07,
"loss": 0.0057,
"step": 14280
},
{
"epoch": 29.04471544715447,
"grad_norm": 0.10965394973754883,
"learning_rate": 6.129990739291713e-07,
"loss": 0.0043,
"step": 14290
},
{
"epoch": 29.065040650406505,
"grad_norm": 0.04560219496488571,
"learning_rate": 5.959111347789093e-07,
"loss": 0.0062,
"step": 14300
},
{
"epoch": 29.085365853658537,
"grad_norm": 0.1140056774020195,
"learning_rate": 5.790633180615956e-07,
"loss": 0.0053,
"step": 14310
},
{
"epoch": 29.10569105691057,
"grad_norm": 0.0836968868970871,
"learning_rate": 5.624557056639446e-07,
"loss": 0.0094,
"step": 14320
},
{
"epoch": 29.1260162601626,
"grad_norm": 0.06501860171556473,
"learning_rate": 5.460883783051984e-07,
"loss": 0.0073,
"step": 14330
},
{
"epoch": 29.146341463414632,
"grad_norm": 0.08121626824140549,
"learning_rate": 5.299614155367171e-07,
"loss": 0.0075,
"step": 14340
},
{
"epoch": 29.166666666666668,
"grad_norm": 0.05819685012102127,
"learning_rate": 5.140748957415897e-07,
"loss": 0.0061,
"step": 14350
},
{
"epoch": 29.1869918699187,
"grad_norm": 0.1377357393503189,
"learning_rate": 4.984288961342787e-07,
"loss": 0.0075,
"step": 14360
},
{
"epoch": 29.20731707317073,
"grad_norm": 0.10657922178506851,
"learning_rate": 4.830234927602206e-07,
"loss": 0.0064,
"step": 14370
},
{
"epoch": 29.227642276422763,
"grad_norm": 0.05665343627333641,
"learning_rate": 4.6785876049545986e-07,
"loss": 0.0084,
"step": 14380
},
{
"epoch": 29.247967479674795,
"grad_norm": 0.07270264625549316,
"learning_rate": 4.5293477304629297e-07,
"loss": 0.0063,
"step": 14390
},
{
"epoch": 29.26829268292683,
"grad_norm": 0.050934601575136185,
"learning_rate": 4.382516029489081e-07,
"loss": 0.0083,
"step": 14400
},
{
"epoch": 29.288617886178862,
"grad_norm": 0.06483236700296402,
"learning_rate": 4.2380932156902975e-07,
"loss": 0.0083,
"step": 14410
},
{
"epoch": 29.308943089430894,
"grad_norm": 0.07827263325452805,
"learning_rate": 4.0960799910156335e-07,
"loss": 0.006,
"step": 14420
},
{
"epoch": 29.329268292682926,
"grad_norm": 0.07869955897331238,
"learning_rate": 3.956477045702844e-07,
"loss": 0.0047,
"step": 14430
},
{
"epoch": 29.34959349593496,
"grad_norm": 0.07201409339904785,
"learning_rate": 3.819285058274613e-07,
"loss": 0.005,
"step": 14440
},
{
"epoch": 29.369918699186993,
"grad_norm": 0.12948809564113617,
"learning_rate": 3.684504695535496e-07,
"loss": 0.0115,
"step": 14450
},
{
"epoch": 29.390243902439025,
"grad_norm": 0.06162776052951813,
"learning_rate": 3.552136612568813e-07,
"loss": 0.0054,
"step": 14460
},
{
"epoch": 29.410569105691057,
"grad_norm": 0.11850345879793167,
"learning_rate": 3.422181452733042e-07,
"loss": 0.0093,
"step": 14470
},
{
"epoch": 29.43089430894309,
"grad_norm": 0.0406278520822525,
"learning_rate": 3.294639847659209e-07,
"loss": 0.0106,
"step": 14480
},
{
"epoch": 29.451219512195124,
"grad_norm": 0.1252788007259369,
"learning_rate": 3.169512417247389e-07,
"loss": 0.0073,
"step": 14490
},
{
"epoch": 29.471544715447155,
"grad_norm": 0.049834635108709335,
"learning_rate": 3.046799769663822e-07,
"loss": 0.0044,
"step": 14500
},
{
"epoch": 29.491869918699187,
"grad_norm": 0.04352590814232826,
"learning_rate": 2.926502501338191e-07,
"loss": 0.0058,
"step": 14510
},
{
"epoch": 29.51219512195122,
"grad_norm": 0.08490035682916641,
"learning_rate": 2.808621196960404e-07,
"loss": 0.0059,
"step": 14520
},
{
"epoch": 29.53252032520325,
"grad_norm": 0.09383466094732285,
"learning_rate": 2.6931564294778164e-07,
"loss": 0.0082,
"step": 14530
},
{
"epoch": 29.552845528455286,
"grad_norm": 0.06676391512155533,
"learning_rate": 2.58010876009257e-07,
"loss": 0.0072,
"step": 14540
},
{
"epoch": 29.573170731707318,
"grad_norm": 0.08328359574079514,
"learning_rate": 2.4694787382589237e-07,
"loss": 0.0074,
"step": 14550
},
{
"epoch": 29.59349593495935,
"grad_norm": 0.08633271604776382,
"learning_rate": 2.3612669016802592e-07,
"loss": 0.0063,
"step": 14560
},
{
"epoch": 29.61382113821138,
"grad_norm": 0.0577889010310173,
"learning_rate": 2.2554737763068045e-07,
"loss": 0.0085,
"step": 14570
},
{
"epoch": 29.634146341463413,
"grad_norm": 0.05909036099910736,
"learning_rate": 2.152099876332858e-07,
"loss": 0.0069,
"step": 14580
},
{
"epoch": 29.65447154471545,
"grad_norm": 0.07131492346525192,
"learning_rate": 2.051145704194457e-07,
"loss": 0.0063,
"step": 14590
},
{
"epoch": 29.67479674796748,
"grad_norm": 0.04138827696442604,
"learning_rate": 1.9526117505667129e-07,
"loss": 0.0058,
"step": 14600
},
{
"epoch": 29.695121951219512,
"grad_norm": 0.08429524302482605,
"learning_rate": 1.856498494361758e-07,
"loss": 0.0076,
"step": 14610
},
{
"epoch": 29.715447154471544,
"grad_norm": 0.0781969353556633,
"learning_rate": 1.7628064027260803e-07,
"loss": 0.0053,
"step": 14620
},
{
"epoch": 29.735772357723576,
"grad_norm": 0.09103638678789139,
"learning_rate": 1.671535931038415e-07,
"loss": 0.0115,
"step": 14630
},
{
"epoch": 29.75609756097561,
"grad_norm": 0.09687261283397675,
"learning_rate": 1.5826875229076333e-07,
"loss": 0.0066,
"step": 14640
},
{
"epoch": 29.776422764227643,
"grad_norm": 0.07173122465610504,
"learning_rate": 1.496261610170302e-07,
"loss": 0.0068,
"step": 14650
},
{
"epoch": 29.796747967479675,
"grad_norm": 0.08791932463645935,
"learning_rate": 1.4122586128888503e-07,
"loss": 0.0054,
"step": 14660
},
{
"epoch": 29.817073170731707,
"grad_norm": 0.04476994648575783,
"learning_rate": 1.3306789393494612e-07,
"loss": 0.0039,
"step": 14670
},
{
"epoch": 29.83739837398374,
"grad_norm": 0.07385668158531189,
"learning_rate": 1.2515229860599054e-07,
"loss": 0.0064,
"step": 14680
},
{
"epoch": 29.857723577235774,
"grad_norm": 0.06950011104345322,
"learning_rate": 1.1747911377478771e-07,
"loss": 0.0057,
"step": 14690
},
{
"epoch": 29.878048780487806,
"grad_norm": 0.036131761968135834,
"learning_rate": 1.1004837673589952e-07,
"loss": 0.0067,
"step": 14700
},
{
"epoch": 29.898373983739837,
"grad_norm": 0.044530197978019714,
"learning_rate": 1.0286012360550267e-07,
"loss": 0.0149,
"step": 14710
},
{
"epoch": 29.91869918699187,
"grad_norm": 0.0911911204457283,
"learning_rate": 9.591438932121111e-08,
"loss": 0.0073,
"step": 14720
},
{
"epoch": 29.9390243902439,
"grad_norm": 0.058300457894802094,
"learning_rate": 8.921120764189272e-08,
"loss": 0.007,
"step": 14730
},
{
"epoch": 29.959349593495936,
"grad_norm": 0.09375248104333878,
"learning_rate": 8.275061114753068e-08,
"loss": 0.0103,
"step": 14740
},
{
"epoch": 29.979674796747968,
"grad_norm": 0.08694697916507721,
"learning_rate": 7.65326312390624e-08,
"loss": 0.0079,
"step": 14750
},
{
"epoch": 30.0,
"grad_norm": 0.04056360200047493,
"learning_rate": 7.055729813819079e-08,
"loss": 0.0071,
"step": 14760
},
{
"epoch": 30.020325203252032,
"grad_norm": 0.1188066303730011,
"learning_rate": 6.48246408872899e-08,
"loss": 0.0094,
"step": 14770
},
{
"epoch": 30.040650406504064,
"grad_norm": 0.07885562628507614,
"learning_rate": 5.9334687349227314e-08,
"loss": 0.0068,
"step": 14780
},
{
"epoch": 30.0609756097561,
"grad_norm": 0.07324660569429398,
"learning_rate": 5.4087464207236426e-08,
"loss": 0.0084,
"step": 14790
},
{
"epoch": 30.08130081300813,
"grad_norm": 0.031079620122909546,
"learning_rate": 4.9082996964794345e-08,
"loss": 0.0047,
"step": 14800
},
{
"epoch": 30.101626016260163,
"grad_norm": 0.08347532153129578,
"learning_rate": 4.432130994548866e-08,
"loss": 0.0061,
"step": 14810
},
{
"epoch": 30.121951219512194,
"grad_norm": 0.10173796117305756,
"learning_rate": 3.980242629291198e-08,
"loss": 0.0067,
"step": 14820
},
{
"epoch": 30.142276422764226,
"grad_norm": 0.07320091128349304,
"learning_rate": 3.5526367970539765e-08,
"loss": 0.0073,
"step": 14830
},
{
"epoch": 30.16260162601626,
"grad_norm": 0.09657250344753265,
"learning_rate": 3.1493155761613826e-08,
"loss": 0.0059,
"step": 14840
},
{
"epoch": 30.182926829268293,
"grad_norm": 0.0755375325679779,
"learning_rate": 2.7702809269058992e-08,
"loss": 0.0057,
"step": 14850
},
{
"epoch": 30.203252032520325,
"grad_norm": 0.11463439464569092,
"learning_rate": 2.4155346915394337e-08,
"loss": 0.008,
"step": 14860
},
{
"epoch": 30.223577235772357,
"grad_norm": 0.0628993809223175,
"learning_rate": 2.085078594261103e-08,
"loss": 0.0077,
"step": 14870
},
{
"epoch": 30.24390243902439,
"grad_norm": 0.050431057810783386,
"learning_rate": 1.7789142412122372e-08,
"loss": 0.006,
"step": 14880
},
{
"epoch": 30.264227642276424,
"grad_norm": 0.05110840126872063,
"learning_rate": 1.4970431204663905e-08,
"loss": 0.0066,
"step": 14890
},
{
"epoch": 30.284552845528456,
"grad_norm": 0.05618211254477501,
"learning_rate": 1.2394666020226764e-08,
"loss": 0.0057,
"step": 14900
},
{
"epoch": 30.304878048780488,
"grad_norm": 0.06732738763093948,
"learning_rate": 1.0061859378007743e-08,
"loss": 0.0047,
"step": 14910
},
{
"epoch": 30.32520325203252,
"grad_norm": 0.11842504143714905,
"learning_rate": 7.97202261630936e-09,
"loss": 0.0107,
"step": 14920
},
{
"epoch": 30.34552845528455,
"grad_norm": 0.039801180362701416,
"learning_rate": 6.125165892539863e-09,
"loss": 0.0066,
"step": 14930
},
{
"epoch": 30.365853658536587,
"grad_norm": 0.08215242624282837,
"learning_rate": 4.5212981831022076e-09,
"loss": 0.006,
"step": 14940
},
{
"epoch": 30.38617886178862,
"grad_norm": 0.043269477784633636,
"learning_rate": 3.1604272834051542e-09,
"loss": 0.0056,
"step": 14950
},
{
"epoch": 30.40650406504065,
"grad_norm": 0.09183301776647568,
"learning_rate": 2.04255980778556e-09,
"loss": 0.0049,
"step": 14960
},
{
"epoch": 30.426829268292682,
"grad_norm": 0.02949368581175804,
"learning_rate": 1.1677011895028234e-09,
"loss": 0.0047,
"step": 14970
},
{
"epoch": 30.447154471544714,
"grad_norm": 0.06685309112071991,
"learning_rate": 5.358556807000259e-10,
"loss": 0.0052,
"step": 14980
},
{
"epoch": 30.46747967479675,
"grad_norm": 0.07415076345205307,
"learning_rate": 1.4702635238728058e-10,
"loss": 0.007,
"step": 14990
},
{
"epoch": 30.48780487804878,
"grad_norm": 0.06969624757766724,
"learning_rate": 1.2150944139754927e-12,
"loss": 0.0058,
"step": 15000
}
],
"logging_steps": 10,
"max_steps": 15000,
"num_input_tokens_seen": 0,
"num_train_epochs": 31,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}