full_sft / trainer_state.json
israel's picture
Upload 24 files
b40577e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.036250196710082,
"eval_steps": 500,
"global_step": 82000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0037027780092014034,
"grad_norm": 21.161716771412177,
"learning_rate": 3.6657162957751695e-08,
"loss": 2.6783,
"step": 100
},
{
"epoch": 0.007405556018402807,
"grad_norm": 17.61802823072404,
"learning_rate": 7.368460028881402e-08,
"loss": 2.6426,
"step": 200
},
{
"epoch": 0.01110833402760421,
"grad_norm": 8.635640044259958,
"learning_rate": 1.1071203761987633e-07,
"loss": 2.5725,
"step": 300
},
{
"epoch": 0.014811112036805614,
"grad_norm": 4.290727817061978,
"learning_rate": 1.4773947495093866e-07,
"loss": 2.4757,
"step": 400
},
{
"epoch": 0.018513890046007016,
"grad_norm": 3.791476034639625,
"learning_rate": 1.8476691228200099e-07,
"loss": 2.367,
"step": 500
},
{
"epoch": 0.02221666805520842,
"grad_norm": 4.547923325980467,
"learning_rate": 2.2179434961306329e-07,
"loss": 2.3288,
"step": 600
},
{
"epoch": 0.025919446064409823,
"grad_norm": 4.402404364420564,
"learning_rate": 2.5882178694412564e-07,
"loss": 2.2759,
"step": 700
},
{
"epoch": 0.029622224073611227,
"grad_norm": 3.6276565743103535,
"learning_rate": 2.958492242751879e-07,
"loss": 2.2766,
"step": 800
},
{
"epoch": 0.03332500208281263,
"grad_norm": 4.170244852808218,
"learning_rate": 3.3287666160625024e-07,
"loss": 2.2338,
"step": 900
},
{
"epoch": 0.03702778009201403,
"grad_norm": 4.412766673186702,
"learning_rate": 3.699040989373126e-07,
"loss": 2.1561,
"step": 1000
},
{
"epoch": 0.04073055810121544,
"grad_norm": 4.628401400135504,
"learning_rate": 4.069315362683749e-07,
"loss": 2.1544,
"step": 1100
},
{
"epoch": 0.04443333611041684,
"grad_norm": 3.7081302230204867,
"learning_rate": 4.439589735994372e-07,
"loss": 2.106,
"step": 1200
},
{
"epoch": 0.04813611411961825,
"grad_norm": 4.2137908215712905,
"learning_rate": 4.809864109304995e-07,
"loss": 2.0794,
"step": 1300
},
{
"epoch": 0.05183889212881965,
"grad_norm": 4.60851346975717,
"learning_rate": 5.180138482615619e-07,
"loss": 2.0641,
"step": 1400
},
{
"epoch": 0.05554167013802105,
"grad_norm": 4.313023013354129,
"learning_rate": 5.550412855926242e-07,
"loss": 1.9983,
"step": 1500
},
{
"epoch": 0.059244448147222455,
"grad_norm": 3.966177577383383,
"learning_rate": 5.920687229236865e-07,
"loss": 2.0366,
"step": 1600
},
{
"epoch": 0.06294722615642385,
"grad_norm": 4.661276659671688,
"learning_rate": 6.290961602547487e-07,
"loss": 2.0112,
"step": 1700
},
{
"epoch": 0.06665000416562526,
"grad_norm": 3.856161388179088,
"learning_rate": 6.661235975858112e-07,
"loss": 1.9547,
"step": 1800
},
{
"epoch": 0.07035278217482667,
"grad_norm": 4.5116015322440015,
"learning_rate": 7.031510349168734e-07,
"loss": 1.9599,
"step": 1900
},
{
"epoch": 0.07405556018402806,
"grad_norm": 3.9605628970368416,
"learning_rate": 7.401784722479357e-07,
"loss": 1.9673,
"step": 2000
},
{
"epoch": 0.07775833819322947,
"grad_norm": 4.534104810402031,
"learning_rate": 7.772059095789982e-07,
"loss": 1.8987,
"step": 2100
},
{
"epoch": 0.08146111620243088,
"grad_norm": 3.7768720377686815,
"learning_rate": 8.142333469100604e-07,
"loss": 1.9039,
"step": 2200
},
{
"epoch": 0.08516389421163227,
"grad_norm": 3.709870476374346,
"learning_rate": 8.512607842411227e-07,
"loss": 1.8572,
"step": 2300
},
{
"epoch": 0.08886667222083368,
"grad_norm": 3.861469813158571,
"learning_rate": 8.882882215721851e-07,
"loss": 1.831,
"step": 2400
},
{
"epoch": 0.09256945023003509,
"grad_norm": 4.046784580473015,
"learning_rate": 9.253156589032473e-07,
"loss": 1.856,
"step": 2500
},
{
"epoch": 0.0962722282392365,
"grad_norm": 4.439862554726597,
"learning_rate": 9.623430962343098e-07,
"loss": 1.8344,
"step": 2600
},
{
"epoch": 0.09997500624843789,
"grad_norm": 6.222158633649652,
"learning_rate": 9.993705335653721e-07,
"loss": 1.8408,
"step": 2700
},
{
"epoch": 0.1036777842576393,
"grad_norm": 3.7129912385070503,
"learning_rate": 1.0363979708964342e-06,
"loss": 1.8195,
"step": 2800
},
{
"epoch": 0.1073805622668407,
"grad_norm": 4.073889809938404,
"learning_rate": 1.0734254082274968e-06,
"loss": 1.7714,
"step": 2900
},
{
"epoch": 0.1110833402760421,
"grad_norm": 3.8485467079112805,
"learning_rate": 1.1104528455585589e-06,
"loss": 1.7782,
"step": 3000
},
{
"epoch": 0.1147861182852435,
"grad_norm": 3.548076088775858,
"learning_rate": 1.1474802828896212e-06,
"loss": 1.7646,
"step": 3100
},
{
"epoch": 0.11848889629444491,
"grad_norm": 3.754340846452376,
"learning_rate": 1.1845077202206837e-06,
"loss": 1.7733,
"step": 3200
},
{
"epoch": 0.12219167430364632,
"grad_norm": 6.281470572024916,
"learning_rate": 1.2215351575517459e-06,
"loss": 1.7361,
"step": 3300
},
{
"epoch": 0.1258944523128477,
"grad_norm": 4.065489405313006,
"learning_rate": 1.2585625948828082e-06,
"loss": 1.7397,
"step": 3400
},
{
"epoch": 0.12959723032204912,
"grad_norm": 4.260390115086277,
"learning_rate": 1.2955900322138707e-06,
"loss": 1.7571,
"step": 3500
},
{
"epoch": 0.13330000833125052,
"grad_norm": 5.9838104492225215,
"learning_rate": 1.3326174695449328e-06,
"loss": 1.7205,
"step": 3600
},
{
"epoch": 0.13700278634045193,
"grad_norm": 4.7607112587769915,
"learning_rate": 1.3696449068759952e-06,
"loss": 1.713,
"step": 3700
},
{
"epoch": 0.14070556434965334,
"grad_norm": 4.047013434162942,
"learning_rate": 1.4066723442070577e-06,
"loss": 1.7084,
"step": 3800
},
{
"epoch": 0.14440834235885472,
"grad_norm": 6.154993338003737,
"learning_rate": 1.4436997815381198e-06,
"loss": 1.6784,
"step": 3900
},
{
"epoch": 0.14811112036805613,
"grad_norm": 3.6172501531655614,
"learning_rate": 1.4807272188691821e-06,
"loss": 1.6514,
"step": 4000
},
{
"epoch": 0.15181389837725753,
"grad_norm": 3.9661920321991757,
"learning_rate": 1.5177546562002447e-06,
"loss": 1.6642,
"step": 4100
},
{
"epoch": 0.15551667638645894,
"grad_norm": 4.468519494026533,
"learning_rate": 1.5547820935313068e-06,
"loss": 1.6517,
"step": 4200
},
{
"epoch": 0.15921945439566035,
"grad_norm": 4.809090349712038,
"learning_rate": 1.5918095308623691e-06,
"loss": 1.6456,
"step": 4300
},
{
"epoch": 0.16292223240486176,
"grad_norm": 3.991862639554731,
"learning_rate": 1.6288369681934317e-06,
"loss": 1.6474,
"step": 4400
},
{
"epoch": 0.16662501041406316,
"grad_norm": 4.719090103521477,
"learning_rate": 1.6658644055244938e-06,
"loss": 1.6042,
"step": 4500
},
{
"epoch": 0.17032778842326454,
"grad_norm": 4.927706214636146,
"learning_rate": 1.702891842855556e-06,
"loss": 1.5918,
"step": 4600
},
{
"epoch": 0.17403056643246595,
"grad_norm": 4.566524993389404,
"learning_rate": 1.7399192801866186e-06,
"loss": 1.6103,
"step": 4700
},
{
"epoch": 0.17773334444166736,
"grad_norm": 4.908438965819459,
"learning_rate": 1.7769467175176807e-06,
"loss": 1.6101,
"step": 4800
},
{
"epoch": 0.18143612245086876,
"grad_norm": 3.8011540864311866,
"learning_rate": 1.813974154848743e-06,
"loss": 1.6162,
"step": 4900
},
{
"epoch": 0.18513890046007017,
"grad_norm": 4.241301056669864,
"learning_rate": 1.8510015921798056e-06,
"loss": 1.6117,
"step": 5000
},
{
"epoch": 0.18884167846927158,
"grad_norm": 4.305210424709023,
"learning_rate": 1.8880290295108677e-06,
"loss": 1.5634,
"step": 5100
},
{
"epoch": 0.192544456478473,
"grad_norm": 5.188453275523176,
"learning_rate": 1.92505646684193e-06,
"loss": 1.5839,
"step": 5200
},
{
"epoch": 0.19624723448767437,
"grad_norm": 4.390695329489216,
"learning_rate": 1.9620839041729924e-06,
"loss": 1.5747,
"step": 5300
},
{
"epoch": 0.19995001249687577,
"grad_norm": 4.581970539434446,
"learning_rate": 1.999111341504055e-06,
"loss": 1.5597,
"step": 5400
},
{
"epoch": 0.20365279050607718,
"grad_norm": 4.0470801193804755,
"learning_rate": 2.036138778835117e-06,
"loss": 1.5275,
"step": 5500
},
{
"epoch": 0.2073555685152786,
"grad_norm": 4.1287928029941385,
"learning_rate": 2.073166216166179e-06,
"loss": 1.5455,
"step": 5600
},
{
"epoch": 0.21105834652448,
"grad_norm": 5.5739542390705274,
"learning_rate": 2.1101936534972417e-06,
"loss": 1.526,
"step": 5700
},
{
"epoch": 0.2147611245336814,
"grad_norm": 4.072688346808655,
"learning_rate": 2.147221090828304e-06,
"loss": 1.5006,
"step": 5800
},
{
"epoch": 0.2184639025428828,
"grad_norm": 5.331904305941977,
"learning_rate": 2.1842485281593663e-06,
"loss": 1.5417,
"step": 5900
},
{
"epoch": 0.2221666805520842,
"grad_norm": 4.378202415797325,
"learning_rate": 2.221275965490429e-06,
"loss": 1.4885,
"step": 6000
},
{
"epoch": 0.2258694585612856,
"grad_norm": 4.318453195850846,
"learning_rate": 2.258303402821491e-06,
"loss": 1.5359,
"step": 6100
},
{
"epoch": 0.229572236570487,
"grad_norm": 4.475047536813248,
"learning_rate": 2.295330840152553e-06,
"loss": 1.5168,
"step": 6200
},
{
"epoch": 0.2332750145796884,
"grad_norm": 4.665314820774367,
"learning_rate": 2.3323582774836156e-06,
"loss": 1.4688,
"step": 6300
},
{
"epoch": 0.23697779258888982,
"grad_norm": 5.210970519844203,
"learning_rate": 2.3693857148146778e-06,
"loss": 1.4775,
"step": 6400
},
{
"epoch": 0.24068057059809123,
"grad_norm": 4.410640014744449,
"learning_rate": 2.4064131521457403e-06,
"loss": 1.4738,
"step": 6500
},
{
"epoch": 0.24438334860729263,
"grad_norm": 3.944391436658137,
"learning_rate": 2.443440589476803e-06,
"loss": 1.4469,
"step": 6600
},
{
"epoch": 0.248086126616494,
"grad_norm": 4.96868756013431,
"learning_rate": 2.480468026807865e-06,
"loss": 1.4821,
"step": 6700
},
{
"epoch": 0.2517889046256954,
"grad_norm": 4.462942085694235,
"learning_rate": 2.517495464138927e-06,
"loss": 1.4591,
"step": 6800
},
{
"epoch": 0.25549168263489686,
"grad_norm": 4.378031684132469,
"learning_rate": 2.5545229014699896e-06,
"loss": 1.4578,
"step": 6900
},
{
"epoch": 0.25919446064409823,
"grad_norm": 4.241655047493595,
"learning_rate": 2.5915503388010517e-06,
"loss": 1.4705,
"step": 7000
},
{
"epoch": 0.2628972386532996,
"grad_norm": 3.981686552106327,
"learning_rate": 2.6285777761321142e-06,
"loss": 1.4538,
"step": 7100
},
{
"epoch": 0.26660001666250105,
"grad_norm": 4.3607343276496575,
"learning_rate": 2.6656052134631768e-06,
"loss": 1.411,
"step": 7200
},
{
"epoch": 0.27030279467170243,
"grad_norm": 4.807349625008967,
"learning_rate": 2.7026326507942385e-06,
"loss": 1.4205,
"step": 7300
},
{
"epoch": 0.27400557268090386,
"grad_norm": 4.289974925492848,
"learning_rate": 2.739660088125301e-06,
"loss": 1.4374,
"step": 7400
},
{
"epoch": 0.27770835069010524,
"grad_norm": 3.491997941988729,
"learning_rate": 2.7766875254563636e-06,
"loss": 1.4393,
"step": 7500
},
{
"epoch": 0.2814111286993067,
"grad_norm": 4.317473381602871,
"learning_rate": 2.8137149627874257e-06,
"loss": 1.3965,
"step": 7600
},
{
"epoch": 0.28511390670850806,
"grad_norm": 4.317169353881641,
"learning_rate": 2.850742400118488e-06,
"loss": 1.4139,
"step": 7700
},
{
"epoch": 0.28881668471770944,
"grad_norm": 4.342306783527047,
"learning_rate": 2.8877698374495503e-06,
"loss": 1.3966,
"step": 7800
},
{
"epoch": 0.2925194627269109,
"grad_norm": 4.561613447481574,
"learning_rate": 2.9247972747806124e-06,
"loss": 1.4207,
"step": 7900
},
{
"epoch": 0.29622224073611225,
"grad_norm": 4.603226423674348,
"learning_rate": 2.961824712111675e-06,
"loss": 1.4369,
"step": 8000
},
{
"epoch": 0.2999250187453137,
"grad_norm": 4.628660307539308,
"learning_rate": 2.9988521494427375e-06,
"loss": 1.3871,
"step": 8100
},
{
"epoch": 0.30362779675451507,
"grad_norm": 3.8865443379767415,
"learning_rate": 3.0358795867737996e-06,
"loss": 1.4182,
"step": 8200
},
{
"epoch": 0.3073305747637165,
"grad_norm": 4.038269978955141,
"learning_rate": 3.072907024104862e-06,
"loss": 1.3622,
"step": 8300
},
{
"epoch": 0.3110333527729179,
"grad_norm": 5.136502242831012,
"learning_rate": 3.1099344614359243e-06,
"loss": 1.3889,
"step": 8400
},
{
"epoch": 0.31473613078211926,
"grad_norm": 4.457630892251384,
"learning_rate": 3.1469618987669864e-06,
"loss": 1.387,
"step": 8500
},
{
"epoch": 0.3184389087913207,
"grad_norm": 4.651901883399858,
"learning_rate": 3.183989336098049e-06,
"loss": 1.3741,
"step": 8600
},
{
"epoch": 0.3221416868005221,
"grad_norm": 4.800399697463299,
"learning_rate": 3.2210167734291115e-06,
"loss": 1.3473,
"step": 8700
},
{
"epoch": 0.3258444648097235,
"grad_norm": 5.122987743156367,
"learning_rate": 3.2580442107601736e-06,
"loss": 1.3733,
"step": 8800
},
{
"epoch": 0.3295472428189249,
"grad_norm": 5.307686552433291,
"learning_rate": 3.295071648091236e-06,
"loss": 1.341,
"step": 8900
},
{
"epoch": 0.3332500208281263,
"grad_norm": 4.4204932532106085,
"learning_rate": 3.3320990854222982e-06,
"loss": 1.3615,
"step": 9000
},
{
"epoch": 0.3369527988373277,
"grad_norm": 4.937899325267276,
"learning_rate": 3.3691265227533603e-06,
"loss": 1.368,
"step": 9100
},
{
"epoch": 0.3406555768465291,
"grad_norm": 3.6435580501639326,
"learning_rate": 3.406153960084423e-06,
"loss": 1.341,
"step": 9200
},
{
"epoch": 0.3443583548557305,
"grad_norm": 3.884956476912519,
"learning_rate": 3.4431813974154854e-06,
"loss": 1.3458,
"step": 9300
},
{
"epoch": 0.3480611328649319,
"grad_norm": 4.291808958034935,
"learning_rate": 3.4802088347465475e-06,
"loss": 1.301,
"step": 9400
},
{
"epoch": 0.35176391087413333,
"grad_norm": 4.55718057133725,
"learning_rate": 3.51723627207761e-06,
"loss": 1.3203,
"step": 9500
},
{
"epoch": 0.3554666888833347,
"grad_norm": 3.927788263058885,
"learning_rate": 3.554263709408672e-06,
"loss": 1.3087,
"step": 9600
},
{
"epoch": 0.35916946689253615,
"grad_norm": 4.334142071876793,
"learning_rate": 3.5912911467397343e-06,
"loss": 1.328,
"step": 9700
},
{
"epoch": 0.36287224490173753,
"grad_norm": 3.9826425837051715,
"learning_rate": 3.628318584070797e-06,
"loss": 1.3162,
"step": 9800
},
{
"epoch": 0.3665750229109389,
"grad_norm": 3.7373318962103834,
"learning_rate": 3.665346021401859e-06,
"loss": 1.3427,
"step": 9900
},
{
"epoch": 0.37027780092014034,
"grad_norm": 4.309257631492866,
"learning_rate": 3.7023734587329215e-06,
"loss": 1.2918,
"step": 10000
},
{
"epoch": 0.3739805789293417,
"grad_norm": 4.000949746879435,
"learning_rate": 3.739400896063984e-06,
"loss": 1.2864,
"step": 10100
},
{
"epoch": 0.37768335693854316,
"grad_norm": 4.314641431918059,
"learning_rate": 3.7764283333950457e-06,
"loss": 1.2784,
"step": 10200
},
{
"epoch": 0.38138613494774454,
"grad_norm": 4.136976500403523,
"learning_rate": 3.8134557707261083e-06,
"loss": 1.2821,
"step": 10300
},
{
"epoch": 0.385088912956946,
"grad_norm": 4.8607889216449385,
"learning_rate": 3.85048320805717e-06,
"loss": 1.281,
"step": 10400
},
{
"epoch": 0.38879169096614735,
"grad_norm": 4.432173453928133,
"learning_rate": 3.8875106453882325e-06,
"loss": 1.2959,
"step": 10500
},
{
"epoch": 0.39249446897534873,
"grad_norm": 5.118601631131207,
"learning_rate": 3.9245380827192954e-06,
"loss": 1.2772,
"step": 10600
},
{
"epoch": 0.39619724698455017,
"grad_norm": 4.189210920096741,
"learning_rate": 3.9615655200503576e-06,
"loss": 1.311,
"step": 10700
},
{
"epoch": 0.39990002499375155,
"grad_norm": 4.9933785952086875,
"learning_rate": 3.99859295738142e-06,
"loss": 1.2798,
"step": 10800
},
{
"epoch": 0.403602803002953,
"grad_norm": 3.8535620848124785,
"learning_rate": 4.035620394712483e-06,
"loss": 1.2532,
"step": 10900
},
{
"epoch": 0.40730558101215436,
"grad_norm": 4.040521438500001,
"learning_rate": 4.072647832043545e-06,
"loss": 1.2739,
"step": 11000
},
{
"epoch": 0.4110083590213558,
"grad_norm": 4.280568957640515,
"learning_rate": 4.109675269374607e-06,
"loss": 1.2755,
"step": 11100
},
{
"epoch": 0.4147111370305572,
"grad_norm": 4.230099582418301,
"learning_rate": 4.146702706705669e-06,
"loss": 1.2676,
"step": 11200
},
{
"epoch": 0.41841391503975855,
"grad_norm": 3.807801983954757,
"learning_rate": 4.183730144036732e-06,
"loss": 1.2487,
"step": 11300
},
{
"epoch": 0.42211669304896,
"grad_norm": 4.2784766116809365,
"learning_rate": 4.220757581367794e-06,
"loss": 1.2545,
"step": 11400
},
{
"epoch": 0.42581947105816137,
"grad_norm": 4.855973605257093,
"learning_rate": 4.257785018698856e-06,
"loss": 1.2436,
"step": 11500
},
{
"epoch": 0.4295222490673628,
"grad_norm": 3.5115767446990995,
"learning_rate": 4.294812456029918e-06,
"loss": 1.2505,
"step": 11600
},
{
"epoch": 0.4332250270765642,
"grad_norm": 3.7062317227814408,
"learning_rate": 4.33183989336098e-06,
"loss": 1.2175,
"step": 11700
},
{
"epoch": 0.4369278050857656,
"grad_norm": 3.9387496036649936,
"learning_rate": 4.368867330692043e-06,
"loss": 1.222,
"step": 11800
},
{
"epoch": 0.440630583094967,
"grad_norm": 5.23595658030001,
"learning_rate": 4.4058947680231055e-06,
"loss": 1.2388,
"step": 11900
},
{
"epoch": 0.4443333611041684,
"grad_norm": 4.759096268753192,
"learning_rate": 4.442922205354168e-06,
"loss": 1.2036,
"step": 12000
},
{
"epoch": 0.4480361391133698,
"grad_norm": 4.094075469283295,
"learning_rate": 4.4799496426852306e-06,
"loss": 1.2543,
"step": 12100
},
{
"epoch": 0.4517389171225712,
"grad_norm": 4.153721175621665,
"learning_rate": 4.516977080016293e-06,
"loss": 1.227,
"step": 12200
},
{
"epoch": 0.45544169513177263,
"grad_norm": 4.508419619958273,
"learning_rate": 4.554004517347355e-06,
"loss": 1.188,
"step": 12300
},
{
"epoch": 0.459144473140974,
"grad_norm": 5.431232416017837,
"learning_rate": 4.591031954678417e-06,
"loss": 1.2254,
"step": 12400
},
{
"epoch": 0.46284725115017544,
"grad_norm": 4.195672755415828,
"learning_rate": 4.62805939200948e-06,
"loss": 1.2161,
"step": 12500
},
{
"epoch": 0.4665500291593768,
"grad_norm": 4.928316304383083,
"learning_rate": 4.665086829340542e-06,
"loss": 1.2117,
"step": 12600
},
{
"epoch": 0.4702528071685782,
"grad_norm": 3.591246874782549,
"learning_rate": 4.702114266671604e-06,
"loss": 1.2114,
"step": 12700
},
{
"epoch": 0.47395558517777964,
"grad_norm": 3.4385074546101473,
"learning_rate": 4.739141704002666e-06,
"loss": 1.1622,
"step": 12800
},
{
"epoch": 0.477658363186981,
"grad_norm": 4.266312659389707,
"learning_rate": 4.776169141333728e-06,
"loss": 1.1994,
"step": 12900
},
{
"epoch": 0.48136114119618245,
"grad_norm": 4.318134728161222,
"learning_rate": 4.813196578664791e-06,
"loss": 1.194,
"step": 13000
},
{
"epoch": 0.48506391920538383,
"grad_norm": 3.743433627557919,
"learning_rate": 4.850224015995853e-06,
"loss": 1.1862,
"step": 13100
},
{
"epoch": 0.48876669721458527,
"grad_norm": 4.432513953759492,
"learning_rate": 4.8872514533269155e-06,
"loss": 1.2034,
"step": 13200
},
{
"epoch": 0.49246947522378665,
"grad_norm": 4.932972639382486,
"learning_rate": 4.924278890657978e-06,
"loss": 1.1829,
"step": 13300
},
{
"epoch": 0.496172253232988,
"grad_norm": 4.552124318708114,
"learning_rate": 4.961306327989041e-06,
"loss": 1.1877,
"step": 13400
},
{
"epoch": 0.49987503124218946,
"grad_norm": 4.57039085993272,
"learning_rate": 4.998333765320103e-06,
"loss": 1.1804,
"step": 13500
},
{
"epoch": 0.5035778092513908,
"grad_norm": 4.435102306960172,
"learning_rate": 5.035361202651165e-06,
"loss": 1.1673,
"step": 13600
},
{
"epoch": 0.5072805872605922,
"grad_norm": 4.186670025498094,
"learning_rate": 5.072388639982228e-06,
"loss": 1.1933,
"step": 13700
},
{
"epoch": 0.5109833652697937,
"grad_norm": 4.405934801625066,
"learning_rate": 5.10941607731329e-06,
"loss": 1.1772,
"step": 13800
},
{
"epoch": 0.5146861432789951,
"grad_norm": 4.311924669929619,
"learning_rate": 5.146443514644351e-06,
"loss": 1.1665,
"step": 13900
},
{
"epoch": 0.5183889212881965,
"grad_norm": 5.625220459288197,
"learning_rate": 5.183470951975414e-06,
"loss": 1.1735,
"step": 14000
},
{
"epoch": 0.5220916992973978,
"grad_norm": 3.8405703022228606,
"learning_rate": 5.220498389306476e-06,
"loss": 1.1904,
"step": 14100
},
{
"epoch": 0.5257944773065992,
"grad_norm": 4.059806709247945,
"learning_rate": 5.257525826637538e-06,
"loss": 1.1588,
"step": 14200
},
{
"epoch": 0.5294972553158007,
"grad_norm": 3.8722695281592485,
"learning_rate": 5.294553263968601e-06,
"loss": 1.1469,
"step": 14300
},
{
"epoch": 0.5332000333250021,
"grad_norm": 4.66275202781423,
"learning_rate": 5.331580701299663e-06,
"loss": 1.1442,
"step": 14400
},
{
"epoch": 0.5369028113342035,
"grad_norm": 4.6182096996448845,
"learning_rate": 5.3686081386307255e-06,
"loss": 1.1543,
"step": 14500
},
{
"epoch": 0.5406055893434049,
"grad_norm": 3.9837337174635103,
"learning_rate": 5.4056355759617885e-06,
"loss": 1.158,
"step": 14600
},
{
"epoch": 0.5443083673526063,
"grad_norm": 3.8034049725872356,
"learning_rate": 5.442663013292851e-06,
"loss": 1.1546,
"step": 14700
},
{
"epoch": 0.5480111453618077,
"grad_norm": 4.62083026318626,
"learning_rate": 5.479690450623913e-06,
"loss": 1.1553,
"step": 14800
},
{
"epoch": 0.5517139233710091,
"grad_norm": 4.462803142186011,
"learning_rate": 5.516717887954976e-06,
"loss": 1.1506,
"step": 14900
},
{
"epoch": 0.5554167013802105,
"grad_norm": 4.165198766777481,
"learning_rate": 5.553745325286038e-06,
"loss": 1.147,
"step": 15000
},
{
"epoch": 0.5591194793894119,
"grad_norm": 3.5365327477483297,
"learning_rate": 5.590772762617099e-06,
"loss": 1.1341,
"step": 15100
},
{
"epoch": 0.5628222573986134,
"grad_norm": 3.8324766687461653,
"learning_rate": 5.627800199948162e-06,
"loss": 1.1123,
"step": 15200
},
{
"epoch": 0.5665250354078147,
"grad_norm": 4.271061462203587,
"learning_rate": 5.664827637279224e-06,
"loss": 1.1212,
"step": 15300
},
{
"epoch": 0.5702278134170161,
"grad_norm": 4.5272229115194875,
"learning_rate": 5.701855074610286e-06,
"loss": 1.1438,
"step": 15400
},
{
"epoch": 0.5739305914262175,
"grad_norm": 4.660071317336288,
"learning_rate": 5.738882511941349e-06,
"loss": 1.1174,
"step": 15500
},
{
"epoch": 0.5776333694354189,
"grad_norm": 3.8174601055554094,
"learning_rate": 5.775909949272411e-06,
"loss": 1.095,
"step": 15600
},
{
"epoch": 0.5813361474446204,
"grad_norm": 3.9772037532519784,
"learning_rate": 5.8129373866034734e-06,
"loss": 1.0901,
"step": 15700
},
{
"epoch": 0.5850389254538217,
"grad_norm": 4.117368390246338,
"learning_rate": 5.849964823934536e-06,
"loss": 1.1218,
"step": 15800
},
{
"epoch": 0.5887417034630231,
"grad_norm": 3.6969429985121387,
"learning_rate": 5.8869922612655985e-06,
"loss": 1.1172,
"step": 15900
},
{
"epoch": 0.5924444814722245,
"grad_norm": 4.208647101328797,
"learning_rate": 5.924019698596661e-06,
"loss": 1.1005,
"step": 16000
},
{
"epoch": 0.596147259481426,
"grad_norm": 4.0291894246178455,
"learning_rate": 5.961047135927724e-06,
"loss": 1.1129,
"step": 16100
},
{
"epoch": 0.5998500374906274,
"grad_norm": 4.713616701400172,
"learning_rate": 5.998074573258785e-06,
"loss": 1.1002,
"step": 16200
},
{
"epoch": 0.6035528154998288,
"grad_norm": 4.1930372418205355,
"learning_rate": 6.035102010589847e-06,
"loss": 1.0886,
"step": 16300
},
{
"epoch": 0.6072555935090301,
"grad_norm": 4.254442138954682,
"learning_rate": 6.07212944792091e-06,
"loss": 1.0954,
"step": 16400
},
{
"epoch": 0.6109583715182315,
"grad_norm": 3.7190710738003014,
"learning_rate": 6.109156885251972e-06,
"loss": 1.1288,
"step": 16500
},
{
"epoch": 0.614661149527433,
"grad_norm": 5.286326819627371,
"learning_rate": 6.146184322583034e-06,
"loss": 1.0861,
"step": 16600
},
{
"epoch": 0.6183639275366344,
"grad_norm": 3.5036265705416,
"learning_rate": 6.183211759914097e-06,
"loss": 1.1086,
"step": 16700
},
{
"epoch": 0.6220667055458358,
"grad_norm": 3.5111130437153495,
"learning_rate": 6.220239197245159e-06,
"loss": 1.1022,
"step": 16800
},
{
"epoch": 0.6257694835550371,
"grad_norm": 3.2896238161080946,
"learning_rate": 6.257266634576221e-06,
"loss": 1.0955,
"step": 16900
},
{
"epoch": 0.6294722615642385,
"grad_norm": 3.6540403667854604,
"learning_rate": 6.294294071907284e-06,
"loss": 1.058,
"step": 17000
},
{
"epoch": 0.63317503957344,
"grad_norm": 4.4670975803638475,
"learning_rate": 6.3313215092383464e-06,
"loss": 1.068,
"step": 17100
},
{
"epoch": 0.6368778175826414,
"grad_norm": 4.530195581158383,
"learning_rate": 6.3683489465694085e-06,
"loss": 1.1105,
"step": 17200
},
{
"epoch": 0.6405805955918428,
"grad_norm": 3.8288830335210995,
"learning_rate": 6.4053763839004715e-06,
"loss": 1.0332,
"step": 17300
},
{
"epoch": 0.6442833736010442,
"grad_norm": 3.815833342915358,
"learning_rate": 6.442403821231533e-06,
"loss": 1.0794,
"step": 17400
},
{
"epoch": 0.6479861516102456,
"grad_norm": 4.604021991413655,
"learning_rate": 6.479431258562595e-06,
"loss": 1.079,
"step": 17500
},
{
"epoch": 0.651688929619447,
"grad_norm": 3.577028223699294,
"learning_rate": 6.516458695893658e-06,
"loss": 1.0653,
"step": 17600
},
{
"epoch": 0.6553917076286484,
"grad_norm": 4.111818385244818,
"learning_rate": 6.55348613322472e-06,
"loss": 1.035,
"step": 17700
},
{
"epoch": 0.6590944856378498,
"grad_norm": 3.56811764186164,
"learning_rate": 6.590513570555782e-06,
"loss": 1.0452,
"step": 17800
},
{
"epoch": 0.6627972636470512,
"grad_norm": 3.87646009188637,
"learning_rate": 6.627541007886845e-06,
"loss": 1.0493,
"step": 17900
},
{
"epoch": 0.6665000416562527,
"grad_norm": 3.406407594729227,
"learning_rate": 6.664568445217907e-06,
"loss": 1.0651,
"step": 18000
},
{
"epoch": 0.670202819665454,
"grad_norm": 2.866315730154109,
"learning_rate": 6.701595882548969e-06,
"loss": 1.0561,
"step": 18100
},
{
"epoch": 0.6739055976746554,
"grad_norm": 4.376427115975032,
"learning_rate": 6.738623319880032e-06,
"loss": 1.0602,
"step": 18200
},
{
"epoch": 0.6776083756838568,
"grad_norm": 3.2373014543024072,
"learning_rate": 6.775650757211094e-06,
"loss": 1.0397,
"step": 18300
},
{
"epoch": 0.6813111536930582,
"grad_norm": 3.9666084272474245,
"learning_rate": 6.8126781945421565e-06,
"loss": 1.0445,
"step": 18400
},
{
"epoch": 0.6850139317022597,
"grad_norm": 3.800460648153449,
"learning_rate": 6.8497056318732194e-06,
"loss": 1.0494,
"step": 18500
},
{
"epoch": 0.688716709711461,
"grad_norm": 4.274139967670289,
"learning_rate": 6.886733069204281e-06,
"loss": 1.0756,
"step": 18600
},
{
"epoch": 0.6924194877206624,
"grad_norm": 3.9994897603573665,
"learning_rate": 6.923760506535343e-06,
"loss": 1.0412,
"step": 18700
},
{
"epoch": 0.6961222657298638,
"grad_norm": 3.8633706482810553,
"learning_rate": 6.960787943866406e-06,
"loss": 1.0522,
"step": 18800
},
{
"epoch": 0.6998250437390653,
"grad_norm": 4.381038145148409,
"learning_rate": 6.997815381197468e-06,
"loss": 1.0209,
"step": 18900
},
{
"epoch": 0.7035278217482667,
"grad_norm": 4.1366815727300175,
"learning_rate": 7.03484281852853e-06,
"loss": 1.0351,
"step": 19000
},
{
"epoch": 0.707230599757468,
"grad_norm": 3.675160954426471,
"learning_rate": 7.071870255859593e-06,
"loss": 1.0291,
"step": 19100
},
{
"epoch": 0.7109333777666694,
"grad_norm": 3.603001585625626,
"learning_rate": 7.108897693190655e-06,
"loss": 1.0093,
"step": 19200
},
{
"epoch": 0.7146361557758708,
"grad_norm": 3.264233985905883,
"learning_rate": 7.145925130521717e-06,
"loss": 1.0394,
"step": 19300
},
{
"epoch": 0.7183389337850723,
"grad_norm": 3.8898393773576614,
"learning_rate": 7.182952567852779e-06,
"loss": 1.0309,
"step": 19400
},
{
"epoch": 0.7220417117942737,
"grad_norm": 3.2365236970202917,
"learning_rate": 7.219980005183842e-06,
"loss": 1.0304,
"step": 19500
},
{
"epoch": 0.7257444898034751,
"grad_norm": 3.497985907223146,
"learning_rate": 7.257007442514904e-06,
"loss": 1.0375,
"step": 19600
},
{
"epoch": 0.7294472678126764,
"grad_norm": 3.6359487776078714,
"learning_rate": 7.294034879845966e-06,
"loss": 1.0132,
"step": 19700
},
{
"epoch": 0.7331500458218778,
"grad_norm": 3.9379821448814343,
"learning_rate": 7.331062317177029e-06,
"loss": 1.019,
"step": 19800
},
{
"epoch": 0.7368528238310793,
"grad_norm": 4.750912299244358,
"learning_rate": 7.368089754508091e-06,
"loss": 1.0172,
"step": 19900
},
{
"epoch": 0.7405556018402807,
"grad_norm": 4.253487251656156,
"learning_rate": 7.405117191839153e-06,
"loss": 1.0093,
"step": 20000
},
{
"epoch": 0.7442583798494821,
"grad_norm": 3.5926201949515284,
"learning_rate": 7.442144629170216e-06,
"loss": 1.0163,
"step": 20100
},
{
"epoch": 0.7479611578586834,
"grad_norm": 3.8927981125403073,
"learning_rate": 7.479172066501278e-06,
"loss": 0.9827,
"step": 20200
},
{
"epoch": 0.7516639358678849,
"grad_norm": 4.573750708649385,
"learning_rate": 7.51619950383234e-06,
"loss": 1.0087,
"step": 20300
},
{
"epoch": 0.7553667138770863,
"grad_norm": 3.1260071677382952,
"learning_rate": 7.553226941163403e-06,
"loss": 1.0275,
"step": 20400
},
{
"epoch": 0.7590694918862877,
"grad_norm": 3.965170800530151,
"learning_rate": 7.590254378494465e-06,
"loss": 1.0132,
"step": 20500
},
{
"epoch": 0.7627722698954891,
"grad_norm": 3.1267324185244876,
"learning_rate": 7.627281815825527e-06,
"loss": 0.9803,
"step": 20600
},
{
"epoch": 0.7664750479046905,
"grad_norm": 3.4086206206945358,
"learning_rate": 7.66430925315659e-06,
"loss": 0.9954,
"step": 20700
},
{
"epoch": 0.770177825913892,
"grad_norm": 4.254989394844253,
"learning_rate": 7.701336690487652e-06,
"loss": 0.9922,
"step": 20800
},
{
"epoch": 0.7738806039230933,
"grad_norm": 3.7295802263455564,
"learning_rate": 7.738364127818714e-06,
"loss": 1.0004,
"step": 20900
},
{
"epoch": 0.7775833819322947,
"grad_norm": 4.671822378080011,
"learning_rate": 7.775391565149777e-06,
"loss": 0.9887,
"step": 21000
},
{
"epoch": 0.7812861599414961,
"grad_norm": 3.941447202712077,
"learning_rate": 7.81241900248084e-06,
"loss": 0.982,
"step": 21100
},
{
"epoch": 0.7849889379506975,
"grad_norm": 3.6530756566069633,
"learning_rate": 7.8494464398119e-06,
"loss": 0.9668,
"step": 21200
},
{
"epoch": 0.788691715959899,
"grad_norm": 3.9080413448774625,
"learning_rate": 7.886473877142964e-06,
"loss": 1.0055,
"step": 21300
},
{
"epoch": 0.7923944939691003,
"grad_norm": 3.603292889224154,
"learning_rate": 7.923501314474025e-06,
"loss": 1.0039,
"step": 21400
},
{
"epoch": 0.7960972719783017,
"grad_norm": 3.677324652882952,
"learning_rate": 7.960528751805088e-06,
"loss": 1.0226,
"step": 21500
},
{
"epoch": 0.7998000499875031,
"grad_norm": 3.6513050899215056,
"learning_rate": 7.997556189136151e-06,
"loss": 0.9766,
"step": 21600
},
{
"epoch": 0.8035028279967046,
"grad_norm": 3.357793359434031,
"learning_rate": 8.034583626467212e-06,
"loss": 0.9813,
"step": 21700
},
{
"epoch": 0.807205606005906,
"grad_norm": 2.760916399152151,
"learning_rate": 8.071611063798275e-06,
"loss": 0.979,
"step": 21800
},
{
"epoch": 0.8109083840151073,
"grad_norm": 3.6887561411257046,
"learning_rate": 8.108638501129338e-06,
"loss": 0.9601,
"step": 21900
},
{
"epoch": 0.8146111620243087,
"grad_norm": 3.652784111767623,
"learning_rate": 8.1456659384604e-06,
"loss": 0.9799,
"step": 22000
},
{
"epoch": 0.8183139400335101,
"grad_norm": 3.479191284518454,
"learning_rate": 8.182693375791462e-06,
"loss": 0.9785,
"step": 22100
},
{
"epoch": 0.8220167180427116,
"grad_norm": 4.128090009925586,
"learning_rate": 8.219720813122525e-06,
"loss": 0.9841,
"step": 22200
},
{
"epoch": 0.825719496051913,
"grad_norm": 3.662863541747893,
"learning_rate": 8.256748250453587e-06,
"loss": 0.947,
"step": 22300
},
{
"epoch": 0.8294222740611144,
"grad_norm": 3.217491905494941,
"learning_rate": 8.29377568778465e-06,
"loss": 0.9757,
"step": 22400
},
{
"epoch": 0.8331250520703157,
"grad_norm": 3.326399094958347,
"learning_rate": 8.330803125115712e-06,
"loss": 0.9598,
"step": 22500
},
{
"epoch": 0.8368278300795171,
"grad_norm": 2.909529562627351,
"learning_rate": 8.367830562446774e-06,
"loss": 0.9587,
"step": 22600
},
{
"epoch": 0.8405306080887186,
"grad_norm": 3.1644941957196346,
"learning_rate": 8.404857999777835e-06,
"loss": 0.9689,
"step": 22700
},
{
"epoch": 0.84423338609792,
"grad_norm": 3.152721404570826,
"learning_rate": 8.441885437108898e-06,
"loss": 0.9822,
"step": 22800
},
{
"epoch": 0.8479361641071214,
"grad_norm": 3.879640971417378,
"learning_rate": 8.478912874439961e-06,
"loss": 0.9379,
"step": 22900
},
{
"epoch": 0.8516389421163227,
"grad_norm": 4.432110940230918,
"learning_rate": 8.515940311771022e-06,
"loss": 0.9544,
"step": 23000
},
{
"epoch": 0.8553417201255242,
"grad_norm": 3.2420578574260697,
"learning_rate": 8.552967749102085e-06,
"loss": 0.9292,
"step": 23100
},
{
"epoch": 0.8590444981347256,
"grad_norm": 4.617591314029601,
"learning_rate": 8.589995186433148e-06,
"loss": 0.9477,
"step": 23200
},
{
"epoch": 0.862747276143927,
"grad_norm": 3.0259937163445194,
"learning_rate": 8.62702262376421e-06,
"loss": 0.9559,
"step": 23300
},
{
"epoch": 0.8664500541531284,
"grad_norm": 3.278192583185341,
"learning_rate": 8.664050061095272e-06,
"loss": 0.9583,
"step": 23400
},
{
"epoch": 0.8701528321623297,
"grad_norm": 3.6509553777490424,
"learning_rate": 8.701077498426335e-06,
"loss": 0.9749,
"step": 23500
},
{
"epoch": 0.8738556101715312,
"grad_norm": 3.4075169195241677,
"learning_rate": 8.738104935757397e-06,
"loss": 0.9223,
"step": 23600
},
{
"epoch": 0.8775583881807326,
"grad_norm": 2.893435896152694,
"learning_rate": 8.77513237308846e-06,
"loss": 0.9721,
"step": 23700
},
{
"epoch": 0.881261166189934,
"grad_norm": 4.064038243050667,
"learning_rate": 8.81215981041952e-06,
"loss": 0.9648,
"step": 23800
},
{
"epoch": 0.8849639441991354,
"grad_norm": 3.28397127001984,
"learning_rate": 8.849187247750584e-06,
"loss": 0.9413,
"step": 23900
},
{
"epoch": 0.8886667222083368,
"grad_norm": 2.7396572790329183,
"learning_rate": 8.886214685081647e-06,
"loss": 0.9538,
"step": 24000
},
{
"epoch": 0.8923695002175382,
"grad_norm": 3.831935799094677,
"learning_rate": 8.923242122412708e-06,
"loss": 0.9488,
"step": 24100
},
{
"epoch": 0.8960722782267396,
"grad_norm": 3.083787982483977,
"learning_rate": 8.960269559743771e-06,
"loss": 0.9798,
"step": 24200
},
{
"epoch": 0.899775056235941,
"grad_norm": 3.2638182611068784,
"learning_rate": 8.997296997074834e-06,
"loss": 0.9364,
"step": 24300
},
{
"epoch": 0.9034778342451424,
"grad_norm": 2.8271807284549824,
"learning_rate": 9.034324434405895e-06,
"loss": 0.9592,
"step": 24400
},
{
"epoch": 0.9071806122543439,
"grad_norm": 3.1878575837021295,
"learning_rate": 9.071351871736958e-06,
"loss": 0.944,
"step": 24500
},
{
"epoch": 0.9108833902635453,
"grad_norm": 3.344639834530028,
"learning_rate": 9.108379309068021e-06,
"loss": 0.9305,
"step": 24600
},
{
"epoch": 0.9145861682727466,
"grad_norm": 3.271225972067076,
"learning_rate": 9.145406746399082e-06,
"loss": 0.9334,
"step": 24700
},
{
"epoch": 0.918288946281948,
"grad_norm": 3.7284762668702314,
"learning_rate": 9.182434183730145e-06,
"loss": 0.9195,
"step": 24800
},
{
"epoch": 0.9219917242911494,
"grad_norm": 4.098726803359245,
"learning_rate": 9.219461621061208e-06,
"loss": 0.9398,
"step": 24900
},
{
"epoch": 0.9256945023003509,
"grad_norm": 2.8965683319135795,
"learning_rate": 9.25648905839227e-06,
"loss": 0.9365,
"step": 25000
},
{
"epoch": 0.9293972803095523,
"grad_norm": 3.472751774536713,
"learning_rate": 9.29351649572333e-06,
"loss": 0.942,
"step": 25100
},
{
"epoch": 0.9331000583187536,
"grad_norm": 2.741682625283635,
"learning_rate": 9.330543933054394e-06,
"loss": 0.9463,
"step": 25200
},
{
"epoch": 0.936802836327955,
"grad_norm": 2.9695965336662584,
"learning_rate": 9.367571370385457e-06,
"loss": 0.9355,
"step": 25300
},
{
"epoch": 0.9405056143371564,
"grad_norm": 3.0507053957289814,
"learning_rate": 9.404598807716518e-06,
"loss": 0.9123,
"step": 25400
},
{
"epoch": 0.9442083923463579,
"grad_norm": 3.997694772330171,
"learning_rate": 9.441626245047581e-06,
"loss": 0.9326,
"step": 25500
},
{
"epoch": 0.9479111703555593,
"grad_norm": 3.006394895446821,
"learning_rate": 9.478653682378644e-06,
"loss": 0.9343,
"step": 25600
},
{
"epoch": 0.9516139483647607,
"grad_norm": 3.030113997176904,
"learning_rate": 9.515681119709705e-06,
"loss": 0.9263,
"step": 25700
},
{
"epoch": 0.955316726373962,
"grad_norm": 2.9203358557009187,
"learning_rate": 9.552708557040768e-06,
"loss": 0.9432,
"step": 25800
},
{
"epoch": 0.9590195043831635,
"grad_norm": 3.320595109219397,
"learning_rate": 9.589735994371831e-06,
"loss": 0.9132,
"step": 25900
},
{
"epoch": 0.9627222823923649,
"grad_norm": 3.2953825381075883,
"learning_rate": 9.626763431702892e-06,
"loss": 0.945,
"step": 26000
},
{
"epoch": 0.9664250604015663,
"grad_norm": 2.8690624980388097,
"learning_rate": 9.663790869033955e-06,
"loss": 0.9269,
"step": 26100
},
{
"epoch": 0.9701278384107677,
"grad_norm": 3.238448651703217,
"learning_rate": 9.700818306365017e-06,
"loss": 0.9223,
"step": 26200
},
{
"epoch": 0.973830616419969,
"grad_norm": 3.8191924390794076,
"learning_rate": 9.73784574369608e-06,
"loss": 0.9294,
"step": 26300
},
{
"epoch": 0.9775333944291705,
"grad_norm": 2.89593322378588,
"learning_rate": 9.774873181027143e-06,
"loss": 0.92,
"step": 26400
},
{
"epoch": 0.9812361724383719,
"grad_norm": 3.701573808189793,
"learning_rate": 9.811900618358204e-06,
"loss": 0.9471,
"step": 26500
},
{
"epoch": 0.9849389504475733,
"grad_norm": 3.334857306171596,
"learning_rate": 9.848928055689267e-06,
"loss": 0.9205,
"step": 26600
},
{
"epoch": 0.9886417284567747,
"grad_norm": 3.3187413615000705,
"learning_rate": 9.88595549302033e-06,
"loss": 0.9065,
"step": 26700
},
{
"epoch": 0.992344506465976,
"grad_norm": 3.3451918803521945,
"learning_rate": 9.922982930351391e-06,
"loss": 0.8933,
"step": 26800
},
{
"epoch": 0.9960472844751775,
"grad_norm": 3.2100420698017147,
"learning_rate": 9.960010367682454e-06,
"loss": 0.9112,
"step": 26900
},
{
"epoch": 0.9997500624843789,
"grad_norm": 2.837918388436053,
"learning_rate": 9.997037805013515e-06,
"loss": 0.9567,
"step": 27000
},
{
"epoch": 1.0034435835485573,
"grad_norm": 2.9548645107416087,
"learning_rate": 9.99999646509579e-06,
"loss": 0.8909,
"step": 27100
},
{
"epoch": 1.0071463615577587,
"grad_norm": 3.1446751198623435,
"learning_rate": 9.999984604128388e-06,
"loss": 0.8726,
"step": 27200
},
{
"epoch": 1.01084913956696,
"grad_norm": 2.976258203122292,
"learning_rate": 9.999964390388652e-06,
"loss": 0.893,
"step": 27300
},
{
"epoch": 1.0145519175761615,
"grad_norm": 2.980666927786789,
"learning_rate": 9.999935823910352e-06,
"loss": 0.8665,
"step": 27400
},
{
"epoch": 1.0182546955853629,
"grad_norm": 3.1487567474786795,
"learning_rate": 9.999898904741209e-06,
"loss": 0.8649,
"step": 27500
},
{
"epoch": 1.0219574735945642,
"grad_norm": 2.629105326617924,
"learning_rate": 9.999853632942897e-06,
"loss": 0.8607,
"step": 27600
},
{
"epoch": 1.0256602516037656,
"grad_norm": 3.8572529888050826,
"learning_rate": 9.999800008591049e-06,
"loss": 0.8761,
"step": 27700
},
{
"epoch": 1.0293630296129672,
"grad_norm": 2.767732632774148,
"learning_rate": 9.999738031775246e-06,
"loss": 0.8778,
"step": 27800
},
{
"epoch": 1.0330658076221686,
"grad_norm": 3.4506007181787606,
"learning_rate": 9.99966770259902e-06,
"loss": 0.8842,
"step": 27900
},
{
"epoch": 1.03676858563137,
"grad_norm": 3.3838219329729764,
"learning_rate": 9.999589021179867e-06,
"loss": 0.8653,
"step": 28000
},
{
"epoch": 1.0404713636405714,
"grad_norm": 2.7380768463734673,
"learning_rate": 9.999501987649225e-06,
"loss": 0.8715,
"step": 28100
},
{
"epoch": 1.0441741416497727,
"grad_norm": 2.5724850888468818,
"learning_rate": 9.999406602152487e-06,
"loss": 0.873,
"step": 28200
},
{
"epoch": 1.0478769196589741,
"grad_norm": 2.814547694838622,
"learning_rate": 9.999302864849006e-06,
"loss": 0.8652,
"step": 28300
},
{
"epoch": 1.0515796976681755,
"grad_norm": 2.8671305113874985,
"learning_rate": 9.999190775912075e-06,
"loss": 0.8773,
"step": 28400
},
{
"epoch": 1.0552824756773769,
"grad_norm": 3.4364162307062016,
"learning_rate": 9.999070335528951e-06,
"loss": 0.8722,
"step": 28500
},
{
"epoch": 1.0589852536865783,
"grad_norm": 3.1668831518960747,
"learning_rate": 9.99894154390083e-06,
"loss": 0.878,
"step": 28600
},
{
"epoch": 1.0626880316957799,
"grad_norm": 2.5661208890092215,
"learning_rate": 9.998804401242874e-06,
"loss": 0.8642,
"step": 28700
},
{
"epoch": 1.0663908097049812,
"grad_norm": 2.702695496460383,
"learning_rate": 9.998658907784183e-06,
"loss": 0.8576,
"step": 28800
},
{
"epoch": 1.0700935877141826,
"grad_norm": 2.9176557901328666,
"learning_rate": 9.998505063767811e-06,
"loss": 0.8705,
"step": 28900
},
{
"epoch": 1.073796365723384,
"grad_norm": 3.7212751721205937,
"learning_rate": 9.998342869450767e-06,
"loss": 0.8641,
"step": 29000
},
{
"epoch": 1.0774991437325854,
"grad_norm": 3.1124898105603767,
"learning_rate": 9.998172325104007e-06,
"loss": 0.8679,
"step": 29100
},
{
"epoch": 1.0812019217417868,
"grad_norm": 3.211449706725418,
"learning_rate": 9.997993431012433e-06,
"loss": 0.8698,
"step": 29200
},
{
"epoch": 1.0849046997509881,
"grad_norm": 3.3735417926416105,
"learning_rate": 9.997806187474899e-06,
"loss": 0.8593,
"step": 29300
},
{
"epoch": 1.0886074777601895,
"grad_norm": 3.100346897510759,
"learning_rate": 9.997610594804206e-06,
"loss": 0.8852,
"step": 29400
},
{
"epoch": 1.092310255769391,
"grad_norm": 3.046486604721806,
"learning_rate": 9.997406653327103e-06,
"loss": 0.8734,
"step": 29500
},
{
"epoch": 1.0960130337785925,
"grad_norm": 3.462276437104938,
"learning_rate": 9.99719436338429e-06,
"loss": 0.8333,
"step": 29600
},
{
"epoch": 1.0997158117877939,
"grad_norm": 2.8629449765918564,
"learning_rate": 9.996973725330405e-06,
"loss": 0.8584,
"step": 29700
},
{
"epoch": 1.1034185897969953,
"grad_norm": 2.9165610257153873,
"learning_rate": 9.996744739534042e-06,
"loss": 0.8665,
"step": 29800
},
{
"epoch": 1.1071213678061966,
"grad_norm": 3.1796750168940315,
"learning_rate": 9.996507406377728e-06,
"loss": 0.8787,
"step": 29900
},
{
"epoch": 1.110824145815398,
"grad_norm": 3.160592130889014,
"learning_rate": 9.99626172625795e-06,
"loss": 0.8718,
"step": 30000
},
{
"epoch": 1.1145269238245994,
"grad_norm": 2.779237960939386,
"learning_rate": 9.99600769958513e-06,
"loss": 0.878,
"step": 30100
},
{
"epoch": 1.1182297018338008,
"grad_norm": 3.108400228708763,
"learning_rate": 9.995745326783628e-06,
"loss": 0.868,
"step": 30200
},
{
"epoch": 1.1219324798430022,
"grad_norm": 3.0911030432916817,
"learning_rate": 9.995474608291761e-06,
"loss": 0.8621,
"step": 30300
},
{
"epoch": 1.1256352578522035,
"grad_norm": 2.5787433018725,
"learning_rate": 9.995195544561778e-06,
"loss": 0.8754,
"step": 30400
},
{
"epoch": 1.1293380358614051,
"grad_norm": 2.8342296943136165,
"learning_rate": 9.994908136059868e-06,
"loss": 0.8373,
"step": 30500
},
{
"epoch": 1.1330408138706065,
"grad_norm": 2.476461823350524,
"learning_rate": 9.994612383266171e-06,
"loss": 0.842,
"step": 30600
},
{
"epoch": 1.136743591879808,
"grad_norm": 3.9271884057807296,
"learning_rate": 9.994308286674754e-06,
"loss": 0.8453,
"step": 30700
},
{
"epoch": 1.1404463698890093,
"grad_norm": 3.1786946065406236,
"learning_rate": 9.99399584679363e-06,
"loss": 0.8648,
"step": 30800
},
{
"epoch": 1.1441491478982106,
"grad_norm": 2.634901131802063,
"learning_rate": 9.99367506414475e-06,
"loss": 0.8751,
"step": 30900
},
{
"epoch": 1.147851925907412,
"grad_norm": 3.078376805123231,
"learning_rate": 9.993345939264e-06,
"loss": 0.8635,
"step": 31000
},
{
"epoch": 1.1515547039166134,
"grad_norm": 3.1528853117678786,
"learning_rate": 9.9930084727012e-06,
"loss": 0.843,
"step": 31100
},
{
"epoch": 1.1552574819258148,
"grad_norm": 2.828228109125317,
"learning_rate": 9.992662665020112e-06,
"loss": 0.8624,
"step": 31200
},
{
"epoch": 1.1589602599350162,
"grad_norm": 3.1953124851506707,
"learning_rate": 9.992308516798426e-06,
"loss": 0.8579,
"step": 31300
},
{
"epoch": 1.1626630379442178,
"grad_norm": 3.0902899613744603,
"learning_rate": 9.991946028627768e-06,
"loss": 0.8527,
"step": 31400
},
{
"epoch": 1.1663658159534191,
"grad_norm": 2.9463681925783023,
"learning_rate": 9.991575201113695e-06,
"loss": 0.8268,
"step": 31500
},
{
"epoch": 1.1700685939626205,
"grad_norm": 2.8044002498862057,
"learning_rate": 9.991196034875698e-06,
"loss": 0.8395,
"step": 31600
},
{
"epoch": 1.173771371971822,
"grad_norm": 2.9461135183049936,
"learning_rate": 9.990808530547197e-06,
"loss": 0.858,
"step": 31700
},
{
"epoch": 1.1774741499810233,
"grad_norm": 3.213674861669168,
"learning_rate": 9.990412688775542e-06,
"loss": 0.864,
"step": 31800
},
{
"epoch": 1.1811769279902247,
"grad_norm": 2.71190688635739,
"learning_rate": 9.99000851022201e-06,
"loss": 0.855,
"step": 31900
},
{
"epoch": 1.184879705999426,
"grad_norm": 3.723777231794139,
"learning_rate": 9.9895959955618e-06,
"loss": 0.8456,
"step": 32000
},
{
"epoch": 1.1885824840086274,
"grad_norm": 2.4622343303272918,
"learning_rate": 9.989175145484049e-06,
"loss": 0.8217,
"step": 32100
},
{
"epoch": 1.1922852620178288,
"grad_norm": 2.8875388301298472,
"learning_rate": 9.98874596069181e-06,
"loss": 0.8591,
"step": 32200
},
{
"epoch": 1.1959880400270302,
"grad_norm": 2.5910572126310716,
"learning_rate": 9.988308441902061e-06,
"loss": 0.8453,
"step": 32300
},
{
"epoch": 1.1996908180362316,
"grad_norm": 2.4069698963541755,
"learning_rate": 9.987862589845703e-06,
"loss": 0.8503,
"step": 32400
},
{
"epoch": 1.2033935960454332,
"grad_norm": 2.914526087822122,
"learning_rate": 9.987408405267561e-06,
"loss": 0.8668,
"step": 32500
},
{
"epoch": 1.2070963740546345,
"grad_norm": 2.8077292984671485,
"learning_rate": 9.986945888926374e-06,
"loss": 0.8314,
"step": 32600
},
{
"epoch": 1.210799152063836,
"grad_norm": 3.6704712964311437,
"learning_rate": 9.986475041594805e-06,
"loss": 0.8371,
"step": 32700
},
{
"epoch": 1.2145019300730373,
"grad_norm": 2.6706897230097297,
"learning_rate": 9.985995864059433e-06,
"loss": 0.876,
"step": 32800
},
{
"epoch": 1.2182047080822387,
"grad_norm": 3.0940143448561037,
"learning_rate": 9.98550835712075e-06,
"loss": 0.8364,
"step": 32900
},
{
"epoch": 1.22190748609144,
"grad_norm": 3.6081252765429963,
"learning_rate": 9.98501252159317e-06,
"loss": 0.8378,
"step": 33000
},
{
"epoch": 1.2256102641006414,
"grad_norm": 2.77425534329751,
"learning_rate": 9.984508358305012e-06,
"loss": 0.8449,
"step": 33100
},
{
"epoch": 1.2293130421098428,
"grad_norm": 2.847973382987711,
"learning_rate": 9.98399586809851e-06,
"loss": 0.8446,
"step": 33200
},
{
"epoch": 1.2330158201190442,
"grad_norm": 3.1916476231654984,
"learning_rate": 9.983475051829814e-06,
"loss": 0.8499,
"step": 33300
},
{
"epoch": 1.2367185981282458,
"grad_norm": 3.0278042743633047,
"learning_rate": 9.982945910368974e-06,
"loss": 0.8427,
"step": 33400
},
{
"epoch": 1.2404213761374472,
"grad_norm": 2.9987823840994685,
"learning_rate": 9.982408444599955e-06,
"loss": 0.8565,
"step": 33500
},
{
"epoch": 1.2441241541466486,
"grad_norm": 2.956201689254189,
"learning_rate": 9.981862655420626e-06,
"loss": 0.8303,
"step": 33600
},
{
"epoch": 1.24782693215585,
"grad_norm": 3.306173716101804,
"learning_rate": 9.981308543742759e-06,
"loss": 0.8351,
"step": 33700
},
{
"epoch": 1.2515297101650513,
"grad_norm": 2.8358161347669624,
"learning_rate": 9.98074611049203e-06,
"loss": 0.854,
"step": 33800
},
{
"epoch": 1.2552324881742527,
"grad_norm": 3.2862147001432263,
"learning_rate": 9.980175356608018e-06,
"loss": 0.8176,
"step": 33900
},
{
"epoch": 1.258935266183454,
"grad_norm": 3.839933772493448,
"learning_rate": 9.979596283044202e-06,
"loss": 0.8353,
"step": 34000
},
{
"epoch": 1.2626380441926555,
"grad_norm": 3.039521277363643,
"learning_rate": 9.979008890767958e-06,
"loss": 0.8313,
"step": 34100
},
{
"epoch": 1.2663408222018568,
"grad_norm": 2.670418682490729,
"learning_rate": 9.97841318076056e-06,
"loss": 0.8299,
"step": 34200
},
{
"epoch": 1.2700436002110584,
"grad_norm": 2.6610287960828947,
"learning_rate": 9.977809154017177e-06,
"loss": 0.8255,
"step": 34300
},
{
"epoch": 1.2737463782202598,
"grad_norm": 4.255372007943821,
"learning_rate": 9.977196811546874e-06,
"loss": 0.8178,
"step": 34400
},
{
"epoch": 1.2774491562294612,
"grad_norm": 2.486491204040578,
"learning_rate": 9.976576154372603e-06,
"loss": 0.8131,
"step": 34500
},
{
"epoch": 1.2811519342386626,
"grad_norm": 2.5063224331855967,
"learning_rate": 9.975947183531208e-06,
"loss": 0.8425,
"step": 34600
},
{
"epoch": 1.284854712247864,
"grad_norm": 2.7512179307220226,
"learning_rate": 9.975309900073424e-06,
"loss": 0.8593,
"step": 34700
},
{
"epoch": 1.2885574902570653,
"grad_norm": 2.712022237784725,
"learning_rate": 9.974664305063872e-06,
"loss": 0.8217,
"step": 34800
},
{
"epoch": 1.2922602682662667,
"grad_norm": 2.5781139265649213,
"learning_rate": 9.974010399581056e-06,
"loss": 0.8009,
"step": 34900
},
{
"epoch": 1.295963046275468,
"grad_norm": 2.3094975445159927,
"learning_rate": 9.973348184717362e-06,
"loss": 0.8441,
"step": 35000
},
{
"epoch": 1.2996658242846695,
"grad_norm": 3.222306020034265,
"learning_rate": 9.972677661579062e-06,
"loss": 0.8453,
"step": 35100
},
{
"epoch": 1.303368602293871,
"grad_norm": 2.4004480742086383,
"learning_rate": 9.971998831286305e-06,
"loss": 0.8352,
"step": 35200
},
{
"epoch": 1.3070713803030725,
"grad_norm": 2.9242567540358193,
"learning_rate": 9.971311694973115e-06,
"loss": 0.8251,
"step": 35300
},
{
"epoch": 1.3107741583122738,
"grad_norm": 3.3760497497529234,
"learning_rate": 9.970616253787394e-06,
"loss": 0.8212,
"step": 35400
},
{
"epoch": 1.3144769363214752,
"grad_norm": 2.738484575208949,
"learning_rate": 9.969912508890924e-06,
"loss": 0.8338,
"step": 35500
},
{
"epoch": 1.3181797143306766,
"grad_norm": 2.403858688871253,
"learning_rate": 9.969200461459344e-06,
"loss": 0.8051,
"step": 35600
},
{
"epoch": 1.321882492339878,
"grad_norm": 3.317873477816687,
"learning_rate": 9.96848011268218e-06,
"loss": 0.8275,
"step": 35700
},
{
"epoch": 1.3255852703490794,
"grad_norm": 3.0383409211764465,
"learning_rate": 9.967751463762811e-06,
"loss": 0.8102,
"step": 35800
},
{
"epoch": 1.3292880483582807,
"grad_norm": 2.716682345656308,
"learning_rate": 9.967014515918491e-06,
"loss": 0.7922,
"step": 35900
},
{
"epoch": 1.3329908263674821,
"grad_norm": 2.5903044471345407,
"learning_rate": 9.966269270380338e-06,
"loss": 0.8152,
"step": 36000
},
{
"epoch": 1.3366936043766837,
"grad_norm": 2.6042198988611505,
"learning_rate": 9.965515728393324e-06,
"loss": 0.816,
"step": 36100
},
{
"epoch": 1.3403963823858849,
"grad_norm": 2.693329966895918,
"learning_rate": 9.96475389121629e-06,
"loss": 0.8213,
"step": 36200
},
{
"epoch": 1.3440991603950865,
"grad_norm": 2.8636639283082683,
"learning_rate": 9.963983760121927e-06,
"loss": 0.8028,
"step": 36300
},
{
"epoch": 1.3478019384042879,
"grad_norm": 2.265142729976952,
"learning_rate": 9.963205336396789e-06,
"loss": 0.8312,
"step": 36400
},
{
"epoch": 1.3515047164134892,
"grad_norm": 2.346991317901365,
"learning_rate": 9.962418621341275e-06,
"loss": 0.8057,
"step": 36500
},
{
"epoch": 1.3552074944226906,
"grad_norm": 2.9365358115995988,
"learning_rate": 9.961623616269642e-06,
"loss": 0.811,
"step": 36600
},
{
"epoch": 1.358910272431892,
"grad_norm": 2.765547820893004,
"learning_rate": 9.960820322509991e-06,
"loss": 0.8176,
"step": 36700
},
{
"epoch": 1.3626130504410934,
"grad_norm": 2.242257331515756,
"learning_rate": 9.960008741404278e-06,
"loss": 0.8093,
"step": 36800
},
{
"epoch": 1.3663158284502948,
"grad_norm": 2.6584008858920396,
"learning_rate": 9.959188874308289e-06,
"loss": 0.8128,
"step": 36900
},
{
"epoch": 1.3700186064594964,
"grad_norm": 2.426868840194363,
"learning_rate": 9.958360722591666e-06,
"loss": 0.8356,
"step": 37000
},
{
"epoch": 1.3737213844686975,
"grad_norm": 2.372175307387934,
"learning_rate": 9.957524287637887e-06,
"loss": 0.7955,
"step": 37100
},
{
"epoch": 1.3774241624778991,
"grad_norm": 3.223986523587691,
"learning_rate": 9.956679570844263e-06,
"loss": 0.8446,
"step": 37200
},
{
"epoch": 1.3811269404871005,
"grad_norm": 2.9758674723626495,
"learning_rate": 9.955826573621947e-06,
"loss": 0.8258,
"step": 37300
},
{
"epoch": 1.3848297184963019,
"grad_norm": 2.4986387404446972,
"learning_rate": 9.954965297395917e-06,
"loss": 0.838,
"step": 37400
},
{
"epoch": 1.3885324965055033,
"grad_norm": 3.0506103639317383,
"learning_rate": 9.954095743604993e-06,
"loss": 0.8106,
"step": 37500
},
{
"epoch": 1.3922352745147046,
"grad_norm": 2.596803194782613,
"learning_rate": 9.953217913701809e-06,
"loss": 0.8101,
"step": 37600
},
{
"epoch": 1.395938052523906,
"grad_norm": 3.0268925428493034,
"learning_rate": 9.952331809152837e-06,
"loss": 0.7984,
"step": 37700
},
{
"epoch": 1.3996408305331074,
"grad_norm": 2.811749288978374,
"learning_rate": 9.951437431438368e-06,
"loss": 0.7956,
"step": 37800
},
{
"epoch": 1.403343608542309,
"grad_norm": 2.5791340034648673,
"learning_rate": 9.95053478205251e-06,
"loss": 0.807,
"step": 37900
},
{
"epoch": 1.4070463865515102,
"grad_norm": 2.73932100840186,
"learning_rate": 9.949623862503194e-06,
"loss": 0.8045,
"step": 38000
},
{
"epoch": 1.4107491645607118,
"grad_norm": 2.4120952001387836,
"learning_rate": 9.948704674312166e-06,
"loss": 0.8062,
"step": 38100
},
{
"epoch": 1.4144519425699131,
"grad_norm": 2.876732239954283,
"learning_rate": 9.947777219014985e-06,
"loss": 0.8153,
"step": 38200
},
{
"epoch": 1.4181547205791145,
"grad_norm": 2.648889760862942,
"learning_rate": 9.94684149816102e-06,
"loss": 0.7769,
"step": 38300
},
{
"epoch": 1.421857498588316,
"grad_norm": 2.3672035754478253,
"learning_rate": 9.945897513313446e-06,
"loss": 0.8248,
"step": 38400
},
{
"epoch": 1.4255602765975173,
"grad_norm": 2.5897674265990966,
"learning_rate": 9.944945266049249e-06,
"loss": 0.8168,
"step": 38500
},
{
"epoch": 1.4292630546067187,
"grad_norm": 2.3627727017427986,
"learning_rate": 9.943984757959214e-06,
"loss": 0.8061,
"step": 38600
},
{
"epoch": 1.43296583261592,
"grad_norm": 2.75793352812743,
"learning_rate": 9.943015990647928e-06,
"loss": 0.8406,
"step": 38700
},
{
"epoch": 1.4366686106251216,
"grad_norm": 2.93848471854443,
"learning_rate": 9.942038965733772e-06,
"loss": 0.8093,
"step": 38800
},
{
"epoch": 1.4403713886343228,
"grad_norm": 2.649269836710229,
"learning_rate": 9.941053684848927e-06,
"loss": 0.8156,
"step": 38900
},
{
"epoch": 1.4440741666435244,
"grad_norm": 2.8474414546277336,
"learning_rate": 9.940060149639362e-06,
"loss": 0.803,
"step": 39000
},
{
"epoch": 1.4477769446527258,
"grad_norm": 2.48930011170331,
"learning_rate": 9.939058361764835e-06,
"loss": 0.8149,
"step": 39100
},
{
"epoch": 1.4514797226619272,
"grad_norm": 3.0541359408620954,
"learning_rate": 9.938048322898897e-06,
"loss": 0.7905,
"step": 39200
},
{
"epoch": 1.4551825006711285,
"grad_norm": 2.4906684578035634,
"learning_rate": 9.937030034728875e-06,
"loss": 0.7983,
"step": 39300
},
{
"epoch": 1.45888527868033,
"grad_norm": 2.522647352158736,
"learning_rate": 9.93600349895588e-06,
"loss": 0.8257,
"step": 39400
},
{
"epoch": 1.4625880566895313,
"grad_norm": 2.3593412219963636,
"learning_rate": 9.934968717294801e-06,
"loss": 0.8156,
"step": 39500
},
{
"epoch": 1.4662908346987327,
"grad_norm": 2.557490073344118,
"learning_rate": 9.933925691474306e-06,
"loss": 0.8168,
"step": 39600
},
{
"epoch": 1.469993612707934,
"grad_norm": 2.338179664285831,
"learning_rate": 9.932874423236827e-06,
"loss": 0.8037,
"step": 39700
},
{
"epoch": 1.4736963907171354,
"grad_norm": 2.5845812397294106,
"learning_rate": 9.931814914338574e-06,
"loss": 0.8037,
"step": 39800
},
{
"epoch": 1.477399168726337,
"grad_norm": 2.738120618908721,
"learning_rate": 9.930747166549517e-06,
"loss": 0.8248,
"step": 39900
},
{
"epoch": 1.4811019467355384,
"grad_norm": 3.0001861261521077,
"learning_rate": 9.929671181653393e-06,
"loss": 0.8158,
"step": 40000
},
{
"epoch": 1.4848047247447398,
"grad_norm": 2.8790789963198864,
"learning_rate": 9.9285869614477e-06,
"loss": 0.81,
"step": 40100
},
{
"epoch": 1.4885075027539412,
"grad_norm": 2.3530303824789263,
"learning_rate": 9.927494507743693e-06,
"loss": 0.8065,
"step": 40200
},
{
"epoch": 1.4922102807631425,
"grad_norm": 2.5001939120344563,
"learning_rate": 9.926393822366378e-06,
"loss": 0.7986,
"step": 40300
},
{
"epoch": 1.495913058772344,
"grad_norm": 3.322122232875802,
"learning_rate": 9.925284907154518e-06,
"loss": 0.8018,
"step": 40400
},
{
"epoch": 1.4996158367815453,
"grad_norm": 2.632512694974853,
"learning_rate": 9.924167763960622e-06,
"loss": 0.7926,
"step": 40500
},
{
"epoch": 1.503318614790747,
"grad_norm": 3.298194101210266,
"learning_rate": 9.923042394650944e-06,
"loss": 0.8012,
"step": 40600
},
{
"epoch": 1.507021392799948,
"grad_norm": 2.5336893450551714,
"learning_rate": 9.921908801105478e-06,
"loss": 0.7979,
"step": 40700
},
{
"epoch": 1.5107241708091497,
"grad_norm": 2.805390982714785,
"learning_rate": 9.920766985217964e-06,
"loss": 0.8007,
"step": 40800
},
{
"epoch": 1.5144269488183508,
"grad_norm": 2.6228261966166846,
"learning_rate": 9.919616948895869e-06,
"loss": 0.7925,
"step": 40900
},
{
"epoch": 1.5181297268275524,
"grad_norm": 2.608871750206852,
"learning_rate": 9.918458694060401e-06,
"loss": 0.8165,
"step": 41000
},
{
"epoch": 1.5218325048367538,
"grad_norm": 2.3331476004266802,
"learning_rate": 9.917292222646494e-06,
"loss": 0.812,
"step": 41100
},
{
"epoch": 1.5255352828459552,
"grad_norm": 2.256474693417922,
"learning_rate": 9.916117536602805e-06,
"loss": 0.8252,
"step": 41200
},
{
"epoch": 1.5292380608551566,
"grad_norm": 2.2698106482233444,
"learning_rate": 9.914934637891717e-06,
"loss": 0.8049,
"step": 41300
},
{
"epoch": 1.532940838864358,
"grad_norm": 2.505148031562781,
"learning_rate": 9.913743528489335e-06,
"loss": 0.7945,
"step": 41400
},
{
"epoch": 1.5366436168735595,
"grad_norm": 2.273130621446297,
"learning_rate": 9.912544210385478e-06,
"loss": 0.7592,
"step": 41500
},
{
"epoch": 1.5403463948827607,
"grad_norm": 2.7980874710174746,
"learning_rate": 9.911336685583678e-06,
"loss": 0.788,
"step": 41600
},
{
"epoch": 1.5440491728919623,
"grad_norm": 2.518652055633435,
"learning_rate": 9.910120956101177e-06,
"loss": 0.7985,
"step": 41700
},
{
"epoch": 1.5477519509011635,
"grad_norm": 2.8908580237428727,
"learning_rate": 9.908897023968923e-06,
"loss": 0.8022,
"step": 41800
},
{
"epoch": 1.551454728910365,
"grad_norm": 2.796915452230328,
"learning_rate": 9.907664891231567e-06,
"loss": 0.7891,
"step": 41900
},
{
"epoch": 1.5551575069195664,
"grad_norm": 2.58199772952833,
"learning_rate": 9.906424559947463e-06,
"loss": 0.8127,
"step": 42000
},
{
"epoch": 1.5588602849287678,
"grad_norm": 2.9876242988374795,
"learning_rate": 9.905176032188657e-06,
"loss": 0.8103,
"step": 42100
},
{
"epoch": 1.5625630629379692,
"grad_norm": 2.5402919151901284,
"learning_rate": 9.903919310040888e-06,
"loss": 0.8088,
"step": 42200
},
{
"epoch": 1.5662658409471706,
"grad_norm": 2.759850752080656,
"learning_rate": 9.902654395603585e-06,
"loss": 0.7802,
"step": 42300
},
{
"epoch": 1.569968618956372,
"grad_norm": 2.303462256584801,
"learning_rate": 9.901381290989866e-06,
"loss": 0.814,
"step": 42400
},
{
"epoch": 1.5736713969655733,
"grad_norm": 2.5606229953487007,
"learning_rate": 9.900099998326524e-06,
"loss": 0.8199,
"step": 42500
},
{
"epoch": 1.577374174974775,
"grad_norm": 2.429988282841457,
"learning_rate": 9.898810519754038e-06,
"loss": 0.8119,
"step": 42600
},
{
"epoch": 1.581076952983976,
"grad_norm": 2.5219967389765823,
"learning_rate": 9.897512857426559e-06,
"loss": 0.8047,
"step": 42700
},
{
"epoch": 1.5847797309931777,
"grad_norm": 1.9311972103887236,
"learning_rate": 9.896207013511906e-06,
"loss": 0.7986,
"step": 42800
},
{
"epoch": 1.588482509002379,
"grad_norm": 2.234707722695266,
"learning_rate": 9.894892990191572e-06,
"loss": 0.8208,
"step": 42900
},
{
"epoch": 1.5921852870115805,
"grad_norm": 2.52585572604136,
"learning_rate": 9.89357078966071e-06,
"loss": 0.8055,
"step": 43000
},
{
"epoch": 1.5958880650207818,
"grad_norm": 2.4607414449083564,
"learning_rate": 9.892240414128134e-06,
"loss": 0.814,
"step": 43100
},
{
"epoch": 1.5995908430299832,
"grad_norm": 2.397978043007156,
"learning_rate": 9.890901865816318e-06,
"loss": 0.7858,
"step": 43200
},
{
"epoch": 1.6032936210391846,
"grad_norm": 2.6825010522588464,
"learning_rate": 9.889555146961386e-06,
"loss": 0.7643,
"step": 43300
},
{
"epoch": 1.606996399048386,
"grad_norm": 2.296924382807737,
"learning_rate": 9.888200259813112e-06,
"loss": 0.774,
"step": 43400
},
{
"epoch": 1.6106991770575876,
"grad_norm": 3.086030898842717,
"learning_rate": 9.886837206634913e-06,
"loss": 0.7903,
"step": 43500
},
{
"epoch": 1.6144019550667887,
"grad_norm": 2.1770814434757852,
"learning_rate": 9.885465989703855e-06,
"loss": 0.7992,
"step": 43600
},
{
"epoch": 1.6181047330759903,
"grad_norm": 2.1173853625857677,
"learning_rate": 9.884086611310636e-06,
"loss": 0.7562,
"step": 43700
},
{
"epoch": 1.6218075110851915,
"grad_norm": 2.6347540026530383,
"learning_rate": 9.88269907375959e-06,
"loss": 0.7812,
"step": 43800
},
{
"epoch": 1.625510289094393,
"grad_norm": 2.3677734896044367,
"learning_rate": 9.881303379368679e-06,
"loss": 0.7949,
"step": 43900
},
{
"epoch": 1.6292130671035945,
"grad_norm": 1.9541227462181452,
"learning_rate": 9.879899530469495e-06,
"loss": 0.8014,
"step": 44000
},
{
"epoch": 1.6329158451127959,
"grad_norm": 2.250577456663235,
"learning_rate": 9.878487529407252e-06,
"loss": 0.789,
"step": 44100
},
{
"epoch": 1.6366186231219972,
"grad_norm": 2.332159230411907,
"learning_rate": 9.877067378540783e-06,
"loss": 0.7583,
"step": 44200
},
{
"epoch": 1.6403214011311986,
"grad_norm": 2.620339688070587,
"learning_rate": 9.875639080242532e-06,
"loss": 0.7609,
"step": 44300
},
{
"epoch": 1.6440241791404002,
"grad_norm": 2.281843212752168,
"learning_rate": 9.874202636898557e-06,
"loss": 0.7923,
"step": 44400
},
{
"epoch": 1.6477269571496014,
"grad_norm": 2.7756614740728756,
"learning_rate": 9.872758050908525e-06,
"loss": 0.8133,
"step": 44500
},
{
"epoch": 1.651429735158803,
"grad_norm": 2.0566706027251933,
"learning_rate": 9.871305324685698e-06,
"loss": 0.7771,
"step": 44600
},
{
"epoch": 1.6551325131680041,
"grad_norm": 2.7157875508307203,
"learning_rate": 9.869844460656946e-06,
"loss": 0.7887,
"step": 44700
},
{
"epoch": 1.6588352911772057,
"grad_norm": 2.3909529963011225,
"learning_rate": 9.868375461262729e-06,
"loss": 0.786,
"step": 44800
},
{
"epoch": 1.6625380691864071,
"grad_norm": 3.0348798043450107,
"learning_rate": 9.866898328957097e-06,
"loss": 0.7658,
"step": 44900
},
{
"epoch": 1.6662408471956085,
"grad_norm": 2.6335015906277564,
"learning_rate": 9.865413066207686e-06,
"loss": 0.7995,
"step": 45000
},
{
"epoch": 1.6699436252048099,
"grad_norm": 2.385629891283996,
"learning_rate": 9.863919675495718e-06,
"loss": 0.7915,
"step": 45100
},
{
"epoch": 1.6736464032140113,
"grad_norm": 2.9226049040665196,
"learning_rate": 9.862418159315994e-06,
"loss": 0.7846,
"step": 45200
},
{
"epoch": 1.6773491812232129,
"grad_norm": 2.0898569179597546,
"learning_rate": 9.860908520176881e-06,
"loss": 0.7798,
"step": 45300
},
{
"epoch": 1.681051959232414,
"grad_norm": 2.486543038672127,
"learning_rate": 9.859390760600323e-06,
"loss": 0.788,
"step": 45400
},
{
"epoch": 1.6847547372416156,
"grad_norm": 2.150826890053404,
"learning_rate": 9.857864883121829e-06,
"loss": 0.8,
"step": 45500
},
{
"epoch": 1.6884575152508168,
"grad_norm": 2.0064655253486494,
"learning_rate": 9.856330890290467e-06,
"loss": 0.7893,
"step": 45600
},
{
"epoch": 1.6921602932600184,
"grad_norm": 1.974144415250403,
"learning_rate": 9.854788784668862e-06,
"loss": 0.8071,
"step": 45700
},
{
"epoch": 1.6958630712692198,
"grad_norm": 2.8759598688034553,
"learning_rate": 9.853238568833198e-06,
"loss": 0.795,
"step": 45800
},
{
"epoch": 1.6995658492784211,
"grad_norm": 2.4899159117702325,
"learning_rate": 9.851680245373201e-06,
"loss": 0.7933,
"step": 45900
},
{
"epoch": 1.7032686272876225,
"grad_norm": 2.4900169396878535,
"learning_rate": 9.85011381689214e-06,
"loss": 0.7734,
"step": 46000
},
{
"epoch": 1.706971405296824,
"grad_norm": 2.422580198248974,
"learning_rate": 9.848539286006832e-06,
"loss": 0.7951,
"step": 46100
},
{
"epoch": 1.7106741833060255,
"grad_norm": 2.8907724621020634,
"learning_rate": 9.846956655347621e-06,
"loss": 0.7905,
"step": 46200
},
{
"epoch": 1.7143769613152267,
"grad_norm": 2.013474316995975,
"learning_rate": 9.845365927558387e-06,
"loss": 0.8006,
"step": 46300
},
{
"epoch": 1.7180797393244283,
"grad_norm": 2.1840681748691444,
"learning_rate": 9.843767105296536e-06,
"loss": 0.7635,
"step": 46400
},
{
"epoch": 1.7217825173336294,
"grad_norm": 2.5617018354083934,
"learning_rate": 9.842160191232996e-06,
"loss": 0.7824,
"step": 46500
},
{
"epoch": 1.725485295342831,
"grad_norm": 2.308519604503349,
"learning_rate": 9.840545188052214e-06,
"loss": 0.774,
"step": 46600
},
{
"epoch": 1.7291880733520324,
"grad_norm": 2.2499525442223853,
"learning_rate": 9.838922098452146e-06,
"loss": 0.756,
"step": 46700
},
{
"epoch": 1.7328908513612338,
"grad_norm": 2.3820412376888322,
"learning_rate": 9.83729092514426e-06,
"loss": 0.789,
"step": 46800
},
{
"epoch": 1.7365936293704352,
"grad_norm": 2.5725331335845127,
"learning_rate": 9.835651670853532e-06,
"loss": 0.7854,
"step": 46900
},
{
"epoch": 1.7402964073796365,
"grad_norm": 2.309343999229651,
"learning_rate": 9.83400433831843e-06,
"loss": 0.775,
"step": 47000
},
{
"epoch": 1.7439991853888381,
"grad_norm": 2.5850280201791436,
"learning_rate": 9.832348930290925e-06,
"loss": 0.7802,
"step": 47100
},
{
"epoch": 1.7477019633980393,
"grad_norm": 2.2679853588645105,
"learning_rate": 9.830685449536472e-06,
"loss": 0.7678,
"step": 47200
},
{
"epoch": 1.751404741407241,
"grad_norm": 2.5086985240224635,
"learning_rate": 9.829013898834014e-06,
"loss": 0.7577,
"step": 47300
},
{
"epoch": 1.755107519416442,
"grad_norm": 1.9266513380957035,
"learning_rate": 9.827334280975978e-06,
"loss": 0.7758,
"step": 47400
},
{
"epoch": 1.7588102974256437,
"grad_norm": 2.2928770756948547,
"learning_rate": 9.825646598768267e-06,
"loss": 0.7637,
"step": 47500
},
{
"epoch": 1.762513075434845,
"grad_norm": 2.5105321008988146,
"learning_rate": 9.82395085503025e-06,
"loss": 0.7832,
"step": 47600
},
{
"epoch": 1.7662158534440464,
"grad_norm": 2.2393983265475867,
"learning_rate": 9.822247052594775e-06,
"loss": 0.7957,
"step": 47700
},
{
"epoch": 1.7699186314532478,
"grad_norm": 2.2356517021326447,
"learning_rate": 9.82053519430814e-06,
"loss": 0.7786,
"step": 47800
},
{
"epoch": 1.7736214094624492,
"grad_norm": 2.1349870329764467,
"learning_rate": 9.818815283030107e-06,
"loss": 0.7639,
"step": 47900
},
{
"epoch": 1.7773241874716506,
"grad_norm": 2.207362188864924,
"learning_rate": 9.817087321633891e-06,
"loss": 0.7774,
"step": 48000
},
{
"epoch": 1.781026965480852,
"grad_norm": 2.357569522929328,
"learning_rate": 9.815351313006155e-06,
"loss": 0.7903,
"step": 48100
},
{
"epoch": 1.7847297434900535,
"grad_norm": 2.5604354649057512,
"learning_rate": 9.813607260047007e-06,
"loss": 0.7861,
"step": 48200
},
{
"epoch": 1.7884325214992547,
"grad_norm": 2.3900699599516964,
"learning_rate": 9.811855165669985e-06,
"loss": 0.7883,
"step": 48300
},
{
"epoch": 1.7921352995084563,
"grad_norm": 2.2498623353917093,
"learning_rate": 9.810095032802075e-06,
"loss": 0.7749,
"step": 48400
},
{
"epoch": 1.7958380775176577,
"grad_norm": 2.523396892959795,
"learning_rate": 9.808326864383679e-06,
"loss": 0.773,
"step": 48500
},
{
"epoch": 1.799540855526859,
"grad_norm": 2.2014201396256214,
"learning_rate": 9.806550663368628e-06,
"loss": 0.7784,
"step": 48600
},
{
"epoch": 1.8032436335360604,
"grad_norm": 2.137068120876505,
"learning_rate": 9.804766432724172e-06,
"loss": 0.781,
"step": 48700
},
{
"epoch": 1.8069464115452618,
"grad_norm": 2.1941678037156036,
"learning_rate": 9.802974175430975e-06,
"loss": 0.7813,
"step": 48800
},
{
"epoch": 1.8106491895544632,
"grad_norm": 2.3496769370735775,
"learning_rate": 9.801173894483111e-06,
"loss": 0.7758,
"step": 48900
},
{
"epoch": 1.8143519675636646,
"grad_norm": 2.6951877515683917,
"learning_rate": 9.799365592888054e-06,
"loss": 0.7753,
"step": 49000
},
{
"epoch": 1.8180547455728662,
"grad_norm": 2.2987294149497504,
"learning_rate": 9.797549273666682e-06,
"loss": 0.7979,
"step": 49100
},
{
"epoch": 1.8217575235820673,
"grad_norm": 2.1550107736300883,
"learning_rate": 9.795724939853265e-06,
"loss": 0.7547,
"step": 49200
},
{
"epoch": 1.825460301591269,
"grad_norm": 2.1082078725588724,
"learning_rate": 9.793892594495457e-06,
"loss": 0.7481,
"step": 49300
},
{
"epoch": 1.82916307960047,
"grad_norm": 2.535213902917527,
"learning_rate": 9.792052240654304e-06,
"loss": 0.7568,
"step": 49400
},
{
"epoch": 1.8328658576096717,
"grad_norm": 2.3198113159972595,
"learning_rate": 9.790203881404228e-06,
"loss": 0.7834,
"step": 49500
},
{
"epoch": 1.836568635618873,
"grad_norm": 2.519238802449602,
"learning_rate": 9.78834751983302e-06,
"loss": 0.7699,
"step": 49600
},
{
"epoch": 1.8402714136280744,
"grad_norm": 2.3333401238694798,
"learning_rate": 9.786483159041842e-06,
"loss": 0.7834,
"step": 49700
},
{
"epoch": 1.8439741916372758,
"grad_norm": 2.4205774253956385,
"learning_rate": 9.784610802145222e-06,
"loss": 0.7863,
"step": 49800
},
{
"epoch": 1.8476769696464772,
"grad_norm": 2.178068900898099,
"learning_rate": 9.782730452271046e-06,
"loss": 0.7674,
"step": 49900
},
{
"epoch": 1.8513797476556788,
"grad_norm": 2.6080625282619714,
"learning_rate": 9.780842112560548e-06,
"loss": 0.7642,
"step": 50000
},
{
"epoch": 1.85508252566488,
"grad_norm": 2.843984991990864,
"learning_rate": 9.778945786168308e-06,
"loss": 0.7655,
"step": 50100
},
{
"epoch": 1.8587853036740816,
"grad_norm": 2.2308315520099424,
"learning_rate": 9.777041476262259e-06,
"loss": 0.7656,
"step": 50200
},
{
"epoch": 1.8624880816832827,
"grad_norm": 2.400873208112685,
"learning_rate": 9.775129186023661e-06,
"loss": 0.7588,
"step": 50300
},
{
"epoch": 1.8661908596924843,
"grad_norm": 2.9815623334199604,
"learning_rate": 9.773208918647111e-06,
"loss": 0.7722,
"step": 50400
},
{
"epoch": 1.8698936377016857,
"grad_norm": 2.6488046885793373,
"learning_rate": 9.771280677340528e-06,
"loss": 0.7813,
"step": 50500
},
{
"epoch": 1.873596415710887,
"grad_norm": 2.521885076282361,
"learning_rate": 9.769344465325153e-06,
"loss": 0.7846,
"step": 50600
},
{
"epoch": 1.8772991937200885,
"grad_norm": 1.8398874480846792,
"learning_rate": 9.767400285835546e-06,
"loss": 0.7799,
"step": 50700
},
{
"epoch": 1.8810019717292898,
"grad_norm": 2.6273527775975114,
"learning_rate": 9.765448142119575e-06,
"loss": 0.7463,
"step": 50800
},
{
"epoch": 1.8847047497384914,
"grad_norm": 2.4232551189720626,
"learning_rate": 9.763488037438412e-06,
"loss": 0.7763,
"step": 50900
},
{
"epoch": 1.8884075277476926,
"grad_norm": 2.182119510957546,
"learning_rate": 9.761519975066524e-06,
"loss": 0.76,
"step": 51000
},
{
"epoch": 1.8921103057568942,
"grad_norm": 2.4800365305029106,
"learning_rate": 9.759543958291683e-06,
"loss": 0.7878,
"step": 51100
},
{
"epoch": 1.8958130837660954,
"grad_norm": 2.2209159681563055,
"learning_rate": 9.757559990414941e-06,
"loss": 0.7706,
"step": 51200
},
{
"epoch": 1.899515861775297,
"grad_norm": 2.2456207460433175,
"learning_rate": 9.755568074750635e-06,
"loss": 0.7533,
"step": 51300
},
{
"epoch": 1.9032186397844983,
"grad_norm": 2.216946549826359,
"learning_rate": 9.753568214626375e-06,
"loss": 0.7651,
"step": 51400
},
{
"epoch": 1.9069214177936997,
"grad_norm": 2.1353696650613556,
"learning_rate": 9.751560413383051e-06,
"loss": 0.7451,
"step": 51500
},
{
"epoch": 1.910624195802901,
"grad_norm": 2.2188964222997227,
"learning_rate": 9.749544674374814e-06,
"loss": 0.771,
"step": 51600
},
{
"epoch": 1.9143269738121025,
"grad_norm": 2.6602884956835373,
"learning_rate": 9.747521000969074e-06,
"loss": 0.7652,
"step": 51700
},
{
"epoch": 1.918029751821304,
"grad_norm": 2.366026652497562,
"learning_rate": 9.745489396546499e-06,
"loss": 0.7778,
"step": 51800
},
{
"epoch": 1.9217325298305052,
"grad_norm": 2.4178576890485166,
"learning_rate": 9.743449864501006e-06,
"loss": 0.7682,
"step": 51900
},
{
"epoch": 1.9254353078397068,
"grad_norm": 2.461073225865995,
"learning_rate": 9.741402408239753e-06,
"loss": 0.7379,
"step": 52000
},
{
"epoch": 1.929138085848908,
"grad_norm": 2.3169585710466443,
"learning_rate": 9.739347031183142e-06,
"loss": 0.74,
"step": 52100
},
{
"epoch": 1.9328408638581096,
"grad_norm": 2.480606692215648,
"learning_rate": 9.737283736764798e-06,
"loss": 0.7811,
"step": 52200
},
{
"epoch": 1.936543641867311,
"grad_norm": 2.33302742635216,
"learning_rate": 9.73521252843158e-06,
"loss": 0.7853,
"step": 52300
},
{
"epoch": 1.9402464198765124,
"grad_norm": 2.178325980295709,
"learning_rate": 9.733133409643565e-06,
"loss": 0.7678,
"step": 52400
},
{
"epoch": 1.9439491978857137,
"grad_norm": 2.6202132000217,
"learning_rate": 9.731046383874044e-06,
"loss": 0.7496,
"step": 52500
},
{
"epoch": 1.9476519758949151,
"grad_norm": 3.155040247361292,
"learning_rate": 9.728951454609517e-06,
"loss": 0.7728,
"step": 52600
},
{
"epoch": 1.9513547539041167,
"grad_norm": 2.115338674943332,
"learning_rate": 9.726848625349691e-06,
"loss": 0.7625,
"step": 52700
},
{
"epoch": 1.9550575319133179,
"grad_norm": 2.203447982138841,
"learning_rate": 9.724737899607466e-06,
"loss": 0.7693,
"step": 52800
},
{
"epoch": 1.9587603099225195,
"grad_norm": 2.314534740326119,
"learning_rate": 9.722619280908934e-06,
"loss": 0.7628,
"step": 52900
},
{
"epoch": 1.9624630879317206,
"grad_norm": 2.1956336825914793,
"learning_rate": 9.720492772793375e-06,
"loss": 0.7636,
"step": 53000
},
{
"epoch": 1.9661658659409222,
"grad_norm": 2.237936138713292,
"learning_rate": 9.718358378813248e-06,
"loss": 0.7559,
"step": 53100
},
{
"epoch": 1.9698686439501236,
"grad_norm": 2.166260133162057,
"learning_rate": 9.716216102534186e-06,
"loss": 0.7619,
"step": 53200
},
{
"epoch": 1.973571421959325,
"grad_norm": 2.4562700667901933,
"learning_rate": 9.714065947534987e-06,
"loss": 0.7596,
"step": 53300
},
{
"epoch": 1.9772741999685264,
"grad_norm": 2.0820216516365027,
"learning_rate": 9.711907917407614e-06,
"loss": 0.7526,
"step": 53400
},
{
"epoch": 1.9809769779777278,
"grad_norm": 2.3755910874830657,
"learning_rate": 9.709742015757187e-06,
"loss": 0.7553,
"step": 53500
},
{
"epoch": 1.9846797559869291,
"grad_norm": 2.369452864927645,
"learning_rate": 9.707568246201972e-06,
"loss": 0.753,
"step": 53600
},
{
"epoch": 1.9883825339961305,
"grad_norm": 2.3410608706416762,
"learning_rate": 9.70538661237338e-06,
"loss": 0.7787,
"step": 53700
},
{
"epoch": 1.9920853120053321,
"grad_norm": 2.087568589173381,
"learning_rate": 9.70319711791596e-06,
"loss": 0.7586,
"step": 53800
},
{
"epoch": 1.9957880900145333,
"grad_norm": 2.6786991173682373,
"learning_rate": 9.700999766487395e-06,
"loss": 0.7465,
"step": 53900
},
{
"epoch": 1.9994908680237349,
"grad_norm": 3.03880633662284,
"learning_rate": 9.698794561758493e-06,
"loss": 0.7403,
"step": 54000
},
{
"epoch": 2.003184389087913,
"grad_norm": 2.5868756494206497,
"learning_rate": 9.696581507413174e-06,
"loss": 0.6992,
"step": 54100
},
{
"epoch": 2.0068871670971147,
"grad_norm": 2.0423484305586994,
"learning_rate": 9.694360607148484e-06,
"loss": 0.6838,
"step": 54200
},
{
"epoch": 2.010589945106316,
"grad_norm": 2.313387880714559,
"learning_rate": 9.692131864674563e-06,
"loss": 0.6912,
"step": 54300
},
{
"epoch": 2.0142927231155174,
"grad_norm": 2.274235624804895,
"learning_rate": 9.689895283714663e-06,
"loss": 0.6854,
"step": 54400
},
{
"epoch": 2.017995501124719,
"grad_norm": 2.119309140091523,
"learning_rate": 9.687650868005124e-06,
"loss": 0.6786,
"step": 54500
},
{
"epoch": 2.02169827913392,
"grad_norm": 2.3759711399354413,
"learning_rate": 9.685398621295377e-06,
"loss": 0.6841,
"step": 54600
},
{
"epoch": 2.025401057143122,
"grad_norm": 2.652183252752739,
"learning_rate": 9.683138547347933e-06,
"loss": 0.6938,
"step": 54700
},
{
"epoch": 2.029103835152323,
"grad_norm": 2.497195487638967,
"learning_rate": 9.68087064993838e-06,
"loss": 0.6834,
"step": 54800
},
{
"epoch": 2.0328066131615246,
"grad_norm": 2.0788434226359174,
"learning_rate": 9.678594932855377e-06,
"loss": 0.681,
"step": 54900
},
{
"epoch": 2.0365093911707257,
"grad_norm": 2.557154847367296,
"learning_rate": 9.676311399900644e-06,
"loss": 0.6956,
"step": 55000
},
{
"epoch": 2.0402121691799273,
"grad_norm": 2.5976913602453595,
"learning_rate": 9.674020054888962e-06,
"loss": 0.673,
"step": 55100
},
{
"epoch": 2.0439149471891285,
"grad_norm": 2.1397702042236206,
"learning_rate": 9.671720901648157e-06,
"loss": 0.6939,
"step": 55200
},
{
"epoch": 2.04761772519833,
"grad_norm": 2.384247384660651,
"learning_rate": 9.669413944019099e-06,
"loss": 0.6757,
"step": 55300
},
{
"epoch": 2.0513205032075312,
"grad_norm": 2.1863152443770786,
"learning_rate": 9.667099185855703e-06,
"loss": 0.6968,
"step": 55400
},
{
"epoch": 2.055023281216733,
"grad_norm": 4.405956537339234,
"learning_rate": 9.664776631024908e-06,
"loss": 0.683,
"step": 55500
},
{
"epoch": 2.0587260592259344,
"grad_norm": 2.049043630631007,
"learning_rate": 9.662446283406682e-06,
"loss": 0.6914,
"step": 55600
},
{
"epoch": 2.0624288372351356,
"grad_norm": 2.5664972711721625,
"learning_rate": 9.660108146894007e-06,
"loss": 0.6915,
"step": 55700
},
{
"epoch": 2.066131615244337,
"grad_norm": 2.005386472768463,
"learning_rate": 9.65776222539288e-06,
"loss": 0.6598,
"step": 55800
},
{
"epoch": 2.0698343932535384,
"grad_norm": 2.449106381543406,
"learning_rate": 9.655408522822306e-06,
"loss": 0.66,
"step": 55900
},
{
"epoch": 2.07353717126274,
"grad_norm": 3.0381314839729177,
"learning_rate": 9.653047043114281e-06,
"loss": 0.6685,
"step": 56000
},
{
"epoch": 2.077239949271941,
"grad_norm": 2.412440777034649,
"learning_rate": 9.650677790213799e-06,
"loss": 0.666,
"step": 56100
},
{
"epoch": 2.0809427272811427,
"grad_norm": 2.3635812683682222,
"learning_rate": 9.64830076807884e-06,
"loss": 0.6719,
"step": 56200
},
{
"epoch": 2.084645505290344,
"grad_norm": 2.337252826890419,
"learning_rate": 9.64591598068036e-06,
"loss": 0.6761,
"step": 56300
},
{
"epoch": 2.0883482832995455,
"grad_norm": 2.1411084610979856,
"learning_rate": 9.643523432002288e-06,
"loss": 0.6699,
"step": 56400
},
{
"epoch": 2.092051061308747,
"grad_norm": 2.396579312617091,
"learning_rate": 9.64112312604152e-06,
"loss": 0.6811,
"step": 56500
},
{
"epoch": 2.0957538393179482,
"grad_norm": 1.991014001057909,
"learning_rate": 9.638715066807908e-06,
"loss": 0.6921,
"step": 56600
},
{
"epoch": 2.09945661732715,
"grad_norm": 1.8170099574989464,
"learning_rate": 9.636299258324263e-06,
"loss": 0.6748,
"step": 56700
},
{
"epoch": 2.103159395336351,
"grad_norm": 2.407538918588163,
"learning_rate": 9.633875704626332e-06,
"loss": 0.6556,
"step": 56800
},
{
"epoch": 2.1068621733455526,
"grad_norm": 2.0009350084583186,
"learning_rate": 9.63144440976281e-06,
"loss": 0.6804,
"step": 56900
},
{
"epoch": 2.1105649513547537,
"grad_norm": 2.318529373742503,
"learning_rate": 9.629005377795318e-06,
"loss": 0.6766,
"step": 57000
},
{
"epoch": 2.1142677293639554,
"grad_norm": 2.901357640692631,
"learning_rate": 9.626558612798404e-06,
"loss": 0.6794,
"step": 57100
},
{
"epoch": 2.1179705073731565,
"grad_norm": 2.154557247934532,
"learning_rate": 9.624104118859535e-06,
"loss": 0.6691,
"step": 57200
},
{
"epoch": 2.121673285382358,
"grad_norm": 2.3903883257512577,
"learning_rate": 9.62164190007909e-06,
"loss": 0.6545,
"step": 57300
},
{
"epoch": 2.1253760633915597,
"grad_norm": 2.1742676469308093,
"learning_rate": 9.619171960570353e-06,
"loss": 0.6894,
"step": 57400
},
{
"epoch": 2.129078841400761,
"grad_norm": 2.654747582200517,
"learning_rate": 9.616694304459504e-06,
"loss": 0.6784,
"step": 57500
},
{
"epoch": 2.1327816194099625,
"grad_norm": 2.402727194182496,
"learning_rate": 9.614208935885615e-06,
"loss": 0.6724,
"step": 57600
},
{
"epoch": 2.1364843974191636,
"grad_norm": 2.2360152593662743,
"learning_rate": 9.611715859000643e-06,
"loss": 0.6622,
"step": 57700
},
{
"epoch": 2.1401871754283652,
"grad_norm": 2.591011720239606,
"learning_rate": 9.609215077969422e-06,
"loss": 0.6981,
"step": 57800
},
{
"epoch": 2.1438899534375664,
"grad_norm": 2.059137036300649,
"learning_rate": 9.606706596969655e-06,
"loss": 0.6665,
"step": 57900
},
{
"epoch": 2.147592731446768,
"grad_norm": 2.5471008394463768,
"learning_rate": 9.604190420191908e-06,
"loss": 0.6725,
"step": 58000
},
{
"epoch": 2.151295509455969,
"grad_norm": 2.3973486591141504,
"learning_rate": 9.601666551839606e-06,
"loss": 0.6855,
"step": 58100
},
{
"epoch": 2.1549982874651707,
"grad_norm": 2.6375773664347286,
"learning_rate": 9.599134996129022e-06,
"loss": 0.6826,
"step": 58200
},
{
"epoch": 2.1587010654743723,
"grad_norm": 2.332457102848655,
"learning_rate": 9.596595757289268e-06,
"loss": 0.6814,
"step": 58300
},
{
"epoch": 2.1624038434835735,
"grad_norm": 2.4870013874361745,
"learning_rate": 9.594048839562298e-06,
"loss": 0.6792,
"step": 58400
},
{
"epoch": 2.166106621492775,
"grad_norm": 2.3255182488507034,
"learning_rate": 9.591494247202886e-06,
"loss": 0.6954,
"step": 58500
},
{
"epoch": 2.1698093995019763,
"grad_norm": 2.83057658696704,
"learning_rate": 9.588931984478633e-06,
"loss": 0.6914,
"step": 58600
},
{
"epoch": 2.173512177511178,
"grad_norm": 2.354736029056531,
"learning_rate": 9.58636205566995e-06,
"loss": 0.6705,
"step": 58700
},
{
"epoch": 2.177214955520379,
"grad_norm": 2.413081651067487,
"learning_rate": 9.583784465070056e-06,
"loss": 0.692,
"step": 58800
},
{
"epoch": 2.1809177335295806,
"grad_norm": 1.9994020495850402,
"learning_rate": 9.581199216984974e-06,
"loss": 0.6789,
"step": 58900
},
{
"epoch": 2.184620511538782,
"grad_norm": 2.336676864459619,
"learning_rate": 9.57860631573351e-06,
"loss": 0.6746,
"step": 59000
},
{
"epoch": 2.1883232895479834,
"grad_norm": 1.9765951823994232,
"learning_rate": 9.576005765647262e-06,
"loss": 0.6841,
"step": 59100
},
{
"epoch": 2.192026067557185,
"grad_norm": 2.6704707200624567,
"learning_rate": 9.573397571070606e-06,
"loss": 0.6606,
"step": 59200
},
{
"epoch": 2.195728845566386,
"grad_norm": 2.7273193459509057,
"learning_rate": 9.570781736360682e-06,
"loss": 0.694,
"step": 59300
},
{
"epoch": 2.1994316235755877,
"grad_norm": 2.4075381061640475,
"learning_rate": 9.568158265887402e-06,
"loss": 0.7058,
"step": 59400
},
{
"epoch": 2.203134401584789,
"grad_norm": 2.7561200770361283,
"learning_rate": 9.565527164033428e-06,
"loss": 0.6635,
"step": 59500
},
{
"epoch": 2.2068371795939905,
"grad_norm": 1.9296516325452246,
"learning_rate": 9.562888435194171e-06,
"loss": 0.6944,
"step": 59600
},
{
"epoch": 2.2105399576031917,
"grad_norm": 2.3953208065104445,
"learning_rate": 9.56024208377779e-06,
"loss": 0.6511,
"step": 59700
},
{
"epoch": 2.2142427356123933,
"grad_norm": 2.3935986970014507,
"learning_rate": 9.557588114205166e-06,
"loss": 0.684,
"step": 59800
},
{
"epoch": 2.2179455136215944,
"grad_norm": 2.474410928209759,
"learning_rate": 9.554926530909918e-06,
"loss": 0.6944,
"step": 59900
},
{
"epoch": 2.221648291630796,
"grad_norm": 2.300308607407991,
"learning_rate": 9.552257338338377e-06,
"loss": 0.6619,
"step": 60000
},
{
"epoch": 2.2253510696399976,
"grad_norm": 2.2318245286663347,
"learning_rate": 9.549580540949592e-06,
"loss": 0.6737,
"step": 60100
},
{
"epoch": 2.229053847649199,
"grad_norm": 3.050417412156981,
"learning_rate": 9.546896143215307e-06,
"loss": 0.6588,
"step": 60200
},
{
"epoch": 2.2327566256584004,
"grad_norm": 2.096536346012167,
"learning_rate": 9.544204149619973e-06,
"loss": 0.6529,
"step": 60300
},
{
"epoch": 2.2364594036676015,
"grad_norm": 2.81530679664561,
"learning_rate": 9.541504564660726e-06,
"loss": 0.6691,
"step": 60400
},
{
"epoch": 2.240162181676803,
"grad_norm": 2.5250157137123606,
"learning_rate": 9.53879739284738e-06,
"loss": 0.6956,
"step": 60500
},
{
"epoch": 2.2438649596860043,
"grad_norm": 2.1361057888741057,
"learning_rate": 9.536082638702428e-06,
"loss": 0.6622,
"step": 60600
},
{
"epoch": 2.247567737695206,
"grad_norm": 2.418961423369967,
"learning_rate": 9.533360306761032e-06,
"loss": 0.6718,
"step": 60700
},
{
"epoch": 2.251270515704407,
"grad_norm": 2.0162128525744984,
"learning_rate": 9.530630401571006e-06,
"loss": 0.6757,
"step": 60800
},
{
"epoch": 2.2549732937136087,
"grad_norm": 1.7747207441276736,
"learning_rate": 9.527892927692819e-06,
"loss": 0.6895,
"step": 60900
},
{
"epoch": 2.2586760717228103,
"grad_norm": 2.3868183988130007,
"learning_rate": 9.525147889699587e-06,
"loss": 0.6982,
"step": 61000
},
{
"epoch": 2.2623788497320114,
"grad_norm": 2.370342809471196,
"learning_rate": 9.52239529217706e-06,
"loss": 0.6802,
"step": 61100
},
{
"epoch": 2.266081627741213,
"grad_norm": 2.001291795407346,
"learning_rate": 9.519635139723613e-06,
"loss": 0.6836,
"step": 61200
},
{
"epoch": 2.269784405750414,
"grad_norm": 2.4678585122972367,
"learning_rate": 9.516867436950247e-06,
"loss": 0.6709,
"step": 61300
},
{
"epoch": 2.273487183759616,
"grad_norm": 2.6005957215451754,
"learning_rate": 9.514092188480574e-06,
"loss": 0.6818,
"step": 61400
},
{
"epoch": 2.277189961768817,
"grad_norm": 2.920386936383934,
"learning_rate": 9.511309398950815e-06,
"loss": 0.7052,
"step": 61500
},
{
"epoch": 2.2808927397780185,
"grad_norm": 2.5403436437373865,
"learning_rate": 9.50851907300978e-06,
"loss": 0.6955,
"step": 61600
},
{
"epoch": 2.2845955177872197,
"grad_norm": 2.2492931914844347,
"learning_rate": 9.505721215318879e-06,
"loss": 0.6785,
"step": 61700
},
{
"epoch": 2.2882982957964213,
"grad_norm": 2.6880292587251047,
"learning_rate": 9.5029158305521e-06,
"loss": 0.68,
"step": 61800
},
{
"epoch": 2.292001073805623,
"grad_norm": 2.402997197868183,
"learning_rate": 9.500102923396004e-06,
"loss": 0.6927,
"step": 61900
},
{
"epoch": 2.295703851814824,
"grad_norm": 1.9864142200066779,
"learning_rate": 9.49728249854972e-06,
"loss": 0.6919,
"step": 62000
},
{
"epoch": 2.2994066298240257,
"grad_norm": 2.8456743709517163,
"learning_rate": 9.494454560724938e-06,
"loss": 0.6762,
"step": 62100
},
{
"epoch": 2.303109407833227,
"grad_norm": 2.7748514053291484,
"learning_rate": 9.491619114645892e-06,
"loss": 0.6777,
"step": 62200
},
{
"epoch": 2.3068121858424284,
"grad_norm": 2.559623553355795,
"learning_rate": 9.48877616504937e-06,
"loss": 0.6885,
"step": 62300
},
{
"epoch": 2.3105149638516296,
"grad_norm": 2.1469384529226008,
"learning_rate": 9.485925716684684e-06,
"loss": 0.7014,
"step": 62400
},
{
"epoch": 2.314217741860831,
"grad_norm": 2.6264777887477444,
"learning_rate": 9.48306777431368e-06,
"loss": 0.6778,
"step": 62500
},
{
"epoch": 2.3179205198700323,
"grad_norm": 2.621989964486446,
"learning_rate": 9.48020234271072e-06,
"loss": 0.6805,
"step": 62600
},
{
"epoch": 2.321623297879234,
"grad_norm": 2.272202713631239,
"learning_rate": 9.47732942666268e-06,
"loss": 0.6867,
"step": 62700
},
{
"epoch": 2.3253260758884355,
"grad_norm": 1.9762020868593124,
"learning_rate": 9.474449030968937e-06,
"loss": 0.6854,
"step": 62800
},
{
"epoch": 2.3290288538976367,
"grad_norm": 2.1226904406736984,
"learning_rate": 9.471561160441363e-06,
"loss": 0.6688,
"step": 62900
},
{
"epoch": 2.3327316319068383,
"grad_norm": 2.316689916305218,
"learning_rate": 9.468665819904317e-06,
"loss": 0.6951,
"step": 63000
},
{
"epoch": 2.3364344099160395,
"grad_norm": 2.316136479919069,
"learning_rate": 9.465763014194638e-06,
"loss": 0.6808,
"step": 63100
},
{
"epoch": 2.340137187925241,
"grad_norm": 2.307220790631874,
"learning_rate": 9.46285274816164e-06,
"loss": 0.6869,
"step": 63200
},
{
"epoch": 2.343839965934442,
"grad_norm": 2.213269812970463,
"learning_rate": 9.459935026667089e-06,
"loss": 0.6578,
"step": 63300
},
{
"epoch": 2.347542743943644,
"grad_norm": 2.344279831358738,
"learning_rate": 9.457009854585219e-06,
"loss": 0.6971,
"step": 63400
},
{
"epoch": 2.351245521952845,
"grad_norm": 2.0096880506357446,
"learning_rate": 9.454077236802702e-06,
"loss": 0.6828,
"step": 63500
},
{
"epoch": 2.3549482999620466,
"grad_norm": 2.2548311729082253,
"learning_rate": 9.45113717821865e-06,
"loss": 0.6727,
"step": 63600
},
{
"epoch": 2.358651077971248,
"grad_norm": 2.4341693614642996,
"learning_rate": 9.448189683744608e-06,
"loss": 0.6809,
"step": 63700
},
{
"epoch": 2.3623538559804493,
"grad_norm": 2.419848393996797,
"learning_rate": 9.445234758304537e-06,
"loss": 0.6928,
"step": 63800
},
{
"epoch": 2.3660566339896505,
"grad_norm": 2.7840357590734994,
"learning_rate": 9.442272406834823e-06,
"loss": 0.6698,
"step": 63900
},
{
"epoch": 2.369759411998852,
"grad_norm": 2.3936132840359665,
"learning_rate": 9.439302634284244e-06,
"loss": 0.6741,
"step": 64000
},
{
"epoch": 2.3734621900080537,
"grad_norm": 2.2628698955348923,
"learning_rate": 9.436325445613988e-06,
"loss": 0.6982,
"step": 64100
},
{
"epoch": 2.377164968017255,
"grad_norm": 2.5913137677554645,
"learning_rate": 9.43334084579762e-06,
"loss": 0.6843,
"step": 64200
},
{
"epoch": 2.3808677460264565,
"grad_norm": 2.5306835812838027,
"learning_rate": 9.430348839821095e-06,
"loss": 0.6931,
"step": 64300
},
{
"epoch": 2.3845705240356576,
"grad_norm": 1.824497906863608,
"learning_rate": 9.42734943268274e-06,
"loss": 0.6784,
"step": 64400
},
{
"epoch": 2.388273302044859,
"grad_norm": 2.031648470909946,
"learning_rate": 9.424342629393238e-06,
"loss": 0.6845,
"step": 64500
},
{
"epoch": 2.3919760800540604,
"grad_norm": 2.5806566539882274,
"learning_rate": 9.421328434975636e-06,
"loss": 0.6893,
"step": 64600
},
{
"epoch": 2.395678858063262,
"grad_norm": 2.1526340438291807,
"learning_rate": 9.418306854465327e-06,
"loss": 0.6973,
"step": 64700
},
{
"epoch": 2.399381636072463,
"grad_norm": 2.4285535244597702,
"learning_rate": 9.41527789291004e-06,
"loss": 0.7019,
"step": 64800
},
{
"epoch": 2.4030844140816647,
"grad_norm": 2.621188381463244,
"learning_rate": 9.412241555369834e-06,
"loss": 0.6653,
"step": 64900
},
{
"epoch": 2.4067871920908663,
"grad_norm": 2.224098798333827,
"learning_rate": 9.409197846917093e-06,
"loss": 0.6725,
"step": 65000
},
{
"epoch": 2.4104899701000675,
"grad_norm": 1.997533351532834,
"learning_rate": 9.406146772636516e-06,
"loss": 0.6812,
"step": 65100
},
{
"epoch": 2.414192748109269,
"grad_norm": 2.19098514780732,
"learning_rate": 9.403088337625099e-06,
"loss": 0.6677,
"step": 65200
},
{
"epoch": 2.4178955261184703,
"grad_norm": 2.1908878890803605,
"learning_rate": 9.400022546992148e-06,
"loss": 0.6813,
"step": 65300
},
{
"epoch": 2.421598304127672,
"grad_norm": 2.324540384353367,
"learning_rate": 9.396949405859239e-06,
"loss": 0.6579,
"step": 65400
},
{
"epoch": 2.425301082136873,
"grad_norm": 2.5054016122271374,
"learning_rate": 9.393868919360244e-06,
"loss": 0.6744,
"step": 65500
},
{
"epoch": 2.4290038601460746,
"grad_norm": 2.582887067658994,
"learning_rate": 9.390781092641301e-06,
"loss": 0.6913,
"step": 65600
},
{
"epoch": 2.4327066381552758,
"grad_norm": 2.2584713627681428,
"learning_rate": 9.387685930860804e-06,
"loss": 0.6645,
"step": 65700
},
{
"epoch": 2.4364094161644774,
"grad_norm": 2.202586980967711,
"learning_rate": 9.384583439189406e-06,
"loss": 0.6599,
"step": 65800
},
{
"epoch": 2.440112194173679,
"grad_norm": 2.0537705242407256,
"learning_rate": 9.381473622810005e-06,
"loss": 0.6524,
"step": 65900
},
{
"epoch": 2.44381497218288,
"grad_norm": 2.446679586314843,
"learning_rate": 9.378356486917736e-06,
"loss": 0.6586,
"step": 66000
},
{
"epoch": 2.4475177501920817,
"grad_norm": 2.3254324060908886,
"learning_rate": 9.37523203671996e-06,
"loss": 0.6716,
"step": 66100
},
{
"epoch": 2.451220528201283,
"grad_norm": 2.402871716965202,
"learning_rate": 9.372100277436253e-06,
"loss": 0.6771,
"step": 66200
},
{
"epoch": 2.4549233062104845,
"grad_norm": 2.4014908865791402,
"learning_rate": 9.368961214298414e-06,
"loss": 0.6892,
"step": 66300
},
{
"epoch": 2.4586260842196856,
"grad_norm": 2.309859916718413,
"learning_rate": 9.365814852550426e-06,
"loss": 0.6725,
"step": 66400
},
{
"epoch": 2.4623288622288872,
"grad_norm": 2.657756967242288,
"learning_rate": 9.36266119744848e-06,
"loss": 0.6835,
"step": 66500
},
{
"epoch": 2.4660316402380884,
"grad_norm": 2.3060608989482327,
"learning_rate": 9.35950025426094e-06,
"loss": 0.6694,
"step": 66600
},
{
"epoch": 2.46973441824729,
"grad_norm": 1.9200073351424498,
"learning_rate": 9.356332028268356e-06,
"loss": 0.6725,
"step": 66700
},
{
"epoch": 2.4734371962564916,
"grad_norm": 1.7930879502348702,
"learning_rate": 9.353156524763433e-06,
"loss": 0.6674,
"step": 66800
},
{
"epoch": 2.4771399742656928,
"grad_norm": 2.312137593139913,
"learning_rate": 9.349973749051042e-06,
"loss": 0.665,
"step": 66900
},
{
"epoch": 2.4808427522748944,
"grad_norm": 2.7119648286693536,
"learning_rate": 9.346783706448199e-06,
"loss": 0.6925,
"step": 67000
},
{
"epoch": 2.4845455302840955,
"grad_norm": 2.356555621714717,
"learning_rate": 9.343586402284061e-06,
"loss": 0.6774,
"step": 67100
},
{
"epoch": 2.488248308293297,
"grad_norm": 3.4467386366257196,
"learning_rate": 9.340381841899913e-06,
"loss": 0.6907,
"step": 67200
},
{
"epoch": 2.4919510863024983,
"grad_norm": 2.5874996764431,
"learning_rate": 9.337170030649166e-06,
"loss": 0.6808,
"step": 67300
},
{
"epoch": 2.4956538643117,
"grad_norm": 2.4262991872836093,
"learning_rate": 9.33395097389734e-06,
"loss": 0.6714,
"step": 67400
},
{
"epoch": 2.499356642320901,
"grad_norm": 2.2026220733741737,
"learning_rate": 9.330724677022063e-06,
"loss": 0.6798,
"step": 67500
},
{
"epoch": 2.5030594203301026,
"grad_norm": 2.1788581179144395,
"learning_rate": 9.327491145413057e-06,
"loss": 0.6811,
"step": 67600
},
{
"epoch": 2.5067621983393042,
"grad_norm": 2.275441203213566,
"learning_rate": 9.324250384472127e-06,
"loss": 0.6627,
"step": 67700
},
{
"epoch": 2.5104649763485054,
"grad_norm": 2.283344949810879,
"learning_rate": 9.32100239961316e-06,
"loss": 0.6642,
"step": 67800
},
{
"epoch": 2.514167754357707,
"grad_norm": 2.4267848884723167,
"learning_rate": 9.317747196262105e-06,
"loss": 0.6787,
"step": 67900
},
{
"epoch": 2.517870532366908,
"grad_norm": 2.5953019278693965,
"learning_rate": 9.314484779856977e-06,
"loss": 0.6737,
"step": 68000
},
{
"epoch": 2.5215733103761098,
"grad_norm": 2.036941282735882,
"learning_rate": 9.311215155847834e-06,
"loss": 0.6589,
"step": 68100
},
{
"epoch": 2.525276088385311,
"grad_norm": 2.597845885761239,
"learning_rate": 9.30793832969678e-06,
"loss": 0.6717,
"step": 68200
},
{
"epoch": 2.5289788663945125,
"grad_norm": 2.4622763848737774,
"learning_rate": 9.304654306877946e-06,
"loss": 0.6897,
"step": 68300
},
{
"epoch": 2.5326816444037137,
"grad_norm": 2.2606318900396047,
"learning_rate": 9.30136309287749e-06,
"loss": 0.6811,
"step": 68400
},
{
"epoch": 2.5363844224129153,
"grad_norm": 2.4860591476196423,
"learning_rate": 9.298064693193581e-06,
"loss": 0.6776,
"step": 68500
},
{
"epoch": 2.540087200422117,
"grad_norm": 2.4829377853240837,
"learning_rate": 9.29475911333639e-06,
"loss": 0.7002,
"step": 68600
},
{
"epoch": 2.543789978431318,
"grad_norm": 2.367231988606884,
"learning_rate": 9.291446358828091e-06,
"loss": 0.6675,
"step": 68700
},
{
"epoch": 2.5474927564405196,
"grad_norm": 2.4582997910649484,
"learning_rate": 9.288126435202831e-06,
"loss": 0.6656,
"step": 68800
},
{
"epoch": 2.551195534449721,
"grad_norm": 2.0143015209204185,
"learning_rate": 9.284799348006743e-06,
"loss": 0.6811,
"step": 68900
},
{
"epoch": 2.5548983124589224,
"grad_norm": 2.9078889050531473,
"learning_rate": 9.281465102797926e-06,
"loss": 0.677,
"step": 69000
},
{
"epoch": 2.5586010904681236,
"grad_norm": 2.2408419976033693,
"learning_rate": 9.278123705146434e-06,
"loss": 0.6884,
"step": 69100
},
{
"epoch": 2.562303868477325,
"grad_norm": 2.626201962148744,
"learning_rate": 9.27477516063427e-06,
"loss": 0.6612,
"step": 69200
},
{
"epoch": 2.5660066464865263,
"grad_norm": 2.1552325508134134,
"learning_rate": 9.271419474855377e-06,
"loss": 0.666,
"step": 69300
},
{
"epoch": 2.569709424495728,
"grad_norm": 2.3227195866166768,
"learning_rate": 9.268056653415632e-06,
"loss": 0.6652,
"step": 69400
},
{
"epoch": 2.5734122025049295,
"grad_norm": 2.753551170952296,
"learning_rate": 9.264686701932825e-06,
"loss": 0.6791,
"step": 69500
},
{
"epoch": 2.5771149805141307,
"grad_norm": 2.2976640888247415,
"learning_rate": 9.261309626036661e-06,
"loss": 0.6705,
"step": 69600
},
{
"epoch": 2.5808177585233323,
"grad_norm": 2.339779663329093,
"learning_rate": 9.257925431368749e-06,
"loss": 0.6669,
"step": 69700
},
{
"epoch": 2.5845205365325334,
"grad_norm": 2.1019636120259695,
"learning_rate": 9.254534123582585e-06,
"loss": 0.6734,
"step": 69800
},
{
"epoch": 2.588223314541735,
"grad_norm": 2.2977009075813744,
"learning_rate": 9.251135708343555e-06,
"loss": 0.6724,
"step": 69900
},
{
"epoch": 2.591926092550936,
"grad_norm": 2.1726587191847386,
"learning_rate": 9.247730191328908e-06,
"loss": 0.686,
"step": 70000
},
{
"epoch": 2.595628870560138,
"grad_norm": 2.2059169127907907,
"learning_rate": 9.244317578227769e-06,
"loss": 0.6829,
"step": 70100
},
{
"epoch": 2.599331648569339,
"grad_norm": 2.2693764246927843,
"learning_rate": 9.240897874741108e-06,
"loss": 0.6706,
"step": 70200
},
{
"epoch": 2.6030344265785406,
"grad_norm": 2.3773955458790192,
"learning_rate": 9.237471086581744e-06,
"loss": 0.6403,
"step": 70300
},
{
"epoch": 2.606737204587742,
"grad_norm": 2.376894341944025,
"learning_rate": 9.234037219474332e-06,
"loss": 0.6556,
"step": 70400
},
{
"epoch": 2.6104399825969433,
"grad_norm": 2.2164027402226756,
"learning_rate": 9.230596279155353e-06,
"loss": 0.6677,
"step": 70500
},
{
"epoch": 2.614142760606145,
"grad_norm": 2.6004295191699596,
"learning_rate": 9.227148271373102e-06,
"loss": 0.6656,
"step": 70600
},
{
"epoch": 2.617845538615346,
"grad_norm": 2.2941992876843145,
"learning_rate": 9.223693201887677e-06,
"loss": 0.671,
"step": 70700
},
{
"epoch": 2.6215483166245477,
"grad_norm": 2.423457996166192,
"learning_rate": 9.220231076470985e-06,
"loss": 0.671,
"step": 70800
},
{
"epoch": 2.625251094633749,
"grad_norm": 2.028224896189644,
"learning_rate": 9.216761900906707e-06,
"loss": 0.6633,
"step": 70900
},
{
"epoch": 2.6289538726429504,
"grad_norm": 2.8043596176994234,
"learning_rate": 9.213285680990311e-06,
"loss": 0.6733,
"step": 71000
},
{
"epoch": 2.6326566506521516,
"grad_norm": 2.631240157715802,
"learning_rate": 9.209802422529028e-06,
"loss": 0.6694,
"step": 71100
},
{
"epoch": 2.636359428661353,
"grad_norm": 3.27526490574497,
"learning_rate": 9.206312131341848e-06,
"loss": 0.6736,
"step": 71200
},
{
"epoch": 2.640062206670555,
"grad_norm": 2.2969788418244734,
"learning_rate": 9.202814813259514e-06,
"loss": 0.6685,
"step": 71300
},
{
"epoch": 2.643764984679756,
"grad_norm": 2.0448759395992693,
"learning_rate": 9.199310474124501e-06,
"loss": 0.6734,
"step": 71400
},
{
"epoch": 2.647467762688957,
"grad_norm": 2.194888247981071,
"learning_rate": 9.195799119791018e-06,
"loss": 0.6853,
"step": 71500
},
{
"epoch": 2.6511705406981587,
"grad_norm": 2.3157294382898037,
"learning_rate": 9.19228075612499e-06,
"loss": 0.6936,
"step": 71600
},
{
"epoch": 2.6548733187073603,
"grad_norm": 2.2600268640470516,
"learning_rate": 9.188755389004056e-06,
"loss": 0.6482,
"step": 71700
},
{
"epoch": 2.6585760967165615,
"grad_norm": 2.7867346539584026,
"learning_rate": 9.18522302431755e-06,
"loss": 0.6736,
"step": 71800
},
{
"epoch": 2.662278874725763,
"grad_norm": 2.4244549380103284,
"learning_rate": 9.181683667966497e-06,
"loss": 0.6612,
"step": 71900
},
{
"epoch": 2.6659816527349642,
"grad_norm": 2.467628082595294,
"learning_rate": 9.178137325863606e-06,
"loss": 0.662,
"step": 72000
},
{
"epoch": 2.669684430744166,
"grad_norm": 1.991818914003808,
"learning_rate": 9.17458400393325e-06,
"loss": 0.6546,
"step": 72100
},
{
"epoch": 2.6733872087533674,
"grad_norm": 2.6084822187687893,
"learning_rate": 9.171023708111467e-06,
"loss": 0.6707,
"step": 72200
},
{
"epoch": 2.6770899867625686,
"grad_norm": 2.4759759139487674,
"learning_rate": 9.16745644434594e-06,
"loss": 0.6589,
"step": 72300
},
{
"epoch": 2.6807927647717698,
"grad_norm": 2.417557023636743,
"learning_rate": 9.163882218595998e-06,
"loss": 0.6692,
"step": 72400
},
{
"epoch": 2.6844955427809714,
"grad_norm": 2.4757717167657303,
"learning_rate": 9.160301036832601e-06,
"loss": 0.6824,
"step": 72500
},
{
"epoch": 2.688198320790173,
"grad_norm": 1.7864811103086602,
"learning_rate": 9.156712905038324e-06,
"loss": 0.6549,
"step": 72600
},
{
"epoch": 2.691901098799374,
"grad_norm": 1.931145360031176,
"learning_rate": 9.153117829207353e-06,
"loss": 0.6707,
"step": 72700
},
{
"epoch": 2.6956038768085757,
"grad_norm": 2.6583751811214515,
"learning_rate": 9.149515815345477e-06,
"loss": 0.6746,
"step": 72800
},
{
"epoch": 2.699306654817777,
"grad_norm": 2.3434065726826874,
"learning_rate": 9.14590686947008e-06,
"loss": 0.6746,
"step": 72900
},
{
"epoch": 2.7030094328269785,
"grad_norm": 2.1951946075529003,
"learning_rate": 9.142290997610114e-06,
"loss": 0.672,
"step": 73000
},
{
"epoch": 2.70671221083618,
"grad_norm": 2.1266717085417715,
"learning_rate": 9.138668205806116e-06,
"loss": 0.6596,
"step": 73100
},
{
"epoch": 2.7104149888453812,
"grad_norm": 2.052871241822731,
"learning_rate": 9.135038500110169e-06,
"loss": 0.6562,
"step": 73200
},
{
"epoch": 2.7141177668545824,
"grad_norm": 2.3890278232506144,
"learning_rate": 9.131401886585916e-06,
"loss": 0.6791,
"step": 73300
},
{
"epoch": 2.717820544863784,
"grad_norm": 2.339795856765528,
"learning_rate": 9.127758371308537e-06,
"loss": 0.6769,
"step": 73400
},
{
"epoch": 2.7215233228729856,
"grad_norm": 2.0980772669298946,
"learning_rate": 9.124107960364738e-06,
"loss": 0.687,
"step": 73500
},
{
"epoch": 2.7252261008821868,
"grad_norm": 1.9520671316508236,
"learning_rate": 9.120450659852754e-06,
"loss": 0.6619,
"step": 73600
},
{
"epoch": 2.7289288788913884,
"grad_norm": 1.9489571927723024,
"learning_rate": 9.116786475882318e-06,
"loss": 0.6643,
"step": 73700
},
{
"epoch": 2.7326316569005895,
"grad_norm": 2.1143535490363963,
"learning_rate": 9.11311541457467e-06,
"loss": 0.6647,
"step": 73800
},
{
"epoch": 2.736334434909791,
"grad_norm": 2.4454265529124415,
"learning_rate": 9.109437482062538e-06,
"loss": 0.6791,
"step": 73900
},
{
"epoch": 2.7400372129189927,
"grad_norm": 2.12417553054465,
"learning_rate": 9.105752684490125e-06,
"loss": 0.6751,
"step": 74000
},
{
"epoch": 2.743739990928194,
"grad_norm": 3.3698294360651286,
"learning_rate": 9.102061028013108e-06,
"loss": 0.6805,
"step": 74100
},
{
"epoch": 2.747442768937395,
"grad_norm": 2.6079682276880694,
"learning_rate": 9.098362518798615e-06,
"loss": 0.6542,
"step": 74200
},
{
"epoch": 2.7511455469465966,
"grad_norm": 2.9285278794017167,
"learning_rate": 9.094657163025228e-06,
"loss": 0.6798,
"step": 74300
},
{
"epoch": 2.7548483249557982,
"grad_norm": 2.029512121868359,
"learning_rate": 9.090944966882968e-06,
"loss": 0.6716,
"step": 74400
},
{
"epoch": 2.7585511029649994,
"grad_norm": 2.37703823122831,
"learning_rate": 9.087225936573275e-06,
"loss": 0.6664,
"step": 74500
},
{
"epoch": 2.762253880974201,
"grad_norm": 2.290740875061313,
"learning_rate": 9.083500078309013e-06,
"loss": 0.7054,
"step": 74600
},
{
"epoch": 2.765956658983402,
"grad_norm": 1.9826452203518832,
"learning_rate": 9.079767398314452e-06,
"loss": 0.6574,
"step": 74700
},
{
"epoch": 2.7696594369926038,
"grad_norm": 2.510390912417119,
"learning_rate": 9.076027902825252e-06,
"loss": 0.6573,
"step": 74800
},
{
"epoch": 2.7733622150018054,
"grad_norm": 3.253767602420802,
"learning_rate": 9.072281598088467e-06,
"loss": 0.6565,
"step": 74900
},
{
"epoch": 2.7770649930110065,
"grad_norm": 3.1743643654172278,
"learning_rate": 9.068528490362524e-06,
"loss": 0.6636,
"step": 75000
},
{
"epoch": 2.7807677710202077,
"grad_norm": 2.9292198577340463,
"learning_rate": 9.064768585917207e-06,
"loss": 0.6763,
"step": 75100
},
{
"epoch": 2.7844705490294093,
"grad_norm": 2.3225242842709766,
"learning_rate": 9.061001891033666e-06,
"loss": 0.6696,
"step": 75200
},
{
"epoch": 2.788173327038611,
"grad_norm": 2.272648856356267,
"learning_rate": 9.057228412004386e-06,
"loss": 0.6585,
"step": 75300
},
{
"epoch": 2.791876105047812,
"grad_norm": 2.708064532509065,
"learning_rate": 9.053448155133192e-06,
"loss": 0.6674,
"step": 75400
},
{
"epoch": 2.7955788830570136,
"grad_norm": 2.0878561841156706,
"learning_rate": 9.049661126735223e-06,
"loss": 0.6523,
"step": 75500
},
{
"epoch": 2.799281661066215,
"grad_norm": 2.0218162021372637,
"learning_rate": 9.045867333136939e-06,
"loss": 0.667,
"step": 75600
},
{
"epoch": 2.8029844390754164,
"grad_norm": 1.9615749815202044,
"learning_rate": 9.042066780676101e-06,
"loss": 0.6644,
"step": 75700
},
{
"epoch": 2.806687217084618,
"grad_norm": 2.458125241194594,
"learning_rate": 9.038259475701756e-06,
"loss": 0.6592,
"step": 75800
},
{
"epoch": 2.810389995093819,
"grad_norm": 2.5321957606480887,
"learning_rate": 9.034445424574232e-06,
"loss": 0.6542,
"step": 75900
},
{
"epoch": 2.8140927731030203,
"grad_norm": 2.305578502814208,
"learning_rate": 9.030624633665131e-06,
"loss": 0.6626,
"step": 76000
},
{
"epoch": 2.817795551112222,
"grad_norm": 2.304093777477429,
"learning_rate": 9.026797109357313e-06,
"loss": 0.6585,
"step": 76100
},
{
"epoch": 2.8214983291214235,
"grad_norm": 1.9063487829056964,
"learning_rate": 9.022962858044881e-06,
"loss": 0.6634,
"step": 76200
},
{
"epoch": 2.8252011071306247,
"grad_norm": 2.4605756536089998,
"learning_rate": 9.019121886133185e-06,
"loss": 0.659,
"step": 76300
},
{
"epoch": 2.8289038851398263,
"grad_norm": 2.908256690477109,
"learning_rate": 9.015274200038798e-06,
"loss": 0.6873,
"step": 76400
},
{
"epoch": 2.8326066631490274,
"grad_norm": 2.195376131615668,
"learning_rate": 9.011419806189503e-06,
"loss": 0.6786,
"step": 76500
},
{
"epoch": 2.836309441158229,
"grad_norm": 2.4481520740229588,
"learning_rate": 9.0075587110243e-06,
"loss": 0.6586,
"step": 76600
},
{
"epoch": 2.8400122191674306,
"grad_norm": 2.5436298766851024,
"learning_rate": 9.003690920993378e-06,
"loss": 0.6732,
"step": 76700
},
{
"epoch": 2.843714997176632,
"grad_norm": 2.0233903955790664,
"learning_rate": 8.999816442558112e-06,
"loss": 0.6694,
"step": 76800
},
{
"epoch": 2.847417775185833,
"grad_norm": 1.9592757597831238,
"learning_rate": 8.995935282191044e-06,
"loss": 0.642,
"step": 76900
},
{
"epoch": 2.8511205531950345,
"grad_norm": 2.4999659621973676,
"learning_rate": 8.992047446375887e-06,
"loss": 0.6758,
"step": 77000
},
{
"epoch": 2.854823331204236,
"grad_norm": 2.320920562047208,
"learning_rate": 8.988152941607505e-06,
"loss": 0.6686,
"step": 77100
},
{
"epoch": 2.8585261092134373,
"grad_norm": 2.180371204577853,
"learning_rate": 8.984251774391895e-06,
"loss": 0.6572,
"step": 77200
},
{
"epoch": 2.862228887222639,
"grad_norm": 2.548377630577026,
"learning_rate": 8.980343951246193e-06,
"loss": 0.6858,
"step": 77300
},
{
"epoch": 2.86593166523184,
"grad_norm": 2.2916044435835023,
"learning_rate": 8.976429478698651e-06,
"loss": 0.6612,
"step": 77400
},
{
"epoch": 2.8696344432410417,
"grad_norm": 2.137867387232337,
"learning_rate": 8.972508363288627e-06,
"loss": 0.656,
"step": 77500
},
{
"epoch": 2.8733372212502433,
"grad_norm": 2.6319833480679713,
"learning_rate": 8.968580611566578e-06,
"loss": 0.6505,
"step": 77600
},
{
"epoch": 2.8770399992594444,
"grad_norm": 2.1088025728984907,
"learning_rate": 8.96464623009405e-06,
"loss": 0.6667,
"step": 77700
},
{
"epoch": 2.8807427772686456,
"grad_norm": 1.9521003147155882,
"learning_rate": 8.960705225443657e-06,
"loss": 0.6596,
"step": 77800
},
{
"epoch": 2.884445555277847,
"grad_norm": 2.5972066347938294,
"learning_rate": 8.956757604199085e-06,
"loss": 0.6545,
"step": 77900
},
{
"epoch": 2.888148333287049,
"grad_norm": 2.4786047868289964,
"learning_rate": 8.952803372955073e-06,
"loss": 0.6722,
"step": 78000
},
{
"epoch": 2.89185111129625,
"grad_norm": 2.2514808731629112,
"learning_rate": 8.948842538317395e-06,
"loss": 0.6556,
"step": 78100
},
{
"epoch": 2.8955538893054515,
"grad_norm": 2.365087481495297,
"learning_rate": 8.944875106902864e-06,
"loss": 0.6482,
"step": 78200
},
{
"epoch": 2.8992566673146527,
"grad_norm": 2.452402390597274,
"learning_rate": 8.94090108533931e-06,
"loss": 0.6893,
"step": 78300
},
{
"epoch": 2.9029594453238543,
"grad_norm": 2.1846111061646885,
"learning_rate": 8.936920480265576e-06,
"loss": 0.6565,
"step": 78400
},
{
"epoch": 2.9066622233330555,
"grad_norm": 2.5440937876149907,
"learning_rate": 8.932933298331496e-06,
"loss": 0.6731,
"step": 78500
},
{
"epoch": 2.910365001342257,
"grad_norm": 2.1791116047812125,
"learning_rate": 8.928939546197897e-06,
"loss": 0.6747,
"step": 78600
},
{
"epoch": 2.914067779351458,
"grad_norm": 2.5381792077290934,
"learning_rate": 8.92493923053658e-06,
"loss": 0.6759,
"step": 78700
},
{
"epoch": 2.91777055736066,
"grad_norm": 2.3236635792732137,
"learning_rate": 8.920932358030309e-06,
"loss": 0.6675,
"step": 78800
},
{
"epoch": 2.9214733353698614,
"grad_norm": 1.9029464622582775,
"learning_rate": 8.916918935372805e-06,
"loss": 0.6634,
"step": 78900
},
{
"epoch": 2.9251761133790626,
"grad_norm": 2.224078219093189,
"learning_rate": 8.912898969268731e-06,
"loss": 0.6546,
"step": 79000
},
{
"epoch": 2.928878891388264,
"grad_norm": 2.9148804782966233,
"learning_rate": 8.908872466433677e-06,
"loss": 0.6549,
"step": 79100
},
{
"epoch": 2.9325816693974653,
"grad_norm": 2.4717406257998773,
"learning_rate": 8.904839433594158e-06,
"loss": 0.6522,
"step": 79200
},
{
"epoch": 2.936284447406667,
"grad_norm": 2.6821434461084896,
"learning_rate": 8.900799877487595e-06,
"loss": 0.669,
"step": 79300
},
{
"epoch": 2.939987225415868,
"grad_norm": 2.5288488175630057,
"learning_rate": 8.896753804862308e-06,
"loss": 0.6675,
"step": 79400
},
{
"epoch": 2.9436900034250697,
"grad_norm": 2.3118984656483748,
"learning_rate": 8.892701222477503e-06,
"loss": 0.6428,
"step": 79500
},
{
"epoch": 2.947392781434271,
"grad_norm": 1.7707450134385863,
"learning_rate": 8.888642137103258e-06,
"loss": 0.6423,
"step": 79600
},
{
"epoch": 2.9510955594434725,
"grad_norm": 2.7951973513737016,
"learning_rate": 8.884576555520521e-06,
"loss": 0.6666,
"step": 79700
},
{
"epoch": 2.954798337452674,
"grad_norm": 1.9441758598215642,
"learning_rate": 8.880504484521084e-06,
"loss": 0.6911,
"step": 79800
},
{
"epoch": 2.958501115461875,
"grad_norm": 2.311415822913053,
"learning_rate": 8.876425930907587e-06,
"loss": 0.69,
"step": 79900
},
{
"epoch": 2.962203893471077,
"grad_norm": 1.962196622233137,
"learning_rate": 8.872340901493496e-06,
"loss": 0.6991,
"step": 80000
},
{
"epoch": 2.965906671480278,
"grad_norm": 2.2188989804402635,
"learning_rate": 8.868249403103098e-06,
"loss": 0.6512,
"step": 80100
},
{
"epoch": 2.9696094494894796,
"grad_norm": 2.0738562772495217,
"learning_rate": 8.864151442571481e-06,
"loss": 0.6673,
"step": 80200
},
{
"epoch": 2.9733122274986807,
"grad_norm": 2.45682348863258,
"learning_rate": 8.860047026744535e-06,
"loss": 0.6488,
"step": 80300
},
{
"epoch": 2.9770150055078823,
"grad_norm": 2.876210559752475,
"learning_rate": 8.855936162478933e-06,
"loss": 0.641,
"step": 80400
},
{
"epoch": 2.9807177835170835,
"grad_norm": 2.113010077915775,
"learning_rate": 8.851818856642116e-06,
"loss": 0.6482,
"step": 80500
},
{
"epoch": 2.984420561526285,
"grad_norm": 2.2593684990909297,
"learning_rate": 8.84769511611229e-06,
"loss": 0.6596,
"step": 80600
},
{
"epoch": 2.9881233395354867,
"grad_norm": 2.213052710368658,
"learning_rate": 8.843564947778408e-06,
"loss": 0.6674,
"step": 80700
},
{
"epoch": 2.991826117544688,
"grad_norm": 1.9824851077389378,
"learning_rate": 8.839428358540165e-06,
"loss": 0.6606,
"step": 80800
},
{
"epoch": 2.9955288955538895,
"grad_norm": 1.8350785430581344,
"learning_rate": 8.835285355307979e-06,
"loss": 0.6625,
"step": 80900
},
{
"epoch": 2.9992316735630906,
"grad_norm": 2.2196935514359537,
"learning_rate": 8.831135945002982e-06,
"loss": 0.6483,
"step": 81000
},
{
"epoch": 3.0029251946272693,
"grad_norm": 2.180481700028787,
"learning_rate": 8.826980134557012e-06,
"loss": 0.5716,
"step": 81100
},
{
"epoch": 3.0066279726364704,
"grad_norm": 2.3154128557009166,
"learning_rate": 8.8228179309126e-06,
"loss": 0.5747,
"step": 81200
},
{
"epoch": 3.010330750645672,
"grad_norm": 2.5911631549986316,
"learning_rate": 8.818649341022954e-06,
"loss": 0.5708,
"step": 81300
},
{
"epoch": 3.014033528654873,
"grad_norm": 2.172878251158029,
"learning_rate": 8.81447437185195e-06,
"loss": 0.586,
"step": 81400
},
{
"epoch": 3.0177363066640748,
"grad_norm": 2.285708121202155,
"learning_rate": 8.810293030374126e-06,
"loss": 0.5279,
"step": 81500
},
{
"epoch": 3.021439084673276,
"grad_norm": 1.8325067800290862,
"learning_rate": 8.80610532357466e-06,
"loss": 0.5743,
"step": 81600
},
{
"epoch": 3.0251418626824775,
"grad_norm": 2.4724163520836617,
"learning_rate": 8.801911258449367e-06,
"loss": 0.5686,
"step": 81700
},
{
"epoch": 3.028844640691679,
"grad_norm": 2.8204386478402657,
"learning_rate": 8.797710842004683e-06,
"loss": 0.5661,
"step": 81800
},
{
"epoch": 3.0325474187008803,
"grad_norm": 2.1624621580723504,
"learning_rate": 8.793504081257653e-06,
"loss": 0.5609,
"step": 81900
},
{
"epoch": 3.036250196710082,
"grad_norm": 1.9578194242090217,
"learning_rate": 8.789290983235925e-06,
"loss": 0.5557,
"step": 82000
}
],
"logging_steps": 100,
"max_steps": 270070,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2119412823359488.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}