Inheritance-Thematic / trainer_state.json
Pclanglais's picture
Upload folder using huggingface_hub
39a7ec8 verified
Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON
{
"best_global_step": 203490,
"best_metric": 0.6936652741069145,
"best_model_checkpoint": "modernbert-heritage-category/checkpoint-203490",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 203490,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007371369600471768,
"grad_norm": 1.2525012493133545,
"learning_rate": 1.995105410585287e-05,
"loss": 0.1758,
"step": 500
},
{
"epoch": 0.014742739200943536,
"grad_norm": 0.9046939611434937,
"learning_rate": 1.990200992677773e-05,
"loss": 0.1536,
"step": 1000
},
{
"epoch": 0.022114108801415303,
"grad_norm": 1.5520201921463013,
"learning_rate": 1.9852867462774585e-05,
"loss": 0.1429,
"step": 1500
},
{
"epoch": 0.02948547840188707,
"grad_norm": 2.7046384811401367,
"learning_rate": 1.980372499877144e-05,
"loss": 0.1292,
"step": 2000
},
{
"epoch": 0.03685684800235884,
"grad_norm": 0.7337467670440674,
"learning_rate": 1.9754582534768295e-05,
"loss": 0.1229,
"step": 2500
},
{
"epoch": 0.044228217602830605,
"grad_norm": 0.6729874610900879,
"learning_rate": 1.970544007076515e-05,
"loss": 0.1193,
"step": 3000
},
{
"epoch": 0.05159958720330237,
"grad_norm": 6.235721111297607,
"learning_rate": 1.9656592461546026e-05,
"loss": 0.1182,
"step": 3500
},
{
"epoch": 0.05897095680377414,
"grad_norm": 0.02936830371618271,
"learning_rate": 1.9607449997542877e-05,
"loss": 0.1183,
"step": 4000
},
{
"epoch": 0.06634232640424591,
"grad_norm": 0.028409462422132492,
"learning_rate": 1.9558307533539732e-05,
"loss": 0.1164,
"step": 4500
},
{
"epoch": 0.07371369600471768,
"grad_norm": 4.09944486618042,
"learning_rate": 1.9509165069536587e-05,
"loss": 0.1053,
"step": 5000
},
{
"epoch": 0.08108506560518944,
"grad_norm": 3.7119829654693604,
"learning_rate": 1.9460022605533442e-05,
"loss": 0.1118,
"step": 5500
},
{
"epoch": 0.08845643520566121,
"grad_norm": 1.3968170881271362,
"learning_rate": 1.9410880141530297e-05,
"loss": 0.1092,
"step": 6000
},
{
"epoch": 0.09582780480613298,
"grad_norm": 1.2682095766067505,
"learning_rate": 1.9361737677527152e-05,
"loss": 0.107,
"step": 6500
},
{
"epoch": 0.10319917440660474,
"grad_norm": 1.898970127105713,
"learning_rate": 1.9312595213524007e-05,
"loss": 0.106,
"step": 7000
},
{
"epoch": 0.11057054400707651,
"grad_norm": 1.1229842901229858,
"learning_rate": 1.9263452749520862e-05,
"loss": 0.1066,
"step": 7500
},
{
"epoch": 0.11794191360754828,
"grad_norm": 1.5770803689956665,
"learning_rate": 1.9214310285517717e-05,
"loss": 0.1008,
"step": 8000
},
{
"epoch": 0.12531328320802004,
"grad_norm": 2.80481219291687,
"learning_rate": 1.9165167821514572e-05,
"loss": 0.1115,
"step": 8500
},
{
"epoch": 0.13268465280849182,
"grad_norm": 0.8099410533905029,
"learning_rate": 1.9116025357511427e-05,
"loss": 0.0979,
"step": 9000
},
{
"epoch": 0.1400560224089636,
"grad_norm": 2.5966243743896484,
"learning_rate": 1.9066882893508282e-05,
"loss": 0.1111,
"step": 9500
},
{
"epoch": 0.14742739200943536,
"grad_norm": 0.39728572964668274,
"learning_rate": 1.9017740429505137e-05,
"loss": 0.1014,
"step": 10000
},
{
"epoch": 0.15479876160990713,
"grad_norm": 0.4020085334777832,
"learning_rate": 1.896869625043e-05,
"loss": 0.0956,
"step": 10500
},
{
"epoch": 0.16217013121037888,
"grad_norm": 1.247157335281372,
"learning_rate": 1.891955378642685e-05,
"loss": 0.1054,
"step": 11000
},
{
"epoch": 0.16954150081085065,
"grad_norm": 0.4768887758255005,
"learning_rate": 1.8870411322423706e-05,
"loss": 0.0981,
"step": 11500
},
{
"epoch": 0.17691287041132242,
"grad_norm": 2.418336868286133,
"learning_rate": 1.882126885842056e-05,
"loss": 0.0936,
"step": 12000
},
{
"epoch": 0.1842842400117942,
"grad_norm": 0.27637165784835815,
"learning_rate": 1.877212639441742e-05,
"loss": 0.0975,
"step": 12500
},
{
"epoch": 0.19165560961226596,
"grad_norm": 0.23639962077140808,
"learning_rate": 1.8722983930414275e-05,
"loss": 0.0915,
"step": 13000
},
{
"epoch": 0.19902697921273774,
"grad_norm": 0.8892920017242432,
"learning_rate": 1.8673841466411126e-05,
"loss": 0.1,
"step": 13500
},
{
"epoch": 0.20639834881320948,
"grad_norm": 3.2099547386169434,
"learning_rate": 1.862469900240798e-05,
"loss": 0.0965,
"step": 14000
},
{
"epoch": 0.21376971841368125,
"grad_norm": 0.9265658259391785,
"learning_rate": 1.8575556538404836e-05,
"loss": 0.0988,
"step": 14500
},
{
"epoch": 0.22114108801415303,
"grad_norm": 1.8521679639816284,
"learning_rate": 1.8526414074401695e-05,
"loss": 0.0951,
"step": 15000
},
{
"epoch": 0.2285124576146248,
"grad_norm": 2.191715717315674,
"learning_rate": 1.8477369895326553e-05,
"loss": 0.1037,
"step": 15500
},
{
"epoch": 0.23588382721509657,
"grad_norm": 0.10625209659337997,
"learning_rate": 1.8428227431323408e-05,
"loss": 0.093,
"step": 16000
},
{
"epoch": 0.24325519681556834,
"grad_norm": 4.059609413146973,
"learning_rate": 1.8379084967320263e-05,
"loss": 0.0931,
"step": 16500
},
{
"epoch": 0.2506265664160401,
"grad_norm": 0.341007798910141,
"learning_rate": 1.8329942503317118e-05,
"loss": 0.0877,
"step": 17000
},
{
"epoch": 0.2579979360165119,
"grad_norm": 0.16973993182182312,
"learning_rate": 1.8280800039313973e-05,
"loss": 0.0981,
"step": 17500
},
{
"epoch": 0.26536930561698363,
"grad_norm": 0.20512279868125916,
"learning_rate": 1.8231657575310828e-05,
"loss": 0.0944,
"step": 18000
},
{
"epoch": 0.27274067521745543,
"grad_norm": 5.859679222106934,
"learning_rate": 1.818251511130768e-05,
"loss": 0.0952,
"step": 18500
},
{
"epoch": 0.2801120448179272,
"grad_norm": 0.06838594377040863,
"learning_rate": 1.8133470932232545e-05,
"loss": 0.0891,
"step": 19000
},
{
"epoch": 0.2874834144183989,
"grad_norm": 1.6491619348526,
"learning_rate": 1.80843284682294e-05,
"loss": 0.095,
"step": 19500
},
{
"epoch": 0.2948547840188707,
"grad_norm": 0.21653395891189575,
"learning_rate": 1.8035186004226252e-05,
"loss": 0.0925,
"step": 20000
},
{
"epoch": 0.30222615361934246,
"grad_norm": 0.1839127540588379,
"learning_rate": 1.7986043540223107e-05,
"loss": 0.0874,
"step": 20500
},
{
"epoch": 0.30959752321981426,
"grad_norm": 1.7096320390701294,
"learning_rate": 1.7936901076219962e-05,
"loss": 0.0921,
"step": 21000
},
{
"epoch": 0.316968892820286,
"grad_norm": 0.013913823291659355,
"learning_rate": 1.788775861221682e-05,
"loss": 0.0879,
"step": 21500
},
{
"epoch": 0.32434026242075775,
"grad_norm": 2.2390196323394775,
"learning_rate": 1.7838616148213672e-05,
"loss": 0.0988,
"step": 22000
},
{
"epoch": 0.33171163202122955,
"grad_norm": 1.1112462282180786,
"learning_rate": 1.7789473684210527e-05,
"loss": 0.0906,
"step": 22500
},
{
"epoch": 0.3390830016217013,
"grad_norm": 2.3240630626678467,
"learning_rate": 1.774042950513539e-05,
"loss": 0.0919,
"step": 23000
},
{
"epoch": 0.3464543712221731,
"grad_norm": 0.31203529238700867,
"learning_rate": 1.7691287041132244e-05,
"loss": 0.0886,
"step": 23500
},
{
"epoch": 0.35382574082264484,
"grad_norm": 0.002407611347734928,
"learning_rate": 1.76421445771291e-05,
"loss": 0.0892,
"step": 24000
},
{
"epoch": 0.36119711042311664,
"grad_norm": 0.23297803103923798,
"learning_rate": 1.7593002113125954e-05,
"loss": 0.0894,
"step": 24500
},
{
"epoch": 0.3685684800235884,
"grad_norm": 0.5540401339530945,
"learning_rate": 1.754385964912281e-05,
"loss": 0.0905,
"step": 25000
},
{
"epoch": 0.37593984962406013,
"grad_norm": 1.9130643606185913,
"learning_rate": 1.749481547004767e-05,
"loss": 0.0906,
"step": 25500
},
{
"epoch": 0.38331121922453193,
"grad_norm": 0.23371170461177826,
"learning_rate": 1.7445673006044523e-05,
"loss": 0.0925,
"step": 26000
},
{
"epoch": 0.3906825888250037,
"grad_norm": 0.16552847623825073,
"learning_rate": 1.7396530542041378e-05,
"loss": 0.088,
"step": 26500
},
{
"epoch": 0.3980539584254755,
"grad_norm": 0.008411018177866936,
"learning_rate": 1.7347388078038233e-05,
"loss": 0.0946,
"step": 27000
},
{
"epoch": 0.4054253280259472,
"grad_norm": 0.6356103420257568,
"learning_rate": 1.729824561403509e-05,
"loss": 0.0869,
"step": 27500
},
{
"epoch": 0.41279669762641896,
"grad_norm": 0.8396435379981995,
"learning_rate": 1.724920143495995e-05,
"loss": 0.0933,
"step": 28000
},
{
"epoch": 0.42016806722689076,
"grad_norm": 5.201042652130127,
"learning_rate": 1.7200157255884812e-05,
"loss": 0.0878,
"step": 28500
},
{
"epoch": 0.4275394368273625,
"grad_norm": 1.2198799848556519,
"learning_rate": 1.7151014791881667e-05,
"loss": 0.0837,
"step": 29000
},
{
"epoch": 0.4349108064278343,
"grad_norm": 1.1107237339019775,
"learning_rate": 1.7101872327878522e-05,
"loss": 0.0898,
"step": 29500
},
{
"epoch": 0.44228217602830605,
"grad_norm": 0.47166362404823303,
"learning_rate": 1.705282814880338e-05,
"loss": 0.083,
"step": 30000
},
{
"epoch": 0.44965354562877785,
"grad_norm": 0.9816909432411194,
"learning_rate": 1.7003685684800236e-05,
"loss": 0.0861,
"step": 30500
},
{
"epoch": 0.4570249152292496,
"grad_norm": 0.10324009507894516,
"learning_rate": 1.6954543220797094e-05,
"loss": 0.0942,
"step": 31000
},
{
"epoch": 0.46439628482972134,
"grad_norm": 0.42705604434013367,
"learning_rate": 1.6905400756793946e-05,
"loss": 0.0826,
"step": 31500
},
{
"epoch": 0.47176765443019314,
"grad_norm": 1.8074253797531128,
"learning_rate": 1.68562582927908e-05,
"loss": 0.0853,
"step": 32000
},
{
"epoch": 0.4791390240306649,
"grad_norm": 1.1949777603149414,
"learning_rate": 1.6807115828787656e-05,
"loss": 0.0936,
"step": 32500
},
{
"epoch": 0.4865103936311367,
"grad_norm": 1.8849105834960938,
"learning_rate": 1.675797336478451e-05,
"loss": 0.0849,
"step": 33000
},
{
"epoch": 0.49388176323160843,
"grad_norm": 2.1948788166046143,
"learning_rate": 1.670883090078137e-05,
"loss": 0.083,
"step": 33500
},
{
"epoch": 0.5012531328320802,
"grad_norm": 1.5681918859481812,
"learning_rate": 1.665968843677822e-05,
"loss": 0.0845,
"step": 34000
},
{
"epoch": 0.508624502432552,
"grad_norm": 1.447178840637207,
"learning_rate": 1.6610545972775076e-05,
"loss": 0.0883,
"step": 34500
},
{
"epoch": 0.5159958720330238,
"grad_norm": 0.678683876991272,
"learning_rate": 1.6561501793699938e-05,
"loss": 0.0901,
"step": 35000
},
{
"epoch": 0.5233672416334955,
"grad_norm": 1.585949420928955,
"learning_rate": 1.6512359329696793e-05,
"loss": 0.0893,
"step": 35500
},
{
"epoch": 0.5307386112339673,
"grad_norm": 2.8461952209472656,
"learning_rate": 1.6463216865693648e-05,
"loss": 0.0846,
"step": 36000
},
{
"epoch": 0.5381099808344391,
"grad_norm": 0.08873996883630753,
"learning_rate": 1.6414074401690503e-05,
"loss": 0.0838,
"step": 36500
},
{
"epoch": 0.5454813504349109,
"grad_norm": 0.08909033238887787,
"learning_rate": 1.6364931937687354e-05,
"loss": 0.0835,
"step": 37000
},
{
"epoch": 0.5528527200353825,
"grad_norm": 0.01679537631571293,
"learning_rate": 1.6315789473684213e-05,
"loss": 0.0826,
"step": 37500
},
{
"epoch": 0.5602240896358543,
"grad_norm": 0.018643999472260475,
"learning_rate": 1.6266647009681068e-05,
"loss": 0.0891,
"step": 38000
},
{
"epoch": 0.5675954592363261,
"grad_norm": 3.226288080215454,
"learning_rate": 1.6217504545677923e-05,
"loss": 0.0927,
"step": 38500
},
{
"epoch": 0.5749668288367978,
"grad_norm": 6.410881042480469,
"learning_rate": 1.6168362081674774e-05,
"loss": 0.0826,
"step": 39000
},
{
"epoch": 0.5823381984372696,
"grad_norm": 2.421131134033203,
"learning_rate": 1.611921961767163e-05,
"loss": 0.0854,
"step": 39500
},
{
"epoch": 0.5897095680377414,
"grad_norm": 0.012708733789622784,
"learning_rate": 1.6070077153668488e-05,
"loss": 0.0841,
"step": 40000
},
{
"epoch": 0.5970809376382131,
"grad_norm": 5.636229515075684,
"learning_rate": 1.6021032974593347e-05,
"loss": 0.0794,
"step": 40500
},
{
"epoch": 0.6044523072386849,
"grad_norm": 1.866571307182312,
"learning_rate": 1.59718905105902e-05,
"loss": 0.0836,
"step": 41000
},
{
"epoch": 0.6118236768391567,
"grad_norm": 1.0771315097808838,
"learning_rate": 1.5922748046587057e-05,
"loss": 0.0782,
"step": 41500
},
{
"epoch": 0.6191950464396285,
"grad_norm": 0.09344267845153809,
"learning_rate": 1.587360558258391e-05,
"loss": 0.0891,
"step": 42000
},
{
"epoch": 0.6265664160401002,
"grad_norm": 2.4186413288116455,
"learning_rate": 1.5824463118580767e-05,
"loss": 0.0907,
"step": 42500
},
{
"epoch": 0.633937785640572,
"grad_norm": 0.09242186695337296,
"learning_rate": 1.577532065457762e-05,
"loss": 0.081,
"step": 43000
},
{
"epoch": 0.6413091552410438,
"grad_norm": 0.09285570681095123,
"learning_rate": 1.572637476043049e-05,
"loss": 0.0902,
"step": 43500
},
{
"epoch": 0.6486805248415155,
"grad_norm": 1.1049730777740479,
"learning_rate": 1.5677232296427346e-05,
"loss": 0.081,
"step": 44000
},
{
"epoch": 0.6560518944419873,
"grad_norm": 0.1485988050699234,
"learning_rate": 1.5628089832424197e-05,
"loss": 0.0834,
"step": 44500
},
{
"epoch": 0.6634232640424591,
"grad_norm": 1.4170334339141846,
"learning_rate": 1.5578947368421052e-05,
"loss": 0.08,
"step": 45000
},
{
"epoch": 0.6707946336429309,
"grad_norm": 2.51129150390625,
"learning_rate": 1.5529903189345914e-05,
"loss": 0.0881,
"step": 45500
},
{
"epoch": 0.6781660032434026,
"grad_norm": 0.0491604208946228,
"learning_rate": 1.548076072534277e-05,
"loss": 0.0845,
"step": 46000
},
{
"epoch": 0.6855373728438744,
"grad_norm": 0.09064287692308426,
"learning_rate": 1.5431618261339624e-05,
"loss": 0.0838,
"step": 46500
},
{
"epoch": 0.6929087424443462,
"grad_norm": 1.6225173473358154,
"learning_rate": 1.538247579733648e-05,
"loss": 0.0806,
"step": 47000
},
{
"epoch": 0.7002801120448179,
"grad_norm": 0.025229327380657196,
"learning_rate": 1.5333333333333334e-05,
"loss": 0.0852,
"step": 47500
},
{
"epoch": 0.7076514816452897,
"grad_norm": 1.561880350112915,
"learning_rate": 1.528419086933019e-05,
"loss": 0.0819,
"step": 48000
},
{
"epoch": 0.7150228512457615,
"grad_norm": 0.024792635813355446,
"learning_rate": 1.5235048405327044e-05,
"loss": 0.0844,
"step": 48500
},
{
"epoch": 0.7223942208462333,
"grad_norm": 0.2325647473335266,
"learning_rate": 1.51859059413239e-05,
"loss": 0.0786,
"step": 49000
},
{
"epoch": 0.729765590446705,
"grad_norm": 1.0401220321655273,
"learning_rate": 1.5136763477320754e-05,
"loss": 0.0753,
"step": 49500
},
{
"epoch": 0.7371369600471768,
"grad_norm": 2.6318838596343994,
"learning_rate": 1.5087621013317608e-05,
"loss": 0.0793,
"step": 50000
},
{
"epoch": 0.7445083296476486,
"grad_norm": 0.15695439279079437,
"learning_rate": 1.5038478549314463e-05,
"loss": 0.085,
"step": 50500
},
{
"epoch": 0.7518796992481203,
"grad_norm": 0.006814942229539156,
"learning_rate": 1.498933608531132e-05,
"loss": 0.0819,
"step": 51000
},
{
"epoch": 0.7592510688485921,
"grad_norm": 2.0822718143463135,
"learning_rate": 1.4940193621308174e-05,
"loss": 0.0798,
"step": 51500
},
{
"epoch": 0.7666224384490639,
"grad_norm": 0.06762377172708511,
"learning_rate": 1.4891149442233035e-05,
"loss": 0.0793,
"step": 52000
},
{
"epoch": 0.7739938080495357,
"grad_norm": 1.627299189567566,
"learning_rate": 1.484200697822989e-05,
"loss": 0.08,
"step": 52500
},
{
"epoch": 0.7813651776500073,
"grad_norm": 0.8819578289985657,
"learning_rate": 1.4792864514226743e-05,
"loss": 0.0897,
"step": 53000
},
{
"epoch": 0.7887365472504791,
"grad_norm": 3.7201988697052,
"learning_rate": 1.4743722050223598e-05,
"loss": 0.0787,
"step": 53500
},
{
"epoch": 0.796107916850951,
"grad_norm": 2.0705556869506836,
"learning_rate": 1.4694677871148462e-05,
"loss": 0.0897,
"step": 54000
},
{
"epoch": 0.8034792864514226,
"grad_norm": 0.08984575420618057,
"learning_rate": 1.4645535407145315e-05,
"loss": 0.0875,
"step": 54500
},
{
"epoch": 0.8108506560518944,
"grad_norm": 0.5264925956726074,
"learning_rate": 1.459639294314217e-05,
"loss": 0.081,
"step": 55000
},
{
"epoch": 0.8182220256523662,
"grad_norm": 0.7385400533676147,
"learning_rate": 1.4547250479139025e-05,
"loss": 0.0804,
"step": 55500
},
{
"epoch": 0.8255933952528379,
"grad_norm": 0.04134887456893921,
"learning_rate": 1.449810801513588e-05,
"loss": 0.0797,
"step": 56000
},
{
"epoch": 0.8329647648533097,
"grad_norm": 0.03769136965274811,
"learning_rate": 1.4448965551132734e-05,
"loss": 0.084,
"step": 56500
},
{
"epoch": 0.8403361344537815,
"grad_norm": 0.2623615264892578,
"learning_rate": 1.4399921372057597e-05,
"loss": 0.0821,
"step": 57000
},
{
"epoch": 0.8477075040542533,
"grad_norm": 2.0235373973846436,
"learning_rate": 1.435077890805445e-05,
"loss": 0.0806,
"step": 57500
},
{
"epoch": 0.855078873654725,
"grad_norm": 0.32753029465675354,
"learning_rate": 1.4301636444051306e-05,
"loss": 0.0828,
"step": 58000
},
{
"epoch": 0.8624502432551968,
"grad_norm": 2.255500316619873,
"learning_rate": 1.425249398004816e-05,
"loss": 0.0804,
"step": 58500
},
{
"epoch": 0.8698216128556686,
"grad_norm": 2.8162291049957275,
"learning_rate": 1.4203449800973021e-05,
"loss": 0.0827,
"step": 59000
},
{
"epoch": 0.8771929824561403,
"grad_norm": 0.41316208243370056,
"learning_rate": 1.4154307336969876e-05,
"loss": 0.0753,
"step": 59500
},
{
"epoch": 0.8845643520566121,
"grad_norm": 1.9982844591140747,
"learning_rate": 1.4105164872966733e-05,
"loss": 0.0854,
"step": 60000
},
{
"epoch": 0.8919357216570839,
"grad_norm": 1.8432923555374146,
"learning_rate": 1.4056022408963586e-05,
"loss": 0.077,
"step": 60500
},
{
"epoch": 0.8993070912575557,
"grad_norm": 3.33919620513916,
"learning_rate": 1.4006879944960441e-05,
"loss": 0.082,
"step": 61000
},
{
"epoch": 0.9066784608580274,
"grad_norm": 3.227517604827881,
"learning_rate": 1.3957737480957296e-05,
"loss": 0.0785,
"step": 61500
},
{
"epoch": 0.9140498304584992,
"grad_norm": 0.014868408441543579,
"learning_rate": 1.3908595016954151e-05,
"loss": 0.0741,
"step": 62000
},
{
"epoch": 0.921421200058971,
"grad_norm": 0.06270582973957062,
"learning_rate": 1.3859452552951008e-05,
"loss": 0.0786,
"step": 62500
},
{
"epoch": 0.9287925696594427,
"grad_norm": 0.06437293440103531,
"learning_rate": 1.3810310088947861e-05,
"loss": 0.076,
"step": 63000
},
{
"epoch": 0.9361639392599145,
"grad_norm": 0.9199370741844177,
"learning_rate": 1.3761167624944716e-05,
"loss": 0.0815,
"step": 63500
},
{
"epoch": 0.9435353088603863,
"grad_norm": 0.020321089774370193,
"learning_rate": 1.3712123445869577e-05,
"loss": 0.0837,
"step": 64000
},
{
"epoch": 0.9509066784608581,
"grad_norm": 2.2705533504486084,
"learning_rate": 1.3662980981866432e-05,
"loss": 0.0871,
"step": 64500
},
{
"epoch": 0.9582780480613298,
"grad_norm": 0.027517901733517647,
"learning_rate": 1.3613838517863287e-05,
"loss": 0.0755,
"step": 65000
},
{
"epoch": 0.9656494176618016,
"grad_norm": 1.307394027709961,
"learning_rate": 1.3564696053860143e-05,
"loss": 0.0764,
"step": 65500
},
{
"epoch": 0.9730207872622734,
"grad_norm": 2.4579734802246094,
"learning_rate": 1.3515553589856995e-05,
"loss": 0.0782,
"step": 66000
},
{
"epoch": 0.9803921568627451,
"grad_norm": 13.52622127532959,
"learning_rate": 1.3466411125853852e-05,
"loss": 0.0704,
"step": 66500
},
{
"epoch": 0.9877635264632169,
"grad_norm": 0.32894331216812134,
"learning_rate": 1.3417268661850707e-05,
"loss": 0.0838,
"step": 67000
},
{
"epoch": 0.9951348960636887,
"grad_norm": 0.08820515871047974,
"learning_rate": 1.3368126197847562e-05,
"loss": 0.0792,
"step": 67500
},
{
"epoch": 1.0,
"eval_accuracy": 0.6063492063492063,
"eval_f1": 0.6658804318243672,
"eval_loss": 0.07707133144140244,
"eval_roc_auc": 0.8010158538939403,
"eval_runtime": 92.2448,
"eval_samples_per_second": 64.882,
"eval_steps_per_second": 64.882,
"step": 67830
},
{
"epoch": 1.0025062656641603,
"grad_norm": 1.2780104875564575,
"learning_rate": 1.3318983733844415e-05,
"loss": 0.0722,
"step": 68000
},
{
"epoch": 1.0098776352646321,
"grad_norm": 0.016919715330004692,
"learning_rate": 1.3269939554769277e-05,
"loss": 0.0544,
"step": 68500
},
{
"epoch": 1.017249004865104,
"grad_norm": 0.0008094881195574999,
"learning_rate": 1.3220797090766134e-05,
"loss": 0.0589,
"step": 69000
},
{
"epoch": 1.0246203744655757,
"grad_norm": 0.06712741404771805,
"learning_rate": 1.3171752911690994e-05,
"loss": 0.0692,
"step": 69500
},
{
"epoch": 1.0319917440660475,
"grad_norm": 1.4024405479431152,
"learning_rate": 1.3122610447687849e-05,
"loss": 0.0577,
"step": 70000
},
{
"epoch": 1.0393631136665193,
"grad_norm": 3.811220407485962,
"learning_rate": 1.3073467983684702e-05,
"loss": 0.0598,
"step": 70500
},
{
"epoch": 1.046734483266991,
"grad_norm": 0.061250410974025726,
"learning_rate": 1.3024325519681557e-05,
"loss": 0.064,
"step": 71000
},
{
"epoch": 1.0541058528674627,
"grad_norm": 2.3042991161346436,
"learning_rate": 1.2975183055678412e-05,
"loss": 0.0646,
"step": 71500
},
{
"epoch": 1.0614772224679345,
"grad_norm": 0.32951870560646057,
"learning_rate": 1.2926040591675269e-05,
"loss": 0.0668,
"step": 72000
},
{
"epoch": 1.0688485920684063,
"grad_norm": 0.013675130903720856,
"learning_rate": 1.2876898127672122e-05,
"loss": 0.0576,
"step": 72500
},
{
"epoch": 1.0762199616688781,
"grad_norm": 2.3298966884613037,
"learning_rate": 1.2827755663668977e-05,
"loss": 0.0583,
"step": 73000
},
{
"epoch": 1.08359133126935,
"grad_norm": 0.04673844203352928,
"learning_rate": 1.2778809769521845e-05,
"loss": 0.0697,
"step": 73500
},
{
"epoch": 1.0909627008698215,
"grad_norm": 1.1629608869552612,
"learning_rate": 1.27296673055187e-05,
"loss": 0.0621,
"step": 74000
},
{
"epoch": 1.0983340704702933,
"grad_norm": 0.06381271779537201,
"learning_rate": 1.2680524841515553e-05,
"loss": 0.0629,
"step": 74500
},
{
"epoch": 1.105705440070765,
"grad_norm": 0.00508810393512249,
"learning_rate": 1.2631382377512408e-05,
"loss": 0.065,
"step": 75000
},
{
"epoch": 1.113076809671237,
"grad_norm": 4.200405597686768,
"learning_rate": 1.2582239913509265e-05,
"loss": 0.0704,
"step": 75500
},
{
"epoch": 1.1204481792717087,
"grad_norm": 0.18736723065376282,
"learning_rate": 1.253309744950612e-05,
"loss": 0.0683,
"step": 76000
},
{
"epoch": 1.1278195488721805,
"grad_norm": 0.09223194420337677,
"learning_rate": 1.2483954985502975e-05,
"loss": 0.0557,
"step": 76500
},
{
"epoch": 1.1351909184726523,
"grad_norm": 5.287250518798828,
"learning_rate": 1.2434812521499828e-05,
"loss": 0.0643,
"step": 77000
},
{
"epoch": 1.1425622880731239,
"grad_norm": 1.283521294593811,
"learning_rate": 1.2385670057496683e-05,
"loss": 0.0584,
"step": 77500
},
{
"epoch": 1.1499336576735957,
"grad_norm": 3.34344220161438,
"learning_rate": 1.233652759349354e-05,
"loss": 0.0673,
"step": 78000
},
{
"epoch": 1.1573050272740675,
"grad_norm": 0.23046046495437622,
"learning_rate": 1.2287385129490395e-05,
"loss": 0.0605,
"step": 78500
},
{
"epoch": 1.1646763968745393,
"grad_norm": 0.0487230159342289,
"learning_rate": 1.2238242665487248e-05,
"loss": 0.0672,
"step": 79000
},
{
"epoch": 1.172047766475011,
"grad_norm": 0.0587400384247303,
"learning_rate": 1.2189100201484103e-05,
"loss": 0.0635,
"step": 79500
},
{
"epoch": 1.1794191360754829,
"grad_norm": 0.3049776256084442,
"learning_rate": 1.2140056022408964e-05,
"loss": 0.0587,
"step": 80000
},
{
"epoch": 1.1867905056759547,
"grad_norm": 0.5761535167694092,
"learning_rate": 1.2091011843333826e-05,
"loss": 0.0706,
"step": 80500
},
{
"epoch": 1.1941618752764263,
"grad_norm": 2.524258852005005,
"learning_rate": 1.2041967664258686e-05,
"loss": 0.0607,
"step": 81000
},
{
"epoch": 1.201533244876898,
"grad_norm": 0.026634838432073593,
"learning_rate": 1.1992825200255543e-05,
"loss": 0.0581,
"step": 81500
},
{
"epoch": 1.2089046144773699,
"grad_norm": 0.39337214827537537,
"learning_rate": 1.1943682736252398e-05,
"loss": 0.0652,
"step": 82000
},
{
"epoch": 1.2162759840778417,
"grad_norm": 1.8906174898147583,
"learning_rate": 1.1894540272249251e-05,
"loss": 0.0659,
"step": 82500
},
{
"epoch": 1.2236473536783135,
"grad_norm": 0.011290138587355614,
"learning_rate": 1.1845397808246106e-05,
"loss": 0.0615,
"step": 83000
},
{
"epoch": 1.2310187232787853,
"grad_norm": 0.5536847114562988,
"learning_rate": 1.1796255344242961e-05,
"loss": 0.0666,
"step": 83500
},
{
"epoch": 1.238390092879257,
"grad_norm": 0.0035450158175081015,
"learning_rate": 1.1747112880239818e-05,
"loss": 0.0597,
"step": 84000
},
{
"epoch": 1.2457614624797286,
"grad_norm": 0.005579414777457714,
"learning_rate": 1.1697970416236671e-05,
"loss": 0.0545,
"step": 84500
},
{
"epoch": 1.2531328320802004,
"grad_norm": 0.10251569747924805,
"learning_rate": 1.1648926237161533e-05,
"loss": 0.0652,
"step": 85000
},
{
"epoch": 1.2605042016806722,
"grad_norm": 3.2004494667053223,
"learning_rate": 1.1599783773158386e-05,
"loss": 0.0546,
"step": 85500
},
{
"epoch": 1.267875571281144,
"grad_norm": 1.5647473335266113,
"learning_rate": 1.1550641309155241e-05,
"loss": 0.0661,
"step": 86000
},
{
"epoch": 1.2752469408816158,
"grad_norm": 2.5646321773529053,
"learning_rate": 1.1501498845152096e-05,
"loss": 0.0616,
"step": 86500
},
{
"epoch": 1.2826183104820876,
"grad_norm": 0.008838827721774578,
"learning_rate": 1.1452356381148953e-05,
"loss": 0.0643,
"step": 87000
},
{
"epoch": 1.2899896800825594,
"grad_norm": 0.27586570382118225,
"learning_rate": 1.1403213917145805e-05,
"loss": 0.0651,
"step": 87500
},
{
"epoch": 1.297361049683031,
"grad_norm": 2.2683589458465576,
"learning_rate": 1.1354071453142661e-05,
"loss": 0.0613,
"step": 88000
},
{
"epoch": 1.3047324192835028,
"grad_norm": 0.0017950567416846752,
"learning_rate": 1.1305027274067524e-05,
"loss": 0.0616,
"step": 88500
},
{
"epoch": 1.3121037888839746,
"grad_norm": 0.03913048282265663,
"learning_rate": 1.1255884810064377e-05,
"loss": 0.0627,
"step": 89000
},
{
"epoch": 1.3194751584844464,
"grad_norm": 5.085097312927246,
"learning_rate": 1.1206742346061232e-05,
"loss": 0.0648,
"step": 89500
},
{
"epoch": 1.3268465280849182,
"grad_norm": 0.04779289662837982,
"learning_rate": 1.1157599882058089e-05,
"loss": 0.0666,
"step": 90000
},
{
"epoch": 1.33421789768539,
"grad_norm": 0.01123060006648302,
"learning_rate": 1.1108457418054944e-05,
"loss": 0.0618,
"step": 90500
},
{
"epoch": 1.3415892672858618,
"grad_norm": 1.5869191884994507,
"learning_rate": 1.1059314954051797e-05,
"loss": 0.066,
"step": 91000
},
{
"epoch": 1.3489606368863334,
"grad_norm": 0.00517408037558198,
"learning_rate": 1.1010172490048652e-05,
"loss": 0.062,
"step": 91500
},
{
"epoch": 1.3563320064868052,
"grad_norm": 0.09691867977380753,
"learning_rate": 1.0961030026045507e-05,
"loss": 0.0606,
"step": 92000
},
{
"epoch": 1.363703376087277,
"grad_norm": 3.3921549320220947,
"learning_rate": 1.0911887562042362e-05,
"loss": 0.0643,
"step": 92500
},
{
"epoch": 1.3710747456877488,
"grad_norm": 2.874007225036621,
"learning_rate": 1.0862843382967222e-05,
"loss": 0.0622,
"step": 93000
},
{
"epoch": 1.3784461152882206,
"grad_norm": 0.03864584490656853,
"learning_rate": 1.0813897488820091e-05,
"loss": 0.0582,
"step": 93500
},
{
"epoch": 1.3858174848886924,
"grad_norm": 0.12044321745634079,
"learning_rate": 1.0764755024816945e-05,
"loss": 0.0672,
"step": 94000
},
{
"epoch": 1.3931888544891642,
"grad_norm": 3.673576593399048,
"learning_rate": 1.07156125608138e-05,
"loss": 0.0619,
"step": 94500
},
{
"epoch": 1.4005602240896358,
"grad_norm": 0.023468611761927605,
"learning_rate": 1.0666470096810655e-05,
"loss": 0.0567,
"step": 95000
},
{
"epoch": 1.4079315936901076,
"grad_norm": 1.5474720001220703,
"learning_rate": 1.061732763280751e-05,
"loss": 0.057,
"step": 95500
},
{
"epoch": 1.4153029632905794,
"grad_norm": 0.4061996340751648,
"learning_rate": 1.0568185168804366e-05,
"loss": 0.0551,
"step": 96000
},
{
"epoch": 1.4226743328910512,
"grad_norm": 1.828468918800354,
"learning_rate": 1.0519042704801218e-05,
"loss": 0.0642,
"step": 96500
},
{
"epoch": 1.430045702491523,
"grad_norm": 0.006184196099638939,
"learning_rate": 1.0469900240798075e-05,
"loss": 0.0562,
"step": 97000
},
{
"epoch": 1.4374170720919948,
"grad_norm": 0.8913156390190125,
"learning_rate": 1.0420856061722935e-05,
"loss": 0.0626,
"step": 97500
},
{
"epoch": 1.4447884416924666,
"grad_norm": 0.08904910832643509,
"learning_rate": 1.037171359771979e-05,
"loss": 0.0558,
"step": 98000
},
{
"epoch": 1.4521598112929381,
"grad_norm": 0.1683080941438675,
"learning_rate": 1.0322571133716645e-05,
"loss": 0.0644,
"step": 98500
},
{
"epoch": 1.45953118089341,
"grad_norm": 0.4399701654911041,
"learning_rate": 1.0273428669713502e-05,
"loss": 0.0608,
"step": 99000
},
{
"epoch": 1.4669025504938817,
"grad_norm": 9.481819152832031,
"learning_rate": 1.0224286205710354e-05,
"loss": 0.0597,
"step": 99500
},
{
"epoch": 1.4742739200943535,
"grad_norm": 0.10929368436336517,
"learning_rate": 1.017514374170721e-05,
"loss": 0.0616,
"step": 100000
},
{
"epoch": 1.4816452896948253,
"grad_norm": 0.0035255183465778828,
"learning_rate": 1.0126001277704065e-05,
"loss": 0.0593,
"step": 100500
},
{
"epoch": 1.4890166592952971,
"grad_norm": 0.07139890640974045,
"learning_rate": 1.007685881370092e-05,
"loss": 0.0635,
"step": 101000
},
{
"epoch": 1.496388028895769,
"grad_norm": 3.2497317790985107,
"learning_rate": 1.0027716349697774e-05,
"loss": 0.055,
"step": 101500
},
{
"epoch": 1.5037593984962405,
"grad_norm": 0.16377945244312286,
"learning_rate": 9.978573885694629e-06,
"loss": 0.0602,
"step": 102000
},
{
"epoch": 1.5111307680967123,
"grad_norm": 0.29184427857398987,
"learning_rate": 9.929431421691485e-06,
"loss": 0.0596,
"step": 102500
},
{
"epoch": 1.5185021376971841,
"grad_norm": 0.14543047547340393,
"learning_rate": 9.88028895768834e-06,
"loss": 0.0593,
"step": 103000
},
{
"epoch": 1.525873507297656,
"grad_norm": 4.776684284210205,
"learning_rate": 9.831146493685194e-06,
"loss": 0.0604,
"step": 103500
},
{
"epoch": 1.5332448768981277,
"grad_norm": 3.4175798892974854,
"learning_rate": 9.782102314610056e-06,
"loss": 0.0622,
"step": 104000
},
{
"epoch": 1.5406162464985993,
"grad_norm": 5.478698253631592,
"learning_rate": 9.73295985060691e-06,
"loss": 0.0582,
"step": 104500
},
{
"epoch": 1.5479876160990713,
"grad_norm": 0.09877605736255646,
"learning_rate": 9.683817386603766e-06,
"loss": 0.0644,
"step": 105000
},
{
"epoch": 1.555358985699543,
"grad_norm": 3.169551134109497,
"learning_rate": 9.634674922600619e-06,
"loss": 0.0672,
"step": 105500
},
{
"epoch": 1.5627303553000147,
"grad_norm": 0.0030992806423455477,
"learning_rate": 9.585532458597476e-06,
"loss": 0.0654,
"step": 106000
},
{
"epoch": 1.5701017249004865,
"grad_norm": 1.8882814645767212,
"learning_rate": 9.536488279522336e-06,
"loss": 0.0643,
"step": 106500
},
{
"epoch": 1.5774730945009583,
"grad_norm": 0.02398967184126377,
"learning_rate": 9.487345815519191e-06,
"loss": 0.0677,
"step": 107000
},
{
"epoch": 1.58484446410143,
"grad_norm": 0.010672827251255512,
"learning_rate": 9.438203351516046e-06,
"loss": 0.0637,
"step": 107500
},
{
"epoch": 1.5922158337019017,
"grad_norm": 0.018269941210746765,
"learning_rate": 9.389159172440906e-06,
"loss": 0.0624,
"step": 108000
},
{
"epoch": 1.5995872033023737,
"grad_norm": 1.7238303422927856,
"learning_rate": 9.340016708437761e-06,
"loss": 0.0595,
"step": 108500
},
{
"epoch": 1.6069585729028453,
"grad_norm": 1.6856399774551392,
"learning_rate": 9.290874244434616e-06,
"loss": 0.0572,
"step": 109000
},
{
"epoch": 1.614329942503317,
"grad_norm": 0.08445548266172409,
"learning_rate": 9.241731780431471e-06,
"loss": 0.0617,
"step": 109500
},
{
"epoch": 1.6217013121037889,
"grad_norm": 2.7674472332000732,
"learning_rate": 9.192589316428326e-06,
"loss": 0.061,
"step": 110000
},
{
"epoch": 1.6290726817042607,
"grad_norm": 6.365856647491455,
"learning_rate": 9.143446852425181e-06,
"loss": 0.0548,
"step": 110500
},
{
"epoch": 1.6364440513047325,
"grad_norm": 1.224268913269043,
"learning_rate": 9.094304388422036e-06,
"loss": 0.0621,
"step": 111000
},
{
"epoch": 1.643815420905204,
"grad_norm": 0.021649343892931938,
"learning_rate": 9.045161924418891e-06,
"loss": 0.0659,
"step": 111500
},
{
"epoch": 1.651186790505676,
"grad_norm": 0.012330977246165276,
"learning_rate": 8.996117745343752e-06,
"loss": 0.0602,
"step": 112000
},
{
"epoch": 1.6585581601061476,
"grad_norm": 0.6574206948280334,
"learning_rate": 8.946975281340607e-06,
"loss": 0.0617,
"step": 112500
},
{
"epoch": 1.6659295297066194,
"grad_norm": 0.018540961667895317,
"learning_rate": 8.897832817337462e-06,
"loss": 0.0602,
"step": 113000
},
{
"epoch": 1.6733008993070912,
"grad_norm": 5.6425557136535645,
"learning_rate": 8.848690353334317e-06,
"loss": 0.0597,
"step": 113500
},
{
"epoch": 1.680672268907563,
"grad_norm": 2.435633420944214,
"learning_rate": 8.799646174259177e-06,
"loss": 0.0624,
"step": 114000
},
{
"epoch": 1.6880436385080348,
"grad_norm": 1.5483721494674683,
"learning_rate": 8.750503710256032e-06,
"loss": 0.0614,
"step": 114500
},
{
"epoch": 1.6954150081085064,
"grad_norm": 0.06437569856643677,
"learning_rate": 8.701361246252887e-06,
"loss": 0.0594,
"step": 115000
},
{
"epoch": 1.7027863777089784,
"grad_norm": 0.29250073432922363,
"learning_rate": 8.652218782249742e-06,
"loss": 0.0559,
"step": 115500
},
{
"epoch": 1.71015774730945,
"grad_norm": 0.5888819098472595,
"learning_rate": 8.603076318246597e-06,
"loss": 0.0659,
"step": 116000
},
{
"epoch": 1.7175291169099218,
"grad_norm": 0.2926543354988098,
"learning_rate": 8.55403213917146e-06,
"loss": 0.063,
"step": 116500
},
{
"epoch": 1.7249004865103936,
"grad_norm": 2.316805601119995,
"learning_rate": 8.50498796009632e-06,
"loss": 0.0544,
"step": 117000
},
{
"epoch": 1.7322718561108654,
"grad_norm": 0.018379326909780502,
"learning_rate": 8.455845496093175e-06,
"loss": 0.0663,
"step": 117500
},
{
"epoch": 1.7396432257113372,
"grad_norm": 0.014781077392399311,
"learning_rate": 8.40670303209003e-06,
"loss": 0.0573,
"step": 118000
},
{
"epoch": 1.7470145953118088,
"grad_norm": 2.3919591903686523,
"learning_rate": 8.357560568086885e-06,
"loss": 0.0592,
"step": 118500
},
{
"epoch": 1.7543859649122808,
"grad_norm": 2.7669119834899902,
"learning_rate": 8.30841810408374e-06,
"loss": 0.0644,
"step": 119000
},
{
"epoch": 1.7617573345127524,
"grad_norm": 3.5739755630493164,
"learning_rate": 8.259275640080595e-06,
"loss": 0.0572,
"step": 119500
},
{
"epoch": 1.7691287041132242,
"grad_norm": 0.4299847483634949,
"learning_rate": 8.21013317607745e-06,
"loss": 0.0664,
"step": 120000
},
{
"epoch": 1.776500073713696,
"grad_norm": 0.9990677833557129,
"learning_rate": 8.16108899700231e-06,
"loss": 0.0638,
"step": 120500
},
{
"epoch": 1.7838714433141678,
"grad_norm": 2.1424782276153564,
"learning_rate": 8.111946532999165e-06,
"loss": 0.0572,
"step": 121000
},
{
"epoch": 1.7912428129146396,
"grad_norm": 4.301726341247559,
"learning_rate": 8.06280406899602e-06,
"loss": 0.0596,
"step": 121500
},
{
"epoch": 1.7986141825151112,
"grad_norm": 8.399239540100098,
"learning_rate": 8.013661604992875e-06,
"loss": 0.0667,
"step": 122000
},
{
"epoch": 1.8059855521155832,
"grad_norm": 0.00977667048573494,
"learning_rate": 7.96451914098973e-06,
"loss": 0.0596,
"step": 122500
},
{
"epoch": 1.8133569217160548,
"grad_norm": 0.019315605983138084,
"learning_rate": 7.915376676986585e-06,
"loss": 0.0607,
"step": 123000
},
{
"epoch": 1.8207282913165266,
"grad_norm": 0.011183898895978928,
"learning_rate": 7.866332497911446e-06,
"loss": 0.0656,
"step": 123500
},
{
"epoch": 1.8280996609169984,
"grad_norm": 0.1689341515302658,
"learning_rate": 7.8171900339083e-06,
"loss": 0.0571,
"step": 124000
},
{
"epoch": 1.8354710305174702,
"grad_norm": 7.370288848876953,
"learning_rate": 7.768047569905156e-06,
"loss": 0.0627,
"step": 124500
},
{
"epoch": 1.842842400117942,
"grad_norm": 0.058736398816108704,
"learning_rate": 7.719003390830016e-06,
"loss": 0.0511,
"step": 125000
},
{
"epoch": 1.8502137697184136,
"grad_norm": 0.014665275812149048,
"learning_rate": 7.669860926826873e-06,
"loss": 0.0624,
"step": 125500
},
{
"epoch": 1.8575851393188856,
"grad_norm": 0.572428286075592,
"learning_rate": 7.620718462823726e-06,
"loss": 0.0607,
"step": 126000
},
{
"epoch": 1.8649565089193572,
"grad_norm": 0.4777454733848572,
"learning_rate": 7.571575998820582e-06,
"loss": 0.0606,
"step": 126500
},
{
"epoch": 1.872327878519829,
"grad_norm": 0.020159974694252014,
"learning_rate": 7.522433534817436e-06,
"loss": 0.0539,
"step": 127000
},
{
"epoch": 1.8796992481203008,
"grad_norm": 0.010219153016805649,
"learning_rate": 7.473291070814291e-06,
"loss": 0.0593,
"step": 127500
},
{
"epoch": 1.8870706177207726,
"grad_norm": 1.854982614517212,
"learning_rate": 7.424148606811146e-06,
"loss": 0.0636,
"step": 128000
},
{
"epoch": 1.8944419873212444,
"grad_norm": 0.6811599135398865,
"learning_rate": 7.375006142808001e-06,
"loss": 0.0598,
"step": 128500
},
{
"epoch": 1.901813356921716,
"grad_norm": 0.2864709496498108,
"learning_rate": 7.325863678804855e-06,
"loss": 0.0604,
"step": 129000
},
{
"epoch": 1.909184726522188,
"grad_norm": 0.030090967193245888,
"learning_rate": 7.276721214801711e-06,
"loss": 0.0602,
"step": 129500
},
{
"epoch": 1.9165560961226595,
"grad_norm": 4.568465232849121,
"learning_rate": 7.227578750798565e-06,
"loss": 0.0571,
"step": 130000
},
{
"epoch": 1.9239274657231313,
"grad_norm": 2.8999075889587402,
"learning_rate": 7.17843628679542e-06,
"loss": 0.0556,
"step": 130500
},
{
"epoch": 1.9312988353236031,
"grad_norm": 2.021425485610962,
"learning_rate": 7.129392107720282e-06,
"loss": 0.0593,
"step": 131000
},
{
"epoch": 1.938670204924075,
"grad_norm": 5.252723217010498,
"learning_rate": 7.080249643717136e-06,
"loss": 0.054,
"step": 131500
},
{
"epoch": 1.9460415745245467,
"grad_norm": 8.669822692871094,
"learning_rate": 7.031107179713991e-06,
"loss": 0.0622,
"step": 132000
},
{
"epoch": 1.9534129441250183,
"grad_norm": 2.219619035720825,
"learning_rate": 6.981964715710846e-06,
"loss": 0.0656,
"step": 132500
},
{
"epoch": 1.9607843137254903,
"grad_norm": 0.023053865879774094,
"learning_rate": 6.932822251707701e-06,
"loss": 0.0628,
"step": 133000
},
{
"epoch": 1.968155683325962,
"grad_norm": 0.3438442647457123,
"learning_rate": 6.8836797877045555e-06,
"loss": 0.0593,
"step": 133500
},
{
"epoch": 1.9755270529264337,
"grad_norm": 0.0067783379927277565,
"learning_rate": 6.834537323701411e-06,
"loss": 0.0639,
"step": 134000
},
{
"epoch": 1.9828984225269055,
"grad_norm": 1.7491209506988525,
"learning_rate": 6.7853948596982655e-06,
"loss": 0.0518,
"step": 134500
},
{
"epoch": 1.9902697921273773,
"grad_norm": 0.0214830469340086,
"learning_rate": 6.736448965551133e-06,
"loss": 0.0499,
"step": 135000
},
{
"epoch": 1.9976411617278491,
"grad_norm": 2.5241074562072754,
"learning_rate": 6.687306501547988e-06,
"loss": 0.0649,
"step": 135500
},
{
"epoch": 2.0,
"eval_accuracy": 0.6497911445279866,
"eval_f1": 0.6859684311502573,
"eval_loss": 0.08295563608407974,
"eval_roc_auc": 0.8222563704452553,
"eval_runtime": 89.7963,
"eval_samples_per_second": 66.651,
"eval_steps_per_second": 66.651,
"step": 135660
},
{
"epoch": 2.0050125313283207,
"grad_norm": 3.550609588623047,
"learning_rate": 6.638164037544843e-06,
"loss": 0.0348,
"step": 136000
},
{
"epoch": 2.0123839009287927,
"grad_norm": 0.014567219652235508,
"learning_rate": 6.589021573541698e-06,
"loss": 0.034,
"step": 136500
},
{
"epoch": 2.0197552705292643,
"grad_norm": 4.3217291831970215,
"learning_rate": 6.539977394466559e-06,
"loss": 0.0346,
"step": 137000
},
{
"epoch": 2.0271266401297363,
"grad_norm": 0.12837082147598267,
"learning_rate": 6.490834930463414e-06,
"loss": 0.0402,
"step": 137500
},
{
"epoch": 2.034498009730208,
"grad_norm": 0.08076170086860657,
"learning_rate": 6.4416924664602685e-06,
"loss": 0.0269,
"step": 138000
},
{
"epoch": 2.0418693793306795,
"grad_norm": 0.004311998374760151,
"learning_rate": 6.392550002457124e-06,
"loss": 0.0273,
"step": 138500
},
{
"epoch": 2.0492407489311515,
"grad_norm": 0.0007999803638085723,
"learning_rate": 6.3434075384539785e-06,
"loss": 0.0263,
"step": 139000
},
{
"epoch": 2.056612118531623,
"grad_norm": 0.004331584554165602,
"learning_rate": 6.294461644306846e-06,
"loss": 0.0301,
"step": 139500
},
{
"epoch": 2.063983488132095,
"grad_norm": 0.013063879683613777,
"learning_rate": 6.245417465231706e-06,
"loss": 0.03,
"step": 140000
},
{
"epoch": 2.0713548577325667,
"grad_norm": 0.01823696680366993,
"learning_rate": 6.196275001228562e-06,
"loss": 0.0343,
"step": 140500
},
{
"epoch": 2.0787262273330387,
"grad_norm": 7.280787944793701,
"learning_rate": 6.147132537225416e-06,
"loss": 0.0261,
"step": 141000
},
{
"epoch": 2.0860975969335103,
"grad_norm": 0.0030333856120705605,
"learning_rate": 6.097990073222272e-06,
"loss": 0.0298,
"step": 141500
},
{
"epoch": 2.093468966533982,
"grad_norm": null,
"learning_rate": 6.0489458941471335e-06,
"loss": 0.0241,
"step": 142000
},
{
"epoch": 2.100840336134454,
"grad_norm": 46.87717056274414,
"learning_rate": 5.999803430143988e-06,
"loss": 0.0352,
"step": 142500
},
{
"epoch": 2.1082117057349254,
"grad_norm": 0.008375998586416245,
"learning_rate": 5.9506609661408435e-06,
"loss": 0.0283,
"step": 143000
},
{
"epoch": 2.1155830753353975,
"grad_norm": 0.0009784572757780552,
"learning_rate": 5.901518502137698e-06,
"loss": 0.0271,
"step": 143500
},
{
"epoch": 2.122954444935869,
"grad_norm": 0.0016769421054050326,
"learning_rate": 5.852376038134553e-06,
"loss": 0.0233,
"step": 144000
},
{
"epoch": 2.1303258145363406,
"grad_norm": 0.006961928680539131,
"learning_rate": 5.803233574131407e-06,
"loss": 0.0294,
"step": 144500
},
{
"epoch": 2.1376971841368126,
"grad_norm": 0.005140836350619793,
"learning_rate": 5.754091110128263e-06,
"loss": 0.039,
"step": 145000
},
{
"epoch": 2.1450685537372842,
"grad_norm": 0.001053107320331037,
"learning_rate": 5.704948646125117e-06,
"loss": 0.0309,
"step": 145500
},
{
"epoch": 2.1524399233377562,
"grad_norm": 0.0007632673368789256,
"learning_rate": 5.655806182121973e-06,
"loss": 0.0363,
"step": 146000
},
{
"epoch": 2.159811292938228,
"grad_norm": 0.4370366036891937,
"learning_rate": 5.606762003046833e-06,
"loss": 0.0317,
"step": 146500
},
{
"epoch": 2.1671826625387,
"grad_norm": 0.00013254112855065614,
"learning_rate": 5.557619539043688e-06,
"loss": 0.0284,
"step": 147000
},
{
"epoch": 2.1745540321391714,
"grad_norm": 6.598239421844482,
"learning_rate": 5.508477075040542e-06,
"loss": 0.0269,
"step": 147500
},
{
"epoch": 2.181925401739643,
"grad_norm": 0.021487107500433922,
"learning_rate": 5.459334611037398e-06,
"loss": 0.0382,
"step": 148000
},
{
"epoch": 2.189296771340115,
"grad_norm": 11.90185546875,
"learning_rate": 5.410192147034252e-06,
"loss": 0.0372,
"step": 148500
},
{
"epoch": 2.1966681409405866,
"grad_norm": 0.0010807571234181523,
"learning_rate": 5.361049683031107e-06,
"loss": 0.0294,
"step": 149000
},
{
"epoch": 2.2040395105410586,
"grad_norm": 0.024999860674142838,
"learning_rate": 5.311907219027963e-06,
"loss": 0.031,
"step": 149500
},
{
"epoch": 2.21141088014153,
"grad_norm": 0.011472758837044239,
"learning_rate": 5.2628630399528235e-06,
"loss": 0.0253,
"step": 150000
},
{
"epoch": 2.218782249742002,
"grad_norm": 0.0003523350169416517,
"learning_rate": 5.213720575949679e-06,
"loss": 0.0312,
"step": 150500
},
{
"epoch": 2.226153619342474,
"grad_norm": 0.00018138001905754209,
"learning_rate": 5.1645781119465335e-06,
"loss": 0.0306,
"step": 151000
},
{
"epoch": 2.2335249889429454,
"grad_norm": 27.588563919067383,
"learning_rate": 5.1154356479433885e-06,
"loss": 0.0257,
"step": 151500
},
{
"epoch": 2.2408963585434174,
"grad_norm": 0.0001788044028216973,
"learning_rate": 5.066293183940243e-06,
"loss": 0.0364,
"step": 152000
},
{
"epoch": 2.248267728143889,
"grad_norm": 0.025098439306020737,
"learning_rate": 5.017347289793111e-06,
"loss": 0.033,
"step": 152500
},
{
"epoch": 2.255639097744361,
"grad_norm": 0.028794238343834877,
"learning_rate": 4.968204825789966e-06,
"loss": 0.0268,
"step": 153000
},
{
"epoch": 2.2630104673448326,
"grad_norm": 0.0443144217133522,
"learning_rate": 4.91906236178682e-06,
"loss": 0.0277,
"step": 153500
},
{
"epoch": 2.2703818369453046,
"grad_norm": 5.745356559753418,
"learning_rate": 4.869919897783675e-06,
"loss": 0.0319,
"step": 154000
},
{
"epoch": 2.277753206545776,
"grad_norm": 0.01881660334765911,
"learning_rate": 4.82077743378053e-06,
"loss": 0.0321,
"step": 154500
},
{
"epoch": 2.2851245761462478,
"grad_norm": 0.0014673862606287003,
"learning_rate": 4.771634969777385e-06,
"loss": 0.0325,
"step": 155000
},
{
"epoch": 2.2924959457467198,
"grad_norm": 0.001017951057292521,
"learning_rate": 4.72249250577424e-06,
"loss": 0.0369,
"step": 155500
},
{
"epoch": 2.2998673153471914,
"grad_norm": 0.0014965501613914967,
"learning_rate": 4.673350041771095e-06,
"loss": 0.0349,
"step": 156000
},
{
"epoch": 2.3072386849476634,
"grad_norm": 0.0005304542719386518,
"learning_rate": 4.6243058626959555e-06,
"loss": 0.0303,
"step": 156500
},
{
"epoch": 2.314610054548135,
"grad_norm": 0.0002412071480648592,
"learning_rate": 4.575261683620817e-06,
"loss": 0.0331,
"step": 157000
},
{
"epoch": 2.321981424148607,
"grad_norm": 0.0006970348185859621,
"learning_rate": 4.526119219617672e-06,
"loss": 0.0343,
"step": 157500
},
{
"epoch": 2.3293527937490786,
"grad_norm": 0.011668604798614979,
"learning_rate": 4.476976755614527e-06,
"loss": 0.0279,
"step": 158000
},
{
"epoch": 2.33672416334955,
"grad_norm": 0.005698871333152056,
"learning_rate": 4.427834291611382e-06,
"loss": 0.0322,
"step": 158500
},
{
"epoch": 2.344095532950022,
"grad_norm": 0.0057191732339560986,
"learning_rate": 4.378691827608237e-06,
"loss": 0.0314,
"step": 159000
},
{
"epoch": 2.3514669025504937,
"grad_norm": 0.00243947422131896,
"learning_rate": 4.329549363605092e-06,
"loss": 0.0267,
"step": 159500
},
{
"epoch": 2.3588382721509658,
"grad_norm": 0.00033369645825587213,
"learning_rate": 4.280406899601947e-06,
"loss": 0.0244,
"step": 160000
},
{
"epoch": 2.3662096417514373,
"grad_norm": 0.023830311372876167,
"learning_rate": 4.231264435598802e-06,
"loss": 0.0283,
"step": 160500
},
{
"epoch": 2.3735810113519094,
"grad_norm": 0.000461634888779372,
"learning_rate": 4.182121971595656e-06,
"loss": 0.0285,
"step": 161000
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.001816113363020122,
"learning_rate": 4.132979507592511e-06,
"loss": 0.03,
"step": 161500
},
{
"epoch": 2.3883237505528525,
"grad_norm": 0.001825949759222567,
"learning_rate": 4.083837043589366e-06,
"loss": 0.0294,
"step": 162000
},
{
"epoch": 2.3956951201533245,
"grad_norm": 9.502708435058594,
"learning_rate": 4.034792864514227e-06,
"loss": 0.0255,
"step": 162500
},
{
"epoch": 2.403066489753796,
"grad_norm": 0.0001642414426896721,
"learning_rate": 3.985650400511082e-06,
"loss": 0.0255,
"step": 163000
},
{
"epoch": 2.410437859354268,
"grad_norm": 5.004094123840332,
"learning_rate": 3.936507936507936e-06,
"loss": 0.0304,
"step": 163500
},
{
"epoch": 2.4178092289547397,
"grad_norm": 0.1878451257944107,
"learning_rate": 3.887365472504791e-06,
"loss": 0.0236,
"step": 164000
},
{
"epoch": 2.4251805985552117,
"grad_norm": 0.0004031589487567544,
"learning_rate": 3.838223008501646e-06,
"loss": 0.0316,
"step": 164500
},
{
"epoch": 2.4325519681556833,
"grad_norm": 0.0004982321988791227,
"learning_rate": 3.7890805444985013e-06,
"loss": 0.0277,
"step": 165000
},
{
"epoch": 2.439923337756155,
"grad_norm": 0.0037121805362403393,
"learning_rate": 3.739938080495356e-06,
"loss": 0.0303,
"step": 165500
},
{
"epoch": 2.447294707356627,
"grad_norm": 0.0101530272513628,
"learning_rate": 3.690795616492211e-06,
"loss": 0.0371,
"step": 166000
},
{
"epoch": 2.4546660769570985,
"grad_norm": 0.00018950540106743574,
"learning_rate": 3.6416531524890663e-06,
"loss": 0.0319,
"step": 166500
},
{
"epoch": 2.4620374465575705,
"grad_norm": 0.8489145040512085,
"learning_rate": 3.5925106884859213e-06,
"loss": 0.0261,
"step": 167000
},
{
"epoch": 2.469408816158042,
"grad_norm": 0.0013942194636911154,
"learning_rate": 3.543368224482776e-06,
"loss": 0.0283,
"step": 167500
},
{
"epoch": 2.476780185758514,
"grad_norm": 0.4112614691257477,
"learning_rate": 3.494225760479631e-06,
"loss": 0.0332,
"step": 168000
},
{
"epoch": 2.4841515553589857,
"grad_norm": 0.0010579220252111554,
"learning_rate": 3.445181581404492e-06,
"loss": 0.0288,
"step": 168500
},
{
"epoch": 2.4915229249594573,
"grad_norm": 0.0012302091345191002,
"learning_rate": 3.396137402329353e-06,
"loss": 0.0272,
"step": 169000
},
{
"epoch": 2.4988942945599293,
"grad_norm": 0.0008321187924593687,
"learning_rate": 3.3470932232542143e-06,
"loss": 0.0251,
"step": 169500
},
{
"epoch": 2.506265664160401,
"grad_norm": 2.113279342651367,
"learning_rate": 3.297950759251069e-06,
"loss": 0.0306,
"step": 170000
},
{
"epoch": 2.513637033760873,
"grad_norm": 0.030419809743762016,
"learning_rate": 3.248808295247924e-06,
"loss": 0.033,
"step": 170500
},
{
"epoch": 2.5210084033613445,
"grad_norm": 0.002908308058977127,
"learning_rate": 3.199665831244779e-06,
"loss": 0.0333,
"step": 171000
},
{
"epoch": 2.5283797729618165,
"grad_norm": 0.01623060740530491,
"learning_rate": 3.1505233672416334e-06,
"loss": 0.0347,
"step": 171500
},
{
"epoch": 2.535751142562288,
"grad_norm": 0.0023743058554828167,
"learning_rate": 3.1013809032384884e-06,
"loss": 0.029,
"step": 172000
},
{
"epoch": 2.5431225121627596,
"grad_norm": 0.0017329910770058632,
"learning_rate": 3.0522384392353434e-06,
"loss": 0.0399,
"step": 172500
},
{
"epoch": 2.5504938817632317,
"grad_norm": 0.003035512287169695,
"learning_rate": 3.003095975232199e-06,
"loss": 0.0292,
"step": 173000
},
{
"epoch": 2.5578652513637032,
"grad_norm": 0.016472771763801575,
"learning_rate": 2.9539535112290534e-06,
"loss": 0.0295,
"step": 173500
},
{
"epoch": 2.5652366209641753,
"grad_norm": 0.19270442426204681,
"learning_rate": 2.9048110472259084e-06,
"loss": 0.0237,
"step": 174000
},
{
"epoch": 2.572607990564647,
"grad_norm": 14.15371322631836,
"learning_rate": 2.855668583222763e-06,
"loss": 0.031,
"step": 174500
},
{
"epoch": 2.579979360165119,
"grad_norm": 0.0006798787508159876,
"learning_rate": 2.806526119219618e-06,
"loss": 0.0348,
"step": 175000
},
{
"epoch": 2.5873507297655904,
"grad_norm": 0.0007228073664009571,
"learning_rate": 2.757383655216473e-06,
"loss": 0.0266,
"step": 175500
},
{
"epoch": 2.594722099366062,
"grad_norm": 0.003529267618432641,
"learning_rate": 2.708339476141334e-06,
"loss": 0.025,
"step": 176000
},
{
"epoch": 2.602093468966534,
"grad_norm": 0.004369991831481457,
"learning_rate": 2.659197012138189e-06,
"loss": 0.0343,
"step": 176500
},
{
"epoch": 2.6094648385670056,
"grad_norm": 0.0011527182068675756,
"learning_rate": 2.610054548135044e-06,
"loss": 0.0344,
"step": 177000
},
{
"epoch": 2.6168362081674776,
"grad_norm": 0.0012939295265823603,
"learning_rate": 2.5609120841318984e-06,
"loss": 0.023,
"step": 177500
},
{
"epoch": 2.624207577767949,
"grad_norm": 0.004880073014646769,
"learning_rate": 2.5117696201287534e-06,
"loss": 0.0226,
"step": 178000
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.004119515884667635,
"learning_rate": 2.4626271561256084e-06,
"loss": 0.0217,
"step": 178500
},
{
"epoch": 2.638950316968893,
"grad_norm": 0.0001360880269203335,
"learning_rate": 2.4135829770504697e-06,
"loss": 0.0318,
"step": 179000
},
{
"epoch": 2.6463216865693644,
"grad_norm": 0.09521844983100891,
"learning_rate": 2.3644405130473242e-06,
"loss": 0.0382,
"step": 179500
},
{
"epoch": 2.6536930561698364,
"grad_norm": 0.0037165977992117405,
"learning_rate": 2.3152980490441792e-06,
"loss": 0.0277,
"step": 180000
},
{
"epoch": 2.661064425770308,
"grad_norm": 0.0008691260009072721,
"learning_rate": 2.2661555850410342e-06,
"loss": 0.0322,
"step": 180500
},
{
"epoch": 2.66843579537078,
"grad_norm": 1.0185267925262451,
"learning_rate": 2.217111405965895e-06,
"loss": 0.0287,
"step": 181000
},
{
"epoch": 2.6758071649712516,
"grad_norm": 11.761468887329102,
"learning_rate": 2.1679689419627505e-06,
"loss": 0.0269,
"step": 181500
},
{
"epoch": 2.6831785345717236,
"grad_norm": 0.00046698356163688004,
"learning_rate": 2.118826477959605e-06,
"loss": 0.0324,
"step": 182000
},
{
"epoch": 2.690549904172195,
"grad_norm": 0.010263352654874325,
"learning_rate": 2.0698805838124726e-06,
"loss": 0.0239,
"step": 182500
},
{
"epoch": 2.6979212737726668,
"grad_norm": 3.434927463531494,
"learning_rate": 2.020738119809327e-06,
"loss": 0.0335,
"step": 183000
},
{
"epoch": 2.705292643373139,
"grad_norm": 0.00016977268387563527,
"learning_rate": 1.971595655806182e-06,
"loss": 0.0275,
"step": 183500
},
{
"epoch": 2.7126640129736104,
"grad_norm": 0.0012544383062049747,
"learning_rate": 1.922453191803037e-06,
"loss": 0.0312,
"step": 184000
},
{
"epoch": 2.7200353825740824,
"grad_norm": 0.0017481895629316568,
"learning_rate": 1.8733107277998921e-06,
"loss": 0.0244,
"step": 184500
},
{
"epoch": 2.727406752174554,
"grad_norm": 0.001362023875117302,
"learning_rate": 1.824168263796747e-06,
"loss": 0.0242,
"step": 185000
},
{
"epoch": 2.734778121775026,
"grad_norm": 0.0014405859401449561,
"learning_rate": 1.775025799793602e-06,
"loss": 0.0304,
"step": 185500
},
{
"epoch": 2.7421494913754976,
"grad_norm": 0.0008861696696840227,
"learning_rate": 1.7258833357904567e-06,
"loss": 0.0278,
"step": 186000
},
{
"epoch": 2.749520860975969,
"grad_norm": 0.12847045063972473,
"learning_rate": 1.6767408717873115e-06,
"loss": 0.0244,
"step": 186500
},
{
"epoch": 2.756892230576441,
"grad_norm": 0.001703253947198391,
"learning_rate": 1.6275984077841663e-06,
"loss": 0.0356,
"step": 187000
},
{
"epoch": 2.7642636001769127,
"grad_norm": 0.003184770466759801,
"learning_rate": 1.5784559437810213e-06,
"loss": 0.0234,
"step": 187500
},
{
"epoch": 2.7716349697773848,
"grad_norm": 5.178366661071777,
"learning_rate": 1.529313479777876e-06,
"loss": 0.0272,
"step": 188000
},
{
"epoch": 2.7790063393778563,
"grad_norm": 0.0035832468420267105,
"learning_rate": 1.4801710157747309e-06,
"loss": 0.0227,
"step": 188500
},
{
"epoch": 2.7863777089783284,
"grad_norm": 0.007811425253748894,
"learning_rate": 1.4311268366995923e-06,
"loss": 0.03,
"step": 189000
},
{
"epoch": 2.7937490785788,
"grad_norm": 12.170905113220215,
"learning_rate": 1.3819843726964471e-06,
"loss": 0.028,
"step": 189500
},
{
"epoch": 2.8011204481792715,
"grad_norm": 0.0028726314194500446,
"learning_rate": 1.3329401936213082e-06,
"loss": 0.0303,
"step": 190000
},
{
"epoch": 2.8084918177797435,
"grad_norm": 30.53835678100586,
"learning_rate": 1.283797729618163e-06,
"loss": 0.0321,
"step": 190500
},
{
"epoch": 2.815863187380215,
"grad_norm": 0.006513836327940226,
"learning_rate": 1.234655265615018e-06,
"loss": 0.0235,
"step": 191000
},
{
"epoch": 2.823234556980687,
"grad_norm": 7.619091510772705,
"learning_rate": 1.185512801611873e-06,
"loss": 0.0307,
"step": 191500
},
{
"epoch": 2.8306059265811587,
"grad_norm": 0.005691774655133486,
"learning_rate": 1.1363703376087278e-06,
"loss": 0.0257,
"step": 192000
},
{
"epoch": 2.8379772961816307,
"grad_norm": 2.5761618614196777,
"learning_rate": 1.0872278736055828e-06,
"loss": 0.0226,
"step": 192500
},
{
"epoch": 2.8453486657821023,
"grad_norm": 0.00010429321264382452,
"learning_rate": 1.0380854096024375e-06,
"loss": 0.0242,
"step": 193000
},
{
"epoch": 2.852720035382574,
"grad_norm": 0.02838067337870598,
"learning_rate": 9.889429455992925e-07,
"loss": 0.0287,
"step": 193500
},
{
"epoch": 2.860091404983046,
"grad_norm": 0.12736959755420685,
"learning_rate": 9.398004815961473e-07,
"loss": 0.0256,
"step": 194000
},
{
"epoch": 2.8674627745835175,
"grad_norm": 0.32672008872032166,
"learning_rate": 8.907563025210085e-07,
"loss": 0.0265,
"step": 194500
},
{
"epoch": 2.8748341441839895,
"grad_norm": 0.000785826297942549,
"learning_rate": 8.416138385178634e-07,
"loss": 0.0339,
"step": 195000
},
{
"epoch": 2.882205513784461,
"grad_norm": 6.065654754638672,
"learning_rate": 7.924713745147182e-07,
"loss": 0.0337,
"step": 195500
},
{
"epoch": 2.889576883384933,
"grad_norm": 0.012824644334614277,
"learning_rate": 7.433289105115731e-07,
"loss": 0.0278,
"step": 196000
},
{
"epoch": 2.8969482529854047,
"grad_norm": 0.011444471776485443,
"learning_rate": 6.941864465084281e-07,
"loss": 0.0368,
"step": 196500
},
{
"epoch": 2.9043196225858763,
"grad_norm": 0.0061572156846523285,
"learning_rate": 6.450439825052828e-07,
"loss": 0.0289,
"step": 197000
},
{
"epoch": 2.9116909921863483,
"grad_norm": 0.010506005957722664,
"learning_rate": 5.95999803430144e-07,
"loss": 0.022,
"step": 197500
},
{
"epoch": 2.91906236178682,
"grad_norm": 0.00013557464990299195,
"learning_rate": 5.468573394269989e-07,
"loss": 0.0313,
"step": 198000
},
{
"epoch": 2.926433731387292,
"grad_norm": 0.0008798382477834821,
"learning_rate": 4.977148754238538e-07,
"loss": 0.0302,
"step": 198500
},
{
"epoch": 2.9338051009877635,
"grad_norm": 0.11164344847202301,
"learning_rate": 4.485724114207086e-07,
"loss": 0.0234,
"step": 199000
},
{
"epoch": 2.9411764705882355,
"grad_norm": 17.183870315551758,
"learning_rate": 3.9942994741756357e-07,
"loss": 0.0294,
"step": 199500
},
{
"epoch": 2.948547840188707,
"grad_norm": 14.767284393310547,
"learning_rate": 3.502874834144184e-07,
"loss": 0.0277,
"step": 200000
},
{
"epoch": 2.9559192097891787,
"grad_norm": 0.0102895712479949,
"learning_rate": 3.012433043392796e-07,
"loss": 0.0307,
"step": 200500
},
{
"epoch": 2.9632905793896507,
"grad_norm": 0.0019515061285346746,
"learning_rate": 2.5210084033613445e-07,
"loss": 0.0345,
"step": 201000
},
{
"epoch": 2.9706619489901223,
"grad_norm": 0.5170195698738098,
"learning_rate": 2.0295837633298937e-07,
"loss": 0.0282,
"step": 201500
},
{
"epoch": 2.9780333185905943,
"grad_norm": 0.00022552709560841322,
"learning_rate": 1.5381591232984424e-07,
"loss": 0.0294,
"step": 202000
},
{
"epoch": 2.985404688191066,
"grad_norm": 0.004490272141993046,
"learning_rate": 1.0467344832669911e-07,
"loss": 0.0223,
"step": 202500
},
{
"epoch": 2.992776057791538,
"grad_norm": 0.0005161833250895143,
"learning_rate": 5.562926925156028e-08,
"loss": 0.0247,
"step": 203000
},
{
"epoch": 3.0,
"eval_accuracy": 0.6753550543024227,
"eval_f1": 0.6936652741069145,
"eval_loss": 0.12206839770078659,
"eval_roc_auc": 0.833023672143383,
"eval_runtime": 94.2338,
"eval_samples_per_second": 63.512,
"eval_steps_per_second": 63.512,
"step": 203490
}
],
"logging_steps": 500,
"max_steps": 203490,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.1521488766625792e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}