Invalid JSON:
Unexpected token 'I', ..."ad_norm": Infinity,
"... is not valid JSON
| { | |
| "best_global_step": 203490, | |
| "best_metric": 0.6936652741069145, | |
| "best_model_checkpoint": "modernbert-heritage-category/checkpoint-203490", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 203490, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007371369600471768, | |
| "grad_norm": 1.2525012493133545, | |
| "learning_rate": 1.995105410585287e-05, | |
| "loss": 0.1758, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.014742739200943536, | |
| "grad_norm": 0.9046939611434937, | |
| "learning_rate": 1.990200992677773e-05, | |
| "loss": 0.1536, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.022114108801415303, | |
| "grad_norm": 1.5520201921463013, | |
| "learning_rate": 1.9852867462774585e-05, | |
| "loss": 0.1429, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.02948547840188707, | |
| "grad_norm": 2.7046384811401367, | |
| "learning_rate": 1.980372499877144e-05, | |
| "loss": 0.1292, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.03685684800235884, | |
| "grad_norm": 0.7337467670440674, | |
| "learning_rate": 1.9754582534768295e-05, | |
| "loss": 0.1229, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.044228217602830605, | |
| "grad_norm": 0.6729874610900879, | |
| "learning_rate": 1.970544007076515e-05, | |
| "loss": 0.1193, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.05159958720330237, | |
| "grad_norm": 6.235721111297607, | |
| "learning_rate": 1.9656592461546026e-05, | |
| "loss": 0.1182, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.05897095680377414, | |
| "grad_norm": 0.02936830371618271, | |
| "learning_rate": 1.9607449997542877e-05, | |
| "loss": 0.1183, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.06634232640424591, | |
| "grad_norm": 0.028409462422132492, | |
| "learning_rate": 1.9558307533539732e-05, | |
| "loss": 0.1164, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.07371369600471768, | |
| "grad_norm": 4.09944486618042, | |
| "learning_rate": 1.9509165069536587e-05, | |
| "loss": 0.1053, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.08108506560518944, | |
| "grad_norm": 3.7119829654693604, | |
| "learning_rate": 1.9460022605533442e-05, | |
| "loss": 0.1118, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.08845643520566121, | |
| "grad_norm": 1.3968170881271362, | |
| "learning_rate": 1.9410880141530297e-05, | |
| "loss": 0.1092, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.09582780480613298, | |
| "grad_norm": 1.2682095766067505, | |
| "learning_rate": 1.9361737677527152e-05, | |
| "loss": 0.107, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.10319917440660474, | |
| "grad_norm": 1.898970127105713, | |
| "learning_rate": 1.9312595213524007e-05, | |
| "loss": 0.106, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.11057054400707651, | |
| "grad_norm": 1.1229842901229858, | |
| "learning_rate": 1.9263452749520862e-05, | |
| "loss": 0.1066, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.11794191360754828, | |
| "grad_norm": 1.5770803689956665, | |
| "learning_rate": 1.9214310285517717e-05, | |
| "loss": 0.1008, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.12531328320802004, | |
| "grad_norm": 2.80481219291687, | |
| "learning_rate": 1.9165167821514572e-05, | |
| "loss": 0.1115, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.13268465280849182, | |
| "grad_norm": 0.8099410533905029, | |
| "learning_rate": 1.9116025357511427e-05, | |
| "loss": 0.0979, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.1400560224089636, | |
| "grad_norm": 2.5966243743896484, | |
| "learning_rate": 1.9066882893508282e-05, | |
| "loss": 0.1111, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.14742739200943536, | |
| "grad_norm": 0.39728572964668274, | |
| "learning_rate": 1.9017740429505137e-05, | |
| "loss": 0.1014, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.15479876160990713, | |
| "grad_norm": 0.4020085334777832, | |
| "learning_rate": 1.896869625043e-05, | |
| "loss": 0.0956, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.16217013121037888, | |
| "grad_norm": 1.247157335281372, | |
| "learning_rate": 1.891955378642685e-05, | |
| "loss": 0.1054, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.16954150081085065, | |
| "grad_norm": 0.4768887758255005, | |
| "learning_rate": 1.8870411322423706e-05, | |
| "loss": 0.0981, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.17691287041132242, | |
| "grad_norm": 2.418336868286133, | |
| "learning_rate": 1.882126885842056e-05, | |
| "loss": 0.0936, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.1842842400117942, | |
| "grad_norm": 0.27637165784835815, | |
| "learning_rate": 1.877212639441742e-05, | |
| "loss": 0.0975, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.19165560961226596, | |
| "grad_norm": 0.23639962077140808, | |
| "learning_rate": 1.8722983930414275e-05, | |
| "loss": 0.0915, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.19902697921273774, | |
| "grad_norm": 0.8892920017242432, | |
| "learning_rate": 1.8673841466411126e-05, | |
| "loss": 0.1, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.20639834881320948, | |
| "grad_norm": 3.2099547386169434, | |
| "learning_rate": 1.862469900240798e-05, | |
| "loss": 0.0965, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.21376971841368125, | |
| "grad_norm": 0.9265658259391785, | |
| "learning_rate": 1.8575556538404836e-05, | |
| "loss": 0.0988, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.22114108801415303, | |
| "grad_norm": 1.8521679639816284, | |
| "learning_rate": 1.8526414074401695e-05, | |
| "loss": 0.0951, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.2285124576146248, | |
| "grad_norm": 2.191715717315674, | |
| "learning_rate": 1.8477369895326553e-05, | |
| "loss": 0.1037, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.23588382721509657, | |
| "grad_norm": 0.10625209659337997, | |
| "learning_rate": 1.8428227431323408e-05, | |
| "loss": 0.093, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.24325519681556834, | |
| "grad_norm": 4.059609413146973, | |
| "learning_rate": 1.8379084967320263e-05, | |
| "loss": 0.0931, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.2506265664160401, | |
| "grad_norm": 0.341007798910141, | |
| "learning_rate": 1.8329942503317118e-05, | |
| "loss": 0.0877, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.2579979360165119, | |
| "grad_norm": 0.16973993182182312, | |
| "learning_rate": 1.8280800039313973e-05, | |
| "loss": 0.0981, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.26536930561698363, | |
| "grad_norm": 0.20512279868125916, | |
| "learning_rate": 1.8231657575310828e-05, | |
| "loss": 0.0944, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.27274067521745543, | |
| "grad_norm": 5.859679222106934, | |
| "learning_rate": 1.818251511130768e-05, | |
| "loss": 0.0952, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.2801120448179272, | |
| "grad_norm": 0.06838594377040863, | |
| "learning_rate": 1.8133470932232545e-05, | |
| "loss": 0.0891, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.2874834144183989, | |
| "grad_norm": 1.6491619348526, | |
| "learning_rate": 1.80843284682294e-05, | |
| "loss": 0.095, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.2948547840188707, | |
| "grad_norm": 0.21653395891189575, | |
| "learning_rate": 1.8035186004226252e-05, | |
| "loss": 0.0925, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.30222615361934246, | |
| "grad_norm": 0.1839127540588379, | |
| "learning_rate": 1.7986043540223107e-05, | |
| "loss": 0.0874, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.30959752321981426, | |
| "grad_norm": 1.7096320390701294, | |
| "learning_rate": 1.7936901076219962e-05, | |
| "loss": 0.0921, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.316968892820286, | |
| "grad_norm": 0.013913823291659355, | |
| "learning_rate": 1.788775861221682e-05, | |
| "loss": 0.0879, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.32434026242075775, | |
| "grad_norm": 2.2390196323394775, | |
| "learning_rate": 1.7838616148213672e-05, | |
| "loss": 0.0988, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.33171163202122955, | |
| "grad_norm": 1.1112462282180786, | |
| "learning_rate": 1.7789473684210527e-05, | |
| "loss": 0.0906, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.3390830016217013, | |
| "grad_norm": 2.3240630626678467, | |
| "learning_rate": 1.774042950513539e-05, | |
| "loss": 0.0919, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.3464543712221731, | |
| "grad_norm": 0.31203529238700867, | |
| "learning_rate": 1.7691287041132244e-05, | |
| "loss": 0.0886, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.35382574082264484, | |
| "grad_norm": 0.002407611347734928, | |
| "learning_rate": 1.76421445771291e-05, | |
| "loss": 0.0892, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.36119711042311664, | |
| "grad_norm": 0.23297803103923798, | |
| "learning_rate": 1.7593002113125954e-05, | |
| "loss": 0.0894, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.3685684800235884, | |
| "grad_norm": 0.5540401339530945, | |
| "learning_rate": 1.754385964912281e-05, | |
| "loss": 0.0905, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.37593984962406013, | |
| "grad_norm": 1.9130643606185913, | |
| "learning_rate": 1.749481547004767e-05, | |
| "loss": 0.0906, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.38331121922453193, | |
| "grad_norm": 0.23371170461177826, | |
| "learning_rate": 1.7445673006044523e-05, | |
| "loss": 0.0925, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.3906825888250037, | |
| "grad_norm": 0.16552847623825073, | |
| "learning_rate": 1.7396530542041378e-05, | |
| "loss": 0.088, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.3980539584254755, | |
| "grad_norm": 0.008411018177866936, | |
| "learning_rate": 1.7347388078038233e-05, | |
| "loss": 0.0946, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.4054253280259472, | |
| "grad_norm": 0.6356103420257568, | |
| "learning_rate": 1.729824561403509e-05, | |
| "loss": 0.0869, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.41279669762641896, | |
| "grad_norm": 0.8396435379981995, | |
| "learning_rate": 1.724920143495995e-05, | |
| "loss": 0.0933, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.42016806722689076, | |
| "grad_norm": 5.201042652130127, | |
| "learning_rate": 1.7200157255884812e-05, | |
| "loss": 0.0878, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.4275394368273625, | |
| "grad_norm": 1.2198799848556519, | |
| "learning_rate": 1.7151014791881667e-05, | |
| "loss": 0.0837, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.4349108064278343, | |
| "grad_norm": 1.1107237339019775, | |
| "learning_rate": 1.7101872327878522e-05, | |
| "loss": 0.0898, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.44228217602830605, | |
| "grad_norm": 0.47166362404823303, | |
| "learning_rate": 1.705282814880338e-05, | |
| "loss": 0.083, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.44965354562877785, | |
| "grad_norm": 0.9816909432411194, | |
| "learning_rate": 1.7003685684800236e-05, | |
| "loss": 0.0861, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.4570249152292496, | |
| "grad_norm": 0.10324009507894516, | |
| "learning_rate": 1.6954543220797094e-05, | |
| "loss": 0.0942, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.46439628482972134, | |
| "grad_norm": 0.42705604434013367, | |
| "learning_rate": 1.6905400756793946e-05, | |
| "loss": 0.0826, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.47176765443019314, | |
| "grad_norm": 1.8074253797531128, | |
| "learning_rate": 1.68562582927908e-05, | |
| "loss": 0.0853, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.4791390240306649, | |
| "grad_norm": 1.1949777603149414, | |
| "learning_rate": 1.6807115828787656e-05, | |
| "loss": 0.0936, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.4865103936311367, | |
| "grad_norm": 1.8849105834960938, | |
| "learning_rate": 1.675797336478451e-05, | |
| "loss": 0.0849, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.49388176323160843, | |
| "grad_norm": 2.1948788166046143, | |
| "learning_rate": 1.670883090078137e-05, | |
| "loss": 0.083, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.5012531328320802, | |
| "grad_norm": 1.5681918859481812, | |
| "learning_rate": 1.665968843677822e-05, | |
| "loss": 0.0845, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.508624502432552, | |
| "grad_norm": 1.447178840637207, | |
| "learning_rate": 1.6610545972775076e-05, | |
| "loss": 0.0883, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.5159958720330238, | |
| "grad_norm": 0.678683876991272, | |
| "learning_rate": 1.6561501793699938e-05, | |
| "loss": 0.0901, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.5233672416334955, | |
| "grad_norm": 1.585949420928955, | |
| "learning_rate": 1.6512359329696793e-05, | |
| "loss": 0.0893, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.5307386112339673, | |
| "grad_norm": 2.8461952209472656, | |
| "learning_rate": 1.6463216865693648e-05, | |
| "loss": 0.0846, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.5381099808344391, | |
| "grad_norm": 0.08873996883630753, | |
| "learning_rate": 1.6414074401690503e-05, | |
| "loss": 0.0838, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.5454813504349109, | |
| "grad_norm": 0.08909033238887787, | |
| "learning_rate": 1.6364931937687354e-05, | |
| "loss": 0.0835, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.5528527200353825, | |
| "grad_norm": 0.01679537631571293, | |
| "learning_rate": 1.6315789473684213e-05, | |
| "loss": 0.0826, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.5602240896358543, | |
| "grad_norm": 0.018643999472260475, | |
| "learning_rate": 1.6266647009681068e-05, | |
| "loss": 0.0891, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.5675954592363261, | |
| "grad_norm": 3.226288080215454, | |
| "learning_rate": 1.6217504545677923e-05, | |
| "loss": 0.0927, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.5749668288367978, | |
| "grad_norm": 6.410881042480469, | |
| "learning_rate": 1.6168362081674774e-05, | |
| "loss": 0.0826, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.5823381984372696, | |
| "grad_norm": 2.421131134033203, | |
| "learning_rate": 1.611921961767163e-05, | |
| "loss": 0.0854, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.5897095680377414, | |
| "grad_norm": 0.012708733789622784, | |
| "learning_rate": 1.6070077153668488e-05, | |
| "loss": 0.0841, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.5970809376382131, | |
| "grad_norm": 5.636229515075684, | |
| "learning_rate": 1.6021032974593347e-05, | |
| "loss": 0.0794, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.6044523072386849, | |
| "grad_norm": 1.866571307182312, | |
| "learning_rate": 1.59718905105902e-05, | |
| "loss": 0.0836, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.6118236768391567, | |
| "grad_norm": 1.0771315097808838, | |
| "learning_rate": 1.5922748046587057e-05, | |
| "loss": 0.0782, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.6191950464396285, | |
| "grad_norm": 0.09344267845153809, | |
| "learning_rate": 1.587360558258391e-05, | |
| "loss": 0.0891, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.6265664160401002, | |
| "grad_norm": 2.4186413288116455, | |
| "learning_rate": 1.5824463118580767e-05, | |
| "loss": 0.0907, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.633937785640572, | |
| "grad_norm": 0.09242186695337296, | |
| "learning_rate": 1.577532065457762e-05, | |
| "loss": 0.081, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.6413091552410438, | |
| "grad_norm": 0.09285570681095123, | |
| "learning_rate": 1.572637476043049e-05, | |
| "loss": 0.0902, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.6486805248415155, | |
| "grad_norm": 1.1049730777740479, | |
| "learning_rate": 1.5677232296427346e-05, | |
| "loss": 0.081, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.6560518944419873, | |
| "grad_norm": 0.1485988050699234, | |
| "learning_rate": 1.5628089832424197e-05, | |
| "loss": 0.0834, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.6634232640424591, | |
| "grad_norm": 1.4170334339141846, | |
| "learning_rate": 1.5578947368421052e-05, | |
| "loss": 0.08, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.6707946336429309, | |
| "grad_norm": 2.51129150390625, | |
| "learning_rate": 1.5529903189345914e-05, | |
| "loss": 0.0881, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.6781660032434026, | |
| "grad_norm": 0.0491604208946228, | |
| "learning_rate": 1.548076072534277e-05, | |
| "loss": 0.0845, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.6855373728438744, | |
| "grad_norm": 0.09064287692308426, | |
| "learning_rate": 1.5431618261339624e-05, | |
| "loss": 0.0838, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.6929087424443462, | |
| "grad_norm": 1.6225173473358154, | |
| "learning_rate": 1.538247579733648e-05, | |
| "loss": 0.0806, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.7002801120448179, | |
| "grad_norm": 0.025229327380657196, | |
| "learning_rate": 1.5333333333333334e-05, | |
| "loss": 0.0852, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.7076514816452897, | |
| "grad_norm": 1.561880350112915, | |
| "learning_rate": 1.528419086933019e-05, | |
| "loss": 0.0819, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.7150228512457615, | |
| "grad_norm": 0.024792635813355446, | |
| "learning_rate": 1.5235048405327044e-05, | |
| "loss": 0.0844, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.7223942208462333, | |
| "grad_norm": 0.2325647473335266, | |
| "learning_rate": 1.51859059413239e-05, | |
| "loss": 0.0786, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.729765590446705, | |
| "grad_norm": 1.0401220321655273, | |
| "learning_rate": 1.5136763477320754e-05, | |
| "loss": 0.0753, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.7371369600471768, | |
| "grad_norm": 2.6318838596343994, | |
| "learning_rate": 1.5087621013317608e-05, | |
| "loss": 0.0793, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.7445083296476486, | |
| "grad_norm": 0.15695439279079437, | |
| "learning_rate": 1.5038478549314463e-05, | |
| "loss": 0.085, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.7518796992481203, | |
| "grad_norm": 0.006814942229539156, | |
| "learning_rate": 1.498933608531132e-05, | |
| "loss": 0.0819, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.7592510688485921, | |
| "grad_norm": 2.0822718143463135, | |
| "learning_rate": 1.4940193621308174e-05, | |
| "loss": 0.0798, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.7666224384490639, | |
| "grad_norm": 0.06762377172708511, | |
| "learning_rate": 1.4891149442233035e-05, | |
| "loss": 0.0793, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.7739938080495357, | |
| "grad_norm": 1.627299189567566, | |
| "learning_rate": 1.484200697822989e-05, | |
| "loss": 0.08, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.7813651776500073, | |
| "grad_norm": 0.8819578289985657, | |
| "learning_rate": 1.4792864514226743e-05, | |
| "loss": 0.0897, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.7887365472504791, | |
| "grad_norm": 3.7201988697052, | |
| "learning_rate": 1.4743722050223598e-05, | |
| "loss": 0.0787, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.796107916850951, | |
| "grad_norm": 2.0705556869506836, | |
| "learning_rate": 1.4694677871148462e-05, | |
| "loss": 0.0897, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.8034792864514226, | |
| "grad_norm": 0.08984575420618057, | |
| "learning_rate": 1.4645535407145315e-05, | |
| "loss": 0.0875, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.8108506560518944, | |
| "grad_norm": 0.5264925956726074, | |
| "learning_rate": 1.459639294314217e-05, | |
| "loss": 0.081, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.8182220256523662, | |
| "grad_norm": 0.7385400533676147, | |
| "learning_rate": 1.4547250479139025e-05, | |
| "loss": 0.0804, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.8255933952528379, | |
| "grad_norm": 0.04134887456893921, | |
| "learning_rate": 1.449810801513588e-05, | |
| "loss": 0.0797, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.8329647648533097, | |
| "grad_norm": 0.03769136965274811, | |
| "learning_rate": 1.4448965551132734e-05, | |
| "loss": 0.084, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.8403361344537815, | |
| "grad_norm": 0.2623615264892578, | |
| "learning_rate": 1.4399921372057597e-05, | |
| "loss": 0.0821, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.8477075040542533, | |
| "grad_norm": 2.0235373973846436, | |
| "learning_rate": 1.435077890805445e-05, | |
| "loss": 0.0806, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.855078873654725, | |
| "grad_norm": 0.32753029465675354, | |
| "learning_rate": 1.4301636444051306e-05, | |
| "loss": 0.0828, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.8624502432551968, | |
| "grad_norm": 2.255500316619873, | |
| "learning_rate": 1.425249398004816e-05, | |
| "loss": 0.0804, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.8698216128556686, | |
| "grad_norm": 2.8162291049957275, | |
| "learning_rate": 1.4203449800973021e-05, | |
| "loss": 0.0827, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.8771929824561403, | |
| "grad_norm": 0.41316208243370056, | |
| "learning_rate": 1.4154307336969876e-05, | |
| "loss": 0.0753, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.8845643520566121, | |
| "grad_norm": 1.9982844591140747, | |
| "learning_rate": 1.4105164872966733e-05, | |
| "loss": 0.0854, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.8919357216570839, | |
| "grad_norm": 1.8432923555374146, | |
| "learning_rate": 1.4056022408963586e-05, | |
| "loss": 0.077, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.8993070912575557, | |
| "grad_norm": 3.33919620513916, | |
| "learning_rate": 1.4006879944960441e-05, | |
| "loss": 0.082, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.9066784608580274, | |
| "grad_norm": 3.227517604827881, | |
| "learning_rate": 1.3957737480957296e-05, | |
| "loss": 0.0785, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.9140498304584992, | |
| "grad_norm": 0.014868408441543579, | |
| "learning_rate": 1.3908595016954151e-05, | |
| "loss": 0.0741, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.921421200058971, | |
| "grad_norm": 0.06270582973957062, | |
| "learning_rate": 1.3859452552951008e-05, | |
| "loss": 0.0786, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.9287925696594427, | |
| "grad_norm": 0.06437293440103531, | |
| "learning_rate": 1.3810310088947861e-05, | |
| "loss": 0.076, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.9361639392599145, | |
| "grad_norm": 0.9199370741844177, | |
| "learning_rate": 1.3761167624944716e-05, | |
| "loss": 0.0815, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.9435353088603863, | |
| "grad_norm": 0.020321089774370193, | |
| "learning_rate": 1.3712123445869577e-05, | |
| "loss": 0.0837, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.9509066784608581, | |
| "grad_norm": 2.2705533504486084, | |
| "learning_rate": 1.3662980981866432e-05, | |
| "loss": 0.0871, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.9582780480613298, | |
| "grad_norm": 0.027517901733517647, | |
| "learning_rate": 1.3613838517863287e-05, | |
| "loss": 0.0755, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.9656494176618016, | |
| "grad_norm": 1.307394027709961, | |
| "learning_rate": 1.3564696053860143e-05, | |
| "loss": 0.0764, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.9730207872622734, | |
| "grad_norm": 2.4579734802246094, | |
| "learning_rate": 1.3515553589856995e-05, | |
| "loss": 0.0782, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 13.52622127532959, | |
| "learning_rate": 1.3466411125853852e-05, | |
| "loss": 0.0704, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.9877635264632169, | |
| "grad_norm": 0.32894331216812134, | |
| "learning_rate": 1.3417268661850707e-05, | |
| "loss": 0.0838, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.9951348960636887, | |
| "grad_norm": 0.08820515871047974, | |
| "learning_rate": 1.3368126197847562e-05, | |
| "loss": 0.0792, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6063492063492063, | |
| "eval_f1": 0.6658804318243672, | |
| "eval_loss": 0.07707133144140244, | |
| "eval_roc_auc": 0.8010158538939403, | |
| "eval_runtime": 92.2448, | |
| "eval_samples_per_second": 64.882, | |
| "eval_steps_per_second": 64.882, | |
| "step": 67830 | |
| }, | |
| { | |
| "epoch": 1.0025062656641603, | |
| "grad_norm": 1.2780104875564575, | |
| "learning_rate": 1.3318983733844415e-05, | |
| "loss": 0.0722, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.0098776352646321, | |
| "grad_norm": 0.016919715330004692, | |
| "learning_rate": 1.3269939554769277e-05, | |
| "loss": 0.0544, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 1.017249004865104, | |
| "grad_norm": 0.0008094881195574999, | |
| "learning_rate": 1.3220797090766134e-05, | |
| "loss": 0.0589, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 1.0246203744655757, | |
| "grad_norm": 0.06712741404771805, | |
| "learning_rate": 1.3171752911690994e-05, | |
| "loss": 0.0692, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 1.0319917440660475, | |
| "grad_norm": 1.4024405479431152, | |
| "learning_rate": 1.3122610447687849e-05, | |
| "loss": 0.0577, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.0393631136665193, | |
| "grad_norm": 3.811220407485962, | |
| "learning_rate": 1.3073467983684702e-05, | |
| "loss": 0.0598, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 1.046734483266991, | |
| "grad_norm": 0.061250410974025726, | |
| "learning_rate": 1.3024325519681557e-05, | |
| "loss": 0.064, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 1.0541058528674627, | |
| "grad_norm": 2.3042991161346436, | |
| "learning_rate": 1.2975183055678412e-05, | |
| "loss": 0.0646, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 1.0614772224679345, | |
| "grad_norm": 0.32951870560646057, | |
| "learning_rate": 1.2926040591675269e-05, | |
| "loss": 0.0668, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.0688485920684063, | |
| "grad_norm": 0.013675130903720856, | |
| "learning_rate": 1.2876898127672122e-05, | |
| "loss": 0.0576, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 1.0762199616688781, | |
| "grad_norm": 2.3298966884613037, | |
| "learning_rate": 1.2827755663668977e-05, | |
| "loss": 0.0583, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 1.08359133126935, | |
| "grad_norm": 0.04673844203352928, | |
| "learning_rate": 1.2778809769521845e-05, | |
| "loss": 0.0697, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 1.0909627008698215, | |
| "grad_norm": 1.1629608869552612, | |
| "learning_rate": 1.27296673055187e-05, | |
| "loss": 0.0621, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 1.0983340704702933, | |
| "grad_norm": 0.06381271779537201, | |
| "learning_rate": 1.2680524841515553e-05, | |
| "loss": 0.0629, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 1.105705440070765, | |
| "grad_norm": 0.00508810393512249, | |
| "learning_rate": 1.2631382377512408e-05, | |
| "loss": 0.065, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 1.113076809671237, | |
| "grad_norm": 4.200405597686768, | |
| "learning_rate": 1.2582239913509265e-05, | |
| "loss": 0.0704, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 1.1204481792717087, | |
| "grad_norm": 0.18736723065376282, | |
| "learning_rate": 1.253309744950612e-05, | |
| "loss": 0.0683, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 1.1278195488721805, | |
| "grad_norm": 0.09223194420337677, | |
| "learning_rate": 1.2483954985502975e-05, | |
| "loss": 0.0557, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 1.1351909184726523, | |
| "grad_norm": 5.287250518798828, | |
| "learning_rate": 1.2434812521499828e-05, | |
| "loss": 0.0643, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 1.1425622880731239, | |
| "grad_norm": 1.283521294593811, | |
| "learning_rate": 1.2385670057496683e-05, | |
| "loss": 0.0584, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 1.1499336576735957, | |
| "grad_norm": 3.34344220161438, | |
| "learning_rate": 1.233652759349354e-05, | |
| "loss": 0.0673, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 1.1573050272740675, | |
| "grad_norm": 0.23046046495437622, | |
| "learning_rate": 1.2287385129490395e-05, | |
| "loss": 0.0605, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 1.1646763968745393, | |
| "grad_norm": 0.0487230159342289, | |
| "learning_rate": 1.2238242665487248e-05, | |
| "loss": 0.0672, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 1.172047766475011, | |
| "grad_norm": 0.0587400384247303, | |
| "learning_rate": 1.2189100201484103e-05, | |
| "loss": 0.0635, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 1.1794191360754829, | |
| "grad_norm": 0.3049776256084442, | |
| "learning_rate": 1.2140056022408964e-05, | |
| "loss": 0.0587, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.1867905056759547, | |
| "grad_norm": 0.5761535167694092, | |
| "learning_rate": 1.2091011843333826e-05, | |
| "loss": 0.0706, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 1.1941618752764263, | |
| "grad_norm": 2.524258852005005, | |
| "learning_rate": 1.2041967664258686e-05, | |
| "loss": 0.0607, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 1.201533244876898, | |
| "grad_norm": 0.026634838432073593, | |
| "learning_rate": 1.1992825200255543e-05, | |
| "loss": 0.0581, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 1.2089046144773699, | |
| "grad_norm": 0.39337214827537537, | |
| "learning_rate": 1.1943682736252398e-05, | |
| "loss": 0.0652, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 1.2162759840778417, | |
| "grad_norm": 1.8906174898147583, | |
| "learning_rate": 1.1894540272249251e-05, | |
| "loss": 0.0659, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 1.2236473536783135, | |
| "grad_norm": 0.011290138587355614, | |
| "learning_rate": 1.1845397808246106e-05, | |
| "loss": 0.0615, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 1.2310187232787853, | |
| "grad_norm": 0.5536847114562988, | |
| "learning_rate": 1.1796255344242961e-05, | |
| "loss": 0.0666, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 1.238390092879257, | |
| "grad_norm": 0.0035450158175081015, | |
| "learning_rate": 1.1747112880239818e-05, | |
| "loss": 0.0597, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 1.2457614624797286, | |
| "grad_norm": 0.005579414777457714, | |
| "learning_rate": 1.1697970416236671e-05, | |
| "loss": 0.0545, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 1.2531328320802004, | |
| "grad_norm": 0.10251569747924805, | |
| "learning_rate": 1.1648926237161533e-05, | |
| "loss": 0.0652, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 1.2605042016806722, | |
| "grad_norm": 3.2004494667053223, | |
| "learning_rate": 1.1599783773158386e-05, | |
| "loss": 0.0546, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 1.267875571281144, | |
| "grad_norm": 1.5647473335266113, | |
| "learning_rate": 1.1550641309155241e-05, | |
| "loss": 0.0661, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 1.2752469408816158, | |
| "grad_norm": 2.5646321773529053, | |
| "learning_rate": 1.1501498845152096e-05, | |
| "loss": 0.0616, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 1.2826183104820876, | |
| "grad_norm": 0.008838827721774578, | |
| "learning_rate": 1.1452356381148953e-05, | |
| "loss": 0.0643, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 1.2899896800825594, | |
| "grad_norm": 0.27586570382118225, | |
| "learning_rate": 1.1403213917145805e-05, | |
| "loss": 0.0651, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 1.297361049683031, | |
| "grad_norm": 2.2683589458465576, | |
| "learning_rate": 1.1354071453142661e-05, | |
| "loss": 0.0613, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 1.3047324192835028, | |
| "grad_norm": 0.0017950567416846752, | |
| "learning_rate": 1.1305027274067524e-05, | |
| "loss": 0.0616, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 1.3121037888839746, | |
| "grad_norm": 0.03913048282265663, | |
| "learning_rate": 1.1255884810064377e-05, | |
| "loss": 0.0627, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 1.3194751584844464, | |
| "grad_norm": 5.085097312927246, | |
| "learning_rate": 1.1206742346061232e-05, | |
| "loss": 0.0648, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 1.3268465280849182, | |
| "grad_norm": 0.04779289662837982, | |
| "learning_rate": 1.1157599882058089e-05, | |
| "loss": 0.0666, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 1.33421789768539, | |
| "grad_norm": 0.01123060006648302, | |
| "learning_rate": 1.1108457418054944e-05, | |
| "loss": 0.0618, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 1.3415892672858618, | |
| "grad_norm": 1.5869191884994507, | |
| "learning_rate": 1.1059314954051797e-05, | |
| "loss": 0.066, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 1.3489606368863334, | |
| "grad_norm": 0.00517408037558198, | |
| "learning_rate": 1.1010172490048652e-05, | |
| "loss": 0.062, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 1.3563320064868052, | |
| "grad_norm": 0.09691867977380753, | |
| "learning_rate": 1.0961030026045507e-05, | |
| "loss": 0.0606, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 1.363703376087277, | |
| "grad_norm": 3.3921549320220947, | |
| "learning_rate": 1.0911887562042362e-05, | |
| "loss": 0.0643, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 1.3710747456877488, | |
| "grad_norm": 2.874007225036621, | |
| "learning_rate": 1.0862843382967222e-05, | |
| "loss": 0.0622, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 1.3784461152882206, | |
| "grad_norm": 0.03864584490656853, | |
| "learning_rate": 1.0813897488820091e-05, | |
| "loss": 0.0582, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 1.3858174848886924, | |
| "grad_norm": 0.12044321745634079, | |
| "learning_rate": 1.0764755024816945e-05, | |
| "loss": 0.0672, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 1.3931888544891642, | |
| "grad_norm": 3.673576593399048, | |
| "learning_rate": 1.07156125608138e-05, | |
| "loss": 0.0619, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 1.4005602240896358, | |
| "grad_norm": 0.023468611761927605, | |
| "learning_rate": 1.0666470096810655e-05, | |
| "loss": 0.0567, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 1.4079315936901076, | |
| "grad_norm": 1.5474720001220703, | |
| "learning_rate": 1.061732763280751e-05, | |
| "loss": 0.057, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 1.4153029632905794, | |
| "grad_norm": 0.4061996340751648, | |
| "learning_rate": 1.0568185168804366e-05, | |
| "loss": 0.0551, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 1.4226743328910512, | |
| "grad_norm": 1.828468918800354, | |
| "learning_rate": 1.0519042704801218e-05, | |
| "loss": 0.0642, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 1.430045702491523, | |
| "grad_norm": 0.006184196099638939, | |
| "learning_rate": 1.0469900240798075e-05, | |
| "loss": 0.0562, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 1.4374170720919948, | |
| "grad_norm": 0.8913156390190125, | |
| "learning_rate": 1.0420856061722935e-05, | |
| "loss": 0.0626, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 1.4447884416924666, | |
| "grad_norm": 0.08904910832643509, | |
| "learning_rate": 1.037171359771979e-05, | |
| "loss": 0.0558, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 1.4521598112929381, | |
| "grad_norm": 0.1683080941438675, | |
| "learning_rate": 1.0322571133716645e-05, | |
| "loss": 0.0644, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 1.45953118089341, | |
| "grad_norm": 0.4399701654911041, | |
| "learning_rate": 1.0273428669713502e-05, | |
| "loss": 0.0608, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 1.4669025504938817, | |
| "grad_norm": 9.481819152832031, | |
| "learning_rate": 1.0224286205710354e-05, | |
| "loss": 0.0597, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 1.4742739200943535, | |
| "grad_norm": 0.10929368436336517, | |
| "learning_rate": 1.017514374170721e-05, | |
| "loss": 0.0616, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 1.4816452896948253, | |
| "grad_norm": 0.0035255183465778828, | |
| "learning_rate": 1.0126001277704065e-05, | |
| "loss": 0.0593, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 1.4890166592952971, | |
| "grad_norm": 0.07139890640974045, | |
| "learning_rate": 1.007685881370092e-05, | |
| "loss": 0.0635, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 1.496388028895769, | |
| "grad_norm": 3.2497317790985107, | |
| "learning_rate": 1.0027716349697774e-05, | |
| "loss": 0.055, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 1.5037593984962405, | |
| "grad_norm": 0.16377945244312286, | |
| "learning_rate": 9.978573885694629e-06, | |
| "loss": 0.0602, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 1.5111307680967123, | |
| "grad_norm": 0.29184427857398987, | |
| "learning_rate": 9.929431421691485e-06, | |
| "loss": 0.0596, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 1.5185021376971841, | |
| "grad_norm": 0.14543047547340393, | |
| "learning_rate": 9.88028895768834e-06, | |
| "loss": 0.0593, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 1.525873507297656, | |
| "grad_norm": 4.776684284210205, | |
| "learning_rate": 9.831146493685194e-06, | |
| "loss": 0.0604, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 1.5332448768981277, | |
| "grad_norm": 3.4175798892974854, | |
| "learning_rate": 9.782102314610056e-06, | |
| "loss": 0.0622, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 1.5406162464985993, | |
| "grad_norm": 5.478698253631592, | |
| "learning_rate": 9.73295985060691e-06, | |
| "loss": 0.0582, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 1.5479876160990713, | |
| "grad_norm": 0.09877605736255646, | |
| "learning_rate": 9.683817386603766e-06, | |
| "loss": 0.0644, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 1.555358985699543, | |
| "grad_norm": 3.169551134109497, | |
| "learning_rate": 9.634674922600619e-06, | |
| "loss": 0.0672, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 1.5627303553000147, | |
| "grad_norm": 0.0030992806423455477, | |
| "learning_rate": 9.585532458597476e-06, | |
| "loss": 0.0654, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 1.5701017249004865, | |
| "grad_norm": 1.8882814645767212, | |
| "learning_rate": 9.536488279522336e-06, | |
| "loss": 0.0643, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 1.5774730945009583, | |
| "grad_norm": 0.02398967184126377, | |
| "learning_rate": 9.487345815519191e-06, | |
| "loss": 0.0677, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 1.58484446410143, | |
| "grad_norm": 0.010672827251255512, | |
| "learning_rate": 9.438203351516046e-06, | |
| "loss": 0.0637, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 1.5922158337019017, | |
| "grad_norm": 0.018269941210746765, | |
| "learning_rate": 9.389159172440906e-06, | |
| "loss": 0.0624, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 1.5995872033023737, | |
| "grad_norm": 1.7238303422927856, | |
| "learning_rate": 9.340016708437761e-06, | |
| "loss": 0.0595, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 1.6069585729028453, | |
| "grad_norm": 1.6856399774551392, | |
| "learning_rate": 9.290874244434616e-06, | |
| "loss": 0.0572, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 1.614329942503317, | |
| "grad_norm": 0.08445548266172409, | |
| "learning_rate": 9.241731780431471e-06, | |
| "loss": 0.0617, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 1.6217013121037889, | |
| "grad_norm": 2.7674472332000732, | |
| "learning_rate": 9.192589316428326e-06, | |
| "loss": 0.061, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 1.6290726817042607, | |
| "grad_norm": 6.365856647491455, | |
| "learning_rate": 9.143446852425181e-06, | |
| "loss": 0.0548, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 1.6364440513047325, | |
| "grad_norm": 1.224268913269043, | |
| "learning_rate": 9.094304388422036e-06, | |
| "loss": 0.0621, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 1.643815420905204, | |
| "grad_norm": 0.021649343892931938, | |
| "learning_rate": 9.045161924418891e-06, | |
| "loss": 0.0659, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 1.651186790505676, | |
| "grad_norm": 0.012330977246165276, | |
| "learning_rate": 8.996117745343752e-06, | |
| "loss": 0.0602, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 1.6585581601061476, | |
| "grad_norm": 0.6574206948280334, | |
| "learning_rate": 8.946975281340607e-06, | |
| "loss": 0.0617, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 1.6659295297066194, | |
| "grad_norm": 0.018540961667895317, | |
| "learning_rate": 8.897832817337462e-06, | |
| "loss": 0.0602, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 1.6733008993070912, | |
| "grad_norm": 5.6425557136535645, | |
| "learning_rate": 8.848690353334317e-06, | |
| "loss": 0.0597, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 1.680672268907563, | |
| "grad_norm": 2.435633420944214, | |
| "learning_rate": 8.799646174259177e-06, | |
| "loss": 0.0624, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 1.6880436385080348, | |
| "grad_norm": 1.5483721494674683, | |
| "learning_rate": 8.750503710256032e-06, | |
| "loss": 0.0614, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 1.6954150081085064, | |
| "grad_norm": 0.06437569856643677, | |
| "learning_rate": 8.701361246252887e-06, | |
| "loss": 0.0594, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 1.7027863777089784, | |
| "grad_norm": 0.29250073432922363, | |
| "learning_rate": 8.652218782249742e-06, | |
| "loss": 0.0559, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 1.71015774730945, | |
| "grad_norm": 0.5888819098472595, | |
| "learning_rate": 8.603076318246597e-06, | |
| "loss": 0.0659, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 1.7175291169099218, | |
| "grad_norm": 0.2926543354988098, | |
| "learning_rate": 8.55403213917146e-06, | |
| "loss": 0.063, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 1.7249004865103936, | |
| "grad_norm": 2.316805601119995, | |
| "learning_rate": 8.50498796009632e-06, | |
| "loss": 0.0544, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 1.7322718561108654, | |
| "grad_norm": 0.018379326909780502, | |
| "learning_rate": 8.455845496093175e-06, | |
| "loss": 0.0663, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 1.7396432257113372, | |
| "grad_norm": 0.014781077392399311, | |
| "learning_rate": 8.40670303209003e-06, | |
| "loss": 0.0573, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 1.7470145953118088, | |
| "grad_norm": 2.3919591903686523, | |
| "learning_rate": 8.357560568086885e-06, | |
| "loss": 0.0592, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 1.7543859649122808, | |
| "grad_norm": 2.7669119834899902, | |
| "learning_rate": 8.30841810408374e-06, | |
| "loss": 0.0644, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 1.7617573345127524, | |
| "grad_norm": 3.5739755630493164, | |
| "learning_rate": 8.259275640080595e-06, | |
| "loss": 0.0572, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 1.7691287041132242, | |
| "grad_norm": 0.4299847483634949, | |
| "learning_rate": 8.21013317607745e-06, | |
| "loss": 0.0664, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 1.776500073713696, | |
| "grad_norm": 0.9990677833557129, | |
| "learning_rate": 8.16108899700231e-06, | |
| "loss": 0.0638, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 1.7838714433141678, | |
| "grad_norm": 2.1424782276153564, | |
| "learning_rate": 8.111946532999165e-06, | |
| "loss": 0.0572, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 1.7912428129146396, | |
| "grad_norm": 4.301726341247559, | |
| "learning_rate": 8.06280406899602e-06, | |
| "loss": 0.0596, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 1.7986141825151112, | |
| "grad_norm": 8.399239540100098, | |
| "learning_rate": 8.013661604992875e-06, | |
| "loss": 0.0667, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 1.8059855521155832, | |
| "grad_norm": 0.00977667048573494, | |
| "learning_rate": 7.96451914098973e-06, | |
| "loss": 0.0596, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 1.8133569217160548, | |
| "grad_norm": 0.019315605983138084, | |
| "learning_rate": 7.915376676986585e-06, | |
| "loss": 0.0607, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 1.8207282913165266, | |
| "grad_norm": 0.011183898895978928, | |
| "learning_rate": 7.866332497911446e-06, | |
| "loss": 0.0656, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 1.8280996609169984, | |
| "grad_norm": 0.1689341515302658, | |
| "learning_rate": 7.8171900339083e-06, | |
| "loss": 0.0571, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 1.8354710305174702, | |
| "grad_norm": 7.370288848876953, | |
| "learning_rate": 7.768047569905156e-06, | |
| "loss": 0.0627, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 1.842842400117942, | |
| "grad_norm": 0.058736398816108704, | |
| "learning_rate": 7.719003390830016e-06, | |
| "loss": 0.0511, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 1.8502137697184136, | |
| "grad_norm": 0.014665275812149048, | |
| "learning_rate": 7.669860926826873e-06, | |
| "loss": 0.0624, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 1.8575851393188856, | |
| "grad_norm": 0.572428286075592, | |
| "learning_rate": 7.620718462823726e-06, | |
| "loss": 0.0607, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 1.8649565089193572, | |
| "grad_norm": 0.4777454733848572, | |
| "learning_rate": 7.571575998820582e-06, | |
| "loss": 0.0606, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 1.872327878519829, | |
| "grad_norm": 0.020159974694252014, | |
| "learning_rate": 7.522433534817436e-06, | |
| "loss": 0.0539, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 1.8796992481203008, | |
| "grad_norm": 0.010219153016805649, | |
| "learning_rate": 7.473291070814291e-06, | |
| "loss": 0.0593, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 1.8870706177207726, | |
| "grad_norm": 1.854982614517212, | |
| "learning_rate": 7.424148606811146e-06, | |
| "loss": 0.0636, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 1.8944419873212444, | |
| "grad_norm": 0.6811599135398865, | |
| "learning_rate": 7.375006142808001e-06, | |
| "loss": 0.0598, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 1.901813356921716, | |
| "grad_norm": 0.2864709496498108, | |
| "learning_rate": 7.325863678804855e-06, | |
| "loss": 0.0604, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 1.909184726522188, | |
| "grad_norm": 0.030090967193245888, | |
| "learning_rate": 7.276721214801711e-06, | |
| "loss": 0.0602, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 1.9165560961226595, | |
| "grad_norm": 4.568465232849121, | |
| "learning_rate": 7.227578750798565e-06, | |
| "loss": 0.0571, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 1.9239274657231313, | |
| "grad_norm": 2.8999075889587402, | |
| "learning_rate": 7.17843628679542e-06, | |
| "loss": 0.0556, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 1.9312988353236031, | |
| "grad_norm": 2.021425485610962, | |
| "learning_rate": 7.129392107720282e-06, | |
| "loss": 0.0593, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 1.938670204924075, | |
| "grad_norm": 5.252723217010498, | |
| "learning_rate": 7.080249643717136e-06, | |
| "loss": 0.054, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 1.9460415745245467, | |
| "grad_norm": 8.669822692871094, | |
| "learning_rate": 7.031107179713991e-06, | |
| "loss": 0.0622, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 1.9534129441250183, | |
| "grad_norm": 2.219619035720825, | |
| "learning_rate": 6.981964715710846e-06, | |
| "loss": 0.0656, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 1.9607843137254903, | |
| "grad_norm": 0.023053865879774094, | |
| "learning_rate": 6.932822251707701e-06, | |
| "loss": 0.0628, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 1.968155683325962, | |
| "grad_norm": 0.3438442647457123, | |
| "learning_rate": 6.8836797877045555e-06, | |
| "loss": 0.0593, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 1.9755270529264337, | |
| "grad_norm": 0.0067783379927277565, | |
| "learning_rate": 6.834537323701411e-06, | |
| "loss": 0.0639, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 1.9828984225269055, | |
| "grad_norm": 1.7491209506988525, | |
| "learning_rate": 6.7853948596982655e-06, | |
| "loss": 0.0518, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 1.9902697921273773, | |
| "grad_norm": 0.0214830469340086, | |
| "learning_rate": 6.736448965551133e-06, | |
| "loss": 0.0499, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 1.9976411617278491, | |
| "grad_norm": 2.5241074562072754, | |
| "learning_rate": 6.687306501547988e-06, | |
| "loss": 0.0649, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6497911445279866, | |
| "eval_f1": 0.6859684311502573, | |
| "eval_loss": 0.08295563608407974, | |
| "eval_roc_auc": 0.8222563704452553, | |
| "eval_runtime": 89.7963, | |
| "eval_samples_per_second": 66.651, | |
| "eval_steps_per_second": 66.651, | |
| "step": 135660 | |
| }, | |
| { | |
| "epoch": 2.0050125313283207, | |
| "grad_norm": 3.550609588623047, | |
| "learning_rate": 6.638164037544843e-06, | |
| "loss": 0.0348, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 2.0123839009287927, | |
| "grad_norm": 0.014567219652235508, | |
| "learning_rate": 6.589021573541698e-06, | |
| "loss": 0.034, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 2.0197552705292643, | |
| "grad_norm": 4.3217291831970215, | |
| "learning_rate": 6.539977394466559e-06, | |
| "loss": 0.0346, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 2.0271266401297363, | |
| "grad_norm": 0.12837082147598267, | |
| "learning_rate": 6.490834930463414e-06, | |
| "loss": 0.0402, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 2.034498009730208, | |
| "grad_norm": 0.08076170086860657, | |
| "learning_rate": 6.4416924664602685e-06, | |
| "loss": 0.0269, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 2.0418693793306795, | |
| "grad_norm": 0.004311998374760151, | |
| "learning_rate": 6.392550002457124e-06, | |
| "loss": 0.0273, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 2.0492407489311515, | |
| "grad_norm": 0.0007999803638085723, | |
| "learning_rate": 6.3434075384539785e-06, | |
| "loss": 0.0263, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 2.056612118531623, | |
| "grad_norm": 0.004331584554165602, | |
| "learning_rate": 6.294461644306846e-06, | |
| "loss": 0.0301, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 2.063983488132095, | |
| "grad_norm": 0.013063879683613777, | |
| "learning_rate": 6.245417465231706e-06, | |
| "loss": 0.03, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 2.0713548577325667, | |
| "grad_norm": 0.01823696680366993, | |
| "learning_rate": 6.196275001228562e-06, | |
| "loss": 0.0343, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 2.0787262273330387, | |
| "grad_norm": 7.280787944793701, | |
| "learning_rate": 6.147132537225416e-06, | |
| "loss": 0.0261, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 2.0860975969335103, | |
| "grad_norm": 0.0030333856120705605, | |
| "learning_rate": 6.097990073222272e-06, | |
| "loss": 0.0298, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 2.093468966533982, | |
| "grad_norm": null, | |
| "learning_rate": 6.0489458941471335e-06, | |
| "loss": 0.0241, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 2.100840336134454, | |
| "grad_norm": 46.87717056274414, | |
| "learning_rate": 5.999803430143988e-06, | |
| "loss": 0.0352, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 2.1082117057349254, | |
| "grad_norm": 0.008375998586416245, | |
| "learning_rate": 5.9506609661408435e-06, | |
| "loss": 0.0283, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 2.1155830753353975, | |
| "grad_norm": 0.0009784572757780552, | |
| "learning_rate": 5.901518502137698e-06, | |
| "loss": 0.0271, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 2.122954444935869, | |
| "grad_norm": 0.0016769421054050326, | |
| "learning_rate": 5.852376038134553e-06, | |
| "loss": 0.0233, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 2.1303258145363406, | |
| "grad_norm": 0.006961928680539131, | |
| "learning_rate": 5.803233574131407e-06, | |
| "loss": 0.0294, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 2.1376971841368126, | |
| "grad_norm": 0.005140836350619793, | |
| "learning_rate": 5.754091110128263e-06, | |
| "loss": 0.039, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 2.1450685537372842, | |
| "grad_norm": 0.001053107320331037, | |
| "learning_rate": 5.704948646125117e-06, | |
| "loss": 0.0309, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 2.1524399233377562, | |
| "grad_norm": 0.0007632673368789256, | |
| "learning_rate": 5.655806182121973e-06, | |
| "loss": 0.0363, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 2.159811292938228, | |
| "grad_norm": 0.4370366036891937, | |
| "learning_rate": 5.606762003046833e-06, | |
| "loss": 0.0317, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 2.1671826625387, | |
| "grad_norm": 0.00013254112855065614, | |
| "learning_rate": 5.557619539043688e-06, | |
| "loss": 0.0284, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 2.1745540321391714, | |
| "grad_norm": 6.598239421844482, | |
| "learning_rate": 5.508477075040542e-06, | |
| "loss": 0.0269, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 2.181925401739643, | |
| "grad_norm": 0.021487107500433922, | |
| "learning_rate": 5.459334611037398e-06, | |
| "loss": 0.0382, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 2.189296771340115, | |
| "grad_norm": 11.90185546875, | |
| "learning_rate": 5.410192147034252e-06, | |
| "loss": 0.0372, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 2.1966681409405866, | |
| "grad_norm": 0.0010807571234181523, | |
| "learning_rate": 5.361049683031107e-06, | |
| "loss": 0.0294, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 2.2040395105410586, | |
| "grad_norm": 0.024999860674142838, | |
| "learning_rate": 5.311907219027963e-06, | |
| "loss": 0.031, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 2.21141088014153, | |
| "grad_norm": 0.011472758837044239, | |
| "learning_rate": 5.2628630399528235e-06, | |
| "loss": 0.0253, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 2.218782249742002, | |
| "grad_norm": 0.0003523350169416517, | |
| "learning_rate": 5.213720575949679e-06, | |
| "loss": 0.0312, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 2.226153619342474, | |
| "grad_norm": 0.00018138001905754209, | |
| "learning_rate": 5.1645781119465335e-06, | |
| "loss": 0.0306, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 2.2335249889429454, | |
| "grad_norm": 27.588563919067383, | |
| "learning_rate": 5.1154356479433885e-06, | |
| "loss": 0.0257, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 2.2408963585434174, | |
| "grad_norm": 0.0001788044028216973, | |
| "learning_rate": 5.066293183940243e-06, | |
| "loss": 0.0364, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 2.248267728143889, | |
| "grad_norm": 0.025098439306020737, | |
| "learning_rate": 5.017347289793111e-06, | |
| "loss": 0.033, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 2.255639097744361, | |
| "grad_norm": 0.028794238343834877, | |
| "learning_rate": 4.968204825789966e-06, | |
| "loss": 0.0268, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 2.2630104673448326, | |
| "grad_norm": 0.0443144217133522, | |
| "learning_rate": 4.91906236178682e-06, | |
| "loss": 0.0277, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 2.2703818369453046, | |
| "grad_norm": 5.745356559753418, | |
| "learning_rate": 4.869919897783675e-06, | |
| "loss": 0.0319, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 2.277753206545776, | |
| "grad_norm": 0.01881660334765911, | |
| "learning_rate": 4.82077743378053e-06, | |
| "loss": 0.0321, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 2.2851245761462478, | |
| "grad_norm": 0.0014673862606287003, | |
| "learning_rate": 4.771634969777385e-06, | |
| "loss": 0.0325, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 2.2924959457467198, | |
| "grad_norm": 0.001017951057292521, | |
| "learning_rate": 4.72249250577424e-06, | |
| "loss": 0.0369, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 2.2998673153471914, | |
| "grad_norm": 0.0014965501613914967, | |
| "learning_rate": 4.673350041771095e-06, | |
| "loss": 0.0349, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 2.3072386849476634, | |
| "grad_norm": 0.0005304542719386518, | |
| "learning_rate": 4.6243058626959555e-06, | |
| "loss": 0.0303, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 2.314610054548135, | |
| "grad_norm": 0.0002412071480648592, | |
| "learning_rate": 4.575261683620817e-06, | |
| "loss": 0.0331, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 2.321981424148607, | |
| "grad_norm": 0.0006970348185859621, | |
| "learning_rate": 4.526119219617672e-06, | |
| "loss": 0.0343, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 2.3293527937490786, | |
| "grad_norm": 0.011668604798614979, | |
| "learning_rate": 4.476976755614527e-06, | |
| "loss": 0.0279, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 2.33672416334955, | |
| "grad_norm": 0.005698871333152056, | |
| "learning_rate": 4.427834291611382e-06, | |
| "loss": 0.0322, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 2.344095532950022, | |
| "grad_norm": 0.0057191732339560986, | |
| "learning_rate": 4.378691827608237e-06, | |
| "loss": 0.0314, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 2.3514669025504937, | |
| "grad_norm": 0.00243947422131896, | |
| "learning_rate": 4.329549363605092e-06, | |
| "loss": 0.0267, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 2.3588382721509658, | |
| "grad_norm": 0.00033369645825587213, | |
| "learning_rate": 4.280406899601947e-06, | |
| "loss": 0.0244, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 2.3662096417514373, | |
| "grad_norm": 0.023830311372876167, | |
| "learning_rate": 4.231264435598802e-06, | |
| "loss": 0.0283, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 2.3735810113519094, | |
| "grad_norm": 0.000461634888779372, | |
| "learning_rate": 4.182121971595656e-06, | |
| "loss": 0.0285, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 0.001816113363020122, | |
| "learning_rate": 4.132979507592511e-06, | |
| "loss": 0.03, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 2.3883237505528525, | |
| "grad_norm": 0.001825949759222567, | |
| "learning_rate": 4.083837043589366e-06, | |
| "loss": 0.0294, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 2.3956951201533245, | |
| "grad_norm": 9.502708435058594, | |
| "learning_rate": 4.034792864514227e-06, | |
| "loss": 0.0255, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 2.403066489753796, | |
| "grad_norm": 0.0001642414426896721, | |
| "learning_rate": 3.985650400511082e-06, | |
| "loss": 0.0255, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 2.410437859354268, | |
| "grad_norm": 5.004094123840332, | |
| "learning_rate": 3.936507936507936e-06, | |
| "loss": 0.0304, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 2.4178092289547397, | |
| "grad_norm": 0.1878451257944107, | |
| "learning_rate": 3.887365472504791e-06, | |
| "loss": 0.0236, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 2.4251805985552117, | |
| "grad_norm": 0.0004031589487567544, | |
| "learning_rate": 3.838223008501646e-06, | |
| "loss": 0.0316, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 2.4325519681556833, | |
| "grad_norm": 0.0004982321988791227, | |
| "learning_rate": 3.7890805444985013e-06, | |
| "loss": 0.0277, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 2.439923337756155, | |
| "grad_norm": 0.0037121805362403393, | |
| "learning_rate": 3.739938080495356e-06, | |
| "loss": 0.0303, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 2.447294707356627, | |
| "grad_norm": 0.0101530272513628, | |
| "learning_rate": 3.690795616492211e-06, | |
| "loss": 0.0371, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 2.4546660769570985, | |
| "grad_norm": 0.00018950540106743574, | |
| "learning_rate": 3.6416531524890663e-06, | |
| "loss": 0.0319, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 2.4620374465575705, | |
| "grad_norm": 0.8489145040512085, | |
| "learning_rate": 3.5925106884859213e-06, | |
| "loss": 0.0261, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 2.469408816158042, | |
| "grad_norm": 0.0013942194636911154, | |
| "learning_rate": 3.543368224482776e-06, | |
| "loss": 0.0283, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 2.476780185758514, | |
| "grad_norm": 0.4112614691257477, | |
| "learning_rate": 3.494225760479631e-06, | |
| "loss": 0.0332, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 2.4841515553589857, | |
| "grad_norm": 0.0010579220252111554, | |
| "learning_rate": 3.445181581404492e-06, | |
| "loss": 0.0288, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 2.4915229249594573, | |
| "grad_norm": 0.0012302091345191002, | |
| "learning_rate": 3.396137402329353e-06, | |
| "loss": 0.0272, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 2.4988942945599293, | |
| "grad_norm": 0.0008321187924593687, | |
| "learning_rate": 3.3470932232542143e-06, | |
| "loss": 0.0251, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 2.506265664160401, | |
| "grad_norm": 2.113279342651367, | |
| "learning_rate": 3.297950759251069e-06, | |
| "loss": 0.0306, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 2.513637033760873, | |
| "grad_norm": 0.030419809743762016, | |
| "learning_rate": 3.248808295247924e-06, | |
| "loss": 0.033, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 2.5210084033613445, | |
| "grad_norm": 0.002908308058977127, | |
| "learning_rate": 3.199665831244779e-06, | |
| "loss": 0.0333, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 2.5283797729618165, | |
| "grad_norm": 0.01623060740530491, | |
| "learning_rate": 3.1505233672416334e-06, | |
| "loss": 0.0347, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 2.535751142562288, | |
| "grad_norm": 0.0023743058554828167, | |
| "learning_rate": 3.1013809032384884e-06, | |
| "loss": 0.029, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 2.5431225121627596, | |
| "grad_norm": 0.0017329910770058632, | |
| "learning_rate": 3.0522384392353434e-06, | |
| "loss": 0.0399, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 2.5504938817632317, | |
| "grad_norm": 0.003035512287169695, | |
| "learning_rate": 3.003095975232199e-06, | |
| "loss": 0.0292, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 2.5578652513637032, | |
| "grad_norm": 0.016472771763801575, | |
| "learning_rate": 2.9539535112290534e-06, | |
| "loss": 0.0295, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 2.5652366209641753, | |
| "grad_norm": 0.19270442426204681, | |
| "learning_rate": 2.9048110472259084e-06, | |
| "loss": 0.0237, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 2.572607990564647, | |
| "grad_norm": 14.15371322631836, | |
| "learning_rate": 2.855668583222763e-06, | |
| "loss": 0.031, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 2.579979360165119, | |
| "grad_norm": 0.0006798787508159876, | |
| "learning_rate": 2.806526119219618e-06, | |
| "loss": 0.0348, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 2.5873507297655904, | |
| "grad_norm": 0.0007228073664009571, | |
| "learning_rate": 2.757383655216473e-06, | |
| "loss": 0.0266, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 2.594722099366062, | |
| "grad_norm": 0.003529267618432641, | |
| "learning_rate": 2.708339476141334e-06, | |
| "loss": 0.025, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 2.602093468966534, | |
| "grad_norm": 0.004369991831481457, | |
| "learning_rate": 2.659197012138189e-06, | |
| "loss": 0.0343, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 2.6094648385670056, | |
| "grad_norm": 0.0011527182068675756, | |
| "learning_rate": 2.610054548135044e-06, | |
| "loss": 0.0344, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 2.6168362081674776, | |
| "grad_norm": 0.0012939295265823603, | |
| "learning_rate": 2.5609120841318984e-06, | |
| "loss": 0.023, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 2.624207577767949, | |
| "grad_norm": 0.004880073014646769, | |
| "learning_rate": 2.5117696201287534e-06, | |
| "loss": 0.0226, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 0.004119515884667635, | |
| "learning_rate": 2.4626271561256084e-06, | |
| "loss": 0.0217, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 2.638950316968893, | |
| "grad_norm": 0.0001360880269203335, | |
| "learning_rate": 2.4135829770504697e-06, | |
| "loss": 0.0318, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 2.6463216865693644, | |
| "grad_norm": 0.09521844983100891, | |
| "learning_rate": 2.3644405130473242e-06, | |
| "loss": 0.0382, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 2.6536930561698364, | |
| "grad_norm": 0.0037165977992117405, | |
| "learning_rate": 2.3152980490441792e-06, | |
| "loss": 0.0277, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 2.661064425770308, | |
| "grad_norm": 0.0008691260009072721, | |
| "learning_rate": 2.2661555850410342e-06, | |
| "loss": 0.0322, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 2.66843579537078, | |
| "grad_norm": 1.0185267925262451, | |
| "learning_rate": 2.217111405965895e-06, | |
| "loss": 0.0287, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 2.6758071649712516, | |
| "grad_norm": 11.761468887329102, | |
| "learning_rate": 2.1679689419627505e-06, | |
| "loss": 0.0269, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 2.6831785345717236, | |
| "grad_norm": 0.00046698356163688004, | |
| "learning_rate": 2.118826477959605e-06, | |
| "loss": 0.0324, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 2.690549904172195, | |
| "grad_norm": 0.010263352654874325, | |
| "learning_rate": 2.0698805838124726e-06, | |
| "loss": 0.0239, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 2.6979212737726668, | |
| "grad_norm": 3.434927463531494, | |
| "learning_rate": 2.020738119809327e-06, | |
| "loss": 0.0335, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 2.705292643373139, | |
| "grad_norm": 0.00016977268387563527, | |
| "learning_rate": 1.971595655806182e-06, | |
| "loss": 0.0275, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 2.7126640129736104, | |
| "grad_norm": 0.0012544383062049747, | |
| "learning_rate": 1.922453191803037e-06, | |
| "loss": 0.0312, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 2.7200353825740824, | |
| "grad_norm": 0.0017481895629316568, | |
| "learning_rate": 1.8733107277998921e-06, | |
| "loss": 0.0244, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 2.727406752174554, | |
| "grad_norm": 0.001362023875117302, | |
| "learning_rate": 1.824168263796747e-06, | |
| "loss": 0.0242, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 2.734778121775026, | |
| "grad_norm": 0.0014405859401449561, | |
| "learning_rate": 1.775025799793602e-06, | |
| "loss": 0.0304, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 2.7421494913754976, | |
| "grad_norm": 0.0008861696696840227, | |
| "learning_rate": 1.7258833357904567e-06, | |
| "loss": 0.0278, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 2.749520860975969, | |
| "grad_norm": 0.12847045063972473, | |
| "learning_rate": 1.6767408717873115e-06, | |
| "loss": 0.0244, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 2.756892230576441, | |
| "grad_norm": 0.001703253947198391, | |
| "learning_rate": 1.6275984077841663e-06, | |
| "loss": 0.0356, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 2.7642636001769127, | |
| "grad_norm": 0.003184770466759801, | |
| "learning_rate": 1.5784559437810213e-06, | |
| "loss": 0.0234, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 2.7716349697773848, | |
| "grad_norm": 5.178366661071777, | |
| "learning_rate": 1.529313479777876e-06, | |
| "loss": 0.0272, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 2.7790063393778563, | |
| "grad_norm": 0.0035832468420267105, | |
| "learning_rate": 1.4801710157747309e-06, | |
| "loss": 0.0227, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 2.7863777089783284, | |
| "grad_norm": 0.007811425253748894, | |
| "learning_rate": 1.4311268366995923e-06, | |
| "loss": 0.03, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 2.7937490785788, | |
| "grad_norm": 12.170905113220215, | |
| "learning_rate": 1.3819843726964471e-06, | |
| "loss": 0.028, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 2.8011204481792715, | |
| "grad_norm": 0.0028726314194500446, | |
| "learning_rate": 1.3329401936213082e-06, | |
| "loss": 0.0303, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 2.8084918177797435, | |
| "grad_norm": 30.53835678100586, | |
| "learning_rate": 1.283797729618163e-06, | |
| "loss": 0.0321, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 2.815863187380215, | |
| "grad_norm": 0.006513836327940226, | |
| "learning_rate": 1.234655265615018e-06, | |
| "loss": 0.0235, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 2.823234556980687, | |
| "grad_norm": 7.619091510772705, | |
| "learning_rate": 1.185512801611873e-06, | |
| "loss": 0.0307, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 2.8306059265811587, | |
| "grad_norm": 0.005691774655133486, | |
| "learning_rate": 1.1363703376087278e-06, | |
| "loss": 0.0257, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 2.8379772961816307, | |
| "grad_norm": 2.5761618614196777, | |
| "learning_rate": 1.0872278736055828e-06, | |
| "loss": 0.0226, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 2.8453486657821023, | |
| "grad_norm": 0.00010429321264382452, | |
| "learning_rate": 1.0380854096024375e-06, | |
| "loss": 0.0242, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 2.852720035382574, | |
| "grad_norm": 0.02838067337870598, | |
| "learning_rate": 9.889429455992925e-07, | |
| "loss": 0.0287, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 2.860091404983046, | |
| "grad_norm": 0.12736959755420685, | |
| "learning_rate": 9.398004815961473e-07, | |
| "loss": 0.0256, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 2.8674627745835175, | |
| "grad_norm": 0.32672008872032166, | |
| "learning_rate": 8.907563025210085e-07, | |
| "loss": 0.0265, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 2.8748341441839895, | |
| "grad_norm": 0.000785826297942549, | |
| "learning_rate": 8.416138385178634e-07, | |
| "loss": 0.0339, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 2.882205513784461, | |
| "grad_norm": 6.065654754638672, | |
| "learning_rate": 7.924713745147182e-07, | |
| "loss": 0.0337, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 2.889576883384933, | |
| "grad_norm": 0.012824644334614277, | |
| "learning_rate": 7.433289105115731e-07, | |
| "loss": 0.0278, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 2.8969482529854047, | |
| "grad_norm": 0.011444471776485443, | |
| "learning_rate": 6.941864465084281e-07, | |
| "loss": 0.0368, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 2.9043196225858763, | |
| "grad_norm": 0.0061572156846523285, | |
| "learning_rate": 6.450439825052828e-07, | |
| "loss": 0.0289, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 2.9116909921863483, | |
| "grad_norm": 0.010506005957722664, | |
| "learning_rate": 5.95999803430144e-07, | |
| "loss": 0.022, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 2.91906236178682, | |
| "grad_norm": 0.00013557464990299195, | |
| "learning_rate": 5.468573394269989e-07, | |
| "loss": 0.0313, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 2.926433731387292, | |
| "grad_norm": 0.0008798382477834821, | |
| "learning_rate": 4.977148754238538e-07, | |
| "loss": 0.0302, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 2.9338051009877635, | |
| "grad_norm": 0.11164344847202301, | |
| "learning_rate": 4.485724114207086e-07, | |
| "loss": 0.0234, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 17.183870315551758, | |
| "learning_rate": 3.9942994741756357e-07, | |
| "loss": 0.0294, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 2.948547840188707, | |
| "grad_norm": 14.767284393310547, | |
| "learning_rate": 3.502874834144184e-07, | |
| "loss": 0.0277, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 2.9559192097891787, | |
| "grad_norm": 0.0102895712479949, | |
| "learning_rate": 3.012433043392796e-07, | |
| "loss": 0.0307, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 2.9632905793896507, | |
| "grad_norm": 0.0019515061285346746, | |
| "learning_rate": 2.5210084033613445e-07, | |
| "loss": 0.0345, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 2.9706619489901223, | |
| "grad_norm": 0.5170195698738098, | |
| "learning_rate": 2.0295837633298937e-07, | |
| "loss": 0.0282, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 2.9780333185905943, | |
| "grad_norm": 0.00022552709560841322, | |
| "learning_rate": 1.5381591232984424e-07, | |
| "loss": 0.0294, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 2.985404688191066, | |
| "grad_norm": 0.004490272141993046, | |
| "learning_rate": 1.0467344832669911e-07, | |
| "loss": 0.0223, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 2.992776057791538, | |
| "grad_norm": 0.0005161833250895143, | |
| "learning_rate": 5.562926925156028e-08, | |
| "loss": 0.0247, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6753550543024227, | |
| "eval_f1": 0.6936652741069145, | |
| "eval_loss": 0.12206839770078659, | |
| "eval_roc_auc": 0.833023672143383, | |
| "eval_runtime": 94.2338, | |
| "eval_samples_per_second": 63.512, | |
| "eval_steps_per_second": 63.512, | |
| "step": 203490 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 203490, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1521488766625792e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |