{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3219, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "loss_reasoning": 1.8923735618591309, "loss_utility": 2.731106758117676, "step": 0 }, { "epoch": 0.0009319664492078285, "grad_norm": 5.165795219607406, "learning_rate": 6.211180124223603e-08, "loss": 4.6296, "step": 1 }, { "epoch": 0.0009319664492078285, "loss_reasoning": 2.094414234161377, "loss_utility": 2.8390274047851562, "step": 1 }, { "epoch": 0.001863932898415657, "grad_norm": 4.90328213605062, "learning_rate": 1.2422360248447206e-07, "loss": 4.1935, "step": 2 }, { "epoch": 0.001863932898415657, "loss_reasoning": 2.050252914428711, "loss_utility": 2.73219633102417, "step": 2 }, { "epoch": 0.0027958993476234857, "grad_norm": 5.594568537705461, "learning_rate": 1.863354037267081e-07, "loss": 4.7486, "step": 3 }, { "epoch": 0.0027958993476234857, "loss_reasoning": 1.8123310804367065, "loss_utility": 2.9970457553863525, "step": 3 }, { "epoch": 0.003727865796831314, "grad_norm": 6.391755428766245, "learning_rate": 2.484472049689441e-07, "loss": 5.0667, "step": 4 }, { "epoch": 0.003727865796831314, "loss_reasoning": 1.8772916793823242, "loss_utility": 3.4967803955078125, "step": 4 }, { "epoch": 0.004659832246039142, "grad_norm": 6.015286670482039, "learning_rate": 3.1055900621118013e-07, "loss": 4.9609, "step": 5 }, { "epoch": 0.004659832246039142, "loss_reasoning": 1.9697456359863281, "loss_utility": 3.252359390258789, "step": 5 }, { "epoch": 0.005591798695246971, "grad_norm": 5.528837758741334, "learning_rate": 3.726708074534162e-07, "loss": 4.9485, "step": 6 }, { "epoch": 0.005591798695246971, "loss_reasoning": 1.8815447092056274, "loss_utility": 3.0830488204956055, "step": 6 }, { "epoch": 0.0065237651444548, "grad_norm": 6.697907309485381, "learning_rate": 4.347826086956522e-07, "loss": 5.1472, "step": 7 }, { "epoch": 0.0065237651444548, "loss_reasoning": 2.02980637550354, "loss_utility": 2.78354549407959, "step": 7 }, { "epoch": 0.007455731593662628, "grad_norm": 6.444559470179131, "learning_rate": 4.968944099378882e-07, "loss": 5.0133, "step": 8 }, { "epoch": 0.007455731593662628, "loss_reasoning": 1.8600692749023438, "loss_utility": 2.799402952194214, "step": 8 }, { "epoch": 0.008387698042870456, "grad_norm": 6.0644185540572675, "learning_rate": 5.590062111801243e-07, "loss": 4.9882, "step": 9 }, { "epoch": 0.008387698042870456, "loss_reasoning": 1.9284522533416748, "loss_utility": 2.7967798709869385, "step": 9 }, { "epoch": 0.009319664492078284, "grad_norm": 5.686405621326113, "learning_rate": 6.211180124223603e-07, "loss": 4.6428, "step": 10 }, { "epoch": 0.009319664492078284, "loss_reasoning": 1.9002341032028198, "loss_utility": 3.277134656906128, "step": 10 }, { "epoch": 0.010251630941286114, "grad_norm": 6.461116064994181, "learning_rate": 6.832298136645964e-07, "loss": 4.8953, "step": 11 }, { "epoch": 0.010251630941286114, "loss_reasoning": 2.017637252807617, "loss_utility": 2.7836108207702637, "step": 11 }, { "epoch": 0.011183597390493943, "grad_norm": 5.623173004668743, "learning_rate": 7.453416149068324e-07, "loss": 4.987, "step": 12 }, { "epoch": 0.011183597390493943, "loss_reasoning": 2.023853302001953, "loss_utility": 2.4076366424560547, "step": 12 }, { "epoch": 0.012115563839701771, "grad_norm": 7.36303832277899, "learning_rate": 8.074534161490684e-07, "loss": 5.0359, "step": 13 }, { "epoch": 0.012115563839701771, "loss_reasoning": 1.7696456909179688, "loss_utility": 2.7600879669189453, "step": 13 }, { "epoch": 0.0130475302889096, "grad_norm": 5.939617024608268, "learning_rate": 8.695652173913044e-07, "loss": 4.6165, "step": 14 }, { "epoch": 0.0130475302889096, "loss_reasoning": 1.8399466276168823, "loss_utility": 2.5656042098999023, "step": 14 }, { "epoch": 0.013979496738117428, "grad_norm": 5.193856945382145, "learning_rate": 9.316770186335404e-07, "loss": 4.1438, "step": 15 }, { "epoch": 0.013979496738117428, "loss_reasoning": 1.7903707027435303, "loss_utility": 2.7118632793426514, "step": 15 }, { "epoch": 0.014911463187325256, "grad_norm": 6.602866518638495, "learning_rate": 9.937888198757765e-07, "loss": 4.9845, "step": 16 }, { "epoch": 0.014911463187325256, "loss_reasoning": 1.731900691986084, "loss_utility": 1.7880884408950806, "step": 16 }, { "epoch": 0.015843429636533086, "grad_norm": 5.5746468661627295, "learning_rate": 1.0559006211180126e-06, "loss": 4.4978, "step": 17 }, { "epoch": 0.015843429636533086, "loss_reasoning": 1.9043443202972412, "loss_utility": 1.4210838079452515, "step": 17 }, { "epoch": 0.016775396085740912, "grad_norm": 5.406123765025883, "learning_rate": 1.1180124223602485e-06, "loss": 4.33, "step": 18 }, { "epoch": 0.016775396085740912, "loss_reasoning": 1.7279483079910278, "loss_utility": 3.7826876640319824, "step": 18 }, { "epoch": 0.017707362534948742, "grad_norm": 5.642037128065041, "learning_rate": 1.1801242236024846e-06, "loss": 4.979, "step": 19 }, { "epoch": 0.017707362534948742, "loss_reasoning": 1.8068498373031616, "loss_utility": 2.2260959148406982, "step": 19 }, { "epoch": 0.01863932898415657, "grad_norm": 5.180123223585622, "learning_rate": 1.2422360248447205e-06, "loss": 4.3059, "step": 20 }, { "epoch": 0.01863932898415657, "loss_reasoning": 1.7286304235458374, "loss_utility": 3.269416332244873, "step": 20 }, { "epoch": 0.0195712954333644, "grad_norm": 6.185823151664835, "learning_rate": 1.3043478260869566e-06, "loss": 4.7375, "step": 21 }, { "epoch": 0.0195712954333644, "loss_reasoning": 1.5464811325073242, "loss_utility": 1.8420902490615845, "step": 21 }, { "epoch": 0.02050326188257223, "grad_norm": 4.737951812146464, "learning_rate": 1.3664596273291927e-06, "loss": 4.1239, "step": 22 }, { "epoch": 0.02050326188257223, "loss_reasoning": 1.6339764595031738, "loss_utility": 2.2190606594085693, "step": 22 }, { "epoch": 0.021435228331780055, "grad_norm": 4.652640838518342, "learning_rate": 1.4285714285714286e-06, "loss": 4.0854, "step": 23 }, { "epoch": 0.021435228331780055, "loss_reasoning": 1.6764719486236572, "loss_utility": 3.0543365478515625, "step": 23 }, { "epoch": 0.022367194780987885, "grad_norm": 4.946251823696626, "learning_rate": 1.4906832298136647e-06, "loss": 4.3497, "step": 24 }, { "epoch": 0.022367194780987885, "loss_reasoning": 1.4540598392486572, "loss_utility": 3.067427635192871, "step": 24 }, { "epoch": 0.023299161230195712, "grad_norm": 5.1468217233515166, "learning_rate": 1.5527950310559006e-06, "loss": 4.1854, "step": 25 }, { "epoch": 0.023299161230195712, "loss_reasoning": 1.4110692739486694, "loss_utility": 2.3854470252990723, "step": 25 }, { "epoch": 0.024231127679403542, "grad_norm": 5.4723245376379985, "learning_rate": 1.6149068322981367e-06, "loss": 3.6513, "step": 26 }, { "epoch": 0.024231127679403542, "loss_reasoning": 1.4119246006011963, "loss_utility": 2.8880224227905273, "step": 26 }, { "epoch": 0.02516309412861137, "grad_norm": 4.9092058630757, "learning_rate": 1.6770186335403729e-06, "loss": 4.3314, "step": 27 }, { "epoch": 0.02516309412861137, "loss_reasoning": 1.351423740386963, "loss_utility": 2.3194169998168945, "step": 27 }, { "epoch": 0.0260950605778192, "grad_norm": 4.823376390137743, "learning_rate": 1.7391304347826088e-06, "loss": 3.8472, "step": 28 }, { "epoch": 0.0260950605778192, "loss_reasoning": 1.255765676498413, "loss_utility": 2.723292827606201, "step": 28 }, { "epoch": 0.02702702702702703, "grad_norm": 4.229348198533689, "learning_rate": 1.8012422360248449e-06, "loss": 3.9621, "step": 29 }, { "epoch": 0.02702702702702703, "loss_reasoning": 1.1552119255065918, "loss_utility": 1.6103270053863525, "step": 29 }, { "epoch": 0.027958993476234855, "grad_norm": 3.3582654902180833, "learning_rate": 1.8633540372670808e-06, "loss": 3.5768, "step": 30 }, { "epoch": 0.027958993476234855, "loss_reasoning": 1.2218471765518188, "loss_utility": 2.219080924987793, "step": 30 }, { "epoch": 0.028890959925442685, "grad_norm": 4.232798516319425, "learning_rate": 1.925465838509317e-06, "loss": 3.8757, "step": 31 }, { "epoch": 0.028890959925442685, "loss_reasoning": 1.1324083805084229, "loss_utility": 2.370138645172119, "step": 31 }, { "epoch": 0.02982292637465051, "grad_norm": 4.46326837868537, "learning_rate": 1.987577639751553e-06, "loss": 3.7519, "step": 32 }, { "epoch": 0.02982292637465051, "loss_reasoning": 1.2423624992370605, "loss_utility": 2.6665234565734863, "step": 32 }, { "epoch": 0.03075489282385834, "grad_norm": 4.6211908539398285, "learning_rate": 2.049689440993789e-06, "loss": 3.9289, "step": 33 }, { "epoch": 0.03075489282385834, "loss_reasoning": 1.1418535709381104, "loss_utility": 2.348162889480591, "step": 33 }, { "epoch": 0.03168685927306617, "grad_norm": 4.196579869807078, "learning_rate": 2.111801242236025e-06, "loss": 3.6054, "step": 34 }, { "epoch": 0.03168685927306617, "loss_reasoning": 1.0847673416137695, "loss_utility": 2.90510892868042, "step": 34 }, { "epoch": 0.032618825722273995, "grad_norm": 4.254974038967206, "learning_rate": 2.173913043478261e-06, "loss": 3.9597, "step": 35 }, { "epoch": 0.032618825722273995, "loss_reasoning": 1.0393210649490356, "loss_utility": 3.1231584548950195, "step": 35 }, { "epoch": 0.033550792171481825, "grad_norm": 4.0577716315576815, "learning_rate": 2.236024844720497e-06, "loss": 3.7017, "step": 36 }, { "epoch": 0.033550792171481825, "loss_reasoning": 1.065529704093933, "loss_utility": 2.2850465774536133, "step": 36 }, { "epoch": 0.034482758620689655, "grad_norm": 3.9691383319020774, "learning_rate": 2.298136645962733e-06, "loss": 3.8043, "step": 37 }, { "epoch": 0.034482758620689655, "loss_reasoning": 1.0274102687835693, "loss_utility": 2.3530752658843994, "step": 37 }, { "epoch": 0.035414725069897485, "grad_norm": 4.427477299534233, "learning_rate": 2.3602484472049692e-06, "loss": 3.771, "step": 38 }, { "epoch": 0.035414725069897485, "loss_reasoning": 0.9736369252204895, "loss_utility": 3.134762763977051, "step": 38 }, { "epoch": 0.036346691519105315, "grad_norm": 3.864412228308087, "learning_rate": 2.422360248447205e-06, "loss": 3.8129, "step": 39 }, { "epoch": 0.036346691519105315, "loss_reasoning": 1.0015085935592651, "loss_utility": 3.5927414894104004, "step": 39 }, { "epoch": 0.03727865796831314, "grad_norm": 3.806676981196824, "learning_rate": 2.484472049689441e-06, "loss": 3.7409, "step": 40 }, { "epoch": 0.03727865796831314, "loss_reasoning": 0.9952654242515564, "loss_utility": 2.627222776412964, "step": 40 }, { "epoch": 0.03821062441752097, "grad_norm": 3.2672832560980614, "learning_rate": 2.546583850931677e-06, "loss": 3.3717, "step": 41 }, { "epoch": 0.03821062441752097, "loss_reasoning": 1.0159516334533691, "loss_utility": 2.5176656246185303, "step": 41 }, { "epoch": 0.0391425908667288, "grad_norm": 4.600082889946817, "learning_rate": 2.6086956521739132e-06, "loss": 3.6161, "step": 42 }, { "epoch": 0.0391425908667288, "loss_reasoning": 0.9796465635299683, "loss_utility": 3.439990997314453, "step": 42 }, { "epoch": 0.04007455731593663, "grad_norm": 3.4781375441175784, "learning_rate": 2.670807453416149e-06, "loss": 3.5537, "step": 43 }, { "epoch": 0.04007455731593663, "loss_reasoning": 0.9339590668678284, "loss_utility": 2.8842220306396484, "step": 43 }, { "epoch": 0.04100652376514446, "grad_norm": 3.6306212427322797, "learning_rate": 2.7329192546583855e-06, "loss": 3.5565, "step": 44 }, { "epoch": 0.04100652376514446, "loss_reasoning": 1.0075569152832031, "loss_utility": 3.1500296592712402, "step": 44 }, { "epoch": 0.04193849021435228, "grad_norm": 3.4030437396674063, "learning_rate": 2.795031055900621e-06, "loss": 3.6473, "step": 45 }, { "epoch": 0.04193849021435228, "loss_reasoning": 0.9231812357902527, "loss_utility": 1.7368712425231934, "step": 45 }, { "epoch": 0.04287045666356011, "grad_norm": 3.43270486607915, "learning_rate": 2.8571428571428573e-06, "loss": 2.9427, "step": 46 }, { "epoch": 0.04287045666356011, "loss_reasoning": 1.011641502380371, "loss_utility": 1.6385849714279175, "step": 46 }, { "epoch": 0.04380242311276794, "grad_norm": 3.6083766335120444, "learning_rate": 2.919254658385093e-06, "loss": 3.3393, "step": 47 }, { "epoch": 0.04380242311276794, "loss_reasoning": 0.8256717920303345, "loss_utility": 3.3658370971679688, "step": 47 }, { "epoch": 0.04473438956197577, "grad_norm": 3.2400559878171182, "learning_rate": 2.9813664596273295e-06, "loss": 3.7479, "step": 48 }, { "epoch": 0.04473438956197577, "loss_reasoning": 0.9809677600860596, "loss_utility": 2.4730660915374756, "step": 48 }, { "epoch": 0.045666356011183594, "grad_norm": 3.829737966410092, "learning_rate": 3.043478260869566e-06, "loss": 3.4744, "step": 49 }, { "epoch": 0.045666356011183594, "loss_reasoning": 0.8095309734344482, "loss_utility": 2.28251314163208, "step": 49 }, { "epoch": 0.046598322460391424, "grad_norm": 3.9470689911455072, "learning_rate": 3.1055900621118013e-06, "loss": 3.3244, "step": 50 }, { "epoch": 0.046598322460391424, "loss_reasoning": 0.8526009321212769, "loss_utility": 3.397564172744751, "step": 50 }, { "epoch": 0.047530288909599254, "grad_norm": 5.49692178962284, "learning_rate": 3.1677018633540376e-06, "loss": 3.3228, "step": 51 }, { "epoch": 0.047530288909599254, "loss_reasoning": 0.9042525291442871, "loss_utility": 2.0825271606445312, "step": 51 }, { "epoch": 0.048462255358807084, "grad_norm": 4.7098744204046765, "learning_rate": 3.2298136645962735e-06, "loss": 3.2309, "step": 52 }, { "epoch": 0.048462255358807084, "loss_reasoning": 0.8930448293685913, "loss_utility": 2.70247220993042, "step": 52 }, { "epoch": 0.049394221808014914, "grad_norm": 3.243896166767706, "learning_rate": 3.29192546583851e-06, "loss": 3.5703, "step": 53 }, { "epoch": 0.049394221808014914, "loss_reasoning": 0.8372834920883179, "loss_utility": 3.210474967956543, "step": 53 }, { "epoch": 0.05032618825722274, "grad_norm": 4.447001017156603, "learning_rate": 3.3540372670807457e-06, "loss": 3.4881, "step": 54 }, { "epoch": 0.05032618825722274, "loss_reasoning": 0.9109458923339844, "loss_utility": 2.298184871673584, "step": 54 }, { "epoch": 0.05125815470643057, "grad_norm": 3.7798969457341185, "learning_rate": 3.4161490683229816e-06, "loss": 3.6021, "step": 55 }, { "epoch": 0.05125815470643057, "loss_reasoning": 0.7719396352767944, "loss_utility": 2.2637267112731934, "step": 55 }, { "epoch": 0.0521901211556384, "grad_norm": 2.9873422272336505, "learning_rate": 3.4782608695652175e-06, "loss": 3.1669, "step": 56 }, { "epoch": 0.0521901211556384, "loss_reasoning": 0.801297664642334, "loss_utility": 3.214038372039795, "step": 56 }, { "epoch": 0.05312208760484623, "grad_norm": 7.261134986673031, "learning_rate": 3.540372670807454e-06, "loss": 3.6167, "step": 57 }, { "epoch": 0.05312208760484623, "loss_reasoning": 0.8390157222747803, "loss_utility": 2.707578659057617, "step": 57 }, { "epoch": 0.05405405405405406, "grad_norm": 3.060632973668704, "learning_rate": 3.6024844720496897e-06, "loss": 3.5314, "step": 58 }, { "epoch": 0.05405405405405406, "loss_reasoning": 0.8323641419410706, "loss_utility": 3.2234926223754883, "step": 58 }, { "epoch": 0.05498602050326188, "grad_norm": 3.260032860340479, "learning_rate": 3.664596273291926e-06, "loss": 3.6321, "step": 59 }, { "epoch": 0.05498602050326188, "loss_reasoning": 0.8359338641166687, "loss_utility": 2.9163174629211426, "step": 59 }, { "epoch": 0.05591798695246971, "grad_norm": 3.294189293029506, "learning_rate": 3.7267080745341615e-06, "loss": 3.5557, "step": 60 }, { "epoch": 0.05591798695246971, "loss_reasoning": 0.8825101852416992, "loss_utility": 1.8665688037872314, "step": 60 }, { "epoch": 0.05684995340167754, "grad_norm": 3.7045314157090528, "learning_rate": 3.788819875776398e-06, "loss": 3.0603, "step": 61 }, { "epoch": 0.05684995340167754, "loss_reasoning": 0.8203064203262329, "loss_utility": 2.6058876514434814, "step": 61 }, { "epoch": 0.05778191985088537, "grad_norm": 3.6984728084482157, "learning_rate": 3.850931677018634e-06, "loss": 3.3146, "step": 62 }, { "epoch": 0.05778191985088537, "loss_reasoning": 0.7596521377563477, "loss_utility": 1.4458009004592896, "step": 62 }, { "epoch": 0.05871388630009319, "grad_norm": 2.74247923000296, "learning_rate": 3.91304347826087e-06, "loss": 2.7976, "step": 63 }, { "epoch": 0.05871388630009319, "loss_reasoning": 0.764873206615448, "loss_utility": 2.315087080001831, "step": 63 }, { "epoch": 0.05964585274930102, "grad_norm": 3.2900742136459273, "learning_rate": 3.975155279503106e-06, "loss": 3.2627, "step": 64 }, { "epoch": 0.05964585274930102, "loss_reasoning": 0.7668917179107666, "loss_utility": 1.7370446920394897, "step": 64 }, { "epoch": 0.06057781919850885, "grad_norm": 2.854436126793055, "learning_rate": 4.037267080745342e-06, "loss": 3.4034, "step": 65 }, { "epoch": 0.06057781919850885, "loss_reasoning": 0.8307904005050659, "loss_utility": 2.9247639179229736, "step": 65 }, { "epoch": 0.06150978564771668, "grad_norm": 2.5115768668782295, "learning_rate": 4.099378881987578e-06, "loss": 3.0926, "step": 66 }, { "epoch": 0.06150978564771668, "loss_reasoning": 0.803116500377655, "loss_utility": 2.416374444961548, "step": 66 }, { "epoch": 0.06244175209692451, "grad_norm": 3.0764344057410336, "learning_rate": 4.1614906832298145e-06, "loss": 3.1174, "step": 67 }, { "epoch": 0.06244175209692451, "loss_reasoning": 0.7531195878982544, "loss_utility": 1.5068364143371582, "step": 67 }, { "epoch": 0.06337371854613234, "grad_norm": 3.1713408560440834, "learning_rate": 4.22360248447205e-06, "loss": 3.0176, "step": 68 }, { "epoch": 0.06337371854613234, "loss_reasoning": 0.7770748734474182, "loss_utility": 2.827346086502075, "step": 68 }, { "epoch": 0.06430568499534017, "grad_norm": 3.3805957909006614, "learning_rate": 4.2857142857142855e-06, "loss": 3.5195, "step": 69 }, { "epoch": 0.06430568499534017, "loss_reasoning": 0.7527790665626526, "loss_utility": 3.084488868713379, "step": 69 }, { "epoch": 0.06523765144454799, "grad_norm": 2.8880245205432358, "learning_rate": 4.347826086956522e-06, "loss": 3.3627, "step": 70 }, { "epoch": 0.06523765144454799, "loss_reasoning": 0.7864573001861572, "loss_utility": 2.045292854309082, "step": 70 }, { "epoch": 0.06616961789375582, "grad_norm": 2.8035859319576093, "learning_rate": 4.409937888198758e-06, "loss": 3.4044, "step": 71 }, { "epoch": 0.06616961789375582, "loss_reasoning": 0.7705433368682861, "loss_utility": 2.1913509368896484, "step": 71 }, { "epoch": 0.06710158434296365, "grad_norm": 2.8200592050695255, "learning_rate": 4.472049689440994e-06, "loss": 3.2785, "step": 72 }, { "epoch": 0.06710158434296365, "loss_reasoning": 0.769146203994751, "loss_utility": 2.495736598968506, "step": 72 }, { "epoch": 0.06803355079217148, "grad_norm": 3.5439366568385644, "learning_rate": 4.534161490683231e-06, "loss": 3.4269, "step": 73 }, { "epoch": 0.06803355079217148, "loss_reasoning": 0.6762692332267761, "loss_utility": 2.5208048820495605, "step": 73 }, { "epoch": 0.06896551724137931, "grad_norm": 2.685923739750955, "learning_rate": 4.596273291925466e-06, "loss": 3.0317, "step": 74 }, { "epoch": 0.06896551724137931, "loss_reasoning": 0.6938189268112183, "loss_utility": 1.5935018062591553, "step": 74 }, { "epoch": 0.06989748369058714, "grad_norm": 4.214038414785801, "learning_rate": 4.6583850931677025e-06, "loss": 3.2187, "step": 75 }, { "epoch": 0.06989748369058714, "loss_reasoning": 0.8115956783294678, "loss_utility": 2.200949192047119, "step": 75 }, { "epoch": 0.07082945013979497, "grad_norm": 2.852661340016664, "learning_rate": 4.7204968944099384e-06, "loss": 3.0826, "step": 76 }, { "epoch": 0.07082945013979497, "loss_reasoning": 0.7762209177017212, "loss_utility": 0.9707419872283936, "step": 76 }, { "epoch": 0.0717614165890028, "grad_norm": 2.6955877834057484, "learning_rate": 4.782608695652174e-06, "loss": 2.6427, "step": 77 }, { "epoch": 0.0717614165890028, "loss_reasoning": 0.7467758655548096, "loss_utility": 2.3975353240966797, "step": 77 }, { "epoch": 0.07269338303821063, "grad_norm": 3.4771904326375656, "learning_rate": 4.84472049689441e-06, "loss": 3.138, "step": 78 }, { "epoch": 0.07269338303821063, "loss_reasoning": 0.6604665517807007, "loss_utility": 2.7610201835632324, "step": 78 }, { "epoch": 0.07362534948741846, "grad_norm": 2.9595610604407505, "learning_rate": 4.906832298136646e-06, "loss": 2.9853, "step": 79 }, { "epoch": 0.07362534948741846, "loss_reasoning": 0.7836657762527466, "loss_utility": 2.4961118698120117, "step": 79 }, { "epoch": 0.07455731593662628, "grad_norm": 2.564284618152735, "learning_rate": 4.968944099378882e-06, "loss": 2.9425, "step": 80 }, { "epoch": 0.07455731593662628, "loss_reasoning": 0.7493194341659546, "loss_utility": 1.8449957370758057, "step": 80 }, { "epoch": 0.0754892823858341, "grad_norm": 3.3469445449094364, "learning_rate": 5.031055900621118e-06, "loss": 3.153, "step": 81 }, { "epoch": 0.0754892823858341, "loss_reasoning": 0.671034574508667, "loss_utility": 1.7803502082824707, "step": 81 }, { "epoch": 0.07642124883504194, "grad_norm": 2.9687934731072456, "learning_rate": 5.093167701863354e-06, "loss": 3.2264, "step": 82 }, { "epoch": 0.07642124883504194, "loss_reasoning": 0.7600085139274597, "loss_utility": 2.631359338760376, "step": 82 }, { "epoch": 0.07735321528424977, "grad_norm": 3.0136380085784293, "learning_rate": 5.155279503105591e-06, "loss": 2.8372, "step": 83 }, { "epoch": 0.07735321528424977, "loss_reasoning": 0.8529447317123413, "loss_utility": 2.2103219032287598, "step": 83 }, { "epoch": 0.0782851817334576, "grad_norm": 1.93640014000486, "learning_rate": 5.2173913043478265e-06, "loss": 2.8221, "step": 84 }, { "epoch": 0.0782851817334576, "loss_reasoning": 0.7189385294914246, "loss_utility": 1.4549639225006104, "step": 84 }, { "epoch": 0.07921714818266543, "grad_norm": 3.5267715607181107, "learning_rate": 5.279503105590062e-06, "loss": 2.8984, "step": 85 }, { "epoch": 0.07921714818266543, "loss_reasoning": 0.7446106672286987, "loss_utility": 2.721618175506592, "step": 85 }, { "epoch": 0.08014911463187326, "grad_norm": 2.339148535885371, "learning_rate": 5.341614906832298e-06, "loss": 3.3916, "step": 86 }, { "epoch": 0.08014911463187326, "loss_reasoning": 0.7030254602432251, "loss_utility": 2.899545907974243, "step": 86 }, { "epoch": 0.08108108108108109, "grad_norm": 2.6729345363660957, "learning_rate": 5.403726708074535e-06, "loss": 3.6961, "step": 87 }, { "epoch": 0.08108108108108109, "loss_reasoning": 0.7248711585998535, "loss_utility": 2.3226380348205566, "step": 87 }, { "epoch": 0.08201304753028892, "grad_norm": 2.9865570702396744, "learning_rate": 5.465838509316771e-06, "loss": 3.2695, "step": 88 }, { "epoch": 0.08201304753028892, "loss_reasoning": 0.8257666826248169, "loss_utility": 2.306980609893799, "step": 88 }, { "epoch": 0.08294501397949673, "grad_norm": 2.476116265040213, "learning_rate": 5.527950310559007e-06, "loss": 3.1515, "step": 89 }, { "epoch": 0.08294501397949673, "loss_reasoning": 0.6769297122955322, "loss_utility": 2.3555145263671875, "step": 89 }, { "epoch": 0.08387698042870456, "grad_norm": 2.512311690878044, "learning_rate": 5.590062111801242e-06, "loss": 2.7279, "step": 90 }, { "epoch": 0.08387698042870456, "loss_reasoning": 0.7278487682342529, "loss_utility": 2.2873172760009766, "step": 90 }, { "epoch": 0.08480894687791239, "grad_norm": 2.2434970719588088, "learning_rate": 5.652173913043479e-06, "loss": 2.8304, "step": 91 }, { "epoch": 0.08480894687791239, "loss_reasoning": 0.6990373134613037, "loss_utility": 2.142899751663208, "step": 91 }, { "epoch": 0.08574091332712022, "grad_norm": 2.3594420724884473, "learning_rate": 5.7142857142857145e-06, "loss": 2.7544, "step": 92 }, { "epoch": 0.08574091332712022, "loss_reasoning": 0.7000461220741272, "loss_utility": 2.4580397605895996, "step": 92 }, { "epoch": 0.08667287977632805, "grad_norm": 3.286713887213059, "learning_rate": 5.77639751552795e-06, "loss": 3.0326, "step": 93 }, { "epoch": 0.08667287977632805, "loss_reasoning": 0.6844748258590698, "loss_utility": 2.495149612426758, "step": 93 }, { "epoch": 0.08760484622553588, "grad_norm": 2.623444628770726, "learning_rate": 5.838509316770186e-06, "loss": 2.9536, "step": 94 }, { "epoch": 0.08760484622553588, "loss_reasoning": 0.7023739814758301, "loss_utility": 2.1939709186553955, "step": 94 }, { "epoch": 0.08853681267474371, "grad_norm": 2.7070291421595813, "learning_rate": 5.900621118012423e-06, "loss": 3.0488, "step": 95 }, { "epoch": 0.08853681267474371, "loss_reasoning": 0.6744933128356934, "loss_utility": 2.4974472522735596, "step": 95 }, { "epoch": 0.08946877912395154, "grad_norm": 2.850941276720884, "learning_rate": 5.962732919254659e-06, "loss": 3.1757, "step": 96 }, { "epoch": 0.08946877912395154, "loss_reasoning": 0.6565550565719604, "loss_utility": 2.0762429237365723, "step": 96 }, { "epoch": 0.09040074557315937, "grad_norm": 2.2584894393662274, "learning_rate": 6.024844720496895e-06, "loss": 2.7986, "step": 97 }, { "epoch": 0.09040074557315937, "loss_reasoning": 0.6807472705841064, "loss_utility": 2.1425621509552, "step": 97 }, { "epoch": 0.09133271202236719, "grad_norm": 2.6301110879472143, "learning_rate": 6.086956521739132e-06, "loss": 2.524, "step": 98 }, { "epoch": 0.09133271202236719, "loss_reasoning": 0.6963942050933838, "loss_utility": 2.431126117706299, "step": 98 }, { "epoch": 0.09226467847157502, "grad_norm": 2.9320382148459374, "learning_rate": 6.1490683229813675e-06, "loss": 3.0968, "step": 99 }, { "epoch": 0.09226467847157502, "loss_reasoning": 0.6605645418167114, "loss_utility": 2.8406014442443848, "step": 99 }, { "epoch": 0.09319664492078285, "grad_norm": 2.993996033120977, "learning_rate": 6.2111801242236025e-06, "loss": 2.8286, "step": 100 }, { "epoch": 0.09319664492078285, "loss_reasoning": 0.6621318459510803, "loss_utility": 2.0014450550079346, "step": 100 }, { "epoch": 0.09412861136999068, "grad_norm": 2.552198941489273, "learning_rate": 6.2732919254658384e-06, "loss": 2.804, "step": 101 }, { "epoch": 0.09412861136999068, "loss_reasoning": 0.692467451095581, "loss_utility": 2.1146650314331055, "step": 101 }, { "epoch": 0.09506057781919851, "grad_norm": 2.764710352005383, "learning_rate": 6.335403726708075e-06, "loss": 2.8889, "step": 102 }, { "epoch": 0.09506057781919851, "loss_reasoning": 0.6878317594528198, "loss_utility": 2.439068555831909, "step": 102 }, { "epoch": 0.09599254426840634, "grad_norm": 2.9643415035156138, "learning_rate": 6.397515527950311e-06, "loss": 2.9334, "step": 103 }, { "epoch": 0.09599254426840634, "loss_reasoning": 0.6952853202819824, "loss_utility": 1.0856072902679443, "step": 103 }, { "epoch": 0.09692451071761417, "grad_norm": 2.2023973773550556, "learning_rate": 6.459627329192547e-06, "loss": 2.7325, "step": 104 }, { "epoch": 0.09692451071761417, "loss_reasoning": 0.6520573496818542, "loss_utility": 2.0594522953033447, "step": 104 }, { "epoch": 0.097856477166822, "grad_norm": 3.0690761804084983, "learning_rate": 6.521739130434783e-06, "loss": 3.1, "step": 105 }, { "epoch": 0.097856477166822, "loss_reasoning": 0.6905695199966431, "loss_utility": 3.002454996109009, "step": 105 }, { "epoch": 0.09878844361602983, "grad_norm": 2.5228668038078044, "learning_rate": 6.58385093167702e-06, "loss": 3.4764, "step": 106 }, { "epoch": 0.09878844361602983, "loss_reasoning": 0.646464467048645, "loss_utility": 2.179898262023926, "step": 106 }, { "epoch": 0.09972041006523766, "grad_norm": 2.62280681822817, "learning_rate": 6.6459627329192555e-06, "loss": 2.7236, "step": 107 }, { "epoch": 0.09972041006523766, "loss_reasoning": 0.7227012515068054, "loss_utility": 1.7110880613327026, "step": 107 }, { "epoch": 0.10065237651444547, "grad_norm": 3.4225347469580565, "learning_rate": 6.7080745341614914e-06, "loss": 2.9947, "step": 108 }, { "epoch": 0.10065237651444547, "loss_reasoning": 0.6654960513114929, "loss_utility": 0.6694575548171997, "step": 108 }, { "epoch": 0.1015843429636533, "grad_norm": 2.1307977063286394, "learning_rate": 6.7701863354037265e-06, "loss": 2.5286, "step": 109 }, { "epoch": 0.1015843429636533, "loss_reasoning": 0.7405640482902527, "loss_utility": 2.577967643737793, "step": 109 }, { "epoch": 0.10251630941286113, "grad_norm": 2.5884053851157653, "learning_rate": 6.832298136645963e-06, "loss": 2.6886, "step": 110 }, { "epoch": 0.10251630941286113, "loss_reasoning": 0.6799362897872925, "loss_utility": 1.987869381904602, "step": 110 }, { "epoch": 0.10344827586206896, "grad_norm": 2.6350393006466093, "learning_rate": 6.894409937888199e-06, "loss": 2.9112, "step": 111 }, { "epoch": 0.10344827586206896, "loss_reasoning": 0.7271844148635864, "loss_utility": 2.487851619720459, "step": 111 }, { "epoch": 0.1043802423112768, "grad_norm": 2.658761355774592, "learning_rate": 6.956521739130435e-06, "loss": 2.8952, "step": 112 }, { "epoch": 0.1043802423112768, "loss_reasoning": 0.6442485451698303, "loss_utility": 2.2208313941955566, "step": 112 }, { "epoch": 0.10531220876048462, "grad_norm": 3.3610038831524145, "learning_rate": 7.018633540372671e-06, "loss": 3.367, "step": 113 }, { "epoch": 0.10531220876048462, "loss_reasoning": 0.600180983543396, "loss_utility": 1.210035800933838, "step": 113 }, { "epoch": 0.10624417520969245, "grad_norm": 2.7316098651045104, "learning_rate": 7.080745341614908e-06, "loss": 2.804, "step": 114 }, { "epoch": 0.10624417520969245, "loss_reasoning": 0.6086286306381226, "loss_utility": 2.8204989433288574, "step": 114 }, { "epoch": 0.10717614165890028, "grad_norm": 2.486704246151684, "learning_rate": 7.1428571428571436e-06, "loss": 3.7671, "step": 115 }, { "epoch": 0.10717614165890028, "loss_reasoning": 0.6694086194038391, "loss_utility": 1.6479121446609497, "step": 115 }, { "epoch": 0.10810810810810811, "grad_norm": 2.7140496564120244, "learning_rate": 7.2049689440993795e-06, "loss": 2.7296, "step": 116 }, { "epoch": 0.10810810810810811, "loss_reasoning": 0.6439927816390991, "loss_utility": 2.2855467796325684, "step": 116 }, { "epoch": 0.10904007455731593, "grad_norm": 2.3452262587195176, "learning_rate": 7.267080745341616e-06, "loss": 3.1278, "step": 117 }, { "epoch": 0.10904007455731593, "loss_reasoning": 0.7224453091621399, "loss_utility": 2.328852653503418, "step": 117 }, { "epoch": 0.10997204100652376, "grad_norm": 2.4165630413593724, "learning_rate": 7.329192546583852e-06, "loss": 2.9318, "step": 118 }, { "epoch": 0.10997204100652376, "loss_reasoning": 0.6694356203079224, "loss_utility": 2.4146595001220703, "step": 118 }, { "epoch": 0.11090400745573159, "grad_norm": 3.5844561336780414, "learning_rate": 7.391304347826087e-06, "loss": 3.0218, "step": 119 }, { "epoch": 0.11090400745573159, "loss_reasoning": 0.6374131441116333, "loss_utility": 1.2893106937408447, "step": 119 }, { "epoch": 0.11183597390493942, "grad_norm": 2.0666335503724715, "learning_rate": 7.453416149068323e-06, "loss": 2.6939, "step": 120 }, { "epoch": 0.11183597390493942, "loss_reasoning": 0.7417213916778564, "loss_utility": 2.270730495452881, "step": 120 }, { "epoch": 0.11276794035414725, "grad_norm": 2.3636896406052608, "learning_rate": 7.515527950310559e-06, "loss": 3.0984, "step": 121 }, { "epoch": 0.11276794035414725, "loss_reasoning": 0.7168587446212769, "loss_utility": 2.425771713256836, "step": 121 }, { "epoch": 0.11369990680335508, "grad_norm": 4.305403378188526, "learning_rate": 7.577639751552796e-06, "loss": 2.6216, "step": 122 }, { "epoch": 0.11369990680335508, "loss_reasoning": 0.6888851523399353, "loss_utility": 1.873158574104309, "step": 122 }, { "epoch": 0.11463187325256291, "grad_norm": 2.392259825520155, "learning_rate": 7.639751552795032e-06, "loss": 2.9889, "step": 123 }, { "epoch": 0.11463187325256291, "loss_reasoning": 0.6613936424255371, "loss_utility": 1.9574999809265137, "step": 123 }, { "epoch": 0.11556383970177074, "grad_norm": 2.8044248910635163, "learning_rate": 7.701863354037268e-06, "loss": 2.728, "step": 124 }, { "epoch": 0.11556383970177074, "loss_reasoning": 0.5921840667724609, "loss_utility": 2.1911211013793945, "step": 124 }, { "epoch": 0.11649580615097857, "grad_norm": 2.967198558451476, "learning_rate": 7.763975155279503e-06, "loss": 3.0399, "step": 125 }, { "epoch": 0.11649580615097857, "loss_reasoning": 0.6558321118354797, "loss_utility": 2.623310089111328, "step": 125 }, { "epoch": 0.11742777260018639, "grad_norm": 3.080919056434353, "learning_rate": 7.82608695652174e-06, "loss": 3.191, "step": 126 }, { "epoch": 0.11742777260018639, "loss_reasoning": 0.6850237250328064, "loss_utility": 2.2822012901306152, "step": 126 }, { "epoch": 0.11835973904939422, "grad_norm": 2.7303221627744287, "learning_rate": 7.888198757763977e-06, "loss": 2.9882, "step": 127 }, { "epoch": 0.11835973904939422, "loss_reasoning": 0.6676533222198486, "loss_utility": 2.0602993965148926, "step": 127 }, { "epoch": 0.11929170549860205, "grad_norm": 2.6929641023670925, "learning_rate": 7.950310559006212e-06, "loss": 2.8769, "step": 128 }, { "epoch": 0.11929170549860205, "loss_reasoning": 0.7059943079948425, "loss_utility": 2.7138540744781494, "step": 128 }, { "epoch": 0.12022367194780988, "grad_norm": 2.669803910261003, "learning_rate": 8.012422360248447e-06, "loss": 3.0886, "step": 129 }, { "epoch": 0.12022367194780988, "loss_reasoning": 0.690857470035553, "loss_utility": 2.6568961143493652, "step": 129 }, { "epoch": 0.1211556383970177, "grad_norm": 3.1618805450208636, "learning_rate": 8.074534161490684e-06, "loss": 3.2678, "step": 130 }, { "epoch": 0.1211556383970177, "loss_reasoning": 0.5934849977493286, "loss_utility": 2.6211025714874268, "step": 130 }, { "epoch": 0.12208760484622554, "grad_norm": 2.8299040157029967, "learning_rate": 8.13664596273292e-06, "loss": 2.9489, "step": 131 }, { "epoch": 0.12208760484622554, "loss_reasoning": 0.6148425340652466, "loss_utility": 1.8318016529083252, "step": 131 }, { "epoch": 0.12301957129543337, "grad_norm": 2.374191451250805, "learning_rate": 8.198757763975156e-06, "loss": 3.0082, "step": 132 }, { "epoch": 0.12301957129543337, "loss_reasoning": 0.6749300956726074, "loss_utility": 1.5045605897903442, "step": 132 }, { "epoch": 0.1239515377446412, "grad_norm": 2.100199099160428, "learning_rate": 8.260869565217392e-06, "loss": 2.6389, "step": 133 }, { "epoch": 0.1239515377446412, "loss_reasoning": 0.7557051777839661, "loss_utility": 3.2175960540771484, "step": 133 }, { "epoch": 0.12488350419384903, "grad_norm": 3.2142240901877255, "learning_rate": 8.322981366459629e-06, "loss": 3.2079, "step": 134 }, { "epoch": 0.12488350419384903, "loss_reasoning": 0.621248185634613, "loss_utility": 2.7364892959594727, "step": 134 }, { "epoch": 0.12581547064305684, "grad_norm": 3.6453794000336845, "learning_rate": 8.385093167701864e-06, "loss": 3.0553, "step": 135 }, { "epoch": 0.12581547064305684, "loss_reasoning": 0.620274543762207, "loss_utility": 1.723862648010254, "step": 135 }, { "epoch": 0.1267474370922647, "grad_norm": 2.1496953731469195, "learning_rate": 8.4472049689441e-06, "loss": 2.5409, "step": 136 }, { "epoch": 0.1267474370922647, "loss_reasoning": 0.5798182487487793, "loss_utility": 1.367210030555725, "step": 136 }, { "epoch": 0.1276794035414725, "grad_norm": 2.598410153756213, "learning_rate": 8.509316770186336e-06, "loss": 2.6705, "step": 137 }, { "epoch": 0.1276794035414725, "loss_reasoning": 0.6503661870956421, "loss_utility": 2.072922706604004, "step": 137 }, { "epoch": 0.12861136999068035, "grad_norm": 2.82723011884982, "learning_rate": 8.571428571428571e-06, "loss": 3.1233, "step": 138 }, { "epoch": 0.12861136999068035, "loss_reasoning": 0.5928435325622559, "loss_utility": 1.8674349784851074, "step": 138 }, { "epoch": 0.12954333643988816, "grad_norm": 2.312573376066312, "learning_rate": 8.633540372670808e-06, "loss": 2.8353, "step": 139 }, { "epoch": 0.12954333643988816, "loss_reasoning": 0.6673014163970947, "loss_utility": 2.1743180751800537, "step": 139 }, { "epoch": 0.13047530288909598, "grad_norm": 2.3670244700492975, "learning_rate": 8.695652173913044e-06, "loss": 2.8236, "step": 140 }, { "epoch": 0.13047530288909598, "loss_reasoning": 0.6980910301208496, "loss_utility": 2.8581814765930176, "step": 140 }, { "epoch": 0.13140726933830382, "grad_norm": 2.135608941772954, "learning_rate": 8.75776397515528e-06, "loss": 3.0769, "step": 141 }, { "epoch": 0.13140726933830382, "loss_reasoning": 0.586093008518219, "loss_utility": 2.3772830963134766, "step": 141 }, { "epoch": 0.13233923578751164, "grad_norm": 2.4846285512467614, "learning_rate": 8.819875776397516e-06, "loss": 2.8641, "step": 142 }, { "epoch": 0.13233923578751164, "loss_reasoning": 0.6223667860031128, "loss_utility": 1.6221767663955688, "step": 142 }, { "epoch": 0.13327120223671948, "grad_norm": 2.908451390872509, "learning_rate": 8.881987577639753e-06, "loss": 2.7483, "step": 143 }, { "epoch": 0.13327120223671948, "loss_reasoning": 0.6703914403915405, "loss_utility": 1.0448392629623413, "step": 143 }, { "epoch": 0.1342031686859273, "grad_norm": 2.9283927973357566, "learning_rate": 8.944099378881988e-06, "loss": 2.2695, "step": 144 }, { "epoch": 0.1342031686859273, "loss_reasoning": 0.6600346565246582, "loss_utility": 1.7499333620071411, "step": 144 }, { "epoch": 0.13513513513513514, "grad_norm": 2.1792634686127372, "learning_rate": 9.006211180124225e-06, "loss": 2.7077, "step": 145 }, { "epoch": 0.13513513513513514, "loss_reasoning": 0.620342493057251, "loss_utility": 2.029730796813965, "step": 145 }, { "epoch": 0.13606710158434296, "grad_norm": 1.942689857484351, "learning_rate": 9.068322981366461e-06, "loss": 3.0442, "step": 146 }, { "epoch": 0.13606710158434296, "loss_reasoning": 0.6623519062995911, "loss_utility": 2.044320821762085, "step": 146 }, { "epoch": 0.1369990680335508, "grad_norm": 2.1268490507416473, "learning_rate": 9.130434782608697e-06, "loss": 2.9252, "step": 147 }, { "epoch": 0.1369990680335508, "loss_reasoning": 0.5936262011528015, "loss_utility": 2.630300521850586, "step": 147 }, { "epoch": 0.13793103448275862, "grad_norm": 2.7086961132208183, "learning_rate": 9.192546583850932e-06, "loss": 3.2608, "step": 148 }, { "epoch": 0.13793103448275862, "loss_reasoning": 0.5845557451248169, "loss_utility": 2.7961299419403076, "step": 148 }, { "epoch": 0.13886300093196646, "grad_norm": 2.373358989337834, "learning_rate": 9.254658385093168e-06, "loss": 3.1281, "step": 149 }, { "epoch": 0.13886300093196646, "loss_reasoning": 0.6443119049072266, "loss_utility": 2.1704354286193848, "step": 149 }, { "epoch": 0.13979496738117428, "grad_norm": 1.7928083540766226, "learning_rate": 9.316770186335405e-06, "loss": 2.3918, "step": 150 }, { "epoch": 0.13979496738117428, "loss_reasoning": 0.6801615953445435, "loss_utility": 1.5973083972930908, "step": 150 }, { "epoch": 0.1407269338303821, "grad_norm": 3.096081041842525, "learning_rate": 9.37888198757764e-06, "loss": 2.702, "step": 151 }, { "epoch": 0.1407269338303821, "loss_reasoning": 0.6331682205200195, "loss_utility": 1.9297901391983032, "step": 151 }, { "epoch": 0.14165890027958994, "grad_norm": 2.176460579761802, "learning_rate": 9.440993788819877e-06, "loss": 2.7756, "step": 152 }, { "epoch": 0.14165890027958994, "loss_reasoning": 0.6960755586624146, "loss_utility": 2.508665084838867, "step": 152 }, { "epoch": 0.14259086672879775, "grad_norm": 2.7820525106728105, "learning_rate": 9.503105590062112e-06, "loss": 2.7126, "step": 153 }, { "epoch": 0.14259086672879775, "loss_reasoning": 0.7136507630348206, "loss_utility": 2.2941434383392334, "step": 153 }, { "epoch": 0.1435228331780056, "grad_norm": 3.0216085213904447, "learning_rate": 9.565217391304349e-06, "loss": 2.878, "step": 154 }, { "epoch": 0.1435228331780056, "loss_reasoning": 0.6891734600067139, "loss_utility": 2.770627975463867, "step": 154 }, { "epoch": 0.14445479962721341, "grad_norm": 4.564021282441372, "learning_rate": 9.627329192546585e-06, "loss": 2.9351, "step": 155 }, { "epoch": 0.14445479962721341, "loss_reasoning": 0.6452511548995972, "loss_utility": 2.6423537731170654, "step": 155 }, { "epoch": 0.14538676607642126, "grad_norm": 2.6771403195560985, "learning_rate": 9.68944099378882e-06, "loss": 2.8927, "step": 156 }, { "epoch": 0.14538676607642126, "loss_reasoning": 0.5724940896034241, "loss_utility": 1.8069539070129395, "step": 156 }, { "epoch": 0.14631873252562907, "grad_norm": 2.5432913831737376, "learning_rate": 9.751552795031056e-06, "loss": 2.6527, "step": 157 }, { "epoch": 0.14631873252562907, "loss_reasoning": 0.6549623608589172, "loss_utility": 1.4047019481658936, "step": 157 }, { "epoch": 0.14725069897483692, "grad_norm": 2.13589926299454, "learning_rate": 9.813664596273292e-06, "loss": 2.5836, "step": 158 }, { "epoch": 0.14725069897483692, "loss_reasoning": 0.607820987701416, "loss_utility": 1.823265552520752, "step": 158 }, { "epoch": 0.14818266542404473, "grad_norm": 2.6844686150995996, "learning_rate": 9.875776397515529e-06, "loss": 2.8446, "step": 159 }, { "epoch": 0.14818266542404473, "loss_reasoning": 0.5919392704963684, "loss_utility": 2.630671739578247, "step": 159 }, { "epoch": 0.14911463187325255, "grad_norm": 2.7389434113369733, "learning_rate": 9.937888198757764e-06, "loss": 3.1568, "step": 160 }, { "epoch": 0.14911463187325255, "loss_reasoning": 0.6096336245536804, "loss_utility": 1.9951337575912476, "step": 160 }, { "epoch": 0.1500465983224604, "grad_norm": 2.4984566366627496, "learning_rate": 1e-05, "loss": 2.8049, "step": 161 }, { "epoch": 0.1500465983224604, "loss_reasoning": 0.6275330781936646, "loss_utility": 2.6125383377075195, "step": 161 }, { "epoch": 0.1509785647716682, "grad_norm": 2.260425861856421, "learning_rate": 1.0062111801242236e-05, "loss": 2.8151, "step": 162 }, { "epoch": 0.1509785647716682, "loss_reasoning": 0.5919538140296936, "loss_utility": 2.3301639556884766, "step": 162 }, { "epoch": 0.15191053122087605, "grad_norm": 3.8800590769911993, "learning_rate": 1.0124223602484473e-05, "loss": 3.0496, "step": 163 }, { "epoch": 0.15191053122087605, "loss_reasoning": 0.7349929809570312, "loss_utility": 2.6161229610443115, "step": 163 }, { "epoch": 0.15284249767008387, "grad_norm": 2.491895422756087, "learning_rate": 1.0186335403726708e-05, "loss": 3.0113, "step": 164 }, { "epoch": 0.15284249767008387, "loss_reasoning": 0.6374915838241577, "loss_utility": 2.717190742492676, "step": 164 }, { "epoch": 0.15377446411929171, "grad_norm": 2.136310945418732, "learning_rate": 1.0248447204968946e-05, "loss": 2.5956, "step": 165 }, { "epoch": 0.15377446411929171, "loss_reasoning": 0.618645429611206, "loss_utility": 1.7728439569473267, "step": 165 }, { "epoch": 0.15470643056849953, "grad_norm": 3.0800931125940108, "learning_rate": 1.0310559006211181e-05, "loss": 2.8448, "step": 166 }, { "epoch": 0.15470643056849953, "loss_reasoning": 0.6216656565666199, "loss_utility": 2.788597583770752, "step": 166 }, { "epoch": 0.15563839701770738, "grad_norm": 2.348041657169523, "learning_rate": 1.0372670807453418e-05, "loss": 2.8707, "step": 167 }, { "epoch": 0.15563839701770738, "loss_reasoning": 0.6503612399101257, "loss_utility": 2.5810129642486572, "step": 167 }, { "epoch": 0.1565703634669152, "grad_norm": 2.7664782532344314, "learning_rate": 1.0434782608695653e-05, "loss": 2.7838, "step": 168 }, { "epoch": 0.1565703634669152, "loss_reasoning": 0.5531678199768066, "loss_utility": 1.9495172500610352, "step": 168 }, { "epoch": 0.157502329916123, "grad_norm": 2.5065769943132614, "learning_rate": 1.049689440993789e-05, "loss": 2.8753, "step": 169 }, { "epoch": 0.157502329916123, "loss_reasoning": 0.6590607166290283, "loss_utility": 1.3742871284484863, "step": 169 }, { "epoch": 0.15843429636533085, "grad_norm": 1.818214961641333, "learning_rate": 1.0559006211180125e-05, "loss": 2.1843, "step": 170 }, { "epoch": 0.15843429636533085, "loss_reasoning": 0.6019656658172607, "loss_utility": 1.8179540634155273, "step": 170 }, { "epoch": 0.15936626281453867, "grad_norm": 2.134590678020332, "learning_rate": 1.062111801242236e-05, "loss": 2.6157, "step": 171 }, { "epoch": 0.15936626281453867, "loss_reasoning": 0.6366905570030212, "loss_utility": 3.1248812675476074, "step": 171 }, { "epoch": 0.1602982292637465, "grad_norm": 2.4234051553144362, "learning_rate": 1.0683229813664597e-05, "loss": 3.0415, "step": 172 }, { "epoch": 0.1602982292637465, "loss_reasoning": 0.6457880139350891, "loss_utility": 1.5730255842208862, "step": 172 }, { "epoch": 0.16123019571295433, "grad_norm": 2.347841294354471, "learning_rate": 1.0745341614906832e-05, "loss": 2.6494, "step": 173 }, { "epoch": 0.16123019571295433, "loss_reasoning": 0.5844186544418335, "loss_utility": 1.6537346839904785, "step": 173 }, { "epoch": 0.16216216216216217, "grad_norm": 2.157098676953575, "learning_rate": 1.080745341614907e-05, "loss": 2.3517, "step": 174 }, { "epoch": 0.16216216216216217, "loss_reasoning": 0.6030725240707397, "loss_utility": 2.0564138889312744, "step": 174 }, { "epoch": 0.16309412861137, "grad_norm": 1.9899262938941866, "learning_rate": 1.0869565217391305e-05, "loss": 2.6841, "step": 175 }, { "epoch": 0.16309412861137, "loss_reasoning": 0.5855385065078735, "loss_utility": 2.122997999191284, "step": 175 }, { "epoch": 0.16402609506057783, "grad_norm": 2.814190648992611, "learning_rate": 1.0931677018633542e-05, "loss": 2.9425, "step": 176 }, { "epoch": 0.16402609506057783, "loss_reasoning": 0.6294518709182739, "loss_utility": 2.730790615081787, "step": 176 }, { "epoch": 0.16495806150978565, "grad_norm": 2.052740229697653, "learning_rate": 1.0993788819875777e-05, "loss": 3.0608, "step": 177 }, { "epoch": 0.16495806150978565, "loss_reasoning": 0.6487751603126526, "loss_utility": 2.1259796619415283, "step": 177 }, { "epoch": 0.16589002795899346, "grad_norm": 2.6962356959045852, "learning_rate": 1.1055900621118014e-05, "loss": 2.645, "step": 178 }, { "epoch": 0.16589002795899346, "loss_reasoning": 0.6630024909973145, "loss_utility": 1.3057117462158203, "step": 178 }, { "epoch": 0.1668219944082013, "grad_norm": 1.8811516908189971, "learning_rate": 1.1118012422360249e-05, "loss": 2.7461, "step": 179 }, { "epoch": 0.1668219944082013, "loss_reasoning": 0.6153796911239624, "loss_utility": 1.6845576763153076, "step": 179 }, { "epoch": 0.16775396085740912, "grad_norm": 1.9922804644563379, "learning_rate": 1.1180124223602484e-05, "loss": 2.4112, "step": 180 }, { "epoch": 0.16775396085740912, "loss_reasoning": 0.5902508497238159, "loss_utility": 2.0838494300842285, "step": 180 }, { "epoch": 0.16868592730661697, "grad_norm": 2.334657728661397, "learning_rate": 1.1242236024844722e-05, "loss": 2.9265, "step": 181 }, { "epoch": 0.16868592730661697, "loss_reasoning": 0.5955488681793213, "loss_utility": 1.6323516368865967, "step": 181 }, { "epoch": 0.16961789375582478, "grad_norm": 2.347641108569833, "learning_rate": 1.1304347826086957e-05, "loss": 2.8187, "step": 182 }, { "epoch": 0.16961789375582478, "loss_reasoning": 0.5932341814041138, "loss_utility": 1.8492252826690674, "step": 182 }, { "epoch": 0.17054986020503263, "grad_norm": 2.7406387965471093, "learning_rate": 1.1366459627329194e-05, "loss": 2.8375, "step": 183 }, { "epoch": 0.17054986020503263, "loss_reasoning": 0.6557432413101196, "loss_utility": 1.1142854690551758, "step": 183 }, { "epoch": 0.17148182665424044, "grad_norm": 2.803553305240887, "learning_rate": 1.1428571428571429e-05, "loss": 2.6896, "step": 184 }, { "epoch": 0.17148182665424044, "loss_reasoning": 0.5924148559570312, "loss_utility": 1.9153380393981934, "step": 184 }, { "epoch": 0.1724137931034483, "grad_norm": 2.2286271662361132, "learning_rate": 1.1490683229813666e-05, "loss": 3.0138, "step": 185 }, { "epoch": 0.1724137931034483, "loss_reasoning": 0.6171647906303406, "loss_utility": 1.1769802570343018, "step": 185 }, { "epoch": 0.1733457595526561, "grad_norm": 2.7078512439202593, "learning_rate": 1.15527950310559e-05, "loss": 2.1818, "step": 186 }, { "epoch": 0.1733457595526561, "loss_reasoning": 0.5980218052864075, "loss_utility": 2.386794090270996, "step": 186 }, { "epoch": 0.17427772600186392, "grad_norm": 2.291218204441983, "learning_rate": 1.161490683229814e-05, "loss": 2.7571, "step": 187 }, { "epoch": 0.17427772600186392, "loss_reasoning": 0.5126553773880005, "loss_utility": 2.4551916122436523, "step": 187 }, { "epoch": 0.17520969245107176, "grad_norm": 1.9599382718096816, "learning_rate": 1.1677018633540373e-05, "loss": 2.7251, "step": 188 }, { "epoch": 0.17520969245107176, "loss_reasoning": 0.7388483285903931, "loss_utility": 1.830416202545166, "step": 188 }, { "epoch": 0.17614165890027958, "grad_norm": 1.828903581684357, "learning_rate": 1.1739130434782611e-05, "loss": 2.5089, "step": 189 }, { "epoch": 0.17614165890027958, "loss_reasoning": 0.5925588607788086, "loss_utility": 1.5476030111312866, "step": 189 }, { "epoch": 0.17707362534948742, "grad_norm": 1.744967293820132, "learning_rate": 1.1801242236024846e-05, "loss": 2.4883, "step": 190 }, { "epoch": 0.17707362534948742, "loss_reasoning": 0.6176885962486267, "loss_utility": 2.5511586666107178, "step": 190 }, { "epoch": 0.17800559179869524, "grad_norm": 2.0262479741282853, "learning_rate": 1.1863354037267081e-05, "loss": 2.9032, "step": 191 }, { "epoch": 0.17800559179869524, "loss_reasoning": 0.5965008735656738, "loss_utility": 2.157264232635498, "step": 191 }, { "epoch": 0.17893755824790308, "grad_norm": 1.7569889363548181, "learning_rate": 1.1925465838509318e-05, "loss": 2.4635, "step": 192 }, { "epoch": 0.17893755824790308, "loss_reasoning": 0.6045604944229126, "loss_utility": 2.0018606185913086, "step": 192 }, { "epoch": 0.1798695246971109, "grad_norm": 2.353817471347322, "learning_rate": 1.1987577639751553e-05, "loss": 2.5618, "step": 193 }, { "epoch": 0.1798695246971109, "loss_reasoning": 0.608288049697876, "loss_utility": 2.1382508277893066, "step": 193 }, { "epoch": 0.18080149114631874, "grad_norm": 2.290839606245014, "learning_rate": 1.204968944099379e-05, "loss": 2.5981, "step": 194 }, { "epoch": 0.18080149114631874, "loss_reasoning": 0.6038492918014526, "loss_utility": 2.393233299255371, "step": 194 }, { "epoch": 0.18173345759552656, "grad_norm": 2.4611808699094153, "learning_rate": 1.2111801242236025e-05, "loss": 2.9631, "step": 195 }, { "epoch": 0.18173345759552656, "loss_reasoning": 0.5981485843658447, "loss_utility": 1.7175675630569458, "step": 195 }, { "epoch": 0.18266542404473438, "grad_norm": 2.060466808860372, "learning_rate": 1.2173913043478263e-05, "loss": 2.5115, "step": 196 }, { "epoch": 0.18266542404473438, "loss_reasoning": 0.5998847484588623, "loss_utility": 2.3313746452331543, "step": 196 }, { "epoch": 0.18359739049394222, "grad_norm": 2.110022388089812, "learning_rate": 1.2236024844720498e-05, "loss": 2.5141, "step": 197 }, { "epoch": 0.18359739049394222, "loss_reasoning": 0.6231951713562012, "loss_utility": 2.8150434494018555, "step": 197 }, { "epoch": 0.18452935694315004, "grad_norm": 2.194149595045162, "learning_rate": 1.2298136645962735e-05, "loss": 2.89, "step": 198 }, { "epoch": 0.18452935694315004, "loss_reasoning": 0.6787062883377075, "loss_utility": 2.181281328201294, "step": 198 }, { "epoch": 0.18546132339235788, "grad_norm": 2.481671864916168, "learning_rate": 1.236024844720497e-05, "loss": 2.8, "step": 199 }, { "epoch": 0.18546132339235788, "loss_reasoning": 0.5326529741287231, "loss_utility": 2.471238613128662, "step": 199 }, { "epoch": 0.1863932898415657, "grad_norm": 2.596823912705357, "learning_rate": 1.2422360248447205e-05, "loss": 3.0774, "step": 200 }, { "epoch": 0.1863932898415657, "loss_reasoning": 0.6103478670120239, "loss_utility": 2.517108917236328, "step": 200 }, { "epoch": 0.18732525629077354, "grad_norm": 2.085086752881224, "learning_rate": 1.2484472049689442e-05, "loss": 2.7468, "step": 201 }, { "epoch": 0.18732525629077354, "loss_reasoning": 0.6006878614425659, "loss_utility": 2.4736990928649902, "step": 201 }, { "epoch": 0.18825722273998136, "grad_norm": 1.862229832973221, "learning_rate": 1.2546583850931677e-05, "loss": 2.5865, "step": 202 }, { "epoch": 0.18825722273998136, "loss_reasoning": 0.5684506893157959, "loss_utility": 1.6475887298583984, "step": 202 }, { "epoch": 0.1891891891891892, "grad_norm": 2.46214650936926, "learning_rate": 1.2608695652173915e-05, "loss": 2.7677, "step": 203 }, { "epoch": 0.1891891891891892, "loss_reasoning": 0.6354299783706665, "loss_utility": 2.2271361351013184, "step": 203 }, { "epoch": 0.19012115563839702, "grad_norm": 2.579534918429074, "learning_rate": 1.267080745341615e-05, "loss": 2.962, "step": 204 }, { "epoch": 0.19012115563839702, "loss_reasoning": 0.6001750230789185, "loss_utility": 1.5460197925567627, "step": 204 }, { "epoch": 0.19105312208760486, "grad_norm": 2.5906299269414714, "learning_rate": 1.2732919254658387e-05, "loss": 2.6224, "step": 205 }, { "epoch": 0.19105312208760486, "loss_reasoning": 0.6838210821151733, "loss_utility": 1.7734708786010742, "step": 205 }, { "epoch": 0.19198508853681268, "grad_norm": 2.214972037592668, "learning_rate": 1.2795031055900622e-05, "loss": 2.8789, "step": 206 }, { "epoch": 0.19198508853681268, "loss_reasoning": 0.6333205103874207, "loss_utility": 2.035780429840088, "step": 206 }, { "epoch": 0.1929170549860205, "grad_norm": 2.5924672534571296, "learning_rate": 1.2857142857142859e-05, "loss": 2.7572, "step": 207 }, { "epoch": 0.1929170549860205, "loss_reasoning": 0.5975397229194641, "loss_utility": 2.73039174079895, "step": 207 }, { "epoch": 0.19384902143522834, "grad_norm": 2.050763822909922, "learning_rate": 1.2919254658385094e-05, "loss": 2.8882, "step": 208 }, { "epoch": 0.19384902143522834, "loss_reasoning": 0.590947151184082, "loss_utility": 1.7204687595367432, "step": 208 }, { "epoch": 0.19478098788443615, "grad_norm": 2.047255558137329, "learning_rate": 1.2981366459627329e-05, "loss": 1.9563, "step": 209 }, { "epoch": 0.19478098788443615, "loss_reasoning": 0.6467386484146118, "loss_utility": 2.4631011486053467, "step": 209 }, { "epoch": 0.195712954333644, "grad_norm": 2.349259777509106, "learning_rate": 1.3043478260869566e-05, "loss": 2.6839, "step": 210 }, { "epoch": 0.195712954333644, "loss_reasoning": 0.6577044129371643, "loss_utility": 1.9467341899871826, "step": 210 }, { "epoch": 0.1966449207828518, "grad_norm": 1.5944286081320054, "learning_rate": 1.31055900621118e-05, "loss": 2.2241, "step": 211 }, { "epoch": 0.1966449207828518, "loss_reasoning": 0.605146050453186, "loss_utility": 1.7489819526672363, "step": 211 }, { "epoch": 0.19757688723205966, "grad_norm": 1.92551360124621, "learning_rate": 1.316770186335404e-05, "loss": 2.7441, "step": 212 }, { "epoch": 0.19757688723205966, "loss_reasoning": 0.6178407073020935, "loss_utility": 1.5280100107192993, "step": 212 }, { "epoch": 0.19850885368126747, "grad_norm": 2.4345911305082453, "learning_rate": 1.3229813664596274e-05, "loss": 2.5932, "step": 213 }, { "epoch": 0.19850885368126747, "loss_reasoning": 0.616474986076355, "loss_utility": 2.32114839553833, "step": 213 }, { "epoch": 0.19944082013047532, "grad_norm": 2.9587698542761807, "learning_rate": 1.3291925465838511e-05, "loss": 3.1015, "step": 214 }, { "epoch": 0.19944082013047532, "loss_reasoning": 0.601151704788208, "loss_utility": 1.9924556016921997, "step": 214 }, { "epoch": 0.20037278657968313, "grad_norm": 2.1195025371522944, "learning_rate": 1.3354037267080746e-05, "loss": 2.376, "step": 215 }, { "epoch": 0.20037278657968313, "loss_reasoning": 0.6366987228393555, "loss_utility": 1.024699330329895, "step": 215 }, { "epoch": 0.20130475302889095, "grad_norm": 2.241346787964584, "learning_rate": 1.3416149068322983e-05, "loss": 2.4137, "step": 216 }, { "epoch": 0.20130475302889095, "loss_reasoning": 0.5907614231109619, "loss_utility": 2.299494504928589, "step": 216 }, { "epoch": 0.2022367194780988, "grad_norm": 2.330229970256349, "learning_rate": 1.3478260869565218e-05, "loss": 2.6253, "step": 217 }, { "epoch": 0.2022367194780988, "loss_reasoning": 0.636486291885376, "loss_utility": 2.2877285480499268, "step": 217 }, { "epoch": 0.2031686859273066, "grad_norm": 1.5305989056236635, "learning_rate": 1.3540372670807453e-05, "loss": 2.5968, "step": 218 }, { "epoch": 0.2031686859273066, "loss_reasoning": 0.6208593845367432, "loss_utility": 2.1659305095672607, "step": 218 }, { "epoch": 0.20410065237651445, "grad_norm": 2.0041666029567966, "learning_rate": 1.3602484472049691e-05, "loss": 2.7938, "step": 219 }, { "epoch": 0.20410065237651445, "loss_reasoning": 0.5577052235603333, "loss_utility": 2.587419271469116, "step": 219 }, { "epoch": 0.20503261882572227, "grad_norm": 2.8330893675657483, "learning_rate": 1.3664596273291926e-05, "loss": 2.7268, "step": 220 }, { "epoch": 0.20503261882572227, "loss_reasoning": 0.5630409717559814, "loss_utility": 2.1619279384613037, "step": 220 }, { "epoch": 0.2059645852749301, "grad_norm": 2.021437833445483, "learning_rate": 1.3726708074534163e-05, "loss": 2.7465, "step": 221 }, { "epoch": 0.2059645852749301, "loss_reasoning": 0.6079117655754089, "loss_utility": 1.6982972621917725, "step": 221 }, { "epoch": 0.20689655172413793, "grad_norm": 2.1764148145181985, "learning_rate": 1.3788819875776398e-05, "loss": 2.6498, "step": 222 }, { "epoch": 0.20689655172413793, "loss_reasoning": 0.6161183714866638, "loss_utility": 1.8998939990997314, "step": 222 }, { "epoch": 0.20782851817334577, "grad_norm": 2.268015127428857, "learning_rate": 1.3850931677018635e-05, "loss": 2.7538, "step": 223 }, { "epoch": 0.20782851817334577, "loss_reasoning": 0.6006501317024231, "loss_utility": 2.2651848793029785, "step": 223 }, { "epoch": 0.2087604846225536, "grad_norm": 2.4095491314066355, "learning_rate": 1.391304347826087e-05, "loss": 2.8113, "step": 224 }, { "epoch": 0.2087604846225536, "loss_reasoning": 0.6061087846755981, "loss_utility": 2.9726247787475586, "step": 224 }, { "epoch": 0.2096924510717614, "grad_norm": 3.3490383704315896, "learning_rate": 1.3975155279503107e-05, "loss": 2.8708, "step": 225 }, { "epoch": 0.2096924510717614, "loss_reasoning": 0.5231095552444458, "loss_utility": 2.2492270469665527, "step": 225 }, { "epoch": 0.21062441752096925, "grad_norm": 2.7079952506073024, "learning_rate": 1.4037267080745342e-05, "loss": 2.7201, "step": 226 }, { "epoch": 0.21062441752096925, "loss_reasoning": 0.6141228675842285, "loss_utility": 1.7669057846069336, "step": 226 }, { "epoch": 0.21155638397017706, "grad_norm": 2.675830827874297, "learning_rate": 1.409937888198758e-05, "loss": 2.6977, "step": 227 }, { "epoch": 0.21155638397017706, "loss_reasoning": 0.6073058247566223, "loss_utility": 1.7296876907348633, "step": 227 }, { "epoch": 0.2124883504193849, "grad_norm": 2.1854752039550536, "learning_rate": 1.4161490683229815e-05, "loss": 2.807, "step": 228 }, { "epoch": 0.2124883504193849, "loss_reasoning": 0.6320446133613586, "loss_utility": 2.3991098403930664, "step": 228 }, { "epoch": 0.21342031686859272, "grad_norm": 2.366157984994957, "learning_rate": 1.422360248447205e-05, "loss": 2.9569, "step": 229 }, { "epoch": 0.21342031686859272, "loss_reasoning": 0.6068090200424194, "loss_utility": 1.61674964427948, "step": 229 }, { "epoch": 0.21435228331780057, "grad_norm": 1.8795217162150502, "learning_rate": 1.4285714285714287e-05, "loss": 2.7762, "step": 230 }, { "epoch": 0.21435228331780057, "loss_reasoning": 0.5701931715011597, "loss_utility": 1.8752105236053467, "step": 230 }, { "epoch": 0.21528424976700838, "grad_norm": 2.022688650152991, "learning_rate": 1.4347826086956522e-05, "loss": 2.2254, "step": 231 }, { "epoch": 0.21528424976700838, "loss_reasoning": 0.6093271970748901, "loss_utility": 1.891688585281372, "step": 231 }, { "epoch": 0.21621621621621623, "grad_norm": 2.6998112174562436, "learning_rate": 1.4409937888198759e-05, "loss": 2.5549, "step": 232 }, { "epoch": 0.21621621621621623, "loss_reasoning": 0.7128295302391052, "loss_utility": 2.5362319946289062, "step": 232 }, { "epoch": 0.21714818266542404, "grad_norm": 2.039085760215371, "learning_rate": 1.4472049689440994e-05, "loss": 3.0257, "step": 233 }, { "epoch": 0.21714818266542404, "loss_reasoning": 0.5676242709159851, "loss_utility": 1.627810001373291, "step": 233 }, { "epoch": 0.21808014911463186, "grad_norm": 2.0629821236179593, "learning_rate": 1.4534161490683232e-05, "loss": 2.4536, "step": 234 }, { "epoch": 0.21808014911463186, "loss_reasoning": 0.5835864543914795, "loss_utility": 1.4011150598526, "step": 234 }, { "epoch": 0.2190121155638397, "grad_norm": 2.053703514464041, "learning_rate": 1.4596273291925467e-05, "loss": 2.293, "step": 235 }, { "epoch": 0.2190121155638397, "loss_reasoning": 0.5883059501647949, "loss_utility": 1.839787483215332, "step": 235 }, { "epoch": 0.21994408201304752, "grad_norm": 1.7709522667342263, "learning_rate": 1.4658385093167704e-05, "loss": 2.1984, "step": 236 }, { "epoch": 0.21994408201304752, "loss_reasoning": 0.6036686897277832, "loss_utility": 1.8830153942108154, "step": 236 }, { "epoch": 0.22087604846225536, "grad_norm": 2.2120699197140192, "learning_rate": 1.472049689440994e-05, "loss": 2.3205, "step": 237 }, { "epoch": 0.22087604846225536, "loss_reasoning": 0.5920063257217407, "loss_utility": 1.3147085905075073, "step": 237 }, { "epoch": 0.22180801491146318, "grad_norm": 2.1685243531969394, "learning_rate": 1.4782608695652174e-05, "loss": 2.3975, "step": 238 }, { "epoch": 0.22180801491146318, "loss_reasoning": 0.5545964241027832, "loss_utility": 1.4345450401306152, "step": 238 }, { "epoch": 0.22273998136067102, "grad_norm": 2.289923164491888, "learning_rate": 1.4844720496894411e-05, "loss": 2.4495, "step": 239 }, { "epoch": 0.22273998136067102, "loss_reasoning": 0.6044876575469971, "loss_utility": 2.4942638874053955, "step": 239 }, { "epoch": 0.22367194780987884, "grad_norm": 2.3542300386820747, "learning_rate": 1.4906832298136646e-05, "loss": 2.7951, "step": 240 }, { "epoch": 0.22367194780987884, "loss_reasoning": 0.5597284436225891, "loss_utility": 1.808149814605713, "step": 240 }, { "epoch": 0.22460391425908668, "grad_norm": 2.4588536582394753, "learning_rate": 1.4968944099378885e-05, "loss": 2.8505, "step": 241 }, { "epoch": 0.22460391425908668, "loss_reasoning": 0.5851553082466125, "loss_utility": 1.5745949745178223, "step": 241 }, { "epoch": 0.2255358807082945, "grad_norm": 1.9046123508071222, "learning_rate": 1.5031055900621118e-05, "loss": 2.7242, "step": 242 }, { "epoch": 0.2255358807082945, "loss_reasoning": 0.6988224983215332, "loss_utility": 2.093189239501953, "step": 242 }, { "epoch": 0.22646784715750232, "grad_norm": 2.353230358874391, "learning_rate": 1.5093167701863356e-05, "loss": 2.5861, "step": 243 }, { "epoch": 0.22646784715750232, "loss_reasoning": 0.5769000053405762, "loss_utility": 2.4797286987304688, "step": 243 }, { "epoch": 0.22739981360671016, "grad_norm": 1.9951796930706271, "learning_rate": 1.5155279503105591e-05, "loss": 2.6994, "step": 244 }, { "epoch": 0.22739981360671016, "loss_reasoning": 0.5875635743141174, "loss_utility": 1.4459322690963745, "step": 244 }, { "epoch": 0.22833178005591798, "grad_norm": 2.3903246286294406, "learning_rate": 1.5217391304347828e-05, "loss": 2.7694, "step": 245 }, { "epoch": 0.22833178005591798, "loss_reasoning": 0.5336146950721741, "loss_utility": 2.541754722595215, "step": 245 }, { "epoch": 0.22926374650512582, "grad_norm": 1.6364051966995716, "learning_rate": 1.5279503105590063e-05, "loss": 2.8418, "step": 246 }, { "epoch": 0.22926374650512582, "loss_reasoning": 0.4749275743961334, "loss_utility": 2.7517030239105225, "step": 246 }, { "epoch": 0.23019571295433364, "grad_norm": 3.207427534914478, "learning_rate": 1.5341614906832298e-05, "loss": 2.9693, "step": 247 }, { "epoch": 0.23019571295433364, "loss_reasoning": 0.5920429229736328, "loss_utility": 2.1512997150421143, "step": 247 }, { "epoch": 0.23112767940354148, "grad_norm": 2.2262942179375047, "learning_rate": 1.5403726708074537e-05, "loss": 2.5719, "step": 248 }, { "epoch": 0.23112767940354148, "loss_reasoning": 0.5595180988311768, "loss_utility": 1.4920787811279297, "step": 248 }, { "epoch": 0.2320596458527493, "grad_norm": 1.729655483159805, "learning_rate": 1.5465838509316772e-05, "loss": 2.5354, "step": 249 }, { "epoch": 0.2320596458527493, "loss_reasoning": 0.5860168933868408, "loss_utility": 1.535170078277588, "step": 249 }, { "epoch": 0.23299161230195714, "grad_norm": 1.9239858509860401, "learning_rate": 1.5527950310559007e-05, "loss": 2.4171, "step": 250 }, { "epoch": 0.23299161230195714, "loss_reasoning": 0.6626219749450684, "loss_utility": 2.0867927074432373, "step": 250 }, { "epoch": 0.23392357875116496, "grad_norm": 1.937417867386888, "learning_rate": 1.5590062111801242e-05, "loss": 2.7417, "step": 251 }, { "epoch": 0.23392357875116496, "loss_reasoning": 0.6005516052246094, "loss_utility": 2.244417190551758, "step": 251 }, { "epoch": 0.23485554520037277, "grad_norm": 1.867116214758319, "learning_rate": 1.565217391304348e-05, "loss": 2.6449, "step": 252 }, { "epoch": 0.23485554520037277, "loss_reasoning": 0.5563925504684448, "loss_utility": 1.8628802299499512, "step": 252 }, { "epoch": 0.23578751164958062, "grad_norm": 2.341189342257755, "learning_rate": 1.5714285714285715e-05, "loss": 2.6212, "step": 253 }, { "epoch": 0.23578751164958062, "loss_reasoning": 0.5947913527488708, "loss_utility": 1.9852805137634277, "step": 253 }, { "epoch": 0.23671947809878843, "grad_norm": 2.020335105250317, "learning_rate": 1.5776397515527954e-05, "loss": 2.7211, "step": 254 }, { "epoch": 0.23671947809878843, "loss_reasoning": 0.6093895435333252, "loss_utility": 2.076265573501587, "step": 254 }, { "epoch": 0.23765144454799628, "grad_norm": 1.7090347412989149, "learning_rate": 1.583850931677019e-05, "loss": 2.3546, "step": 255 }, { "epoch": 0.23765144454799628, "loss_reasoning": 0.5693929195404053, "loss_utility": 1.9880385398864746, "step": 255 }, { "epoch": 0.2385834109972041, "grad_norm": 2.1252406219298257, "learning_rate": 1.5900621118012424e-05, "loss": 2.1577, "step": 256 }, { "epoch": 0.2385834109972041, "loss_reasoning": 0.5831501483917236, "loss_utility": 2.5252740383148193, "step": 256 }, { "epoch": 0.23951537744641194, "grad_norm": 2.4644938601599002, "learning_rate": 1.596273291925466e-05, "loss": 2.8563, "step": 257 }, { "epoch": 0.23951537744641194, "loss_reasoning": 0.5820404291152954, "loss_utility": 2.118852138519287, "step": 257 }, { "epoch": 0.24044734389561975, "grad_norm": 1.8861403941393642, "learning_rate": 1.6024844720496894e-05, "loss": 2.6171, "step": 258 }, { "epoch": 0.24044734389561975, "loss_reasoning": 0.6682382225990295, "loss_utility": 2.2231969833374023, "step": 258 }, { "epoch": 0.2413793103448276, "grad_norm": 1.5936164578651575, "learning_rate": 1.6086956521739132e-05, "loss": 2.5237, "step": 259 }, { "epoch": 0.2413793103448276, "loss_reasoning": 0.5693731307983398, "loss_utility": 1.6322228908538818, "step": 259 }, { "epoch": 0.2423112767940354, "grad_norm": 1.917409147968712, "learning_rate": 1.6149068322981367e-05, "loss": 2.5967, "step": 260 }, { "epoch": 0.2423112767940354, "loss_reasoning": 0.540715217590332, "loss_utility": 1.330385446548462, "step": 260 }, { "epoch": 0.24324324324324326, "grad_norm": 2.449592612123702, "learning_rate": 1.6211180124223606e-05, "loss": 2.525, "step": 261 }, { "epoch": 0.24324324324324326, "loss_reasoning": 0.533118724822998, "loss_utility": 1.846388816833496, "step": 261 }, { "epoch": 0.24417520969245107, "grad_norm": 1.9963069500769315, "learning_rate": 1.627329192546584e-05, "loss": 2.3901, "step": 262 }, { "epoch": 0.24417520969245107, "loss_reasoning": 0.595947265625, "loss_utility": 2.628139019012451, "step": 262 }, { "epoch": 0.2451071761416589, "grad_norm": 1.9827187409333864, "learning_rate": 1.6335403726708076e-05, "loss": 2.5435, "step": 263 }, { "epoch": 0.2451071761416589, "loss_reasoning": 0.5414705276489258, "loss_utility": 2.1110832691192627, "step": 263 }, { "epoch": 0.24603914259086673, "grad_norm": 2.0973704813649725, "learning_rate": 1.639751552795031e-05, "loss": 2.5773, "step": 264 }, { "epoch": 0.24603914259086673, "loss_reasoning": 0.5608338117599487, "loss_utility": 1.9405726194381714, "step": 264 }, { "epoch": 0.24697110904007455, "grad_norm": 2.4479791793161607, "learning_rate": 1.645962732919255e-05, "loss": 2.5417, "step": 265 }, { "epoch": 0.24697110904007455, "loss_reasoning": 0.5297178626060486, "loss_utility": 1.704424500465393, "step": 265 }, { "epoch": 0.2479030754892824, "grad_norm": 1.9126700911209102, "learning_rate": 1.6521739130434785e-05, "loss": 2.4813, "step": 266 }, { "epoch": 0.2479030754892824, "loss_reasoning": 0.4950225055217743, "loss_utility": 2.7264673709869385, "step": 266 }, { "epoch": 0.2488350419384902, "grad_norm": 1.69842733397267, "learning_rate": 1.658385093167702e-05, "loss": 2.8559, "step": 267 }, { "epoch": 0.2488350419384902, "loss_reasoning": 0.6331027746200562, "loss_utility": 1.9236290454864502, "step": 267 }, { "epoch": 0.24976700838769805, "grad_norm": 1.3808925335501556, "learning_rate": 1.6645962732919258e-05, "loss": 2.0074, "step": 268 }, { "epoch": 0.24976700838769805, "loss_reasoning": 0.5490509271621704, "loss_utility": 2.176877021789551, "step": 268 }, { "epoch": 0.2506989748369059, "grad_norm": 2.1957059842904223, "learning_rate": 1.670807453416149e-05, "loss": 2.7057, "step": 269 }, { "epoch": 0.2506989748369059, "loss_reasoning": 0.5664539933204651, "loss_utility": 2.3183140754699707, "step": 269 }, { "epoch": 0.2516309412861137, "grad_norm": 2.8851703537189826, "learning_rate": 1.6770186335403728e-05, "loss": 2.6112, "step": 270 }, { "epoch": 0.2516309412861137, "loss_reasoning": 0.5668055415153503, "loss_utility": 1.2378557920455933, "step": 270 }, { "epoch": 0.25256290773532153, "grad_norm": 2.0329896082174845, "learning_rate": 1.6832298136645963e-05, "loss": 2.406, "step": 271 }, { "epoch": 0.25256290773532153, "loss_reasoning": 0.6824672222137451, "loss_utility": 1.488938331604004, "step": 271 }, { "epoch": 0.2534948741845294, "grad_norm": 2.121959283739145, "learning_rate": 1.68944099378882e-05, "loss": 2.6595, "step": 272 }, { "epoch": 0.2534948741845294, "loss_reasoning": 0.61431884765625, "loss_utility": 1.75262451171875, "step": 272 }, { "epoch": 0.25442684063373716, "grad_norm": 1.9269863885961989, "learning_rate": 1.6956521739130437e-05, "loss": 2.1408, "step": 273 }, { "epoch": 0.25442684063373716, "loss_reasoning": 0.6009417176246643, "loss_utility": 2.0546584129333496, "step": 273 }, { "epoch": 0.255358807082945, "grad_norm": 1.8095990129369006, "learning_rate": 1.7018633540372672e-05, "loss": 2.6803, "step": 274 }, { "epoch": 0.255358807082945, "loss_reasoning": 0.6524522304534912, "loss_utility": 2.3609132766723633, "step": 274 }, { "epoch": 0.25629077353215285, "grad_norm": 1.7823819938133916, "learning_rate": 1.7080745341614907e-05, "loss": 2.8621, "step": 275 }, { "epoch": 0.25629077353215285, "loss_reasoning": 0.6314053535461426, "loss_utility": 1.9149956703186035, "step": 275 }, { "epoch": 0.2572227399813607, "grad_norm": 2.149720690035553, "learning_rate": 1.7142857142857142e-05, "loss": 2.6656, "step": 276 }, { "epoch": 0.2572227399813607, "loss_reasoning": 0.5863551497459412, "loss_utility": 2.5289177894592285, "step": 276 }, { "epoch": 0.2581547064305685, "grad_norm": 2.6697530602305397, "learning_rate": 1.720496894409938e-05, "loss": 2.2766, "step": 277 }, { "epoch": 0.2581547064305685, "loss_reasoning": 0.5425847768783569, "loss_utility": 1.9333484172821045, "step": 277 }, { "epoch": 0.2590866728797763, "grad_norm": 1.8832901869588368, "learning_rate": 1.7267080745341615e-05, "loss": 2.5784, "step": 278 }, { "epoch": 0.2590866728797763, "loss_reasoning": 0.5111754536628723, "loss_utility": 1.6347641944885254, "step": 278 }, { "epoch": 0.26001863932898417, "grad_norm": 2.03367802163585, "learning_rate": 1.7329192546583854e-05, "loss": 2.214, "step": 279 }, { "epoch": 0.26001863932898417, "loss_reasoning": 0.5302832126617432, "loss_utility": 2.958479881286621, "step": 279 }, { "epoch": 0.26095060577819196, "grad_norm": 2.0824164745489218, "learning_rate": 1.739130434782609e-05, "loss": 2.8271, "step": 280 }, { "epoch": 0.26095060577819196, "loss_reasoning": 0.6122580766677856, "loss_utility": 2.3234291076660156, "step": 280 }, { "epoch": 0.2618825722273998, "grad_norm": 1.9673614122270697, "learning_rate": 1.7453416149068324e-05, "loss": 2.6717, "step": 281 }, { "epoch": 0.2618825722273998, "loss_reasoning": 0.6455217599868774, "loss_utility": 1.2176551818847656, "step": 281 }, { "epoch": 0.26281453867660765, "grad_norm": 1.6911361484329501, "learning_rate": 1.751552795031056e-05, "loss": 2.0116, "step": 282 }, { "epoch": 0.26281453867660765, "loss_reasoning": 0.5939823389053345, "loss_utility": 1.3098750114440918, "step": 282 }, { "epoch": 0.2637465051258155, "grad_norm": 2.084549043490097, "learning_rate": 1.7577639751552797e-05, "loss": 2.3445, "step": 283 }, { "epoch": 0.2637465051258155, "loss_reasoning": 0.5845383405685425, "loss_utility": 1.7122862339019775, "step": 283 }, { "epoch": 0.2646784715750233, "grad_norm": 1.8274510334824083, "learning_rate": 1.7639751552795032e-05, "loss": 2.1354, "step": 284 }, { "epoch": 0.2646784715750233, "loss_reasoning": 0.5967820882797241, "loss_utility": 1.6689730882644653, "step": 284 }, { "epoch": 0.2656104380242311, "grad_norm": 1.7491979729826033, "learning_rate": 1.7701863354037267e-05, "loss": 2.7533, "step": 285 }, { "epoch": 0.2656104380242311, "loss_reasoning": 0.6570569276809692, "loss_utility": 1.8955669403076172, "step": 285 }, { "epoch": 0.26654240447343897, "grad_norm": 1.9284864820732768, "learning_rate": 1.7763975155279506e-05, "loss": 2.6337, "step": 286 }, { "epoch": 0.26654240447343897, "loss_reasoning": 0.5505393743515015, "loss_utility": 2.1911492347717285, "step": 286 }, { "epoch": 0.2674743709226468, "grad_norm": 2.0747592001768225, "learning_rate": 1.782608695652174e-05, "loss": 2.8275, "step": 287 }, { "epoch": 0.2674743709226468, "loss_reasoning": 0.6011525392532349, "loss_utility": 2.4228057861328125, "step": 287 }, { "epoch": 0.2684063373718546, "grad_norm": 1.9849732041450974, "learning_rate": 1.7888198757763976e-05, "loss": 2.7005, "step": 288 }, { "epoch": 0.2684063373718546, "loss_reasoning": 0.5448274612426758, "loss_utility": 2.5222058296203613, "step": 288 }, { "epoch": 0.26933830382106244, "grad_norm": 1.9960836978444747, "learning_rate": 1.795031055900621e-05, "loss": 2.9796, "step": 289 }, { "epoch": 0.26933830382106244, "loss_reasoning": 0.5660532116889954, "loss_utility": 1.5016517639160156, "step": 289 }, { "epoch": 0.2702702702702703, "grad_norm": 1.8226891348443326, "learning_rate": 1.801242236024845e-05, "loss": 2.5364, "step": 290 }, { "epoch": 0.2702702702702703, "loss_reasoning": 0.641967236995697, "loss_utility": 0.6623930931091309, "step": 290 }, { "epoch": 0.2712022367194781, "grad_norm": 2.2753459042034976, "learning_rate": 1.8074534161490685e-05, "loss": 1.9704, "step": 291 }, { "epoch": 0.2712022367194781, "loss_reasoning": 0.5943620204925537, "loss_utility": 1.8776286840438843, "step": 291 }, { "epoch": 0.2721342031686859, "grad_norm": 1.9039899158669271, "learning_rate": 1.8136645962732923e-05, "loss": 2.3512, "step": 292 }, { "epoch": 0.2721342031686859, "loss_reasoning": 0.5606396198272705, "loss_utility": 1.9924132823944092, "step": 292 }, { "epoch": 0.27306616961789376, "grad_norm": 1.7586975030558996, "learning_rate": 1.8198757763975158e-05, "loss": 2.7961, "step": 293 }, { "epoch": 0.27306616961789376, "loss_reasoning": 0.5611129999160767, "loss_utility": 1.5895498991012573, "step": 293 }, { "epoch": 0.2739981360671016, "grad_norm": 1.8367666879089883, "learning_rate": 1.8260869565217393e-05, "loss": 2.6502, "step": 294 }, { "epoch": 0.2739981360671016, "loss_reasoning": 0.6096438765525818, "loss_utility": 2.3212573528289795, "step": 294 }, { "epoch": 0.2749301025163094, "grad_norm": 1.940427241845383, "learning_rate": 1.8322981366459628e-05, "loss": 2.756, "step": 295 }, { "epoch": 0.2749301025163094, "loss_reasoning": 0.5508200526237488, "loss_utility": 2.013221263885498, "step": 295 }, { "epoch": 0.27586206896551724, "grad_norm": 1.8068637020473823, "learning_rate": 1.8385093167701863e-05, "loss": 2.3381, "step": 296 }, { "epoch": 0.27586206896551724, "loss_reasoning": 0.6295168399810791, "loss_utility": 1.643294334411621, "step": 296 }, { "epoch": 0.2767940354147251, "grad_norm": 1.6805586207424905, "learning_rate": 1.84472049689441e-05, "loss": 2.2464, "step": 297 }, { "epoch": 0.2767940354147251, "loss_reasoning": 0.6082653403282166, "loss_utility": 2.490490436553955, "step": 297 }, { "epoch": 0.2777260018639329, "grad_norm": 1.9524560960276833, "learning_rate": 1.8509316770186337e-05, "loss": 2.6917, "step": 298 }, { "epoch": 0.2777260018639329, "loss_reasoning": 0.5401856899261475, "loss_utility": 1.4952723979949951, "step": 298 }, { "epoch": 0.2786579683131407, "grad_norm": 2.14629745116638, "learning_rate": 1.8571428571428575e-05, "loss": 2.5336, "step": 299 }, { "epoch": 0.2786579683131407, "loss_reasoning": 0.6489607095718384, "loss_utility": 1.5034204721450806, "step": 299 }, { "epoch": 0.27958993476234856, "grad_norm": 2.0215185850490056, "learning_rate": 1.863354037267081e-05, "loss": 2.7974, "step": 300 }, { "epoch": 0.27958993476234856, "loss_reasoning": 0.5849937200546265, "loss_utility": 1.4374772310256958, "step": 300 }, { "epoch": 0.2805219012115564, "grad_norm": 1.5414160545679678, "learning_rate": 1.8695652173913045e-05, "loss": 1.8247, "step": 301 }, { "epoch": 0.2805219012115564, "loss_reasoning": 0.5825250148773193, "loss_utility": 1.8740018606185913, "step": 301 }, { "epoch": 0.2814538676607642, "grad_norm": 1.6198796453423245, "learning_rate": 1.875776397515528e-05, "loss": 2.1153, "step": 302 }, { "epoch": 0.2814538676607642, "loss_reasoning": 0.5231219530105591, "loss_utility": 2.5268678665161133, "step": 302 }, { "epoch": 0.28238583410997203, "grad_norm": 3.219074922932918, "learning_rate": 1.881987577639752e-05, "loss": 2.6599, "step": 303 }, { "epoch": 0.28238583410997203, "loss_reasoning": 0.6462815999984741, "loss_utility": 2.3362884521484375, "step": 303 }, { "epoch": 0.2833178005591799, "grad_norm": 2.3355286350480466, "learning_rate": 1.8881987577639754e-05, "loss": 2.5944, "step": 304 }, { "epoch": 0.2833178005591799, "loss_reasoning": 0.5840877294540405, "loss_utility": 1.9574123620986938, "step": 304 }, { "epoch": 0.2842497670083877, "grad_norm": 2.1297794830009447, "learning_rate": 1.894409937888199e-05, "loss": 2.5146, "step": 305 }, { "epoch": 0.2842497670083877, "loss_reasoning": 0.5775532126426697, "loss_utility": 1.968963384628296, "step": 305 }, { "epoch": 0.2851817334575955, "grad_norm": 2.5573428069620334, "learning_rate": 1.9006211180124224e-05, "loss": 2.7762, "step": 306 }, { "epoch": 0.2851817334575955, "loss_reasoning": 0.5957331657409668, "loss_utility": 1.91242253780365, "step": 306 }, { "epoch": 0.28611369990680335, "grad_norm": 1.7809238634996047, "learning_rate": 1.906832298136646e-05, "loss": 2.5306, "step": 307 }, { "epoch": 0.28611369990680335, "loss_reasoning": 0.5201667547225952, "loss_utility": 2.4820189476013184, "step": 307 }, { "epoch": 0.2870456663560112, "grad_norm": 1.9583699149228806, "learning_rate": 1.9130434782608697e-05, "loss": 2.6656, "step": 308 }, { "epoch": 0.2870456663560112, "loss_reasoning": 0.6451674103736877, "loss_utility": 1.7429814338684082, "step": 308 }, { "epoch": 0.287977632805219, "grad_norm": 1.9428081377133344, "learning_rate": 1.9192546583850932e-05, "loss": 2.4956, "step": 309 }, { "epoch": 0.287977632805219, "loss_reasoning": 0.6241921186447144, "loss_utility": 1.9469373226165771, "step": 309 }, { "epoch": 0.28890959925442683, "grad_norm": 1.5230223248859822, "learning_rate": 1.925465838509317e-05, "loss": 2.0427, "step": 310 }, { "epoch": 0.28890959925442683, "loss_reasoning": 0.5915879011154175, "loss_utility": 1.9392894506454468, "step": 310 }, { "epoch": 0.2898415657036347, "grad_norm": 2.785661677720077, "learning_rate": 1.9316770186335406e-05, "loss": 2.7841, "step": 311 }, { "epoch": 0.2898415657036347, "loss_reasoning": 0.5282020568847656, "loss_utility": 2.3930375576019287, "step": 311 }, { "epoch": 0.2907735321528425, "grad_norm": 2.8011622277307433, "learning_rate": 1.937888198757764e-05, "loss": 2.475, "step": 312 }, { "epoch": 0.2907735321528425, "loss_reasoning": 0.5716676712036133, "loss_utility": 1.5553094148635864, "step": 312 }, { "epoch": 0.2917054986020503, "grad_norm": 1.3936241439165453, "learning_rate": 1.9440993788819876e-05, "loss": 2.0741, "step": 313 }, { "epoch": 0.2917054986020503, "loss_reasoning": 0.5575088858604431, "loss_utility": 1.7981594800949097, "step": 313 }, { "epoch": 0.29263746505125815, "grad_norm": 2.419741520963789, "learning_rate": 1.950310559006211e-05, "loss": 2.6329, "step": 314 }, { "epoch": 0.29263746505125815, "loss_reasoning": 0.5753280520439148, "loss_utility": 1.965730905532837, "step": 314 }, { "epoch": 0.293569431500466, "grad_norm": 1.9032312090731547, "learning_rate": 1.956521739130435e-05, "loss": 2.7123, "step": 315 }, { "epoch": 0.293569431500466, "loss_reasoning": 0.5320262312889099, "loss_utility": 1.7622179985046387, "step": 315 }, { "epoch": 0.29450139794967384, "grad_norm": 1.602719102707963, "learning_rate": 1.9627329192546585e-05, "loss": 2.4811, "step": 316 }, { "epoch": 0.29450139794967384, "loss_reasoning": 0.5478618144989014, "loss_utility": 2.0782201290130615, "step": 316 }, { "epoch": 0.2954333643988816, "grad_norm": 1.907345095412436, "learning_rate": 1.9689440993788823e-05, "loss": 2.4936, "step": 317 }, { "epoch": 0.2954333643988816, "loss_reasoning": 0.5528198480606079, "loss_utility": 1.7720046043395996, "step": 317 }, { "epoch": 0.29636533084808947, "grad_norm": 1.8618152759226307, "learning_rate": 1.9751552795031058e-05, "loss": 2.4398, "step": 318 }, { "epoch": 0.29636533084808947, "loss_reasoning": 0.5113398432731628, "loss_utility": 1.9132622480392456, "step": 318 }, { "epoch": 0.2972972972972973, "grad_norm": 1.7091238227800871, "learning_rate": 1.9813664596273293e-05, "loss": 2.6104, "step": 319 }, { "epoch": 0.2972972972972973, "loss_reasoning": 0.570319414138794, "loss_utility": 2.0269899368286133, "step": 319 }, { "epoch": 0.2982292637465051, "grad_norm": 2.1966719973785147, "learning_rate": 1.9875776397515528e-05, "loss": 2.548, "step": 320 }, { "epoch": 0.2982292637465051, "loss_reasoning": 0.5509387254714966, "loss_utility": 1.6429147720336914, "step": 320 }, { "epoch": 0.29916123019571295, "grad_norm": 1.8239244526138438, "learning_rate": 1.9937888198757767e-05, "loss": 2.5563, "step": 321 }, { "epoch": 0.29916123019571295, "loss_reasoning": 0.5819888114929199, "loss_utility": 1.4712841510772705, "step": 321 }, { "epoch": 0.3000931966449208, "grad_norm": 1.4623444302066253, "learning_rate": 2e-05, "loss": 2.2036, "step": 322 }, { "epoch": 0.3000931966449208, "loss_reasoning": 0.5595107078552246, "loss_utility": 1.8500983715057373, "step": 322 }, { "epoch": 0.30102516309412863, "grad_norm": 2.2688004308297764, "learning_rate": 1.999309630652399e-05, "loss": 2.7942, "step": 323 }, { "epoch": 0.30102516309412863, "loss_reasoning": 0.5634115934371948, "loss_utility": 0.7747569680213928, "step": 323 }, { "epoch": 0.3019571295433364, "grad_norm": 1.746643269444817, "learning_rate": 1.9986192613047983e-05, "loss": 1.7896, "step": 324 }, { "epoch": 0.3019571295433364, "loss_reasoning": 0.582850992679596, "loss_utility": 2.325716972351074, "step": 324 }, { "epoch": 0.30288909599254427, "grad_norm": 1.8454113342332736, "learning_rate": 1.9979288919571972e-05, "loss": 2.6481, "step": 325 }, { "epoch": 0.30288909599254427, "loss_reasoning": 0.6507565975189209, "loss_utility": 1.7875468730926514, "step": 325 }, { "epoch": 0.3038210624417521, "grad_norm": 1.7660885673072284, "learning_rate": 1.997238522609596e-05, "loss": 2.281, "step": 326 }, { "epoch": 0.3038210624417521, "loss_reasoning": 0.5700966119766235, "loss_utility": 1.7341408729553223, "step": 326 }, { "epoch": 0.3047530288909599, "grad_norm": 1.8002309305895947, "learning_rate": 1.9965481532619954e-05, "loss": 2.303, "step": 327 }, { "epoch": 0.3047530288909599, "loss_reasoning": 0.5360133647918701, "loss_utility": 2.1669483184814453, "step": 327 }, { "epoch": 0.30568499534016774, "grad_norm": 1.7187470003755938, "learning_rate": 1.9958577839143946e-05, "loss": 2.5436, "step": 328 }, { "epoch": 0.30568499534016774, "loss_reasoning": 0.5698077082633972, "loss_utility": 2.126607656478882, "step": 328 }, { "epoch": 0.3066169617893756, "grad_norm": 2.7417491987184888, "learning_rate": 1.9951674145667935e-05, "loss": 2.5507, "step": 329 }, { "epoch": 0.3066169617893756, "loss_reasoning": 0.5740898847579956, "loss_utility": 1.3567681312561035, "step": 329 }, { "epoch": 0.30754892823858343, "grad_norm": 1.6038116794776558, "learning_rate": 1.9944770452191924e-05, "loss": 2.4177, "step": 330 }, { "epoch": 0.30754892823858343, "loss_reasoning": 0.5338516235351562, "loss_utility": 0.5154160857200623, "step": 330 }, { "epoch": 0.3084808946877912, "grad_norm": 1.713404175764586, "learning_rate": 1.9937866758715913e-05, "loss": 2.151, "step": 331 }, { "epoch": 0.3084808946877912, "loss_reasoning": 0.5227380990982056, "loss_utility": 1.041872262954712, "step": 331 }, { "epoch": 0.30941286113699906, "grad_norm": 1.5738653925438448, "learning_rate": 1.9930963065239906e-05, "loss": 1.9211, "step": 332 }, { "epoch": 0.30941286113699906, "loss_reasoning": 0.560478687286377, "loss_utility": 2.0010108947753906, "step": 332 }, { "epoch": 0.3103448275862069, "grad_norm": 1.8037318357969263, "learning_rate": 1.9924059371763895e-05, "loss": 2.2184, "step": 333 }, { "epoch": 0.3103448275862069, "loss_reasoning": 0.6122190356254578, "loss_utility": 2.1790459156036377, "step": 333 }, { "epoch": 0.31127679403541475, "grad_norm": 2.2421406824415593, "learning_rate": 1.9917155678287884e-05, "loss": 2.5407, "step": 334 }, { "epoch": 0.31127679403541475, "loss_reasoning": 0.5785408020019531, "loss_utility": 1.211939811706543, "step": 334 }, { "epoch": 0.31220876048462254, "grad_norm": 1.7414258322270133, "learning_rate": 1.9910251984811876e-05, "loss": 2.1419, "step": 335 }, { "epoch": 0.31220876048462254, "loss_reasoning": 0.5710539817810059, "loss_utility": 2.4019460678100586, "step": 335 }, { "epoch": 0.3131407269338304, "grad_norm": 1.8856034749037904, "learning_rate": 1.9903348291335865e-05, "loss": 2.5654, "step": 336 }, { "epoch": 0.3131407269338304, "loss_reasoning": 0.5411581993103027, "loss_utility": 2.076362133026123, "step": 336 }, { "epoch": 0.3140726933830382, "grad_norm": 1.977785594136264, "learning_rate": 1.9896444597859858e-05, "loss": 2.3312, "step": 337 }, { "epoch": 0.3140726933830382, "loss_reasoning": 0.4854792356491089, "loss_utility": 1.4238649606704712, "step": 337 }, { "epoch": 0.315004659832246, "grad_norm": 1.8249071190170996, "learning_rate": 1.9889540904383847e-05, "loss": 2.1865, "step": 338 }, { "epoch": 0.315004659832246, "loss_reasoning": 0.5595130920410156, "loss_utility": 1.3285400867462158, "step": 338 }, { "epoch": 0.31593662628145386, "grad_norm": 2.4182647161475397, "learning_rate": 1.988263721090784e-05, "loss": 2.4489, "step": 339 }, { "epoch": 0.31593662628145386, "loss_reasoning": 0.5600744485855103, "loss_utility": 1.6216970682144165, "step": 339 }, { "epoch": 0.3168685927306617, "grad_norm": 1.7584735941772938, "learning_rate": 1.9875733517431828e-05, "loss": 2.6872, "step": 340 }, { "epoch": 0.3168685927306617, "loss_reasoning": 0.5510708689689636, "loss_utility": 1.9994678497314453, "step": 340 }, { "epoch": 0.31780055917986955, "grad_norm": 1.8491464689226351, "learning_rate": 1.9868829823955817e-05, "loss": 2.5638, "step": 341 }, { "epoch": 0.31780055917986955, "loss_reasoning": 0.5826143026351929, "loss_utility": 1.2797248363494873, "step": 341 }, { "epoch": 0.31873252562907733, "grad_norm": 2.348925279093769, "learning_rate": 1.9861926130479806e-05, "loss": 2.3686, "step": 342 }, { "epoch": 0.31873252562907733, "loss_reasoning": 0.5957568287849426, "loss_utility": 2.2466254234313965, "step": 342 }, { "epoch": 0.3196644920782852, "grad_norm": 1.8013978086979094, "learning_rate": 1.98550224370038e-05, "loss": 2.3918, "step": 343 }, { "epoch": 0.3196644920782852, "loss_reasoning": 0.6047979593276978, "loss_utility": 1.709596872329712, "step": 343 }, { "epoch": 0.320596458527493, "grad_norm": 2.037514638790113, "learning_rate": 1.9848118743527788e-05, "loss": 2.5462, "step": 344 }, { "epoch": 0.320596458527493, "loss_reasoning": 0.4792858958244324, "loss_utility": 2.0554933547973633, "step": 344 }, { "epoch": 0.32152842497670087, "grad_norm": 2.358107484340407, "learning_rate": 1.984121505005178e-05, "loss": 2.7305, "step": 345 }, { "epoch": 0.32152842497670087, "loss_reasoning": 0.570858359336853, "loss_utility": 1.1021101474761963, "step": 345 }, { "epoch": 0.32246039142590865, "grad_norm": 1.617851209922192, "learning_rate": 1.983431135657577e-05, "loss": 2.0986, "step": 346 }, { "epoch": 0.32246039142590865, "loss_reasoning": 0.6254110336303711, "loss_utility": 1.7773735523223877, "step": 346 }, { "epoch": 0.3233923578751165, "grad_norm": 1.7691068283413376, "learning_rate": 1.982740766309976e-05, "loss": 2.19, "step": 347 }, { "epoch": 0.3233923578751165, "loss_reasoning": 0.6299483776092529, "loss_utility": 1.3061859607696533, "step": 347 }, { "epoch": 0.32432432432432434, "grad_norm": 1.5011342488931874, "learning_rate": 1.982050396962375e-05, "loss": 2.1588, "step": 348 }, { "epoch": 0.32432432432432434, "loss_reasoning": 0.5058258175849915, "loss_utility": 1.6192409992218018, "step": 348 }, { "epoch": 0.32525629077353213, "grad_norm": 2.235877455991639, "learning_rate": 1.981360027614774e-05, "loss": 2.4358, "step": 349 }, { "epoch": 0.32525629077353213, "loss_reasoning": 0.5417778491973877, "loss_utility": 1.8077380657196045, "step": 349 }, { "epoch": 0.32618825722274, "grad_norm": 1.681786222890361, "learning_rate": 1.9806696582671732e-05, "loss": 2.6585, "step": 350 }, { "epoch": 0.32618825722274, "loss_reasoning": 0.5649129748344421, "loss_utility": 1.7532966136932373, "step": 350 }, { "epoch": 0.3271202236719478, "grad_norm": 1.7839876525673248, "learning_rate": 1.979979288919572e-05, "loss": 2.372, "step": 351 }, { "epoch": 0.3271202236719478, "loss_reasoning": 0.5393126606941223, "loss_utility": 1.887427568435669, "step": 351 }, { "epoch": 0.32805219012115566, "grad_norm": 1.7471905372486851, "learning_rate": 1.979288919571971e-05, "loss": 2.5304, "step": 352 }, { "epoch": 0.32805219012115566, "loss_reasoning": 0.5261749625205994, "loss_utility": 1.5912644863128662, "step": 352 }, { "epoch": 0.32898415657036345, "grad_norm": 2.10751244933148, "learning_rate": 1.9785985502243702e-05, "loss": 2.8435, "step": 353 }, { "epoch": 0.32898415657036345, "loss_reasoning": 0.5762643218040466, "loss_utility": 1.7286999225616455, "step": 353 }, { "epoch": 0.3299161230195713, "grad_norm": 1.944286990403359, "learning_rate": 1.977908180876769e-05, "loss": 2.4424, "step": 354 }, { "epoch": 0.3299161230195713, "loss_reasoning": 0.5161623954772949, "loss_utility": 1.7480977773666382, "step": 354 }, { "epoch": 0.33084808946877914, "grad_norm": 1.714711612307071, "learning_rate": 1.9772178115291684e-05, "loss": 2.4447, "step": 355 }, { "epoch": 0.33084808946877914, "loss_reasoning": 0.5174885988235474, "loss_utility": 1.7709968090057373, "step": 355 }, { "epoch": 0.3317800559179869, "grad_norm": 1.9819738217859875, "learning_rate": 1.9765274421815673e-05, "loss": 2.7248, "step": 356 }, { "epoch": 0.3317800559179869, "loss_reasoning": 0.5864800214767456, "loss_utility": 1.1413145065307617, "step": 356 }, { "epoch": 0.33271202236719477, "grad_norm": 1.7104451946600583, "learning_rate": 1.9758370728339665e-05, "loss": 1.9681, "step": 357 }, { "epoch": 0.33271202236719477, "loss_reasoning": 0.5878646373748779, "loss_utility": 2.1320207118988037, "step": 357 }, { "epoch": 0.3336439888164026, "grad_norm": 2.0155506825948226, "learning_rate": 1.9751467034863654e-05, "loss": 2.4129, "step": 358 }, { "epoch": 0.3336439888164026, "loss_reasoning": 0.5892176628112793, "loss_utility": 1.9846436977386475, "step": 358 }, { "epoch": 0.33457595526561046, "grad_norm": 1.7591701060883407, "learning_rate": 1.9744563341387643e-05, "loss": 2.4319, "step": 359 }, { "epoch": 0.33457595526561046, "loss_reasoning": 0.5602681636810303, "loss_utility": 2.823389768600464, "step": 359 }, { "epoch": 0.33550792171481825, "grad_norm": 1.6124648264903534, "learning_rate": 1.9737659647911633e-05, "loss": 2.7517, "step": 360 }, { "epoch": 0.33550792171481825, "loss_reasoning": 0.5050050616264343, "loss_utility": 1.738358736038208, "step": 360 }, { "epoch": 0.3364398881640261, "grad_norm": 1.9638101226137963, "learning_rate": 1.9730755954435625e-05, "loss": 2.514, "step": 361 }, { "epoch": 0.3364398881640261, "loss_reasoning": 0.5352017879486084, "loss_utility": 0.8996619582176208, "step": 361 }, { "epoch": 0.33737185461323393, "grad_norm": 1.808335127634011, "learning_rate": 1.9723852260959614e-05, "loss": 1.851, "step": 362 }, { "epoch": 0.33737185461323393, "loss_reasoning": 0.5611268281936646, "loss_utility": 1.9141778945922852, "step": 362 }, { "epoch": 0.3383038210624418, "grad_norm": 1.8261690234595434, "learning_rate": 1.9716948567483606e-05, "loss": 2.4101, "step": 363 }, { "epoch": 0.3383038210624418, "loss_reasoning": 0.523495614528656, "loss_utility": 1.9639478921890259, "step": 363 }, { "epoch": 0.33923578751164957, "grad_norm": 1.9991105828223967, "learning_rate": 1.9710044874007595e-05, "loss": 2.7609, "step": 364 }, { "epoch": 0.33923578751164957, "loss_reasoning": 0.5586245656013489, "loss_utility": 1.382697343826294, "step": 364 }, { "epoch": 0.3401677539608574, "grad_norm": 1.7160908360667317, "learning_rate": 1.9703141180531588e-05, "loss": 2.2698, "step": 365 }, { "epoch": 0.3401677539608574, "loss_reasoning": 0.5601803064346313, "loss_utility": 2.4264016151428223, "step": 365 }, { "epoch": 0.34109972041006525, "grad_norm": 2.014217667669638, "learning_rate": 1.9696237487055577e-05, "loss": 2.7315, "step": 366 }, { "epoch": 0.34109972041006525, "loss_reasoning": 0.623767614364624, "loss_utility": 1.8493974208831787, "step": 366 }, { "epoch": 0.34203168685927304, "grad_norm": 1.6881739291693925, "learning_rate": 1.9689333793579566e-05, "loss": 2.7182, "step": 367 }, { "epoch": 0.34203168685927304, "loss_reasoning": 0.532457709312439, "loss_utility": 1.2430230379104614, "step": 367 }, { "epoch": 0.3429636533084809, "grad_norm": 1.5837470044840363, "learning_rate": 1.968243010010356e-05, "loss": 2.0311, "step": 368 }, { "epoch": 0.3429636533084809, "loss_reasoning": 0.5035503506660461, "loss_utility": 1.418298602104187, "step": 368 }, { "epoch": 0.34389561975768873, "grad_norm": 2.2055635859670577, "learning_rate": 1.9675526406627547e-05, "loss": 2.3583, "step": 369 }, { "epoch": 0.34389561975768873, "loss_reasoning": 0.5171729326248169, "loss_utility": 1.6504671573638916, "step": 369 }, { "epoch": 0.3448275862068966, "grad_norm": 2.687993512195808, "learning_rate": 1.9668622713151536e-05, "loss": 2.6573, "step": 370 }, { "epoch": 0.3448275862068966, "loss_reasoning": 0.5318048000335693, "loss_utility": 1.9714453220367432, "step": 370 }, { "epoch": 0.34575955265610436, "grad_norm": 2.048273938998344, "learning_rate": 1.9661719019675526e-05, "loss": 2.5176, "step": 371 }, { "epoch": 0.34575955265610436, "loss_reasoning": 0.5334678292274475, "loss_utility": 1.905465841293335, "step": 371 }, { "epoch": 0.3466915191053122, "grad_norm": 1.7953846137525866, "learning_rate": 1.9654815326199518e-05, "loss": 2.5211, "step": 372 }, { "epoch": 0.3466915191053122, "loss_reasoning": 0.559988260269165, "loss_utility": 1.1658458709716797, "step": 372 }, { "epoch": 0.34762348555452005, "grad_norm": 1.599112521311411, "learning_rate": 1.9647911632723507e-05, "loss": 2.1207, "step": 373 }, { "epoch": 0.34762348555452005, "loss_reasoning": 0.5079479813575745, "loss_utility": 1.2477425336837769, "step": 373 }, { "epoch": 0.34855545200372784, "grad_norm": 2.014720758026281, "learning_rate": 1.96410079392475e-05, "loss": 2.3725, "step": 374 }, { "epoch": 0.34855545200372784, "loss_reasoning": 0.5941770672798157, "loss_utility": 1.4454516172409058, "step": 374 }, { "epoch": 0.3494874184529357, "grad_norm": 1.7545171883609416, "learning_rate": 1.9634104245771492e-05, "loss": 2.5742, "step": 375 }, { "epoch": 0.3494874184529357, "loss_reasoning": 0.5766449570655823, "loss_utility": 0.9132527112960815, "step": 375 }, { "epoch": 0.3504193849021435, "grad_norm": 2.2160179735658243, "learning_rate": 1.962720055229548e-05, "loss": 2.5492, "step": 376 }, { "epoch": 0.3504193849021435, "loss_reasoning": 0.5966839790344238, "loss_utility": 1.1243863105773926, "step": 376 }, { "epoch": 0.35135135135135137, "grad_norm": 1.4918009537584644, "learning_rate": 1.962029685881947e-05, "loss": 2.032, "step": 377 }, { "epoch": 0.35135135135135137, "loss_reasoning": 0.5713527202606201, "loss_utility": 1.4802082777023315, "step": 377 }, { "epoch": 0.35228331780055916, "grad_norm": 2.0420407729324226, "learning_rate": 1.961339316534346e-05, "loss": 2.4391, "step": 378 }, { "epoch": 0.35228331780055916, "loss_reasoning": 0.5777997970581055, "loss_utility": 1.7853065729141235, "step": 378 }, { "epoch": 0.353215284249767, "grad_norm": 1.8140016886931918, "learning_rate": 1.960648947186745e-05, "loss": 2.2649, "step": 379 }, { "epoch": 0.353215284249767, "loss_reasoning": 0.48157140612602234, "loss_utility": 1.6552832126617432, "step": 379 }, { "epoch": 0.35414725069897485, "grad_norm": 1.5768610366716953, "learning_rate": 1.959958577839144e-05, "loss": 2.5911, "step": 380 }, { "epoch": 0.35414725069897485, "loss_reasoning": 0.5668995380401611, "loss_utility": 1.7710561752319336, "step": 380 }, { "epoch": 0.3550792171481827, "grad_norm": 1.953080305877853, "learning_rate": 1.959268208491543e-05, "loss": 2.26, "step": 381 }, { "epoch": 0.3550792171481827, "loss_reasoning": 0.6049039959907532, "loss_utility": 1.7355812788009644, "step": 381 }, { "epoch": 0.3560111835973905, "grad_norm": 1.6784276573754129, "learning_rate": 1.9585778391439422e-05, "loss": 2.2473, "step": 382 }, { "epoch": 0.3560111835973905, "loss_reasoning": 0.5898962020874023, "loss_utility": 2.011756181716919, "step": 382 }, { "epoch": 0.3569431500465983, "grad_norm": 1.8244926813128728, "learning_rate": 1.957887469796341e-05, "loss": 2.6007, "step": 383 }, { "epoch": 0.3569431500465983, "loss_reasoning": 0.49210673570632935, "loss_utility": 1.0128158330917358, "step": 383 }, { "epoch": 0.35787511649580617, "grad_norm": 1.2525962332365528, "learning_rate": 1.9571971004487403e-05, "loss": 2.0249, "step": 384 }, { "epoch": 0.35787511649580617, "loss_reasoning": 0.5397568941116333, "loss_utility": 1.9749826192855835, "step": 384 }, { "epoch": 0.35880708294501396, "grad_norm": 1.8640131729858131, "learning_rate": 1.9565067311011392e-05, "loss": 2.59, "step": 385 }, { "epoch": 0.35880708294501396, "loss_reasoning": 0.5272932648658752, "loss_utility": 2.0531086921691895, "step": 385 }, { "epoch": 0.3597390493942218, "grad_norm": 1.7288173521129602, "learning_rate": 1.9558163617535385e-05, "loss": 2.4695, "step": 386 }, { "epoch": 0.3597390493942218, "loss_reasoning": 0.5353418588638306, "loss_utility": 1.9808837175369263, "step": 386 }, { "epoch": 0.36067101584342964, "grad_norm": 1.6666594969544373, "learning_rate": 1.9551259924059374e-05, "loss": 2.6091, "step": 387 }, { "epoch": 0.36067101584342964, "loss_reasoning": 0.5507171750068665, "loss_utility": 2.91371488571167, "step": 387 }, { "epoch": 0.3616029822926375, "grad_norm": 1.824195284215484, "learning_rate": 1.9544356230583363e-05, "loss": 2.6876, "step": 388 }, { "epoch": 0.3616029822926375, "loss_reasoning": 0.5612307786941528, "loss_utility": 1.9849870204925537, "step": 388 }, { "epoch": 0.3625349487418453, "grad_norm": 1.3074892442909742, "learning_rate": 1.9537452537107352e-05, "loss": 2.397, "step": 389 }, { "epoch": 0.3625349487418453, "loss_reasoning": 0.523774266242981, "loss_utility": 1.809185266494751, "step": 389 }, { "epoch": 0.3634669151910531, "grad_norm": 1.62644756688569, "learning_rate": 1.9530548843631344e-05, "loss": 2.0354, "step": 390 }, { "epoch": 0.3634669151910531, "loss_reasoning": 0.5932571887969971, "loss_utility": 1.731044888496399, "step": 390 }, { "epoch": 0.36439888164026096, "grad_norm": 1.9302845069389862, "learning_rate": 1.9523645150155333e-05, "loss": 2.4469, "step": 391 }, { "epoch": 0.36439888164026096, "loss_reasoning": 0.5123932361602783, "loss_utility": 2.334507465362549, "step": 391 }, { "epoch": 0.36533084808946875, "grad_norm": 3.8841114397968, "learning_rate": 1.9516741456679326e-05, "loss": 3.0691, "step": 392 }, { "epoch": 0.36533084808946875, "loss_reasoning": 0.5438164472579956, "loss_utility": 2.6362504959106445, "step": 392 }, { "epoch": 0.3662628145386766, "grad_norm": 1.8745428408081806, "learning_rate": 1.9509837763203315e-05, "loss": 2.461, "step": 393 }, { "epoch": 0.3662628145386766, "loss_reasoning": 0.5887683629989624, "loss_utility": 1.3061301708221436, "step": 393 }, { "epoch": 0.36719478098788444, "grad_norm": 2.073426862692533, "learning_rate": 1.9502934069727307e-05, "loss": 2.3427, "step": 394 }, { "epoch": 0.36719478098788444, "loss_reasoning": 0.4913037419319153, "loss_utility": 1.6549084186553955, "step": 394 }, { "epoch": 0.3681267474370923, "grad_norm": 2.1788138259100847, "learning_rate": 1.9496030376251296e-05, "loss": 2.1208, "step": 395 }, { "epoch": 0.3681267474370923, "loss_reasoning": 0.5016734004020691, "loss_utility": 1.9260270595550537, "step": 395 }, { "epoch": 0.36905871388630007, "grad_norm": 2.4832953888952893, "learning_rate": 1.9489126682775285e-05, "loss": 2.75, "step": 396 }, { "epoch": 0.36905871388630007, "loss_reasoning": 0.4930132031440735, "loss_utility": 1.2821130752563477, "step": 396 }, { "epoch": 0.3699906803355079, "grad_norm": 1.9099447761382462, "learning_rate": 1.9482222989299278e-05, "loss": 2.0687, "step": 397 }, { "epoch": 0.3699906803355079, "loss_reasoning": 0.550513744354248, "loss_utility": 1.432894229888916, "step": 397 }, { "epoch": 0.37092264678471576, "grad_norm": 1.7454783708273736, "learning_rate": 1.9475319295823267e-05, "loss": 2.3554, "step": 398 }, { "epoch": 0.37092264678471576, "loss_reasoning": 0.591975748538971, "loss_utility": 1.7320079803466797, "step": 398 }, { "epoch": 0.3718546132339236, "grad_norm": 1.4074179347752809, "learning_rate": 1.9468415602347256e-05, "loss": 2.5047, "step": 399 }, { "epoch": 0.3718546132339236, "loss_reasoning": 0.4881497621536255, "loss_utility": 1.9247357845306396, "step": 399 }, { "epoch": 0.3727865796831314, "grad_norm": 2.61678604260471, "learning_rate": 1.9461511908871248e-05, "loss": 2.3398, "step": 400 }, { "epoch": 0.3727865796831314, "loss_reasoning": 0.5204564332962036, "loss_utility": 1.8801589012145996, "step": 400 }, { "epoch": 0.37371854613233924, "grad_norm": 2.2771752635246645, "learning_rate": 1.9454608215395237e-05, "loss": 2.5803, "step": 401 }, { "epoch": 0.37371854613233924, "loss_reasoning": 0.594984233379364, "loss_utility": 1.835716724395752, "step": 401 }, { "epoch": 0.3746505125815471, "grad_norm": 2.2242184130047917, "learning_rate": 1.944770452191923e-05, "loss": 2.5637, "step": 402 }, { "epoch": 0.3746505125815471, "loss_reasoning": 0.552354097366333, "loss_utility": 2.0905370712280273, "step": 402 }, { "epoch": 0.37558247903075487, "grad_norm": 1.5761568696261614, "learning_rate": 1.944080082844322e-05, "loss": 2.5561, "step": 403 }, { "epoch": 0.37558247903075487, "loss_reasoning": 0.5520540475845337, "loss_utility": 1.3459739685058594, "step": 403 }, { "epoch": 0.3765144454799627, "grad_norm": 1.3433017128929505, "learning_rate": 1.943389713496721e-05, "loss": 2.3402, "step": 404 }, { "epoch": 0.3765144454799627, "loss_reasoning": 0.5647464990615845, "loss_utility": 1.4752304553985596, "step": 404 }, { "epoch": 0.37744641192917056, "grad_norm": 1.9492677235920957, "learning_rate": 1.94269934414912e-05, "loss": 2.1099, "step": 405 }, { "epoch": 0.37744641192917056, "loss_reasoning": 0.5593060255050659, "loss_utility": 1.745406150817871, "step": 405 }, { "epoch": 0.3783783783783784, "grad_norm": 1.798985234277589, "learning_rate": 1.942008974801519e-05, "loss": 2.2447, "step": 406 }, { "epoch": 0.3783783783783784, "loss_reasoning": 0.5689140558242798, "loss_utility": 2.1382603645324707, "step": 406 }, { "epoch": 0.3793103448275862, "grad_norm": 1.7344253959369624, "learning_rate": 1.941318605453918e-05, "loss": 2.3114, "step": 407 }, { "epoch": 0.3793103448275862, "loss_reasoning": 0.5394951105117798, "loss_utility": 1.625854253768921, "step": 407 }, { "epoch": 0.38024231127679403, "grad_norm": 1.6914811380929877, "learning_rate": 1.940628236106317e-05, "loss": 2.141, "step": 408 }, { "epoch": 0.38024231127679403, "loss_reasoning": 0.5997684001922607, "loss_utility": 1.0540269613265991, "step": 408 }, { "epoch": 0.3811742777260019, "grad_norm": 2.3216414218435815, "learning_rate": 1.939937866758716e-05, "loss": 2.0997, "step": 409 }, { "epoch": 0.3811742777260019, "loss_reasoning": 0.5532622337341309, "loss_utility": 2.571927547454834, "step": 409 }, { "epoch": 0.3821062441752097, "grad_norm": 2.218572867143236, "learning_rate": 1.9392474974111152e-05, "loss": 2.4906, "step": 410 }, { "epoch": 0.3821062441752097, "loss_reasoning": 0.572744607925415, "loss_utility": 1.3385648727416992, "step": 410 }, { "epoch": 0.3830382106244175, "grad_norm": 1.746768940937563, "learning_rate": 1.938557128063514e-05, "loss": 2.2098, "step": 411 }, { "epoch": 0.3830382106244175, "loss_reasoning": 0.6288045048713684, "loss_utility": 1.4773190021514893, "step": 411 }, { "epoch": 0.38397017707362535, "grad_norm": 1.8122399041654753, "learning_rate": 1.9378667587159134e-05, "loss": 2.8007, "step": 412 }, { "epoch": 0.38397017707362535, "loss_reasoning": 0.48114365339279175, "loss_utility": 2.4597082138061523, "step": 412 }, { "epoch": 0.3849021435228332, "grad_norm": 2.317265479668949, "learning_rate": 1.9371763893683123e-05, "loss": 2.6441, "step": 413 }, { "epoch": 0.3849021435228332, "loss_reasoning": 0.5127997398376465, "loss_utility": 1.0675197839736938, "step": 413 }, { "epoch": 0.385834109972041, "grad_norm": 1.3931568714198144, "learning_rate": 1.9364860200207112e-05, "loss": 2.003, "step": 414 }, { "epoch": 0.385834109972041, "loss_reasoning": 0.5993623733520508, "loss_utility": 1.5780274868011475, "step": 414 }, { "epoch": 0.38676607642124883, "grad_norm": 2.232090571909122, "learning_rate": 1.9357956506731104e-05, "loss": 2.1847, "step": 415 }, { "epoch": 0.38676607642124883, "loss_reasoning": 0.5374181866645813, "loss_utility": 1.5950312614440918, "step": 415 }, { "epoch": 0.38769804287045667, "grad_norm": 1.6363130777355515, "learning_rate": 1.9351052813255093e-05, "loss": 2.2318, "step": 416 }, { "epoch": 0.38769804287045667, "loss_reasoning": 0.5125975608825684, "loss_utility": 2.1838178634643555, "step": 416 }, { "epoch": 0.3886300093196645, "grad_norm": 1.6357095330037565, "learning_rate": 1.9344149119779082e-05, "loss": 2.3039, "step": 417 }, { "epoch": 0.3886300093196645, "loss_reasoning": 0.6104772686958313, "loss_utility": 2.0455422401428223, "step": 417 }, { "epoch": 0.3895619757688723, "grad_norm": 1.6564154468066676, "learning_rate": 1.933724542630307e-05, "loss": 2.3994, "step": 418 }, { "epoch": 0.3895619757688723, "loss_reasoning": 0.5359913110733032, "loss_utility": 2.4019322395324707, "step": 418 }, { "epoch": 0.39049394221808015, "grad_norm": 1.7443066512324064, "learning_rate": 1.9330341732827064e-05, "loss": 2.4884, "step": 419 }, { "epoch": 0.39049394221808015, "loss_reasoning": 0.4927031397819519, "loss_utility": 1.7072449922561646, "step": 419 }, { "epoch": 0.391425908667288, "grad_norm": 1.3952325985146428, "learning_rate": 1.9323438039351056e-05, "loss": 2.5389, "step": 420 }, { "epoch": 0.391425908667288, "loss_reasoning": 0.5722787976264954, "loss_utility": 2.155536651611328, "step": 420 }, { "epoch": 0.3923578751164958, "grad_norm": 1.6233900690016094, "learning_rate": 1.9316534345875045e-05, "loss": 2.3835, "step": 421 }, { "epoch": 0.3923578751164958, "loss_reasoning": 0.5828697681427002, "loss_utility": 2.2419469356536865, "step": 421 }, { "epoch": 0.3932898415657036, "grad_norm": 1.6412732969202777, "learning_rate": 1.9309630652399034e-05, "loss": 2.3278, "step": 422 }, { "epoch": 0.3932898415657036, "loss_reasoning": 0.5601851940155029, "loss_utility": 2.169194221496582, "step": 422 }, { "epoch": 0.39422180801491147, "grad_norm": 2.046112589360035, "learning_rate": 1.9302726958923027e-05, "loss": 2.1955, "step": 423 }, { "epoch": 0.39422180801491147, "loss_reasoning": 0.520909309387207, "loss_utility": 2.1438517570495605, "step": 423 }, { "epoch": 0.3951537744641193, "grad_norm": 2.2544889267071886, "learning_rate": 1.9295823265447016e-05, "loss": 2.6735, "step": 424 }, { "epoch": 0.3951537744641193, "loss_reasoning": 0.5300150513648987, "loss_utility": 1.8298423290252686, "step": 424 }, { "epoch": 0.3960857409133271, "grad_norm": 1.3742438011450342, "learning_rate": 1.9288919571971005e-05, "loss": 1.9906, "step": 425 }, { "epoch": 0.3960857409133271, "loss_reasoning": 0.6077154874801636, "loss_utility": 1.3589438199996948, "step": 425 }, { "epoch": 0.39701770736253494, "grad_norm": 1.4169774901604768, "learning_rate": 1.9282015878494997e-05, "loss": 1.8452, "step": 426 }, { "epoch": 0.39701770736253494, "loss_reasoning": 0.539426326751709, "loss_utility": 2.061215877532959, "step": 426 }, { "epoch": 0.3979496738117428, "grad_norm": 1.4745825970128605, "learning_rate": 1.9275112185018986e-05, "loss": 2.2356, "step": 427 }, { "epoch": 0.3979496738117428, "loss_reasoning": 0.533618688583374, "loss_utility": 1.4813542366027832, "step": 427 }, { "epoch": 0.39888164026095063, "grad_norm": 1.7843483120649974, "learning_rate": 1.9268208491542975e-05, "loss": 2.3387, "step": 428 }, { "epoch": 0.39888164026095063, "loss_reasoning": 0.5841073989868164, "loss_utility": 2.4794087409973145, "step": 428 }, { "epoch": 0.3998136067101584, "grad_norm": 1.5975240815065421, "learning_rate": 1.9261304798066968e-05, "loss": 2.4476, "step": 429 }, { "epoch": 0.3998136067101584, "loss_reasoning": 0.5092024207115173, "loss_utility": 0.6753746271133423, "step": 429 }, { "epoch": 0.40074557315936626, "grad_norm": 1.5358909214264287, "learning_rate": 1.925440110459096e-05, "loss": 2.3276, "step": 430 }, { "epoch": 0.40074557315936626, "loss_reasoning": 0.5198483467102051, "loss_utility": 2.2636899948120117, "step": 430 }, { "epoch": 0.4016775396085741, "grad_norm": 1.6926049125110192, "learning_rate": 1.924749741111495e-05, "loss": 2.6734, "step": 431 }, { "epoch": 0.4016775396085741, "loss_reasoning": 0.591281533241272, "loss_utility": 1.429164171218872, "step": 431 }, { "epoch": 0.4026095060577819, "grad_norm": 1.4729567852550671, "learning_rate": 1.9240593717638938e-05, "loss": 1.9768, "step": 432 }, { "epoch": 0.4026095060577819, "loss_reasoning": 0.6019313931465149, "loss_utility": 2.6504006385803223, "step": 432 }, { "epoch": 0.40354147250698974, "grad_norm": 1.9505193963215623, "learning_rate": 1.9233690024162927e-05, "loss": 2.8005, "step": 433 }, { "epoch": 0.40354147250698974, "loss_reasoning": 0.6019761562347412, "loss_utility": 1.2908660173416138, "step": 433 }, { "epoch": 0.4044734389561976, "grad_norm": 1.683334750777218, "learning_rate": 1.922678633068692e-05, "loss": 2.1912, "step": 434 }, { "epoch": 0.4044734389561976, "loss_reasoning": 0.5775461196899414, "loss_utility": 1.2088170051574707, "step": 434 }, { "epoch": 0.40540540540540543, "grad_norm": 1.6356466035752903, "learning_rate": 1.921988263721091e-05, "loss": 2.1352, "step": 435 }, { "epoch": 0.40540540540540543, "loss_reasoning": 0.5674018859863281, "loss_utility": 1.8334264755249023, "step": 435 }, { "epoch": 0.4063373718546132, "grad_norm": 1.9357763503540857, "learning_rate": 1.9212978943734898e-05, "loss": 2.6396, "step": 436 }, { "epoch": 0.4063373718546132, "loss_reasoning": 0.6042214035987854, "loss_utility": 1.6251134872436523, "step": 436 }, { "epoch": 0.40726933830382106, "grad_norm": 2.0713181344483123, "learning_rate": 1.920607525025889e-05, "loss": 2.45, "step": 437 }, { "epoch": 0.40726933830382106, "loss_reasoning": 0.5719916224479675, "loss_utility": 1.6332062482833862, "step": 437 }, { "epoch": 0.4082013047530289, "grad_norm": 2.064757876156926, "learning_rate": 1.919917155678288e-05, "loss": 2.3941, "step": 438 }, { "epoch": 0.4082013047530289, "loss_reasoning": 0.4937470853328705, "loss_utility": 1.4379346370697021, "step": 438 }, { "epoch": 0.4091332712022367, "grad_norm": 1.4992681525030145, "learning_rate": 1.919226786330687e-05, "loss": 2.1927, "step": 439 }, { "epoch": 0.4091332712022367, "loss_reasoning": 0.5701416730880737, "loss_utility": 1.6243867874145508, "step": 439 }, { "epoch": 0.41006523765144454, "grad_norm": 3.052576604499769, "learning_rate": 1.918536416983086e-05, "loss": 2.38, "step": 440 }, { "epoch": 0.41006523765144454, "loss_reasoning": 0.5704823732376099, "loss_utility": 1.395397424697876, "step": 440 }, { "epoch": 0.4109972041006524, "grad_norm": 1.5242653353476165, "learning_rate": 1.9178460476354853e-05, "loss": 2.403, "step": 441 }, { "epoch": 0.4109972041006524, "loss_reasoning": 0.5906441807746887, "loss_utility": 1.7926079034805298, "step": 441 }, { "epoch": 0.4119291705498602, "grad_norm": 1.9849625429598903, "learning_rate": 1.9171556782878842e-05, "loss": 2.3683, "step": 442 }, { "epoch": 0.4119291705498602, "loss_reasoning": 0.5285156965255737, "loss_utility": 2.036433696746826, "step": 442 }, { "epoch": 0.412861136999068, "grad_norm": 1.7402116620894617, "learning_rate": 1.916465308940283e-05, "loss": 2.2478, "step": 443 }, { "epoch": 0.412861136999068, "loss_reasoning": 0.5108894109725952, "loss_utility": 1.5937601327896118, "step": 443 }, { "epoch": 0.41379310344827586, "grad_norm": 1.8538246500548543, "learning_rate": 1.9157749395926824e-05, "loss": 2.5048, "step": 444 }, { "epoch": 0.41379310344827586, "loss_reasoning": 0.5821850299835205, "loss_utility": 2.0356171131134033, "step": 444 }, { "epoch": 0.4147250698974837, "grad_norm": 2.0368022880663825, "learning_rate": 1.9150845702450813e-05, "loss": 2.4403, "step": 445 }, { "epoch": 0.4147250698974837, "loss_reasoning": 0.6206262111663818, "loss_utility": 2.3102195262908936, "step": 445 }, { "epoch": 0.41565703634669154, "grad_norm": 1.6357005731438845, "learning_rate": 1.91439420089748e-05, "loss": 2.5368, "step": 446 }, { "epoch": 0.41565703634669154, "loss_reasoning": 0.5664560794830322, "loss_utility": 2.001248598098755, "step": 446 }, { "epoch": 0.41658900279589933, "grad_norm": 2.215587741739171, "learning_rate": 1.9137038315498794e-05, "loss": 2.3694, "step": 447 }, { "epoch": 0.41658900279589933, "loss_reasoning": 0.5703977942466736, "loss_utility": 1.2793996334075928, "step": 447 }, { "epoch": 0.4175209692451072, "grad_norm": 1.3635936850792123, "learning_rate": 1.9130134622022783e-05, "loss": 2.0537, "step": 448 }, { "epoch": 0.4175209692451072, "loss_reasoning": 0.5534675121307373, "loss_utility": 3.1286840438842773, "step": 448 }, { "epoch": 0.418452935694315, "grad_norm": 1.6894975946565454, "learning_rate": 1.9123230928546776e-05, "loss": 2.6933, "step": 449 }, { "epoch": 0.418452935694315, "loss_reasoning": 0.5234108567237854, "loss_utility": 1.9252631664276123, "step": 449 }, { "epoch": 0.4193849021435228, "grad_norm": 2.1929507176183596, "learning_rate": 1.9116327235070765e-05, "loss": 2.4417, "step": 450 }, { "epoch": 0.4193849021435228, "loss_reasoning": 0.532953679561615, "loss_utility": 1.9115194082260132, "step": 450 }, { "epoch": 0.42031686859273065, "grad_norm": 1.6492569009524003, "learning_rate": 1.9109423541594754e-05, "loss": 2.3932, "step": 451 }, { "epoch": 0.42031686859273065, "loss_reasoning": 0.5538425445556641, "loss_utility": 1.9008164405822754, "step": 451 }, { "epoch": 0.4212488350419385, "grad_norm": 1.7783791116743428, "learning_rate": 1.9102519848118746e-05, "loss": 2.4716, "step": 452 }, { "epoch": 0.4212488350419385, "loss_reasoning": 0.5865309834480286, "loss_utility": 2.947524070739746, "step": 452 }, { "epoch": 0.42218080149114634, "grad_norm": 1.673026784212426, "learning_rate": 1.9095616154642735e-05, "loss": 2.663, "step": 453 }, { "epoch": 0.42218080149114634, "loss_reasoning": 0.5320645570755005, "loss_utility": 2.0689821243286133, "step": 453 }, { "epoch": 0.42311276794035413, "grad_norm": 1.3777888381611605, "learning_rate": 1.9088712461166724e-05, "loss": 2.1754, "step": 454 }, { "epoch": 0.42311276794035413, "loss_reasoning": 0.5840680599212646, "loss_utility": 1.9980530738830566, "step": 454 }, { "epoch": 0.424044734389562, "grad_norm": 1.4906924648357531, "learning_rate": 1.9081808767690717e-05, "loss": 2.3535, "step": 455 }, { "epoch": 0.424044734389562, "loss_reasoning": 0.5501950979232788, "loss_utility": 1.463918685913086, "step": 455 }, { "epoch": 0.4249767008387698, "grad_norm": 1.5172960505670725, "learning_rate": 1.9074905074214706e-05, "loss": 2.3841, "step": 456 }, { "epoch": 0.4249767008387698, "loss_reasoning": 0.5486850142478943, "loss_utility": 1.7036442756652832, "step": 456 }, { "epoch": 0.42590866728797766, "grad_norm": 1.6991917448461344, "learning_rate": 1.9068001380738698e-05, "loss": 2.4121, "step": 457 }, { "epoch": 0.42590866728797766, "loss_reasoning": 0.6931982040405273, "loss_utility": 1.500330924987793, "step": 457 }, { "epoch": 0.42684063373718545, "grad_norm": 1.8258234105307476, "learning_rate": 1.9061097687262687e-05, "loss": 2.2103, "step": 458 }, { "epoch": 0.42684063373718545, "loss_reasoning": 0.5768232345581055, "loss_utility": 2.2247891426086426, "step": 458 }, { "epoch": 0.4277726001863933, "grad_norm": 2.375280655996432, "learning_rate": 1.905419399378668e-05, "loss": 2.4616, "step": 459 }, { "epoch": 0.4277726001863933, "loss_reasoning": 0.5381778478622437, "loss_utility": 2.1892337799072266, "step": 459 }, { "epoch": 0.42870456663560114, "grad_norm": 2.0505663051596446, "learning_rate": 1.904729030031067e-05, "loss": 2.4018, "step": 460 }, { "epoch": 0.42870456663560114, "loss_reasoning": 0.5692301988601685, "loss_utility": 1.7493822574615479, "step": 460 }, { "epoch": 0.4296365330848089, "grad_norm": 1.7375020030930899, "learning_rate": 1.9040386606834658e-05, "loss": 2.2555, "step": 461 }, { "epoch": 0.4296365330848089, "loss_reasoning": 0.5411767959594727, "loss_utility": 1.8144924640655518, "step": 461 }, { "epoch": 0.43056849953401677, "grad_norm": 1.8368296321994435, "learning_rate": 1.9033482913358647e-05, "loss": 2.3787, "step": 462 }, { "epoch": 0.43056849953401677, "loss_reasoning": 0.49839240312576294, "loss_utility": 0.7463089227676392, "step": 462 }, { "epoch": 0.4315004659832246, "grad_norm": 1.2926697604692214, "learning_rate": 1.902657921988264e-05, "loss": 1.9578, "step": 463 }, { "epoch": 0.4315004659832246, "loss_reasoning": 0.5247158408164978, "loss_utility": 1.8277523517608643, "step": 463 }, { "epoch": 0.43243243243243246, "grad_norm": 1.7336301379172825, "learning_rate": 1.9019675526406628e-05, "loss": 2.439, "step": 464 }, { "epoch": 0.43243243243243246, "loss_reasoning": 0.5855495929718018, "loss_utility": 1.7344452142715454, "step": 464 }, { "epoch": 0.43336439888164024, "grad_norm": 1.548712037966106, "learning_rate": 1.901277183293062e-05, "loss": 2.6123, "step": 465 }, { "epoch": 0.43336439888164024, "loss_reasoning": 0.5684932470321655, "loss_utility": 1.1755573749542236, "step": 465 }, { "epoch": 0.4342963653308481, "grad_norm": 1.6567647292450318, "learning_rate": 1.900586813945461e-05, "loss": 2.1544, "step": 466 }, { "epoch": 0.4342963653308481, "loss_reasoning": 0.5415317416191101, "loss_utility": 1.602937936782837, "step": 466 }, { "epoch": 0.43522833178005593, "grad_norm": 2.3208812596540014, "learning_rate": 1.8998964445978602e-05, "loss": 2.2131, "step": 467 }, { "epoch": 0.43522833178005593, "loss_reasoning": 0.5177328586578369, "loss_utility": 1.8547695875167847, "step": 467 }, { "epoch": 0.4361602982292637, "grad_norm": 3.065941756208167, "learning_rate": 1.899206075250259e-05, "loss": 2.5605, "step": 468 }, { "epoch": 0.4361602982292637, "loss_reasoning": 0.5666568279266357, "loss_utility": 2.0750255584716797, "step": 468 }, { "epoch": 0.43709226467847156, "grad_norm": 1.4603573744382827, "learning_rate": 1.898515705902658e-05, "loss": 2.2689, "step": 469 }, { "epoch": 0.43709226467847156, "loss_reasoning": 0.539811909198761, "loss_utility": 1.8817058801651, "step": 469 }, { "epoch": 0.4380242311276794, "grad_norm": 1.600012950977941, "learning_rate": 1.8978253365550572e-05, "loss": 2.4535, "step": 470 }, { "epoch": 0.4380242311276794, "loss_reasoning": 0.4999922513961792, "loss_utility": 2.9225106239318848, "step": 470 }, { "epoch": 0.43895619757688725, "grad_norm": 1.9799914454286, "learning_rate": 1.897134967207456e-05, "loss": 2.6163, "step": 471 }, { "epoch": 0.43895619757688725, "loss_reasoning": 0.5991474390029907, "loss_utility": 1.4920662641525269, "step": 471 }, { "epoch": 0.43988816402609504, "grad_norm": 1.544332668206904, "learning_rate": 1.896444597859855e-05, "loss": 2.4547, "step": 472 }, { "epoch": 0.43988816402609504, "loss_reasoning": 0.5158244371414185, "loss_utility": 1.726870059967041, "step": 472 }, { "epoch": 0.4408201304753029, "grad_norm": 1.5424383425642623, "learning_rate": 1.895754228512254e-05, "loss": 2.6592, "step": 473 }, { "epoch": 0.4408201304753029, "loss_reasoning": 0.511902928352356, "loss_utility": 1.3265752792358398, "step": 473 }, { "epoch": 0.44175209692451073, "grad_norm": 1.3590299830904302, "learning_rate": 1.8950638591646532e-05, "loss": 1.877, "step": 474 }, { "epoch": 0.44175209692451073, "loss_reasoning": 0.48955637216567993, "loss_utility": 2.36626935005188, "step": 474 }, { "epoch": 0.4426840633737186, "grad_norm": 1.803831771160611, "learning_rate": 1.894373489817052e-05, "loss": 2.6493, "step": 475 }, { "epoch": 0.4426840633737186, "loss_reasoning": 0.5574763417243958, "loss_utility": 1.4141125679016113, "step": 475 }, { "epoch": 0.44361602982292636, "grad_norm": 1.4209645377571856, "learning_rate": 1.8936831204694513e-05, "loss": 1.9675, "step": 476 }, { "epoch": 0.44361602982292636, "loss_reasoning": 0.5902303457260132, "loss_utility": 1.907254934310913, "step": 476 }, { "epoch": 0.4445479962721342, "grad_norm": 1.4066785854634156, "learning_rate": 1.8929927511218506e-05, "loss": 2.2632, "step": 477 }, { "epoch": 0.4445479962721342, "loss_reasoning": 0.5466403365135193, "loss_utility": 1.6801623106002808, "step": 477 }, { "epoch": 0.44547996272134205, "grad_norm": 2.257769257236011, "learning_rate": 1.8923023817742495e-05, "loss": 2.32, "step": 478 }, { "epoch": 0.44547996272134205, "loss_reasoning": 0.5303167104721069, "loss_utility": 2.1315605640411377, "step": 478 }, { "epoch": 0.44641192917054984, "grad_norm": 1.5250569398995824, "learning_rate": 1.8916120124266484e-05, "loss": 2.2089, "step": 479 }, { "epoch": 0.44641192917054984, "loss_reasoning": 0.5288663506507874, "loss_utility": 1.964674472808838, "step": 479 }, { "epoch": 0.4473438956197577, "grad_norm": 1.8591027759894736, "learning_rate": 1.8909216430790473e-05, "loss": 2.0781, "step": 480 }, { "epoch": 0.4473438956197577, "loss_reasoning": 0.5691120028495789, "loss_utility": 1.9120466709136963, "step": 480 }, { "epoch": 0.4482758620689655, "grad_norm": 1.3614408863352028, "learning_rate": 1.8902312737314465e-05, "loss": 2.0387, "step": 481 }, { "epoch": 0.4482758620689655, "loss_reasoning": 0.5608319044113159, "loss_utility": 1.9319318532943726, "step": 481 }, { "epoch": 0.44920782851817337, "grad_norm": 1.680463354890364, "learning_rate": 1.8895409043838454e-05, "loss": 2.0455, "step": 482 }, { "epoch": 0.44920782851817337, "loss_reasoning": 0.5565509796142578, "loss_utility": 1.4715439081192017, "step": 482 }, { "epoch": 0.45013979496738116, "grad_norm": 1.7513144794224516, "learning_rate": 1.8888505350362443e-05, "loss": 2.7106, "step": 483 }, { "epoch": 0.45013979496738116, "loss_reasoning": 0.5364823341369629, "loss_utility": 2.296506643295288, "step": 483 }, { "epoch": 0.451071761416589, "grad_norm": 1.7674587338503307, "learning_rate": 1.8881601656886436e-05, "loss": 2.1854, "step": 484 }, { "epoch": 0.451071761416589, "loss_reasoning": 0.5672512054443359, "loss_utility": 2.107065200805664, "step": 484 }, { "epoch": 0.45200372786579684, "grad_norm": 1.6119859495027242, "learning_rate": 1.8874697963410425e-05, "loss": 2.2521, "step": 485 }, { "epoch": 0.45200372786579684, "loss_reasoning": 0.5247405767440796, "loss_utility": 1.5862460136413574, "step": 485 }, { "epoch": 0.45293569431500463, "grad_norm": 1.4947766017613586, "learning_rate": 1.8867794269934417e-05, "loss": 2.3229, "step": 486 }, { "epoch": 0.45293569431500463, "loss_reasoning": 0.5803564786911011, "loss_utility": 1.8986382484436035, "step": 486 }, { "epoch": 0.4538676607642125, "grad_norm": 1.62912787419585, "learning_rate": 1.8860890576458406e-05, "loss": 2.3895, "step": 487 }, { "epoch": 0.4538676607642125, "loss_reasoning": 0.5334932804107666, "loss_utility": 0.7677233219146729, "step": 487 }, { "epoch": 0.4547996272134203, "grad_norm": 1.4696144611903064, "learning_rate": 1.88539868829824e-05, "loss": 2.3166, "step": 488 }, { "epoch": 0.4547996272134203, "loss_reasoning": 0.5540090799331665, "loss_utility": 1.3468292951583862, "step": 488 }, { "epoch": 0.45573159366262816, "grad_norm": 1.5008000844114313, "learning_rate": 1.8847083189506388e-05, "loss": 1.7878, "step": 489 }, { "epoch": 0.45573159366262816, "loss_reasoning": 0.4810277819633484, "loss_utility": 2.3988184928894043, "step": 489 }, { "epoch": 0.45666356011183595, "grad_norm": 1.925616894826914, "learning_rate": 1.8840179496030377e-05, "loss": 2.3375, "step": 490 }, { "epoch": 0.45666356011183595, "loss_reasoning": 0.5272434949874878, "loss_utility": 1.6896165609359741, "step": 490 }, { "epoch": 0.4575955265610438, "grad_norm": 1.6867660050021445, "learning_rate": 1.8833275802554366e-05, "loss": 2.2045, "step": 491 }, { "epoch": 0.4575955265610438, "loss_reasoning": 0.5172989368438721, "loss_utility": 2.3214101791381836, "step": 491 }, { "epoch": 0.45852749301025164, "grad_norm": 1.813146593943155, "learning_rate": 1.882637210907836e-05, "loss": 2.5306, "step": 492 }, { "epoch": 0.45852749301025164, "loss_reasoning": 0.5088673233985901, "loss_utility": 2.1245837211608887, "step": 492 }, { "epoch": 0.4594594594594595, "grad_norm": 1.7087119674916558, "learning_rate": 1.8819468415602347e-05, "loss": 2.5689, "step": 493 }, { "epoch": 0.4594594594594595, "loss_reasoning": 0.5508376359939575, "loss_utility": 1.740755558013916, "step": 493 }, { "epoch": 0.4603914259086673, "grad_norm": 1.542766201015873, "learning_rate": 1.881256472212634e-05, "loss": 2.5267, "step": 494 }, { "epoch": 0.4603914259086673, "loss_reasoning": 0.5289093852043152, "loss_utility": 1.6583874225616455, "step": 494 }, { "epoch": 0.4613233923578751, "grad_norm": 1.6280632370653172, "learning_rate": 1.880566102865033e-05, "loss": 2.4132, "step": 495 }, { "epoch": 0.4613233923578751, "loss_reasoning": 0.4952707886695862, "loss_utility": 1.4022479057312012, "step": 495 }, { "epoch": 0.46225535880708296, "grad_norm": 1.6487857079807846, "learning_rate": 1.879875733517432e-05, "loss": 2.3497, "step": 496 }, { "epoch": 0.46225535880708296, "loss_reasoning": 0.5691220164299011, "loss_utility": 1.845024585723877, "step": 496 }, { "epoch": 0.46318732525629075, "grad_norm": 1.7103481252105213, "learning_rate": 1.879185364169831e-05, "loss": 2.4837, "step": 497 }, { "epoch": 0.46318732525629075, "loss_reasoning": 0.5404665470123291, "loss_utility": 1.339310646057129, "step": 497 }, { "epoch": 0.4641192917054986, "grad_norm": 1.5373734000215968, "learning_rate": 1.87849499482223e-05, "loss": 1.6723, "step": 498 }, { "epoch": 0.4641192917054986, "loss_reasoning": 0.542253851890564, "loss_utility": 1.4994906187057495, "step": 498 }, { "epoch": 0.46505125815470644, "grad_norm": 1.4557731382314374, "learning_rate": 1.8778046254746292e-05, "loss": 2.327, "step": 499 }, { "epoch": 0.46505125815470644, "loss_reasoning": 0.5104687809944153, "loss_utility": 2.6482794284820557, "step": 499 }, { "epoch": 0.4659832246039143, "grad_norm": 1.9082835656745585, "learning_rate": 1.877114256127028e-05, "loss": 2.3559, "step": 500 }, { "epoch": 0.4659832246039143, "loss_reasoning": 0.510610818862915, "loss_utility": 1.5204312801361084, "step": 500 }, { "epoch": 0.46691519105312207, "grad_norm": 1.9565385481788609, "learning_rate": 1.876423886779427e-05, "loss": 2.4788, "step": 501 }, { "epoch": 0.46691519105312207, "loss_reasoning": 0.5712047815322876, "loss_utility": 2.131093978881836, "step": 501 }, { "epoch": 0.4678471575023299, "grad_norm": 1.5834959449740078, "learning_rate": 1.8757335174318262e-05, "loss": 2.5624, "step": 502 }, { "epoch": 0.4678471575023299, "loss_reasoning": 0.49253594875335693, "loss_utility": 1.731813669204712, "step": 502 }, { "epoch": 0.46877912395153776, "grad_norm": 2.542581732033682, "learning_rate": 1.875043148084225e-05, "loss": 2.2204, "step": 503 }, { "epoch": 0.46877912395153776, "loss_reasoning": 0.5172770023345947, "loss_utility": 2.0107853412628174, "step": 503 }, { "epoch": 0.46971109040074555, "grad_norm": 1.569539172456591, "learning_rate": 1.8743527787366244e-05, "loss": 2.2056, "step": 504 }, { "epoch": 0.46971109040074555, "loss_reasoning": 0.49100562930107117, "loss_utility": 1.2078943252563477, "step": 504 }, { "epoch": 0.4706430568499534, "grad_norm": 1.5822046531061091, "learning_rate": 1.8736624093890233e-05, "loss": 2.0821, "step": 505 }, { "epoch": 0.4706430568499534, "loss_reasoning": 0.6176034212112427, "loss_utility": 1.1041443347930908, "step": 505 }, { "epoch": 0.47157502329916123, "grad_norm": 1.9093348699239985, "learning_rate": 1.8729720400414225e-05, "loss": 2.0257, "step": 506 }, { "epoch": 0.47157502329916123, "loss_reasoning": 0.5250317454338074, "loss_utility": 1.9148695468902588, "step": 506 }, { "epoch": 0.4725069897483691, "grad_norm": 2.073503597432921, "learning_rate": 1.8722816706938214e-05, "loss": 2.5134, "step": 507 }, { "epoch": 0.4725069897483691, "loss_reasoning": 0.522463858127594, "loss_utility": 2.0770936012268066, "step": 507 }, { "epoch": 0.47343895619757687, "grad_norm": 1.501161873893621, "learning_rate": 1.8715913013462203e-05, "loss": 2.3663, "step": 508 }, { "epoch": 0.47343895619757687, "loss_reasoning": 0.6098424196243286, "loss_utility": 2.0779025554656982, "step": 508 }, { "epoch": 0.4743709226467847, "grad_norm": 1.513973463377085, "learning_rate": 1.8709009319986192e-05, "loss": 2.2565, "step": 509 }, { "epoch": 0.4743709226467847, "loss_reasoning": 0.5170716643333435, "loss_utility": 1.3129181861877441, "step": 509 }, { "epoch": 0.47530288909599255, "grad_norm": 1.4637444758532236, "learning_rate": 1.8702105626510185e-05, "loss": 2.0578, "step": 510 }, { "epoch": 0.47530288909599255, "loss_reasoning": 0.4711269438266754, "loss_utility": 1.613591194152832, "step": 510 }, { "epoch": 0.4762348555452004, "grad_norm": 1.635313566239505, "learning_rate": 1.8695201933034174e-05, "loss": 2.0757, "step": 511 }, { "epoch": 0.4762348555452004, "loss_reasoning": 0.5129826068878174, "loss_utility": 1.4206955432891846, "step": 511 }, { "epoch": 0.4771668219944082, "grad_norm": 1.5336339425283783, "learning_rate": 1.8688298239558166e-05, "loss": 2.1056, "step": 512 }, { "epoch": 0.4771668219944082, "loss_reasoning": 0.5539647340774536, "loss_utility": 1.552317500114441, "step": 512 }, { "epoch": 0.47809878844361603, "grad_norm": 1.507338447280842, "learning_rate": 1.8681394546082155e-05, "loss": 2.0736, "step": 513 }, { "epoch": 0.47809878844361603, "loss_reasoning": 0.4964175522327423, "loss_utility": 1.4726002216339111, "step": 513 }, { "epoch": 0.4790307548928239, "grad_norm": 1.3918772363009015, "learning_rate": 1.8674490852606148e-05, "loss": 2.2267, "step": 514 }, { "epoch": 0.4790307548928239, "loss_reasoning": 0.5465246438980103, "loss_utility": 1.5949699878692627, "step": 514 }, { "epoch": 0.47996272134203166, "grad_norm": 1.5766584695412829, "learning_rate": 1.8667587159130137e-05, "loss": 2.2549, "step": 515 }, { "epoch": 0.47996272134203166, "loss_reasoning": 0.5383058190345764, "loss_utility": 2.0822157859802246, "step": 515 }, { "epoch": 0.4808946877912395, "grad_norm": 1.5061181083281994, "learning_rate": 1.8660683465654126e-05, "loss": 2.1903, "step": 516 }, { "epoch": 0.4808946877912395, "loss_reasoning": 0.559635579586029, "loss_utility": 2.1061153411865234, "step": 516 }, { "epoch": 0.48182665424044735, "grad_norm": 1.7307503256108998, "learning_rate": 1.8653779772178118e-05, "loss": 2.2753, "step": 517 }, { "epoch": 0.48182665424044735, "loss_reasoning": 0.49253812432289124, "loss_utility": 1.3341014385223389, "step": 517 }, { "epoch": 0.4827586206896552, "grad_norm": 1.6701800055099876, "learning_rate": 1.8646876078702107e-05, "loss": 2.1616, "step": 518 }, { "epoch": 0.4827586206896552, "loss_reasoning": 0.5314903855323792, "loss_utility": 2.1009716987609863, "step": 518 }, { "epoch": 0.483690587138863, "grad_norm": 1.535996188603238, "learning_rate": 1.8639972385226096e-05, "loss": 2.3567, "step": 519 }, { "epoch": 0.483690587138863, "loss_reasoning": 0.5267855525016785, "loss_utility": 1.9222886562347412, "step": 519 }, { "epoch": 0.4846225535880708, "grad_norm": 2.245415930833498, "learning_rate": 1.8633068691750085e-05, "loss": 2.3905, "step": 520 }, { "epoch": 0.4846225535880708, "loss_reasoning": 0.5535228252410889, "loss_utility": 1.801855444908142, "step": 520 }, { "epoch": 0.48555452003727867, "grad_norm": 1.5059320869745045, "learning_rate": 1.8626164998274078e-05, "loss": 2.1094, "step": 521 }, { "epoch": 0.48555452003727867, "loss_reasoning": 0.5126775503158569, "loss_utility": 2.1543684005737305, "step": 521 }, { "epoch": 0.4864864864864865, "grad_norm": 1.375408499401654, "learning_rate": 1.861926130479807e-05, "loss": 2.4607, "step": 522 }, { "epoch": 0.4864864864864865, "loss_reasoning": 0.4775778651237488, "loss_utility": 2.0441324710845947, "step": 522 }, { "epoch": 0.4874184529356943, "grad_norm": 1.5347841385791094, "learning_rate": 1.861235761132206e-05, "loss": 2.2872, "step": 523 }, { "epoch": 0.4874184529356943, "loss_reasoning": 0.619217574596405, "loss_utility": 1.329683542251587, "step": 523 }, { "epoch": 0.48835041938490215, "grad_norm": 1.561021346742499, "learning_rate": 1.860545391784605e-05, "loss": 2.239, "step": 524 }, { "epoch": 0.48835041938490215, "loss_reasoning": 0.5182191133499146, "loss_utility": 1.3200795650482178, "step": 524 }, { "epoch": 0.48928238583411, "grad_norm": 1.7233544321233993, "learning_rate": 1.859855022437004e-05, "loss": 2.3657, "step": 525 }, { "epoch": 0.48928238583411, "loss_reasoning": 0.5498396158218384, "loss_utility": 0.5957842469215393, "step": 525 }, { "epoch": 0.4902143522833178, "grad_norm": 2.4066719623872244, "learning_rate": 1.859164653089403e-05, "loss": 2.2199, "step": 526 }, { "epoch": 0.4902143522833178, "loss_reasoning": 0.5385034084320068, "loss_utility": 2.331890821456909, "step": 526 }, { "epoch": 0.4911463187325256, "grad_norm": 1.5534316435451858, "learning_rate": 1.858474283741802e-05, "loss": 2.5658, "step": 527 }, { "epoch": 0.4911463187325256, "loss_reasoning": 0.4968588650226593, "loss_utility": 1.5536634922027588, "step": 527 }, { "epoch": 0.49207828518173347, "grad_norm": 1.8131304523181908, "learning_rate": 1.857783914394201e-05, "loss": 2.077, "step": 528 }, { "epoch": 0.49207828518173347, "loss_reasoning": 0.5801815390586853, "loss_utility": 1.732991099357605, "step": 528 }, { "epoch": 0.4930102516309413, "grad_norm": 1.7839225438462807, "learning_rate": 1.8570935450466e-05, "loss": 2.5389, "step": 529 }, { "epoch": 0.4930102516309413, "loss_reasoning": 0.525707483291626, "loss_utility": 1.9275010824203491, "step": 529 }, { "epoch": 0.4939422180801491, "grad_norm": 1.7441673852545847, "learning_rate": 1.856403175698999e-05, "loss": 2.5555, "step": 530 }, { "epoch": 0.4939422180801491, "loss_reasoning": 0.5170484185218811, "loss_utility": 1.0044485330581665, "step": 530 }, { "epoch": 0.49487418452935694, "grad_norm": 2.0835134534548576, "learning_rate": 1.8557128063513982e-05, "loss": 2.426, "step": 531 }, { "epoch": 0.49487418452935694, "loss_reasoning": 0.523110032081604, "loss_utility": 1.9385497570037842, "step": 531 }, { "epoch": 0.4958061509785648, "grad_norm": 1.4752701446243666, "learning_rate": 1.8550224370037974e-05, "loss": 2.0641, "step": 532 }, { "epoch": 0.4958061509785648, "loss_reasoning": 0.6504535675048828, "loss_utility": 1.1854534149169922, "step": 532 }, { "epoch": 0.4967381174277726, "grad_norm": 1.6406149907366683, "learning_rate": 1.8543320676561963e-05, "loss": 2.1298, "step": 533 }, { "epoch": 0.4967381174277726, "loss_reasoning": 0.6799495220184326, "loss_utility": 2.676500082015991, "step": 533 }, { "epoch": 0.4976700838769804, "grad_norm": 1.3569544654557766, "learning_rate": 1.8536416983085952e-05, "loss": 2.5494, "step": 534 }, { "epoch": 0.4976700838769804, "loss_reasoning": 0.5282658338546753, "loss_utility": 1.5799736976623535, "step": 534 }, { "epoch": 0.49860205032618826, "grad_norm": 2.518540173782443, "learning_rate": 1.8529513289609945e-05, "loss": 1.9094, "step": 535 }, { "epoch": 0.49860205032618826, "loss_reasoning": 0.5269996523857117, "loss_utility": 1.2518551349639893, "step": 535 }, { "epoch": 0.4995340167753961, "grad_norm": 1.9630322315307642, "learning_rate": 1.8522609596133934e-05, "loss": 2.1508, "step": 536 }, { "epoch": 0.4995340167753961, "loss_reasoning": 0.5167027711868286, "loss_utility": 1.5381708145141602, "step": 536 }, { "epoch": 0.5004659832246039, "grad_norm": 1.417458763873134, "learning_rate": 1.8515705902657923e-05, "loss": 2.0918, "step": 537 }, { "epoch": 0.5004659832246039, "loss_reasoning": 0.6240420937538147, "loss_utility": 2.1083779335021973, "step": 537 }, { "epoch": 0.5013979496738118, "grad_norm": 1.8251278372345148, "learning_rate": 1.8508802209181912e-05, "loss": 1.9931, "step": 538 }, { "epoch": 0.5013979496738118, "loss_reasoning": 0.5333184003829956, "loss_utility": 1.5709905624389648, "step": 538 }, { "epoch": 0.5023299161230196, "grad_norm": 1.6007533739252395, "learning_rate": 1.8501898515705904e-05, "loss": 2.276, "step": 539 }, { "epoch": 0.5023299161230196, "loss_reasoning": 0.5186084508895874, "loss_utility": 1.887267827987671, "step": 539 }, { "epoch": 0.5032618825722274, "grad_norm": 1.5891028541317351, "learning_rate": 1.8494994822229893e-05, "loss": 2.481, "step": 540 }, { "epoch": 0.5032618825722274, "loss_reasoning": 0.5352290868759155, "loss_utility": 2.0980143547058105, "step": 540 }, { "epoch": 0.5041938490214353, "grad_norm": 1.7551768617009202, "learning_rate": 1.8488091128753886e-05, "loss": 2.4032, "step": 541 }, { "epoch": 0.5041938490214353, "loss_reasoning": 0.5479936003684998, "loss_utility": 2.5999813079833984, "step": 541 }, { "epoch": 0.5051258154706431, "grad_norm": 1.8975148954961643, "learning_rate": 1.8481187435277875e-05, "loss": 2.3954, "step": 542 }, { "epoch": 0.5051258154706431, "loss_reasoning": 0.5117115378379822, "loss_utility": 1.9274511337280273, "step": 542 }, { "epoch": 0.5060577819198508, "grad_norm": 1.96488675259493, "learning_rate": 1.8474283741801867e-05, "loss": 2.3396, "step": 543 }, { "epoch": 0.5060577819198508, "loss_reasoning": 0.5076459646224976, "loss_utility": 1.3361555337905884, "step": 543 }, { "epoch": 0.5069897483690587, "grad_norm": 1.4057542569173447, "learning_rate": 1.8467380048325856e-05, "loss": 2.0932, "step": 544 }, { "epoch": 0.5069897483690587, "loss_reasoning": 0.5007374286651611, "loss_utility": 1.9312946796417236, "step": 544 }, { "epoch": 0.5079217148182665, "grad_norm": 1.8051176043591513, "learning_rate": 1.8460476354849845e-05, "loss": 2.3122, "step": 545 }, { "epoch": 0.5079217148182665, "loss_reasoning": 0.5222713351249695, "loss_utility": 2.844336748123169, "step": 545 }, { "epoch": 0.5088536812674743, "grad_norm": 1.9679235965986683, "learning_rate": 1.8453572661373838e-05, "loss": 2.7987, "step": 546 }, { "epoch": 0.5088536812674743, "loss_reasoning": 0.5414530634880066, "loss_utility": 2.292663812637329, "step": 546 }, { "epoch": 0.5097856477166822, "grad_norm": 1.6764200691076168, "learning_rate": 1.8446668967897827e-05, "loss": 2.3874, "step": 547 }, { "epoch": 0.5097856477166822, "loss_reasoning": 0.5010660886764526, "loss_utility": 1.90609610080719, "step": 547 }, { "epoch": 0.51071761416589, "grad_norm": 1.3516822663258876, "learning_rate": 1.8439765274421816e-05, "loss": 2.1768, "step": 548 }, { "epoch": 0.51071761416589, "loss_reasoning": 0.5153045654296875, "loss_utility": 2.153322219848633, "step": 548 }, { "epoch": 0.5116495806150979, "grad_norm": 1.576372716759072, "learning_rate": 1.8432861580945808e-05, "loss": 2.7747, "step": 549 }, { "epoch": 0.5116495806150979, "loss_reasoning": 0.5276798605918884, "loss_utility": 2.121565818786621, "step": 549 }, { "epoch": 0.5125815470643057, "grad_norm": 1.7727752684416065, "learning_rate": 1.8425957887469797e-05, "loss": 2.4186, "step": 550 }, { "epoch": 0.5125815470643057, "loss_reasoning": 0.5663894414901733, "loss_utility": 2.164604425430298, "step": 550 }, { "epoch": 0.5135135135135135, "grad_norm": 1.66398188074087, "learning_rate": 1.841905419399379e-05, "loss": 2.3221, "step": 551 }, { "epoch": 0.5135135135135135, "loss_reasoning": 0.48026949167251587, "loss_utility": 2.0092456340789795, "step": 551 }, { "epoch": 0.5144454799627214, "grad_norm": 2.03194149219543, "learning_rate": 1.841215050051778e-05, "loss": 2.3863, "step": 552 }, { "epoch": 0.5144454799627214, "loss_reasoning": 0.48360198736190796, "loss_utility": 1.5487372875213623, "step": 552 }, { "epoch": 0.5153774464119292, "grad_norm": 1.8911076089552008, "learning_rate": 1.8405246807041768e-05, "loss": 2.2385, "step": 553 }, { "epoch": 0.5153774464119292, "loss_reasoning": 0.5120598673820496, "loss_utility": 1.2317529916763306, "step": 553 }, { "epoch": 0.516309412861137, "grad_norm": 1.8997121895647575, "learning_rate": 1.839834311356576e-05, "loss": 2.2662, "step": 554 }, { "epoch": 0.516309412861137, "loss_reasoning": 0.5612105131149292, "loss_utility": 1.9118540287017822, "step": 554 }, { "epoch": 0.5172413793103449, "grad_norm": 2.0263710628102474, "learning_rate": 1.839143942008975e-05, "loss": 2.4822, "step": 555 }, { "epoch": 0.5172413793103449, "loss_reasoning": 0.6333022713661194, "loss_utility": 1.9370287656784058, "step": 555 }, { "epoch": 0.5181733457595527, "grad_norm": 1.5917917624684759, "learning_rate": 1.8384535726613738e-05, "loss": 2.5001, "step": 556 }, { "epoch": 0.5181733457595527, "loss_reasoning": 0.5476895570755005, "loss_utility": 1.3636627197265625, "step": 556 }, { "epoch": 0.5191053122087604, "grad_norm": 1.4910967899677605, "learning_rate": 1.837763203313773e-05, "loss": 2.3478, "step": 557 }, { "epoch": 0.5191053122087604, "loss_reasoning": 0.6414680480957031, "loss_utility": 1.7090227603912354, "step": 557 }, { "epoch": 0.5200372786579683, "grad_norm": 1.695846014099274, "learning_rate": 1.837072833966172e-05, "loss": 2.3891, "step": 558 }, { "epoch": 0.5200372786579683, "loss_reasoning": 0.5646834969520569, "loss_utility": 1.298687219619751, "step": 558 }, { "epoch": 0.5209692451071761, "grad_norm": 1.5889746305668608, "learning_rate": 1.8363824646185712e-05, "loss": 2.0121, "step": 559 }, { "epoch": 0.5209692451071761, "loss_reasoning": 0.5466631650924683, "loss_utility": 1.5525388717651367, "step": 559 }, { "epoch": 0.5219012115563839, "grad_norm": 1.5959359274526825, "learning_rate": 1.83569209527097e-05, "loss": 2.0458, "step": 560 }, { "epoch": 0.5219012115563839, "loss_reasoning": 0.5140120387077332, "loss_utility": 1.5611786842346191, "step": 560 }, { "epoch": 0.5228331780055918, "grad_norm": 1.4916472889687724, "learning_rate": 1.8350017259233694e-05, "loss": 2.1833, "step": 561 }, { "epoch": 0.5228331780055918, "loss_reasoning": 0.5122215151786804, "loss_utility": 1.4857209920883179, "step": 561 }, { "epoch": 0.5237651444547996, "grad_norm": 1.5171766286232453, "learning_rate": 1.8343113565757683e-05, "loss": 1.9908, "step": 562 }, { "epoch": 0.5237651444547996, "loss_reasoning": 0.5577173233032227, "loss_utility": 1.885443091392517, "step": 562 }, { "epoch": 0.5246971109040075, "grad_norm": 1.412964289171189, "learning_rate": 1.833620987228167e-05, "loss": 2.2567, "step": 563 }, { "epoch": 0.5246971109040075, "loss_reasoning": 0.4801177382469177, "loss_utility": 0.5306960344314575, "step": 563 }, { "epoch": 0.5256290773532153, "grad_norm": 1.3363903434639621, "learning_rate": 1.832930617880566e-05, "loss": 1.9138, "step": 564 }, { "epoch": 0.5256290773532153, "loss_reasoning": 0.5221326351165771, "loss_utility": 1.731379747390747, "step": 564 }, { "epoch": 0.5265610438024231, "grad_norm": 1.3858843461486643, "learning_rate": 1.8322402485329653e-05, "loss": 2.0655, "step": 565 }, { "epoch": 0.5265610438024231, "loss_reasoning": 0.5803823471069336, "loss_utility": 1.6723674535751343, "step": 565 }, { "epoch": 0.527493010251631, "grad_norm": 2.417671956568943, "learning_rate": 1.8315498791853642e-05, "loss": 2.3237, "step": 566 }, { "epoch": 0.527493010251631, "loss_reasoning": 0.571154773235321, "loss_utility": 1.4837534427642822, "step": 566 }, { "epoch": 0.5284249767008388, "grad_norm": 1.6370820441098084, "learning_rate": 1.830859509837763e-05, "loss": 2.203, "step": 567 }, { "epoch": 0.5284249767008388, "loss_reasoning": 0.5412797927856445, "loss_utility": 1.6096899509429932, "step": 567 }, { "epoch": 0.5293569431500466, "grad_norm": 1.4581703093635467, "learning_rate": 1.8301691404901624e-05, "loss": 2.1479, "step": 568 }, { "epoch": 0.5293569431500466, "loss_reasoning": 0.44495415687561035, "loss_utility": 2.407062292098999, "step": 568 }, { "epoch": 0.5302889095992545, "grad_norm": 1.837602369435304, "learning_rate": 1.8294787711425616e-05, "loss": 2.5009, "step": 569 }, { "epoch": 0.5302889095992545, "loss_reasoning": 0.5273081660270691, "loss_utility": 1.3393518924713135, "step": 569 }, { "epoch": 0.5312208760484622, "grad_norm": 1.8916113743879253, "learning_rate": 1.8287884017949605e-05, "loss": 2.4217, "step": 570 }, { "epoch": 0.5312208760484622, "loss_reasoning": 0.5085254907608032, "loss_utility": 1.0283209085464478, "step": 570 }, { "epoch": 0.53215284249767, "grad_norm": 1.4459721534601169, "learning_rate": 1.8280980324473594e-05, "loss": 1.9329, "step": 571 }, { "epoch": 0.53215284249767, "loss_reasoning": 0.5620436668395996, "loss_utility": 1.578200101852417, "step": 571 }, { "epoch": 0.5330848089468779, "grad_norm": 1.5461579640624241, "learning_rate": 1.8274076630997587e-05, "loss": 2.3333, "step": 572 }, { "epoch": 0.5330848089468779, "loss_reasoning": 0.5928046703338623, "loss_utility": 1.6159083843231201, "step": 572 }, { "epoch": 0.5340167753960857, "grad_norm": 1.4511534940877098, "learning_rate": 1.8267172937521576e-05, "loss": 2.5239, "step": 573 }, { "epoch": 0.5340167753960857, "loss_reasoning": 0.4962502121925354, "loss_utility": 1.3584749698638916, "step": 573 }, { "epoch": 0.5349487418452936, "grad_norm": 1.4483566979840605, "learning_rate": 1.8260269244045565e-05, "loss": 2.1364, "step": 574 }, { "epoch": 0.5349487418452936, "loss_reasoning": 0.4859367609024048, "loss_utility": 1.525370478630066, "step": 574 }, { "epoch": 0.5358807082945014, "grad_norm": 1.591406934321644, "learning_rate": 1.8253365550569557e-05, "loss": 2.3496, "step": 575 }, { "epoch": 0.5358807082945014, "loss_reasoning": 0.5068644881248474, "loss_utility": 1.68417489528656, "step": 575 }, { "epoch": 0.5368126747437092, "grad_norm": 1.54232703781871, "learning_rate": 1.8246461857093546e-05, "loss": 2.1292, "step": 576 }, { "epoch": 0.5368126747437092, "loss_reasoning": 0.5277612805366516, "loss_utility": 0.892594575881958, "step": 576 }, { "epoch": 0.5377446411929171, "grad_norm": 1.4372974837371206, "learning_rate": 1.8239558163617535e-05, "loss": 2.2749, "step": 577 }, { "epoch": 0.5377446411929171, "loss_reasoning": 0.5128648281097412, "loss_utility": 1.6563973426818848, "step": 577 }, { "epoch": 0.5386766076421249, "grad_norm": 1.6839841587677253, "learning_rate": 1.8232654470141528e-05, "loss": 2.4032, "step": 578 }, { "epoch": 0.5386766076421249, "loss_reasoning": 0.565942645072937, "loss_utility": 1.4574583768844604, "step": 578 }, { "epoch": 0.5396085740913327, "grad_norm": 1.5439686670993698, "learning_rate": 1.822575077666552e-05, "loss": 2.3027, "step": 579 }, { "epoch": 0.5396085740913327, "loss_reasoning": 0.5515561103820801, "loss_utility": 1.5078715085983276, "step": 579 }, { "epoch": 0.5405405405405406, "grad_norm": 1.7819414694223756, "learning_rate": 1.821884708318951e-05, "loss": 2.4684, "step": 580 }, { "epoch": 0.5405405405405406, "loss_reasoning": 0.5388367176055908, "loss_utility": 1.5889849662780762, "step": 580 }, { "epoch": 0.5414725069897484, "grad_norm": 1.298502138145065, "learning_rate": 1.8211943389713498e-05, "loss": 2.4725, "step": 581 }, { "epoch": 0.5414725069897484, "loss_reasoning": 0.6109675168991089, "loss_utility": 0.9760427474975586, "step": 581 }, { "epoch": 0.5424044734389561, "grad_norm": 1.2685424310384386, "learning_rate": 1.8205039696237487e-05, "loss": 2.1722, "step": 582 }, { "epoch": 0.5424044734389561, "loss_reasoning": 0.548893928527832, "loss_utility": 1.5527353286743164, "step": 582 }, { "epoch": 0.543336439888164, "grad_norm": 1.3305781584124872, "learning_rate": 1.819813600276148e-05, "loss": 2.0011, "step": 583 }, { "epoch": 0.543336439888164, "loss_reasoning": 0.5620722770690918, "loss_utility": 1.602471113204956, "step": 583 }, { "epoch": 0.5442684063373718, "grad_norm": 1.3073001660840173, "learning_rate": 1.819123230928547e-05, "loss": 2.1966, "step": 584 }, { "epoch": 0.5442684063373718, "loss_reasoning": 0.5075252652168274, "loss_utility": 1.5942597389221191, "step": 584 }, { "epoch": 0.5452003727865797, "grad_norm": 1.5236308043907365, "learning_rate": 1.8184328615809458e-05, "loss": 2.1834, "step": 585 }, { "epoch": 0.5452003727865797, "loss_reasoning": 0.5035205483436584, "loss_utility": 1.2373411655426025, "step": 585 }, { "epoch": 0.5461323392357875, "grad_norm": 1.5919122540913857, "learning_rate": 1.817742492233345e-05, "loss": 1.9006, "step": 586 }, { "epoch": 0.5461323392357875, "loss_reasoning": 0.5077985525131226, "loss_utility": 2.0160269737243652, "step": 586 }, { "epoch": 0.5470643056849953, "grad_norm": 1.7197869591251478, "learning_rate": 1.817052122885744e-05, "loss": 2.2185, "step": 587 }, { "epoch": 0.5470643056849953, "loss_reasoning": 0.49865075945854187, "loss_utility": 2.32710337638855, "step": 587 }, { "epoch": 0.5479962721342032, "grad_norm": 9.138494174403188, "learning_rate": 1.816361753538143e-05, "loss": 2.5365, "step": 588 }, { "epoch": 0.5479962721342032, "loss_reasoning": 0.5781720876693726, "loss_utility": 2.200345754623413, "step": 588 }, { "epoch": 0.548928238583411, "grad_norm": 1.3558532096617866, "learning_rate": 1.815671384190542e-05, "loss": 2.3526, "step": 589 }, { "epoch": 0.548928238583411, "loss_reasoning": 0.5272273421287537, "loss_utility": 2.5915610790252686, "step": 589 }, { "epoch": 0.5498602050326188, "grad_norm": 1.3837198122217425, "learning_rate": 1.8149810148429413e-05, "loss": 2.2298, "step": 590 }, { "epoch": 0.5498602050326188, "loss_reasoning": 0.5181458592414856, "loss_utility": 1.7736234664916992, "step": 590 }, { "epoch": 0.5507921714818267, "grad_norm": 1.4481556483302909, "learning_rate": 1.8142906454953402e-05, "loss": 2.5286, "step": 591 }, { "epoch": 0.5507921714818267, "loss_reasoning": 0.4827597141265869, "loss_utility": 1.4846618175506592, "step": 591 }, { "epoch": 0.5517241379310345, "grad_norm": 1.3119819638897472, "learning_rate": 1.813600276147739e-05, "loss": 2.0737, "step": 592 }, { "epoch": 0.5517241379310345, "loss_reasoning": 0.539757490158081, "loss_utility": 1.5100979804992676, "step": 592 }, { "epoch": 0.5526561043802423, "grad_norm": 1.6549355928056693, "learning_rate": 1.812909906800138e-05, "loss": 2.2672, "step": 593 }, { "epoch": 0.5526561043802423, "loss_reasoning": 0.5933089256286621, "loss_utility": 2.3474068641662598, "step": 593 }, { "epoch": 0.5535880708294502, "grad_norm": 1.6511580738727332, "learning_rate": 1.8122195374525372e-05, "loss": 2.532, "step": 594 }, { "epoch": 0.5535880708294502, "loss_reasoning": 0.6294708251953125, "loss_utility": 1.7572438716888428, "step": 594 }, { "epoch": 0.554520037278658, "grad_norm": 1.5155644166854518, "learning_rate": 1.811529168104936e-05, "loss": 2.1877, "step": 595 }, { "epoch": 0.554520037278658, "loss_reasoning": 0.47106751799583435, "loss_utility": 2.0291829109191895, "step": 595 }, { "epoch": 0.5554520037278659, "grad_norm": 1.6575710226517077, "learning_rate": 1.8108387987573354e-05, "loss": 2.2308, "step": 596 }, { "epoch": 0.5554520037278659, "loss_reasoning": 0.5609259605407715, "loss_utility": 2.111708164215088, "step": 596 }, { "epoch": 0.5563839701770736, "grad_norm": 1.7410164978640106, "learning_rate": 1.8101484294097343e-05, "loss": 2.4067, "step": 597 }, { "epoch": 0.5563839701770736, "loss_reasoning": 0.5669912099838257, "loss_utility": 1.9768397808074951, "step": 597 }, { "epoch": 0.5573159366262814, "grad_norm": 1.429315051471392, "learning_rate": 1.8094580600621335e-05, "loss": 2.1618, "step": 598 }, { "epoch": 0.5573159366262814, "loss_reasoning": 0.5324498414993286, "loss_utility": 2.0988142490386963, "step": 598 }, { "epoch": 0.5582479030754893, "grad_norm": 1.507914698368224, "learning_rate": 1.8087676907145324e-05, "loss": 2.53, "step": 599 }, { "epoch": 0.5582479030754893, "loss_reasoning": 0.49734050035476685, "loss_utility": 1.9539213180541992, "step": 599 }, { "epoch": 0.5591798695246971, "grad_norm": 1.4114212411171858, "learning_rate": 1.8080773213669313e-05, "loss": 2.3262, "step": 600 }, { "epoch": 0.5591798695246971, "loss_reasoning": 0.5412932634353638, "loss_utility": 1.6673967838287354, "step": 600 }, { "epoch": 0.5601118359739049, "grad_norm": 1.9626804874169583, "learning_rate": 1.8073869520193306e-05, "loss": 2.1793, "step": 601 }, { "epoch": 0.5601118359739049, "loss_reasoning": 0.5466642379760742, "loss_utility": 1.9723913669586182, "step": 601 }, { "epoch": 0.5610438024231128, "grad_norm": 1.1192441424144561, "learning_rate": 1.8066965826717295e-05, "loss": 1.9444, "step": 602 }, { "epoch": 0.5610438024231128, "loss_reasoning": 0.5960562229156494, "loss_utility": 1.3182393312454224, "step": 602 }, { "epoch": 0.5619757688723206, "grad_norm": 1.6081300062877748, "learning_rate": 1.8060062133241284e-05, "loss": 2.1352, "step": 603 }, { "epoch": 0.5619757688723206, "loss_reasoning": 0.5123646259307861, "loss_utility": 1.1899583339691162, "step": 603 }, { "epoch": 0.5629077353215284, "grad_norm": 2.19609469122776, "learning_rate": 1.8053158439765276e-05, "loss": 2.5109, "step": 604 }, { "epoch": 0.5629077353215284, "loss_reasoning": 0.4855717420578003, "loss_utility": 1.5611951351165771, "step": 604 }, { "epoch": 0.5638397017707363, "grad_norm": 1.5604892482488677, "learning_rate": 1.8046254746289265e-05, "loss": 2.362, "step": 605 }, { "epoch": 0.5638397017707363, "loss_reasoning": 0.5478114485740662, "loss_utility": 0.9750139713287354, "step": 605 }, { "epoch": 0.5647716682199441, "grad_norm": 1.6101061369566747, "learning_rate": 1.8039351052813258e-05, "loss": 2.1814, "step": 606 }, { "epoch": 0.5647716682199441, "loss_reasoning": 0.5505566000938416, "loss_utility": 2.289560556411743, "step": 606 }, { "epoch": 0.5657036346691519, "grad_norm": 1.6007947629580752, "learning_rate": 1.8032447359337247e-05, "loss": 2.3989, "step": 607 }, { "epoch": 0.5657036346691519, "loss_reasoning": 0.5677198767662048, "loss_utility": 1.665417194366455, "step": 607 }, { "epoch": 0.5666356011183598, "grad_norm": 1.8077696104045649, "learning_rate": 1.802554366586124e-05, "loss": 2.113, "step": 608 }, { "epoch": 0.5666356011183598, "loss_reasoning": 0.5578348636627197, "loss_utility": 2.0033297538757324, "step": 608 }, { "epoch": 0.5675675675675675, "grad_norm": 1.6597310170004953, "learning_rate": 1.801863997238523e-05, "loss": 2.2624, "step": 609 }, { "epoch": 0.5675675675675675, "loss_reasoning": 0.4890407919883728, "loss_utility": 1.459031105041504, "step": 609 }, { "epoch": 0.5684995340167754, "grad_norm": 1.9606085979170123, "learning_rate": 1.8011736278909217e-05, "loss": 2.1505, "step": 610 }, { "epoch": 0.5684995340167754, "loss_reasoning": 0.5464850664138794, "loss_utility": 2.068354606628418, "step": 610 }, { "epoch": 0.5694315004659832, "grad_norm": 1.5595637502596358, "learning_rate": 1.8004832585433206e-05, "loss": 2.3776, "step": 611 }, { "epoch": 0.5694315004659832, "loss_reasoning": 0.5043385028839111, "loss_utility": 1.757262110710144, "step": 611 }, { "epoch": 0.570363466915191, "grad_norm": 1.6311830818409052, "learning_rate": 1.79979288919572e-05, "loss": 2.2566, "step": 612 }, { "epoch": 0.570363466915191, "loss_reasoning": 0.5208519697189331, "loss_utility": 1.7268530130386353, "step": 612 }, { "epoch": 0.5712954333643989, "grad_norm": 1.8631747093730873, "learning_rate": 1.7991025198481188e-05, "loss": 2.3311, "step": 613 }, { "epoch": 0.5712954333643989, "loss_reasoning": 0.4923871159553528, "loss_utility": 1.1179065704345703, "step": 613 }, { "epoch": 0.5722273998136067, "grad_norm": 1.5465976440489015, "learning_rate": 1.798412150500518e-05, "loss": 2.406, "step": 614 }, { "epoch": 0.5722273998136067, "loss_reasoning": 0.5371050834655762, "loss_utility": 1.7542145252227783, "step": 614 }, { "epoch": 0.5731593662628145, "grad_norm": 1.3359816487101273, "learning_rate": 1.797721781152917e-05, "loss": 2.0057, "step": 615 }, { "epoch": 0.5731593662628145, "loss_reasoning": 0.525409460067749, "loss_utility": 2.1526589393615723, "step": 615 }, { "epoch": 0.5740913327120224, "grad_norm": 1.7164897010569833, "learning_rate": 1.7970314118053162e-05, "loss": 2.4954, "step": 616 }, { "epoch": 0.5740913327120224, "loss_reasoning": 0.5482897162437439, "loss_utility": 1.6245051622390747, "step": 616 }, { "epoch": 0.5750232991612302, "grad_norm": 1.5844338379014582, "learning_rate": 1.796341042457715e-05, "loss": 2.4547, "step": 617 }, { "epoch": 0.5750232991612302, "loss_reasoning": 0.536808967590332, "loss_utility": 1.3496942520141602, "step": 617 }, { "epoch": 0.575955265610438, "grad_norm": 1.4214769829878027, "learning_rate": 1.795650673110114e-05, "loss": 2.3744, "step": 618 }, { "epoch": 0.575955265610438, "loss_reasoning": 0.5529240965843201, "loss_utility": 1.684444546699524, "step": 618 }, { "epoch": 0.5768872320596459, "grad_norm": 1.5417828480721976, "learning_rate": 1.7949603037625132e-05, "loss": 2.1296, "step": 619 }, { "epoch": 0.5768872320596459, "loss_reasoning": 0.5196974277496338, "loss_utility": 1.508062481880188, "step": 619 }, { "epoch": 0.5778191985088537, "grad_norm": 1.9318784131852795, "learning_rate": 1.794269934414912e-05, "loss": 2.0926, "step": 620 }, { "epoch": 0.5778191985088537, "loss_reasoning": 0.5430735349655151, "loss_utility": 0.8581851720809937, "step": 620 }, { "epoch": 0.5787511649580616, "grad_norm": 1.2599282117209232, "learning_rate": 1.793579565067311e-05, "loss": 1.9508, "step": 621 }, { "epoch": 0.5787511649580616, "loss_reasoning": 0.5272073149681091, "loss_utility": 2.3129241466522217, "step": 621 }, { "epoch": 0.5796831314072693, "grad_norm": 1.326354535002313, "learning_rate": 1.79288919571971e-05, "loss": 2.3642, "step": 622 }, { "epoch": 0.5796831314072693, "loss_reasoning": 0.5427833795547485, "loss_utility": 1.130892276763916, "step": 622 }, { "epoch": 0.5806150978564771, "grad_norm": 1.475672788841647, "learning_rate": 1.7921988263721092e-05, "loss": 1.9128, "step": 623 }, { "epoch": 0.5806150978564771, "loss_reasoning": 0.5715820789337158, "loss_utility": 2.3416597843170166, "step": 623 }, { "epoch": 0.581547064305685, "grad_norm": 1.7894069483728188, "learning_rate": 1.7915084570245084e-05, "loss": 2.3502, "step": 624 }, { "epoch": 0.581547064305685, "loss_reasoning": 0.6028883457183838, "loss_utility": 0.9409776926040649, "step": 624 }, { "epoch": 0.5824790307548928, "grad_norm": 1.4276350408079135, "learning_rate": 1.7908180876769073e-05, "loss": 2.4326, "step": 625 }, { "epoch": 0.5824790307548928, "loss_reasoning": 0.5504224896430969, "loss_utility": 2.1096267700195312, "step": 625 }, { "epoch": 0.5834109972041006, "grad_norm": 1.700971034294122, "learning_rate": 1.7901277183293066e-05, "loss": 2.5225, "step": 626 }, { "epoch": 0.5834109972041006, "loss_reasoning": 0.6314215064048767, "loss_utility": 1.244003176689148, "step": 626 }, { "epoch": 0.5843429636533085, "grad_norm": 1.5364259646966756, "learning_rate": 1.7894373489817055e-05, "loss": 2.228, "step": 627 }, { "epoch": 0.5843429636533085, "loss_reasoning": 0.5193115472793579, "loss_utility": 2.402764320373535, "step": 627 }, { "epoch": 0.5852749301025163, "grad_norm": 1.5325328000928606, "learning_rate": 1.7887469796341044e-05, "loss": 2.2964, "step": 628 }, { "epoch": 0.5852749301025163, "loss_reasoning": 0.5478780269622803, "loss_utility": 2.1299331188201904, "step": 628 }, { "epoch": 0.5862068965517241, "grad_norm": 1.6734532059747342, "learning_rate": 1.7880566102865033e-05, "loss": 2.2556, "step": 629 }, { "epoch": 0.5862068965517241, "loss_reasoning": 0.48347043991088867, "loss_utility": 1.8103656768798828, "step": 629 }, { "epoch": 0.587138863000932, "grad_norm": 2.3256593266646965, "learning_rate": 1.7873662409389025e-05, "loss": 2.3564, "step": 630 }, { "epoch": 0.587138863000932, "loss_reasoning": 0.5618557333946228, "loss_utility": 1.5303082466125488, "step": 630 }, { "epoch": 0.5880708294501398, "grad_norm": 1.615669421526156, "learning_rate": 1.7866758715913014e-05, "loss": 2.3155, "step": 631 }, { "epoch": 0.5880708294501398, "loss_reasoning": 0.5575630068778992, "loss_utility": 1.9449145793914795, "step": 631 }, { "epoch": 0.5890027958993477, "grad_norm": 1.362784982222196, "learning_rate": 1.7859855022437003e-05, "loss": 2.1325, "step": 632 }, { "epoch": 0.5890027958993477, "loss_reasoning": 0.5296851396560669, "loss_utility": 1.5089210271835327, "step": 632 }, { "epoch": 0.5899347623485555, "grad_norm": 1.9613788694540708, "learning_rate": 1.7852951328960996e-05, "loss": 2.3882, "step": 633 }, { "epoch": 0.5899347623485555, "loss_reasoning": 0.5477651357650757, "loss_utility": 1.973421573638916, "step": 633 }, { "epoch": 0.5908667287977633, "grad_norm": 1.3675526415502914, "learning_rate": 1.7846047635484988e-05, "loss": 2.2676, "step": 634 }, { "epoch": 0.5908667287977633, "loss_reasoning": 0.5122578144073486, "loss_utility": 1.6637217998504639, "step": 634 }, { "epoch": 0.5917986952469712, "grad_norm": 1.4602491196068461, "learning_rate": 1.7839143942008977e-05, "loss": 2.3529, "step": 635 }, { "epoch": 0.5917986952469712, "loss_reasoning": 0.508966863155365, "loss_utility": 1.4374912977218628, "step": 635 }, { "epoch": 0.5927306616961789, "grad_norm": 1.5235465482407688, "learning_rate": 1.7832240248532966e-05, "loss": 2.1642, "step": 636 }, { "epoch": 0.5927306616961789, "loss_reasoning": 0.5383109450340271, "loss_utility": 1.9561352729797363, "step": 636 }, { "epoch": 0.5936626281453867, "grad_norm": 1.4604409134743754, "learning_rate": 1.782533655505696e-05, "loss": 2.4523, "step": 637 }, { "epoch": 0.5936626281453867, "loss_reasoning": 0.5523334741592407, "loss_utility": 1.4177699089050293, "step": 637 }, { "epoch": 0.5945945945945946, "grad_norm": 2.2895541837651656, "learning_rate": 1.7818432861580948e-05, "loss": 2.3742, "step": 638 }, { "epoch": 0.5945945945945946, "loss_reasoning": 0.6186740398406982, "loss_utility": 1.817367434501648, "step": 638 }, { "epoch": 0.5955265610438024, "grad_norm": 1.5474415286479113, "learning_rate": 1.7811529168104937e-05, "loss": 2.3523, "step": 639 }, { "epoch": 0.5955265610438024, "loss_reasoning": 0.5090248584747314, "loss_utility": 1.3147908449172974, "step": 639 }, { "epoch": 0.5964585274930102, "grad_norm": 2.241608726126692, "learning_rate": 1.7804625474628926e-05, "loss": 2.1168, "step": 640 }, { "epoch": 0.5964585274930102, "loss_reasoning": 0.4989926517009735, "loss_utility": 1.6489416360855103, "step": 640 }, { "epoch": 0.5973904939422181, "grad_norm": 1.5995474051436607, "learning_rate": 1.7797721781152918e-05, "loss": 2.1551, "step": 641 }, { "epoch": 0.5973904939422181, "loss_reasoning": 0.6152982115745544, "loss_utility": 1.6272811889648438, "step": 641 }, { "epoch": 0.5983224603914259, "grad_norm": 1.21916274285794, "learning_rate": 1.7790818087676907e-05, "loss": 2.1634, "step": 642 }, { "epoch": 0.5983224603914259, "loss_reasoning": 0.59907066822052, "loss_utility": 1.809816837310791, "step": 642 }, { "epoch": 0.5992544268406338, "grad_norm": 1.5699971230269767, "learning_rate": 1.77839143942009e-05, "loss": 2.2086, "step": 643 }, { "epoch": 0.5992544268406338, "loss_reasoning": 0.4946895241737366, "loss_utility": 1.6598718166351318, "step": 643 }, { "epoch": 0.6001863932898416, "grad_norm": 1.4132057105642526, "learning_rate": 1.777701070072489e-05, "loss": 2.1766, "step": 644 }, { "epoch": 0.6001863932898416, "loss_reasoning": 0.5210447907447815, "loss_utility": 2.522491455078125, "step": 644 }, { "epoch": 0.6011183597390494, "grad_norm": 1.594526277719621, "learning_rate": 1.777010700724888e-05, "loss": 2.2716, "step": 645 }, { "epoch": 0.6011183597390494, "loss_reasoning": 0.5546929836273193, "loss_utility": 1.3742868900299072, "step": 645 }, { "epoch": 0.6020503261882573, "grad_norm": 1.4332975968596873, "learning_rate": 1.776320331377287e-05, "loss": 1.8394, "step": 646 }, { "epoch": 0.6020503261882573, "loss_reasoning": 0.6077944040298462, "loss_utility": 2.3594985008239746, "step": 646 }, { "epoch": 0.6029822926374651, "grad_norm": 1.8156052304951886, "learning_rate": 1.775629962029686e-05, "loss": 2.2048, "step": 647 }, { "epoch": 0.6029822926374651, "loss_reasoning": 0.5747516751289368, "loss_utility": 0.976352870464325, "step": 647 }, { "epoch": 0.6039142590866728, "grad_norm": 1.5822863496893376, "learning_rate": 1.774939592682085e-05, "loss": 2.0885, "step": 648 }, { "epoch": 0.6039142590866728, "loss_reasoning": 0.47127997875213623, "loss_utility": 1.4137673377990723, "step": 648 }, { "epoch": 0.6048462255358807, "grad_norm": 1.7913107129595143, "learning_rate": 1.774249223334484e-05, "loss": 2.0027, "step": 649 }, { "epoch": 0.6048462255358807, "loss_reasoning": 0.622641384601593, "loss_utility": 1.3630356788635254, "step": 649 }, { "epoch": 0.6057781919850885, "grad_norm": 1.3316038968379296, "learning_rate": 1.773558853986883e-05, "loss": 1.9028, "step": 650 }, { "epoch": 0.6057781919850885, "loss_reasoning": 0.5566471815109253, "loss_utility": 1.1249064207077026, "step": 650 }, { "epoch": 0.6067101584342963, "grad_norm": 1.7522381831792089, "learning_rate": 1.7728684846392822e-05, "loss": 2.159, "step": 651 }, { "epoch": 0.6067101584342963, "loss_reasoning": 0.5725723505020142, "loss_utility": 0.8497283458709717, "step": 651 }, { "epoch": 0.6076421248835042, "grad_norm": 1.5037199877759888, "learning_rate": 1.772178115291681e-05, "loss": 2.0295, "step": 652 }, { "epoch": 0.6076421248835042, "loss_reasoning": 0.5296659469604492, "loss_utility": 2.2780375480651855, "step": 652 }, { "epoch": 0.608574091332712, "grad_norm": 1.4461696405036393, "learning_rate": 1.7714877459440804e-05, "loss": 2.2811, "step": 653 }, { "epoch": 0.608574091332712, "loss_reasoning": 0.49815258383750916, "loss_utility": 1.5195446014404297, "step": 653 }, { "epoch": 0.6095060577819198, "grad_norm": 1.3730700711313004, "learning_rate": 1.7707973765964793e-05, "loss": 2.2944, "step": 654 }, { "epoch": 0.6095060577819198, "loss_reasoning": 0.5151928663253784, "loss_utility": 1.9948315620422363, "step": 654 }, { "epoch": 0.6104380242311277, "grad_norm": 1.8966375699918967, "learning_rate": 1.7701070072488782e-05, "loss": 2.2692, "step": 655 }, { "epoch": 0.6104380242311277, "loss_reasoning": 0.5579550266265869, "loss_utility": 1.6614596843719482, "step": 655 }, { "epoch": 0.6113699906803355, "grad_norm": 1.754864095779408, "learning_rate": 1.7694166379012774e-05, "loss": 2.2058, "step": 656 }, { "epoch": 0.6113699906803355, "loss_reasoning": 0.5902594327926636, "loss_utility": 2.0049147605895996, "step": 656 }, { "epoch": 0.6123019571295434, "grad_norm": 1.7880117256793548, "learning_rate": 1.7687262685536763e-05, "loss": 2.3642, "step": 657 }, { "epoch": 0.6123019571295434, "loss_reasoning": 0.508385419845581, "loss_utility": 1.828270673751831, "step": 657 }, { "epoch": 0.6132339235787512, "grad_norm": 1.4186246363839567, "learning_rate": 1.7680358992060752e-05, "loss": 2.2248, "step": 658 }, { "epoch": 0.6132339235787512, "loss_reasoning": 0.42833855748176575, "loss_utility": 1.3461254835128784, "step": 658 }, { "epoch": 0.614165890027959, "grad_norm": 1.2133579192845743, "learning_rate": 1.7673455298584745e-05, "loss": 1.9796, "step": 659 }, { "epoch": 0.614165890027959, "loss_reasoning": 0.550904393196106, "loss_utility": 1.916204571723938, "step": 659 }, { "epoch": 0.6150978564771669, "grad_norm": 1.6224269739117416, "learning_rate": 1.7666551605108734e-05, "loss": 1.9517, "step": 660 }, { "epoch": 0.6150978564771669, "loss_reasoning": 0.488017201423645, "loss_utility": 1.529475450515747, "step": 660 }, { "epoch": 0.6160298229263746, "grad_norm": 1.630370186178139, "learning_rate": 1.7659647911632726e-05, "loss": 1.9756, "step": 661 }, { "epoch": 0.6160298229263746, "loss_reasoning": 0.4871074855327606, "loss_utility": 1.0160627365112305, "step": 661 }, { "epoch": 0.6169617893755824, "grad_norm": 1.2598268668579649, "learning_rate": 1.7652744218156715e-05, "loss": 1.8629, "step": 662 }, { "epoch": 0.6169617893755824, "loss_reasoning": 0.5864755511283875, "loss_utility": 1.4601309299468994, "step": 662 }, { "epoch": 0.6178937558247903, "grad_norm": 1.3409021763267748, "learning_rate": 1.7645840524680708e-05, "loss": 1.9934, "step": 663 }, { "epoch": 0.6178937558247903, "loss_reasoning": 0.5775175094604492, "loss_utility": 1.862297534942627, "step": 663 }, { "epoch": 0.6188257222739981, "grad_norm": 1.7788647332572756, "learning_rate": 1.7638936831204697e-05, "loss": 2.1934, "step": 664 }, { "epoch": 0.6188257222739981, "loss_reasoning": 0.5479811429977417, "loss_utility": 1.7413527965545654, "step": 664 }, { "epoch": 0.6197576887232059, "grad_norm": 1.286779386411858, "learning_rate": 1.7632033137728686e-05, "loss": 2.3561, "step": 665 }, { "epoch": 0.6197576887232059, "loss_reasoning": 0.5755351781845093, "loss_utility": 2.2697930335998535, "step": 665 }, { "epoch": 0.6206896551724138, "grad_norm": 1.5250987013791995, "learning_rate": 1.7625129444252678e-05, "loss": 2.2756, "step": 666 }, { "epoch": 0.6206896551724138, "loss_reasoning": 0.5031300187110901, "loss_utility": 1.5816833972930908, "step": 666 }, { "epoch": 0.6216216216216216, "grad_norm": 1.32081746005974, "learning_rate": 1.7618225750776667e-05, "loss": 2.4839, "step": 667 }, { "epoch": 0.6216216216216216, "loss_reasoning": 0.5223464965820312, "loss_utility": 1.6774276494979858, "step": 667 }, { "epoch": 0.6225535880708295, "grad_norm": 1.4938449267925447, "learning_rate": 1.7611322057300656e-05, "loss": 2.146, "step": 668 }, { "epoch": 0.6225535880708295, "loss_reasoning": 0.5631036758422852, "loss_utility": 2.17378306388855, "step": 668 }, { "epoch": 0.6234855545200373, "grad_norm": 1.8172976792030522, "learning_rate": 1.7604418363824645e-05, "loss": 2.3365, "step": 669 }, { "epoch": 0.6234855545200373, "loss_reasoning": 0.5609008073806763, "loss_utility": 2.082091808319092, "step": 669 }, { "epoch": 0.6244175209692451, "grad_norm": 2.1632408719546925, "learning_rate": 1.7597514670348638e-05, "loss": 2.2473, "step": 670 }, { "epoch": 0.6244175209692451, "loss_reasoning": 0.5924201011657715, "loss_utility": 0.9648774862289429, "step": 670 }, { "epoch": 0.625349487418453, "grad_norm": 1.3570812822938616, "learning_rate": 1.759061097687263e-05, "loss": 2.1685, "step": 671 }, { "epoch": 0.625349487418453, "loss_reasoning": 0.5112905502319336, "loss_utility": 1.8186650276184082, "step": 671 }, { "epoch": 0.6262814538676608, "grad_norm": 1.5083066110112133, "learning_rate": 1.758370728339662e-05, "loss": 2.253, "step": 672 }, { "epoch": 0.6262814538676608, "loss_reasoning": 0.49149107933044434, "loss_utility": 1.558524250984192, "step": 672 }, { "epoch": 0.6272134203168686, "grad_norm": 1.3016071925309023, "learning_rate": 1.7576803589920608e-05, "loss": 2.1958, "step": 673 }, { "epoch": 0.6272134203168686, "loss_reasoning": 0.45767807960510254, "loss_utility": 1.0988333225250244, "step": 673 }, { "epoch": 0.6281453867660765, "grad_norm": 1.598508542545289, "learning_rate": 1.75698998964446e-05, "loss": 2.1611, "step": 674 }, { "epoch": 0.6281453867660765, "loss_reasoning": 0.5473980903625488, "loss_utility": 1.982709288597107, "step": 674 }, { "epoch": 0.6290773532152842, "grad_norm": 1.4358153087179601, "learning_rate": 1.756299620296859e-05, "loss": 1.9952, "step": 675 }, { "epoch": 0.6290773532152842, "loss_reasoning": 0.5702227354049683, "loss_utility": 2.041086196899414, "step": 675 }, { "epoch": 0.630009319664492, "grad_norm": 1.6042102869922157, "learning_rate": 1.755609250949258e-05, "loss": 2.5373, "step": 676 }, { "epoch": 0.630009319664492, "loss_reasoning": 0.531679093837738, "loss_utility": 2.145423650741577, "step": 676 }, { "epoch": 0.6309412861136999, "grad_norm": 1.6803403730832904, "learning_rate": 1.754918881601657e-05, "loss": 2.3919, "step": 677 }, { "epoch": 0.6309412861136999, "loss_reasoning": 0.5311025381088257, "loss_utility": 2.1061835289001465, "step": 677 }, { "epoch": 0.6318732525629077, "grad_norm": 1.484896110734703, "learning_rate": 1.754228512254056e-05, "loss": 2.5615, "step": 678 }, { "epoch": 0.6318732525629077, "loss_reasoning": 0.5266562700271606, "loss_utility": 0.9598944187164307, "step": 678 }, { "epoch": 0.6328052190121156, "grad_norm": 1.695931938007366, "learning_rate": 1.753538142906455e-05, "loss": 2.4557, "step": 679 }, { "epoch": 0.6328052190121156, "loss_reasoning": 0.6057389378547668, "loss_utility": 2.116356372833252, "step": 679 }, { "epoch": 0.6337371854613234, "grad_norm": 1.633694582192121, "learning_rate": 1.752847773558854e-05, "loss": 2.3757, "step": 680 }, { "epoch": 0.6337371854613234, "loss_reasoning": 0.5414182543754578, "loss_utility": 2.3138794898986816, "step": 680 }, { "epoch": 0.6346691519105312, "grad_norm": 1.3542153385417381, "learning_rate": 1.7521574042112534e-05, "loss": 2.3477, "step": 681 }, { "epoch": 0.6346691519105312, "loss_reasoning": 0.557140588760376, "loss_utility": 1.627561092376709, "step": 681 }, { "epoch": 0.6356011183597391, "grad_norm": 1.4059103237048396, "learning_rate": 1.7514670348636523e-05, "loss": 2.3069, "step": 682 }, { "epoch": 0.6356011183597391, "loss_reasoning": 0.5713567733764648, "loss_utility": 1.7316672801971436, "step": 682 }, { "epoch": 0.6365330848089469, "grad_norm": 1.3633767264159082, "learning_rate": 1.7507766655160512e-05, "loss": 2.2396, "step": 683 }, { "epoch": 0.6365330848089469, "loss_reasoning": 0.5564888715744019, "loss_utility": 2.1827645301818848, "step": 683 }, { "epoch": 0.6374650512581547, "grad_norm": 1.692318479990547, "learning_rate": 1.75008629616845e-05, "loss": 2.1977, "step": 684 }, { "epoch": 0.6374650512581547, "loss_reasoning": 0.5616147518157959, "loss_utility": 2.1547560691833496, "step": 684 }, { "epoch": 0.6383970177073626, "grad_norm": 1.4326299442483954, "learning_rate": 1.7493959268208494e-05, "loss": 2.3468, "step": 685 }, { "epoch": 0.6383970177073626, "loss_reasoning": 0.5528532266616821, "loss_utility": 1.6759543418884277, "step": 685 }, { "epoch": 0.6393289841565704, "grad_norm": 1.5427611017489284, "learning_rate": 1.7487055574732483e-05, "loss": 2.107, "step": 686 }, { "epoch": 0.6393289841565704, "loss_reasoning": 0.5332674384117126, "loss_utility": 1.5213830471038818, "step": 686 }, { "epoch": 0.6402609506057781, "grad_norm": 1.5914310913834935, "learning_rate": 1.748015188125647e-05, "loss": 2.0922, "step": 687 }, { "epoch": 0.6402609506057781, "loss_reasoning": 0.5786629915237427, "loss_utility": 1.6384141445159912, "step": 687 }, { "epoch": 0.641192917054986, "grad_norm": 1.5860327585406315, "learning_rate": 1.7473248187780464e-05, "loss": 2.1622, "step": 688 }, { "epoch": 0.641192917054986, "loss_reasoning": 0.47311440110206604, "loss_utility": 1.149590015411377, "step": 688 }, { "epoch": 0.6421248835041938, "grad_norm": 1.3734353676598363, "learning_rate": 1.7466344494304453e-05, "loss": 2.391, "step": 689 }, { "epoch": 0.6421248835041938, "loss_reasoning": 0.4910590648651123, "loss_utility": 2.0110275745391846, "step": 689 }, { "epoch": 0.6430568499534017, "grad_norm": 1.5396734145502187, "learning_rate": 1.7459440800828446e-05, "loss": 2.1372, "step": 690 }, { "epoch": 0.6430568499534017, "loss_reasoning": 0.5761613845825195, "loss_utility": 1.3754833936691284, "step": 690 }, { "epoch": 0.6439888164026095, "grad_norm": 1.6862064771046243, "learning_rate": 1.7452537107352435e-05, "loss": 1.9543, "step": 691 }, { "epoch": 0.6439888164026095, "loss_reasoning": 0.5273497700691223, "loss_utility": 1.4101150035858154, "step": 691 }, { "epoch": 0.6449207828518173, "grad_norm": 1.4311293764492856, "learning_rate": 1.7445633413876427e-05, "loss": 2.0159, "step": 692 }, { "epoch": 0.6449207828518173, "loss_reasoning": 0.47663214802742004, "loss_utility": 1.559524416923523, "step": 692 }, { "epoch": 0.6458527493010252, "grad_norm": 1.4932915661397483, "learning_rate": 1.7438729720400416e-05, "loss": 2.45, "step": 693 }, { "epoch": 0.6458527493010252, "loss_reasoning": 0.5556049346923828, "loss_utility": 1.9386340379714966, "step": 693 }, { "epoch": 0.646784715750233, "grad_norm": 1.363749580682623, "learning_rate": 1.7431826026924405e-05, "loss": 1.8846, "step": 694 }, { "epoch": 0.646784715750233, "loss_reasoning": 0.5295621156692505, "loss_utility": 1.814348578453064, "step": 694 }, { "epoch": 0.6477166821994408, "grad_norm": 1.3615910048762867, "learning_rate": 1.7424922333448394e-05, "loss": 2.2277, "step": 695 }, { "epoch": 0.6477166821994408, "loss_reasoning": 0.5975823998451233, "loss_utility": 1.6429269313812256, "step": 695 }, { "epoch": 0.6486486486486487, "grad_norm": 1.5711652228369724, "learning_rate": 1.7418018639972387e-05, "loss": 2.2137, "step": 696 }, { "epoch": 0.6486486486486487, "loss_reasoning": 0.5468980073928833, "loss_utility": 1.6852903366088867, "step": 696 }, { "epoch": 0.6495806150978565, "grad_norm": 1.5578202382409319, "learning_rate": 1.7411114946496376e-05, "loss": 1.9677, "step": 697 }, { "epoch": 0.6495806150978565, "loss_reasoning": 0.5056993961334229, "loss_utility": 1.9506572484970093, "step": 697 }, { "epoch": 0.6505125815470643, "grad_norm": 1.7959521686146156, "learning_rate": 1.7404211253020368e-05, "loss": 2.4906, "step": 698 }, { "epoch": 0.6505125815470643, "loss_reasoning": 0.5066046714782715, "loss_utility": 1.786301612854004, "step": 698 }, { "epoch": 0.6514445479962722, "grad_norm": 2.4407752582083595, "learning_rate": 1.7397307559544357e-05, "loss": 2.403, "step": 699 }, { "epoch": 0.6514445479962722, "loss_reasoning": 0.5798237323760986, "loss_utility": 1.5326322317123413, "step": 699 }, { "epoch": 0.65237651444548, "grad_norm": 1.884386310788006, "learning_rate": 1.739040386606835e-05, "loss": 2.2515, "step": 700 }, { "epoch": 0.65237651444548, "loss_reasoning": 0.5470842123031616, "loss_utility": 1.577014446258545, "step": 700 }, { "epoch": 0.6533084808946877, "grad_norm": 1.377315195180737, "learning_rate": 1.738350017259234e-05, "loss": 2.241, "step": 701 }, { "epoch": 0.6533084808946877, "loss_reasoning": 0.5796852111816406, "loss_utility": 2.0835094451904297, "step": 701 }, { "epoch": 0.6542404473438956, "grad_norm": 1.7116353694285507, "learning_rate": 1.7376596479116328e-05, "loss": 2.46, "step": 702 }, { "epoch": 0.6542404473438956, "loss_reasoning": 0.5871039032936096, "loss_utility": 2.4289636611938477, "step": 702 }, { "epoch": 0.6551724137931034, "grad_norm": 1.3845630044990729, "learning_rate": 1.736969278564032e-05, "loss": 2.4435, "step": 703 }, { "epoch": 0.6551724137931034, "loss_reasoning": 0.6006608009338379, "loss_utility": 2.1808040142059326, "step": 703 }, { "epoch": 0.6561043802423113, "grad_norm": 1.6650844260166973, "learning_rate": 1.736278909216431e-05, "loss": 2.3126, "step": 704 }, { "epoch": 0.6561043802423113, "loss_reasoning": 0.5333585143089294, "loss_utility": 1.4202697277069092, "step": 704 }, { "epoch": 0.6570363466915191, "grad_norm": 1.236300184937403, "learning_rate": 1.7355885398688298e-05, "loss": 2.1542, "step": 705 }, { "epoch": 0.6570363466915191, "loss_reasoning": 0.4924170672893524, "loss_utility": 1.6043198108673096, "step": 705 }, { "epoch": 0.6579683131407269, "grad_norm": 1.5463319776368036, "learning_rate": 1.734898170521229e-05, "loss": 2.1005, "step": 706 }, { "epoch": 0.6579683131407269, "loss_reasoning": 0.5434353351593018, "loss_utility": 1.5498961210250854, "step": 706 }, { "epoch": 0.6589002795899348, "grad_norm": 1.5293404987457277, "learning_rate": 1.734207801173628e-05, "loss": 2.5306, "step": 707 }, { "epoch": 0.6589002795899348, "loss_reasoning": 0.5407015085220337, "loss_utility": 1.3306777477264404, "step": 707 }, { "epoch": 0.6598322460391426, "grad_norm": 1.6202138596412383, "learning_rate": 1.7335174318260272e-05, "loss": 1.9079, "step": 708 }, { "epoch": 0.6598322460391426, "loss_reasoning": 0.6269479393959045, "loss_utility": 1.8241767883300781, "step": 708 }, { "epoch": 0.6607642124883504, "grad_norm": 1.5014443815385712, "learning_rate": 1.732827062478426e-05, "loss": 2.5457, "step": 709 }, { "epoch": 0.6607642124883504, "loss_reasoning": 0.4717077314853668, "loss_utility": 1.6072410345077515, "step": 709 }, { "epoch": 0.6616961789375583, "grad_norm": 1.3394385778702174, "learning_rate": 1.7321366931308253e-05, "loss": 2.1705, "step": 710 }, { "epoch": 0.6616961789375583, "loss_reasoning": 0.4352530241012573, "loss_utility": 1.8873305320739746, "step": 710 }, { "epoch": 0.6626281453867661, "grad_norm": 1.6770407532063325, "learning_rate": 1.7314463237832242e-05, "loss": 2.2345, "step": 711 }, { "epoch": 0.6626281453867661, "loss_reasoning": 0.5087963938713074, "loss_utility": 2.729306697845459, "step": 711 }, { "epoch": 0.6635601118359739, "grad_norm": 1.3842198078200632, "learning_rate": 1.730755954435623e-05, "loss": 2.4639, "step": 712 }, { "epoch": 0.6635601118359739, "loss_reasoning": 0.5261638164520264, "loss_utility": 1.3350727558135986, "step": 712 }, { "epoch": 0.6644920782851818, "grad_norm": 1.6249023851161888, "learning_rate": 1.730065585088022e-05, "loss": 2.0274, "step": 713 }, { "epoch": 0.6644920782851818, "loss_reasoning": 0.5149037837982178, "loss_utility": 1.9344285726547241, "step": 713 }, { "epoch": 0.6654240447343895, "grad_norm": 1.3193990231189645, "learning_rate": 1.7293752157404213e-05, "loss": 2.3443, "step": 714 }, { "epoch": 0.6654240447343895, "loss_reasoning": 0.561618447303772, "loss_utility": 1.3463889360427856, "step": 714 }, { "epoch": 0.6663560111835974, "grad_norm": 1.3779323296735222, "learning_rate": 1.7286848463928202e-05, "loss": 2.1569, "step": 715 }, { "epoch": 0.6663560111835974, "loss_reasoning": 0.5457118153572083, "loss_utility": 1.3740438222885132, "step": 715 }, { "epoch": 0.6672879776328052, "grad_norm": 1.3684725104717212, "learning_rate": 1.7279944770452194e-05, "loss": 1.9898, "step": 716 }, { "epoch": 0.6672879776328052, "loss_reasoning": 0.4770028591156006, "loss_utility": 2.166950225830078, "step": 716 }, { "epoch": 0.668219944082013, "grad_norm": 1.2836490931448257, "learning_rate": 1.7273041076976183e-05, "loss": 2.3585, "step": 717 }, { "epoch": 0.668219944082013, "loss_reasoning": 0.5700624585151672, "loss_utility": 1.737098217010498, "step": 717 }, { "epoch": 0.6691519105312209, "grad_norm": 1.2642978430194372, "learning_rate": 1.7266137383500176e-05, "loss": 2.2036, "step": 718 }, { "epoch": 0.6691519105312209, "loss_reasoning": 0.5108978152275085, "loss_utility": 1.927652359008789, "step": 718 }, { "epoch": 0.6700838769804287, "grad_norm": 1.4752716580842202, "learning_rate": 1.7259233690024165e-05, "loss": 2.3106, "step": 719 }, { "epoch": 0.6700838769804287, "loss_reasoning": 0.5360509753227234, "loss_utility": 1.1341863870620728, "step": 719 }, { "epoch": 0.6710158434296365, "grad_norm": 1.4862466627326851, "learning_rate": 1.7252329996548154e-05, "loss": 2.3094, "step": 720 }, { "epoch": 0.6710158434296365, "loss_reasoning": 0.5792790651321411, "loss_utility": 1.5859158039093018, "step": 720 }, { "epoch": 0.6719478098788444, "grad_norm": 1.6361154635968027, "learning_rate": 1.7245426303072146e-05, "loss": 2.1003, "step": 721 }, { "epoch": 0.6719478098788444, "loss_reasoning": 0.4817737340927124, "loss_utility": 1.8263256549835205, "step": 721 }, { "epoch": 0.6728797763280522, "grad_norm": 1.3508423738854645, "learning_rate": 1.7238522609596135e-05, "loss": 2.3168, "step": 722 }, { "epoch": 0.6728797763280522, "loss_reasoning": 0.6047051548957825, "loss_utility": 1.5292515754699707, "step": 722 }, { "epoch": 0.67381174277726, "grad_norm": 1.3717725357278965, "learning_rate": 1.7231618916120124e-05, "loss": 2.0788, "step": 723 }, { "epoch": 0.67381174277726, "loss_reasoning": 0.5287054181098938, "loss_utility": 2.108969211578369, "step": 723 }, { "epoch": 0.6747437092264679, "grad_norm": 2.208106325068529, "learning_rate": 1.7224715222644113e-05, "loss": 2.3134, "step": 724 }, { "epoch": 0.6747437092264679, "loss_reasoning": 0.5329186916351318, "loss_utility": 1.3137359619140625, "step": 724 }, { "epoch": 0.6756756756756757, "grad_norm": 1.4659849341058184, "learning_rate": 1.7217811529168106e-05, "loss": 2.505, "step": 725 }, { "epoch": 0.6756756756756757, "loss_reasoning": 0.5411585569381714, "loss_utility": 1.3702844381332397, "step": 725 }, { "epoch": 0.6766076421248836, "grad_norm": 1.7788173646463388, "learning_rate": 1.72109078356921e-05, "loss": 2.2503, "step": 726 }, { "epoch": 0.6766076421248836, "loss_reasoning": 0.5304583311080933, "loss_utility": 1.887704610824585, "step": 726 }, { "epoch": 0.6775396085740913, "grad_norm": 1.473882455789398, "learning_rate": 1.7204004142216087e-05, "loss": 2.0396, "step": 727 }, { "epoch": 0.6775396085740913, "loss_reasoning": 0.49977922439575195, "loss_utility": 1.1832921504974365, "step": 727 }, { "epoch": 0.6784715750232991, "grad_norm": 1.3633763108609558, "learning_rate": 1.719710044874008e-05, "loss": 2.1774, "step": 728 }, { "epoch": 0.6784715750232991, "loss_reasoning": 0.5280669331550598, "loss_utility": 1.2126511335372925, "step": 728 }, { "epoch": 0.679403541472507, "grad_norm": 1.814021224746526, "learning_rate": 1.719019675526407e-05, "loss": 2.0082, "step": 729 }, { "epoch": 0.679403541472507, "loss_reasoning": 0.5127002596855164, "loss_utility": 0.7861993312835693, "step": 729 }, { "epoch": 0.6803355079217148, "grad_norm": 1.587671596612112, "learning_rate": 1.7183293061788058e-05, "loss": 2.0753, "step": 730 }, { "epoch": 0.6803355079217148, "loss_reasoning": 0.45780080556869507, "loss_utility": 1.849297285079956, "step": 730 }, { "epoch": 0.6812674743709226, "grad_norm": 1.327614029525048, "learning_rate": 1.7176389368312047e-05, "loss": 2.1526, "step": 731 }, { "epoch": 0.6812674743709226, "loss_reasoning": 0.519673228263855, "loss_utility": 1.4952882528305054, "step": 731 }, { "epoch": 0.6821994408201305, "grad_norm": 1.3633055945784616, "learning_rate": 1.716948567483604e-05, "loss": 2.2091, "step": 732 }, { "epoch": 0.6821994408201305, "loss_reasoning": 0.542631983757019, "loss_utility": 1.7286934852600098, "step": 732 }, { "epoch": 0.6831314072693383, "grad_norm": 1.5578093059561822, "learning_rate": 1.716258198136003e-05, "loss": 2.1687, "step": 733 }, { "epoch": 0.6831314072693383, "loss_reasoning": 0.4890652894973755, "loss_utility": 1.6863222122192383, "step": 733 }, { "epoch": 0.6840633737185461, "grad_norm": 1.3943623443287458, "learning_rate": 1.7155678287884017e-05, "loss": 2.3428, "step": 734 }, { "epoch": 0.6840633737185461, "loss_reasoning": 0.5244503021240234, "loss_utility": 1.781233787536621, "step": 734 }, { "epoch": 0.684995340167754, "grad_norm": 1.7366100013206163, "learning_rate": 1.714877459440801e-05, "loss": 2.0476, "step": 735 }, { "epoch": 0.684995340167754, "loss_reasoning": 0.5012544393539429, "loss_utility": 1.1748840808868408, "step": 735 }, { "epoch": 0.6859273066169618, "grad_norm": 1.3289107508745723, "learning_rate": 1.7141870900932002e-05, "loss": 2.1626, "step": 736 }, { "epoch": 0.6859273066169618, "loss_reasoning": 0.5048922300338745, "loss_utility": 1.4168665409088135, "step": 736 }, { "epoch": 0.6868592730661697, "grad_norm": 2.128200051866658, "learning_rate": 1.713496720745599e-05, "loss": 2.2114, "step": 737 }, { "epoch": 0.6868592730661697, "loss_reasoning": 0.5551849603652954, "loss_utility": 1.4935812950134277, "step": 737 }, { "epoch": 0.6877912395153775, "grad_norm": 1.8755424298715984, "learning_rate": 1.712806351397998e-05, "loss": 2.2968, "step": 738 }, { "epoch": 0.6877912395153775, "loss_reasoning": 0.4705964922904968, "loss_utility": 1.6175365447998047, "step": 738 }, { "epoch": 0.6887232059645852, "grad_norm": 1.4090011660936612, "learning_rate": 1.7121159820503973e-05, "loss": 2.3258, "step": 739 }, { "epoch": 0.6887232059645852, "loss_reasoning": 0.4517185688018799, "loss_utility": 1.6741193532943726, "step": 739 }, { "epoch": 0.6896551724137931, "grad_norm": 1.2406582925092144, "learning_rate": 1.7114256127027962e-05, "loss": 1.9308, "step": 740 }, { "epoch": 0.6896551724137931, "loss_reasoning": 0.5043998956680298, "loss_utility": 1.2861194610595703, "step": 740 }, { "epoch": 0.6905871388630009, "grad_norm": 1.1612390562100765, "learning_rate": 1.710735243355195e-05, "loss": 2.0149, "step": 741 }, { "epoch": 0.6905871388630009, "loss_reasoning": 0.4942229986190796, "loss_utility": 0.5878903865814209, "step": 741 }, { "epoch": 0.6915191053122087, "grad_norm": 1.429497787200914, "learning_rate": 1.710044874007594e-05, "loss": 1.9951, "step": 742 }, { "epoch": 0.6915191053122087, "loss_reasoning": 0.4590885639190674, "loss_utility": 2.055219888687134, "step": 742 }, { "epoch": 0.6924510717614166, "grad_norm": 1.3852123815745725, "learning_rate": 1.7093545046599932e-05, "loss": 2.2098, "step": 743 }, { "epoch": 0.6924510717614166, "loss_reasoning": 0.5316877961158752, "loss_utility": 1.5165340900421143, "step": 743 }, { "epoch": 0.6933830382106244, "grad_norm": 1.6238385130016175, "learning_rate": 1.708664135312392e-05, "loss": 2.1091, "step": 744 }, { "epoch": 0.6933830382106244, "loss_reasoning": 0.49030083417892456, "loss_utility": 1.8824124336242676, "step": 744 }, { "epoch": 0.6943150046598322, "grad_norm": 1.4410478320505042, "learning_rate": 1.7079737659647914e-05, "loss": 2.3352, "step": 745 }, { "epoch": 0.6943150046598322, "loss_reasoning": 0.4984358549118042, "loss_utility": 1.6728160381317139, "step": 745 }, { "epoch": 0.6952469711090401, "grad_norm": 1.4799402736814324, "learning_rate": 1.7072833966171903e-05, "loss": 2.188, "step": 746 }, { "epoch": 0.6952469711090401, "loss_reasoning": 0.5590840578079224, "loss_utility": 1.5544131994247437, "step": 746 }, { "epoch": 0.6961789375582479, "grad_norm": 1.3452400134427271, "learning_rate": 1.7065930272695895e-05, "loss": 2.0352, "step": 747 }, { "epoch": 0.6961789375582479, "loss_reasoning": 0.49547886848449707, "loss_utility": 0.9151094555854797, "step": 747 }, { "epoch": 0.6971109040074557, "grad_norm": 1.3613458218414762, "learning_rate": 1.7059026579219884e-05, "loss": 2.1342, "step": 748 }, { "epoch": 0.6971109040074557, "loss_reasoning": 0.5567993521690369, "loss_utility": 1.454558253288269, "step": 748 }, { "epoch": 0.6980428704566636, "grad_norm": 1.311791561059084, "learning_rate": 1.7052122885743873e-05, "loss": 2.1853, "step": 749 }, { "epoch": 0.6980428704566636, "loss_reasoning": 0.4776286780834198, "loss_utility": 1.3515862226486206, "step": 749 }, { "epoch": 0.6989748369058714, "grad_norm": 2.01804792742168, "learning_rate": 1.7045219192267866e-05, "loss": 1.9618, "step": 750 }, { "epoch": 0.6989748369058714, "loss_reasoning": 0.5939892530441284, "loss_utility": 1.9091496467590332, "step": 750 }, { "epoch": 0.6999068033550793, "grad_norm": 1.282207973299874, "learning_rate": 1.7038315498791855e-05, "loss": 2.0117, "step": 751 }, { "epoch": 0.6999068033550793, "loss_reasoning": 0.4685761332511902, "loss_utility": 1.5063576698303223, "step": 751 }, { "epoch": 0.700838769804287, "grad_norm": 2.29613024111541, "learning_rate": 1.7031411805315844e-05, "loss": 2.1123, "step": 752 }, { "epoch": 0.700838769804287, "loss_reasoning": 0.5739130973815918, "loss_utility": 1.2435473203659058, "step": 752 }, { "epoch": 0.7017707362534948, "grad_norm": 1.5410197320968582, "learning_rate": 1.7024508111839836e-05, "loss": 2.4247, "step": 753 }, { "epoch": 0.7017707362534948, "loss_reasoning": 0.6118367314338684, "loss_utility": 2.3212594985961914, "step": 753 }, { "epoch": 0.7027027027027027, "grad_norm": 1.4478227700867285, "learning_rate": 1.7017604418363825e-05, "loss": 2.3739, "step": 754 }, { "epoch": 0.7027027027027027, "loss_reasoning": 0.5590207576751709, "loss_utility": 2.2359795570373535, "step": 754 }, { "epoch": 0.7036346691519105, "grad_norm": 1.2934561626212946, "learning_rate": 1.7010700724887818e-05, "loss": 2.3259, "step": 755 }, { "epoch": 0.7036346691519105, "loss_reasoning": 0.511342465877533, "loss_utility": 1.5531723499298096, "step": 755 }, { "epoch": 0.7045666356011183, "grad_norm": 1.4915851038364014, "learning_rate": 1.7003797031411807e-05, "loss": 1.959, "step": 756 }, { "epoch": 0.7045666356011183, "loss_reasoning": 0.5138049721717834, "loss_utility": 1.3014750480651855, "step": 756 }, { "epoch": 0.7054986020503262, "grad_norm": 1.6075556111977285, "learning_rate": 1.69968933379358e-05, "loss": 1.9315, "step": 757 }, { "epoch": 0.7054986020503262, "loss_reasoning": 0.6119765639305115, "loss_utility": 2.1411938667297363, "step": 757 }, { "epoch": 0.706430568499534, "grad_norm": 1.7051121285046449, "learning_rate": 1.6989989644459788e-05, "loss": 2.3442, "step": 758 }, { "epoch": 0.706430568499534, "loss_reasoning": 0.5895401239395142, "loss_utility": 1.5086740255355835, "step": 758 }, { "epoch": 0.7073625349487418, "grad_norm": 1.4081982790095307, "learning_rate": 1.6983085950983777e-05, "loss": 2.2762, "step": 759 }, { "epoch": 0.7073625349487418, "loss_reasoning": 0.5394715070724487, "loss_utility": 1.543573021888733, "step": 759 }, { "epoch": 0.7082945013979497, "grad_norm": 1.4407712831412656, "learning_rate": 1.6976182257507766e-05, "loss": 2.041, "step": 760 }, { "epoch": 0.7082945013979497, "loss_reasoning": 0.4558432996273041, "loss_utility": 1.9022271633148193, "step": 760 }, { "epoch": 0.7092264678471575, "grad_norm": 1.54874126949089, "learning_rate": 1.696927856403176e-05, "loss": 2.0167, "step": 761 }, { "epoch": 0.7092264678471575, "loss_reasoning": 0.5707101821899414, "loss_utility": 3.923436164855957, "step": 761 }, { "epoch": 0.7101584342963654, "grad_norm": 1.4636786655837422, "learning_rate": 1.6962374870555748e-05, "loss": 2.8641, "step": 762 }, { "epoch": 0.7101584342963654, "loss_reasoning": 0.4710613191127777, "loss_utility": 1.9860403537750244, "step": 762 }, { "epoch": 0.7110904007455732, "grad_norm": 1.477882211393594, "learning_rate": 1.695547117707974e-05, "loss": 2.3228, "step": 763 }, { "epoch": 0.7110904007455732, "loss_reasoning": 0.4992312788963318, "loss_utility": 2.1776421070098877, "step": 763 }, { "epoch": 0.712022367194781, "grad_norm": 1.490875180858165, "learning_rate": 1.694856748360373e-05, "loss": 2.2748, "step": 764 }, { "epoch": 0.712022367194781, "loss_reasoning": 0.5168987512588501, "loss_utility": 2.1268014907836914, "step": 764 }, { "epoch": 0.7129543336439889, "grad_norm": 1.2607165422236348, "learning_rate": 1.694166379012772e-05, "loss": 2.0944, "step": 765 }, { "epoch": 0.7129543336439889, "loss_reasoning": 0.6085758805274963, "loss_utility": 1.6568899154663086, "step": 765 }, { "epoch": 0.7138863000931966, "grad_norm": 1.754084423608795, "learning_rate": 1.693476009665171e-05, "loss": 2.3099, "step": 766 }, { "epoch": 0.7138863000931966, "loss_reasoning": 0.4310743510723114, "loss_utility": 1.7802683115005493, "step": 766 }, { "epoch": 0.7148182665424044, "grad_norm": 1.5228395703250104, "learning_rate": 1.69278564031757e-05, "loss": 2.1161, "step": 767 }, { "epoch": 0.7148182665424044, "loss_reasoning": 0.5083369016647339, "loss_utility": 1.5672941207885742, "step": 767 }, { "epoch": 0.7157502329916123, "grad_norm": 1.2527418952987173, "learning_rate": 1.6920952709699692e-05, "loss": 2.1968, "step": 768 }, { "epoch": 0.7157502329916123, "loss_reasoning": 0.535535454750061, "loss_utility": 1.3929413557052612, "step": 768 }, { "epoch": 0.7166821994408201, "grad_norm": 1.172045259306878, "learning_rate": 1.691404901622368e-05, "loss": 2.0774, "step": 769 }, { "epoch": 0.7166821994408201, "loss_reasoning": 0.4949953556060791, "loss_utility": 1.45429265499115, "step": 769 }, { "epoch": 0.7176141658900279, "grad_norm": 1.6064471345611435, "learning_rate": 1.690714532274767e-05, "loss": 1.9905, "step": 770 }, { "epoch": 0.7176141658900279, "loss_reasoning": 0.539626955986023, "loss_utility": 1.633547067642212, "step": 770 }, { "epoch": 0.7185461323392358, "grad_norm": 1.79931108271413, "learning_rate": 1.690024162927166e-05, "loss": 2.0501, "step": 771 }, { "epoch": 0.7185461323392358, "loss_reasoning": 0.5832306742668152, "loss_utility": 1.8654696941375732, "step": 771 }, { "epoch": 0.7194780987884436, "grad_norm": 1.4197415825519548, "learning_rate": 1.689333793579565e-05, "loss": 2.2546, "step": 772 }, { "epoch": 0.7194780987884436, "loss_reasoning": 0.4526856541633606, "loss_utility": 1.6349592208862305, "step": 772 }, { "epoch": 0.7204100652376515, "grad_norm": 1.4447301250378581, "learning_rate": 1.6886434242319644e-05, "loss": 2.0779, "step": 773 }, { "epoch": 0.7204100652376515, "loss_reasoning": 0.5261632204055786, "loss_utility": 1.194340467453003, "step": 773 }, { "epoch": 0.7213420316868593, "grad_norm": 1.2571392368689096, "learning_rate": 1.6879530548843633e-05, "loss": 2.0556, "step": 774 }, { "epoch": 0.7213420316868593, "loss_reasoning": 0.47307485342025757, "loss_utility": 2.3102669715881348, "step": 774 }, { "epoch": 0.7222739981360671, "grad_norm": 1.440114761263482, "learning_rate": 1.6872626855367622e-05, "loss": 2.5214, "step": 775 }, { "epoch": 0.7222739981360671, "loss_reasoning": 0.574672520160675, "loss_utility": 1.5337715148925781, "step": 775 }, { "epoch": 0.723205964585275, "grad_norm": 1.4533485269222626, "learning_rate": 1.6865723161891615e-05, "loss": 2.1274, "step": 776 }, { "epoch": 0.723205964585275, "loss_reasoning": 0.5601837038993835, "loss_utility": 0.7653975486755371, "step": 776 }, { "epoch": 0.7241379310344828, "grad_norm": 1.3828433486351683, "learning_rate": 1.6858819468415604e-05, "loss": 1.8375, "step": 777 }, { "epoch": 0.7241379310344828, "loss_reasoning": 0.5240569114685059, "loss_utility": 1.0412838459014893, "step": 777 }, { "epoch": 0.7250698974836906, "grad_norm": 1.6803920463708115, "learning_rate": 1.6851915774939593e-05, "loss": 1.9378, "step": 778 }, { "epoch": 0.7250698974836906, "loss_reasoning": 0.5029186606407166, "loss_utility": 1.8240931034088135, "step": 778 }, { "epoch": 0.7260018639328985, "grad_norm": 2.3653291175410374, "learning_rate": 1.6845012081463585e-05, "loss": 2.3546, "step": 779 }, { "epoch": 0.7260018639328985, "loss_reasoning": 0.49750930070877075, "loss_utility": 1.2657074928283691, "step": 779 }, { "epoch": 0.7269338303821062, "grad_norm": 1.4654291651632514, "learning_rate": 1.6838108387987574e-05, "loss": 1.8203, "step": 780 }, { "epoch": 0.7269338303821062, "loss_reasoning": 0.5336124897003174, "loss_utility": 1.9306926727294922, "step": 780 }, { "epoch": 0.727865796831314, "grad_norm": 1.350124666305024, "learning_rate": 1.6831204694511563e-05, "loss": 2.2872, "step": 781 }, { "epoch": 0.727865796831314, "loss_reasoning": 0.5519636273384094, "loss_utility": 1.531419277191162, "step": 781 }, { "epoch": 0.7287977632805219, "grad_norm": 1.3098421372908111, "learning_rate": 1.6824301001035556e-05, "loss": 2.1414, "step": 782 }, { "epoch": 0.7287977632805219, "loss_reasoning": 0.5368504524230957, "loss_utility": 1.279475450515747, "step": 782 }, { "epoch": 0.7297297297297297, "grad_norm": 1.2575070213910795, "learning_rate": 1.6817397307559548e-05, "loss": 1.7674, "step": 783 }, { "epoch": 0.7297297297297297, "loss_reasoning": 0.6104555726051331, "loss_utility": 1.5382810831069946, "step": 783 }, { "epoch": 0.7306616961789375, "grad_norm": 1.8075479967452668, "learning_rate": 1.6810493614083537e-05, "loss": 2.0174, "step": 784 }, { "epoch": 0.7306616961789375, "loss_reasoning": 0.5085102319717407, "loss_utility": 2.4533207416534424, "step": 784 }, { "epoch": 0.7315936626281454, "grad_norm": 2.1328317184854653, "learning_rate": 1.6803589920607526e-05, "loss": 2.576, "step": 785 }, { "epoch": 0.7315936626281454, "loss_reasoning": 0.5446473360061646, "loss_utility": 1.9445085525512695, "step": 785 }, { "epoch": 0.7325256290773532, "grad_norm": 1.8351530795162265, "learning_rate": 1.6796686227131515e-05, "loss": 2.4843, "step": 786 }, { "epoch": 0.7325256290773532, "loss_reasoning": 0.49175649881362915, "loss_utility": 0.6117463111877441, "step": 786 }, { "epoch": 0.7334575955265611, "grad_norm": 1.6164488264582493, "learning_rate": 1.6789782533655508e-05, "loss": 2.0945, "step": 787 }, { "epoch": 0.7334575955265611, "loss_reasoning": 0.5180258750915527, "loss_utility": 1.702756404876709, "step": 787 }, { "epoch": 0.7343895619757689, "grad_norm": 1.9474628159520262, "learning_rate": 1.6782878840179497e-05, "loss": 2.3274, "step": 788 }, { "epoch": 0.7343895619757689, "loss_reasoning": 0.4930136799812317, "loss_utility": 0.8876333236694336, "step": 788 }, { "epoch": 0.7353215284249767, "grad_norm": 2.1427274405356673, "learning_rate": 1.6775975146703486e-05, "loss": 2.1641, "step": 789 }, { "epoch": 0.7353215284249767, "loss_reasoning": 0.5599953532218933, "loss_utility": 1.1163721084594727, "step": 789 }, { "epoch": 0.7362534948741846, "grad_norm": 1.5554972138278609, "learning_rate": 1.6769071453227478e-05, "loss": 2.3853, "step": 790 }, { "epoch": 0.7362534948741846, "loss_reasoning": 0.5625511407852173, "loss_utility": 1.5368849039077759, "step": 790 }, { "epoch": 0.7371854613233924, "grad_norm": 1.2887270406343039, "learning_rate": 1.6762167759751467e-05, "loss": 1.9103, "step": 791 }, { "epoch": 0.7371854613233924, "loss_reasoning": 0.5582486987113953, "loss_utility": 1.3467453718185425, "step": 791 }, { "epoch": 0.7381174277726001, "grad_norm": 1.3692432048352412, "learning_rate": 1.675526406627546e-05, "loss": 2.0234, "step": 792 }, { "epoch": 0.7381174277726001, "loss_reasoning": 0.5057199001312256, "loss_utility": 1.685075283050537, "step": 792 }, { "epoch": 0.739049394221808, "grad_norm": 1.5852098844845102, "learning_rate": 1.674836037279945e-05, "loss": 2.2739, "step": 793 }, { "epoch": 0.739049394221808, "loss_reasoning": 0.5240524411201477, "loss_utility": 1.6589196920394897, "step": 793 }, { "epoch": 0.7399813606710158, "grad_norm": 1.485830426299809, "learning_rate": 1.674145667932344e-05, "loss": 2.0098, "step": 794 }, { "epoch": 0.7399813606710158, "loss_reasoning": 0.507856547832489, "loss_utility": 1.9339098930358887, "step": 794 }, { "epoch": 0.7409133271202236, "grad_norm": 1.4669008478678123, "learning_rate": 1.673455298584743e-05, "loss": 2.1559, "step": 795 }, { "epoch": 0.7409133271202236, "loss_reasoning": 0.49362632632255554, "loss_utility": 1.2919553518295288, "step": 795 }, { "epoch": 0.7418452935694315, "grad_norm": 1.2471042435873354, "learning_rate": 1.672764929237142e-05, "loss": 1.7999, "step": 796 }, { "epoch": 0.7418452935694315, "loss_reasoning": 0.5975486636161804, "loss_utility": 2.1459014415740967, "step": 796 }, { "epoch": 0.7427772600186393, "grad_norm": 1.4676816488202316, "learning_rate": 1.672074559889541e-05, "loss": 2.4134, "step": 797 }, { "epoch": 0.7427772600186393, "loss_reasoning": 0.5125666856765747, "loss_utility": 1.8077143430709839, "step": 797 }, { "epoch": 0.7437092264678472, "grad_norm": 1.3552156851747765, "learning_rate": 1.67138419054194e-05, "loss": 2.0837, "step": 798 }, { "epoch": 0.7437092264678472, "loss_reasoning": 0.5689746737480164, "loss_utility": 1.294567584991455, "step": 798 }, { "epoch": 0.744641192917055, "grad_norm": 2.3447612524436843, "learning_rate": 1.670693821194339e-05, "loss": 2.1394, "step": 799 }, { "epoch": 0.744641192917055, "loss_reasoning": 0.5035766959190369, "loss_utility": 1.8951678276062012, "step": 799 }, { "epoch": 0.7455731593662628, "grad_norm": 1.75613299886221, "learning_rate": 1.6700034518467382e-05, "loss": 2.0817, "step": 800 }, { "epoch": 0.7455731593662628, "loss_reasoning": 0.49943113327026367, "loss_utility": 1.2900440692901611, "step": 800 }, { "epoch": 0.7465051258154707, "grad_norm": 1.4532473437430729, "learning_rate": 1.669313082499137e-05, "loss": 2.0774, "step": 801 }, { "epoch": 0.7465051258154707, "loss_reasoning": 0.4981233775615692, "loss_utility": 1.6001272201538086, "step": 801 }, { "epoch": 0.7474370922646785, "grad_norm": 1.2910330486310548, "learning_rate": 1.6686227131515363e-05, "loss": 2.0534, "step": 802 }, { "epoch": 0.7474370922646785, "loss_reasoning": 0.5346336364746094, "loss_utility": 1.469944715499878, "step": 802 }, { "epoch": 0.7483690587138863, "grad_norm": 1.3974958180750805, "learning_rate": 1.6679323438039353e-05, "loss": 1.9128, "step": 803 }, { "epoch": 0.7483690587138863, "loss_reasoning": 0.47561395168304443, "loss_utility": 1.7549853324890137, "step": 803 }, { "epoch": 0.7493010251630942, "grad_norm": 1.5360111603482194, "learning_rate": 1.667241974456334e-05, "loss": 1.9334, "step": 804 }, { "epoch": 0.7493010251630942, "loss_reasoning": 0.5320535898208618, "loss_utility": 1.7923022508621216, "step": 804 }, { "epoch": 0.750232991612302, "grad_norm": 1.2737619372884041, "learning_rate": 1.6665516051087334e-05, "loss": 2.0889, "step": 805 }, { "epoch": 0.750232991612302, "loss_reasoning": 0.5331442356109619, "loss_utility": 1.6260703802108765, "step": 805 }, { "epoch": 0.7511649580615097, "grad_norm": 1.9175320171957049, "learning_rate": 1.6658612357611323e-05, "loss": 2.0733, "step": 806 }, { "epoch": 0.7511649580615097, "loss_reasoning": 0.49395614862442017, "loss_utility": 1.160315752029419, "step": 806 }, { "epoch": 0.7520969245107176, "grad_norm": 1.5875640133897233, "learning_rate": 1.6651708664135312e-05, "loss": 2.419, "step": 807 }, { "epoch": 0.7520969245107176, "loss_reasoning": 0.531780481338501, "loss_utility": 1.6064685583114624, "step": 807 }, { "epoch": 0.7530288909599254, "grad_norm": 1.4262282002008986, "learning_rate": 1.6644804970659305e-05, "loss": 2.5016, "step": 808 }, { "epoch": 0.7530288909599254, "loss_reasoning": 0.5582424402236938, "loss_utility": 1.6956207752227783, "step": 808 }, { "epoch": 0.7539608574091333, "grad_norm": 1.4182701841624086, "learning_rate": 1.6637901277183294e-05, "loss": 2.3352, "step": 809 }, { "epoch": 0.7539608574091333, "loss_reasoning": 0.5590550899505615, "loss_utility": 1.5453276634216309, "step": 809 }, { "epoch": 0.7548928238583411, "grad_norm": 1.3081243922429053, "learning_rate": 1.6630997583707286e-05, "loss": 2.0096, "step": 810 }, { "epoch": 0.7548928238583411, "loss_reasoning": 0.5182387232780457, "loss_utility": 2.2428150177001953, "step": 810 }, { "epoch": 0.7558247903075489, "grad_norm": 1.332031821903687, "learning_rate": 1.6624093890231275e-05, "loss": 2.174, "step": 811 }, { "epoch": 0.7558247903075489, "loss_reasoning": 0.5177538394927979, "loss_utility": 1.0976561307907104, "step": 811 }, { "epoch": 0.7567567567567568, "grad_norm": 1.708145122088416, "learning_rate": 1.6617190196755267e-05, "loss": 2.2607, "step": 812 }, { "epoch": 0.7567567567567568, "loss_reasoning": 0.4512658715248108, "loss_utility": 1.1900687217712402, "step": 812 }, { "epoch": 0.7576887232059646, "grad_norm": 1.4210853472687304, "learning_rate": 1.6610286503279256e-05, "loss": 2.2666, "step": 813 }, { "epoch": 0.7576887232059646, "loss_reasoning": 0.5383164882659912, "loss_utility": 1.6711695194244385, "step": 813 }, { "epoch": 0.7586206896551724, "grad_norm": 1.330275835116874, "learning_rate": 1.6603382809803246e-05, "loss": 2.3242, "step": 814 }, { "epoch": 0.7586206896551724, "loss_reasoning": 0.4988267719745636, "loss_utility": 1.548118233680725, "step": 814 }, { "epoch": 0.7595526561043803, "grad_norm": 1.2376819893130293, "learning_rate": 1.6596479116327235e-05, "loss": 1.8369, "step": 815 }, { "epoch": 0.7595526561043803, "loss_reasoning": 0.5540463924407959, "loss_utility": 1.0081994533538818, "step": 815 }, { "epoch": 0.7604846225535881, "grad_norm": 1.3223366690844578, "learning_rate": 1.6589575422851227e-05, "loss": 1.6633, "step": 816 }, { "epoch": 0.7604846225535881, "loss_reasoning": 0.5371490120887756, "loss_utility": 1.5316131114959717, "step": 816 }, { "epoch": 0.7614165890027959, "grad_norm": 1.466023718976525, "learning_rate": 1.6582671729375216e-05, "loss": 2.2421, "step": 817 }, { "epoch": 0.7614165890027959, "loss_reasoning": 0.5663847923278809, "loss_utility": 1.1841720342636108, "step": 817 }, { "epoch": 0.7623485554520038, "grad_norm": 1.1167156171054702, "learning_rate": 1.657576803589921e-05, "loss": 1.7881, "step": 818 }, { "epoch": 0.7623485554520038, "loss_reasoning": 0.5527375936508179, "loss_utility": 1.2956068515777588, "step": 818 }, { "epoch": 0.7632805219012115, "grad_norm": 1.2881181589913366, "learning_rate": 1.6568864342423197e-05, "loss": 1.9695, "step": 819 }, { "epoch": 0.7632805219012115, "loss_reasoning": 0.5009361505508423, "loss_utility": 1.1158783435821533, "step": 819 }, { "epoch": 0.7642124883504194, "grad_norm": 1.2602703245903315, "learning_rate": 1.656196064894719e-05, "loss": 1.9206, "step": 820 }, { "epoch": 0.7642124883504194, "loss_reasoning": 0.509070873260498, "loss_utility": 1.7501883506774902, "step": 820 }, { "epoch": 0.7651444547996272, "grad_norm": 1.3199241813754858, "learning_rate": 1.655505695547118e-05, "loss": 2.0974, "step": 821 }, { "epoch": 0.7651444547996272, "loss_reasoning": 0.5227916240692139, "loss_utility": 1.8963632583618164, "step": 821 }, { "epoch": 0.766076421248835, "grad_norm": 1.8300262280143105, "learning_rate": 1.6548153261995168e-05, "loss": 2.1214, "step": 822 }, { "epoch": 0.766076421248835, "loss_reasoning": 0.5388779044151306, "loss_utility": 2.26284122467041, "step": 822 }, { "epoch": 0.7670083876980429, "grad_norm": 1.641695516111987, "learning_rate": 1.654124956851916e-05, "loss": 2.289, "step": 823 }, { "epoch": 0.7670083876980429, "loss_reasoning": 0.5437600016593933, "loss_utility": 2.2739968299865723, "step": 823 }, { "epoch": 0.7679403541472507, "grad_norm": 1.5374119633205034, "learning_rate": 1.653434587504315e-05, "loss": 2.5932, "step": 824 }, { "epoch": 0.7679403541472507, "loss_reasoning": 0.560768723487854, "loss_utility": 1.6708989143371582, "step": 824 }, { "epoch": 0.7688723205964585, "grad_norm": 1.9881200302166249, "learning_rate": 1.652744218156714e-05, "loss": 2.3815, "step": 825 }, { "epoch": 0.7688723205964585, "loss_reasoning": 0.5104084014892578, "loss_utility": 1.457000970840454, "step": 825 }, { "epoch": 0.7698042870456664, "grad_norm": 1.1393071943709812, "learning_rate": 1.6520538488091128e-05, "loss": 1.9183, "step": 826 }, { "epoch": 0.7698042870456664, "loss_reasoning": 0.523703932762146, "loss_utility": 1.9368863105773926, "step": 826 }, { "epoch": 0.7707362534948742, "grad_norm": 2.5625454501133422, "learning_rate": 1.651363479461512e-05, "loss": 2.2644, "step": 827 }, { "epoch": 0.7707362534948742, "loss_reasoning": 0.5424239039421082, "loss_utility": 1.5964596271514893, "step": 827 }, { "epoch": 0.771668219944082, "grad_norm": 1.2832093074004327, "learning_rate": 1.6506731101139112e-05, "loss": 2.1849, "step": 828 }, { "epoch": 0.771668219944082, "loss_reasoning": 0.4821566939353943, "loss_utility": 1.3302439451217651, "step": 828 }, { "epoch": 0.7726001863932899, "grad_norm": 2.0081520097999275, "learning_rate": 1.64998274076631e-05, "loss": 1.892, "step": 829 }, { "epoch": 0.7726001863932899, "loss_reasoning": 0.4866079092025757, "loss_utility": 2.1810383796691895, "step": 829 }, { "epoch": 0.7735321528424977, "grad_norm": 1.630202300895293, "learning_rate": 1.6492923714187094e-05, "loss": 2.427, "step": 830 }, { "epoch": 0.7735321528424977, "loss_reasoning": 0.5251706838607788, "loss_utility": 1.3251228332519531, "step": 830 }, { "epoch": 0.7744641192917054, "grad_norm": 1.1967305332228002, "learning_rate": 1.6486020020711083e-05, "loss": 1.9743, "step": 831 }, { "epoch": 0.7744641192917054, "loss_reasoning": 0.4885994791984558, "loss_utility": 1.3567605018615723, "step": 831 }, { "epoch": 0.7753960857409133, "grad_norm": 1.374596838934747, "learning_rate": 1.6479116327235072e-05, "loss": 1.9419, "step": 832 }, { "epoch": 0.7753960857409133, "loss_reasoning": 0.563148021697998, "loss_utility": 1.236220359802246, "step": 832 }, { "epoch": 0.7763280521901211, "grad_norm": 1.2996703263053355, "learning_rate": 1.647221263375906e-05, "loss": 2.2216, "step": 833 }, { "epoch": 0.7763280521901211, "loss_reasoning": 0.5003880262374878, "loss_utility": 1.9634792804718018, "step": 833 }, { "epoch": 0.777260018639329, "grad_norm": 1.1532873721313486, "learning_rate": 1.6465308940283053e-05, "loss": 2.1119, "step": 834 }, { "epoch": 0.777260018639329, "loss_reasoning": 0.5089751482009888, "loss_utility": 1.9251279830932617, "step": 834 }, { "epoch": 0.7781919850885368, "grad_norm": 1.7102679688845208, "learning_rate": 1.6458405246807042e-05, "loss": 2.2074, "step": 835 }, { "epoch": 0.7781919850885368, "loss_reasoning": 0.5684109926223755, "loss_utility": 1.9313561916351318, "step": 835 }, { "epoch": 0.7791239515377446, "grad_norm": 2.1550466220070525, "learning_rate": 1.645150155333103e-05, "loss": 2.1577, "step": 836 }, { "epoch": 0.7791239515377446, "loss_reasoning": 0.5388091802597046, "loss_utility": 2.0639657974243164, "step": 836 }, { "epoch": 0.7800559179869525, "grad_norm": 1.2867587411855825, "learning_rate": 1.6444597859855024e-05, "loss": 2.3259, "step": 837 }, { "epoch": 0.7800559179869525, "loss_reasoning": 0.5737305879592896, "loss_utility": 1.6096562147140503, "step": 837 }, { "epoch": 0.7809878844361603, "grad_norm": 1.6596086360045381, "learning_rate": 1.6437694166379013e-05, "loss": 2.0228, "step": 838 }, { "epoch": 0.7809878844361603, "loss_reasoning": 0.513792097568512, "loss_utility": 1.9478585720062256, "step": 838 }, { "epoch": 0.7819198508853681, "grad_norm": 1.4446300875949625, "learning_rate": 1.6430790472903005e-05, "loss": 2.3652, "step": 839 }, { "epoch": 0.7819198508853681, "loss_reasoning": 0.5540823936462402, "loss_utility": 1.5890510082244873, "step": 839 }, { "epoch": 0.782851817334576, "grad_norm": 1.3904426014159996, "learning_rate": 1.6423886779426994e-05, "loss": 2.2301, "step": 840 }, { "epoch": 0.782851817334576, "loss_reasoning": 0.5745231509208679, "loss_utility": 1.811112642288208, "step": 840 }, { "epoch": 0.7837837837837838, "grad_norm": 1.54431002763114, "learning_rate": 1.6416983085950987e-05, "loss": 2.2129, "step": 841 }, { "epoch": 0.7837837837837838, "loss_reasoning": 0.5706853866577148, "loss_utility": 1.7039787769317627, "step": 841 }, { "epoch": 0.7847157502329916, "grad_norm": 1.3812138347338323, "learning_rate": 1.6410079392474976e-05, "loss": 1.9082, "step": 842 }, { "epoch": 0.7847157502329916, "loss_reasoning": 0.5292524099349976, "loss_utility": 2.192214012145996, "step": 842 }, { "epoch": 0.7856477166821995, "grad_norm": 1.5736647830653938, "learning_rate": 1.6403175698998965e-05, "loss": 2.2319, "step": 843 }, { "epoch": 0.7856477166821995, "loss_reasoning": 0.5767593383789062, "loss_utility": 1.1085052490234375, "step": 843 }, { "epoch": 0.7865796831314072, "grad_norm": 1.3831522671172958, "learning_rate": 1.6396272005522954e-05, "loss": 1.8948, "step": 844 }, { "epoch": 0.7865796831314072, "loss_reasoning": 0.5463489294052124, "loss_utility": 1.6375093460083008, "step": 844 }, { "epoch": 0.7875116495806151, "grad_norm": 1.6927597793625289, "learning_rate": 1.6389368312046946e-05, "loss": 2.1409, "step": 845 }, { "epoch": 0.7875116495806151, "loss_reasoning": 0.533467710018158, "loss_utility": 1.4598898887634277, "step": 845 }, { "epoch": 0.7884436160298229, "grad_norm": 1.5425962042018282, "learning_rate": 1.6382464618570935e-05, "loss": 2.3211, "step": 846 }, { "epoch": 0.7884436160298229, "loss_reasoning": 0.5426647663116455, "loss_utility": 1.0150728225708008, "step": 846 }, { "epoch": 0.7893755824790307, "grad_norm": 1.606264807879328, "learning_rate": 1.6375560925094928e-05, "loss": 1.8399, "step": 847 }, { "epoch": 0.7893755824790307, "loss_reasoning": 0.4692978858947754, "loss_utility": 1.0389074087142944, "step": 847 }, { "epoch": 0.7903075489282386, "grad_norm": 1.2986035522671247, "learning_rate": 1.6368657231618917e-05, "loss": 1.919, "step": 848 }, { "epoch": 0.7903075489282386, "loss_reasoning": 0.553299069404602, "loss_utility": 1.6094367504119873, "step": 848 }, { "epoch": 0.7912395153774464, "grad_norm": 1.324032502259088, "learning_rate": 1.636175353814291e-05, "loss": 1.8173, "step": 849 }, { "epoch": 0.7912395153774464, "loss_reasoning": 0.5804060697555542, "loss_utility": 1.3927029371261597, "step": 849 }, { "epoch": 0.7921714818266542, "grad_norm": 1.3150356356038329, "learning_rate": 1.63548498446669e-05, "loss": 2.1223, "step": 850 }, { "epoch": 0.7921714818266542, "loss_reasoning": 0.5151644945144653, "loss_utility": 2.2775652408599854, "step": 850 }, { "epoch": 0.7931034482758621, "grad_norm": 1.2849014652549076, "learning_rate": 1.6347946151190887e-05, "loss": 2.4462, "step": 851 }, { "epoch": 0.7931034482758621, "loss_reasoning": 0.506820797920227, "loss_utility": 1.9941412210464478, "step": 851 }, { "epoch": 0.7940354147250699, "grad_norm": 2.2084350608091423, "learning_rate": 1.634104245771488e-05, "loss": 2.6324, "step": 852 }, { "epoch": 0.7940354147250699, "loss_reasoning": 0.48297277092933655, "loss_utility": 1.923431396484375, "step": 852 }, { "epoch": 0.7949673811742777, "grad_norm": 1.6719090861037815, "learning_rate": 1.633413876423887e-05, "loss": 2.1491, "step": 853 }, { "epoch": 0.7949673811742777, "loss_reasoning": 0.5182048082351685, "loss_utility": 1.6898930072784424, "step": 853 }, { "epoch": 0.7958993476234856, "grad_norm": 1.542201343232136, "learning_rate": 1.6327235070762858e-05, "loss": 2.5259, "step": 854 }, { "epoch": 0.7958993476234856, "loss_reasoning": 0.5256329774856567, "loss_utility": 1.7728452682495117, "step": 854 }, { "epoch": 0.7968313140726934, "grad_norm": 2.188432061860187, "learning_rate": 1.632033137728685e-05, "loss": 2.1713, "step": 855 }, { "epoch": 0.7968313140726934, "loss_reasoning": 0.5805055499076843, "loss_utility": 1.4436753988265991, "step": 855 }, { "epoch": 0.7977632805219013, "grad_norm": 1.6831961554718697, "learning_rate": 1.631342768381084e-05, "loss": 2.4372, "step": 856 }, { "epoch": 0.7977632805219013, "loss_reasoning": 0.5480160117149353, "loss_utility": 1.2869548797607422, "step": 856 }, { "epoch": 0.798695246971109, "grad_norm": 1.0172125787035384, "learning_rate": 1.6306523990334832e-05, "loss": 1.6777, "step": 857 }, { "epoch": 0.798695246971109, "loss_reasoning": 0.4831455647945404, "loss_utility": 0.6215935945510864, "step": 857 }, { "epoch": 0.7996272134203168, "grad_norm": 1.1397593277569, "learning_rate": 1.629962029685882e-05, "loss": 1.8369, "step": 858 }, { "epoch": 0.7996272134203168, "loss_reasoning": 0.6059157848358154, "loss_utility": 2.223623275756836, "step": 858 }, { "epoch": 0.8005591798695247, "grad_norm": 2.0193210798088974, "learning_rate": 1.6292716603382813e-05, "loss": 2.4976, "step": 859 }, { "epoch": 0.8005591798695247, "loss_reasoning": 0.5175247192382812, "loss_utility": 1.8009369373321533, "step": 859 }, { "epoch": 0.8014911463187325, "grad_norm": 1.4701314890400412, "learning_rate": 1.6285812909906802e-05, "loss": 2.0613, "step": 860 }, { "epoch": 0.8014911463187325, "loss_reasoning": 0.47593164443969727, "loss_utility": 1.5196764469146729, "step": 860 }, { "epoch": 0.8024231127679403, "grad_norm": 1.3169886620180342, "learning_rate": 1.627890921643079e-05, "loss": 2.1433, "step": 861 }, { "epoch": 0.8024231127679403, "loss_reasoning": 0.536876916885376, "loss_utility": 1.7788687944412231, "step": 861 }, { "epoch": 0.8033550792171482, "grad_norm": 1.0918514877352852, "learning_rate": 1.627200552295478e-05, "loss": 1.8938, "step": 862 }, { "epoch": 0.8033550792171482, "loss_reasoning": 0.5311778783798218, "loss_utility": 2.116290807723999, "step": 862 }, { "epoch": 0.804287045666356, "grad_norm": 1.3622178888015148, "learning_rate": 1.6265101829478773e-05, "loss": 1.998, "step": 863 }, { "epoch": 0.804287045666356, "loss_reasoning": 0.5159060955047607, "loss_utility": 2.052802085876465, "step": 863 }, { "epoch": 0.8052190121155638, "grad_norm": 1.5779455922617618, "learning_rate": 1.6258198136002762e-05, "loss": 2.1499, "step": 864 }, { "epoch": 0.8052190121155638, "loss_reasoning": 0.5338150858879089, "loss_utility": 1.520643949508667, "step": 864 }, { "epoch": 0.8061509785647717, "grad_norm": 2.439748246091601, "learning_rate": 1.6251294442526754e-05, "loss": 2.2158, "step": 865 }, { "epoch": 0.8061509785647717, "loss_reasoning": 0.5607766509056091, "loss_utility": 1.3239821195602417, "step": 865 }, { "epoch": 0.8070829450139795, "grad_norm": 1.4188514748848848, "learning_rate": 1.6244390749050743e-05, "loss": 2.3339, "step": 866 }, { "epoch": 0.8070829450139795, "loss_reasoning": 0.48567038774490356, "loss_utility": 2.0815250873565674, "step": 866 }, { "epoch": 0.8080149114631874, "grad_norm": 1.2346940921474623, "learning_rate": 1.6237487055574736e-05, "loss": 2.0738, "step": 867 }, { "epoch": 0.8080149114631874, "loss_reasoning": 0.5463866591453552, "loss_utility": 1.5939100980758667, "step": 867 }, { "epoch": 0.8089468779123952, "grad_norm": 1.5215906143126063, "learning_rate": 1.6230583362098725e-05, "loss": 2.3887, "step": 868 }, { "epoch": 0.8089468779123952, "loss_reasoning": 0.6055265665054321, "loss_utility": 1.9628101587295532, "step": 868 }, { "epoch": 0.809878844361603, "grad_norm": 1.7711208353124406, "learning_rate": 1.6223679668622714e-05, "loss": 2.3446, "step": 869 }, { "epoch": 0.809878844361603, "loss_reasoning": 0.5751700401306152, "loss_utility": 1.1780145168304443, "step": 869 }, { "epoch": 0.8108108108108109, "grad_norm": 1.4552020364332596, "learning_rate": 1.6216775975146706e-05, "loss": 2.2437, "step": 870 }, { "epoch": 0.8108108108108109, "loss_reasoning": 0.517564058303833, "loss_utility": 1.4486593008041382, "step": 870 }, { "epoch": 0.8117427772600186, "grad_norm": 1.7020703132370925, "learning_rate": 1.6209872281670695e-05, "loss": 1.9545, "step": 871 }, { "epoch": 0.8117427772600186, "loss_reasoning": 0.5925719738006592, "loss_utility": 1.2962758541107178, "step": 871 }, { "epoch": 0.8126747437092264, "grad_norm": 1.4237871131121527, "learning_rate": 1.6202968588194684e-05, "loss": 2.1324, "step": 872 }, { "epoch": 0.8126747437092264, "loss_reasoning": 0.48392707109451294, "loss_utility": 1.4651659727096558, "step": 872 }, { "epoch": 0.8136067101584343, "grad_norm": 1.733191884270928, "learning_rate": 1.6196064894718673e-05, "loss": 2.2751, "step": 873 }, { "epoch": 0.8136067101584343, "loss_reasoning": 0.5444805026054382, "loss_utility": 1.171238660812378, "step": 873 }, { "epoch": 0.8145386766076421, "grad_norm": 1.244391896480773, "learning_rate": 1.6189161201242666e-05, "loss": 1.8411, "step": 874 }, { "epoch": 0.8145386766076421, "loss_reasoning": 0.49426034092903137, "loss_utility": 2.3768362998962402, "step": 874 }, { "epoch": 0.8154706430568499, "grad_norm": 1.408022810792585, "learning_rate": 1.6182257507766658e-05, "loss": 2.2709, "step": 875 }, { "epoch": 0.8154706430568499, "loss_reasoning": 0.5021131038665771, "loss_utility": 1.6224770545959473, "step": 875 }, { "epoch": 0.8164026095060578, "grad_norm": 1.4035686949483503, "learning_rate": 1.6175353814290647e-05, "loss": 2.1189, "step": 876 }, { "epoch": 0.8164026095060578, "loss_reasoning": 0.49572235345840454, "loss_utility": 1.5403887033462524, "step": 876 }, { "epoch": 0.8173345759552656, "grad_norm": 1.4204235223367738, "learning_rate": 1.6168450120814636e-05, "loss": 1.9177, "step": 877 }, { "epoch": 0.8173345759552656, "loss_reasoning": 0.4787349998950958, "loss_utility": 0.9078153371810913, "step": 877 }, { "epoch": 0.8182665424044734, "grad_norm": 1.542600054211446, "learning_rate": 1.616154642733863e-05, "loss": 2.033, "step": 878 }, { "epoch": 0.8182665424044734, "loss_reasoning": 0.5256956219673157, "loss_utility": 1.6442753076553345, "step": 878 }, { "epoch": 0.8191985088536813, "grad_norm": 1.1624518746834818, "learning_rate": 1.6154642733862618e-05, "loss": 1.9241, "step": 879 }, { "epoch": 0.8191985088536813, "loss_reasoning": 0.5102936029434204, "loss_utility": 1.7446818351745605, "step": 879 }, { "epoch": 0.8201304753028891, "grad_norm": 1.2163998551916908, "learning_rate": 1.6147739040386607e-05, "loss": 1.8527, "step": 880 }, { "epoch": 0.8201304753028891, "loss_reasoning": 0.5693436861038208, "loss_utility": 1.2440168857574463, "step": 880 }, { "epoch": 0.821062441752097, "grad_norm": 1.946580266823862, "learning_rate": 1.61408353469106e-05, "loss": 2.2289, "step": 881 }, { "epoch": 0.821062441752097, "loss_reasoning": 0.4841350317001343, "loss_utility": 2.0604946613311768, "step": 881 }, { "epoch": 0.8219944082013048, "grad_norm": 1.4770550488222967, "learning_rate": 1.6133931653434588e-05, "loss": 2.1889, "step": 882 }, { "epoch": 0.8219944082013048, "loss_reasoning": 0.5355541706085205, "loss_utility": 1.7703101634979248, "step": 882 }, { "epoch": 0.8229263746505125, "grad_norm": 1.255468858284728, "learning_rate": 1.6127027959958577e-05, "loss": 2.0444, "step": 883 }, { "epoch": 0.8229263746505125, "loss_reasoning": 0.5508888959884644, "loss_utility": 1.795300006866455, "step": 883 }, { "epoch": 0.8238583410997204, "grad_norm": 1.3080556229482627, "learning_rate": 1.612012426648257e-05, "loss": 2.0011, "step": 884 }, { "epoch": 0.8238583410997204, "loss_reasoning": 0.4975042939186096, "loss_utility": 1.6393224000930786, "step": 884 }, { "epoch": 0.8247903075489282, "grad_norm": 1.410333382641989, "learning_rate": 1.6113220573006562e-05, "loss": 2.2182, "step": 885 }, { "epoch": 0.8247903075489282, "loss_reasoning": 0.5077247023582458, "loss_utility": 2.0605764389038086, "step": 885 }, { "epoch": 0.825722273998136, "grad_norm": 1.4742523264662155, "learning_rate": 1.610631687953055e-05, "loss": 1.9506, "step": 886 }, { "epoch": 0.825722273998136, "loss_reasoning": 0.5557733178138733, "loss_utility": 1.0756378173828125, "step": 886 }, { "epoch": 0.8266542404473439, "grad_norm": 1.460464181664968, "learning_rate": 1.609941318605454e-05, "loss": 1.9075, "step": 887 }, { "epoch": 0.8266542404473439, "loss_reasoning": 0.5116265416145325, "loss_utility": 1.3602769374847412, "step": 887 }, { "epoch": 0.8275862068965517, "grad_norm": 1.893463382173134, "learning_rate": 1.6092509492578533e-05, "loss": 2.0593, "step": 888 }, { "epoch": 0.8275862068965517, "loss_reasoning": 0.4977084994316101, "loss_utility": 1.6368144750595093, "step": 888 }, { "epoch": 0.8285181733457595, "grad_norm": 1.2845913041824524, "learning_rate": 1.608560579910252e-05, "loss": 2.0175, "step": 889 }, { "epoch": 0.8285181733457595, "loss_reasoning": 0.5196468234062195, "loss_utility": 1.1898287534713745, "step": 889 }, { "epoch": 0.8294501397949674, "grad_norm": 1.5047027874737184, "learning_rate": 1.607870210562651e-05, "loss": 2.1223, "step": 890 }, { "epoch": 0.8294501397949674, "loss_reasoning": 0.5311230421066284, "loss_utility": 1.2052608728408813, "step": 890 }, { "epoch": 0.8303821062441752, "grad_norm": 1.4633747175195748, "learning_rate": 1.60717984121505e-05, "loss": 1.9798, "step": 891 }, { "epoch": 0.8303821062441752, "loss_reasoning": 0.5904989242553711, "loss_utility": 1.3497576713562012, "step": 891 }, { "epoch": 0.8313140726933831, "grad_norm": 1.3217101436203729, "learning_rate": 1.6064894718674492e-05, "loss": 1.7455, "step": 892 }, { "epoch": 0.8313140726933831, "loss_reasoning": 0.5086095333099365, "loss_utility": 1.741557002067566, "step": 892 }, { "epoch": 0.8322460391425909, "grad_norm": 1.1361976599724135, "learning_rate": 1.605799102519848e-05, "loss": 1.9454, "step": 893 }, { "epoch": 0.8322460391425909, "loss_reasoning": 0.5891686677932739, "loss_utility": 1.511380672454834, "step": 893 }, { "epoch": 0.8331780055917987, "grad_norm": 1.2506303949685955, "learning_rate": 1.6051087331722474e-05, "loss": 2.0426, "step": 894 }, { "epoch": 0.8331780055917987, "loss_reasoning": 0.5491173267364502, "loss_utility": 1.586328387260437, "step": 894 }, { "epoch": 0.8341099720410066, "grad_norm": 1.432876060506149, "learning_rate": 1.6044183638246463e-05, "loss": 2.1995, "step": 895 }, { "epoch": 0.8341099720410066, "loss_reasoning": 0.5274358987808228, "loss_utility": 1.5933090448379517, "step": 895 }, { "epoch": 0.8350419384902144, "grad_norm": 1.4559023591738662, "learning_rate": 1.6037279944770455e-05, "loss": 2.4331, "step": 896 }, { "epoch": 0.8350419384902144, "loss_reasoning": 0.5222306847572327, "loss_utility": 1.0731627941131592, "step": 896 }, { "epoch": 0.8359739049394221, "grad_norm": 1.4454945791324654, "learning_rate": 1.6030376251294444e-05, "loss": 1.8135, "step": 897 }, { "epoch": 0.8359739049394221, "loss_reasoning": 0.5664961934089661, "loss_utility": 1.840688705444336, "step": 897 }, { "epoch": 0.83690587138863, "grad_norm": 1.1953482694338364, "learning_rate": 1.6023472557818433e-05, "loss": 1.7299, "step": 898 }, { "epoch": 0.83690587138863, "loss_reasoning": 0.5304886102676392, "loss_utility": 1.5327811241149902, "step": 898 }, { "epoch": 0.8378378378378378, "grad_norm": 1.4022460592463082, "learning_rate": 1.6016568864342426e-05, "loss": 2.289, "step": 899 }, { "epoch": 0.8378378378378378, "loss_reasoning": 0.5364740490913391, "loss_utility": 1.8534138202667236, "step": 899 }, { "epoch": 0.8387698042870456, "grad_norm": 1.6276656023244795, "learning_rate": 1.6009665170866415e-05, "loss": 2.3224, "step": 900 }, { "epoch": 0.8387698042870456, "loss_reasoning": 0.5610030889511108, "loss_utility": 2.2586421966552734, "step": 900 }, { "epoch": 0.8397017707362535, "grad_norm": 1.6091824918503037, "learning_rate": 1.6002761477390404e-05, "loss": 2.2559, "step": 901 }, { "epoch": 0.8397017707362535, "loss_reasoning": 0.48284855484962463, "loss_utility": 2.314743995666504, "step": 901 }, { "epoch": 0.8406337371854613, "grad_norm": 1.3566974139807233, "learning_rate": 1.5995857783914396e-05, "loss": 2.0043, "step": 902 }, { "epoch": 0.8406337371854613, "loss_reasoning": 0.5305267572402954, "loss_utility": 1.1712126731872559, "step": 902 }, { "epoch": 0.8415657036346692, "grad_norm": 1.1947613726459891, "learning_rate": 1.5988954090438385e-05, "loss": 1.787, "step": 903 }, { "epoch": 0.8415657036346692, "loss_reasoning": 0.499784916639328, "loss_utility": 1.6046191453933716, "step": 903 }, { "epoch": 0.842497670083877, "grad_norm": 1.2031217625646322, "learning_rate": 1.5982050396962378e-05, "loss": 2.0924, "step": 904 }, { "epoch": 0.842497670083877, "loss_reasoning": 0.45320284366607666, "loss_utility": 1.1979377269744873, "step": 904 }, { "epoch": 0.8434296365330848, "grad_norm": 1.4026584789407686, "learning_rate": 1.5975146703486367e-05, "loss": 2.0439, "step": 905 }, { "epoch": 0.8434296365330848, "loss_reasoning": 0.6532308459281921, "loss_utility": 1.5099616050720215, "step": 905 }, { "epoch": 0.8443616029822927, "grad_norm": 1.5743436797623345, "learning_rate": 1.5968243010010356e-05, "loss": 2.1623, "step": 906 }, { "epoch": 0.8443616029822927, "loss_reasoning": 0.5662205219268799, "loss_utility": 1.8534523248672485, "step": 906 }, { "epoch": 0.8452935694315005, "grad_norm": 1.5425057484145526, "learning_rate": 1.5961339316534348e-05, "loss": 2.3863, "step": 907 }, { "epoch": 0.8452935694315005, "loss_reasoning": 0.5138424634933472, "loss_utility": 1.547043800354004, "step": 907 }, { "epoch": 0.8462255358807083, "grad_norm": 1.2719233783890562, "learning_rate": 1.5954435623058337e-05, "loss": 2.182, "step": 908 }, { "epoch": 0.8462255358807083, "loss_reasoning": 0.5006508827209473, "loss_utility": 1.2466235160827637, "step": 908 }, { "epoch": 0.8471575023299162, "grad_norm": 1.323209188077514, "learning_rate": 1.5947531929582326e-05, "loss": 1.8284, "step": 909 }, { "epoch": 0.8471575023299162, "loss_reasoning": 0.5121772289276123, "loss_utility": 1.2402783632278442, "step": 909 }, { "epoch": 0.848089468779124, "grad_norm": 3.329197342067003, "learning_rate": 1.594062823610632e-05, "loss": 1.9492, "step": 910 }, { "epoch": 0.848089468779124, "loss_reasoning": 0.4756484925746918, "loss_utility": 1.4222389459609985, "step": 910 }, { "epoch": 0.8490214352283317, "grad_norm": 1.2126939332178412, "learning_rate": 1.5933724542630308e-05, "loss": 2.0944, "step": 911 }, { "epoch": 0.8490214352283317, "loss_reasoning": 0.4985711872577667, "loss_utility": 1.2151075601577759, "step": 911 }, { "epoch": 0.8499534016775396, "grad_norm": 1.2471723475753687, "learning_rate": 1.59268208491543e-05, "loss": 2.169, "step": 912 }, { "epoch": 0.8499534016775396, "loss_reasoning": 0.5687080025672913, "loss_utility": 1.9857735633850098, "step": 912 }, { "epoch": 0.8508853681267474, "grad_norm": 1.2601514248097392, "learning_rate": 1.591991715567829e-05, "loss": 2.1527, "step": 913 }, { "epoch": 0.8508853681267474, "loss_reasoning": 0.5523228645324707, "loss_utility": 1.483223795890808, "step": 913 }, { "epoch": 0.8518173345759553, "grad_norm": 1.4196860530786362, "learning_rate": 1.591301346220228e-05, "loss": 1.9971, "step": 914 }, { "epoch": 0.8518173345759553, "loss_reasoning": 0.46073347330093384, "loss_utility": 1.1089849472045898, "step": 914 }, { "epoch": 0.8527493010251631, "grad_norm": 3.8380607431109226, "learning_rate": 1.590610976872627e-05, "loss": 2.0842, "step": 915 }, { "epoch": 0.8527493010251631, "loss_reasoning": 0.5090175271034241, "loss_utility": 2.0404560565948486, "step": 915 }, { "epoch": 0.8536812674743709, "grad_norm": 1.3332734797160124, "learning_rate": 1.589920607525026e-05, "loss": 2.4514, "step": 916 }, { "epoch": 0.8536812674743709, "loss_reasoning": 0.5752672553062439, "loss_utility": 1.378462791442871, "step": 916 }, { "epoch": 0.8546132339235788, "grad_norm": 1.518900303517593, "learning_rate": 1.589230238177425e-05, "loss": 2.3004, "step": 917 }, { "epoch": 0.8546132339235788, "loss_reasoning": 0.49548402428627014, "loss_utility": 1.1372613906860352, "step": 917 }, { "epoch": 0.8555452003727866, "grad_norm": 1.2152968133504114, "learning_rate": 1.588539868829824e-05, "loss": 1.8643, "step": 918 }, { "epoch": 0.8555452003727866, "loss_reasoning": 0.5311979651451111, "loss_utility": 2.829995632171631, "step": 918 }, { "epoch": 0.8564771668219944, "grad_norm": 1.3602902466379831, "learning_rate": 1.587849499482223e-05, "loss": 2.366, "step": 919 }, { "epoch": 0.8564771668219944, "loss_reasoning": 0.5864536166191101, "loss_utility": 1.6450648307800293, "step": 919 }, { "epoch": 0.8574091332712023, "grad_norm": 1.2628100088492091, "learning_rate": 1.5871591301346222e-05, "loss": 2.0102, "step": 920 }, { "epoch": 0.8574091332712023, "loss_reasoning": 0.4583958387374878, "loss_utility": 1.0464894771575928, "step": 920 }, { "epoch": 0.8583410997204101, "grad_norm": 1.2748328046528055, "learning_rate": 1.586468760787021e-05, "loss": 1.8651, "step": 921 }, { "epoch": 0.8583410997204101, "loss_reasoning": 0.5233837366104126, "loss_utility": 2.148442268371582, "step": 921 }, { "epoch": 0.8592730661696178, "grad_norm": 1.3638227752649401, "learning_rate": 1.5857783914394204e-05, "loss": 2.3456, "step": 922 }, { "epoch": 0.8592730661696178, "loss_reasoning": 0.533403217792511, "loss_utility": 1.2674270868301392, "step": 922 }, { "epoch": 0.8602050326188257, "grad_norm": 1.5334803352177553, "learning_rate": 1.5850880220918193e-05, "loss": 2.0463, "step": 923 }, { "epoch": 0.8602050326188257, "loss_reasoning": 0.507932186126709, "loss_utility": 1.8666372299194336, "step": 923 }, { "epoch": 0.8611369990680335, "grad_norm": 1.1757075189964363, "learning_rate": 1.5843976527442182e-05, "loss": 1.9701, "step": 924 }, { "epoch": 0.8611369990680335, "loss_reasoning": 0.5292632579803467, "loss_utility": 1.4924954175949097, "step": 924 }, { "epoch": 0.8620689655172413, "grad_norm": 1.9311133657833215, "learning_rate": 1.5837072833966174e-05, "loss": 2.1375, "step": 925 }, { "epoch": 0.8620689655172413, "loss_reasoning": 0.48990005254745483, "loss_utility": 1.502722978591919, "step": 925 }, { "epoch": 0.8630009319664492, "grad_norm": 1.2378361725090083, "learning_rate": 1.5830169140490164e-05, "loss": 2.2014, "step": 926 }, { "epoch": 0.8630009319664492, "loss_reasoning": 0.5291731357574463, "loss_utility": 1.2508585453033447, "step": 926 }, { "epoch": 0.863932898415657, "grad_norm": 1.1283625357598044, "learning_rate": 1.5823265447014153e-05, "loss": 1.8764, "step": 927 }, { "epoch": 0.863932898415657, "loss_reasoning": 0.5023887157440186, "loss_utility": 1.7297682762145996, "step": 927 }, { "epoch": 0.8648648648648649, "grad_norm": 1.4553673452121085, "learning_rate": 1.5816361753538145e-05, "loss": 2.3324, "step": 928 }, { "epoch": 0.8648648648648649, "loss_reasoning": 0.5265438556671143, "loss_utility": 1.1669857501983643, "step": 928 }, { "epoch": 0.8657968313140727, "grad_norm": 1.694898244836643, "learning_rate": 1.5809458060062134e-05, "loss": 2.0577, "step": 929 }, { "epoch": 0.8657968313140727, "loss_reasoning": 0.5025845766067505, "loss_utility": 1.27916419506073, "step": 929 }, { "epoch": 0.8667287977632805, "grad_norm": 1.4167520347662834, "learning_rate": 1.5802554366586126e-05, "loss": 1.973, "step": 930 }, { "epoch": 0.8667287977632805, "loss_reasoning": 0.49198034405708313, "loss_utility": 1.5606160163879395, "step": 930 }, { "epoch": 0.8676607642124884, "grad_norm": 1.2785260651583956, "learning_rate": 1.5795650673110115e-05, "loss": 2.3309, "step": 931 }, { "epoch": 0.8676607642124884, "loss_reasoning": 0.5214901566505432, "loss_utility": 2.3085997104644775, "step": 931 }, { "epoch": 0.8685927306616962, "grad_norm": 3.6870512636727537, "learning_rate": 1.5788746979634108e-05, "loss": 2.1943, "step": 932 }, { "epoch": 0.8685927306616962, "loss_reasoning": 0.5702040195465088, "loss_utility": 1.2971543073654175, "step": 932 }, { "epoch": 0.869524697110904, "grad_norm": 1.216511927821655, "learning_rate": 1.5781843286158097e-05, "loss": 1.9191, "step": 933 }, { "epoch": 0.869524697110904, "loss_reasoning": 0.531781017780304, "loss_utility": 1.4258320331573486, "step": 933 }, { "epoch": 0.8704566635601119, "grad_norm": 1.2683085287056726, "learning_rate": 1.5774939592682086e-05, "loss": 2.0776, "step": 934 }, { "epoch": 0.8704566635601119, "loss_reasoning": 0.5050461292266846, "loss_utility": 1.4106371402740479, "step": 934 }, { "epoch": 0.8713886300093197, "grad_norm": 1.141457095392707, "learning_rate": 1.5768035899206075e-05, "loss": 2.0348, "step": 935 }, { "epoch": 0.8713886300093197, "loss_reasoning": 0.5457738637924194, "loss_utility": 2.4457240104675293, "step": 935 }, { "epoch": 0.8723205964585274, "grad_norm": 1.7014136766692132, "learning_rate": 1.5761132205730067e-05, "loss": 2.3012, "step": 936 }, { "epoch": 0.8723205964585274, "loss_reasoning": 0.49163302779197693, "loss_utility": 2.1572248935699463, "step": 936 }, { "epoch": 0.8732525629077353, "grad_norm": 1.4680752441932021, "learning_rate": 1.5754228512254056e-05, "loss": 2.1633, "step": 937 }, { "epoch": 0.8732525629077353, "loss_reasoning": 0.4593898355960846, "loss_utility": 1.664610505104065, "step": 937 }, { "epoch": 0.8741845293569431, "grad_norm": 1.3811296650791998, "learning_rate": 1.5747324818778046e-05, "loss": 1.7982, "step": 938 }, { "epoch": 0.8741845293569431, "loss_reasoning": 0.5589733123779297, "loss_utility": 1.139580488204956, "step": 938 }, { "epoch": 0.875116495806151, "grad_norm": 1.2362203728043801, "learning_rate": 1.5740421125302038e-05, "loss": 2.1105, "step": 939 }, { "epoch": 0.875116495806151, "loss_reasoning": 0.5475045442581177, "loss_utility": 1.8245458602905273, "step": 939 }, { "epoch": 0.8760484622553588, "grad_norm": 1.4213909969572622, "learning_rate": 1.5733517431826027e-05, "loss": 2.279, "step": 940 }, { "epoch": 0.8760484622553588, "loss_reasoning": 0.4649578630924225, "loss_utility": 1.7400364875793457, "step": 940 }, { "epoch": 0.8769804287045666, "grad_norm": 1.6835651431719258, "learning_rate": 1.572661373835002e-05, "loss": 2.2779, "step": 941 }, { "epoch": 0.8769804287045666, "loss_reasoning": 0.4906333088874817, "loss_utility": 1.8198096752166748, "step": 941 }, { "epoch": 0.8779123951537745, "grad_norm": 1.2985765480630156, "learning_rate": 1.571971004487401e-05, "loss": 2.0795, "step": 942 }, { "epoch": 0.8779123951537745, "loss_reasoning": 0.5502206683158875, "loss_utility": 1.6576852798461914, "step": 942 }, { "epoch": 0.8788443616029823, "grad_norm": 1.2294739260036283, "learning_rate": 1.5712806351398e-05, "loss": 2.114, "step": 943 }, { "epoch": 0.8788443616029823, "loss_reasoning": 0.5602366924285889, "loss_utility": 1.820770502090454, "step": 943 }, { "epoch": 0.8797763280521901, "grad_norm": 1.4923984948759539, "learning_rate": 1.570590265792199e-05, "loss": 2.1382, "step": 944 }, { "epoch": 0.8797763280521901, "loss_reasoning": 0.520062267780304, "loss_utility": 0.5103857517242432, "step": 944 }, { "epoch": 0.880708294501398, "grad_norm": 1.3969080447864128, "learning_rate": 1.569899896444598e-05, "loss": 1.7198, "step": 945 }, { "epoch": 0.880708294501398, "loss_reasoning": 0.4829031825065613, "loss_utility": 1.2934558391571045, "step": 945 }, { "epoch": 0.8816402609506058, "grad_norm": 1.5978333150684385, "learning_rate": 1.5692095270969968e-05, "loss": 2.163, "step": 946 }, { "epoch": 0.8816402609506058, "loss_reasoning": 0.5022861957550049, "loss_utility": 1.8786635398864746, "step": 946 }, { "epoch": 0.8825722273998136, "grad_norm": 1.1975340319492238, "learning_rate": 1.568519157749396e-05, "loss": 2.18, "step": 947 }, { "epoch": 0.8825722273998136, "loss_reasoning": 0.5708118677139282, "loss_utility": 1.7221728563308716, "step": 947 }, { "epoch": 0.8835041938490215, "grad_norm": 2.201515588182402, "learning_rate": 1.567828788401795e-05, "loss": 2.3996, "step": 948 }, { "epoch": 0.8835041938490215, "loss_reasoning": 0.5221066474914551, "loss_utility": 1.5884038209915161, "step": 948 }, { "epoch": 0.8844361602982292, "grad_norm": 1.16325438947227, "learning_rate": 1.5671384190541942e-05, "loss": 1.9304, "step": 949 }, { "epoch": 0.8844361602982292, "loss_reasoning": 0.5332800149917603, "loss_utility": 1.6813147068023682, "step": 949 }, { "epoch": 0.8853681267474371, "grad_norm": 1.7488760069036509, "learning_rate": 1.566448049706593e-05, "loss": 2.3108, "step": 950 }, { "epoch": 0.8853681267474371, "loss_reasoning": 0.4805595874786377, "loss_utility": 1.2863693237304688, "step": 950 }, { "epoch": 0.8863000931966449, "grad_norm": 1.4578787614358113, "learning_rate": 1.5657576803589923e-05, "loss": 2.0078, "step": 951 }, { "epoch": 0.8863000931966449, "loss_reasoning": 0.549712061882019, "loss_utility": 2.222212791442871, "step": 951 }, { "epoch": 0.8872320596458527, "grad_norm": 1.725360038672547, "learning_rate": 1.5650673110113912e-05, "loss": 2.2158, "step": 952 }, { "epoch": 0.8872320596458527, "loss_reasoning": 0.6199912428855896, "loss_utility": 1.6194255352020264, "step": 952 }, { "epoch": 0.8881640260950606, "grad_norm": 1.214885072194442, "learning_rate": 1.56437694166379e-05, "loss": 2.0969, "step": 953 }, { "epoch": 0.8881640260950606, "loss_reasoning": 0.5064463019371033, "loss_utility": 1.7941118478775024, "step": 953 }, { "epoch": 0.8890959925442684, "grad_norm": 1.4098469716966568, "learning_rate": 1.5636865723161894e-05, "loss": 2.0569, "step": 954 }, { "epoch": 0.8890959925442684, "loss_reasoning": 0.47959035634994507, "loss_utility": 1.5655405521392822, "step": 954 }, { "epoch": 0.8900279589934762, "grad_norm": 1.3952321465438975, "learning_rate": 1.5629962029685883e-05, "loss": 2.0454, "step": 955 }, { "epoch": 0.8900279589934762, "loss_reasoning": 0.5261926054954529, "loss_utility": 1.1610032320022583, "step": 955 }, { "epoch": 0.8909599254426841, "grad_norm": 1.5746833657072896, "learning_rate": 1.5623058336209872e-05, "loss": 2.0598, "step": 956 }, { "epoch": 0.8909599254426841, "loss_reasoning": 0.5739575028419495, "loss_utility": 2.7056829929351807, "step": 956 }, { "epoch": 0.8918918918918919, "grad_norm": 1.4638851999144364, "learning_rate": 1.5616154642733864e-05, "loss": 2.3001, "step": 957 }, { "epoch": 0.8918918918918919, "loss_reasoning": 0.5592710375785828, "loss_utility": 1.7813575267791748, "step": 957 }, { "epoch": 0.8928238583410997, "grad_norm": 1.3314504306927153, "learning_rate": 1.5609250949257853e-05, "loss": 2.3168, "step": 958 }, { "epoch": 0.8928238583410997, "loss_reasoning": 0.5205883979797363, "loss_utility": 1.4632689952850342, "step": 958 }, { "epoch": 0.8937558247903076, "grad_norm": 1.1536509663048289, "learning_rate": 1.5602347255781846e-05, "loss": 1.695, "step": 959 }, { "epoch": 0.8937558247903076, "loss_reasoning": 0.4418996572494507, "loss_utility": 2.3375186920166016, "step": 959 }, { "epoch": 0.8946877912395154, "grad_norm": 1.2771645353477807, "learning_rate": 1.5595443562305835e-05, "loss": 2.4556, "step": 960 }, { "epoch": 0.8946877912395154, "loss_reasoning": 0.5159875154495239, "loss_utility": 1.7851288318634033, "step": 960 }, { "epoch": 0.8956197576887233, "grad_norm": 1.7767306754159378, "learning_rate": 1.5588539868829827e-05, "loss": 2.292, "step": 961 }, { "epoch": 0.8956197576887233, "loss_reasoning": 0.4949966073036194, "loss_utility": 1.9307210445404053, "step": 961 }, { "epoch": 0.896551724137931, "grad_norm": 1.2705064629537917, "learning_rate": 1.5581636175353816e-05, "loss": 2.1685, "step": 962 }, { "epoch": 0.896551724137931, "loss_reasoning": 0.44701817631721497, "loss_utility": 1.2173092365264893, "step": 962 }, { "epoch": 0.8974836905871388, "grad_norm": 1.302736241986992, "learning_rate": 1.5574732481877805e-05, "loss": 1.9301, "step": 963 }, { "epoch": 0.8974836905871388, "loss_reasoning": 0.4891890287399292, "loss_utility": 1.7018589973449707, "step": 963 }, { "epoch": 0.8984156570363467, "grad_norm": 1.4422746838636165, "learning_rate": 1.5567828788401794e-05, "loss": 1.7105, "step": 964 }, { "epoch": 0.8984156570363467, "loss_reasoning": 0.47981104254722595, "loss_utility": 1.5621631145477295, "step": 964 }, { "epoch": 0.8993476234855545, "grad_norm": 1.4835888942048248, "learning_rate": 1.5560925094925787e-05, "loss": 2.0433, "step": 965 }, { "epoch": 0.8993476234855545, "loss_reasoning": 0.46563252806663513, "loss_utility": 1.7559657096862793, "step": 965 }, { "epoch": 0.9002795899347623, "grad_norm": 1.1635799951077928, "learning_rate": 1.5554021401449776e-05, "loss": 1.803, "step": 966 }, { "epoch": 0.9002795899347623, "loss_reasoning": 0.521418571472168, "loss_utility": 1.6918752193450928, "step": 966 }, { "epoch": 0.9012115563839702, "grad_norm": 1.1347276459875224, "learning_rate": 1.5547117707973768e-05, "loss": 2.135, "step": 967 }, { "epoch": 0.9012115563839702, "loss_reasoning": 0.4703768193721771, "loss_utility": 1.3692357540130615, "step": 967 }, { "epoch": 0.902143522833178, "grad_norm": 1.5987682824455194, "learning_rate": 1.5540214014497757e-05, "loss": 2.0658, "step": 968 }, { "epoch": 0.902143522833178, "loss_reasoning": 0.5277526378631592, "loss_utility": 1.460348129272461, "step": 968 }, { "epoch": 0.9030754892823858, "grad_norm": 1.6350357739746917, "learning_rate": 1.553331032102175e-05, "loss": 2.3035, "step": 969 }, { "epoch": 0.9030754892823858, "loss_reasoning": 0.48165327310562134, "loss_utility": 1.4180446863174438, "step": 969 }, { "epoch": 0.9040074557315937, "grad_norm": 1.5153914786710392, "learning_rate": 1.552640662754574e-05, "loss": 2.2592, "step": 970 }, { "epoch": 0.9040074557315937, "loss_reasoning": 0.535407543182373, "loss_utility": 1.7338714599609375, "step": 970 }, { "epoch": 0.9049394221808015, "grad_norm": 1.893559654943491, "learning_rate": 1.5519502934069728e-05, "loss": 2.067, "step": 971 }, { "epoch": 0.9049394221808015, "loss_reasoning": 0.5583906173706055, "loss_utility": 1.5588220357894897, "step": 971 }, { "epoch": 0.9058713886300093, "grad_norm": 1.5181581019533223, "learning_rate": 1.551259924059372e-05, "loss": 2.1909, "step": 972 }, { "epoch": 0.9058713886300093, "loss_reasoning": 0.529962956905365, "loss_utility": 1.0069763660430908, "step": 972 }, { "epoch": 0.9068033550792172, "grad_norm": 1.4939774834567576, "learning_rate": 1.550569554711771e-05, "loss": 1.8233, "step": 973 }, { "epoch": 0.9068033550792172, "loss_reasoning": 0.5353301167488098, "loss_utility": 1.5338218212127686, "step": 973 }, { "epoch": 0.907735321528425, "grad_norm": 1.6153179543139808, "learning_rate": 1.54987918536417e-05, "loss": 1.9518, "step": 974 }, { "epoch": 0.907735321528425, "loss_reasoning": 0.5122888088226318, "loss_utility": 1.7368853092193604, "step": 974 }, { "epoch": 0.9086672879776329, "grad_norm": 1.7879993939122063, "learning_rate": 1.5491888160165687e-05, "loss": 2.3257, "step": 975 }, { "epoch": 0.9086672879776329, "loss_reasoning": 0.531907856464386, "loss_utility": 1.3257954120635986, "step": 975 }, { "epoch": 0.9095992544268406, "grad_norm": 1.3613367486495476, "learning_rate": 1.548498446668968e-05, "loss": 2.2371, "step": 976 }, { "epoch": 0.9095992544268406, "loss_reasoning": 0.5073180198669434, "loss_utility": 1.582810878753662, "step": 976 }, { "epoch": 0.9105312208760484, "grad_norm": 1.101804060862201, "learning_rate": 1.5478080773213672e-05, "loss": 2.0381, "step": 977 }, { "epoch": 0.9105312208760484, "loss_reasoning": 0.5472224950790405, "loss_utility": 0.9757028222084045, "step": 977 }, { "epoch": 0.9114631873252563, "grad_norm": 1.6611828479064723, "learning_rate": 1.547117707973766e-05, "loss": 1.5001, "step": 978 }, { "epoch": 0.9114631873252563, "loss_reasoning": 0.6046829223632812, "loss_utility": 1.3979616165161133, "step": 978 }, { "epoch": 0.9123951537744641, "grad_norm": 1.457272796484111, "learning_rate": 1.5464273386261654e-05, "loss": 1.8257, "step": 979 }, { "epoch": 0.9123951537744641, "loss_reasoning": 0.5441213846206665, "loss_utility": 1.727672815322876, "step": 979 }, { "epoch": 0.9133271202236719, "grad_norm": 1.4499506901935428, "learning_rate": 1.5457369692785643e-05, "loss": 2.0376, "step": 980 }, { "epoch": 0.9133271202236719, "loss_reasoning": 0.5201684236526489, "loss_utility": 1.8654472827911377, "step": 980 }, { "epoch": 0.9142590866728798, "grad_norm": 1.422916214569628, "learning_rate": 1.5450465999309632e-05, "loss": 2.1905, "step": 981 }, { "epoch": 0.9142590866728798, "loss_reasoning": 0.4595987796783447, "loss_utility": 1.7639985084533691, "step": 981 }, { "epoch": 0.9151910531220876, "grad_norm": 1.275159812468917, "learning_rate": 1.544356230583362e-05, "loss": 2.3245, "step": 982 }, { "epoch": 0.9151910531220876, "loss_reasoning": 0.5423862934112549, "loss_utility": 1.694422721862793, "step": 982 }, { "epoch": 0.9161230195712954, "grad_norm": 1.7546518209673532, "learning_rate": 1.5436658612357613e-05, "loss": 2.0426, "step": 983 }, { "epoch": 0.9161230195712954, "loss_reasoning": 0.528507649898529, "loss_utility": 1.813413143157959, "step": 983 }, { "epoch": 0.9170549860205033, "grad_norm": 1.2640203432713426, "learning_rate": 1.5429754918881602e-05, "loss": 2.0215, "step": 984 }, { "epoch": 0.9170549860205033, "loss_reasoning": 0.6597809791564941, "loss_utility": 1.4976539611816406, "step": 984 }, { "epoch": 0.9179869524697111, "grad_norm": 1.489627589119726, "learning_rate": 1.542285122540559e-05, "loss": 2.0709, "step": 985 }, { "epoch": 0.9179869524697111, "loss_reasoning": 0.4878653883934021, "loss_utility": 1.8292105197906494, "step": 985 }, { "epoch": 0.918918918918919, "grad_norm": 1.7409327491357338, "learning_rate": 1.5415947531929584e-05, "loss": 2.4963, "step": 986 }, { "epoch": 0.918918918918919, "loss_reasoning": 0.5311672687530518, "loss_utility": 0.69528728723526, "step": 986 }, { "epoch": 0.9198508853681268, "grad_norm": 1.2912802159989447, "learning_rate": 1.5409043838453576e-05, "loss": 1.8301, "step": 987 }, { "epoch": 0.9198508853681268, "loss_reasoning": 0.578489363193512, "loss_utility": 1.933424949645996, "step": 987 }, { "epoch": 0.9207828518173345, "grad_norm": 1.1113145394620276, "learning_rate": 1.5402140144977565e-05, "loss": 2.2292, "step": 988 }, { "epoch": 0.9207828518173345, "loss_reasoning": 0.5701159238815308, "loss_utility": 1.4851605892181396, "step": 988 }, { "epoch": 0.9217148182665424, "grad_norm": 1.522613986955421, "learning_rate": 1.5395236451501554e-05, "loss": 2.127, "step": 989 }, { "epoch": 0.9217148182665424, "loss_reasoning": 0.5388847589492798, "loss_utility": 1.7476056814193726, "step": 989 }, { "epoch": 0.9226467847157502, "grad_norm": 1.2668716997349174, "learning_rate": 1.5388332758025547e-05, "loss": 1.9219, "step": 990 }, { "epoch": 0.9226467847157502, "loss_reasoning": 0.5134075880050659, "loss_utility": 1.8390085697174072, "step": 990 }, { "epoch": 0.923578751164958, "grad_norm": 1.4745195333760897, "learning_rate": 1.5381429064549536e-05, "loss": 2.2903, "step": 991 }, { "epoch": 0.923578751164958, "loss_reasoning": 0.5498957633972168, "loss_utility": 1.468461275100708, "step": 991 }, { "epoch": 0.9245107176141659, "grad_norm": 1.3531182161917161, "learning_rate": 1.5374525371073525e-05, "loss": 2.2402, "step": 992 }, { "epoch": 0.9245107176141659, "loss_reasoning": 0.5679690837860107, "loss_utility": 1.3289686441421509, "step": 992 }, { "epoch": 0.9254426840633737, "grad_norm": 1.9205517149925386, "learning_rate": 1.5367621677597514e-05, "loss": 2.3332, "step": 993 }, { "epoch": 0.9254426840633737, "loss_reasoning": 0.5861232280731201, "loss_utility": 1.2363309860229492, "step": 993 }, { "epoch": 0.9263746505125815, "grad_norm": 1.240159358770797, "learning_rate": 1.5360717984121506e-05, "loss": 2.2201, "step": 994 }, { "epoch": 0.9263746505125815, "loss_reasoning": 0.4888063371181488, "loss_utility": 1.332759141921997, "step": 994 }, { "epoch": 0.9273066169617894, "grad_norm": 1.628978701351122, "learning_rate": 1.5353814290645495e-05, "loss": 2.0768, "step": 995 }, { "epoch": 0.9273066169617894, "loss_reasoning": 0.4654974937438965, "loss_utility": 1.5795226097106934, "step": 995 }, { "epoch": 0.9282385834109972, "grad_norm": 1.4514129244764022, "learning_rate": 1.5346910597169488e-05, "loss": 2.1378, "step": 996 }, { "epoch": 0.9282385834109972, "loss_reasoning": 0.4845423400402069, "loss_utility": 1.7754286527633667, "step": 996 }, { "epoch": 0.9291705498602051, "grad_norm": 1.893202771382873, "learning_rate": 1.5340006903693477e-05, "loss": 2.1551, "step": 997 }, { "epoch": 0.9291705498602051, "loss_reasoning": 0.43443411588668823, "loss_utility": 1.3374934196472168, "step": 997 }, { "epoch": 0.9301025163094129, "grad_norm": 1.6859205227926781, "learning_rate": 1.533310321021747e-05, "loss": 2.1491, "step": 998 }, { "epoch": 0.9301025163094129, "loss_reasoning": 0.5255013108253479, "loss_utility": 1.4380216598510742, "step": 998 }, { "epoch": 0.9310344827586207, "grad_norm": 1.3944322215779643, "learning_rate": 1.5326199516741458e-05, "loss": 1.8571, "step": 999 }, { "epoch": 0.9310344827586207, "loss_reasoning": 0.551726758480072, "loss_utility": 1.4858312606811523, "step": 999 }, { "epoch": 0.9319664492078286, "grad_norm": 1.5220133055978744, "learning_rate": 1.5319295823265447e-05, "loss": 2.3448, "step": 1000 }, { "epoch": 0.9319664492078286, "loss_reasoning": 0.5167378783226013, "loss_utility": 1.7067309617996216, "step": 1000 }, { "epoch": 0.9328984156570364, "grad_norm": 1.3169466205624463, "learning_rate": 1.531239212978944e-05, "loss": 2.0753, "step": 1001 }, { "epoch": 0.9328984156570364, "loss_reasoning": 0.4704015254974365, "loss_utility": 1.5358374118804932, "step": 1001 }, { "epoch": 0.9338303821062441, "grad_norm": 1.370761447478891, "learning_rate": 1.530548843631343e-05, "loss": 2.1336, "step": 1002 }, { "epoch": 0.9338303821062441, "loss_reasoning": 0.5026507377624512, "loss_utility": 2.1336045265197754, "step": 1002 }, { "epoch": 0.934762348555452, "grad_norm": 1.1723116073152806, "learning_rate": 1.5298584742837418e-05, "loss": 1.9742, "step": 1003 }, { "epoch": 0.934762348555452, "loss_reasoning": 0.4981974959373474, "loss_utility": 1.606276273727417, "step": 1003 }, { "epoch": 0.9356943150046598, "grad_norm": 1.342500752256838, "learning_rate": 1.529168104936141e-05, "loss": 2.3854, "step": 1004 }, { "epoch": 0.9356943150046598, "loss_reasoning": 0.5402559041976929, "loss_utility": 1.8464641571044922, "step": 1004 }, { "epoch": 0.9366262814538676, "grad_norm": 1.470421836676492, "learning_rate": 1.52847773558854e-05, "loss": 2.1856, "step": 1005 }, { "epoch": 0.9366262814538676, "loss_reasoning": 0.5090717673301697, "loss_utility": 2.5782294273376465, "step": 1005 }, { "epoch": 0.9375582479030755, "grad_norm": 2.2989440895203455, "learning_rate": 1.527787366240939e-05, "loss": 2.2545, "step": 1006 }, { "epoch": 0.9375582479030755, "loss_reasoning": 0.5669325590133667, "loss_utility": 1.2606182098388672, "step": 1006 }, { "epoch": 0.9384902143522833, "grad_norm": 1.3257024565835875, "learning_rate": 1.527096996893338e-05, "loss": 2.1764, "step": 1007 }, { "epoch": 0.9384902143522833, "loss_reasoning": 0.5530509352684021, "loss_utility": 2.3055906295776367, "step": 1007 }, { "epoch": 0.9394221808014911, "grad_norm": 1.1970400863200426, "learning_rate": 1.526406627545737e-05, "loss": 2.4205, "step": 1008 }, { "epoch": 0.9394221808014911, "loss_reasoning": 0.5285634994506836, "loss_utility": 1.387333869934082, "step": 1008 }, { "epoch": 0.940354147250699, "grad_norm": 1.184861034846389, "learning_rate": 1.5257162581981362e-05, "loss": 1.7653, "step": 1009 }, { "epoch": 0.940354147250699, "loss_reasoning": 0.5638701319694519, "loss_utility": 1.236555576324463, "step": 1009 }, { "epoch": 0.9412861136999068, "grad_norm": 1.0905842807476318, "learning_rate": 1.5250258888505351e-05, "loss": 1.72, "step": 1010 }, { "epoch": 0.9412861136999068, "loss_reasoning": 0.5441794395446777, "loss_utility": 1.086461067199707, "step": 1010 }, { "epoch": 0.9422180801491147, "grad_norm": 1.480565866866674, "learning_rate": 1.5243355195029342e-05, "loss": 1.6984, "step": 1011 }, { "epoch": 0.9422180801491147, "loss_reasoning": 0.47991710901260376, "loss_utility": 2.4816794395446777, "step": 1011 }, { "epoch": 0.9431500465983225, "grad_norm": 1.18344372708133, "learning_rate": 1.5236451501553333e-05, "loss": 2.5999, "step": 1012 }, { "epoch": 0.9431500465983225, "loss_reasoning": 0.4833419919013977, "loss_utility": 1.319270372390747, "step": 1012 }, { "epoch": 0.9440820130475303, "grad_norm": 1.019562626846878, "learning_rate": 1.5229547808077323e-05, "loss": 1.6669, "step": 1013 }, { "epoch": 0.9440820130475303, "loss_reasoning": 0.5562101006507874, "loss_utility": 1.5775001049041748, "step": 1013 }, { "epoch": 0.9450139794967382, "grad_norm": 1.6766362192738804, "learning_rate": 1.5222644114601312e-05, "loss": 2.1508, "step": 1014 }, { "epoch": 0.9450139794967382, "loss_reasoning": 0.50870680809021, "loss_utility": 1.7617665529251099, "step": 1014 }, { "epoch": 0.9459459459459459, "grad_norm": 1.361011897123725, "learning_rate": 1.5215740421125303e-05, "loss": 2.208, "step": 1015 }, { "epoch": 0.9459459459459459, "loss_reasoning": 0.5031023621559143, "loss_utility": 1.6468050479888916, "step": 1015 }, { "epoch": 0.9468779123951537, "grad_norm": 1.4063421444489135, "learning_rate": 1.5208836727649294e-05, "loss": 2.0004, "step": 1016 }, { "epoch": 0.9468779123951537, "loss_reasoning": 0.5593547224998474, "loss_utility": 1.9874576330184937, "step": 1016 }, { "epoch": 0.9478098788443616, "grad_norm": 1.4297697861158316, "learning_rate": 1.5201933034173285e-05, "loss": 2.0103, "step": 1017 }, { "epoch": 0.9478098788443616, "loss_reasoning": 0.5024580955505371, "loss_utility": 1.5873841047286987, "step": 1017 }, { "epoch": 0.9487418452935694, "grad_norm": 1.2942174314828305, "learning_rate": 1.5195029340697274e-05, "loss": 1.9849, "step": 1018 }, { "epoch": 0.9487418452935694, "loss_reasoning": 0.5818562507629395, "loss_utility": 1.6187829971313477, "step": 1018 }, { "epoch": 0.9496738117427772, "grad_norm": 1.5718373366169671, "learning_rate": 1.5188125647221266e-05, "loss": 2.1488, "step": 1019 }, { "epoch": 0.9496738117427772, "loss_reasoning": 0.4587126076221466, "loss_utility": 1.9943506717681885, "step": 1019 }, { "epoch": 0.9506057781919851, "grad_norm": 1.4534211121950102, "learning_rate": 1.5181221953745255e-05, "loss": 2.1164, "step": 1020 }, { "epoch": 0.9506057781919851, "loss_reasoning": 0.47479772567749023, "loss_utility": 1.6563892364501953, "step": 1020 }, { "epoch": 0.9515377446411929, "grad_norm": 1.2531251716139755, "learning_rate": 1.5174318260269246e-05, "loss": 2.1919, "step": 1021 }, { "epoch": 0.9515377446411929, "loss_reasoning": 0.5437541007995605, "loss_utility": 1.5692875385284424, "step": 1021 }, { "epoch": 0.9524697110904008, "grad_norm": 1.4126035401733008, "learning_rate": 1.5167414566793235e-05, "loss": 2.1733, "step": 1022 }, { "epoch": 0.9524697110904008, "loss_reasoning": 0.47527140378952026, "loss_utility": 2.0701422691345215, "step": 1022 }, { "epoch": 0.9534016775396086, "grad_norm": 1.3256716593552689, "learning_rate": 1.5160510873317227e-05, "loss": 2.2119, "step": 1023 }, { "epoch": 0.9534016775396086, "loss_reasoning": 0.5131115913391113, "loss_utility": 1.2601478099822998, "step": 1023 }, { "epoch": 0.9543336439888164, "grad_norm": 1.4857265023718733, "learning_rate": 1.5153607179841216e-05, "loss": 2.1356, "step": 1024 }, { "epoch": 0.9543336439888164, "loss_reasoning": 0.5438641309738159, "loss_utility": 1.709214687347412, "step": 1024 }, { "epoch": 0.9552656104380243, "grad_norm": 1.2619233950107396, "learning_rate": 1.5146703486365207e-05, "loss": 2.2521, "step": 1025 }, { "epoch": 0.9552656104380243, "loss_reasoning": 0.502606987953186, "loss_utility": 1.8127691745758057, "step": 1025 }, { "epoch": 0.9561975768872321, "grad_norm": 1.120932250030687, "learning_rate": 1.5139799792889196e-05, "loss": 2.0794, "step": 1026 }, { "epoch": 0.9561975768872321, "loss_reasoning": 0.49238044023513794, "loss_utility": 1.3764514923095703, "step": 1026 }, { "epoch": 0.9571295433364398, "grad_norm": 1.4508862733087748, "learning_rate": 1.5132896099413189e-05, "loss": 2.1994, "step": 1027 }, { "epoch": 0.9571295433364398, "loss_reasoning": 0.5805052518844604, "loss_utility": 1.5656671524047852, "step": 1027 }, { "epoch": 0.9580615097856477, "grad_norm": 1.3710008535011042, "learning_rate": 1.5125992405937178e-05, "loss": 2.1845, "step": 1028 }, { "epoch": 0.9580615097856477, "loss_reasoning": 0.4963848888874054, "loss_utility": 1.9784703254699707, "step": 1028 }, { "epoch": 0.9589934762348555, "grad_norm": 1.1969016950782052, "learning_rate": 1.5119088712461167e-05, "loss": 1.9525, "step": 1029 }, { "epoch": 0.9589934762348555, "loss_reasoning": 0.48048532009124756, "loss_utility": 1.3593584299087524, "step": 1029 }, { "epoch": 0.9599254426840633, "grad_norm": 1.361584334686818, "learning_rate": 1.5112185018985159e-05, "loss": 2.0357, "step": 1030 }, { "epoch": 0.9599254426840633, "loss_reasoning": 0.5646012425422668, "loss_utility": 1.42836332321167, "step": 1030 }, { "epoch": 0.9608574091332712, "grad_norm": 1.2671346218324058, "learning_rate": 1.510528132550915e-05, "loss": 2.1391, "step": 1031 }, { "epoch": 0.9608574091332712, "loss_reasoning": 0.44749003648757935, "loss_utility": 2.1090426445007324, "step": 1031 }, { "epoch": 0.961789375582479, "grad_norm": 1.3579075602826423, "learning_rate": 1.5098377632033139e-05, "loss": 2.1925, "step": 1032 }, { "epoch": 0.961789375582479, "loss_reasoning": 0.5307883024215698, "loss_utility": 1.503999948501587, "step": 1032 }, { "epoch": 0.9627213420316869, "grad_norm": 1.4639436228990859, "learning_rate": 1.5091473938557128e-05, "loss": 2.0781, "step": 1033 }, { "epoch": 0.9627213420316869, "loss_reasoning": 0.5087119340896606, "loss_utility": 1.6967573165893555, "step": 1033 }, { "epoch": 0.9636533084808947, "grad_norm": 1.8962196865030272, "learning_rate": 1.508457024508112e-05, "loss": 2.0634, "step": 1034 }, { "epoch": 0.9636533084808947, "loss_reasoning": 0.49566423892974854, "loss_utility": 1.4692540168762207, "step": 1034 }, { "epoch": 0.9645852749301025, "grad_norm": 1.5031244250351892, "learning_rate": 1.5077666551605111e-05, "loss": 2.3491, "step": 1035 }, { "epoch": 0.9645852749301025, "loss_reasoning": 0.49241605401039124, "loss_utility": 1.4474248886108398, "step": 1035 }, { "epoch": 0.9655172413793104, "grad_norm": 3.7103538666252867, "learning_rate": 1.50707628581291e-05, "loss": 2.1292, "step": 1036 }, { "epoch": 0.9655172413793104, "loss_reasoning": 0.5041386485099792, "loss_utility": 2.0868847370147705, "step": 1036 }, { "epoch": 0.9664492078285182, "grad_norm": 1.3859177811614096, "learning_rate": 1.5063859164653089e-05, "loss": 1.7461, "step": 1037 }, { "epoch": 0.9664492078285182, "loss_reasoning": 0.4967626929283142, "loss_utility": 1.4193744659423828, "step": 1037 }, { "epoch": 0.967381174277726, "grad_norm": 1.0371014946226271, "learning_rate": 1.5056955471177081e-05, "loss": 1.8411, "step": 1038 }, { "epoch": 0.967381174277726, "loss_reasoning": 0.5823574662208557, "loss_utility": 1.5501658916473389, "step": 1038 }, { "epoch": 0.9683131407269339, "grad_norm": 1.554979109013196, "learning_rate": 1.505005177770107e-05, "loss": 2.1627, "step": 1039 }, { "epoch": 0.9683131407269339, "loss_reasoning": 0.5163768529891968, "loss_utility": 1.6477712392807007, "step": 1039 }, { "epoch": 0.9692451071761417, "grad_norm": 1.8124007255446277, "learning_rate": 1.5043148084225061e-05, "loss": 1.7365, "step": 1040 }, { "epoch": 0.9692451071761417, "loss_reasoning": 0.5478793382644653, "loss_utility": 1.5806034803390503, "step": 1040 }, { "epoch": 0.9701770736253494, "grad_norm": 1.3025965505989217, "learning_rate": 1.5036244390749054e-05, "loss": 1.9724, "step": 1041 }, { "epoch": 0.9701770736253494, "loss_reasoning": 0.4979552626609802, "loss_utility": 2.2081708908081055, "step": 1041 }, { "epoch": 0.9711090400745573, "grad_norm": 1.7937113131023679, "learning_rate": 1.5029340697273043e-05, "loss": 2.2956, "step": 1042 }, { "epoch": 0.9711090400745573, "loss_reasoning": 0.4506627917289734, "loss_utility": 1.758831262588501, "step": 1042 }, { "epoch": 0.9720410065237651, "grad_norm": 1.7716660742245416, "learning_rate": 1.5022437003797032e-05, "loss": 2.2279, "step": 1043 }, { "epoch": 0.9720410065237651, "loss_reasoning": 0.5117201805114746, "loss_utility": 0.97941654920578, "step": 1043 }, { "epoch": 0.972972972972973, "grad_norm": 1.6132370830295084, "learning_rate": 1.5015533310321023e-05, "loss": 1.9527, "step": 1044 }, { "epoch": 0.972972972972973, "loss_reasoning": 0.5330040454864502, "loss_utility": 2.343914031982422, "step": 1044 }, { "epoch": 0.9739049394221808, "grad_norm": 1.2786130225323806, "learning_rate": 1.5008629616845013e-05, "loss": 2.259, "step": 1045 }, { "epoch": 0.9739049394221808, "loss_reasoning": 0.589667558670044, "loss_utility": 1.693710207939148, "step": 1045 }, { "epoch": 0.9748369058713886, "grad_norm": 1.2769818143318974, "learning_rate": 1.5001725923369004e-05, "loss": 2.1921, "step": 1046 }, { "epoch": 0.9748369058713886, "loss_reasoning": 0.5309628248214722, "loss_utility": 1.873200535774231, "step": 1046 }, { "epoch": 0.9757688723205965, "grad_norm": 1.6287250223931884, "learning_rate": 1.4994822229892993e-05, "loss": 2.6829, "step": 1047 }, { "epoch": 0.9757688723205965, "loss_reasoning": 0.4922909140586853, "loss_utility": 1.1792899370193481, "step": 1047 }, { "epoch": 0.9767008387698043, "grad_norm": 1.2177909673425509, "learning_rate": 1.4987918536416984e-05, "loss": 2.0009, "step": 1048 }, { "epoch": 0.9767008387698043, "loss_reasoning": 0.507972240447998, "loss_utility": 1.7957947254180908, "step": 1048 }, { "epoch": 0.9776328052190121, "grad_norm": 1.4587406543673809, "learning_rate": 1.4981014842940974e-05, "loss": 2.395, "step": 1049 }, { "epoch": 0.9776328052190121, "loss_reasoning": 0.5561301708221436, "loss_utility": 1.3527394533157349, "step": 1049 }, { "epoch": 0.97856477166822, "grad_norm": 1.6364376524370552, "learning_rate": 1.4974111149464965e-05, "loss": 2.0805, "step": 1050 }, { "epoch": 0.97856477166822, "loss_reasoning": 0.4921152591705322, "loss_utility": 1.224178433418274, "step": 1050 }, { "epoch": 0.9794967381174278, "grad_norm": 1.6760282911780773, "learning_rate": 1.4967207455988954e-05, "loss": 2.0375, "step": 1051 }, { "epoch": 0.9794967381174278, "loss_reasoning": 0.5293060541152954, "loss_utility": 1.6361531019210815, "step": 1051 }, { "epoch": 0.9804287045666356, "grad_norm": 1.4428210520879072, "learning_rate": 1.4960303762512947e-05, "loss": 1.9162, "step": 1052 }, { "epoch": 0.9804287045666356, "loss_reasoning": 0.4484131932258606, "loss_utility": 1.0671175718307495, "step": 1052 }, { "epoch": 0.9813606710158435, "grad_norm": 1.4462654151624121, "learning_rate": 1.4953400069036936e-05, "loss": 2.2111, "step": 1053 }, { "epoch": 0.9813606710158435, "loss_reasoning": 0.5382421016693115, "loss_utility": 1.186772346496582, "step": 1053 }, { "epoch": 0.9822926374650512, "grad_norm": 1.224795679712603, "learning_rate": 1.4946496375560926e-05, "loss": 2.1522, "step": 1054 }, { "epoch": 0.9822926374650512, "loss_reasoning": 0.504788875579834, "loss_utility": 2.122392177581787, "step": 1054 }, { "epoch": 0.983224603914259, "grad_norm": 1.1393413782896131, "learning_rate": 1.4939592682084915e-05, "loss": 2.2967, "step": 1055 }, { "epoch": 0.983224603914259, "loss_reasoning": 0.5433965921401978, "loss_utility": 1.6946561336517334, "step": 1055 }, { "epoch": 0.9841565703634669, "grad_norm": 1.058750414780614, "learning_rate": 1.4932688988608908e-05, "loss": 1.8584, "step": 1056 }, { "epoch": 0.9841565703634669, "loss_reasoning": 0.5294781923294067, "loss_utility": 1.298291563987732, "step": 1056 }, { "epoch": 0.9850885368126747, "grad_norm": 1.5903340710825764, "learning_rate": 1.4925785295132897e-05, "loss": 2.4555, "step": 1057 }, { "epoch": 0.9850885368126747, "loss_reasoning": 0.5146420001983643, "loss_utility": 1.7695918083190918, "step": 1057 }, { "epoch": 0.9860205032618826, "grad_norm": 1.4992165802219801, "learning_rate": 1.4918881601656888e-05, "loss": 2.097, "step": 1058 }, { "epoch": 0.9860205032618826, "loss_reasoning": 0.5347336530685425, "loss_utility": 1.7495713233947754, "step": 1058 }, { "epoch": 0.9869524697110904, "grad_norm": 1.1342158314796698, "learning_rate": 1.4911977908180878e-05, "loss": 2.4626, "step": 1059 }, { "epoch": 0.9869524697110904, "loss_reasoning": 0.4730284512042999, "loss_utility": 1.9435226917266846, "step": 1059 }, { "epoch": 0.9878844361602982, "grad_norm": 1.598830387242741, "learning_rate": 1.4905074214704869e-05, "loss": 1.972, "step": 1060 }, { "epoch": 0.9878844361602982, "loss_reasoning": 0.5378710031509399, "loss_utility": 2.065016269683838, "step": 1060 }, { "epoch": 0.9888164026095061, "grad_norm": 1.2482902579974164, "learning_rate": 1.4898170521228858e-05, "loss": 2.137, "step": 1061 }, { "epoch": 0.9888164026095061, "loss_reasoning": 0.5620499849319458, "loss_utility": 1.634987711906433, "step": 1061 }, { "epoch": 0.9897483690587139, "grad_norm": 1.681647729397212, "learning_rate": 1.4891266827752849e-05, "loss": 2.0871, "step": 1062 }, { "epoch": 0.9897483690587139, "loss_reasoning": 0.472053587436676, "loss_utility": 1.4301085472106934, "step": 1062 }, { "epoch": 0.9906803355079217, "grad_norm": 1.1622979572945853, "learning_rate": 1.488436313427684e-05, "loss": 1.8171, "step": 1063 }, { "epoch": 0.9906803355079217, "loss_reasoning": 0.5293821692466736, "loss_utility": 1.873328447341919, "step": 1063 }, { "epoch": 0.9916123019571296, "grad_norm": 1.3914392559824311, "learning_rate": 1.487745944080083e-05, "loss": 2.2557, "step": 1064 }, { "epoch": 0.9916123019571296, "loss_reasoning": 0.546889066696167, "loss_utility": 0.9544090628623962, "step": 1064 }, { "epoch": 0.9925442684063374, "grad_norm": 1.0784459675274183, "learning_rate": 1.487055574732482e-05, "loss": 1.649, "step": 1065 }, { "epoch": 0.9925442684063374, "loss_reasoning": 0.5233762264251709, "loss_utility": 1.8223457336425781, "step": 1065 }, { "epoch": 0.9934762348555451, "grad_norm": 1.2822902377092564, "learning_rate": 1.486365205384881e-05, "loss": 2.0853, "step": 1066 }, { "epoch": 0.9934762348555451, "loss_reasoning": 0.4998095631599426, "loss_utility": 1.1466741561889648, "step": 1066 }, { "epoch": 0.994408201304753, "grad_norm": 1.3000759853412795, "learning_rate": 1.4856748360372801e-05, "loss": 1.8338, "step": 1067 }, { "epoch": 0.994408201304753, "loss_reasoning": 0.5776085257530212, "loss_utility": 1.1062679290771484, "step": 1067 }, { "epoch": 0.9953401677539608, "grad_norm": 1.4284854971399206, "learning_rate": 1.4849844666896792e-05, "loss": 2.2313, "step": 1068 }, { "epoch": 0.9953401677539608, "loss_reasoning": 0.4480289816856384, "loss_utility": 2.335908889770508, "step": 1068 }, { "epoch": 0.9962721342031687, "grad_norm": 1.2922997489762515, "learning_rate": 1.484294097342078e-05, "loss": 2.3584, "step": 1069 }, { "epoch": 0.9962721342031687, "loss_reasoning": 0.5145242214202881, "loss_utility": 1.1067856550216675, "step": 1069 }, { "epoch": 0.9972041006523765, "grad_norm": 1.1704908773306981, "learning_rate": 1.4836037279944773e-05, "loss": 1.989, "step": 1070 }, { "epoch": 0.9972041006523765, "loss_reasoning": 0.5046164393424988, "loss_utility": 1.3015542030334473, "step": 1070 }, { "epoch": 0.9981360671015843, "grad_norm": 1.2648289606300096, "learning_rate": 1.4829133586468762e-05, "loss": 1.9222, "step": 1071 }, { "epoch": 0.9981360671015843, "loss_reasoning": 0.504816472530365, "loss_utility": 1.593282699584961, "step": 1071 }, { "epoch": 0.9990680335507922, "grad_norm": 1.4241842238036542, "learning_rate": 1.4822229892992753e-05, "loss": 2.4193, "step": 1072 }, { "epoch": 0.9990680335507922, "loss_reasoning": 0.4716208577156067, "loss_utility": 1.1599985361099243, "step": 1072 }, { "epoch": 1.0, "grad_norm": 1.1289878585801643, "learning_rate": 1.4815326199516742e-05, "loss": 2.0427, "step": 1073 }, { "epoch": 1.0, "loss_reasoning": 0.4602224826812744, "loss_utility": 0.9733425378799438, "step": 1073 }, { "epoch": 1.0009319664492078, "grad_norm": 1.3850945924920472, "learning_rate": 1.4808422506040734e-05, "loss": 1.9502, "step": 1074 }, { "epoch": 1.0009319664492078, "loss_reasoning": 0.4881317913532257, "loss_utility": 2.279876947402954, "step": 1074 }, { "epoch": 1.0018639328984156, "grad_norm": 1.4769015349096344, "learning_rate": 1.4801518812564723e-05, "loss": 2.0784, "step": 1075 }, { "epoch": 1.0018639328984156, "loss_reasoning": 0.4218083322048187, "loss_utility": 1.5011980533599854, "step": 1075 }, { "epoch": 1.0027958993476236, "grad_norm": 1.0727357487480078, "learning_rate": 1.4794615119088714e-05, "loss": 1.6923, "step": 1076 }, { "epoch": 1.0027958993476236, "loss_reasoning": 0.5233516693115234, "loss_utility": 1.1940631866455078, "step": 1076 }, { "epoch": 1.0037278657968314, "grad_norm": 1.230498273629553, "learning_rate": 1.4787711425612703e-05, "loss": 1.7072, "step": 1077 }, { "epoch": 1.0037278657968314, "loss_reasoning": 0.5538877844810486, "loss_utility": 1.1527260541915894, "step": 1077 }, { "epoch": 1.0046598322460392, "grad_norm": 1.0686511496877074, "learning_rate": 1.4780807732136696e-05, "loss": 1.9885, "step": 1078 }, { "epoch": 1.0046598322460392, "loss_reasoning": 0.5880542993545532, "loss_utility": 1.042850136756897, "step": 1078 }, { "epoch": 1.005591798695247, "grad_norm": 1.0883696110527676, "learning_rate": 1.4773904038660685e-05, "loss": 1.6279, "step": 1079 }, { "epoch": 1.005591798695247, "loss_reasoning": 0.5342233777046204, "loss_utility": 1.0850613117218018, "step": 1079 }, { "epoch": 1.0065237651444547, "grad_norm": 1.08345389069272, "learning_rate": 1.4767000345184674e-05, "loss": 1.4943, "step": 1080 }, { "epoch": 1.0065237651444547, "loss_reasoning": 0.49347564578056335, "loss_utility": 1.143836498260498, "step": 1080 }, { "epoch": 1.0074557315936625, "grad_norm": 1.1780526909795035, "learning_rate": 1.4760096651708666e-05, "loss": 2.0294, "step": 1081 }, { "epoch": 1.0074557315936625, "loss_reasoning": 0.4782099425792694, "loss_utility": 0.9479246139526367, "step": 1081 }, { "epoch": 1.0083876980428705, "grad_norm": 1.160208150524845, "learning_rate": 1.4753192958232657e-05, "loss": 1.7409, "step": 1082 }, { "epoch": 1.0083876980428705, "loss_reasoning": 0.5241371393203735, "loss_utility": 1.7114225625991821, "step": 1082 }, { "epoch": 1.0093196644920783, "grad_norm": 1.393650418531998, "learning_rate": 1.4746289264756646e-05, "loss": 1.819, "step": 1083 }, { "epoch": 1.0093196644920783, "loss_reasoning": 0.565302848815918, "loss_utility": 0.8143603801727295, "step": 1083 }, { "epoch": 1.0102516309412861, "grad_norm": 1.2778680177407185, "learning_rate": 1.4739385571280635e-05, "loss": 1.6312, "step": 1084 }, { "epoch": 1.0102516309412861, "loss_reasoning": 0.5502501726150513, "loss_utility": 1.4519741535186768, "step": 1084 }, { "epoch": 1.011183597390494, "grad_norm": 1.0920199758939655, "learning_rate": 1.4732481877804627e-05, "loss": 1.8935, "step": 1085 }, { "epoch": 1.011183597390494, "loss_reasoning": 0.4936979413032532, "loss_utility": 1.4335260391235352, "step": 1085 }, { "epoch": 1.0121155638397017, "grad_norm": 1.126340018958648, "learning_rate": 1.4725578184328618e-05, "loss": 1.7243, "step": 1086 }, { "epoch": 1.0121155638397017, "loss_reasoning": 0.5844534635543823, "loss_utility": 1.1937885284423828, "step": 1086 }, { "epoch": 1.0130475302889097, "grad_norm": 0.9558029735065743, "learning_rate": 1.4718674490852607e-05, "loss": 1.6939, "step": 1087 }, { "epoch": 1.0130475302889097, "loss_reasoning": 0.5141621828079224, "loss_utility": 1.1167497634887695, "step": 1087 }, { "epoch": 1.0139794967381175, "grad_norm": 1.1734217579869963, "learning_rate": 1.4711770797376596e-05, "loss": 1.7164, "step": 1088 }, { "epoch": 1.0139794967381175, "loss_reasoning": 0.581387996673584, "loss_utility": 1.4811675548553467, "step": 1088 }, { "epoch": 1.0149114631873253, "grad_norm": 1.7511145094659868, "learning_rate": 1.4704867103900589e-05, "loss": 1.8948, "step": 1089 }, { "epoch": 1.0149114631873253, "loss_reasoning": 0.5331395864486694, "loss_utility": 1.373321771621704, "step": 1089 }, { "epoch": 1.015843429636533, "grad_norm": 1.3012342041407727, "learning_rate": 1.4697963410424578e-05, "loss": 1.8447, "step": 1090 }, { "epoch": 1.015843429636533, "loss_reasoning": 0.4812648892402649, "loss_utility": 1.1408889293670654, "step": 1090 }, { "epoch": 1.0167753960857409, "grad_norm": 1.0677577819143413, "learning_rate": 1.4691059716948568e-05, "loss": 1.7373, "step": 1091 }, { "epoch": 1.0167753960857409, "loss_reasoning": 0.5312657356262207, "loss_utility": 1.2293741703033447, "step": 1091 }, { "epoch": 1.0177073625349486, "grad_norm": 1.245290769261688, "learning_rate": 1.468415602347256e-05, "loss": 1.9364, "step": 1092 }, { "epoch": 1.0177073625349486, "loss_reasoning": 0.5017889142036438, "loss_utility": 1.8131638765335083, "step": 1092 }, { "epoch": 1.0186393289841567, "grad_norm": 1.165413080500932, "learning_rate": 1.467725232999655e-05, "loss": 2.0749, "step": 1093 }, { "epoch": 1.0186393289841567, "loss_reasoning": 0.4931926131248474, "loss_utility": 1.7481257915496826, "step": 1093 }, { "epoch": 1.0195712954333644, "grad_norm": 1.4972607053918998, "learning_rate": 1.4670348636520539e-05, "loss": 1.8649, "step": 1094 }, { "epoch": 1.0195712954333644, "loss_reasoning": 0.4961288273334503, "loss_utility": 0.8161529302597046, "step": 1094 }, { "epoch": 1.0205032618825722, "grad_norm": 1.1044134706487894, "learning_rate": 1.466344494304453e-05, "loss": 1.5545, "step": 1095 }, { "epoch": 1.0205032618825722, "loss_reasoning": 0.46758562326431274, "loss_utility": 0.8622045516967773, "step": 1095 }, { "epoch": 1.02143522833178, "grad_norm": 1.0442216666973798, "learning_rate": 1.465654124956852e-05, "loss": 1.6541, "step": 1096 }, { "epoch": 1.02143522833178, "loss_reasoning": 0.4797658920288086, "loss_utility": 1.1319706439971924, "step": 1096 }, { "epoch": 1.0223671947809878, "grad_norm": 1.1613827155214498, "learning_rate": 1.4649637556092511e-05, "loss": 1.6837, "step": 1097 }, { "epoch": 1.0223671947809878, "loss_reasoning": 0.5282174348831177, "loss_utility": 1.0001816749572754, "step": 1097 }, { "epoch": 1.0232991612301958, "grad_norm": 1.2614044782542881, "learning_rate": 1.46427338626165e-05, "loss": 1.4986, "step": 1098 }, { "epoch": 1.0232991612301958, "loss_reasoning": 0.4948474168777466, "loss_utility": 1.5711169242858887, "step": 1098 }, { "epoch": 1.0242311276794036, "grad_norm": 1.5388913286189496, "learning_rate": 1.463583016914049e-05, "loss": 2.0292, "step": 1099 }, { "epoch": 1.0242311276794036, "loss_reasoning": 0.47066420316696167, "loss_utility": 0.9798871874809265, "step": 1099 }, { "epoch": 1.0251630941286114, "grad_norm": 1.2813610278026786, "learning_rate": 1.4628926475664481e-05, "loss": 1.7297, "step": 1100 }, { "epoch": 1.0251630941286114, "loss_reasoning": 0.5198423266410828, "loss_utility": 1.1196800470352173, "step": 1100 }, { "epoch": 1.0260950605778192, "grad_norm": 1.1381512182351297, "learning_rate": 1.4622022782188472e-05, "loss": 1.6385, "step": 1101 }, { "epoch": 1.0260950605778192, "loss_reasoning": 0.4914569556713104, "loss_utility": 1.5384654998779297, "step": 1101 }, { "epoch": 1.027027027027027, "grad_norm": 1.507681651027992, "learning_rate": 1.4615119088712461e-05, "loss": 1.9353, "step": 1102 }, { "epoch": 1.027027027027027, "loss_reasoning": 0.5320403575897217, "loss_utility": 0.9767223596572876, "step": 1102 }, { "epoch": 1.0279589934762348, "grad_norm": 1.439569365788276, "learning_rate": 1.4608215395236454e-05, "loss": 1.7209, "step": 1103 }, { "epoch": 1.0279589934762348, "loss_reasoning": 0.5156765580177307, "loss_utility": 1.5870294570922852, "step": 1103 }, { "epoch": 1.0288909599254428, "grad_norm": 1.0418085683087188, "learning_rate": 1.4601311701760443e-05, "loss": 1.8888, "step": 1104 }, { "epoch": 1.0288909599254428, "loss_reasoning": 0.4924525022506714, "loss_utility": 1.963857889175415, "step": 1104 }, { "epoch": 1.0298229263746506, "grad_norm": 1.4226530033972213, "learning_rate": 1.4594408008284433e-05, "loss": 1.8011, "step": 1105 }, { "epoch": 1.0298229263746506, "loss_reasoning": 0.4692358374595642, "loss_utility": 1.149548053741455, "step": 1105 }, { "epoch": 1.0307548928238583, "grad_norm": 1.2214376057908123, "learning_rate": 1.4587504314808423e-05, "loss": 1.6877, "step": 1106 }, { "epoch": 1.0307548928238583, "loss_reasoning": 0.5160843729972839, "loss_utility": 1.7320202589035034, "step": 1106 }, { "epoch": 1.0316868592730661, "grad_norm": 1.3742717160865103, "learning_rate": 1.4580600621332415e-05, "loss": 1.7197, "step": 1107 }, { "epoch": 1.0316868592730661, "loss_reasoning": 0.5298276543617249, "loss_utility": 1.5113067626953125, "step": 1107 }, { "epoch": 1.032618825722274, "grad_norm": 1.2780767043829198, "learning_rate": 1.4573696927856404e-05, "loss": 1.9471, "step": 1108 }, { "epoch": 1.032618825722274, "loss_reasoning": 0.5131372213363647, "loss_utility": 1.1003193855285645, "step": 1108 }, { "epoch": 1.0335507921714817, "grad_norm": 1.322222331788967, "learning_rate": 1.4566793234380395e-05, "loss": 1.6694, "step": 1109 }, { "epoch": 1.0335507921714817, "loss_reasoning": 0.4939265549182892, "loss_utility": 1.3743616342544556, "step": 1109 }, { "epoch": 1.0344827586206897, "grad_norm": 1.2133643118140687, "learning_rate": 1.4559889540904385e-05, "loss": 1.759, "step": 1110 }, { "epoch": 1.0344827586206897, "loss_reasoning": 0.5638916492462158, "loss_utility": 0.6221537590026855, "step": 1110 }, { "epoch": 1.0354147250698975, "grad_norm": 1.3223537094247961, "learning_rate": 1.4552985847428376e-05, "loss": 1.5708, "step": 1111 }, { "epoch": 1.0354147250698975, "loss_reasoning": 0.5062861442565918, "loss_utility": 1.2906852960586548, "step": 1111 }, { "epoch": 1.0363466915191053, "grad_norm": 1.0942033677641991, "learning_rate": 1.4546082153952365e-05, "loss": 1.808, "step": 1112 }, { "epoch": 1.0363466915191053, "loss_reasoning": 0.515778660774231, "loss_utility": 1.8285417556762695, "step": 1112 }, { "epoch": 1.037278657968313, "grad_norm": 1.2429654709405646, "learning_rate": 1.4539178460476356e-05, "loss": 1.7165, "step": 1113 }, { "epoch": 1.037278657968313, "loss_reasoning": 0.4999946653842926, "loss_utility": 1.3317444324493408, "step": 1113 }, { "epoch": 1.0382106244175209, "grad_norm": 1.2713022280559232, "learning_rate": 1.4532274767000347e-05, "loss": 1.9361, "step": 1114 }, { "epoch": 1.0382106244175209, "loss_reasoning": 0.4331441819667816, "loss_utility": 1.8613533973693848, "step": 1114 }, { "epoch": 1.0391425908667289, "grad_norm": 1.3183397146816676, "learning_rate": 1.4525371073524337e-05, "loss": 1.9573, "step": 1115 }, { "epoch": 1.0391425908667289, "loss_reasoning": 0.42605310678482056, "loss_utility": 1.4447530508041382, "step": 1115 }, { "epoch": 1.0400745573159367, "grad_norm": 1.2110913091915245, "learning_rate": 1.4518467380048326e-05, "loss": 1.5965, "step": 1116 }, { "epoch": 1.0400745573159367, "loss_reasoning": 0.5483272075653076, "loss_utility": 1.1990479230880737, "step": 1116 }, { "epoch": 1.0410065237651445, "grad_norm": 1.2669660750347007, "learning_rate": 1.4511563686572317e-05, "loss": 1.8878, "step": 1117 }, { "epoch": 1.0410065237651445, "loss_reasoning": 0.46280229091644287, "loss_utility": 1.4338598251342773, "step": 1117 }, { "epoch": 1.0419384902143523, "grad_norm": 1.4118443409882682, "learning_rate": 1.4504659993096308e-05, "loss": 2.0244, "step": 1118 }, { "epoch": 1.0419384902143523, "loss_reasoning": 0.5391567945480347, "loss_utility": 1.461301326751709, "step": 1118 }, { "epoch": 1.04287045666356, "grad_norm": 1.3040392389020194, "learning_rate": 1.4497756299620299e-05, "loss": 2.2206, "step": 1119 }, { "epoch": 1.04287045666356, "loss_reasoning": 0.4942222237586975, "loss_utility": 1.555415153503418, "step": 1119 }, { "epoch": 1.0438024231127678, "grad_norm": 1.1729023065054796, "learning_rate": 1.4490852606144288e-05, "loss": 1.9619, "step": 1120 }, { "epoch": 1.0438024231127678, "loss_reasoning": 0.5222710371017456, "loss_utility": 1.025019645690918, "step": 1120 }, { "epoch": 1.0447343895619758, "grad_norm": 1.0868936889595644, "learning_rate": 1.448394891266828e-05, "loss": 1.6429, "step": 1121 }, { "epoch": 1.0447343895619758, "loss_reasoning": 0.5256035327911377, "loss_utility": 1.412579894065857, "step": 1121 }, { "epoch": 1.0456663560111836, "grad_norm": 1.1467506834252745, "learning_rate": 1.4477045219192269e-05, "loss": 1.7124, "step": 1122 }, { "epoch": 1.0456663560111836, "loss_reasoning": 0.5043509006500244, "loss_utility": 1.6392431259155273, "step": 1122 }, { "epoch": 1.0465983224603914, "grad_norm": 1.294966845186761, "learning_rate": 1.447014152571626e-05, "loss": 2.0832, "step": 1123 }, { "epoch": 1.0465983224603914, "loss_reasoning": 0.5324890613555908, "loss_utility": 1.8101005554199219, "step": 1123 }, { "epoch": 1.0475302889095992, "grad_norm": 1.1482684074991598, "learning_rate": 1.4463237832240249e-05, "loss": 2.0046, "step": 1124 }, { "epoch": 1.0475302889095992, "loss_reasoning": 0.5157575011253357, "loss_utility": 1.565878987312317, "step": 1124 }, { "epoch": 1.048462255358807, "grad_norm": 1.1996363344158376, "learning_rate": 1.4456334138764241e-05, "loss": 1.909, "step": 1125 }, { "epoch": 1.048462255358807, "loss_reasoning": 0.5361529588699341, "loss_utility": 1.3430463075637817, "step": 1125 }, { "epoch": 1.049394221808015, "grad_norm": 1.2191507371393282, "learning_rate": 1.444943044528823e-05, "loss": 2.0108, "step": 1126 }, { "epoch": 1.049394221808015, "loss_reasoning": 0.5798490047454834, "loss_utility": 0.8369486331939697, "step": 1126 }, { "epoch": 1.0503261882572228, "grad_norm": 1.0531015967401123, "learning_rate": 1.4442526751812221e-05, "loss": 1.632, "step": 1127 }, { "epoch": 1.0503261882572228, "loss_reasoning": 0.4775196313858032, "loss_utility": 1.1511101722717285, "step": 1127 }, { "epoch": 1.0512581547064306, "grad_norm": 1.136511567168095, "learning_rate": 1.443562305833621e-05, "loss": 1.688, "step": 1128 }, { "epoch": 1.0512581547064306, "loss_reasoning": 0.5140495896339417, "loss_utility": 0.6056227684020996, "step": 1128 }, { "epoch": 1.0521901211556384, "grad_norm": 1.2021680654696925, "learning_rate": 1.4428719364860203e-05, "loss": 1.9905, "step": 1129 }, { "epoch": 1.0521901211556384, "loss_reasoning": 0.4703461229801178, "loss_utility": 0.9454290866851807, "step": 1129 }, { "epoch": 1.0531220876048462, "grad_norm": 1.4864512109312467, "learning_rate": 1.4421815671384192e-05, "loss": 1.786, "step": 1130 }, { "epoch": 1.0531220876048462, "loss_reasoning": 0.5332306623458862, "loss_utility": 1.3504927158355713, "step": 1130 }, { "epoch": 1.054054054054054, "grad_norm": 1.5365690056353827, "learning_rate": 1.441491197790818e-05, "loss": 2.1103, "step": 1131 }, { "epoch": 1.054054054054054, "loss_reasoning": 0.5402069687843323, "loss_utility": 0.8719475269317627, "step": 1131 }, { "epoch": 1.054986020503262, "grad_norm": 1.2386829121587435, "learning_rate": 1.4408008284432173e-05, "loss": 1.4449, "step": 1132 }, { "epoch": 1.054986020503262, "loss_reasoning": 0.4981541037559509, "loss_utility": 1.0685017108917236, "step": 1132 }, { "epoch": 1.0559179869524697, "grad_norm": 1.180539811649413, "learning_rate": 1.4401104590956164e-05, "loss": 1.9031, "step": 1133 }, { "epoch": 1.0559179869524697, "loss_reasoning": 0.5086700916290283, "loss_utility": 1.8970845937728882, "step": 1133 }, { "epoch": 1.0568499534016775, "grad_norm": 1.138485764254813, "learning_rate": 1.4394200897480153e-05, "loss": 1.9073, "step": 1134 }, { "epoch": 1.0568499534016775, "loss_reasoning": 0.581527590751648, "loss_utility": 1.314942717552185, "step": 1134 }, { "epoch": 1.0577819198508853, "grad_norm": 1.5772712798122672, "learning_rate": 1.4387297204004142e-05, "loss": 1.8772, "step": 1135 }, { "epoch": 1.0577819198508853, "loss_reasoning": 0.49459388852119446, "loss_utility": 1.53040611743927, "step": 1135 }, { "epoch": 1.058713886300093, "grad_norm": 1.196105265726313, "learning_rate": 1.4380393510528134e-05, "loss": 1.7491, "step": 1136 }, { "epoch": 1.058713886300093, "loss_reasoning": 0.522409975528717, "loss_utility": 1.2986977100372314, "step": 1136 }, { "epoch": 1.0596458527493011, "grad_norm": 1.0589904743377991, "learning_rate": 1.4373489817052125e-05, "loss": 1.9613, "step": 1137 }, { "epoch": 1.0596458527493011, "loss_reasoning": 0.5411720275878906, "loss_utility": 1.3047105073928833, "step": 1137 }, { "epoch": 1.060577819198509, "grad_norm": 1.3038618737956014, "learning_rate": 1.4366586123576114e-05, "loss": 1.8999, "step": 1138 }, { "epoch": 1.060577819198509, "loss_reasoning": 0.4949926733970642, "loss_utility": 0.7736489176750183, "step": 1138 }, { "epoch": 1.0615097856477167, "grad_norm": 1.1279552068673984, "learning_rate": 1.4359682430100103e-05, "loss": 1.7891, "step": 1139 }, { "epoch": 1.0615097856477167, "loss_reasoning": 0.5305289030075073, "loss_utility": 1.5614278316497803, "step": 1139 }, { "epoch": 1.0624417520969245, "grad_norm": 1.6343570640132483, "learning_rate": 1.4352778736624096e-05, "loss": 2.2827, "step": 1140 }, { "epoch": 1.0624417520969245, "loss_reasoning": 0.47552552819252014, "loss_utility": 0.9574073553085327, "step": 1140 }, { "epoch": 1.0633737185461323, "grad_norm": 1.1491064613111686, "learning_rate": 1.4345875043148085e-05, "loss": 1.824, "step": 1141 }, { "epoch": 1.0633737185461323, "loss_reasoning": 0.4567146897315979, "loss_utility": 0.5319051146507263, "step": 1141 }, { "epoch": 1.06430568499534, "grad_norm": 1.410787878992509, "learning_rate": 1.4338971349672075e-05, "loss": 1.3638, "step": 1142 }, { "epoch": 1.06430568499534, "loss_reasoning": 0.5013785362243652, "loss_utility": 1.011849284172058, "step": 1142 }, { "epoch": 1.065237651444548, "grad_norm": 1.053666884334734, "learning_rate": 1.4332067656196068e-05, "loss": 1.7826, "step": 1143 }, { "epoch": 1.065237651444548, "loss_reasoning": 0.470371812582016, "loss_utility": 1.3329582214355469, "step": 1143 }, { "epoch": 1.0661696178937559, "grad_norm": 1.2520500922209508, "learning_rate": 1.4325163962720057e-05, "loss": 1.9129, "step": 1144 }, { "epoch": 1.0661696178937559, "loss_reasoning": 0.5152015089988708, "loss_utility": 1.3725523948669434, "step": 1144 }, { "epoch": 1.0671015843429636, "grad_norm": 1.2560495418918516, "learning_rate": 1.4318260269244046e-05, "loss": 1.7301, "step": 1145 }, { "epoch": 1.0671015843429636, "loss_reasoning": 0.4848071038722992, "loss_utility": 1.5376932621002197, "step": 1145 }, { "epoch": 1.0680335507921714, "grad_norm": 1.2480885938554165, "learning_rate": 1.4311356575768037e-05, "loss": 1.8933, "step": 1146 }, { "epoch": 1.0680335507921714, "loss_reasoning": 0.4839175343513489, "loss_utility": 1.911975383758545, "step": 1146 }, { "epoch": 1.0689655172413792, "grad_norm": 1.163361453857391, "learning_rate": 1.4304452882292027e-05, "loss": 1.9564, "step": 1147 }, { "epoch": 1.0689655172413792, "loss_reasoning": 0.42251962423324585, "loss_utility": 1.139241337776184, "step": 1147 }, { "epoch": 1.0698974836905872, "grad_norm": 1.1002525198927755, "learning_rate": 1.4297549188816018e-05, "loss": 1.7846, "step": 1148 }, { "epoch": 1.0698974836905872, "loss_reasoning": 0.5009964108467102, "loss_utility": 1.5513713359832764, "step": 1148 }, { "epoch": 1.070829450139795, "grad_norm": 1.5834127216778224, "learning_rate": 1.4290645495340007e-05, "loss": 1.9793, "step": 1149 }, { "epoch": 1.070829450139795, "loss_reasoning": 0.5182167291641235, "loss_utility": 1.103623867034912, "step": 1149 }, { "epoch": 1.0717614165890028, "grad_norm": 1.203088437032557, "learning_rate": 1.4283741801864e-05, "loss": 1.7495, "step": 1150 }, { "epoch": 1.0717614165890028, "loss_reasoning": 0.4780954122543335, "loss_utility": 1.6455552577972412, "step": 1150 }, { "epoch": 1.0726933830382106, "grad_norm": 1.5818706742853206, "learning_rate": 1.4276838108387989e-05, "loss": 2.1025, "step": 1151 }, { "epoch": 1.0726933830382106, "loss_reasoning": 0.48325440287590027, "loss_utility": 1.321002721786499, "step": 1151 }, { "epoch": 1.0736253494874184, "grad_norm": 1.502362843660268, "learning_rate": 1.426993441491198e-05, "loss": 1.8457, "step": 1152 }, { "epoch": 1.0736253494874184, "loss_reasoning": 0.5577715635299683, "loss_utility": 1.0613806247711182, "step": 1152 }, { "epoch": 1.0745573159366262, "grad_norm": 1.4162721821268194, "learning_rate": 1.4263030721435968e-05, "loss": 1.8423, "step": 1153 }, { "epoch": 1.0745573159366262, "loss_reasoning": 0.49091386795043945, "loss_utility": 0.9260121583938599, "step": 1153 }, { "epoch": 1.0754892823858342, "grad_norm": 1.1327372145549603, "learning_rate": 1.425612702795996e-05, "loss": 1.7356, "step": 1154 }, { "epoch": 1.0754892823858342, "loss_reasoning": 0.5497727990150452, "loss_utility": 1.5585851669311523, "step": 1154 }, { "epoch": 1.076421248835042, "grad_norm": 1.4840601416921178, "learning_rate": 1.424922333448395e-05, "loss": 2.1783, "step": 1155 }, { "epoch": 1.076421248835042, "loss_reasoning": 0.5177275538444519, "loss_utility": 1.4797701835632324, "step": 1155 }, { "epoch": 1.0773532152842498, "grad_norm": 1.1340051808328722, "learning_rate": 1.424231964100794e-05, "loss": 1.9696, "step": 1156 }, { "epoch": 1.0773532152842498, "loss_reasoning": 0.5116564035415649, "loss_utility": 1.0434249639511108, "step": 1156 }, { "epoch": 1.0782851817334576, "grad_norm": 1.1172933059538739, "learning_rate": 1.423541594753193e-05, "loss": 2.0983, "step": 1157 }, { "epoch": 1.0782851817334576, "loss_reasoning": 0.5682657957077026, "loss_utility": 1.7346479892730713, "step": 1157 }, { "epoch": 1.0792171481826653, "grad_norm": 1.2537652262157855, "learning_rate": 1.4228512254055922e-05, "loss": 2.1408, "step": 1158 }, { "epoch": 1.0792171481826653, "loss_reasoning": 0.4821782410144806, "loss_utility": 1.3162862062454224, "step": 1158 }, { "epoch": 1.0801491146318734, "grad_norm": 1.1710364780750717, "learning_rate": 1.4221608560579911e-05, "loss": 1.5591, "step": 1159 }, { "epoch": 1.0801491146318734, "loss_reasoning": 0.46937236189842224, "loss_utility": 1.8994537591934204, "step": 1159 }, { "epoch": 1.0810810810810811, "grad_norm": 1.2556383867967325, "learning_rate": 1.4214704867103902e-05, "loss": 1.6704, "step": 1160 }, { "epoch": 1.0810810810810811, "loss_reasoning": 0.47916528582572937, "loss_utility": 0.842165470123291, "step": 1160 }, { "epoch": 1.082013047530289, "grad_norm": 1.4389236436445036, "learning_rate": 1.4207801173627892e-05, "loss": 1.4663, "step": 1161 }, { "epoch": 1.082013047530289, "loss_reasoning": 0.5233089923858643, "loss_utility": 1.4998676776885986, "step": 1161 }, { "epoch": 1.0829450139794967, "grad_norm": 1.0777158003774088, "learning_rate": 1.4200897480151883e-05, "loss": 1.6044, "step": 1162 }, { "epoch": 1.0829450139794967, "loss_reasoning": 0.5055186748504639, "loss_utility": 1.5319418907165527, "step": 1162 }, { "epoch": 1.0838769804287045, "grad_norm": 1.3935615117398763, "learning_rate": 1.4193993786675872e-05, "loss": 2.0707, "step": 1163 }, { "epoch": 1.0838769804287045, "loss_reasoning": 0.4383326768875122, "loss_utility": 0.8937731981277466, "step": 1163 }, { "epoch": 1.0848089468779123, "grad_norm": 1.1111252192323888, "learning_rate": 1.4187090093199863e-05, "loss": 1.7834, "step": 1164 }, { "epoch": 1.0848089468779123, "loss_reasoning": 0.44836586713790894, "loss_utility": 1.7515678405761719, "step": 1164 }, { "epoch": 1.0857409133271203, "grad_norm": 1.2738614479910402, "learning_rate": 1.4180186399723854e-05, "loss": 1.8226, "step": 1165 }, { "epoch": 1.0857409133271203, "loss_reasoning": 0.4778456687927246, "loss_utility": 1.175964117050171, "step": 1165 }, { "epoch": 1.086672879776328, "grad_norm": 1.0829684593172968, "learning_rate": 1.4173282706247844e-05, "loss": 1.6422, "step": 1166 }, { "epoch": 1.086672879776328, "loss_reasoning": 0.555095911026001, "loss_utility": 1.5796499252319336, "step": 1166 }, { "epoch": 1.0876048462255359, "grad_norm": 1.5192397005746414, "learning_rate": 1.4166379012771833e-05, "loss": 1.9629, "step": 1167 }, { "epoch": 1.0876048462255359, "loss_reasoning": 0.47785452008247375, "loss_utility": 1.7054023742675781, "step": 1167 }, { "epoch": 1.0885368126747437, "grad_norm": 1.232933360031871, "learning_rate": 1.4159475319295824e-05, "loss": 1.8658, "step": 1168 }, { "epoch": 1.0885368126747437, "loss_reasoning": 0.5691483020782471, "loss_utility": 1.3156969547271729, "step": 1168 }, { "epoch": 1.0894687791239515, "grad_norm": 1.3312593072956402, "learning_rate": 1.4152571625819815e-05, "loss": 2.091, "step": 1169 }, { "epoch": 1.0894687791239515, "loss_reasoning": 0.47345685958862305, "loss_utility": 1.7881739139556885, "step": 1169 }, { "epoch": 1.0904007455731595, "grad_norm": 1.0067878427933172, "learning_rate": 1.4145667932343806e-05, "loss": 1.8008, "step": 1170 }, { "epoch": 1.0904007455731595, "loss_reasoning": 0.5738539695739746, "loss_utility": 1.4428718090057373, "step": 1170 }, { "epoch": 1.0913327120223673, "grad_norm": 1.3096097728995386, "learning_rate": 1.4138764238867795e-05, "loss": 1.7149, "step": 1171 }, { "epoch": 1.0913327120223673, "loss_reasoning": 0.5598982572555542, "loss_utility": 0.7791116237640381, "step": 1171 }, { "epoch": 1.092264678471575, "grad_norm": 1.1208593078063909, "learning_rate": 1.4131860545391787e-05, "loss": 1.7002, "step": 1172 }, { "epoch": 1.092264678471575, "loss_reasoning": 0.5002721548080444, "loss_utility": 1.4097228050231934, "step": 1172 }, { "epoch": 1.0931966449207828, "grad_norm": 1.4269675705569633, "learning_rate": 1.4124956851915776e-05, "loss": 1.5152, "step": 1173 }, { "epoch": 1.0931966449207828, "loss_reasoning": 0.4899870455265045, "loss_utility": 0.93890780210495, "step": 1173 }, { "epoch": 1.0941286113699906, "grad_norm": 1.2243032197260637, "learning_rate": 1.4118053158439767e-05, "loss": 1.5682, "step": 1174 }, { "epoch": 1.0941286113699906, "loss_reasoning": 0.5148886442184448, "loss_utility": 1.3072655200958252, "step": 1174 }, { "epoch": 1.0950605778191984, "grad_norm": 1.1862111611935646, "learning_rate": 1.4111149464963756e-05, "loss": 1.9694, "step": 1175 }, { "epoch": 1.0950605778191984, "loss_reasoning": 0.5194129943847656, "loss_utility": 1.169843316078186, "step": 1175 }, { "epoch": 1.0959925442684064, "grad_norm": 1.2569885451566196, "learning_rate": 1.4104245771487748e-05, "loss": 1.8522, "step": 1176 }, { "epoch": 1.0959925442684064, "loss_reasoning": 0.5012482404708862, "loss_utility": 1.6501110792160034, "step": 1176 }, { "epoch": 1.0969245107176142, "grad_norm": 1.2604360662228253, "learning_rate": 1.4097342078011737e-05, "loss": 2.1303, "step": 1177 }, { "epoch": 1.0969245107176142, "loss_reasoning": 0.454609215259552, "loss_utility": 1.5656476020812988, "step": 1177 }, { "epoch": 1.097856477166822, "grad_norm": 1.258901376786618, "learning_rate": 1.4090438384535728e-05, "loss": 1.5742, "step": 1178 }, { "epoch": 1.097856477166822, "loss_reasoning": 0.45196282863616943, "loss_utility": 1.916029453277588, "step": 1178 }, { "epoch": 1.0987884436160298, "grad_norm": 1.2493442462949438, "learning_rate": 1.4083534691059717e-05, "loss": 1.961, "step": 1179 }, { "epoch": 1.0987884436160298, "loss_reasoning": 0.5640156865119934, "loss_utility": 1.168402910232544, "step": 1179 }, { "epoch": 1.0997204100652376, "grad_norm": 1.1581714333505104, "learning_rate": 1.407663099758371e-05, "loss": 1.5992, "step": 1180 }, { "epoch": 1.0997204100652376, "loss_reasoning": 0.4440426230430603, "loss_utility": 0.6737372875213623, "step": 1180 }, { "epoch": 1.1006523765144456, "grad_norm": 1.3702633429653297, "learning_rate": 1.4069727304107699e-05, "loss": 1.7025, "step": 1181 }, { "epoch": 1.1006523765144456, "loss_reasoning": 0.455748051404953, "loss_utility": 1.1763240098953247, "step": 1181 }, { "epoch": 1.1015843429636534, "grad_norm": 1.070080745843348, "learning_rate": 1.4062823610631688e-05, "loss": 1.5268, "step": 1182 }, { "epoch": 1.1015843429636534, "loss_reasoning": 0.4843713045120239, "loss_utility": 1.7773278951644897, "step": 1182 }, { "epoch": 1.1025163094128612, "grad_norm": 1.2190563022034853, "learning_rate": 1.405591991715568e-05, "loss": 1.7029, "step": 1183 }, { "epoch": 1.1025163094128612, "loss_reasoning": 0.518080472946167, "loss_utility": 1.908082127571106, "step": 1183 }, { "epoch": 1.103448275862069, "grad_norm": 1.2279206586451283, "learning_rate": 1.404901622367967e-05, "loss": 2.0133, "step": 1184 }, { "epoch": 1.103448275862069, "loss_reasoning": 0.5650762319564819, "loss_utility": 0.3617231845855713, "step": 1184 }, { "epoch": 1.1043802423112767, "grad_norm": 1.2773484967799662, "learning_rate": 1.404211253020366e-05, "loss": 1.7074, "step": 1185 }, { "epoch": 1.1043802423112767, "loss_reasoning": 0.6182785034179688, "loss_utility": 1.5947096347808838, "step": 1185 }, { "epoch": 1.1053122087604845, "grad_norm": 1.2359042115600074, "learning_rate": 1.4035208836727649e-05, "loss": 1.8595, "step": 1186 }, { "epoch": 1.1053122087604845, "loss_reasoning": 0.45539140701293945, "loss_utility": 1.0600415468215942, "step": 1186 }, { "epoch": 1.1062441752096925, "grad_norm": 1.2595344675854474, "learning_rate": 1.4028305143251641e-05, "loss": 1.9269, "step": 1187 }, { "epoch": 1.1062441752096925, "loss_reasoning": 0.5155929327011108, "loss_utility": 1.4835926294326782, "step": 1187 }, { "epoch": 1.1071761416589003, "grad_norm": 1.3542201653630284, "learning_rate": 1.402140144977563e-05, "loss": 1.9265, "step": 1188 }, { "epoch": 1.1071761416589003, "loss_reasoning": 0.4330042600631714, "loss_utility": 1.4182937145233154, "step": 1188 }, { "epoch": 1.1081081081081081, "grad_norm": 1.1252284046405976, "learning_rate": 1.4014497756299621e-05, "loss": 1.8835, "step": 1189 }, { "epoch": 1.1081081081081081, "loss_reasoning": 0.4677421450614929, "loss_utility": 0.8397023677825928, "step": 1189 }, { "epoch": 1.109040074557316, "grad_norm": 1.2208694155119955, "learning_rate": 1.400759406282361e-05, "loss": 1.5593, "step": 1190 }, { "epoch": 1.109040074557316, "loss_reasoning": 0.4827866554260254, "loss_utility": 1.230281949043274, "step": 1190 }, { "epoch": 1.1099720410065237, "grad_norm": 1.0989574294349045, "learning_rate": 1.4000690369347603e-05, "loss": 1.8637, "step": 1191 }, { "epoch": 1.1099720410065237, "loss_reasoning": 0.5552942752838135, "loss_utility": 1.1313464641571045, "step": 1191 }, { "epoch": 1.1109040074557317, "grad_norm": 1.2305804450583937, "learning_rate": 1.3993786675871592e-05, "loss": 1.7623, "step": 1192 }, { "epoch": 1.1109040074557317, "loss_reasoning": 0.5233086347579956, "loss_utility": 1.4357953071594238, "step": 1192 }, { "epoch": 1.1118359739049395, "grad_norm": 1.170881757981615, "learning_rate": 1.3986882982395582e-05, "loss": 1.8793, "step": 1193 }, { "epoch": 1.1118359739049395, "loss_reasoning": 0.5375800132751465, "loss_utility": 1.2444589138031006, "step": 1193 }, { "epoch": 1.1127679403541473, "grad_norm": 1.2801257771284698, "learning_rate": 1.3979979288919575e-05, "loss": 1.9162, "step": 1194 }, { "epoch": 1.1127679403541473, "loss_reasoning": 0.5188099145889282, "loss_utility": 1.0372717380523682, "step": 1194 }, { "epoch": 1.113699906803355, "grad_norm": 1.1950025421744155, "learning_rate": 1.3973075595443564e-05, "loss": 1.7299, "step": 1195 }, { "epoch": 1.113699906803355, "loss_reasoning": 0.5218873023986816, "loss_utility": 1.0283453464508057, "step": 1195 }, { "epoch": 1.1146318732525629, "grad_norm": 1.4469533805804962, "learning_rate": 1.3966171901967553e-05, "loss": 1.4811, "step": 1196 }, { "epoch": 1.1146318732525629, "loss_reasoning": 0.5285834074020386, "loss_utility": 1.8800833225250244, "step": 1196 }, { "epoch": 1.1155638397017706, "grad_norm": 1.0735670216600899, "learning_rate": 1.3959268208491544e-05, "loss": 2.145, "step": 1197 }, { "epoch": 1.1155638397017706, "loss_reasoning": 0.5072015523910522, "loss_utility": 1.5702860355377197, "step": 1197 }, { "epoch": 1.1164958061509787, "grad_norm": 1.2033708198558355, "learning_rate": 1.3952364515015534e-05, "loss": 2.2226, "step": 1198 }, { "epoch": 1.1164958061509787, "loss_reasoning": 0.532204270362854, "loss_utility": 2.028496265411377, "step": 1198 }, { "epoch": 1.1174277726001864, "grad_norm": 1.41773235711196, "learning_rate": 1.3945460821539525e-05, "loss": 1.957, "step": 1199 }, { "epoch": 1.1174277726001864, "loss_reasoning": 0.49362608790397644, "loss_utility": 1.02839195728302, "step": 1199 }, { "epoch": 1.1183597390493942, "grad_norm": 1.1601489943563565, "learning_rate": 1.3938557128063514e-05, "loss": 1.6382, "step": 1200 }, { "epoch": 1.1183597390493942, "loss_reasoning": 0.5349166989326477, "loss_utility": 2.2880115509033203, "step": 1200 }, { "epoch": 1.119291705498602, "grad_norm": 1.4643676755405282, "learning_rate": 1.3931653434587507e-05, "loss": 1.9688, "step": 1201 }, { "epoch": 1.119291705498602, "loss_reasoning": 0.5021581649780273, "loss_utility": 1.8404804468154907, "step": 1201 }, { "epoch": 1.1202236719478098, "grad_norm": 1.1371012556524733, "learning_rate": 1.3924749741111496e-05, "loss": 2.2967, "step": 1202 }, { "epoch": 1.1202236719478098, "loss_reasoning": 0.48023685812950134, "loss_utility": 0.8998496532440186, "step": 1202 }, { "epoch": 1.1211556383970178, "grad_norm": 1.4938113329189973, "learning_rate": 1.3917846047635486e-05, "loss": 1.8795, "step": 1203 }, { "epoch": 1.1211556383970178, "loss_reasoning": 0.46698346734046936, "loss_utility": 1.2149996757507324, "step": 1203 }, { "epoch": 1.1220876048462256, "grad_norm": 1.3074150615839146, "learning_rate": 1.3910942354159475e-05, "loss": 1.7191, "step": 1204 }, { "epoch": 1.1220876048462256, "loss_reasoning": 0.5176113247871399, "loss_utility": 1.6479854583740234, "step": 1204 }, { "epoch": 1.1230195712954334, "grad_norm": 1.3375852986324739, "learning_rate": 1.3904038660683468e-05, "loss": 1.8737, "step": 1205 }, { "epoch": 1.1230195712954334, "loss_reasoning": 0.5412139892578125, "loss_utility": 1.2634894847869873, "step": 1205 }, { "epoch": 1.1239515377446412, "grad_norm": 1.2398122305148553, "learning_rate": 1.3897134967207457e-05, "loss": 1.9657, "step": 1206 }, { "epoch": 1.1239515377446412, "loss_reasoning": 0.4891852140426636, "loss_utility": 1.1711783409118652, "step": 1206 }, { "epoch": 1.124883504193849, "grad_norm": 1.204468435485415, "learning_rate": 1.3890231273731448e-05, "loss": 1.7904, "step": 1207 }, { "epoch": 1.124883504193849, "loss_reasoning": 0.4958980977535248, "loss_utility": 1.4754526615142822, "step": 1207 }, { "epoch": 1.1258154706430568, "grad_norm": 1.2981881035610223, "learning_rate": 1.3883327580255437e-05, "loss": 1.9225, "step": 1208 }, { "epoch": 1.1258154706430568, "loss_reasoning": 0.4883168935775757, "loss_utility": 1.4025332927703857, "step": 1208 }, { "epoch": 1.1267474370922648, "grad_norm": 1.3261004337187317, "learning_rate": 1.3876423886779429e-05, "loss": 2.0114, "step": 1209 }, { "epoch": 1.1267474370922648, "loss_reasoning": 0.47402530908584595, "loss_utility": 1.4898020029067993, "step": 1209 }, { "epoch": 1.1276794035414726, "grad_norm": 1.5827415974734862, "learning_rate": 1.3869520193303418e-05, "loss": 1.8861, "step": 1210 }, { "epoch": 1.1276794035414726, "loss_reasoning": 0.5081807374954224, "loss_utility": 1.5683728456497192, "step": 1210 }, { "epoch": 1.1286113699906803, "grad_norm": 1.243380100136713, "learning_rate": 1.3862616499827409e-05, "loss": 1.8946, "step": 1211 }, { "epoch": 1.1286113699906803, "loss_reasoning": 0.5396127700805664, "loss_utility": 0.7578475475311279, "step": 1211 }, { "epoch": 1.1295433364398881, "grad_norm": 0.9090147568414595, "learning_rate": 1.38557128063514e-05, "loss": 1.3884, "step": 1212 }, { "epoch": 1.1295433364398881, "loss_reasoning": 0.5277676582336426, "loss_utility": 2.045616388320923, "step": 1212 }, { "epoch": 1.130475302889096, "grad_norm": 1.4066867551998745, "learning_rate": 1.384880911287539e-05, "loss": 2.2937, "step": 1213 }, { "epoch": 1.130475302889096, "loss_reasoning": 0.49519583582878113, "loss_utility": 1.7922594547271729, "step": 1213 }, { "epoch": 1.131407269338304, "grad_norm": 1.2933962396728047, "learning_rate": 1.384190541939938e-05, "loss": 1.9427, "step": 1214 }, { "epoch": 1.131407269338304, "loss_reasoning": 0.5571943521499634, "loss_utility": 1.0405805110931396, "step": 1214 }, { "epoch": 1.1323392357875117, "grad_norm": 1.0744249828833776, "learning_rate": 1.383500172592337e-05, "loss": 1.7976, "step": 1215 }, { "epoch": 1.1323392357875117, "loss_reasoning": 0.4894583821296692, "loss_utility": 1.4808226823806763, "step": 1215 }, { "epoch": 1.1332712022367195, "grad_norm": 1.3539354495855451, "learning_rate": 1.382809803244736e-05, "loss": 2.1306, "step": 1216 }, { "epoch": 1.1332712022367195, "loss_reasoning": 0.48524683713912964, "loss_utility": 1.4275314807891846, "step": 1216 }, { "epoch": 1.1342031686859273, "grad_norm": 1.476546985777759, "learning_rate": 1.3821194338971351e-05, "loss": 1.6817, "step": 1217 }, { "epoch": 1.1342031686859273, "loss_reasoning": 0.4524511992931366, "loss_utility": 1.2397123575210571, "step": 1217 }, { "epoch": 1.135135135135135, "grad_norm": 1.1635853889946408, "learning_rate": 1.381429064549534e-05, "loss": 2.0086, "step": 1218 }, { "epoch": 1.135135135135135, "loss_reasoning": 0.48592203855514526, "loss_utility": 1.094264030456543, "step": 1218 }, { "epoch": 1.1360671015843429, "grad_norm": 0.9428415640109309, "learning_rate": 1.3807386952019331e-05, "loss": 1.7589, "step": 1219 }, { "epoch": 1.1360671015843429, "loss_reasoning": 0.475355863571167, "loss_utility": 1.2328778505325317, "step": 1219 }, { "epoch": 1.1369990680335509, "grad_norm": 1.2829964947348877, "learning_rate": 1.3800483258543322e-05, "loss": 2.0083, "step": 1220 }, { "epoch": 1.1369990680335509, "loss_reasoning": 0.509338915348053, "loss_utility": 0.8728242516517639, "step": 1220 }, { "epoch": 1.1379310344827587, "grad_norm": 1.2401934709064781, "learning_rate": 1.3793579565067313e-05, "loss": 1.7473, "step": 1221 }, { "epoch": 1.1379310344827587, "loss_reasoning": 0.5429061055183411, "loss_utility": 1.0271844863891602, "step": 1221 }, { "epoch": 1.1388630009319665, "grad_norm": 1.19325828628261, "learning_rate": 1.3786675871591302e-05, "loss": 1.641, "step": 1222 }, { "epoch": 1.1388630009319665, "loss_reasoning": 0.5319566130638123, "loss_utility": 1.5224100351333618, "step": 1222 }, { "epoch": 1.1397949673811743, "grad_norm": 1.1909179196503834, "learning_rate": 1.3779772178115294e-05, "loss": 1.9367, "step": 1223 }, { "epoch": 1.1397949673811743, "loss_reasoning": 0.47029823064804077, "loss_utility": 1.210278034210205, "step": 1223 }, { "epoch": 1.140726933830382, "grad_norm": 1.4066308741907207, "learning_rate": 1.3772868484639283e-05, "loss": 2.0482, "step": 1224 }, { "epoch": 1.140726933830382, "loss_reasoning": 0.5479521751403809, "loss_utility": 1.8566153049468994, "step": 1224 }, { "epoch": 1.14165890027959, "grad_norm": 1.1067351997681727, "learning_rate": 1.3765964791163274e-05, "loss": 1.7636, "step": 1225 }, { "epoch": 1.14165890027959, "loss_reasoning": 0.5540486574172974, "loss_utility": 0.9492836594581604, "step": 1225 }, { "epoch": 1.1425908667287978, "grad_norm": 1.0209060625583066, "learning_rate": 1.3759061097687263e-05, "loss": 1.6066, "step": 1226 }, { "epoch": 1.1425908667287978, "loss_reasoning": 0.4544595777988434, "loss_utility": 1.083321213722229, "step": 1226 }, { "epoch": 1.1435228331780056, "grad_norm": 1.102823461716175, "learning_rate": 1.3752157404211255e-05, "loss": 1.4249, "step": 1227 }, { "epoch": 1.1435228331780056, "loss_reasoning": 0.44529855251312256, "loss_utility": 2.212684154510498, "step": 1227 }, { "epoch": 1.1444547996272134, "grad_norm": 2.3608454698612964, "learning_rate": 1.3745253710735244e-05, "loss": 2.4647, "step": 1228 }, { "epoch": 1.1444547996272134, "loss_reasoning": 0.5136493444442749, "loss_utility": 1.4606482982635498, "step": 1228 }, { "epoch": 1.1453867660764212, "grad_norm": 1.411290089450947, "learning_rate": 1.3738350017259235e-05, "loss": 1.4583, "step": 1229 }, { "epoch": 1.1453867660764212, "loss_reasoning": 0.528867244720459, "loss_utility": 1.074776291847229, "step": 1229 }, { "epoch": 1.146318732525629, "grad_norm": 1.373053285095619, "learning_rate": 1.3731446323783224e-05, "loss": 1.9481, "step": 1230 }, { "epoch": 1.146318732525629, "loss_reasoning": 0.5217773914337158, "loss_utility": 1.1128864288330078, "step": 1230 }, { "epoch": 1.147250698974837, "grad_norm": 1.2164306811655163, "learning_rate": 1.3724542630307217e-05, "loss": 1.7688, "step": 1231 }, { "epoch": 1.147250698974837, "loss_reasoning": 0.5058733224868774, "loss_utility": 1.656668782234192, "step": 1231 }, { "epoch": 1.1481826654240448, "grad_norm": 1.228916416259685, "learning_rate": 1.3717638936831206e-05, "loss": 1.9591, "step": 1232 }, { "epoch": 1.1481826654240448, "loss_reasoning": 0.4559694826602936, "loss_utility": 0.41596725583076477, "step": 1232 }, { "epoch": 1.1491146318732526, "grad_norm": 1.2738209512198408, "learning_rate": 1.3710735243355195e-05, "loss": 1.4839, "step": 1233 }, { "epoch": 1.1491146318732526, "loss_reasoning": 0.5297672748565674, "loss_utility": 1.338284969329834, "step": 1233 }, { "epoch": 1.1500465983224604, "grad_norm": 1.431791621982381, "learning_rate": 1.3703831549879187e-05, "loss": 1.6989, "step": 1234 }, { "epoch": 1.1500465983224604, "loss_reasoning": 0.48905977606773376, "loss_utility": 1.3166863918304443, "step": 1234 }, { "epoch": 1.1509785647716682, "grad_norm": 1.3924376028734946, "learning_rate": 1.3696927856403178e-05, "loss": 1.9214, "step": 1235 }, { "epoch": 1.1509785647716682, "loss_reasoning": 0.5334790945053101, "loss_utility": 1.7069590091705322, "step": 1235 }, { "epoch": 1.1519105312208762, "grad_norm": 1.0743446432454886, "learning_rate": 1.3690024162927167e-05, "loss": 2.0003, "step": 1236 }, { "epoch": 1.1519105312208762, "loss_reasoning": 0.5058505535125732, "loss_utility": 1.2236790657043457, "step": 1236 }, { "epoch": 1.152842497670084, "grad_norm": 1.1702983274465861, "learning_rate": 1.3683120469451156e-05, "loss": 1.9537, "step": 1237 }, { "epoch": 1.152842497670084, "loss_reasoning": 0.4924415946006775, "loss_utility": 1.1839869022369385, "step": 1237 }, { "epoch": 1.1537744641192917, "grad_norm": 1.1342663932120667, "learning_rate": 1.3676216775975148e-05, "loss": 1.8913, "step": 1238 }, { "epoch": 1.1537744641192917, "loss_reasoning": 0.458976686000824, "loss_utility": 1.5060844421386719, "step": 1238 }, { "epoch": 1.1547064305684995, "grad_norm": 1.2619570417227735, "learning_rate": 1.3669313082499137e-05, "loss": 1.7826, "step": 1239 }, { "epoch": 1.1547064305684995, "loss_reasoning": 0.5342459082603455, "loss_utility": 1.108048677444458, "step": 1239 }, { "epoch": 1.1556383970177073, "grad_norm": 1.4456184384322301, "learning_rate": 1.3662409389023128e-05, "loss": 1.7831, "step": 1240 }, { "epoch": 1.1556383970177073, "loss_reasoning": 0.5794105529785156, "loss_utility": 1.1045916080474854, "step": 1240 }, { "epoch": 1.156570363466915, "grad_norm": 1.227196524359293, "learning_rate": 1.365550569554712e-05, "loss": 1.7819, "step": 1241 }, { "epoch": 1.156570363466915, "loss_reasoning": 0.48415622115135193, "loss_utility": 1.8624482154846191, "step": 1241 }, { "epoch": 1.157502329916123, "grad_norm": 1.4603880022565336, "learning_rate": 1.364860200207111e-05, "loss": 1.9979, "step": 1242 }, { "epoch": 1.157502329916123, "loss_reasoning": 0.4633300304412842, "loss_utility": 1.5318119525909424, "step": 1242 }, { "epoch": 1.158434296365331, "grad_norm": 1.504204070801984, "learning_rate": 1.3641698308595099e-05, "loss": 1.7271, "step": 1243 }, { "epoch": 1.158434296365331, "loss_reasoning": 0.5209801197052002, "loss_utility": 1.3517221212387085, "step": 1243 }, { "epoch": 1.1593662628145387, "grad_norm": 1.3347700296410305, "learning_rate": 1.363479461511909e-05, "loss": 2.04, "step": 1244 }, { "epoch": 1.1593662628145387, "loss_reasoning": 0.5410217046737671, "loss_utility": 1.4759197235107422, "step": 1244 }, { "epoch": 1.1602982292637465, "grad_norm": 1.179982136917814, "learning_rate": 1.3627890921643082e-05, "loss": 1.8325, "step": 1245 }, { "epoch": 1.1602982292637465, "loss_reasoning": 0.48476916551589966, "loss_utility": 0.9996398687362671, "step": 1245 }, { "epoch": 1.1612301957129543, "grad_norm": 1.02918160645871, "learning_rate": 1.362098722816707e-05, "loss": 1.7036, "step": 1246 }, { "epoch": 1.1612301957129543, "loss_reasoning": 0.5333903431892395, "loss_utility": 0.5930050611495972, "step": 1246 }, { "epoch": 1.1621621621621623, "grad_norm": 1.1384179433667254, "learning_rate": 1.361408353469106e-05, "loss": 1.8345, "step": 1247 }, { "epoch": 1.1621621621621623, "loss_reasoning": 0.4727441370487213, "loss_utility": 1.6806381940841675, "step": 1247 }, { "epoch": 1.16309412861137, "grad_norm": 1.6112796209324727, "learning_rate": 1.360717984121505e-05, "loss": 1.6431, "step": 1248 }, { "epoch": 1.16309412861137, "loss_reasoning": 0.5098573565483093, "loss_utility": 1.6883678436279297, "step": 1248 }, { "epoch": 1.1640260950605779, "grad_norm": 2.099731044272882, "learning_rate": 1.3600276147739041e-05, "loss": 2.3521, "step": 1249 }, { "epoch": 1.1640260950605779, "loss_reasoning": 0.4881410598754883, "loss_utility": 1.35191011428833, "step": 1249 }, { "epoch": 1.1649580615097856, "grad_norm": 1.1704187966360244, "learning_rate": 1.3593372454263032e-05, "loss": 1.8237, "step": 1250 }, { "epoch": 1.1649580615097856, "loss_reasoning": 0.47731178998947144, "loss_utility": 1.508148431777954, "step": 1250 }, { "epoch": 1.1658900279589934, "grad_norm": 1.1583741857618153, "learning_rate": 1.3586468760787021e-05, "loss": 1.7364, "step": 1251 }, { "epoch": 1.1658900279589934, "loss_reasoning": 0.46700388193130493, "loss_utility": 0.5271731615066528, "step": 1251 }, { "epoch": 1.1668219944082012, "grad_norm": 1.1367860948822963, "learning_rate": 1.3579565067311014e-05, "loss": 1.526, "step": 1252 }, { "epoch": 1.1668219944082012, "loss_reasoning": 0.4900282025337219, "loss_utility": 1.3405585289001465, "step": 1252 }, { "epoch": 1.167753960857409, "grad_norm": 1.2677657839440317, "learning_rate": 1.3572661373835003e-05, "loss": 1.7887, "step": 1253 }, { "epoch": 1.167753960857409, "loss_reasoning": 0.4689238667488098, "loss_utility": 1.4359666109085083, "step": 1253 }, { "epoch": 1.168685927306617, "grad_norm": 1.209300507613165, "learning_rate": 1.3565757680358993e-05, "loss": 1.8928, "step": 1254 }, { "epoch": 1.168685927306617, "loss_reasoning": 0.5355043411254883, "loss_utility": 1.4199219942092896, "step": 1254 }, { "epoch": 1.1696178937558248, "grad_norm": 1.3715226012123958, "learning_rate": 1.3558853986882982e-05, "loss": 1.9052, "step": 1255 }, { "epoch": 1.1696178937558248, "loss_reasoning": 0.604995608329773, "loss_utility": 1.4755324125289917, "step": 1255 }, { "epoch": 1.1705498602050326, "grad_norm": 1.4395922157895549, "learning_rate": 1.3551950293406975e-05, "loss": 1.9926, "step": 1256 }, { "epoch": 1.1705498602050326, "loss_reasoning": 0.5064734220504761, "loss_utility": 2.254579782485962, "step": 1256 }, { "epoch": 1.1714818266542404, "grad_norm": 1.0955102393283354, "learning_rate": 1.3545046599930964e-05, "loss": 2.0139, "step": 1257 }, { "epoch": 1.1714818266542404, "loss_reasoning": 0.45183444023132324, "loss_utility": 1.4071896076202393, "step": 1257 }, { "epoch": 1.1724137931034484, "grad_norm": 1.1083537545064492, "learning_rate": 1.3538142906454955e-05, "loss": 1.5187, "step": 1258 }, { "epoch": 1.1724137931034484, "loss_reasoning": 0.5817376971244812, "loss_utility": 1.5406380891799927, "step": 1258 }, { "epoch": 1.1733457595526562, "grad_norm": 1.0384198191565652, "learning_rate": 1.3531239212978944e-05, "loss": 1.702, "step": 1259 }, { "epoch": 1.1733457595526562, "loss_reasoning": 0.49954065680503845, "loss_utility": 0.9958361387252808, "step": 1259 }, { "epoch": 1.174277726001864, "grad_norm": 1.0786164372136204, "learning_rate": 1.3524335519502936e-05, "loss": 1.7801, "step": 1260 }, { "epoch": 1.174277726001864, "loss_reasoning": 0.4713127017021179, "loss_utility": 1.1254267692565918, "step": 1260 }, { "epoch": 1.1752096924510718, "grad_norm": 1.5267431012413717, "learning_rate": 1.3517431826026925e-05, "loss": 1.692, "step": 1261 }, { "epoch": 1.1752096924510718, "loss_reasoning": 0.42969727516174316, "loss_utility": 1.2091922760009766, "step": 1261 }, { "epoch": 1.1761416589002796, "grad_norm": 1.3048222232535183, "learning_rate": 1.3510528132550916e-05, "loss": 1.7399, "step": 1262 }, { "epoch": 1.1761416589002796, "loss_reasoning": 0.48879119753837585, "loss_utility": 1.276002287864685, "step": 1262 }, { "epoch": 1.1770736253494873, "grad_norm": 1.0551138175917714, "learning_rate": 1.3503624439074907e-05, "loss": 1.7866, "step": 1263 }, { "epoch": 1.1770736253494873, "loss_reasoning": 0.537793755531311, "loss_utility": 1.4553650617599487, "step": 1263 }, { "epoch": 1.1780055917986951, "grad_norm": 1.1483415727641413, "learning_rate": 1.3496720745598897e-05, "loss": 1.8327, "step": 1264 }, { "epoch": 1.1780055917986951, "loss_reasoning": 0.441882848739624, "loss_utility": 1.3164863586425781, "step": 1264 }, { "epoch": 1.1789375582479031, "grad_norm": 1.0966284719771184, "learning_rate": 1.3489817052122886e-05, "loss": 1.8643, "step": 1265 }, { "epoch": 1.1789375582479031, "loss_reasoning": 0.5187690258026123, "loss_utility": 0.7706941962242126, "step": 1265 }, { "epoch": 1.179869524697111, "grad_norm": 1.1640124997529244, "learning_rate": 1.3482913358646877e-05, "loss": 1.7088, "step": 1266 }, { "epoch": 1.179869524697111, "loss_reasoning": 0.501538872718811, "loss_utility": 1.3119285106658936, "step": 1266 }, { "epoch": 1.1808014911463187, "grad_norm": 1.2055835933513817, "learning_rate": 1.3476009665170868e-05, "loss": 1.8922, "step": 1267 }, { "epoch": 1.1808014911463187, "loss_reasoning": 0.47318512201309204, "loss_utility": 1.182835578918457, "step": 1267 }, { "epoch": 1.1817334575955265, "grad_norm": 1.4283743095129517, "learning_rate": 1.3469105971694858e-05, "loss": 1.8775, "step": 1268 }, { "epoch": 1.1817334575955265, "loss_reasoning": 0.5219102501869202, "loss_utility": 1.1011624336242676, "step": 1268 }, { "epoch": 1.1826654240447343, "grad_norm": 1.0517912932309392, "learning_rate": 1.3462202278218848e-05, "loss": 1.8131, "step": 1269 }, { "epoch": 1.1826654240447343, "loss_reasoning": 0.47840380668640137, "loss_utility": 1.2156476974487305, "step": 1269 }, { "epoch": 1.1835973904939423, "grad_norm": 1.211413244351553, "learning_rate": 1.3455298584742838e-05, "loss": 1.8478, "step": 1270 }, { "epoch": 1.1835973904939423, "loss_reasoning": 0.5393287539482117, "loss_utility": 0.6294991970062256, "step": 1270 }, { "epoch": 1.18452935694315, "grad_norm": 1.0332520082255168, "learning_rate": 1.3448394891266829e-05, "loss": 1.5473, "step": 1271 }, { "epoch": 1.18452935694315, "loss_reasoning": 0.48346126079559326, "loss_utility": 1.1105916500091553, "step": 1271 }, { "epoch": 1.1854613233923579, "grad_norm": 1.0859587324529447, "learning_rate": 1.344149119779082e-05, "loss": 1.5631, "step": 1272 }, { "epoch": 1.1854613233923579, "loss_reasoning": 0.4188575744628906, "loss_utility": 1.8157193660736084, "step": 1272 }, { "epoch": 1.1863932898415657, "grad_norm": 1.2185365444023974, "learning_rate": 1.3434587504314809e-05, "loss": 1.6989, "step": 1273 }, { "epoch": 1.1863932898415657, "loss_reasoning": 0.4993203580379486, "loss_utility": 1.2091870307922363, "step": 1273 }, { "epoch": 1.1873252562907735, "grad_norm": 1.2447583077836062, "learning_rate": 1.3427683810838801e-05, "loss": 1.7107, "step": 1274 }, { "epoch": 1.1873252562907735, "loss_reasoning": 0.49308645725250244, "loss_utility": 2.152461528778076, "step": 1274 }, { "epoch": 1.1882572227399812, "grad_norm": 1.4634314977290965, "learning_rate": 1.342078011736279e-05, "loss": 1.9194, "step": 1275 }, { "epoch": 1.1882572227399812, "loss_reasoning": 0.501106858253479, "loss_utility": 1.4085110425949097, "step": 1275 }, { "epoch": 1.1891891891891893, "grad_norm": 1.1658216530984253, "learning_rate": 1.3413876423886781e-05, "loss": 1.5702, "step": 1276 }, { "epoch": 1.1891891891891893, "loss_reasoning": 0.4926104247570038, "loss_utility": 1.1373369693756104, "step": 1276 }, { "epoch": 1.190121155638397, "grad_norm": 1.2198579863600238, "learning_rate": 1.340697273041077e-05, "loss": 1.8614, "step": 1277 }, { "epoch": 1.190121155638397, "loss_reasoning": 0.5490109324455261, "loss_utility": 1.2870947122573853, "step": 1277 }, { "epoch": 1.1910531220876048, "grad_norm": 1.0633054055064017, "learning_rate": 1.3400069036934762e-05, "loss": 1.8055, "step": 1278 }, { "epoch": 1.1910531220876048, "loss_reasoning": 0.48574334383010864, "loss_utility": 1.2863500118255615, "step": 1278 }, { "epoch": 1.1919850885368126, "grad_norm": 1.290120503425908, "learning_rate": 1.3393165343458751e-05, "loss": 1.833, "step": 1279 }, { "epoch": 1.1919850885368126, "loss_reasoning": 0.5188897848129272, "loss_utility": 0.8244497776031494, "step": 1279 }, { "epoch": 1.1929170549860204, "grad_norm": 1.128102555019452, "learning_rate": 1.3386261649982742e-05, "loss": 1.4855, "step": 1280 }, { "epoch": 1.1929170549860204, "loss_reasoning": 0.4995259642601013, "loss_utility": 1.57249915599823, "step": 1280 }, { "epoch": 1.1938490214352284, "grad_norm": 1.1730220394267932, "learning_rate": 1.3379357956506733e-05, "loss": 1.9554, "step": 1281 }, { "epoch": 1.1938490214352284, "loss_reasoning": 0.5108991265296936, "loss_utility": 1.1712210178375244, "step": 1281 }, { "epoch": 1.1947809878844362, "grad_norm": 1.2298907728130655, "learning_rate": 1.3372454263030724e-05, "loss": 1.834, "step": 1282 }, { "epoch": 1.1947809878844362, "loss_reasoning": 0.5333400964736938, "loss_utility": 0.6803401708602905, "step": 1282 }, { "epoch": 1.195712954333644, "grad_norm": 1.0239130637628073, "learning_rate": 1.3365550569554713e-05, "loss": 1.5645, "step": 1283 }, { "epoch": 1.195712954333644, "loss_reasoning": 0.5376918315887451, "loss_utility": 1.311629056930542, "step": 1283 }, { "epoch": 1.1966449207828518, "grad_norm": 1.165176270094229, "learning_rate": 1.3358646876078702e-05, "loss": 1.893, "step": 1284 }, { "epoch": 1.1966449207828518, "loss_reasoning": 0.4895755350589752, "loss_utility": 1.0725162029266357, "step": 1284 }, { "epoch": 1.1975768872320596, "grad_norm": 1.2992285536591317, "learning_rate": 1.3351743182602694e-05, "loss": 1.9892, "step": 1285 }, { "epoch": 1.1975768872320596, "loss_reasoning": 0.5613129138946533, "loss_utility": 1.4468903541564941, "step": 1285 }, { "epoch": 1.1985088536812674, "grad_norm": 0.9879170959946644, "learning_rate": 1.3344839489126685e-05, "loss": 1.6459, "step": 1286 }, { "epoch": 1.1985088536812674, "loss_reasoning": 0.45037180185317993, "loss_utility": 1.7015961408615112, "step": 1286 }, { "epoch": 1.1994408201304754, "grad_norm": 1.2591563541214297, "learning_rate": 1.3337935795650674e-05, "loss": 1.8605, "step": 1287 }, { "epoch": 1.1994408201304754, "loss_reasoning": 0.4666532576084137, "loss_utility": 1.865297555923462, "step": 1287 }, { "epoch": 1.2003727865796832, "grad_norm": 1.0674118532109762, "learning_rate": 1.3331032102174663e-05, "loss": 1.6128, "step": 1288 }, { "epoch": 1.2003727865796832, "loss_reasoning": 0.522860586643219, "loss_utility": 0.877140998840332, "step": 1288 }, { "epoch": 1.201304753028891, "grad_norm": 0.9544761529087301, "learning_rate": 1.3324128408698655e-05, "loss": 1.16, "step": 1289 }, { "epoch": 1.201304753028891, "loss_reasoning": 0.5165968537330627, "loss_utility": 1.6980897188186646, "step": 1289 }, { "epoch": 1.2022367194780987, "grad_norm": 1.3661004044509315, "learning_rate": 1.3317224715222644e-05, "loss": 1.867, "step": 1290 }, { "epoch": 1.2022367194780987, "loss_reasoning": 0.5427713990211487, "loss_utility": 1.4208476543426514, "step": 1290 }, { "epoch": 1.2031686859273065, "grad_norm": 1.6337947005115574, "learning_rate": 1.3310321021746635e-05, "loss": 1.6267, "step": 1291 }, { "epoch": 1.2031686859273065, "loss_reasoning": 0.48595374822616577, "loss_utility": 1.850136399269104, "step": 1291 }, { "epoch": 1.2041006523765145, "grad_norm": 2.5678916426999945, "learning_rate": 1.3303417328270628e-05, "loss": 1.7715, "step": 1292 }, { "epoch": 1.2041006523765145, "loss_reasoning": 0.5327886343002319, "loss_utility": 1.3398942947387695, "step": 1292 }, { "epoch": 1.2050326188257223, "grad_norm": 1.3753207192679862, "learning_rate": 1.3296513634794617e-05, "loss": 1.8856, "step": 1293 }, { "epoch": 1.2050326188257223, "loss_reasoning": 0.5341184735298157, "loss_utility": 1.1857590675354004, "step": 1293 }, { "epoch": 1.2059645852749301, "grad_norm": 1.349767914810132, "learning_rate": 1.3289609941318606e-05, "loss": 2.2909, "step": 1294 }, { "epoch": 1.2059645852749301, "loss_reasoning": 0.5058311223983765, "loss_utility": 0.9897795915603638, "step": 1294 }, { "epoch": 1.206896551724138, "grad_norm": 1.2344091332463463, "learning_rate": 1.3282706247842596e-05, "loss": 1.6201, "step": 1295 }, { "epoch": 1.206896551724138, "loss_reasoning": 0.5079570412635803, "loss_utility": 1.449323296546936, "step": 1295 }, { "epoch": 1.2078285181733457, "grad_norm": 1.5184552651123053, "learning_rate": 1.3275802554366589e-05, "loss": 1.7516, "step": 1296 }, { "epoch": 1.2078285181733457, "loss_reasoning": 0.5854381322860718, "loss_utility": 1.0752193927764893, "step": 1296 }, { "epoch": 1.2087604846225535, "grad_norm": 1.2595744178946695, "learning_rate": 1.3268898860890578e-05, "loss": 1.7402, "step": 1297 }, { "epoch": 1.2087604846225535, "loss_reasoning": 0.46882906556129456, "loss_utility": 1.5585658550262451, "step": 1297 }, { "epoch": 1.2096924510717615, "grad_norm": 1.148854245206818, "learning_rate": 1.3261995167414567e-05, "loss": 1.8127, "step": 1298 }, { "epoch": 1.2096924510717615, "loss_reasoning": 0.5040085315704346, "loss_utility": 1.17972731590271, "step": 1298 }, { "epoch": 1.2106244175209693, "grad_norm": 1.0553922477644346, "learning_rate": 1.3255091473938558e-05, "loss": 1.7188, "step": 1299 }, { "epoch": 1.2106244175209693, "loss_reasoning": 0.4962509870529175, "loss_utility": 1.4443578720092773, "step": 1299 }, { "epoch": 1.211556383970177, "grad_norm": 0.9782424804822353, "learning_rate": 1.3248187780462548e-05, "loss": 1.6052, "step": 1300 }, { "epoch": 1.211556383970177, "loss_reasoning": 0.5187530517578125, "loss_utility": 1.6385753154754639, "step": 1300 }, { "epoch": 1.2124883504193849, "grad_norm": 1.1718431578917132, "learning_rate": 1.3241284086986539e-05, "loss": 2.0245, "step": 1301 }, { "epoch": 1.2124883504193849, "loss_reasoning": 0.5367618799209595, "loss_utility": 2.092963457107544, "step": 1301 }, { "epoch": 1.2134203168685926, "grad_norm": 3.156943091762351, "learning_rate": 1.3234380393510528e-05, "loss": 1.919, "step": 1302 }, { "epoch": 1.2134203168685926, "loss_reasoning": 0.4625588655471802, "loss_utility": 1.1553318500518799, "step": 1302 }, { "epoch": 1.2143522833178007, "grad_norm": 1.1057928777472192, "learning_rate": 1.322747670003452e-05, "loss": 1.6375, "step": 1303 }, { "epoch": 1.2143522833178007, "loss_reasoning": 0.41296452283859253, "loss_utility": 1.4556803703308105, "step": 1303 }, { "epoch": 1.2152842497670084, "grad_norm": 1.1459567818400016, "learning_rate": 1.322057300655851e-05, "loss": 1.9063, "step": 1304 }, { "epoch": 1.2152842497670084, "loss_reasoning": 0.5681904554367065, "loss_utility": 1.3768141269683838, "step": 1304 }, { "epoch": 1.2162162162162162, "grad_norm": 1.3892973016226395, "learning_rate": 1.32136693130825e-05, "loss": 1.9642, "step": 1305 }, { "epoch": 1.2162162162162162, "loss_reasoning": 0.5789846181869507, "loss_utility": 1.5989210605621338, "step": 1305 }, { "epoch": 1.217148182665424, "grad_norm": 1.4607902058353641, "learning_rate": 1.320676561960649e-05, "loss": 1.7705, "step": 1306 }, { "epoch": 1.217148182665424, "loss_reasoning": 0.47528791427612305, "loss_utility": 1.399752140045166, "step": 1306 }, { "epoch": 1.2180801491146318, "grad_norm": 1.3397370650733753, "learning_rate": 1.3199861926130482e-05, "loss": 1.9866, "step": 1307 }, { "epoch": 1.2180801491146318, "loss_reasoning": 0.48481184244155884, "loss_utility": 1.5410823822021484, "step": 1307 }, { "epoch": 1.2190121155638396, "grad_norm": 1.4363731870978675, "learning_rate": 1.3192958232654471e-05, "loss": 1.9771, "step": 1308 }, { "epoch": 1.2190121155638396, "loss_reasoning": 0.4969836175441742, "loss_utility": 0.7834044098854065, "step": 1308 }, { "epoch": 1.2199440820130476, "grad_norm": 1.1206693233426472, "learning_rate": 1.3186054539178462e-05, "loss": 1.5796, "step": 1309 }, { "epoch": 1.2199440820130476, "loss_reasoning": 0.5251271724700928, "loss_utility": 0.9217965602874756, "step": 1309 }, { "epoch": 1.2208760484622554, "grad_norm": 1.3528528364403452, "learning_rate": 1.317915084570245e-05, "loss": 1.6577, "step": 1310 }, { "epoch": 1.2208760484622554, "loss_reasoning": 0.544810950756073, "loss_utility": 1.0336940288543701, "step": 1310 }, { "epoch": 1.2218080149114632, "grad_norm": 1.2300822217063716, "learning_rate": 1.3172247152226443e-05, "loss": 1.8204, "step": 1311 }, { "epoch": 1.2218080149114632, "loss_reasoning": 0.6464502811431885, "loss_utility": 1.5232787132263184, "step": 1311 }, { "epoch": 1.222739981360671, "grad_norm": 1.1148774431942285, "learning_rate": 1.3165343458750432e-05, "loss": 1.9598, "step": 1312 }, { "epoch": 1.222739981360671, "loss_reasoning": 0.4481750726699829, "loss_utility": 0.8120059967041016, "step": 1312 }, { "epoch": 1.2236719478098788, "grad_norm": 1.197445114024172, "learning_rate": 1.3158439765274423e-05, "loss": 1.5977, "step": 1313 }, { "epoch": 1.2236719478098788, "loss_reasoning": 0.5783456563949585, "loss_utility": 0.9250452518463135, "step": 1313 }, { "epoch": 1.2246039142590868, "grad_norm": 1.1241006999566465, "learning_rate": 1.3151536071798414e-05, "loss": 1.742, "step": 1314 }, { "epoch": 1.2246039142590868, "loss_reasoning": 0.5705065727233887, "loss_utility": 1.391736388206482, "step": 1314 }, { "epoch": 1.2255358807082946, "grad_norm": 1.1882395902233553, "learning_rate": 1.3144632378322404e-05, "loss": 1.7493, "step": 1315 }, { "epoch": 1.2255358807082946, "loss_reasoning": 0.4874407947063446, "loss_utility": 1.6112730503082275, "step": 1315 }, { "epoch": 1.2264678471575023, "grad_norm": 1.2576386104385682, "learning_rate": 1.3137728684846393e-05, "loss": 1.7737, "step": 1316 }, { "epoch": 1.2264678471575023, "loss_reasoning": 0.5115705728530884, "loss_utility": 1.0542113780975342, "step": 1316 }, { "epoch": 1.2273998136067101, "grad_norm": 1.1933095302463261, "learning_rate": 1.3130824991370384e-05, "loss": 1.78, "step": 1317 }, { "epoch": 1.2273998136067101, "loss_reasoning": 0.5115485191345215, "loss_utility": 0.8252035975456238, "step": 1317 }, { "epoch": 1.228331780055918, "grad_norm": 1.1071908345489494, "learning_rate": 1.3123921297894375e-05, "loss": 1.7492, "step": 1318 }, { "epoch": 1.228331780055918, "loss_reasoning": 0.507523238658905, "loss_utility": 1.709467887878418, "step": 1318 }, { "epoch": 1.2292637465051257, "grad_norm": 1.8501190727904397, "learning_rate": 1.3117017604418366e-05, "loss": 1.9696, "step": 1319 }, { "epoch": 1.2292637465051257, "loss_reasoning": 0.4792087972164154, "loss_utility": 1.4896008968353271, "step": 1319 }, { "epoch": 1.2301957129543337, "grad_norm": 1.457976106646755, "learning_rate": 1.3110113910942355e-05, "loss": 1.91, "step": 1320 }, { "epoch": 1.2301957129543337, "loss_reasoning": 0.5164315700531006, "loss_utility": 1.2479770183563232, "step": 1320 }, { "epoch": 1.2311276794035415, "grad_norm": 1.2283038694766835, "learning_rate": 1.3103210217466345e-05, "loss": 1.6946, "step": 1321 }, { "epoch": 1.2311276794035415, "loss_reasoning": 0.501127302646637, "loss_utility": 0.882783055305481, "step": 1321 }, { "epoch": 1.2320596458527493, "grad_norm": 1.1384925579902712, "learning_rate": 1.3096306523990336e-05, "loss": 1.5311, "step": 1322 }, { "epoch": 1.2320596458527493, "loss_reasoning": 0.5156173706054688, "loss_utility": 0.9043595790863037, "step": 1322 }, { "epoch": 1.232991612301957, "grad_norm": 1.2151952969368103, "learning_rate": 1.3089402830514327e-05, "loss": 1.8604, "step": 1323 }, { "epoch": 1.232991612301957, "loss_reasoning": 0.49737435579299927, "loss_utility": 1.4029490947723389, "step": 1323 }, { "epoch": 1.2339235787511649, "grad_norm": 1.1511920942456721, "learning_rate": 1.3082499137038316e-05, "loss": 1.7933, "step": 1324 }, { "epoch": 1.2339235787511649, "loss_reasoning": 0.4744768440723419, "loss_utility": 1.7303895950317383, "step": 1324 }, { "epoch": 1.2348555452003729, "grad_norm": 1.2337208644374498, "learning_rate": 1.3075595443562308e-05, "loss": 1.6085, "step": 1325 }, { "epoch": 1.2348555452003729, "loss_reasoning": 0.5294736623764038, "loss_utility": 1.41188383102417, "step": 1325 }, { "epoch": 1.2357875116495807, "grad_norm": 1.3140171981991757, "learning_rate": 1.3068691750086297e-05, "loss": 1.7281, "step": 1326 }, { "epoch": 1.2357875116495807, "loss_reasoning": 0.5098634958267212, "loss_utility": 1.7184776067733765, "step": 1326 }, { "epoch": 1.2367194780987885, "grad_norm": 1.2257161594544133, "learning_rate": 1.3061788056610288e-05, "loss": 2.2219, "step": 1327 }, { "epoch": 1.2367194780987885, "loss_reasoning": 0.5037363767623901, "loss_utility": 1.1480610370635986, "step": 1327 }, { "epoch": 1.2376514445479962, "grad_norm": 1.0442328869290065, "learning_rate": 1.3054884363134277e-05, "loss": 1.5052, "step": 1328 }, { "epoch": 1.2376514445479962, "loss_reasoning": 0.5344975590705872, "loss_utility": 0.8342217206954956, "step": 1328 }, { "epoch": 1.238583410997204, "grad_norm": 1.2217338567488223, "learning_rate": 1.304798066965827e-05, "loss": 1.5757, "step": 1329 }, { "epoch": 1.238583410997204, "loss_reasoning": 0.518363893032074, "loss_utility": 0.8999200463294983, "step": 1329 }, { "epoch": 1.2395153774464118, "grad_norm": 1.5580326954786419, "learning_rate": 1.3041076976182258e-05, "loss": 1.6386, "step": 1330 }, { "epoch": 1.2395153774464118, "loss_reasoning": 0.5081015825271606, "loss_utility": 2.2081947326660156, "step": 1330 }, { "epoch": 1.2404473438956198, "grad_norm": 1.2003655289216515, "learning_rate": 1.3034173282706248e-05, "loss": 2.0559, "step": 1331 }, { "epoch": 1.2404473438956198, "loss_reasoning": 0.4532545208930969, "loss_utility": 1.5406566858291626, "step": 1331 }, { "epoch": 1.2413793103448276, "grad_norm": 1.1796026470477377, "learning_rate": 1.302726958923024e-05, "loss": 1.8888, "step": 1332 }, { "epoch": 1.2413793103448276, "loss_reasoning": 0.4829346835613251, "loss_utility": 1.081141471862793, "step": 1332 }, { "epoch": 1.2423112767940354, "grad_norm": 1.2466446825806063, "learning_rate": 1.302036589575423e-05, "loss": 1.6021, "step": 1333 }, { "epoch": 1.2423112767940354, "loss_reasoning": 0.5472081899642944, "loss_utility": 1.525970697402954, "step": 1333 }, { "epoch": 1.2432432432432432, "grad_norm": 1.1749780447841738, "learning_rate": 1.301346220227822e-05, "loss": 1.8063, "step": 1334 }, { "epoch": 1.2432432432432432, "loss_reasoning": 0.4590650200843811, "loss_utility": 1.215026617050171, "step": 1334 }, { "epoch": 1.244175209692451, "grad_norm": 1.3045127065632154, "learning_rate": 1.3006558508802209e-05, "loss": 1.9179, "step": 1335 }, { "epoch": 1.244175209692451, "loss_reasoning": 0.4949459433555603, "loss_utility": 1.7119395732879639, "step": 1335 }, { "epoch": 1.245107176141659, "grad_norm": 1.209547050683027, "learning_rate": 1.2999654815326201e-05, "loss": 2.0881, "step": 1336 }, { "epoch": 1.245107176141659, "loss_reasoning": 0.4905736446380615, "loss_utility": 0.8369624018669128, "step": 1336 }, { "epoch": 1.2460391425908668, "grad_norm": 1.1172142114676604, "learning_rate": 1.2992751121850192e-05, "loss": 1.7139, "step": 1337 }, { "epoch": 1.2460391425908668, "loss_reasoning": 0.44868189096450806, "loss_utility": 1.3155460357666016, "step": 1337 }, { "epoch": 1.2469711090400746, "grad_norm": 1.1058179241162296, "learning_rate": 1.2985847428374181e-05, "loss": 1.9698, "step": 1338 }, { "epoch": 1.2469711090400746, "loss_reasoning": 0.5471678972244263, "loss_utility": 0.9630831480026245, "step": 1338 }, { "epoch": 1.2479030754892824, "grad_norm": 1.1936169848431504, "learning_rate": 1.297894373489817e-05, "loss": 1.7218, "step": 1339 }, { "epoch": 1.2479030754892824, "loss_reasoning": 0.5585452914237976, "loss_utility": 1.42877197265625, "step": 1339 }, { "epoch": 1.2488350419384902, "grad_norm": 1.3939912216655168, "learning_rate": 1.2972040041422162e-05, "loss": 2.04, "step": 1340 }, { "epoch": 1.2488350419384902, "loss_reasoning": 0.5302481651306152, "loss_utility": 1.4451406002044678, "step": 1340 }, { "epoch": 1.249767008387698, "grad_norm": 1.3122416158615466, "learning_rate": 1.2965136347946151e-05, "loss": 1.9319, "step": 1341 }, { "epoch": 1.249767008387698, "loss_reasoning": 0.4942878484725952, "loss_utility": 1.1937004327774048, "step": 1341 }, { "epoch": 1.250698974836906, "grad_norm": 1.599880362220044, "learning_rate": 1.2958232654470142e-05, "loss": 1.9186, "step": 1342 }, { "epoch": 1.250698974836906, "loss_reasoning": 0.48628538846969604, "loss_utility": 1.1945374011993408, "step": 1342 }, { "epoch": 1.2516309412861137, "grad_norm": 1.442570338146316, "learning_rate": 1.2951328960994135e-05, "loss": 1.5913, "step": 1343 }, { "epoch": 1.2516309412861137, "loss_reasoning": 0.5072439312934875, "loss_utility": 1.1967347860336304, "step": 1343 }, { "epoch": 1.2525629077353215, "grad_norm": 1.590418656706779, "learning_rate": 1.2944425267518124e-05, "loss": 1.9422, "step": 1344 }, { "epoch": 1.2525629077353215, "loss_reasoning": 0.5332514643669128, "loss_utility": 1.4533653259277344, "step": 1344 }, { "epoch": 1.2534948741845293, "grad_norm": 1.2032838402852153, "learning_rate": 1.2937521574042113e-05, "loss": 1.7061, "step": 1345 }, { "epoch": 1.2534948741845293, "loss_reasoning": 0.48499369621276855, "loss_utility": 1.1225147247314453, "step": 1345 }, { "epoch": 1.254426840633737, "grad_norm": 1.117614679308567, "learning_rate": 1.2930617880566103e-05, "loss": 1.9139, "step": 1346 }, { "epoch": 1.254426840633737, "loss_reasoning": 0.479816198348999, "loss_utility": 1.5277060270309448, "step": 1346 }, { "epoch": 1.2553588070829451, "grad_norm": 1.1923523917981538, "learning_rate": 1.2923714187090096e-05, "loss": 2.0474, "step": 1347 }, { "epoch": 1.2553588070829451, "loss_reasoning": 0.4914776086807251, "loss_utility": 1.1175510883331299, "step": 1347 }, { "epoch": 1.256290773532153, "grad_norm": 1.2498200322144049, "learning_rate": 1.2916810493614085e-05, "loss": 1.8452, "step": 1348 }, { "epoch": 1.256290773532153, "loss_reasoning": 0.5606904029846191, "loss_utility": 1.3777616024017334, "step": 1348 }, { "epoch": 1.2572227399813607, "grad_norm": 1.0045975530755713, "learning_rate": 1.2909906800138074e-05, "loss": 1.824, "step": 1349 }, { "epoch": 1.2572227399813607, "loss_reasoning": 0.5281621217727661, "loss_utility": 1.1424065828323364, "step": 1349 }, { "epoch": 1.2581547064305685, "grad_norm": 1.0840255995810475, "learning_rate": 1.2903003106662065e-05, "loss": 1.5359, "step": 1350 }, { "epoch": 1.2581547064305685, "loss_reasoning": 0.4808577299118042, "loss_utility": 0.7778652906417847, "step": 1350 }, { "epoch": 1.2590866728797763, "grad_norm": 1.1155484904181894, "learning_rate": 1.2896099413186055e-05, "loss": 1.4761, "step": 1351 }, { "epoch": 1.2590866728797763, "loss_reasoning": 0.5256702303886414, "loss_utility": 2.1675758361816406, "step": 1351 }, { "epoch": 1.260018639328984, "grad_norm": 1.3026394262980332, "learning_rate": 1.2889195719710046e-05, "loss": 2.0485, "step": 1352 }, { "epoch": 1.260018639328984, "loss_reasoning": 0.5094039440155029, "loss_utility": 1.855475902557373, "step": 1352 }, { "epoch": 1.2609506057781918, "grad_norm": 1.244400954666611, "learning_rate": 1.2882292026234035e-05, "loss": 1.8593, "step": 1353 }, { "epoch": 1.2609506057781918, "loss_reasoning": 0.5687753558158875, "loss_utility": 1.0987071990966797, "step": 1353 }, { "epoch": 1.2618825722273999, "grad_norm": 1.1759165983975666, "learning_rate": 1.2875388332758028e-05, "loss": 2.0687, "step": 1354 }, { "epoch": 1.2618825722273999, "loss_reasoning": 0.48405420780181885, "loss_utility": 1.25067937374115, "step": 1354 }, { "epoch": 1.2628145386766076, "grad_norm": 1.349663440538595, "learning_rate": 1.2868484639282017e-05, "loss": 1.7712, "step": 1355 }, { "epoch": 1.2628145386766076, "loss_reasoning": 0.5186141729354858, "loss_utility": 1.5142148733139038, "step": 1355 }, { "epoch": 1.2637465051258154, "grad_norm": 1.2500545141411092, "learning_rate": 1.2861580945806007e-05, "loss": 1.8432, "step": 1356 }, { "epoch": 1.2637465051258154, "loss_reasoning": 0.4778417646884918, "loss_utility": 1.1090586185455322, "step": 1356 }, { "epoch": 1.2646784715750232, "grad_norm": 1.1402474974974481, "learning_rate": 1.2854677252329996e-05, "loss": 1.915, "step": 1357 }, { "epoch": 1.2646784715750232, "loss_reasoning": 0.4963839650154114, "loss_utility": 1.729272484779358, "step": 1357 }, { "epoch": 1.2656104380242312, "grad_norm": 1.1341035526099454, "learning_rate": 1.2847773558853989e-05, "loss": 1.6069, "step": 1358 }, { "epoch": 1.2656104380242312, "loss_reasoning": 0.4370335638523102, "loss_utility": 0.8497827053070068, "step": 1358 }, { "epoch": 1.266542404473439, "grad_norm": 1.1105838291783678, "learning_rate": 1.2840869865377978e-05, "loss": 1.4225, "step": 1359 }, { "epoch": 1.266542404473439, "loss_reasoning": 0.4933251142501831, "loss_utility": 1.5368480682373047, "step": 1359 }, { "epoch": 1.2674743709226468, "grad_norm": 1.0525060488507825, "learning_rate": 1.2833966171901969e-05, "loss": 1.7424, "step": 1360 }, { "epoch": 1.2674743709226468, "loss_reasoning": 0.4913359582424164, "loss_utility": 1.5915120840072632, "step": 1360 }, { "epoch": 1.2684063373718546, "grad_norm": 1.0968843697395478, "learning_rate": 1.2827062478425958e-05, "loss": 1.8567, "step": 1361 }, { "epoch": 1.2684063373718546, "loss_reasoning": 0.4672277867794037, "loss_utility": 0.4081198573112488, "step": 1361 }, { "epoch": 1.2693383038210624, "grad_norm": 1.329867064058347, "learning_rate": 1.282015878494995e-05, "loss": 1.8483, "step": 1362 }, { "epoch": 1.2693383038210624, "loss_reasoning": 0.5628071427345276, "loss_utility": 0.6916912794113159, "step": 1362 }, { "epoch": 1.2702702702702702, "grad_norm": 1.0405870753851776, "learning_rate": 1.2813255091473939e-05, "loss": 1.4936, "step": 1363 }, { "epoch": 1.2702702702702702, "loss_reasoning": 0.4873352646827698, "loss_utility": 2.61468505859375, "step": 1363 }, { "epoch": 1.271202236719478, "grad_norm": 1.2149407940342511, "learning_rate": 1.280635139799793e-05, "loss": 2.0062, "step": 1364 }, { "epoch": 1.271202236719478, "loss_reasoning": 0.5279762744903564, "loss_utility": 1.0663120746612549, "step": 1364 }, { "epoch": 1.272134203168686, "grad_norm": 1.2480462394823386, "learning_rate": 1.279944770452192e-05, "loss": 1.5871, "step": 1365 }, { "epoch": 1.272134203168686, "loss_reasoning": 0.5128462314605713, "loss_utility": 1.9517797231674194, "step": 1365 }, { "epoch": 1.2730661696178938, "grad_norm": 1.4599415928198733, "learning_rate": 1.2792544011045911e-05, "loss": 1.7683, "step": 1366 }, { "epoch": 1.2730661696178938, "loss_reasoning": 0.44255727529525757, "loss_utility": 1.783981442451477, "step": 1366 }, { "epoch": 1.2739981360671015, "grad_norm": 1.3468548445865896, "learning_rate": 1.27856403175699e-05, "loss": 2.1781, "step": 1367 }, { "epoch": 1.2739981360671015, "loss_reasoning": 0.5586541891098022, "loss_utility": 2.1675426959991455, "step": 1367 }, { "epoch": 1.2749301025163093, "grad_norm": 1.2286304861525021, "learning_rate": 1.2778736624093891e-05, "loss": 1.9545, "step": 1368 }, { "epoch": 1.2749301025163093, "loss_reasoning": 0.49251610040664673, "loss_utility": 1.4034266471862793, "step": 1368 }, { "epoch": 1.2758620689655173, "grad_norm": 1.169137643905375, "learning_rate": 1.2771832930617882e-05, "loss": 2.0229, "step": 1369 }, { "epoch": 1.2758620689655173, "loss_reasoning": 0.4884360134601593, "loss_utility": 1.4542235136032104, "step": 1369 }, { "epoch": 1.2767940354147251, "grad_norm": 1.222514757424759, "learning_rate": 1.2764929237141873e-05, "loss": 1.7405, "step": 1370 }, { "epoch": 1.2767940354147251, "loss_reasoning": 0.4693765342235565, "loss_utility": 1.6494524478912354, "step": 1370 }, { "epoch": 1.277726001863933, "grad_norm": 1.2661283651259163, "learning_rate": 1.2758025543665862e-05, "loss": 1.8319, "step": 1371 }, { "epoch": 1.277726001863933, "loss_reasoning": 0.5309505462646484, "loss_utility": 1.6049801111221313, "step": 1371 }, { "epoch": 1.2786579683131407, "grad_norm": 1.0551333896133417, "learning_rate": 1.2751121850189854e-05, "loss": 2.0291, "step": 1372 }, { "epoch": 1.2786579683131407, "loss_reasoning": 0.46764642000198364, "loss_utility": 1.9352002143859863, "step": 1372 }, { "epoch": 1.2795899347623485, "grad_norm": 1.1996543725385356, "learning_rate": 1.2744218156713843e-05, "loss": 2.0849, "step": 1373 }, { "epoch": 1.2795899347623485, "loss_reasoning": 0.49899497628211975, "loss_utility": 1.2921162843704224, "step": 1373 }, { "epoch": 1.2805219012115563, "grad_norm": 1.0487688770266288, "learning_rate": 1.2737314463237834e-05, "loss": 1.6432, "step": 1374 }, { "epoch": 1.2805219012115563, "loss_reasoning": 0.4444258511066437, "loss_utility": 1.3924024105072021, "step": 1374 }, { "epoch": 1.281453867660764, "grad_norm": 0.9570226597432552, "learning_rate": 1.2730410769761823e-05, "loss": 1.9026, "step": 1375 }, { "epoch": 1.281453867660764, "loss_reasoning": 0.5232190489768982, "loss_utility": 1.1830593347549438, "step": 1375 }, { "epoch": 1.282385834109972, "grad_norm": 1.2730182262629324, "learning_rate": 1.2723507076285815e-05, "loss": 1.6394, "step": 1376 }, { "epoch": 1.282385834109972, "loss_reasoning": 0.40214306116104126, "loss_utility": 0.5263645648956299, "step": 1376 }, { "epoch": 1.2833178005591799, "grad_norm": 1.325348528813855, "learning_rate": 1.2716603382809804e-05, "loss": 1.7848, "step": 1377 }, { "epoch": 1.2833178005591799, "loss_reasoning": 0.5262805819511414, "loss_utility": 1.178325891494751, "step": 1377 }, { "epoch": 1.2842497670083877, "grad_norm": 1.2649978757868217, "learning_rate": 1.2709699689333795e-05, "loss": 1.6495, "step": 1378 }, { "epoch": 1.2842497670083877, "loss_reasoning": 0.4775051474571228, "loss_utility": 1.4312236309051514, "step": 1378 }, { "epoch": 1.2851817334575955, "grad_norm": 1.5708412841674624, "learning_rate": 1.2702795995857784e-05, "loss": 1.7379, "step": 1379 }, { "epoch": 1.2851817334575955, "loss_reasoning": 0.49538683891296387, "loss_utility": 1.9728171825408936, "step": 1379 }, { "epoch": 1.2861136999068035, "grad_norm": 1.3268432483079606, "learning_rate": 1.2695892302381776e-05, "loss": 1.8418, "step": 1380 }, { "epoch": 1.2861136999068035, "loss_reasoning": 0.5397186279296875, "loss_utility": 1.0964325666427612, "step": 1380 }, { "epoch": 1.2870456663560113, "grad_norm": 1.2333826560807162, "learning_rate": 1.2688988608905766e-05, "loss": 1.9957, "step": 1381 }, { "epoch": 1.2870456663560113, "loss_reasoning": 0.4753451943397522, "loss_utility": 0.9667288064956665, "step": 1381 }, { "epoch": 1.287977632805219, "grad_norm": 1.0784572514375825, "learning_rate": 1.2682084915429755e-05, "loss": 1.589, "step": 1382 }, { "epoch": 1.287977632805219, "loss_reasoning": 0.4681696593761444, "loss_utility": 1.5154212713241577, "step": 1382 }, { "epoch": 1.2889095992544268, "grad_norm": 1.266155246683016, "learning_rate": 1.2675181221953747e-05, "loss": 1.8154, "step": 1383 }, { "epoch": 1.2889095992544268, "loss_reasoning": 0.5309741497039795, "loss_utility": 1.7208548784255981, "step": 1383 }, { "epoch": 1.2898415657036346, "grad_norm": 1.2783406740478331, "learning_rate": 1.2668277528477738e-05, "loss": 1.8542, "step": 1384 }, { "epoch": 1.2898415657036346, "loss_reasoning": 0.49114590883255005, "loss_utility": 0.9635565280914307, "step": 1384 }, { "epoch": 1.2907735321528424, "grad_norm": 1.1979415980960404, "learning_rate": 1.2661373835001727e-05, "loss": 1.7956, "step": 1385 }, { "epoch": 1.2907735321528424, "loss_reasoning": 0.471473753452301, "loss_utility": 1.0285835266113281, "step": 1385 }, { "epoch": 1.2917054986020502, "grad_norm": 1.1978751376499805, "learning_rate": 1.2654470141525716e-05, "loss": 1.9515, "step": 1386 }, { "epoch": 1.2917054986020502, "loss_reasoning": 0.5001323819160461, "loss_utility": 1.115308403968811, "step": 1386 }, { "epoch": 1.2926374650512582, "grad_norm": 1.3037578865466943, "learning_rate": 1.2647566448049708e-05, "loss": 1.5737, "step": 1387 }, { "epoch": 1.2926374650512582, "loss_reasoning": 0.5412094593048096, "loss_utility": 1.434574842453003, "step": 1387 }, { "epoch": 1.293569431500466, "grad_norm": 1.207150247390048, "learning_rate": 1.2640662754573699e-05, "loss": 1.523, "step": 1388 }, { "epoch": 1.293569431500466, "loss_reasoning": 0.4945909380912781, "loss_utility": 1.251824140548706, "step": 1388 }, { "epoch": 1.2945013979496738, "grad_norm": 1.302292680535428, "learning_rate": 1.2633759061097688e-05, "loss": 1.6448, "step": 1389 }, { "epoch": 1.2945013979496738, "loss_reasoning": 0.5039550065994263, "loss_utility": 1.0125020742416382, "step": 1389 }, { "epoch": 1.2954333643988816, "grad_norm": 1.141256779914751, "learning_rate": 1.2626855367621677e-05, "loss": 1.7315, "step": 1390 }, { "epoch": 1.2954333643988816, "loss_reasoning": 0.4801137447357178, "loss_utility": 1.1654763221740723, "step": 1390 }, { "epoch": 1.2963653308480896, "grad_norm": 1.4729475535007417, "learning_rate": 1.261995167414567e-05, "loss": 1.7661, "step": 1391 }, { "epoch": 1.2963653308480896, "loss_reasoning": 0.5320214033126831, "loss_utility": 1.8169488906860352, "step": 1391 }, { "epoch": 1.2972972972972974, "grad_norm": 1.2766623339356966, "learning_rate": 1.2613047980669658e-05, "loss": 2.0122, "step": 1392 }, { "epoch": 1.2972972972972974, "loss_reasoning": 0.44257742166519165, "loss_utility": 1.4556258916854858, "step": 1392 }, { "epoch": 1.2982292637465052, "grad_norm": 1.3632144132842214, "learning_rate": 1.260614428719365e-05, "loss": 1.7258, "step": 1393 }, { "epoch": 1.2982292637465052, "loss_reasoning": 0.49350595474243164, "loss_utility": 1.4694507122039795, "step": 1393 }, { "epoch": 1.299161230195713, "grad_norm": 1.1628714495938421, "learning_rate": 1.2599240593717642e-05, "loss": 1.7572, "step": 1394 }, { "epoch": 1.299161230195713, "loss_reasoning": 0.5120050311088562, "loss_utility": 1.2817115783691406, "step": 1394 }, { "epoch": 1.3000931966449207, "grad_norm": 1.1708486119963637, "learning_rate": 1.259233690024163e-05, "loss": 1.7559, "step": 1395 }, { "epoch": 1.3000931966449207, "loss_reasoning": 0.5298084020614624, "loss_utility": 1.144611120223999, "step": 1395 }, { "epoch": 1.3010251630941285, "grad_norm": 1.2923767118865466, "learning_rate": 1.258543320676562e-05, "loss": 1.9909, "step": 1396 }, { "epoch": 1.3010251630941285, "loss_reasoning": 0.5151944756507874, "loss_utility": 0.9274799823760986, "step": 1396 }, { "epoch": 1.3019571295433363, "grad_norm": 1.1300416171724608, "learning_rate": 1.257852951328961e-05, "loss": 1.6415, "step": 1397 }, { "epoch": 1.3019571295433363, "loss_reasoning": 0.5443346500396729, "loss_utility": 1.2553678750991821, "step": 1397 }, { "epoch": 1.3028890959925443, "grad_norm": 1.091184687965122, "learning_rate": 1.2571625819813603e-05, "loss": 1.2891, "step": 1398 }, { "epoch": 1.3028890959925443, "loss_reasoning": 0.4052061438560486, "loss_utility": 1.098602533340454, "step": 1398 }, { "epoch": 1.303821062441752, "grad_norm": 1.2481382281134341, "learning_rate": 1.2564722126337592e-05, "loss": 1.7559, "step": 1399 }, { "epoch": 1.303821062441752, "loss_reasoning": 0.5098680853843689, "loss_utility": 0.6523131132125854, "step": 1399 }, { "epoch": 1.30475302889096, "grad_norm": 1.0020311702794529, "learning_rate": 1.2557818432861581e-05, "loss": 1.5979, "step": 1400 }, { "epoch": 1.30475302889096, "loss_reasoning": 0.5080487132072449, "loss_utility": 1.1615722179412842, "step": 1400 }, { "epoch": 1.3056849953401677, "grad_norm": 1.2881467432520173, "learning_rate": 1.2550914739385572e-05, "loss": 1.5734, "step": 1401 }, { "epoch": 1.3056849953401677, "loss_reasoning": 0.5540730357170105, "loss_utility": 1.4584729671478271, "step": 1401 }, { "epoch": 1.3066169617893757, "grad_norm": 1.1096827497899133, "learning_rate": 1.2544011045909562e-05, "loss": 1.6667, "step": 1402 }, { "epoch": 1.3066169617893757, "loss_reasoning": 0.4818248152732849, "loss_utility": 1.463129997253418, "step": 1402 }, { "epoch": 1.3075489282385835, "grad_norm": 1.240900357450003, "learning_rate": 1.2537107352433553e-05, "loss": 1.7726, "step": 1403 }, { "epoch": 1.3075489282385835, "loss_reasoning": 0.5306613445281982, "loss_utility": 0.8470863699913025, "step": 1403 }, { "epoch": 1.3084808946877913, "grad_norm": 1.1791641999571445, "learning_rate": 1.2530203658957542e-05, "loss": 1.9134, "step": 1404 }, { "epoch": 1.3084808946877913, "loss_reasoning": 0.478937029838562, "loss_utility": 1.2603446245193481, "step": 1404 }, { "epoch": 1.309412861136999, "grad_norm": 1.5002271348388434, "learning_rate": 1.2523299965481535e-05, "loss": 1.779, "step": 1405 }, { "epoch": 1.309412861136999, "loss_reasoning": 0.47850048542022705, "loss_utility": 1.451135277748108, "step": 1405 }, { "epoch": 1.3103448275862069, "grad_norm": 1.2424312717521406, "learning_rate": 1.2516396272005524e-05, "loss": 2.0176, "step": 1406 }, { "epoch": 1.3103448275862069, "loss_reasoning": 0.4990621507167816, "loss_utility": 1.3199739456176758, "step": 1406 }, { "epoch": 1.3112767940354146, "grad_norm": 1.3221030727913452, "learning_rate": 1.2509492578529514e-05, "loss": 1.9447, "step": 1407 }, { "epoch": 1.3112767940354146, "loss_reasoning": 0.5468349456787109, "loss_utility": 1.5438032150268555, "step": 1407 }, { "epoch": 1.3122087604846224, "grad_norm": 1.2546596670397048, "learning_rate": 1.2502588885053503e-05, "loss": 2.0743, "step": 1408 }, { "epoch": 1.3122087604846224, "loss_reasoning": 0.4791181683540344, "loss_utility": 1.5463621616363525, "step": 1408 }, { "epoch": 1.3131407269338304, "grad_norm": 1.4412492481702415, "learning_rate": 1.2495685191577496e-05, "loss": 1.7747, "step": 1409 }, { "epoch": 1.3131407269338304, "loss_reasoning": 0.5242942571640015, "loss_utility": 1.1562435626983643, "step": 1409 }, { "epoch": 1.3140726933830382, "grad_norm": 1.3419201945981962, "learning_rate": 1.2488781498101485e-05, "loss": 1.8973, "step": 1410 }, { "epoch": 1.3140726933830382, "loss_reasoning": 0.4748465418815613, "loss_utility": 1.3946422338485718, "step": 1410 }, { "epoch": 1.315004659832246, "grad_norm": 1.4006096896212994, "learning_rate": 1.2481877804625476e-05, "loss": 1.7995, "step": 1411 }, { "epoch": 1.315004659832246, "loss_reasoning": 0.49620118737220764, "loss_utility": 0.4198283851146698, "step": 1411 }, { "epoch": 1.3159366262814538, "grad_norm": 1.4130049339747262, "learning_rate": 1.2474974111149465e-05, "loss": 1.5884, "step": 1412 }, { "epoch": 1.3159366262814538, "loss_reasoning": 0.4990765452384949, "loss_utility": 1.5668549537658691, "step": 1412 }, { "epoch": 1.3168685927306618, "grad_norm": 1.3239225109944077, "learning_rate": 1.2468070417673457e-05, "loss": 1.9457, "step": 1413 }, { "epoch": 1.3168685927306618, "loss_reasoning": 0.47848570346832275, "loss_utility": 0.7219462990760803, "step": 1413 }, { "epoch": 1.3178005591798696, "grad_norm": 1.6365766165987918, "learning_rate": 1.2461166724197446e-05, "loss": 1.759, "step": 1414 }, { "epoch": 1.3178005591798696, "loss_reasoning": 0.5412310361862183, "loss_utility": 1.3034932613372803, "step": 1414 }, { "epoch": 1.3187325256290774, "grad_norm": 1.130994930524733, "learning_rate": 1.2454263030721437e-05, "loss": 2.0419, "step": 1415 }, { "epoch": 1.3187325256290774, "loss_reasoning": 0.4714694023132324, "loss_utility": 1.6278231143951416, "step": 1415 }, { "epoch": 1.3196644920782852, "grad_norm": 1.3925145719450183, "learning_rate": 1.2447359337245428e-05, "loss": 1.7742, "step": 1416 }, { "epoch": 1.3196644920782852, "loss_reasoning": 0.4581100344657898, "loss_utility": 1.1946920156478882, "step": 1416 }, { "epoch": 1.320596458527493, "grad_norm": 1.0109906260285813, "learning_rate": 1.2440455643769418e-05, "loss": 1.7339, "step": 1417 }, { "epoch": 1.320596458527493, "loss_reasoning": 0.5287824869155884, "loss_utility": 1.929410696029663, "step": 1417 }, { "epoch": 1.3215284249767008, "grad_norm": 1.143672857743094, "learning_rate": 1.2433551950293407e-05, "loss": 1.7693, "step": 1418 }, { "epoch": 1.3215284249767008, "loss_reasoning": 0.4606093764305115, "loss_utility": 2.049898147583008, "step": 1418 }, { "epoch": 1.3224603914259085, "grad_norm": 1.1524619988334366, "learning_rate": 1.2426648256817398e-05, "loss": 1.8981, "step": 1419 }, { "epoch": 1.3224603914259085, "loss_reasoning": 0.48356080055236816, "loss_utility": 1.095207929611206, "step": 1419 }, { "epoch": 1.3233923578751166, "grad_norm": 1.2645250226882156, "learning_rate": 1.2419744563341389e-05, "loss": 1.8895, "step": 1420 }, { "epoch": 1.3233923578751166, "loss_reasoning": 0.45990467071533203, "loss_utility": 2.096017360687256, "step": 1420 }, { "epoch": 1.3243243243243243, "grad_norm": 1.375611263191699, "learning_rate": 1.241284086986538e-05, "loss": 2.0081, "step": 1421 }, { "epoch": 1.3243243243243243, "loss_reasoning": 0.5118745565414429, "loss_utility": 1.0784058570861816, "step": 1421 }, { "epoch": 1.3252562907735321, "grad_norm": 1.1209969283246042, "learning_rate": 1.2405937176389369e-05, "loss": 1.5274, "step": 1422 }, { "epoch": 1.3252562907735321, "loss_reasoning": 0.5079376697540283, "loss_utility": 2.002397060394287, "step": 1422 }, { "epoch": 1.32618825722274, "grad_norm": 1.2431357451625198, "learning_rate": 1.2399033482913361e-05, "loss": 1.9018, "step": 1423 }, { "epoch": 1.32618825722274, "loss_reasoning": 0.5593730211257935, "loss_utility": 0.9674869775772095, "step": 1423 }, { "epoch": 1.327120223671948, "grad_norm": 1.2197010540048512, "learning_rate": 1.239212978943735e-05, "loss": 1.733, "step": 1424 }, { "epoch": 1.327120223671948, "loss_reasoning": 0.5697944760322571, "loss_utility": 0.806061327457428, "step": 1424 }, { "epoch": 1.3280521901211557, "grad_norm": 0.9947183446526705, "learning_rate": 1.238522609596134e-05, "loss": 1.5905, "step": 1425 }, { "epoch": 1.3280521901211557, "loss_reasoning": 0.5538457632064819, "loss_utility": 1.151233434677124, "step": 1425 }, { "epoch": 1.3289841565703635, "grad_norm": 1.2672454968591986, "learning_rate": 1.237832240248533e-05, "loss": 1.9054, "step": 1426 }, { "epoch": 1.3289841565703635, "loss_reasoning": 0.5071620345115662, "loss_utility": 1.3427516222000122, "step": 1426 }, { "epoch": 1.3299161230195713, "grad_norm": 1.1752674497696964, "learning_rate": 1.2371418709009322e-05, "loss": 1.7842, "step": 1427 }, { "epoch": 1.3299161230195713, "loss_reasoning": 0.5015466213226318, "loss_utility": 1.411513090133667, "step": 1427 }, { "epoch": 1.330848089468779, "grad_norm": 1.0996815389860173, "learning_rate": 1.2364515015533311e-05, "loss": 1.8559, "step": 1428 }, { "epoch": 1.330848089468779, "loss_reasoning": 0.5113301277160645, "loss_utility": 1.4390089511871338, "step": 1428 }, { "epoch": 1.3317800559179869, "grad_norm": 1.1466320744787792, "learning_rate": 1.2357611322057302e-05, "loss": 1.9498, "step": 1429 }, { "epoch": 1.3317800559179869, "loss_reasoning": 0.5164258480072021, "loss_utility": 1.2086514234542847, "step": 1429 }, { "epoch": 1.3327120223671947, "grad_norm": 1.3580519286951194, "learning_rate": 1.2350707628581291e-05, "loss": 1.8832, "step": 1430 }, { "epoch": 1.3327120223671947, "loss_reasoning": 0.48560792207717896, "loss_utility": 0.9189063906669617, "step": 1430 }, { "epoch": 1.3336439888164027, "grad_norm": 1.2252610606013368, "learning_rate": 1.2343803935105284e-05, "loss": 1.8328, "step": 1431 }, { "epoch": 1.3336439888164027, "loss_reasoning": 0.5416618585586548, "loss_utility": 1.543639063835144, "step": 1431 }, { "epoch": 1.3345759552656105, "grad_norm": 1.1515485076237635, "learning_rate": 1.2336900241629273e-05, "loss": 1.9836, "step": 1432 }, { "epoch": 1.3345759552656105, "loss_reasoning": 0.5029978156089783, "loss_utility": 1.2164182662963867, "step": 1432 }, { "epoch": 1.3355079217148182, "grad_norm": 1.229223498469337, "learning_rate": 1.2329996548153262e-05, "loss": 1.5844, "step": 1433 }, { "epoch": 1.3355079217148182, "loss_reasoning": 0.48240309953689575, "loss_utility": 0.6852685213088989, "step": 1433 }, { "epoch": 1.336439888164026, "grad_norm": 1.5220223898663656, "learning_rate": 1.2323092854677254e-05, "loss": 1.4046, "step": 1434 }, { "epoch": 1.336439888164026, "loss_reasoning": 0.48583340644836426, "loss_utility": 1.4222782850265503, "step": 1434 }, { "epoch": 1.337371854613234, "grad_norm": 1.337504610371887, "learning_rate": 1.2316189161201245e-05, "loss": 1.7579, "step": 1435 }, { "epoch": 1.337371854613234, "loss_reasoning": 0.5507625341415405, "loss_utility": 1.7935643196105957, "step": 1435 }, { "epoch": 1.3383038210624418, "grad_norm": 1.137827192608703, "learning_rate": 1.2309285467725234e-05, "loss": 1.7924, "step": 1436 }, { "epoch": 1.3383038210624418, "loss_reasoning": 0.5362703800201416, "loss_utility": 1.4479069709777832, "step": 1436 }, { "epoch": 1.3392357875116496, "grad_norm": 1.6871716369662197, "learning_rate": 1.2302381774249223e-05, "loss": 1.7388, "step": 1437 }, { "epoch": 1.3392357875116496, "loss_reasoning": 0.5717370510101318, "loss_utility": 0.8481369018554688, "step": 1437 }, { "epoch": 1.3401677539608574, "grad_norm": 1.1550756717455, "learning_rate": 1.2295478080773215e-05, "loss": 1.713, "step": 1438 }, { "epoch": 1.3401677539608574, "loss_reasoning": 0.46046456694602966, "loss_utility": 2.111003875732422, "step": 1438 }, { "epoch": 1.3410997204100652, "grad_norm": 1.1888307964404694, "learning_rate": 1.2288574387297206e-05, "loss": 2.0826, "step": 1439 }, { "epoch": 1.3410997204100652, "loss_reasoning": 0.5768300890922546, "loss_utility": 2.2077579498291016, "step": 1439 }, { "epoch": 1.342031686859273, "grad_norm": 1.105414948781273, "learning_rate": 1.2281670693821195e-05, "loss": 2.1466, "step": 1440 }, { "epoch": 1.342031686859273, "loss_reasoning": 0.5210350751876831, "loss_utility": 1.1492624282836914, "step": 1440 }, { "epoch": 1.3429636533084808, "grad_norm": 1.9376862338186314, "learning_rate": 1.2274767000345184e-05, "loss": 2.0123, "step": 1441 }, { "epoch": 1.3429636533084808, "loss_reasoning": 0.44934138655662537, "loss_utility": 1.417890191078186, "step": 1441 }, { "epoch": 1.3438956197576888, "grad_norm": 1.3071474503921385, "learning_rate": 1.2267863306869176e-05, "loss": 1.8405, "step": 1442 }, { "epoch": 1.3438956197576888, "loss_reasoning": 0.5059422850608826, "loss_utility": 1.5988824367523193, "step": 1442 }, { "epoch": 1.3448275862068966, "grad_norm": 1.0184803059229979, "learning_rate": 1.2260959613393166e-05, "loss": 1.5908, "step": 1443 }, { "epoch": 1.3448275862068966, "loss_reasoning": 0.551100492477417, "loss_utility": 1.06343412399292, "step": 1443 }, { "epoch": 1.3457595526561044, "grad_norm": 1.380702863672117, "learning_rate": 1.2254055919917156e-05, "loss": 1.8747, "step": 1444 }, { "epoch": 1.3457595526561044, "loss_reasoning": 0.47938108444213867, "loss_utility": 1.4268641471862793, "step": 1444 }, { "epoch": 1.3466915191053122, "grad_norm": 1.1044093177406311, "learning_rate": 1.2247152226441149e-05, "loss": 1.9711, "step": 1445 }, { "epoch": 1.3466915191053122, "loss_reasoning": 0.4705583453178406, "loss_utility": 1.3699767589569092, "step": 1445 }, { "epoch": 1.3476234855545202, "grad_norm": 1.1763201388333924, "learning_rate": 1.2240248532965138e-05, "loss": 1.9508, "step": 1446 }, { "epoch": 1.3476234855545202, "loss_reasoning": 0.48700085282325745, "loss_utility": 1.8292152881622314, "step": 1446 }, { "epoch": 1.348555452003728, "grad_norm": 1.2564801608535385, "learning_rate": 1.2233344839489127e-05, "loss": 1.965, "step": 1447 }, { "epoch": 1.348555452003728, "loss_reasoning": 0.5716148018836975, "loss_utility": 0.8103623390197754, "step": 1447 }, { "epoch": 1.3494874184529357, "grad_norm": 1.2857707592922385, "learning_rate": 1.2226441146013117e-05, "loss": 1.6044, "step": 1448 }, { "epoch": 1.3494874184529357, "loss_reasoning": 0.5210938453674316, "loss_utility": 0.9881749153137207, "step": 1448 }, { "epoch": 1.3504193849021435, "grad_norm": 1.0475427897732794, "learning_rate": 1.221953745253711e-05, "loss": 1.7007, "step": 1449 }, { "epoch": 1.3504193849021435, "loss_reasoning": 0.5219800472259521, "loss_utility": 1.322770357131958, "step": 1449 }, { "epoch": 1.3513513513513513, "grad_norm": 1.1149022397260377, "learning_rate": 1.2212633759061099e-05, "loss": 1.6125, "step": 1450 }, { "epoch": 1.3513513513513513, "loss_reasoning": 0.5724542737007141, "loss_utility": 1.1082730293273926, "step": 1450 }, { "epoch": 1.352283317800559, "grad_norm": 1.4146180466147067, "learning_rate": 1.2205730065585088e-05, "loss": 1.783, "step": 1451 }, { "epoch": 1.352283317800559, "loss_reasoning": 0.4658673405647278, "loss_utility": 1.0166029930114746, "step": 1451 }, { "epoch": 1.353215284249767, "grad_norm": 1.489689573352157, "learning_rate": 1.2198826372109079e-05, "loss": 1.6906, "step": 1452 }, { "epoch": 1.353215284249767, "loss_reasoning": 0.5147226452827454, "loss_utility": 1.3038444519042969, "step": 1452 }, { "epoch": 1.354147250698975, "grad_norm": 1.016622898669153, "learning_rate": 1.219192267863307e-05, "loss": 1.3317, "step": 1453 }, { "epoch": 1.354147250698975, "loss_reasoning": 0.5693184733390808, "loss_utility": 0.9314687252044678, "step": 1453 }, { "epoch": 1.3550792171481827, "grad_norm": 0.9254130988734065, "learning_rate": 1.218501898515706e-05, "loss": 1.4523, "step": 1454 }, { "epoch": 1.3550792171481827, "loss_reasoning": 0.48874497413635254, "loss_utility": 1.37129807472229, "step": 1454 }, { "epoch": 1.3560111835973905, "grad_norm": 1.2815573982769233, "learning_rate": 1.217811529168105e-05, "loss": 1.6169, "step": 1455 }, { "epoch": 1.3560111835973905, "loss_reasoning": 0.5380947589874268, "loss_utility": 0.9444277286529541, "step": 1455 }, { "epoch": 1.3569431500465983, "grad_norm": 1.1727243155992575, "learning_rate": 1.2171211598205042e-05, "loss": 1.6485, "step": 1456 }, { "epoch": 1.3569431500465983, "loss_reasoning": 0.5534405708312988, "loss_utility": 0.8016633987426758, "step": 1456 }, { "epoch": 1.3578751164958063, "grad_norm": 1.4871243631414233, "learning_rate": 1.216430790472903e-05, "loss": 1.7882, "step": 1457 }, { "epoch": 1.3578751164958063, "loss_reasoning": 0.5803515911102295, "loss_utility": 1.635599136352539, "step": 1457 }, { "epoch": 1.358807082945014, "grad_norm": 1.2807288674441573, "learning_rate": 1.2157404211253021e-05, "loss": 1.7005, "step": 1458 }, { "epoch": 1.358807082945014, "loss_reasoning": 0.5126991271972656, "loss_utility": 1.2298938035964966, "step": 1458 }, { "epoch": 1.3597390493942219, "grad_norm": 1.3914395677757734, "learning_rate": 1.215050051777701e-05, "loss": 2.1862, "step": 1459 }, { "epoch": 1.3597390493942219, "loss_reasoning": 0.4974302649497986, "loss_utility": 1.3727658987045288, "step": 1459 }, { "epoch": 1.3606710158434296, "grad_norm": 1.2131231049601874, "learning_rate": 1.2143596824301003e-05, "loss": 1.9198, "step": 1460 }, { "epoch": 1.3606710158434296, "loss_reasoning": 0.5180118680000305, "loss_utility": 0.6727825403213501, "step": 1460 }, { "epoch": 1.3616029822926374, "grad_norm": 1.0641214425312284, "learning_rate": 1.2136693130824992e-05, "loss": 1.5592, "step": 1461 }, { "epoch": 1.3616029822926374, "loss_reasoning": 0.44621896743774414, "loss_utility": 1.1446759700775146, "step": 1461 }, { "epoch": 1.3625349487418452, "grad_norm": 1.3341964704885745, "learning_rate": 1.2129789437348983e-05, "loss": 1.7869, "step": 1462 }, { "epoch": 1.3625349487418452, "loss_reasoning": 0.47817474603652954, "loss_utility": 1.9275281429290771, "step": 1462 }, { "epoch": 1.363466915191053, "grad_norm": 1.2465245060874808, "learning_rate": 1.2122885743872973e-05, "loss": 1.9877, "step": 1463 }, { "epoch": 1.363466915191053, "loss_reasoning": 0.534704327583313, "loss_utility": 0.8128207921981812, "step": 1463 }, { "epoch": 1.364398881640261, "grad_norm": 1.02773817707575, "learning_rate": 1.2115982050396964e-05, "loss": 1.5585, "step": 1464 }, { "epoch": 1.364398881640261, "loss_reasoning": 0.5323562622070312, "loss_utility": 1.3962860107421875, "step": 1464 }, { "epoch": 1.3653308480894688, "grad_norm": 1.2660900445536523, "learning_rate": 1.2109078356920953e-05, "loss": 1.7097, "step": 1465 }, { "epoch": 1.3653308480894688, "loss_reasoning": 0.5552525520324707, "loss_utility": 1.573073387145996, "step": 1465 }, { "epoch": 1.3662628145386766, "grad_norm": 1.2119512641501344, "learning_rate": 1.2102174663444944e-05, "loss": 2.0343, "step": 1466 }, { "epoch": 1.3662628145386766, "loss_reasoning": 0.4829038083553314, "loss_utility": 0.9721842408180237, "step": 1466 }, { "epoch": 1.3671947809878844, "grad_norm": 1.3390772754088909, "learning_rate": 1.2095270969968935e-05, "loss": 1.6463, "step": 1467 }, { "epoch": 1.3671947809878844, "loss_reasoning": 0.451310932636261, "loss_utility": 1.588463544845581, "step": 1467 }, { "epoch": 1.3681267474370924, "grad_norm": 1.3154546245175172, "learning_rate": 1.2088367276492925e-05, "loss": 2.0581, "step": 1468 }, { "epoch": 1.3681267474370924, "loss_reasoning": 0.45938605070114136, "loss_utility": 1.07210373878479, "step": 1468 }, { "epoch": 1.3690587138863002, "grad_norm": 1.3076002920806995, "learning_rate": 1.2081463583016914e-05, "loss": 1.7745, "step": 1469 }, { "epoch": 1.3690587138863002, "loss_reasoning": 0.5990003347396851, "loss_utility": 1.4326021671295166, "step": 1469 }, { "epoch": 1.369990680335508, "grad_norm": 1.2569395478873024, "learning_rate": 1.2074559889540905e-05, "loss": 2.1976, "step": 1470 }, { "epoch": 1.369990680335508, "loss_reasoning": 0.489209920167923, "loss_utility": 0.9845213294029236, "step": 1470 }, { "epoch": 1.3709226467847158, "grad_norm": 1.331309959578479, "learning_rate": 1.2067656196064896e-05, "loss": 1.6666, "step": 1471 }, { "epoch": 1.3709226467847158, "loss_reasoning": 0.49898993968963623, "loss_utility": 1.6952922344207764, "step": 1471 }, { "epoch": 1.3718546132339235, "grad_norm": 1.1428954975802261, "learning_rate": 1.2060752502588887e-05, "loss": 2.0696, "step": 1472 }, { "epoch": 1.3718546132339235, "loss_reasoning": 0.520298957824707, "loss_utility": 1.6004130840301514, "step": 1472 }, { "epoch": 1.3727865796831313, "grad_norm": 1.0691264635996542, "learning_rate": 1.2053848809112876e-05, "loss": 1.842, "step": 1473 }, { "epoch": 1.3727865796831313, "loss_reasoning": 0.5165207386016846, "loss_utility": 1.7150547504425049, "step": 1473 }, { "epoch": 1.3737185461323391, "grad_norm": 1.0879632448182561, "learning_rate": 1.2046945115636868e-05, "loss": 1.7193, "step": 1474 }, { "epoch": 1.3737185461323391, "loss_reasoning": 0.46914586424827576, "loss_utility": 1.3192660808563232, "step": 1474 }, { "epoch": 1.3746505125815471, "grad_norm": 1.281863431814427, "learning_rate": 1.2040041422160857e-05, "loss": 1.7614, "step": 1475 }, { "epoch": 1.3746505125815471, "loss_reasoning": 0.49148160219192505, "loss_utility": 0.7784043550491333, "step": 1475 }, { "epoch": 1.375582479030755, "grad_norm": 1.082307622031694, "learning_rate": 1.2033137728684848e-05, "loss": 1.7215, "step": 1476 }, { "epoch": 1.375582479030755, "loss_reasoning": 0.4651017189025879, "loss_utility": 0.9874416589736938, "step": 1476 }, { "epoch": 1.3765144454799627, "grad_norm": 1.0329918929566222, "learning_rate": 1.2026234035208837e-05, "loss": 1.5503, "step": 1477 }, { "epoch": 1.3765144454799627, "loss_reasoning": 0.5246518850326538, "loss_utility": 1.3859379291534424, "step": 1477 }, { "epoch": 1.3774464119291705, "grad_norm": 1.094148949225908, "learning_rate": 1.201933034173283e-05, "loss": 1.8928, "step": 1478 }, { "epoch": 1.3774464119291705, "loss_reasoning": 0.46384361386299133, "loss_utility": 0.7700098752975464, "step": 1478 }, { "epoch": 1.3783783783783785, "grad_norm": 1.1016751462200038, "learning_rate": 1.2012426648256818e-05, "loss": 1.4241, "step": 1479 }, { "epoch": 1.3783783783783785, "loss_reasoning": 0.45663541555404663, "loss_utility": 2.1654744148254395, "step": 1479 }, { "epoch": 1.3793103448275863, "grad_norm": 1.037637122050938, "learning_rate": 1.2005522954780809e-05, "loss": 1.9112, "step": 1480 }, { "epoch": 1.3793103448275863, "loss_reasoning": 0.4562022387981415, "loss_utility": 1.5098443031311035, "step": 1480 }, { "epoch": 1.380242311276794, "grad_norm": 1.4689207216785662, "learning_rate": 1.1998619261304798e-05, "loss": 1.7929, "step": 1481 }, { "epoch": 1.380242311276794, "loss_reasoning": 0.4944690763950348, "loss_utility": 1.4750940799713135, "step": 1481 }, { "epoch": 1.3811742777260019, "grad_norm": 1.2438140489310607, "learning_rate": 1.199171556782879e-05, "loss": 1.8665, "step": 1482 }, { "epoch": 1.3811742777260019, "loss_reasoning": 0.4844309389591217, "loss_utility": 1.463609218597412, "step": 1482 }, { "epoch": 1.3821062441752097, "grad_norm": 1.1971982130471643, "learning_rate": 1.198481187435278e-05, "loss": 1.7786, "step": 1483 }, { "epoch": 1.3821062441752097, "loss_reasoning": 0.5119212865829468, "loss_utility": 1.4219005107879639, "step": 1483 }, { "epoch": 1.3830382106244175, "grad_norm": 1.1664196833806733, "learning_rate": 1.1977908180876769e-05, "loss": 1.9837, "step": 1484 }, { "epoch": 1.3830382106244175, "loss_reasoning": 0.4978722929954529, "loss_utility": 1.195412516593933, "step": 1484 }, { "epoch": 1.3839701770736252, "grad_norm": 1.379314347742158, "learning_rate": 1.1971004487400761e-05, "loss": 1.9261, "step": 1485 }, { "epoch": 1.3839701770736252, "loss_reasoning": 0.5456185340881348, "loss_utility": 1.2066437005996704, "step": 1485 }, { "epoch": 1.3849021435228333, "grad_norm": 1.2122250873258402, "learning_rate": 1.1964100793924752e-05, "loss": 1.8158, "step": 1486 }, { "epoch": 1.3849021435228333, "loss_reasoning": 0.5836114883422852, "loss_utility": 1.2830326557159424, "step": 1486 }, { "epoch": 1.385834109972041, "grad_norm": 1.1319587644374436, "learning_rate": 1.195719710044874e-05, "loss": 1.4043, "step": 1487 }, { "epoch": 1.385834109972041, "loss_reasoning": 0.5055216550827026, "loss_utility": 1.2140133380889893, "step": 1487 }, { "epoch": 1.3867660764212488, "grad_norm": 0.9846930077765904, "learning_rate": 1.195029340697273e-05, "loss": 1.5541, "step": 1488 }, { "epoch": 1.3867660764212488, "loss_reasoning": 0.5113231539726257, "loss_utility": 1.8346266746520996, "step": 1488 }, { "epoch": 1.3876980428704566, "grad_norm": 1.2453211877954293, "learning_rate": 1.1943389713496722e-05, "loss": 1.6919, "step": 1489 }, { "epoch": 1.3876980428704566, "loss_reasoning": 0.5028795599937439, "loss_utility": 1.355745553970337, "step": 1489 }, { "epoch": 1.3886300093196646, "grad_norm": 1.159513188580919, "learning_rate": 1.1936486020020713e-05, "loss": 1.921, "step": 1490 }, { "epoch": 1.3886300093196646, "loss_reasoning": 0.5313879251480103, "loss_utility": 0.9576816558837891, "step": 1490 }, { "epoch": 1.3895619757688724, "grad_norm": 1.1575927378351212, "learning_rate": 1.1929582326544702e-05, "loss": 1.6117, "step": 1491 }, { "epoch": 1.3895619757688724, "loss_reasoning": 0.5143643021583557, "loss_utility": 1.4335856437683105, "step": 1491 }, { "epoch": 1.3904939422180802, "grad_norm": 1.263834102085644, "learning_rate": 1.1922678633068691e-05, "loss": 1.8434, "step": 1492 }, { "epoch": 1.3904939422180802, "loss_reasoning": 0.48565343022346497, "loss_utility": 0.9432299137115479, "step": 1492 }, { "epoch": 1.391425908667288, "grad_norm": 1.2846594088851073, "learning_rate": 1.1915774939592684e-05, "loss": 1.8702, "step": 1493 }, { "epoch": 1.391425908667288, "loss_reasoning": 0.48179227113723755, "loss_utility": 1.5113897323608398, "step": 1493 }, { "epoch": 1.3923578751164958, "grad_norm": 1.2270291062780994, "learning_rate": 1.1908871246116673e-05, "loss": 1.6926, "step": 1494 }, { "epoch": 1.3923578751164958, "loss_reasoning": 0.5382596254348755, "loss_utility": 0.9889238476753235, "step": 1494 }, { "epoch": 1.3932898415657036, "grad_norm": 1.2623594470453372, "learning_rate": 1.1901967552640663e-05, "loss": 1.7384, "step": 1495 }, { "epoch": 1.3932898415657036, "loss_reasoning": 0.5246732831001282, "loss_utility": 1.6526594161987305, "step": 1495 }, { "epoch": 1.3942218080149114, "grad_norm": 1.3146845281511352, "learning_rate": 1.1895063859164656e-05, "loss": 1.8275, "step": 1496 }, { "epoch": 1.3942218080149114, "loss_reasoning": 0.5258930921554565, "loss_utility": 1.3658846616744995, "step": 1496 }, { "epoch": 1.3951537744641194, "grad_norm": 1.415148965008159, "learning_rate": 1.1888160165688645e-05, "loss": 1.8465, "step": 1497 }, { "epoch": 1.3951537744641194, "loss_reasoning": 0.49470067024230957, "loss_utility": 0.9743058085441589, "step": 1497 }, { "epoch": 1.3960857409133272, "grad_norm": 1.154410732763759, "learning_rate": 1.1881256472212634e-05, "loss": 1.5788, "step": 1498 }, { "epoch": 1.3960857409133272, "loss_reasoning": 0.5196874141693115, "loss_utility": 0.8313690423965454, "step": 1498 }, { "epoch": 1.397017707362535, "grad_norm": 1.1367607188363804, "learning_rate": 1.1874352778736625e-05, "loss": 1.7107, "step": 1499 }, { "epoch": 1.397017707362535, "loss_reasoning": 0.4945802688598633, "loss_utility": 1.5909820795059204, "step": 1499 }, { "epoch": 1.3979496738117427, "grad_norm": 1.4285048593948655, "learning_rate": 1.1867449085260617e-05, "loss": 1.7443, "step": 1500 }, { "epoch": 1.3979496738117427, "loss_reasoning": 0.5144444704055786, "loss_utility": 1.4841041564941406, "step": 1500 }, { "epoch": 1.3988816402609507, "grad_norm": 1.485651154784752, "learning_rate": 1.1860545391784606e-05, "loss": 1.681, "step": 1501 }, { "epoch": 1.3988816402609507, "loss_reasoning": 0.46627750992774963, "loss_utility": 0.5535457134246826, "step": 1501 }, { "epoch": 1.3998136067101585, "grad_norm": 1.3037984578508428, "learning_rate": 1.1853641698308595e-05, "loss": 1.7525, "step": 1502 }, { "epoch": 1.3998136067101585, "loss_reasoning": 0.48068946599960327, "loss_utility": 1.5793776512145996, "step": 1502 }, { "epoch": 1.4007455731593663, "grad_norm": 1.127373388744037, "learning_rate": 1.1846738004832586e-05, "loss": 1.7435, "step": 1503 }, { "epoch": 1.4007455731593663, "loss_reasoning": 0.49243810772895813, "loss_utility": 1.5696463584899902, "step": 1503 }, { "epoch": 1.401677539608574, "grad_norm": 1.360062967239845, "learning_rate": 1.1839834311356576e-05, "loss": 1.6721, "step": 1504 }, { "epoch": 1.401677539608574, "loss_reasoning": 0.5128097534179688, "loss_utility": 1.5997719764709473, "step": 1504 }, { "epoch": 1.402609506057782, "grad_norm": 1.372416306054823, "learning_rate": 1.1832930617880567e-05, "loss": 2.0514, "step": 1505 }, { "epoch": 1.402609506057782, "loss_reasoning": 0.48999038338661194, "loss_utility": 1.156886100769043, "step": 1505 }, { "epoch": 1.4035414725069897, "grad_norm": 1.195292375806797, "learning_rate": 1.1826026924404556e-05, "loss": 1.9662, "step": 1506 }, { "epoch": 1.4035414725069897, "loss_reasoning": 0.45363283157348633, "loss_utility": 2.0049922466278076, "step": 1506 }, { "epoch": 1.4044734389561975, "grad_norm": 1.403439746067273, "learning_rate": 1.1819123230928549e-05, "loss": 2.0484, "step": 1507 }, { "epoch": 1.4044734389561975, "loss_reasoning": 0.4944099485874176, "loss_utility": 0.7210838794708252, "step": 1507 }, { "epoch": 1.4054054054054055, "grad_norm": 1.2974634556019142, "learning_rate": 1.1812219537452538e-05, "loss": 1.5565, "step": 1508 }, { "epoch": 1.4054054054054055, "loss_reasoning": 0.4599149823188782, "loss_utility": 2.120211601257324, "step": 1508 }, { "epoch": 1.4063373718546133, "grad_norm": 1.607848092444827, "learning_rate": 1.1805315843976528e-05, "loss": 2.3037, "step": 1509 }, { "epoch": 1.4063373718546133, "loss_reasoning": 0.5219006538391113, "loss_utility": 1.0283770561218262, "step": 1509 }, { "epoch": 1.407269338303821, "grad_norm": 1.1845993109747481, "learning_rate": 1.1798412150500517e-05, "loss": 1.6354, "step": 1510 }, { "epoch": 1.407269338303821, "loss_reasoning": 0.5021498203277588, "loss_utility": 1.0801265239715576, "step": 1510 }, { "epoch": 1.4082013047530288, "grad_norm": 1.3960032900668664, "learning_rate": 1.179150845702451e-05, "loss": 1.7229, "step": 1511 }, { "epoch": 1.4082013047530288, "loss_reasoning": 0.4645891785621643, "loss_utility": 0.7196844220161438, "step": 1511 }, { "epoch": 1.4091332712022366, "grad_norm": 1.0139896189474433, "learning_rate": 1.1784604763548499e-05, "loss": 1.3923, "step": 1512 }, { "epoch": 1.4091332712022366, "loss_reasoning": 0.509644091129303, "loss_utility": 1.0149452686309814, "step": 1512 }, { "epoch": 1.4100652376514446, "grad_norm": 1.344160101330338, "learning_rate": 1.177770107007249e-05, "loss": 1.8917, "step": 1513 }, { "epoch": 1.4100652376514446, "loss_reasoning": 0.46654295921325684, "loss_utility": 1.718754529953003, "step": 1513 }, { "epoch": 1.4109972041006524, "grad_norm": 1.3669393928784752, "learning_rate": 1.177079737659648e-05, "loss": 1.9774, "step": 1514 }, { "epoch": 1.4109972041006524, "loss_reasoning": 0.5209000110626221, "loss_utility": 0.8220692276954651, "step": 1514 }, { "epoch": 1.4119291705498602, "grad_norm": 1.3073070496116759, "learning_rate": 1.1763893683120471e-05, "loss": 1.5331, "step": 1515 }, { "epoch": 1.4119291705498602, "loss_reasoning": 0.5238791704177856, "loss_utility": 1.3528016805648804, "step": 1515 }, { "epoch": 1.412861136999068, "grad_norm": 1.417437866014816, "learning_rate": 1.175698998964446e-05, "loss": 1.7312, "step": 1516 }, { "epoch": 1.412861136999068, "loss_reasoning": 0.5182137489318848, "loss_utility": 2.066664218902588, "step": 1516 }, { "epoch": 1.4137931034482758, "grad_norm": 0.9836440698773564, "learning_rate": 1.1750086296168451e-05, "loss": 1.8046, "step": 1517 }, { "epoch": 1.4137931034482758, "loss_reasoning": 0.5212713479995728, "loss_utility": 1.415932297706604, "step": 1517 }, { "epoch": 1.4147250698974836, "grad_norm": 1.2674078622294949, "learning_rate": 1.1743182602692442e-05, "loss": 1.7586, "step": 1518 }, { "epoch": 1.4147250698974836, "loss_reasoning": 0.5199986696243286, "loss_utility": 0.4251728653907776, "step": 1518 }, { "epoch": 1.4156570363466916, "grad_norm": 1.5670133418154082, "learning_rate": 1.1736278909216432e-05, "loss": 1.5743, "step": 1519 }, { "epoch": 1.4156570363466916, "loss_reasoning": 0.4831545352935791, "loss_utility": 1.474050760269165, "step": 1519 }, { "epoch": 1.4165890027958994, "grad_norm": 1.1410703801067505, "learning_rate": 1.1729375215740421e-05, "loss": 1.6668, "step": 1520 }, { "epoch": 1.4165890027958994, "loss_reasoning": 0.5340957641601562, "loss_utility": 1.3980090618133545, "step": 1520 }, { "epoch": 1.4175209692451072, "grad_norm": 1.0164681953861634, "learning_rate": 1.1722471522264412e-05, "loss": 1.8656, "step": 1521 }, { "epoch": 1.4175209692451072, "loss_reasoning": 0.48334306478500366, "loss_utility": 2.329413414001465, "step": 1521 }, { "epoch": 1.418452935694315, "grad_norm": 1.9518157828734986, "learning_rate": 1.1715567828788403e-05, "loss": 2.1107, "step": 1522 }, { "epoch": 1.418452935694315, "loss_reasoning": 0.5069926977157593, "loss_utility": 1.583295226097107, "step": 1522 }, { "epoch": 1.4193849021435228, "grad_norm": 1.122579936466399, "learning_rate": 1.1708664135312394e-05, "loss": 1.6901, "step": 1523 }, { "epoch": 1.4193849021435228, "loss_reasoning": 0.5244905352592468, "loss_utility": 1.2922810316085815, "step": 1523 }, { "epoch": 1.4203168685927308, "grad_norm": 1.9269440585118305, "learning_rate": 1.1701760441836383e-05, "loss": 1.7213, "step": 1524 }, { "epoch": 1.4203168685927308, "loss_reasoning": 0.5416194200515747, "loss_utility": 1.9117776155471802, "step": 1524 }, { "epoch": 1.4212488350419386, "grad_norm": 1.3831626940327342, "learning_rate": 1.1694856748360375e-05, "loss": 1.8308, "step": 1525 }, { "epoch": 1.4212488350419386, "loss_reasoning": 0.5208641290664673, "loss_utility": 1.4629652500152588, "step": 1525 }, { "epoch": 1.4221808014911463, "grad_norm": 1.4784433458602777, "learning_rate": 1.1687953054884364e-05, "loss": 2.0977, "step": 1526 }, { "epoch": 1.4221808014911463, "loss_reasoning": 0.49909722805023193, "loss_utility": 1.3249380588531494, "step": 1526 }, { "epoch": 1.4231127679403541, "grad_norm": 1.2771319437805262, "learning_rate": 1.1681049361408355e-05, "loss": 1.8311, "step": 1527 }, { "epoch": 1.4231127679403541, "loss_reasoning": 0.5375406742095947, "loss_utility": 0.7375667095184326, "step": 1527 }, { "epoch": 1.424044734389562, "grad_norm": 1.3741269191416086, "learning_rate": 1.1674145667932344e-05, "loss": 1.723, "step": 1528 }, { "epoch": 1.424044734389562, "loss_reasoning": 0.5638347864151001, "loss_utility": 1.1378778219223022, "step": 1528 }, { "epoch": 1.4249767008387697, "grad_norm": 1.2075846591505182, "learning_rate": 1.1667241974456336e-05, "loss": 1.5106, "step": 1529 }, { "epoch": 1.4249767008387697, "loss_reasoning": 0.4923766255378723, "loss_utility": 1.7148815393447876, "step": 1529 }, { "epoch": 1.4259086672879777, "grad_norm": 1.4118695986077632, "learning_rate": 1.1660338280980325e-05, "loss": 1.8398, "step": 1530 }, { "epoch": 1.4259086672879777, "loss_reasoning": 0.4648129642009735, "loss_utility": 1.0225191116333008, "step": 1530 }, { "epoch": 1.4268406337371855, "grad_norm": 1.4970717549552766, "learning_rate": 1.1653434587504316e-05, "loss": 1.5967, "step": 1531 }, { "epoch": 1.4268406337371855, "loss_reasoning": 0.4774373471736908, "loss_utility": 1.6862528324127197, "step": 1531 }, { "epoch": 1.4277726001863933, "grad_norm": 1.4690482754510492, "learning_rate": 1.1646530894028305e-05, "loss": 2.0598, "step": 1532 }, { "epoch": 1.4277726001863933, "loss_reasoning": 0.5456407070159912, "loss_utility": 1.3700602054595947, "step": 1532 }, { "epoch": 1.428704566635601, "grad_norm": 1.3863482511496723, "learning_rate": 1.1639627200552298e-05, "loss": 1.8281, "step": 1533 }, { "epoch": 1.428704566635601, "loss_reasoning": 0.4601680338382721, "loss_utility": 1.178192377090454, "step": 1533 }, { "epoch": 1.4296365330848089, "grad_norm": 1.2263785598942216, "learning_rate": 1.1632723507076287e-05, "loss": 1.8193, "step": 1534 }, { "epoch": 1.4296365330848089, "loss_reasoning": 0.515426516532898, "loss_utility": 1.0484074354171753, "step": 1534 }, { "epoch": 1.4305684995340169, "grad_norm": 1.1403802651762982, "learning_rate": 1.1625819813600276e-05, "loss": 1.751, "step": 1535 }, { "epoch": 1.4305684995340169, "loss_reasoning": 0.5059281587600708, "loss_utility": 0.6171923875808716, "step": 1535 }, { "epoch": 1.4315004659832247, "grad_norm": 1.3481485787070642, "learning_rate": 1.1618916120124268e-05, "loss": 1.4874, "step": 1536 }, { "epoch": 1.4315004659832247, "loss_reasoning": 0.5393905639648438, "loss_utility": 1.3430898189544678, "step": 1536 }, { "epoch": 1.4324324324324325, "grad_norm": 1.211191027625617, "learning_rate": 1.1612012426648259e-05, "loss": 1.9753, "step": 1537 }, { "epoch": 1.4324324324324325, "loss_reasoning": 0.5077319145202637, "loss_utility": 1.1119784116744995, "step": 1537 }, { "epoch": 1.4333643988816402, "grad_norm": 1.1415209595171436, "learning_rate": 1.1605108733172248e-05, "loss": 1.739, "step": 1538 }, { "epoch": 1.4333643988816402, "loss_reasoning": 0.47199946641921997, "loss_utility": 1.7095413208007812, "step": 1538 }, { "epoch": 1.434296365330848, "grad_norm": 1.2119669591700437, "learning_rate": 1.1598205039696237e-05, "loss": 1.867, "step": 1539 }, { "epoch": 1.434296365330848, "loss_reasoning": 0.42483198642730713, "loss_utility": 1.441672444343567, "step": 1539 }, { "epoch": 1.4352283317800558, "grad_norm": 1.0921171714181612, "learning_rate": 1.159130134622023e-05, "loss": 1.7507, "step": 1540 }, { "epoch": 1.4352283317800558, "loss_reasoning": 0.4699314832687378, "loss_utility": 1.4059169292449951, "step": 1540 }, { "epoch": 1.4361602982292636, "grad_norm": 1.0632509946644222, "learning_rate": 1.158439765274422e-05, "loss": 1.6807, "step": 1541 }, { "epoch": 1.4361602982292636, "loss_reasoning": 0.5630491375923157, "loss_utility": 1.2465171813964844, "step": 1541 }, { "epoch": 1.4370922646784716, "grad_norm": 1.4273225811289005, "learning_rate": 1.1577493959268209e-05, "loss": 1.8644, "step": 1542 }, { "epoch": 1.4370922646784716, "loss_reasoning": 0.531326174736023, "loss_utility": 1.8991625308990479, "step": 1542 }, { "epoch": 1.4380242311276794, "grad_norm": 2.269133122598111, "learning_rate": 1.1570590265792198e-05, "loss": 1.9893, "step": 1543 }, { "epoch": 1.4380242311276794, "loss_reasoning": 0.4586448669433594, "loss_utility": 0.915505051612854, "step": 1543 }, { "epoch": 1.4389561975768872, "grad_norm": 1.1348360643452395, "learning_rate": 1.156368657231619e-05, "loss": 1.7213, "step": 1544 }, { "epoch": 1.4389561975768872, "loss_reasoning": 0.5147433280944824, "loss_utility": 1.1319093704223633, "step": 1544 }, { "epoch": 1.439888164026095, "grad_norm": 1.190635543758422, "learning_rate": 1.155678287884018e-05, "loss": 1.6204, "step": 1545 }, { "epoch": 1.439888164026095, "loss_reasoning": 0.4822767376899719, "loss_utility": 1.6323997974395752, "step": 1545 }, { "epoch": 1.440820130475303, "grad_norm": 1.1022905228857969, "learning_rate": 1.154987918536417e-05, "loss": 1.7867, "step": 1546 }, { "epoch": 1.440820130475303, "loss_reasoning": 0.481850266456604, "loss_utility": 1.3329174518585205, "step": 1546 }, { "epoch": 1.4417520969245108, "grad_norm": 0.9521366074278248, "learning_rate": 1.1542975491888163e-05, "loss": 1.5912, "step": 1547 }, { "epoch": 1.4417520969245108, "loss_reasoning": 0.4931650757789612, "loss_utility": 0.8459459543228149, "step": 1547 }, { "epoch": 1.4426840633737186, "grad_norm": 1.5383021461693112, "learning_rate": 1.1536071798412152e-05, "loss": 1.6959, "step": 1548 }, { "epoch": 1.4426840633737186, "loss_reasoning": 0.5255305767059326, "loss_utility": 1.0691819190979004, "step": 1548 }, { "epoch": 1.4436160298229264, "grad_norm": 1.2056443350485935, "learning_rate": 1.152916810493614e-05, "loss": 1.8656, "step": 1549 }, { "epoch": 1.4436160298229264, "loss_reasoning": 0.5065183639526367, "loss_utility": 1.2360035181045532, "step": 1549 }, { "epoch": 1.4445479962721341, "grad_norm": 1.2159932230127286, "learning_rate": 1.1522264411460132e-05, "loss": 1.8517, "step": 1550 }, { "epoch": 1.4445479962721341, "loss_reasoning": 0.509131908416748, "loss_utility": 1.1081072092056274, "step": 1550 }, { "epoch": 1.445479962721342, "grad_norm": 1.453550284323689, "learning_rate": 1.1515360717984124e-05, "loss": 1.8158, "step": 1551 }, { "epoch": 1.445479962721342, "loss_reasoning": 0.5360910892486572, "loss_utility": 1.022993564605713, "step": 1551 }, { "epoch": 1.4464119291705497, "grad_norm": 1.1367296898144907, "learning_rate": 1.1508457024508113e-05, "loss": 1.6874, "step": 1552 }, { "epoch": 1.4464119291705497, "loss_reasoning": 0.4434722661972046, "loss_utility": 1.1052777767181396, "step": 1552 }, { "epoch": 1.4473438956197577, "grad_norm": 1.0762722925797796, "learning_rate": 1.1501553331032102e-05, "loss": 1.5207, "step": 1553 }, { "epoch": 1.4473438956197577, "loss_reasoning": 0.4712426960468292, "loss_utility": 2.094111680984497, "step": 1553 }, { "epoch": 1.4482758620689655, "grad_norm": 1.1217624565088529, "learning_rate": 1.1494649637556094e-05, "loss": 1.9508, "step": 1554 }, { "epoch": 1.4482758620689655, "loss_reasoning": 0.4796430468559265, "loss_utility": 0.517494261264801, "step": 1554 }, { "epoch": 1.4492078285181733, "grad_norm": 1.2443928315974575, "learning_rate": 1.1487745944080084e-05, "loss": 1.6316, "step": 1555 }, { "epoch": 1.4492078285181733, "loss_reasoning": 0.4623508155345917, "loss_utility": 1.0338666439056396, "step": 1555 }, { "epoch": 1.450139794967381, "grad_norm": 1.0694370702618878, "learning_rate": 1.1480842250604074e-05, "loss": 1.9108, "step": 1556 }, { "epoch": 1.450139794967381, "loss_reasoning": 0.4703887701034546, "loss_utility": 1.9497597217559814, "step": 1556 }, { "epoch": 1.4510717614165891, "grad_norm": 0.9790329950211771, "learning_rate": 1.1473938557128063e-05, "loss": 1.9357, "step": 1557 }, { "epoch": 1.4510717614165891, "loss_reasoning": 0.47553297877311707, "loss_utility": 0.9049217104911804, "step": 1557 }, { "epoch": 1.452003727865797, "grad_norm": 1.140735099507261, "learning_rate": 1.1467034863652056e-05, "loss": 1.8576, "step": 1558 }, { "epoch": 1.452003727865797, "loss_reasoning": 0.47768592834472656, "loss_utility": 1.155184268951416, "step": 1558 }, { "epoch": 1.4529356943150047, "grad_norm": 1.376131851639426, "learning_rate": 1.1460131170176045e-05, "loss": 1.7187, "step": 1559 }, { "epoch": 1.4529356943150047, "loss_reasoning": 0.483541339635849, "loss_utility": 1.1607043743133545, "step": 1559 }, { "epoch": 1.4538676607642125, "grad_norm": 1.2093885885381688, "learning_rate": 1.1453227476700035e-05, "loss": 1.567, "step": 1560 }, { "epoch": 1.4538676607642125, "loss_reasoning": 0.4736613631248474, "loss_utility": 1.1566925048828125, "step": 1560 }, { "epoch": 1.4547996272134203, "grad_norm": 1.6420291886842844, "learning_rate": 1.1446323783224025e-05, "loss": 1.7792, "step": 1561 }, { "epoch": 1.4547996272134203, "loss_reasoning": 0.5108581781387329, "loss_utility": 1.4595856666564941, "step": 1561 }, { "epoch": 1.455731593662628, "grad_norm": 1.1478674784089578, "learning_rate": 1.1439420089748017e-05, "loss": 1.8311, "step": 1562 }, { "epoch": 1.455731593662628, "loss_reasoning": 0.45892971754074097, "loss_utility": 1.5284974575042725, "step": 1562 }, { "epoch": 1.4566635601118358, "grad_norm": 1.6340020239071524, "learning_rate": 1.1432516396272006e-05, "loss": 1.7634, "step": 1563 }, { "epoch": 1.4566635601118358, "loss_reasoning": 0.5212297439575195, "loss_utility": 1.7323949337005615, "step": 1563 }, { "epoch": 1.4575955265610439, "grad_norm": 1.3399540658843758, "learning_rate": 1.1425612702795997e-05, "loss": 2.1585, "step": 1564 }, { "epoch": 1.4575955265610439, "loss_reasoning": 0.523747444152832, "loss_utility": 0.7331594824790955, "step": 1564 }, { "epoch": 1.4585274930102516, "grad_norm": 1.1272012823958377, "learning_rate": 1.1418709009319987e-05, "loss": 1.6492, "step": 1565 }, { "epoch": 1.4585274930102516, "loss_reasoning": 0.5564490556716919, "loss_utility": 0.9275332093238831, "step": 1565 }, { "epoch": 1.4594594594594594, "grad_norm": 1.1437743642067264, "learning_rate": 1.1411805315843978e-05, "loss": 1.8571, "step": 1566 }, { "epoch": 1.4594594594594594, "loss_reasoning": 0.47086089849472046, "loss_utility": 1.477750301361084, "step": 1566 }, { "epoch": 1.4603914259086672, "grad_norm": 1.440125592614681, "learning_rate": 1.1404901622367967e-05, "loss": 1.6183, "step": 1567 }, { "epoch": 1.4603914259086672, "loss_reasoning": 0.46351343393325806, "loss_utility": 1.8676851987838745, "step": 1567 }, { "epoch": 1.4613233923578752, "grad_norm": 1.318164060689544, "learning_rate": 1.1397997928891958e-05, "loss": 1.9476, "step": 1568 }, { "epoch": 1.4613233923578752, "loss_reasoning": 0.5891605019569397, "loss_utility": 1.0854313373565674, "step": 1568 }, { "epoch": 1.462255358807083, "grad_norm": 1.3521554886527896, "learning_rate": 1.1391094235415949e-05, "loss": 2.0383, "step": 1569 }, { "epoch": 1.462255358807083, "loss_reasoning": 0.46369731426239014, "loss_utility": 1.7190556526184082, "step": 1569 }, { "epoch": 1.4631873252562908, "grad_norm": 1.380674676766668, "learning_rate": 1.138419054193994e-05, "loss": 1.9521, "step": 1570 }, { "epoch": 1.4631873252562908, "loss_reasoning": 0.48240625858306885, "loss_utility": 1.089148998260498, "step": 1570 }, { "epoch": 1.4641192917054986, "grad_norm": 1.0988267684930226, "learning_rate": 1.1377286848463928e-05, "loss": 1.6401, "step": 1571 }, { "epoch": 1.4641192917054986, "loss_reasoning": 0.5548981428146362, "loss_utility": 1.0613899230957031, "step": 1571 }, { "epoch": 1.4650512581547064, "grad_norm": 1.0753235142315312, "learning_rate": 1.137038315498792e-05, "loss": 1.7878, "step": 1572 }, { "epoch": 1.4650512581547064, "loss_reasoning": 0.49000102281570435, "loss_utility": 0.5668133497238159, "step": 1572 }, { "epoch": 1.4659832246039142, "grad_norm": 1.11286626803724, "learning_rate": 1.136347946151191e-05, "loss": 1.6494, "step": 1573 }, { "epoch": 1.4659832246039142, "loss_reasoning": 0.5277203321456909, "loss_utility": 1.0500552654266357, "step": 1573 }, { "epoch": 1.466915191053122, "grad_norm": 1.1653192147154852, "learning_rate": 1.13565757680359e-05, "loss": 1.7918, "step": 1574 }, { "epoch": 1.466915191053122, "loss_reasoning": 0.4624161422252655, "loss_utility": 1.3096189498901367, "step": 1574 }, { "epoch": 1.46784715750233, "grad_norm": 1.2837195176804532, "learning_rate": 1.134967207455989e-05, "loss": 1.8103, "step": 1575 }, { "epoch": 1.46784715750233, "loss_reasoning": 0.44063323736190796, "loss_utility": 1.3225659132003784, "step": 1575 }, { "epoch": 1.4687791239515378, "grad_norm": 1.53441965718706, "learning_rate": 1.1342768381083882e-05, "loss": 1.6926, "step": 1576 }, { "epoch": 1.4687791239515378, "loss_reasoning": 0.4747741222381592, "loss_utility": 1.4604339599609375, "step": 1576 }, { "epoch": 1.4697110904007455, "grad_norm": 1.4209087908473488, "learning_rate": 1.1335864687607871e-05, "loss": 1.8287, "step": 1577 }, { "epoch": 1.4697110904007455, "loss_reasoning": 0.5102024674415588, "loss_utility": 1.9499680995941162, "step": 1577 }, { "epoch": 1.4706430568499533, "grad_norm": 1.10072301031354, "learning_rate": 1.1328960994131862e-05, "loss": 1.8982, "step": 1578 }, { "epoch": 1.4706430568499533, "loss_reasoning": 0.46348118782043457, "loss_utility": 1.3702118396759033, "step": 1578 }, { "epoch": 1.4715750232991613, "grad_norm": 1.5406163481975115, "learning_rate": 1.1322057300655851e-05, "loss": 2.0695, "step": 1579 }, { "epoch": 1.4715750232991613, "loss_reasoning": 0.5114454627037048, "loss_utility": 1.6700067520141602, "step": 1579 }, { "epoch": 1.4725069897483691, "grad_norm": 1.2177977803008895, "learning_rate": 1.1315153607179843e-05, "loss": 1.7609, "step": 1580 }, { "epoch": 1.4725069897483691, "loss_reasoning": 0.5084058046340942, "loss_utility": 0.928092360496521, "step": 1580 }, { "epoch": 1.473438956197577, "grad_norm": 2.7001686312738933, "learning_rate": 1.1308249913703832e-05, "loss": 1.5567, "step": 1581 }, { "epoch": 1.473438956197577, "loss_reasoning": 0.5011746883392334, "loss_utility": 1.235602855682373, "step": 1581 }, { "epoch": 1.4743709226467847, "grad_norm": 1.5476877848187134, "learning_rate": 1.1301346220227823e-05, "loss": 1.6775, "step": 1582 }, { "epoch": 1.4743709226467847, "loss_reasoning": 0.5074387788772583, "loss_utility": 1.431779384613037, "step": 1582 }, { "epoch": 1.4753028890959925, "grad_norm": 1.1904700744264234, "learning_rate": 1.1294442526751812e-05, "loss": 1.6592, "step": 1583 }, { "epoch": 1.4753028890959925, "loss_reasoning": 0.5151318311691284, "loss_utility": 1.9944078922271729, "step": 1583 }, { "epoch": 1.4762348555452003, "grad_norm": 1.29436696927344, "learning_rate": 1.1287538833275805e-05, "loss": 2.1802, "step": 1584 }, { "epoch": 1.4762348555452003, "loss_reasoning": 0.546737790107727, "loss_utility": 1.481451153755188, "step": 1584 }, { "epoch": 1.477166821994408, "grad_norm": 1.1482145532117418, "learning_rate": 1.1280635139799794e-05, "loss": 1.7753, "step": 1585 }, { "epoch": 1.477166821994408, "loss_reasoning": 0.6325141191482544, "loss_utility": 1.8355803489685059, "step": 1585 }, { "epoch": 1.478098788443616, "grad_norm": 1.2237050279965636, "learning_rate": 1.1273731446323783e-05, "loss": 1.8782, "step": 1586 }, { "epoch": 1.478098788443616, "loss_reasoning": 0.5108750462532043, "loss_utility": 1.0387861728668213, "step": 1586 }, { "epoch": 1.4790307548928239, "grad_norm": 1.028375728876023, "learning_rate": 1.1266827752847775e-05, "loss": 1.5956, "step": 1587 }, { "epoch": 1.4790307548928239, "loss_reasoning": 0.511947751045227, "loss_utility": 1.3692644834518433, "step": 1587 }, { "epoch": 1.4799627213420317, "grad_norm": 1.1659943124485919, "learning_rate": 1.1259924059371766e-05, "loss": 1.9482, "step": 1588 }, { "epoch": 1.4799627213420317, "loss_reasoning": 0.6159625053405762, "loss_utility": 1.1477962732315063, "step": 1588 }, { "epoch": 1.4808946877912395, "grad_norm": 1.171179869893797, "learning_rate": 1.1253020365895755e-05, "loss": 1.7266, "step": 1589 }, { "epoch": 1.4808946877912395, "loss_reasoning": 0.48368051648139954, "loss_utility": 1.261106252670288, "step": 1589 }, { "epoch": 1.4818266542404475, "grad_norm": 1.1128279993721462, "learning_rate": 1.1246116672419744e-05, "loss": 1.9464, "step": 1590 }, { "epoch": 1.4818266542404475, "loss_reasoning": 0.48334914445877075, "loss_utility": 0.8952302932739258, "step": 1590 }, { "epoch": 1.4827586206896552, "grad_norm": 1.3540803502983887, "learning_rate": 1.1239212978943736e-05, "loss": 1.8326, "step": 1591 }, { "epoch": 1.4827586206896552, "loss_reasoning": 0.433295339345932, "loss_utility": 2.0180792808532715, "step": 1591 }, { "epoch": 1.483690587138863, "grad_norm": 1.2056567802500155, "learning_rate": 1.1232309285467727e-05, "loss": 2.143, "step": 1592 }, { "epoch": 1.483690587138863, "loss_reasoning": 0.4595116972923279, "loss_utility": 1.5872902870178223, "step": 1592 }, { "epoch": 1.4846225535880708, "grad_norm": 1.060796128590368, "learning_rate": 1.1225405591991716e-05, "loss": 1.4796, "step": 1593 }, { "epoch": 1.4846225535880708, "loss_reasoning": 0.452983021736145, "loss_utility": 1.0596578121185303, "step": 1593 }, { "epoch": 1.4855545200372786, "grad_norm": 1.0892371146348736, "learning_rate": 1.1218501898515709e-05, "loss": 2.0343, "step": 1594 }, { "epoch": 1.4855545200372786, "loss_reasoning": 0.5010091066360474, "loss_utility": 1.1677682399749756, "step": 1594 }, { "epoch": 1.4864864864864864, "grad_norm": 1.304170091289997, "learning_rate": 1.1211598205039698e-05, "loss": 1.7602, "step": 1595 }, { "epoch": 1.4864864864864864, "loss_reasoning": 0.5242798328399658, "loss_utility": 1.2192630767822266, "step": 1595 }, { "epoch": 1.4874184529356942, "grad_norm": 1.1964118059462576, "learning_rate": 1.1204694511563687e-05, "loss": 1.8153, "step": 1596 }, { "epoch": 1.4874184529356942, "loss_reasoning": 0.5310623645782471, "loss_utility": 1.0528432130813599, "step": 1596 }, { "epoch": 1.4883504193849022, "grad_norm": 1.2198213005000778, "learning_rate": 1.1197790818087677e-05, "loss": 1.771, "step": 1597 }, { "epoch": 1.4883504193849022, "loss_reasoning": 0.5219468474388123, "loss_utility": 0.8073543310165405, "step": 1597 }, { "epoch": 1.48928238583411, "grad_norm": 1.2928876101751317, "learning_rate": 1.119088712461167e-05, "loss": 1.6509, "step": 1598 }, { "epoch": 1.48928238583411, "loss_reasoning": 0.45247650146484375, "loss_utility": 1.2392139434814453, "step": 1598 }, { "epoch": 1.4902143522833178, "grad_norm": 1.435783205433191, "learning_rate": 1.1183983431135659e-05, "loss": 1.7759, "step": 1599 }, { "epoch": 1.4902143522833178, "loss_reasoning": 0.48598015308380127, "loss_utility": 0.8499109745025635, "step": 1599 }, { "epoch": 1.4911463187325256, "grad_norm": 1.2497751253007026, "learning_rate": 1.1177079737659648e-05, "loss": 1.5238, "step": 1600 }, { "epoch": 1.4911463187325256, "loss_reasoning": 0.5668221712112427, "loss_utility": 1.426814317703247, "step": 1600 }, { "epoch": 1.4920782851817336, "grad_norm": 1.1312701435504915, "learning_rate": 1.1170176044183639e-05, "loss": 2.0992, "step": 1601 }, { "epoch": 1.4920782851817336, "loss_reasoning": 0.5449866056442261, "loss_utility": 1.5506658554077148, "step": 1601 }, { "epoch": 1.4930102516309414, "grad_norm": 1.098781057915155, "learning_rate": 1.1163272350707631e-05, "loss": 1.8345, "step": 1602 }, { "epoch": 1.4930102516309414, "loss_reasoning": 0.5292676091194153, "loss_utility": 0.9825565218925476, "step": 1602 }, { "epoch": 1.4939422180801492, "grad_norm": 1.1181183523834786, "learning_rate": 1.115636865723162e-05, "loss": 1.6687, "step": 1603 }, { "epoch": 1.4939422180801492, "loss_reasoning": 0.4927000403404236, "loss_utility": 0.7316515445709229, "step": 1603 }, { "epoch": 1.494874184529357, "grad_norm": 1.2844554873767242, "learning_rate": 1.1149464963755609e-05, "loss": 1.7362, "step": 1604 }, { "epoch": 1.494874184529357, "loss_reasoning": 0.5371474027633667, "loss_utility": 1.6820075511932373, "step": 1604 }, { "epoch": 1.4958061509785647, "grad_norm": 1.1756317290711742, "learning_rate": 1.1142561270279602e-05, "loss": 2.1683, "step": 1605 }, { "epoch": 1.4958061509785647, "loss_reasoning": 0.4429130256175995, "loss_utility": 1.5943212509155273, "step": 1605 }, { "epoch": 1.4967381174277725, "grad_norm": 1.4520623903087222, "learning_rate": 1.113565757680359e-05, "loss": 2.2105, "step": 1606 }, { "epoch": 1.4967381174277725, "loss_reasoning": 0.508102297782898, "loss_utility": 1.4007446765899658, "step": 1606 }, { "epoch": 1.4976700838769803, "grad_norm": 1.1649639554630198, "learning_rate": 1.1128753883327581e-05, "loss": 1.7707, "step": 1607 }, { "epoch": 1.4976700838769803, "loss_reasoning": 0.5120248794555664, "loss_utility": 1.4800078868865967, "step": 1607 }, { "epoch": 1.4986020503261883, "grad_norm": 1.2004978605299397, "learning_rate": 1.112185018985157e-05, "loss": 2.0129, "step": 1608 }, { "epoch": 1.4986020503261883, "loss_reasoning": 0.49041154980659485, "loss_utility": 0.6609750390052795, "step": 1608 }, { "epoch": 1.499534016775396, "grad_norm": 0.9799797961776237, "learning_rate": 1.1114946496375563e-05, "loss": 1.4454, "step": 1609 }, { "epoch": 1.499534016775396, "loss_reasoning": 0.49945443868637085, "loss_utility": 1.5417852401733398, "step": 1609 }, { "epoch": 1.500465983224604, "grad_norm": 1.4583251920844214, "learning_rate": 1.1108042802899552e-05, "loss": 1.9087, "step": 1610 }, { "epoch": 1.500465983224604, "loss_reasoning": 0.48333969712257385, "loss_utility": 1.2127416133880615, "step": 1610 }, { "epoch": 1.501397949673812, "grad_norm": 1.1684183954041898, "learning_rate": 1.1101139109423543e-05, "loss": 1.9467, "step": 1611 }, { "epoch": 1.501397949673812, "loss_reasoning": 0.5596440434455872, "loss_utility": 1.4696578979492188, "step": 1611 }, { "epoch": 1.5023299161230197, "grad_norm": 1.3595905213260522, "learning_rate": 1.1094235415947532e-05, "loss": 1.8932, "step": 1612 }, { "epoch": 1.5023299161230197, "loss_reasoning": 0.48330265283584595, "loss_utility": 1.3219910860061646, "step": 1612 }, { "epoch": 1.5032618825722275, "grad_norm": 1.1984731586865551, "learning_rate": 1.1087331722471524e-05, "loss": 1.746, "step": 1613 }, { "epoch": 1.5032618825722275, "loss_reasoning": 0.5609037280082703, "loss_utility": 1.3854490518569946, "step": 1613 }, { "epoch": 1.5041938490214353, "grad_norm": 1.3098429036763843, "learning_rate": 1.1080428028995513e-05, "loss": 1.8645, "step": 1614 }, { "epoch": 1.5041938490214353, "loss_reasoning": 0.5029298067092896, "loss_utility": 1.3861730098724365, "step": 1614 }, { "epoch": 1.505125815470643, "grad_norm": 1.1610903121356342, "learning_rate": 1.1073524335519504e-05, "loss": 1.4875, "step": 1615 }, { "epoch": 1.505125815470643, "loss_reasoning": 0.5079445838928223, "loss_utility": 1.4534603357315063, "step": 1615 }, { "epoch": 1.5060577819198508, "grad_norm": 1.241232279588477, "learning_rate": 1.1066620642043494e-05, "loss": 1.7314, "step": 1616 }, { "epoch": 1.5060577819198508, "loss_reasoning": 0.44822874665260315, "loss_utility": 1.7658518552780151, "step": 1616 }, { "epoch": 1.5069897483690586, "grad_norm": 1.0546306584872172, "learning_rate": 1.1059716948567485e-05, "loss": 1.4557, "step": 1617 }, { "epoch": 1.5069897483690586, "loss_reasoning": 0.5091219544410706, "loss_utility": 0.7960716485977173, "step": 1617 }, { "epoch": 1.5079217148182664, "grad_norm": 1.4710585712105713, "learning_rate": 1.1052813255091474e-05, "loss": 1.7138, "step": 1618 }, { "epoch": 1.5079217148182664, "loss_reasoning": 0.5177127122879028, "loss_utility": 1.1935319900512695, "step": 1618 }, { "epoch": 1.5088536812674742, "grad_norm": 1.0239588745206336, "learning_rate": 1.1045909561615465e-05, "loss": 1.8109, "step": 1619 }, { "epoch": 1.5088536812674742, "loss_reasoning": 0.48415327072143555, "loss_utility": 1.1208975315093994, "step": 1619 }, { "epoch": 1.5097856477166822, "grad_norm": 1.15254149311207, "learning_rate": 1.1039005868139456e-05, "loss": 1.8785, "step": 1620 }, { "epoch": 1.5097856477166822, "loss_reasoning": 0.4734709560871124, "loss_utility": 1.261413812637329, "step": 1620 }, { "epoch": 1.51071761416589, "grad_norm": 1.0645319419696637, "learning_rate": 1.1032102174663446e-05, "loss": 1.8708, "step": 1621 }, { "epoch": 1.51071761416589, "loss_reasoning": 0.44645676016807556, "loss_utility": 1.4982026815414429, "step": 1621 }, { "epoch": 1.511649580615098, "grad_norm": 1.1112591315512157, "learning_rate": 1.1025198481187435e-05, "loss": 1.7074, "step": 1622 }, { "epoch": 1.511649580615098, "loss_reasoning": 0.5257596969604492, "loss_utility": 1.9900413751602173, "step": 1622 }, { "epoch": 1.5125815470643058, "grad_norm": 1.4020836690608214, "learning_rate": 1.1018294787711426e-05, "loss": 1.8266, "step": 1623 }, { "epoch": 1.5125815470643058, "loss_reasoning": 0.46965134143829346, "loss_utility": 1.107912302017212, "step": 1623 }, { "epoch": 1.5135135135135136, "grad_norm": 1.2654995086661267, "learning_rate": 1.1011391094235417e-05, "loss": 1.4901, "step": 1624 }, { "epoch": 1.5135135135135136, "loss_reasoning": 0.5034494400024414, "loss_utility": 1.2331867218017578, "step": 1624 }, { "epoch": 1.5144454799627214, "grad_norm": 1.3492038438677099, "learning_rate": 1.1004487400759408e-05, "loss": 1.584, "step": 1625 }, { "epoch": 1.5144454799627214, "loss_reasoning": 0.46326711773872375, "loss_utility": 1.745913028717041, "step": 1625 }, { "epoch": 1.5153774464119292, "grad_norm": 1.314126577928299, "learning_rate": 1.0997583707283397e-05, "loss": 1.7226, "step": 1626 }, { "epoch": 1.5153774464119292, "loss_reasoning": 0.48114699125289917, "loss_utility": 0.7468488216400146, "step": 1626 }, { "epoch": 1.516309412861137, "grad_norm": 1.0643939336259158, "learning_rate": 1.0990680013807389e-05, "loss": 1.4012, "step": 1627 }, { "epoch": 1.516309412861137, "loss_reasoning": 0.4491052031517029, "loss_utility": 1.3943850994110107, "step": 1627 }, { "epoch": 1.5172413793103448, "grad_norm": 1.0773668776218677, "learning_rate": 1.0983776320331378e-05, "loss": 1.5371, "step": 1628 }, { "epoch": 1.5172413793103448, "loss_reasoning": 0.5162263512611389, "loss_utility": 0.85094153881073, "step": 1628 }, { "epoch": 1.5181733457595525, "grad_norm": 1.1166044091050735, "learning_rate": 1.0976872626855369e-05, "loss": 1.6331, "step": 1629 }, { "epoch": 1.5181733457595525, "loss_reasoning": 0.5320342183113098, "loss_utility": 1.360368251800537, "step": 1629 }, { "epoch": 1.5191053122087603, "grad_norm": 1.3182954167210228, "learning_rate": 1.0969968933379358e-05, "loss": 1.6748, "step": 1630 }, { "epoch": 1.5191053122087603, "loss_reasoning": 0.4168112874031067, "loss_utility": 1.8628662824630737, "step": 1630 }, { "epoch": 1.5200372786579683, "grad_norm": 1.1490856290260039, "learning_rate": 1.096306523990335e-05, "loss": 2.8037, "step": 1631 }, { "epoch": 1.5200372786579683, "loss_reasoning": 0.44679057598114014, "loss_utility": 1.224543571472168, "step": 1631 }, { "epoch": 1.5209692451071761, "grad_norm": 1.223522337927496, "learning_rate": 1.095616154642734e-05, "loss": 1.7576, "step": 1632 }, { "epoch": 1.5209692451071761, "loss_reasoning": 0.645148515701294, "loss_utility": 1.312593698501587, "step": 1632 }, { "epoch": 1.521901211556384, "grad_norm": 1.25907981291643, "learning_rate": 1.094925785295133e-05, "loss": 1.8045, "step": 1633 }, { "epoch": 1.521901211556384, "loss_reasoning": 0.49644172191619873, "loss_utility": 1.2744338512420654, "step": 1633 }, { "epoch": 1.522833178005592, "grad_norm": 1.3605935341572775, "learning_rate": 1.094235415947532e-05, "loss": 1.6571, "step": 1634 }, { "epoch": 1.522833178005592, "loss_reasoning": 0.43510764837265015, "loss_utility": 1.1689541339874268, "step": 1634 }, { "epoch": 1.5237651444547997, "grad_norm": 1.040674437320187, "learning_rate": 1.0935450465999312e-05, "loss": 1.7107, "step": 1635 }, { "epoch": 1.5237651444547997, "loss_reasoning": 0.5184987187385559, "loss_utility": 1.8183915615081787, "step": 1635 }, { "epoch": 1.5246971109040075, "grad_norm": 1.1489521907273919, "learning_rate": 1.09285467725233e-05, "loss": 1.6244, "step": 1636 }, { "epoch": 1.5246971109040075, "loss_reasoning": 0.46240174770355225, "loss_utility": 0.7742180824279785, "step": 1636 }, { "epoch": 1.5256290773532153, "grad_norm": 1.246983149164847, "learning_rate": 1.092164307904729e-05, "loss": 1.6478, "step": 1637 }, { "epoch": 1.5256290773532153, "loss_reasoning": 0.4466019570827484, "loss_utility": 0.691226065158844, "step": 1637 }, { "epoch": 1.526561043802423, "grad_norm": 1.1602735309815457, "learning_rate": 1.0914739385571282e-05, "loss": 1.4586, "step": 1638 }, { "epoch": 1.526561043802423, "loss_reasoning": 0.523409366607666, "loss_utility": 1.4823704957962036, "step": 1638 }, { "epoch": 1.5274930102516309, "grad_norm": 1.3511141916786038, "learning_rate": 1.0907835692095273e-05, "loss": 1.8776, "step": 1639 }, { "epoch": 1.5274930102516309, "loss_reasoning": 0.4997243881225586, "loss_utility": 0.9758493900299072, "step": 1639 }, { "epoch": 1.5284249767008387, "grad_norm": 1.2230119603403393, "learning_rate": 1.0900931998619262e-05, "loss": 1.6301, "step": 1640 }, { "epoch": 1.5284249767008387, "loss_reasoning": 0.46920979022979736, "loss_utility": 0.7690877914428711, "step": 1640 }, { "epoch": 1.5293569431500464, "grad_norm": 1.248606741537474, "learning_rate": 1.0894028305143251e-05, "loss": 1.8196, "step": 1641 }, { "epoch": 1.5293569431500464, "loss_reasoning": 0.4567752778530121, "loss_utility": 0.9022554159164429, "step": 1641 }, { "epoch": 1.5302889095992545, "grad_norm": 1.00774450626163, "learning_rate": 1.0887124611667243e-05, "loss": 1.772, "step": 1642 }, { "epoch": 1.5302889095992545, "loss_reasoning": 0.6229934692382812, "loss_utility": 1.2668497562408447, "step": 1642 }, { "epoch": 1.5312208760484622, "grad_norm": 1.1960034358188103, "learning_rate": 1.0880220918191234e-05, "loss": 1.7874, "step": 1643 }, { "epoch": 1.5312208760484622, "loss_reasoning": 0.588123083114624, "loss_utility": 1.43583083152771, "step": 1643 }, { "epoch": 1.53215284249767, "grad_norm": 1.2608091575425662, "learning_rate": 1.0873317224715223e-05, "loss": 1.7752, "step": 1644 }, { "epoch": 1.53215284249767, "loss_reasoning": 0.5790972113609314, "loss_utility": 1.7828902006149292, "step": 1644 }, { "epoch": 1.533084808946878, "grad_norm": 1.1106661196972554, "learning_rate": 1.0866413531239216e-05, "loss": 1.7934, "step": 1645 }, { "epoch": 1.533084808946878, "loss_reasoning": 0.5835741758346558, "loss_utility": 1.1379269361495972, "step": 1645 }, { "epoch": 1.5340167753960858, "grad_norm": 1.0897454034186813, "learning_rate": 1.0859509837763205e-05, "loss": 1.6496, "step": 1646 }, { "epoch": 1.5340167753960858, "loss_reasoning": 0.4548754096031189, "loss_utility": 1.1900215148925781, "step": 1646 }, { "epoch": 1.5349487418452936, "grad_norm": 1.330233707495827, "learning_rate": 1.0852606144287194e-05, "loss": 1.8522, "step": 1647 }, { "epoch": 1.5349487418452936, "loss_reasoning": 0.4878137707710266, "loss_utility": 1.704248309135437, "step": 1647 }, { "epoch": 1.5358807082945014, "grad_norm": 1.2107979594967533, "learning_rate": 1.0845702450811184e-05, "loss": 1.9082, "step": 1648 }, { "epoch": 1.5358807082945014, "loss_reasoning": 0.4973216950893402, "loss_utility": 0.9212844371795654, "step": 1648 }, { "epoch": 1.5368126747437092, "grad_norm": 1.1652574848463717, "learning_rate": 1.0838798757335177e-05, "loss": 1.8414, "step": 1649 }, { "epoch": 1.5368126747437092, "loss_reasoning": 0.47148656845092773, "loss_utility": 0.9633508920669556, "step": 1649 }, { "epoch": 1.537744641192917, "grad_norm": 1.1598362282925736, "learning_rate": 1.0831895063859166e-05, "loss": 1.5221, "step": 1650 }, { "epoch": 1.537744641192917, "loss_reasoning": 0.4697914719581604, "loss_utility": 1.9278578758239746, "step": 1650 }, { "epoch": 1.5386766076421248, "grad_norm": 1.4538828329307472, "learning_rate": 1.0824991370383155e-05, "loss": 1.9005, "step": 1651 }, { "epoch": 1.5386766076421248, "loss_reasoning": 0.4762868285179138, "loss_utility": 1.0398954153060913, "step": 1651 }, { "epoch": 1.5396085740913326, "grad_norm": 1.2279722279784975, "learning_rate": 1.0818087676907146e-05, "loss": 1.9267, "step": 1652 }, { "epoch": 1.5396085740913326, "loss_reasoning": 0.43911880254745483, "loss_utility": 0.8163207173347473, "step": 1652 }, { "epoch": 1.5405405405405406, "grad_norm": 1.242418861011169, "learning_rate": 1.0811183983431136e-05, "loss": 1.8689, "step": 1653 }, { "epoch": 1.5405405405405406, "loss_reasoning": 0.46317461133003235, "loss_utility": 0.822883665561676, "step": 1653 }, { "epoch": 1.5414725069897484, "grad_norm": 1.3415286510218813, "learning_rate": 1.0804280289955127e-05, "loss": 1.6604, "step": 1654 }, { "epoch": 1.5414725069897484, "loss_reasoning": 0.4879213273525238, "loss_utility": 1.6189374923706055, "step": 1654 }, { "epoch": 1.5424044734389561, "grad_norm": 1.4093423144292259, "learning_rate": 1.0797376596479116e-05, "loss": 1.7666, "step": 1655 }, { "epoch": 1.5424044734389561, "loss_reasoning": 0.5033010244369507, "loss_utility": 1.3098812103271484, "step": 1655 }, { "epoch": 1.5433364398881642, "grad_norm": 1.269631705298488, "learning_rate": 1.0790472903003109e-05, "loss": 1.6558, "step": 1656 }, { "epoch": 1.5433364398881642, "loss_reasoning": 0.4370609521865845, "loss_utility": 1.5496997833251953, "step": 1656 }, { "epoch": 1.544268406337372, "grad_norm": 1.3791228581959254, "learning_rate": 1.0783569209527098e-05, "loss": 2.1034, "step": 1657 }, { "epoch": 1.544268406337372, "loss_reasoning": 0.5282573103904724, "loss_utility": 2.088438034057617, "step": 1657 }, { "epoch": 1.5452003727865797, "grad_norm": 1.434653723616235, "learning_rate": 1.0776665516051088e-05, "loss": 2.0284, "step": 1658 }, { "epoch": 1.5452003727865797, "loss_reasoning": 0.5427634716033936, "loss_utility": 0.7282429933547974, "step": 1658 }, { "epoch": 1.5461323392357875, "grad_norm": 1.1463237966320925, "learning_rate": 1.0769761822575077e-05, "loss": 1.6251, "step": 1659 }, { "epoch": 1.5461323392357875, "loss_reasoning": 0.4720478057861328, "loss_utility": 1.2095489501953125, "step": 1659 }, { "epoch": 1.5470643056849953, "grad_norm": 1.1187531218336209, "learning_rate": 1.076285812909907e-05, "loss": 1.629, "step": 1660 }, { "epoch": 1.5470643056849953, "loss_reasoning": 0.4948413372039795, "loss_utility": 1.270418643951416, "step": 1660 }, { "epoch": 1.547996272134203, "grad_norm": 1.3113234615625615, "learning_rate": 1.0755954435623059e-05, "loss": 1.8265, "step": 1661 }, { "epoch": 1.547996272134203, "loss_reasoning": 0.5317738056182861, "loss_utility": 0.9743407368659973, "step": 1661 }, { "epoch": 1.5489282385834109, "grad_norm": 1.471809047697659, "learning_rate": 1.074905074214705e-05, "loss": 1.6942, "step": 1662 }, { "epoch": 1.5489282385834109, "loss_reasoning": 0.47144612669944763, "loss_utility": 1.5417362451553345, "step": 1662 }, { "epoch": 1.5498602050326187, "grad_norm": 1.188298949495732, "learning_rate": 1.0742147048671039e-05, "loss": 1.8038, "step": 1663 }, { "epoch": 1.5498602050326187, "loss_reasoning": 0.5295671224594116, "loss_utility": 1.2556335926055908, "step": 1663 }, { "epoch": 1.5507921714818267, "grad_norm": 1.2949203294384841, "learning_rate": 1.0735243355195031e-05, "loss": 1.797, "step": 1664 }, { "epoch": 1.5507921714818267, "loss_reasoning": 0.4507678151130676, "loss_utility": 1.6212084293365479, "step": 1664 }, { "epoch": 1.5517241379310345, "grad_norm": 1.20881404221716, "learning_rate": 1.072833966171902e-05, "loss": 1.7473, "step": 1665 }, { "epoch": 1.5517241379310345, "loss_reasoning": 0.42774662375450134, "loss_utility": 0.7850481271743774, "step": 1665 }, { "epoch": 1.5526561043802423, "grad_norm": 1.1149075030712217, "learning_rate": 1.072143596824301e-05, "loss": 1.6297, "step": 1666 }, { "epoch": 1.5526561043802423, "loss_reasoning": 0.5150117874145508, "loss_utility": 1.7349960803985596, "step": 1666 }, { "epoch": 1.5535880708294503, "grad_norm": 1.1846283930344583, "learning_rate": 1.0714532274767002e-05, "loss": 1.7781, "step": 1667 }, { "epoch": 1.5535880708294503, "loss_reasoning": 0.47285133600234985, "loss_utility": 0.6732199788093567, "step": 1667 }, { "epoch": 1.554520037278658, "grad_norm": 1.2828428304193906, "learning_rate": 1.0707628581290992e-05, "loss": 1.574, "step": 1668 }, { "epoch": 1.554520037278658, "loss_reasoning": 0.5273004770278931, "loss_utility": 1.11482572555542, "step": 1668 }, { "epoch": 1.5554520037278659, "grad_norm": 0.9702464637585559, "learning_rate": 1.0700724887814981e-05, "loss": 1.6324, "step": 1669 }, { "epoch": 1.5554520037278659, "loss_reasoning": 0.531456470489502, "loss_utility": 0.8440481424331665, "step": 1669 }, { "epoch": 1.5563839701770736, "grad_norm": 1.286107101968563, "learning_rate": 1.0693821194338972e-05, "loss": 1.5717, "step": 1670 }, { "epoch": 1.5563839701770736, "loss_reasoning": 0.47211146354675293, "loss_utility": 1.284956932067871, "step": 1670 }, { "epoch": 1.5573159366262814, "grad_norm": 1.1631720564284576, "learning_rate": 1.0686917500862963e-05, "loss": 1.7523, "step": 1671 }, { "epoch": 1.5573159366262814, "loss_reasoning": 0.5427296757698059, "loss_utility": 1.4047908782958984, "step": 1671 }, { "epoch": 1.5582479030754892, "grad_norm": 1.2564284067142042, "learning_rate": 1.0680013807386953e-05, "loss": 1.8837, "step": 1672 }, { "epoch": 1.5582479030754892, "loss_reasoning": 0.5786150097846985, "loss_utility": 1.137184500694275, "step": 1672 }, { "epoch": 1.559179869524697, "grad_norm": 1.0400295157546033, "learning_rate": 1.0673110113910943e-05, "loss": 1.4977, "step": 1673 }, { "epoch": 1.559179869524697, "loss_reasoning": 0.4557154178619385, "loss_utility": 0.8303369283676147, "step": 1673 }, { "epoch": 1.5601118359739048, "grad_norm": 1.2240227767935108, "learning_rate": 1.0666206420434933e-05, "loss": 1.5605, "step": 1674 }, { "epoch": 1.5601118359739048, "loss_reasoning": 0.5213992595672607, "loss_utility": 1.5651520490646362, "step": 1674 }, { "epoch": 1.5610438024231128, "grad_norm": 1.6132880592954362, "learning_rate": 1.0659302726958924e-05, "loss": 1.7814, "step": 1675 }, { "epoch": 1.5610438024231128, "loss_reasoning": 0.5133664608001709, "loss_utility": 0.6484164595603943, "step": 1675 }, { "epoch": 1.5619757688723206, "grad_norm": 1.3835723505228439, "learning_rate": 1.0652399033482915e-05, "loss": 1.7398, "step": 1676 }, { "epoch": 1.5619757688723206, "loss_reasoning": 0.45117634534835815, "loss_utility": 1.051011085510254, "step": 1676 }, { "epoch": 1.5629077353215284, "grad_norm": 1.2683407241625124, "learning_rate": 1.0645495340006904e-05, "loss": 1.4943, "step": 1677 }, { "epoch": 1.5629077353215284, "loss_reasoning": 0.4766077995300293, "loss_utility": 0.7243160009384155, "step": 1677 }, { "epoch": 1.5638397017707364, "grad_norm": 1.0409268320610268, "learning_rate": 1.0638591646530896e-05, "loss": 1.5302, "step": 1678 }, { "epoch": 1.5638397017707364, "loss_reasoning": 0.5051317811012268, "loss_utility": 0.791805624961853, "step": 1678 }, { "epoch": 1.5647716682199442, "grad_norm": 1.229753976487742, "learning_rate": 1.0631687953054885e-05, "loss": 1.5922, "step": 1679 }, { "epoch": 1.5647716682199442, "loss_reasoning": 0.5482827425003052, "loss_utility": 1.3080894947052002, "step": 1679 }, { "epoch": 1.565703634669152, "grad_norm": 1.1544831636878805, "learning_rate": 1.0624784259578876e-05, "loss": 2.0229, "step": 1680 }, { "epoch": 1.565703634669152, "loss_reasoning": 0.49957895278930664, "loss_utility": 1.9281182289123535, "step": 1680 }, { "epoch": 1.5666356011183598, "grad_norm": 0.984490187268706, "learning_rate": 1.0617880566102865e-05, "loss": 1.7289, "step": 1681 }, { "epoch": 1.5666356011183598, "loss_reasoning": 0.5376635789871216, "loss_utility": 0.8135339021682739, "step": 1681 }, { "epoch": 1.5675675675675675, "grad_norm": 1.0750921247553062, "learning_rate": 1.0610976872626857e-05, "loss": 1.5436, "step": 1682 }, { "epoch": 1.5675675675675675, "loss_reasoning": 0.47195965051651, "loss_utility": 1.7119972705841064, "step": 1682 }, { "epoch": 1.5684995340167753, "grad_norm": 1.1257716367272421, "learning_rate": 1.0604073179150846e-05, "loss": 1.8464, "step": 1683 }, { "epoch": 1.5684995340167753, "loss_reasoning": 0.4528149366378784, "loss_utility": 1.3172087669372559, "step": 1683 }, { "epoch": 1.5694315004659831, "grad_norm": 1.0866150109945238, "learning_rate": 1.0597169485674837e-05, "loss": 2.0212, "step": 1684 }, { "epoch": 1.5694315004659831, "loss_reasoning": 0.49554842710494995, "loss_utility": 1.3199615478515625, "step": 1684 }, { "epoch": 1.570363466915191, "grad_norm": 1.1832974914584555, "learning_rate": 1.0590265792198828e-05, "loss": 2.148, "step": 1685 }, { "epoch": 1.570363466915191, "loss_reasoning": 0.4481898546218872, "loss_utility": 0.7669240236282349, "step": 1685 }, { "epoch": 1.571295433364399, "grad_norm": 1.1187707215258522, "learning_rate": 1.0583362098722819e-05, "loss": 1.7241, "step": 1686 }, { "epoch": 1.571295433364399, "loss_reasoning": 0.4552290141582489, "loss_utility": 1.725147008895874, "step": 1686 }, { "epoch": 1.5722273998136067, "grad_norm": 1.2569834626175493, "learning_rate": 1.0576458405246808e-05, "loss": 1.9289, "step": 1687 }, { "epoch": 1.5722273998136067, "loss_reasoning": 0.5125317573547363, "loss_utility": 1.7587186098098755, "step": 1687 }, { "epoch": 1.5731593662628145, "grad_norm": 1.126004014470985, "learning_rate": 1.0569554711770797e-05, "loss": 1.654, "step": 1688 }, { "epoch": 1.5731593662628145, "loss_reasoning": 0.46243706345558167, "loss_utility": 1.5404859781265259, "step": 1688 }, { "epoch": 1.5740913327120225, "grad_norm": 1.4975994473533907, "learning_rate": 1.056265101829479e-05, "loss": 1.824, "step": 1689 }, { "epoch": 1.5740913327120225, "loss_reasoning": 0.5578198432922363, "loss_utility": 1.582511067390442, "step": 1689 }, { "epoch": 1.5750232991612303, "grad_norm": 1.0813985220471711, "learning_rate": 1.055574732481878e-05, "loss": 1.695, "step": 1690 }, { "epoch": 1.5750232991612303, "loss_reasoning": 0.4929708242416382, "loss_utility": 1.274735689163208, "step": 1690 }, { "epoch": 1.575955265610438, "grad_norm": 1.3441908729706145, "learning_rate": 1.0548843631342769e-05, "loss": 1.9874, "step": 1691 }, { "epoch": 1.575955265610438, "loss_reasoning": 0.5258209705352783, "loss_utility": 1.3190741539001465, "step": 1691 }, { "epoch": 1.5768872320596459, "grad_norm": 1.103043630674866, "learning_rate": 1.0541939937866758e-05, "loss": 1.3466, "step": 1692 }, { "epoch": 1.5768872320596459, "loss_reasoning": 0.511117696762085, "loss_utility": 1.6893640756607056, "step": 1692 }, { "epoch": 1.5778191985088537, "grad_norm": 1.0581397985461412, "learning_rate": 1.053503624439075e-05, "loss": 1.9077, "step": 1693 }, { "epoch": 1.5778191985088537, "loss_reasoning": 0.46968382596969604, "loss_utility": 1.368826150894165, "step": 1693 }, { "epoch": 1.5787511649580614, "grad_norm": 1.0477863224269766, "learning_rate": 1.0528132550914741e-05, "loss": 1.6938, "step": 1694 }, { "epoch": 1.5787511649580614, "loss_reasoning": 0.5653462409973145, "loss_utility": 1.199245810508728, "step": 1694 }, { "epoch": 1.5796831314072692, "grad_norm": 1.4268338775852927, "learning_rate": 1.052122885743873e-05, "loss": 1.7284, "step": 1695 }, { "epoch": 1.5796831314072692, "loss_reasoning": 0.5310301184654236, "loss_utility": 1.5279781818389893, "step": 1695 }, { "epoch": 1.580615097856477, "grad_norm": 1.316820330041903, "learning_rate": 1.0514325163962723e-05, "loss": 1.8276, "step": 1696 }, { "epoch": 1.580615097856477, "loss_reasoning": 0.49068695306777954, "loss_utility": 1.6824817657470703, "step": 1696 }, { "epoch": 1.581547064305685, "grad_norm": 1.1025424268435702, "learning_rate": 1.0507421470486712e-05, "loss": 1.5963, "step": 1697 }, { "epoch": 1.581547064305685, "loss_reasoning": 0.48145222663879395, "loss_utility": 0.9798498749732971, "step": 1697 }, { "epoch": 1.5824790307548928, "grad_norm": 1.1884319027832166, "learning_rate": 1.05005177770107e-05, "loss": 1.7849, "step": 1698 }, { "epoch": 1.5824790307548928, "loss_reasoning": 0.45406949520111084, "loss_utility": 1.3622767925262451, "step": 1698 }, { "epoch": 1.5834109972041006, "grad_norm": 1.375811163445793, "learning_rate": 1.0493614083534691e-05, "loss": 1.8211, "step": 1699 }, { "epoch": 1.5834109972041006, "loss_reasoning": 0.4981788098812103, "loss_utility": 0.6849969625473022, "step": 1699 }, { "epoch": 1.5843429636533086, "grad_norm": 1.1245345587932445, "learning_rate": 1.0486710390058684e-05, "loss": 1.8593, "step": 1700 }, { "epoch": 1.5843429636533086, "loss_reasoning": 0.49170756340026855, "loss_utility": 0.9744530916213989, "step": 1700 }, { "epoch": 1.5852749301025164, "grad_norm": 1.0230943709946867, "learning_rate": 1.0479806696582673e-05, "loss": 1.5531, "step": 1701 }, { "epoch": 1.5852749301025164, "loss_reasoning": 0.5578739643096924, "loss_utility": 1.0506396293640137, "step": 1701 }, { "epoch": 1.5862068965517242, "grad_norm": 1.3423592353952805, "learning_rate": 1.0472903003106662e-05, "loss": 1.5835, "step": 1702 }, { "epoch": 1.5862068965517242, "loss_reasoning": 0.4161059558391571, "loss_utility": 1.3616286516189575, "step": 1702 }, { "epoch": 1.587138863000932, "grad_norm": 1.1815013376216013, "learning_rate": 1.0465999309630653e-05, "loss": 1.6673, "step": 1703 }, { "epoch": 1.587138863000932, "loss_reasoning": 0.5269840359687805, "loss_utility": 1.6781471967697144, "step": 1703 }, { "epoch": 1.5880708294501398, "grad_norm": 1.1047770778376143, "learning_rate": 1.0459095616154643e-05, "loss": 1.7455, "step": 1704 }, { "epoch": 1.5880708294501398, "loss_reasoning": 0.48380857706069946, "loss_utility": 1.6416146755218506, "step": 1704 }, { "epoch": 1.5890027958993476, "grad_norm": 1.3649931914254447, "learning_rate": 1.0452191922678634e-05, "loss": 1.6893, "step": 1705 }, { "epoch": 1.5890027958993476, "loss_reasoning": 0.5060957670211792, "loss_utility": 1.3480356931686401, "step": 1705 }, { "epoch": 1.5899347623485554, "grad_norm": 1.118019131047129, "learning_rate": 1.0445288229202623e-05, "loss": 1.5311, "step": 1706 }, { "epoch": 1.5899347623485554, "loss_reasoning": 0.45264461636543274, "loss_utility": 1.2590960264205933, "step": 1706 }, { "epoch": 1.5908667287977631, "grad_norm": 1.2411619022323448, "learning_rate": 1.0438384535726616e-05, "loss": 1.8, "step": 1707 }, { "epoch": 1.5908667287977631, "loss_reasoning": 0.45502686500549316, "loss_utility": 1.5898525714874268, "step": 1707 }, { "epoch": 1.5917986952469712, "grad_norm": 1.5571153957509116, "learning_rate": 1.0431480842250605e-05, "loss": 1.8947, "step": 1708 }, { "epoch": 1.5917986952469712, "loss_reasoning": 0.5241315960884094, "loss_utility": 1.3311748504638672, "step": 1708 }, { "epoch": 1.592730661696179, "grad_norm": 1.6112534864576085, "learning_rate": 1.0424577148774595e-05, "loss": 1.7346, "step": 1709 }, { "epoch": 1.592730661696179, "loss_reasoning": 0.541785717010498, "loss_utility": 1.3259978294372559, "step": 1709 }, { "epoch": 1.5936626281453867, "grad_norm": 1.2966336244820125, "learning_rate": 1.0417673455298584e-05, "loss": 1.8817, "step": 1710 }, { "epoch": 1.5936626281453867, "loss_reasoning": 0.5464617013931274, "loss_utility": 1.4071173667907715, "step": 1710 }, { "epoch": 1.5945945945945947, "grad_norm": 1.204844128942379, "learning_rate": 1.0410769761822577e-05, "loss": 1.5932, "step": 1711 }, { "epoch": 1.5945945945945947, "loss_reasoning": 0.5682194232940674, "loss_utility": 1.3146611452102661, "step": 1711 }, { "epoch": 1.5955265610438025, "grad_norm": 1.0533437428663526, "learning_rate": 1.0403866068346566e-05, "loss": 1.654, "step": 1712 }, { "epoch": 1.5955265610438025, "loss_reasoning": 0.5287485122680664, "loss_utility": 0.8583744764328003, "step": 1712 }, { "epoch": 1.5964585274930103, "grad_norm": 1.2496426571289578, "learning_rate": 1.0396962374870557e-05, "loss": 1.5949, "step": 1713 }, { "epoch": 1.5964585274930103, "loss_reasoning": 0.43904656171798706, "loss_utility": 1.4136278629302979, "step": 1713 }, { "epoch": 1.597390493942218, "grad_norm": 1.493560902900233, "learning_rate": 1.0390058681394546e-05, "loss": 1.8623, "step": 1714 }, { "epoch": 1.597390493942218, "loss_reasoning": 0.43343329429626465, "loss_utility": 0.7678748369216919, "step": 1714 }, { "epoch": 1.598322460391426, "grad_norm": 1.1254480276805536, "learning_rate": 1.0383154987918538e-05, "loss": 1.8173, "step": 1715 }, { "epoch": 1.598322460391426, "loss_reasoning": 0.4836893081665039, "loss_utility": 0.6369092464447021, "step": 1715 }, { "epoch": 1.5992544268406337, "grad_norm": 1.0739528853069809, "learning_rate": 1.0376251294442527e-05, "loss": 1.5741, "step": 1716 }, { "epoch": 1.5992544268406337, "loss_reasoning": 0.5011014342308044, "loss_utility": 1.5671635866165161, "step": 1716 }, { "epoch": 1.6001863932898415, "grad_norm": 1.074575313424538, "learning_rate": 1.0369347600966518e-05, "loss": 1.7923, "step": 1717 }, { "epoch": 1.6001863932898415, "loss_reasoning": 0.5285272598266602, "loss_utility": 1.2634177207946777, "step": 1717 }, { "epoch": 1.6011183597390493, "grad_norm": 1.270776304718346, "learning_rate": 1.0362443907490509e-05, "loss": 1.85, "step": 1718 }, { "epoch": 1.6011183597390493, "loss_reasoning": 0.49134328961372375, "loss_utility": 1.389687418937683, "step": 1718 }, { "epoch": 1.6020503261882573, "grad_norm": 1.1436071479557626, "learning_rate": 1.03555402140145e-05, "loss": 1.863, "step": 1719 }, { "epoch": 1.6020503261882573, "loss_reasoning": 0.5134153962135315, "loss_utility": 1.0415070056915283, "step": 1719 }, { "epoch": 1.602982292637465, "grad_norm": 1.3056027993919048, "learning_rate": 1.0348636520538488e-05, "loss": 1.7631, "step": 1720 }, { "epoch": 1.602982292637465, "loss_reasoning": 0.5204848647117615, "loss_utility": 0.6567106246948242, "step": 1720 }, { "epoch": 1.6039142590866728, "grad_norm": 1.070822712636781, "learning_rate": 1.0341732827062479e-05, "loss": 1.9385, "step": 1721 }, { "epoch": 1.6039142590866728, "loss_reasoning": 0.43828821182250977, "loss_utility": 0.9173543453216553, "step": 1721 }, { "epoch": 1.6048462255358809, "grad_norm": 0.9598233107937131, "learning_rate": 1.033482913358647e-05, "loss": 1.3531, "step": 1722 }, { "epoch": 1.6048462255358809, "loss_reasoning": 0.48339778184890747, "loss_utility": 0.8778284192085266, "step": 1722 }, { "epoch": 1.6057781919850886, "grad_norm": 1.0601896421743886, "learning_rate": 1.032792544011046e-05, "loss": 1.4388, "step": 1723 }, { "epoch": 1.6057781919850886, "loss_reasoning": 0.5273580551147461, "loss_utility": 1.8203930854797363, "step": 1723 }, { "epoch": 1.6067101584342964, "grad_norm": 1.2255227145479228, "learning_rate": 1.032102174663445e-05, "loss": 1.9104, "step": 1724 }, { "epoch": 1.6067101584342964, "loss_reasoning": 0.524484395980835, "loss_utility": 1.119755744934082, "step": 1724 }, { "epoch": 1.6076421248835042, "grad_norm": 1.167924621526346, "learning_rate": 1.031411805315844e-05, "loss": 1.8278, "step": 1725 }, { "epoch": 1.6076421248835042, "loss_reasoning": 0.44373756647109985, "loss_utility": 0.9513871669769287, "step": 1725 }, { "epoch": 1.608574091332712, "grad_norm": 1.220838982172096, "learning_rate": 1.0307214359682431e-05, "loss": 1.7048, "step": 1726 }, { "epoch": 1.608574091332712, "loss_reasoning": 0.46386682987213135, "loss_utility": 1.576493740081787, "step": 1726 }, { "epoch": 1.6095060577819198, "grad_norm": 1.1092645760264583, "learning_rate": 1.0300310666206422e-05, "loss": 1.752, "step": 1727 }, { "epoch": 1.6095060577819198, "loss_reasoning": 0.5447152853012085, "loss_utility": 1.3733043670654297, "step": 1727 }, { "epoch": 1.6104380242311276, "grad_norm": 1.4734145117445872, "learning_rate": 1.029340697273041e-05, "loss": 1.718, "step": 1728 }, { "epoch": 1.6104380242311276, "loss_reasoning": 0.530381441116333, "loss_utility": 1.2774677276611328, "step": 1728 }, { "epoch": 1.6113699906803354, "grad_norm": 1.1085489620907087, "learning_rate": 1.0286503279254403e-05, "loss": 1.5271, "step": 1729 }, { "epoch": 1.6113699906803354, "loss_reasoning": 0.5375190377235413, "loss_utility": 1.9109017848968506, "step": 1729 }, { "epoch": 1.6123019571295434, "grad_norm": 1.100009507931806, "learning_rate": 1.0279599585778392e-05, "loss": 1.7606, "step": 1730 }, { "epoch": 1.6123019571295434, "loss_reasoning": 0.48392611742019653, "loss_utility": 1.1743121147155762, "step": 1730 }, { "epoch": 1.6132339235787512, "grad_norm": 1.201973810669204, "learning_rate": 1.0272695892302383e-05, "loss": 1.7658, "step": 1731 }, { "epoch": 1.6132339235787512, "loss_reasoning": 0.5449161529541016, "loss_utility": 1.7715455293655396, "step": 1731 }, { "epoch": 1.614165890027959, "grad_norm": 1.1185917052938985, "learning_rate": 1.0265792198826372e-05, "loss": 1.7263, "step": 1732 }, { "epoch": 1.614165890027959, "loss_reasoning": 0.4977104663848877, "loss_utility": 1.4102442264556885, "step": 1732 }, { "epoch": 1.615097856477167, "grad_norm": 1.1126040489803486, "learning_rate": 1.0258888505350364e-05, "loss": 1.5981, "step": 1733 }, { "epoch": 1.615097856477167, "loss_reasoning": 0.4840483069419861, "loss_utility": 1.0900273323059082, "step": 1733 }, { "epoch": 1.6160298229263748, "grad_norm": 0.94245089637259, "learning_rate": 1.0251984811874353e-05, "loss": 1.3972, "step": 1734 }, { "epoch": 1.6160298229263748, "loss_reasoning": 0.49773934483528137, "loss_utility": 1.1123110055923462, "step": 1734 }, { "epoch": 1.6169617893755825, "grad_norm": 1.3572721640271392, "learning_rate": 1.0245081118398344e-05, "loss": 1.7084, "step": 1735 }, { "epoch": 1.6169617893755825, "loss_reasoning": 0.4318242073059082, "loss_utility": 1.3497836589813232, "step": 1735 }, { "epoch": 1.6178937558247903, "grad_norm": 1.337284482899329, "learning_rate": 1.0238177424922335e-05, "loss": 1.7412, "step": 1736 }, { "epoch": 1.6178937558247903, "loss_reasoning": 0.5379681587219238, "loss_utility": 0.8463999032974243, "step": 1736 }, { "epoch": 1.6188257222739981, "grad_norm": 1.305271490875675, "learning_rate": 1.0231273731446326e-05, "loss": 1.6146, "step": 1737 }, { "epoch": 1.6188257222739981, "loss_reasoning": 0.4615198075771332, "loss_utility": 0.7408491373062134, "step": 1737 }, { "epoch": 1.619757688723206, "grad_norm": 0.9486923914396899, "learning_rate": 1.0224370037970315e-05, "loss": 1.5486, "step": 1738 }, { "epoch": 1.619757688723206, "loss_reasoning": 0.44553136825561523, "loss_utility": 0.7237419486045837, "step": 1738 }, { "epoch": 1.6206896551724137, "grad_norm": 1.1135651665098385, "learning_rate": 1.0217466344494304e-05, "loss": 1.5055, "step": 1739 }, { "epoch": 1.6206896551724137, "loss_reasoning": 0.5052474737167358, "loss_utility": 1.0211153030395508, "step": 1739 }, { "epoch": 1.6216216216216215, "grad_norm": 1.2968483176445584, "learning_rate": 1.0210562651018296e-05, "loss": 1.5312, "step": 1740 }, { "epoch": 1.6216216216216215, "loss_reasoning": 0.5480624437332153, "loss_utility": 1.7206934690475464, "step": 1740 }, { "epoch": 1.6225535880708295, "grad_norm": 1.1585098266561167, "learning_rate": 1.0203658957542287e-05, "loss": 1.7635, "step": 1741 }, { "epoch": 1.6225535880708295, "loss_reasoning": 0.4570268392562866, "loss_utility": 1.4866358041763306, "step": 1741 }, { "epoch": 1.6234855545200373, "grad_norm": 1.4865066086132253, "learning_rate": 1.0196755264066276e-05, "loss": 1.8727, "step": 1742 }, { "epoch": 1.6234855545200373, "loss_reasoning": 0.5129920244216919, "loss_utility": 1.1692149639129639, "step": 1742 }, { "epoch": 1.624417520969245, "grad_norm": 1.2613142005898323, "learning_rate": 1.0189851570590265e-05, "loss": 1.8475, "step": 1743 }, { "epoch": 1.624417520969245, "loss_reasoning": 0.5108015537261963, "loss_utility": 1.095210313796997, "step": 1743 }, { "epoch": 1.625349487418453, "grad_norm": 1.2414844604375155, "learning_rate": 1.0182947877114257e-05, "loss": 1.7461, "step": 1744 }, { "epoch": 1.625349487418453, "loss_reasoning": 0.4748724400997162, "loss_utility": 0.7438489198684692, "step": 1744 }, { "epoch": 1.6262814538676609, "grad_norm": 1.2848090016491553, "learning_rate": 1.0176044183638248e-05, "loss": 1.7603, "step": 1745 }, { "epoch": 1.6262814538676609, "loss_reasoning": 0.5303449630737305, "loss_utility": 1.146032691001892, "step": 1745 }, { "epoch": 1.6272134203168687, "grad_norm": 1.231316706833012, "learning_rate": 1.0169140490162237e-05, "loss": 1.7184, "step": 1746 }, { "epoch": 1.6272134203168687, "loss_reasoning": 0.49589961767196655, "loss_utility": 1.0816552639007568, "step": 1746 }, { "epoch": 1.6281453867660765, "grad_norm": 1.5307275554620132, "learning_rate": 1.016223679668623e-05, "loss": 2.0112, "step": 1747 }, { "epoch": 1.6281453867660765, "loss_reasoning": 0.5616562962532043, "loss_utility": 0.7394307851791382, "step": 1747 }, { "epoch": 1.6290773532152842, "grad_norm": 1.1993484664236038, "learning_rate": 1.0155333103210219e-05, "loss": 1.4958, "step": 1748 }, { "epoch": 1.6290773532152842, "loss_reasoning": 0.5242613554000854, "loss_utility": 1.765533685684204, "step": 1748 }, { "epoch": 1.630009319664492, "grad_norm": 1.296381310970708, "learning_rate": 1.0148429409734208e-05, "loss": 2.1652, "step": 1749 }, { "epoch": 1.630009319664492, "loss_reasoning": 0.5448315143585205, "loss_utility": 1.0302881002426147, "step": 1749 }, { "epoch": 1.6309412861136998, "grad_norm": 1.193482979941305, "learning_rate": 1.0141525716258198e-05, "loss": 1.7101, "step": 1750 }, { "epoch": 1.6309412861136998, "loss_reasoning": 0.4857584536075592, "loss_utility": 1.6022640466690063, "step": 1750 }, { "epoch": 1.6318732525629076, "grad_norm": 1.173819414706855, "learning_rate": 1.0134622022782191e-05, "loss": 1.9466, "step": 1751 }, { "epoch": 1.6318732525629076, "loss_reasoning": 0.44359439611434937, "loss_utility": 1.512003779411316, "step": 1751 }, { "epoch": 1.6328052190121156, "grad_norm": 1.1181938028005172, "learning_rate": 1.012771832930618e-05, "loss": 1.7332, "step": 1752 }, { "epoch": 1.6328052190121156, "loss_reasoning": 0.48436594009399414, "loss_utility": 1.5249958038330078, "step": 1752 }, { "epoch": 1.6337371854613234, "grad_norm": 1.0446412171103416, "learning_rate": 1.0120814635830169e-05, "loss": 1.5532, "step": 1753 }, { "epoch": 1.6337371854613234, "loss_reasoning": 0.4476335942745209, "loss_utility": 1.6852984428405762, "step": 1753 }, { "epoch": 1.6346691519105312, "grad_norm": 1.1687266167137895, "learning_rate": 1.011391094235416e-05, "loss": 1.677, "step": 1754 }, { "epoch": 1.6346691519105312, "loss_reasoning": 0.5363286733627319, "loss_utility": 1.0609899759292603, "step": 1754 }, { "epoch": 1.6356011183597392, "grad_norm": 1.1392665312301715, "learning_rate": 1.010700724887815e-05, "loss": 1.5045, "step": 1755 }, { "epoch": 1.6356011183597392, "loss_reasoning": 0.5152133703231812, "loss_utility": 1.3428984880447388, "step": 1755 }, { "epoch": 1.636533084808947, "grad_norm": 1.2323987712726236, "learning_rate": 1.0100103555402141e-05, "loss": 1.404, "step": 1756 }, { "epoch": 1.636533084808947, "loss_reasoning": 0.5097377300262451, "loss_utility": 1.3556232452392578, "step": 1756 }, { "epoch": 1.6374650512581548, "grad_norm": 1.1888201644021072, "learning_rate": 1.009319986192613e-05, "loss": 1.7266, "step": 1757 }, { "epoch": 1.6374650512581548, "loss_reasoning": 0.5640846490859985, "loss_utility": 1.2459208965301514, "step": 1757 }, { "epoch": 1.6383970177073626, "grad_norm": 1.0939887947044205, "learning_rate": 1.0086296168450123e-05, "loss": 1.6738, "step": 1758 }, { "epoch": 1.6383970177073626, "loss_reasoning": 0.4518745541572571, "loss_utility": 1.874191164970398, "step": 1758 }, { "epoch": 1.6393289841565704, "grad_norm": 1.33341909172835, "learning_rate": 1.0079392474974112e-05, "loss": 1.9906, "step": 1759 }, { "epoch": 1.6393289841565704, "loss_reasoning": 0.4876067042350769, "loss_utility": 1.5214157104492188, "step": 1759 }, { "epoch": 1.6402609506057781, "grad_norm": 1.1724732241876767, "learning_rate": 1.0072488781498102e-05, "loss": 2.2353, "step": 1760 }, { "epoch": 1.6402609506057781, "loss_reasoning": 0.5488225221633911, "loss_utility": 0.6552610397338867, "step": 1760 }, { "epoch": 1.641192917054986, "grad_norm": 1.3782019025737906, "learning_rate": 1.0065585088022091e-05, "loss": 1.5305, "step": 1761 }, { "epoch": 1.641192917054986, "loss_reasoning": 0.4930206835269928, "loss_utility": 1.283473253250122, "step": 1761 }, { "epoch": 1.6421248835041937, "grad_norm": 1.2529389052363058, "learning_rate": 1.0058681394546084e-05, "loss": 1.7004, "step": 1762 }, { "epoch": 1.6421248835041937, "loss_reasoning": 0.4967428147792816, "loss_utility": 1.495377779006958, "step": 1762 }, { "epoch": 1.6430568499534017, "grad_norm": 1.1427102798077158, "learning_rate": 1.0051777701070073e-05, "loss": 2.0084, "step": 1763 }, { "epoch": 1.6430568499534017, "loss_reasoning": 0.5034679174423218, "loss_utility": 1.3563499450683594, "step": 1763 }, { "epoch": 1.6439888164026095, "grad_norm": 1.1295275061555716, "learning_rate": 1.0044874007594064e-05, "loss": 1.6024, "step": 1764 }, { "epoch": 1.6439888164026095, "loss_reasoning": 0.5280951261520386, "loss_utility": 1.0766682624816895, "step": 1764 }, { "epoch": 1.6449207828518173, "grad_norm": 1.1385967891132291, "learning_rate": 1.0037970314118053e-05, "loss": 1.6342, "step": 1765 }, { "epoch": 1.6449207828518173, "loss_reasoning": 0.5010555386543274, "loss_utility": 1.6639269590377808, "step": 1765 }, { "epoch": 1.6458527493010253, "grad_norm": 1.3159252358073397, "learning_rate": 1.0031066620642045e-05, "loss": 1.9712, "step": 1766 }, { "epoch": 1.6458527493010253, "loss_reasoning": 0.4378843307495117, "loss_utility": 1.0464115142822266, "step": 1766 }, { "epoch": 1.646784715750233, "grad_norm": 1.2473192779848348, "learning_rate": 1.0024162927166034e-05, "loss": 1.8392, "step": 1767 }, { "epoch": 1.646784715750233, "loss_reasoning": 0.495119571685791, "loss_utility": 0.8266257047653198, "step": 1767 }, { "epoch": 1.647716682199441, "grad_norm": 0.9521712965071255, "learning_rate": 1.0017259233690025e-05, "loss": 1.7991, "step": 1768 }, { "epoch": 1.647716682199441, "loss_reasoning": 0.4803813397884369, "loss_utility": 0.5683925151824951, "step": 1768 }, { "epoch": 1.6486486486486487, "grad_norm": 1.3120525814481692, "learning_rate": 1.0010355540214016e-05, "loss": 1.798, "step": 1769 }, { "epoch": 1.6486486486486487, "loss_reasoning": 0.51819908618927, "loss_utility": 1.8426765203475952, "step": 1769 }, { "epoch": 1.6495806150978565, "grad_norm": 1.0778636769604897, "learning_rate": 1.0003451846738006e-05, "loss": 1.759, "step": 1770 }, { "epoch": 1.6495806150978565, "loss_reasoning": 0.4779733121395111, "loss_utility": 0.7759807109832764, "step": 1770 }, { "epoch": 1.6505125815470643, "grad_norm": 1.097215718138468, "learning_rate": 9.996548153261995e-06, "loss": 1.7042, "step": 1771 }, { "epoch": 1.6505125815470643, "loss_reasoning": 0.4722265601158142, "loss_utility": 1.4018230438232422, "step": 1771 }, { "epoch": 1.651444547996272, "grad_norm": 1.1610894869052923, "learning_rate": 9.989644459785986e-06, "loss": 2.0347, "step": 1772 }, { "epoch": 1.651444547996272, "loss_reasoning": 0.47032472491264343, "loss_utility": 1.100968360900879, "step": 1772 }, { "epoch": 1.6523765144454798, "grad_norm": 1.0692785262034221, "learning_rate": 9.982740766309977e-06, "loss": 1.4785, "step": 1773 }, { "epoch": 1.6523765144454798, "loss_reasoning": 0.46855854988098145, "loss_utility": 1.662309169769287, "step": 1773 }, { "epoch": 1.6533084808946876, "grad_norm": 1.0857533855022012, "learning_rate": 9.975837072833968e-06, "loss": 1.6435, "step": 1774 }, { "epoch": 1.6533084808946876, "loss_reasoning": 0.4717520475387573, "loss_utility": 1.6348987817764282, "step": 1774 }, { "epoch": 1.6542404473438956, "grad_norm": 1.1498490783378672, "learning_rate": 9.968933379357957e-06, "loss": 1.8968, "step": 1775 }, { "epoch": 1.6542404473438956, "loss_reasoning": 0.5527429580688477, "loss_utility": 1.2121955156326294, "step": 1775 }, { "epoch": 1.6551724137931034, "grad_norm": 1.160117936316541, "learning_rate": 9.962029685881947e-06, "loss": 1.5127, "step": 1776 }, { "epoch": 1.6551724137931034, "loss_reasoning": 0.4929795265197754, "loss_utility": 0.9160279631614685, "step": 1776 }, { "epoch": 1.6561043802423114, "grad_norm": 1.2509942592203063, "learning_rate": 9.955125992405938e-06, "loss": 1.557, "step": 1777 }, { "epoch": 1.6561043802423114, "loss_reasoning": 0.4859587550163269, "loss_utility": 1.1338698863983154, "step": 1777 }, { "epoch": 1.6570363466915192, "grad_norm": 1.4811925202898784, "learning_rate": 9.948222298929929e-06, "loss": 1.7722, "step": 1778 }, { "epoch": 1.6570363466915192, "loss_reasoning": 0.5064839124679565, "loss_utility": 1.2723445892333984, "step": 1778 }, { "epoch": 1.657968313140727, "grad_norm": 1.3097758837402829, "learning_rate": 9.94131860545392e-06, "loss": 1.7531, "step": 1779 }, { "epoch": 1.657968313140727, "loss_reasoning": 0.4098958969116211, "loss_utility": 0.8885332345962524, "step": 1779 }, { "epoch": 1.6589002795899348, "grad_norm": 1.077257697992361, "learning_rate": 9.934414911977909e-06, "loss": 1.6962, "step": 1780 }, { "epoch": 1.6589002795899348, "loss_reasoning": 0.5259140133857727, "loss_utility": 1.5911625623703003, "step": 1780 }, { "epoch": 1.6598322460391426, "grad_norm": 1.0149357453194994, "learning_rate": 9.9275112185019e-06, "loss": 1.612, "step": 1781 }, { "epoch": 1.6598322460391426, "loss_reasoning": 0.49805018305778503, "loss_utility": 1.4334957599639893, "step": 1781 }, { "epoch": 1.6607642124883504, "grad_norm": 1.1328271133569583, "learning_rate": 9.92060752502589e-06, "loss": 1.7849, "step": 1782 }, { "epoch": 1.6607642124883504, "loss_reasoning": 0.45714104175567627, "loss_utility": 2.0410804748535156, "step": 1782 }, { "epoch": 1.6616961789375582, "grad_norm": 1.368647424962291, "learning_rate": 9.91370383154988e-06, "loss": 1.7698, "step": 1783 }, { "epoch": 1.6616961789375582, "loss_reasoning": 0.49647918343544006, "loss_utility": 0.3696766495704651, "step": 1783 }, { "epoch": 1.662628145386766, "grad_norm": 1.2219628817019437, "learning_rate": 9.90680013807387e-06, "loss": 1.6, "step": 1784 }, { "epoch": 1.662628145386766, "loss_reasoning": 0.5186412930488586, "loss_utility": 1.4651587009429932, "step": 1784 }, { "epoch": 1.6635601118359737, "grad_norm": 1.1454349467357225, "learning_rate": 9.89989644459786e-06, "loss": 2.0895, "step": 1785 }, { "epoch": 1.6635601118359737, "loss_reasoning": 0.4705379009246826, "loss_utility": 1.6109232902526855, "step": 1785 }, { "epoch": 1.6644920782851818, "grad_norm": 1.2841525446871098, "learning_rate": 9.892992751121851e-06, "loss": 2.2182, "step": 1786 }, { "epoch": 1.6644920782851818, "loss_reasoning": 0.4885798692703247, "loss_utility": 1.1679495573043823, "step": 1786 }, { "epoch": 1.6654240447343895, "grad_norm": 1.4438034292302602, "learning_rate": 9.886089057645842e-06, "loss": 1.6358, "step": 1787 }, { "epoch": 1.6654240447343895, "loss_reasoning": 0.4843246340751648, "loss_utility": 1.2859280109405518, "step": 1787 }, { "epoch": 1.6663560111835976, "grad_norm": 1.0600496714996477, "learning_rate": 9.879185364169833e-06, "loss": 1.8477, "step": 1788 }, { "epoch": 1.6663560111835976, "loss_reasoning": 0.4840767979621887, "loss_utility": 1.0619795322418213, "step": 1788 }, { "epoch": 1.6672879776328053, "grad_norm": 1.0604071290348736, "learning_rate": 9.872281670693822e-06, "loss": 1.9263, "step": 1789 }, { "epoch": 1.6672879776328053, "loss_reasoning": 0.5436725616455078, "loss_utility": 1.3718492984771729, "step": 1789 }, { "epoch": 1.6682199440820131, "grad_norm": 1.4019338565320478, "learning_rate": 9.865377977217812e-06, "loss": 1.7152, "step": 1790 }, { "epoch": 1.6682199440820131, "loss_reasoning": 0.5337907671928406, "loss_utility": 1.1365792751312256, "step": 1790 }, { "epoch": 1.669151910531221, "grad_norm": 1.0686919928283536, "learning_rate": 9.858474283741803e-06, "loss": 1.6928, "step": 1791 }, { "epoch": 1.669151910531221, "loss_reasoning": 0.4749230742454529, "loss_utility": 0.9259400367736816, "step": 1791 }, { "epoch": 1.6700838769804287, "grad_norm": 1.2452397105487498, "learning_rate": 9.851570590265794e-06, "loss": 1.7258, "step": 1792 }, { "epoch": 1.6700838769804287, "loss_reasoning": 0.5525537729263306, "loss_utility": 1.9090163707733154, "step": 1792 }, { "epoch": 1.6710158434296365, "grad_norm": 1.3045209240995572, "learning_rate": 9.844666896789783e-06, "loss": 2.1237, "step": 1793 }, { "epoch": 1.6710158434296365, "loss_reasoning": 0.4907223582267761, "loss_utility": 1.295179009437561, "step": 1793 }, { "epoch": 1.6719478098788443, "grad_norm": 1.3609218329416903, "learning_rate": 9.837763203313774e-06, "loss": 1.886, "step": 1794 }, { "epoch": 1.6719478098788443, "loss_reasoning": 0.48972901701927185, "loss_utility": 1.245514154434204, "step": 1794 }, { "epoch": 1.672879776328052, "grad_norm": 1.3279206438641087, "learning_rate": 9.830859509837763e-06, "loss": 1.8729, "step": 1795 }, { "epoch": 1.672879776328052, "loss_reasoning": 0.4721871316432953, "loss_utility": 0.9162619113922119, "step": 1795 }, { "epoch": 1.6738117427772599, "grad_norm": 1.2431582425815848, "learning_rate": 9.823955816361753e-06, "loss": 1.8757, "step": 1796 }, { "epoch": 1.6738117427772599, "loss_reasoning": 0.4758462905883789, "loss_utility": 1.3431379795074463, "step": 1796 }, { "epoch": 1.6747437092264679, "grad_norm": 1.194738319808442, "learning_rate": 9.817052122885746e-06, "loss": 1.5366, "step": 1797 }, { "epoch": 1.6747437092264679, "loss_reasoning": 0.5108172297477722, "loss_utility": 1.685646653175354, "step": 1797 }, { "epoch": 1.6756756756756757, "grad_norm": 1.1575106810305478, "learning_rate": 9.810148429409735e-06, "loss": 1.8797, "step": 1798 }, { "epoch": 1.6756756756756757, "loss_reasoning": 0.4185771942138672, "loss_utility": 0.4611753225326538, "step": 1798 }, { "epoch": 1.6766076421248837, "grad_norm": 1.1891136983593842, "learning_rate": 9.803244735933726e-06, "loss": 1.5562, "step": 1799 }, { "epoch": 1.6766076421248837, "loss_reasoning": 0.5099311470985413, "loss_utility": 1.1092305183410645, "step": 1799 }, { "epoch": 1.6775396085740915, "grad_norm": 1.4242693115629872, "learning_rate": 9.796341042457715e-06, "loss": 1.8601, "step": 1800 }, { "epoch": 1.6775396085740915, "loss_reasoning": 0.5414872169494629, "loss_utility": 1.128334879875183, "step": 1800 }, { "epoch": 1.6784715750232992, "grad_norm": 1.4016185376189765, "learning_rate": 9.789437348981705e-06, "loss": 1.9245, "step": 1801 }, { "epoch": 1.6784715750232992, "loss_reasoning": 0.47045913338661194, "loss_utility": 1.2408396005630493, "step": 1801 }, { "epoch": 1.679403541472507, "grad_norm": 1.1583461028291802, "learning_rate": 9.782533655505696e-06, "loss": 1.6363, "step": 1802 }, { "epoch": 1.679403541472507, "loss_reasoning": 0.5726488828659058, "loss_utility": 1.3657028675079346, "step": 1802 }, { "epoch": 1.6803355079217148, "grad_norm": 1.212921377894578, "learning_rate": 9.775629962029687e-06, "loss": 1.7238, "step": 1803 }, { "epoch": 1.6803355079217148, "loss_reasoning": 0.43170106410980225, "loss_utility": 1.027982473373413, "step": 1803 }, { "epoch": 1.6812674743709226, "grad_norm": 1.1091369824296098, "learning_rate": 9.768726268553676e-06, "loss": 1.7601, "step": 1804 }, { "epoch": 1.6812674743709226, "loss_reasoning": 0.42455410957336426, "loss_utility": 1.1505378484725952, "step": 1804 }, { "epoch": 1.6821994408201304, "grad_norm": 1.1848994265371724, "learning_rate": 9.761822575077667e-06, "loss": 1.8178, "step": 1805 }, { "epoch": 1.6821994408201304, "loss_reasoning": 0.48066747188568115, "loss_utility": 1.0458749532699585, "step": 1805 }, { "epoch": 1.6831314072693382, "grad_norm": 1.2112726847544237, "learning_rate": 9.754918881601657e-06, "loss": 1.5964, "step": 1806 }, { "epoch": 1.6831314072693382, "loss_reasoning": 0.5064640045166016, "loss_utility": 1.0496386289596558, "step": 1806 }, { "epoch": 1.684063373718546, "grad_norm": 1.2185690740776374, "learning_rate": 9.748015188125648e-06, "loss": 1.6684, "step": 1807 }, { "epoch": 1.684063373718546, "loss_reasoning": 0.5421684384346008, "loss_utility": 1.8404886722564697, "step": 1807 }, { "epoch": 1.684995340167754, "grad_norm": 1.1804499296818889, "learning_rate": 9.741111494649639e-06, "loss": 2.0776, "step": 1808 }, { "epoch": 1.684995340167754, "loss_reasoning": 0.46643611788749695, "loss_utility": 1.0078083276748657, "step": 1808 }, { "epoch": 1.6859273066169618, "grad_norm": 1.0726053097842716, "learning_rate": 9.734207801173628e-06, "loss": 1.7076, "step": 1809 }, { "epoch": 1.6859273066169618, "loss_reasoning": 0.4530831575393677, "loss_utility": 1.0340180397033691, "step": 1809 }, { "epoch": 1.6868592730661698, "grad_norm": 1.2862420658772113, "learning_rate": 9.727304107697619e-06, "loss": 1.7049, "step": 1810 }, { "epoch": 1.6868592730661698, "loss_reasoning": 0.4761940538883209, "loss_utility": 0.6913950443267822, "step": 1810 }, { "epoch": 1.6877912395153776, "grad_norm": 1.3139521902884697, "learning_rate": 9.72040041422161e-06, "loss": 1.9489, "step": 1811 }, { "epoch": 1.6877912395153776, "loss_reasoning": 0.44225752353668213, "loss_utility": 0.9877276420593262, "step": 1811 }, { "epoch": 1.6887232059645854, "grad_norm": 1.1578705360689159, "learning_rate": 9.7134967207456e-06, "loss": 1.5478, "step": 1812 }, { "epoch": 1.6887232059645854, "loss_reasoning": 0.47664889693260193, "loss_utility": 1.3394899368286133, "step": 1812 }, { "epoch": 1.6896551724137931, "grad_norm": 1.4820261593700927, "learning_rate": 9.70659302726959e-06, "loss": 1.6845, "step": 1813 }, { "epoch": 1.6896551724137931, "loss_reasoning": 0.5267975330352783, "loss_utility": 0.6505111455917358, "step": 1813 }, { "epoch": 1.690587138863001, "grad_norm": 1.219874019004858, "learning_rate": 9.69968933379358e-06, "loss": 1.5306, "step": 1814 }, { "epoch": 1.690587138863001, "loss_reasoning": 0.48505401611328125, "loss_utility": 1.725206971168518, "step": 1814 }, { "epoch": 1.6915191053122087, "grad_norm": 1.4532082730481524, "learning_rate": 9.69278564031757e-06, "loss": 2.2064, "step": 1815 }, { "epoch": 1.6915191053122087, "loss_reasoning": 0.49277257919311523, "loss_utility": 1.675839900970459, "step": 1815 }, { "epoch": 1.6924510717614165, "grad_norm": 1.1345963687313392, "learning_rate": 9.685881946841561e-06, "loss": 1.8654, "step": 1816 }, { "epoch": 1.6924510717614165, "loss_reasoning": 0.5195072293281555, "loss_utility": 1.0107765197753906, "step": 1816 }, { "epoch": 1.6933830382106243, "grad_norm": 1.1906140827875058, "learning_rate": 9.678978253365552e-06, "loss": 1.654, "step": 1817 }, { "epoch": 1.6933830382106243, "loss_reasoning": 0.5267132520675659, "loss_utility": 1.1325899362564087, "step": 1817 }, { "epoch": 1.694315004659832, "grad_norm": 0.8971061756049058, "learning_rate": 9.672074559889541e-06, "loss": 1.4384, "step": 1818 }, { "epoch": 1.694315004659832, "loss_reasoning": 0.5023553967475891, "loss_utility": 1.4066133499145508, "step": 1818 }, { "epoch": 1.69524697110904, "grad_norm": 1.4063733039599633, "learning_rate": 9.665170866413532e-06, "loss": 1.7158, "step": 1819 }, { "epoch": 1.69524697110904, "loss_reasoning": 0.47204363346099854, "loss_utility": 1.3161206245422363, "step": 1819 }, { "epoch": 1.696178937558248, "grad_norm": 1.2178225461748988, "learning_rate": 9.658267172937523e-06, "loss": 1.4725, "step": 1820 }, { "epoch": 1.696178937558248, "loss_reasoning": 0.5093714594841003, "loss_utility": 1.230372428894043, "step": 1820 }, { "epoch": 1.6971109040074557, "grad_norm": 1.0924637236209045, "learning_rate": 9.651363479461513e-06, "loss": 1.6807, "step": 1821 }, { "epoch": 1.6971109040074557, "loss_reasoning": 0.45110616087913513, "loss_utility": 0.49521732330322266, "step": 1821 }, { "epoch": 1.6980428704566637, "grad_norm": 1.4355954820213421, "learning_rate": 9.644459785985502e-06, "loss": 1.588, "step": 1822 }, { "epoch": 1.6980428704566637, "loss_reasoning": 0.4904962182044983, "loss_utility": 1.1904337406158447, "step": 1822 }, { "epoch": 1.6989748369058715, "grad_norm": 1.2121604808254631, "learning_rate": 9.637556092509493e-06, "loss": 1.6689, "step": 1823 }, { "epoch": 1.6989748369058715, "loss_reasoning": 0.46616023778915405, "loss_utility": 1.6704254150390625, "step": 1823 }, { "epoch": 1.6999068033550793, "grad_norm": 1.3744944024118613, "learning_rate": 9.630652399033484e-06, "loss": 1.8929, "step": 1824 }, { "epoch": 1.6999068033550793, "loss_reasoning": 0.4949555993080139, "loss_utility": 1.57965087890625, "step": 1824 }, { "epoch": 1.700838769804287, "grad_norm": 1.1914533094754334, "learning_rate": 9.623748705557475e-06, "loss": 1.8591, "step": 1825 }, { "epoch": 1.700838769804287, "loss_reasoning": 0.45493775606155396, "loss_utility": 1.584695816040039, "step": 1825 }, { "epoch": 1.7017707362534948, "grad_norm": 1.3724066421940713, "learning_rate": 9.616845012081464e-06, "loss": 2.082, "step": 1826 }, { "epoch": 1.7017707362534948, "loss_reasoning": 0.5326683521270752, "loss_utility": 1.0234391689300537, "step": 1826 }, { "epoch": 1.7027027027027026, "grad_norm": 1.130200916890973, "learning_rate": 9.609941318605454e-06, "loss": 1.7684, "step": 1827 }, { "epoch": 1.7027027027027026, "loss_reasoning": 0.4883350729942322, "loss_utility": 1.1454365253448486, "step": 1827 }, { "epoch": 1.7036346691519104, "grad_norm": 1.4424730402111041, "learning_rate": 9.603037625129445e-06, "loss": 1.961, "step": 1828 }, { "epoch": 1.7036346691519104, "loss_reasoning": 0.5326298475265503, "loss_utility": 1.3545033931732178, "step": 1828 }, { "epoch": 1.7045666356011182, "grad_norm": 1.5462337603415108, "learning_rate": 9.596133931653436e-06, "loss": 2.0497, "step": 1829 }, { "epoch": 1.7045666356011182, "loss_reasoning": 0.5344377756118774, "loss_utility": 1.3890951871871948, "step": 1829 }, { "epoch": 1.7054986020503262, "grad_norm": 1.2302381881154245, "learning_rate": 9.589230238177427e-06, "loss": 1.956, "step": 1830 }, { "epoch": 1.7054986020503262, "loss_reasoning": 0.4747900366783142, "loss_utility": 1.5563665628433228, "step": 1830 }, { "epoch": 1.706430568499534, "grad_norm": 1.1242985672629053, "learning_rate": 9.582326544701416e-06, "loss": 1.8643, "step": 1831 }, { "epoch": 1.706430568499534, "loss_reasoning": 0.45753738284111023, "loss_utility": 1.0928387641906738, "step": 1831 }, { "epoch": 1.7073625349487418, "grad_norm": 1.1236643979726744, "learning_rate": 9.575422851225406e-06, "loss": 1.7126, "step": 1832 }, { "epoch": 1.7073625349487418, "loss_reasoning": 0.5620009303092957, "loss_utility": 1.4926120042800903, "step": 1832 }, { "epoch": 1.7082945013979498, "grad_norm": 1.2272908276645376, "learning_rate": 9.568519157749397e-06, "loss": 1.5883, "step": 1833 }, { "epoch": 1.7082945013979498, "loss_reasoning": 0.46357446908950806, "loss_utility": 1.2304608821868896, "step": 1833 }, { "epoch": 1.7092264678471576, "grad_norm": 1.064180472299572, "learning_rate": 9.561615464273388e-06, "loss": 1.6699, "step": 1834 }, { "epoch": 1.7092264678471576, "loss_reasoning": 0.537376880645752, "loss_utility": 0.631008505821228, "step": 1834 }, { "epoch": 1.7101584342963654, "grad_norm": 1.068181411218244, "learning_rate": 9.554711770797377e-06, "loss": 1.3853, "step": 1835 }, { "epoch": 1.7101584342963654, "loss_reasoning": 0.5211796760559082, "loss_utility": 1.3377338647842407, "step": 1835 }, { "epoch": 1.7110904007455732, "grad_norm": 1.208859065300389, "learning_rate": 9.547808077321368e-06, "loss": 2.0537, "step": 1836 }, { "epoch": 1.7110904007455732, "loss_reasoning": 0.5410086512565613, "loss_utility": 0.9788821935653687, "step": 1836 }, { "epoch": 1.712022367194781, "grad_norm": 1.2533246079063043, "learning_rate": 9.540904383845358e-06, "loss": 1.6591, "step": 1837 }, { "epoch": 1.712022367194781, "loss_reasoning": 0.45667845010757446, "loss_utility": 1.0170918703079224, "step": 1837 }, { "epoch": 1.7129543336439887, "grad_norm": 1.1295574266364379, "learning_rate": 9.534000690369349e-06, "loss": 1.6493, "step": 1838 }, { "epoch": 1.7129543336439887, "loss_reasoning": 0.476974219083786, "loss_utility": 1.230445384979248, "step": 1838 }, { "epoch": 1.7138863000931965, "grad_norm": 1.381625339956181, "learning_rate": 9.52709699689334e-06, "loss": 1.8296, "step": 1839 }, { "epoch": 1.7138863000931965, "loss_reasoning": 0.5712089538574219, "loss_utility": 1.0252265930175781, "step": 1839 }, { "epoch": 1.7148182665424043, "grad_norm": 1.3163243015721613, "learning_rate": 9.520193303417329e-06, "loss": 1.4894, "step": 1840 }, { "epoch": 1.7148182665424043, "loss_reasoning": 0.5400322079658508, "loss_utility": 1.4792120456695557, "step": 1840 }, { "epoch": 1.7157502329916123, "grad_norm": 1.3284074804196813, "learning_rate": 9.51328960994132e-06, "loss": 1.9005, "step": 1841 }, { "epoch": 1.7157502329916123, "loss_reasoning": 0.5604293346405029, "loss_utility": 0.8542954921722412, "step": 1841 }, { "epoch": 1.7166821994408201, "grad_norm": 1.365041147737842, "learning_rate": 9.50638591646531e-06, "loss": 1.8184, "step": 1842 }, { "epoch": 1.7166821994408201, "loss_reasoning": 0.48367786407470703, "loss_utility": 1.3550163507461548, "step": 1842 }, { "epoch": 1.717614165890028, "grad_norm": 1.3105500963352563, "learning_rate": 9.499482222989301e-06, "loss": 1.8175, "step": 1843 }, { "epoch": 1.717614165890028, "loss_reasoning": 0.4835425615310669, "loss_utility": 0.7141035795211792, "step": 1843 }, { "epoch": 1.718546132339236, "grad_norm": 0.9028179869936547, "learning_rate": 9.49257852951329e-06, "loss": 1.5495, "step": 1844 }, { "epoch": 1.718546132339236, "loss_reasoning": 0.4812491238117218, "loss_utility": 1.697730541229248, "step": 1844 }, { "epoch": 1.7194780987884437, "grad_norm": 1.554423567767758, "learning_rate": 9.48567483603728e-06, "loss": 1.8762, "step": 1845 }, { "epoch": 1.7194780987884437, "loss_reasoning": 0.5316802263259888, "loss_utility": 1.4143277406692505, "step": 1845 }, { "epoch": 1.7204100652376515, "grad_norm": 1.2050783538242615, "learning_rate": 9.47877114256127e-06, "loss": 1.7978, "step": 1846 }, { "epoch": 1.7204100652376515, "loss_reasoning": 0.5381235480308533, "loss_utility": 1.072187066078186, "step": 1846 }, { "epoch": 1.7213420316868593, "grad_norm": 1.0841663993695942, "learning_rate": 9.47186744908526e-06, "loss": 1.5718, "step": 1847 }, { "epoch": 1.7213420316868593, "loss_reasoning": 0.4270215928554535, "loss_utility": 1.0407180786132812, "step": 1847 }, { "epoch": 1.722273998136067, "grad_norm": 1.3082059800154882, "learning_rate": 9.464963755609253e-06, "loss": 1.7209, "step": 1848 }, { "epoch": 1.722273998136067, "loss_reasoning": 0.47802916169166565, "loss_utility": 0.6239244937896729, "step": 1848 }, { "epoch": 1.7232059645852749, "grad_norm": 1.121620641075096, "learning_rate": 9.458060062133242e-06, "loss": 1.4191, "step": 1849 }, { "epoch": 1.7232059645852749, "loss_reasoning": 0.5305063724517822, "loss_utility": 0.8317738771438599, "step": 1849 }, { "epoch": 1.7241379310344827, "grad_norm": 1.2801435926711306, "learning_rate": 9.451156368657233e-06, "loss": 1.5798, "step": 1850 }, { "epoch": 1.7241379310344827, "loss_reasoning": 0.49616265296936035, "loss_utility": 1.2890363931655884, "step": 1850 }, { "epoch": 1.7250698974836904, "grad_norm": 1.234261663014994, "learning_rate": 9.444252675181222e-06, "loss": 1.8772, "step": 1851 }, { "epoch": 1.7250698974836904, "loss_reasoning": 0.5412548780441284, "loss_utility": 1.4910025596618652, "step": 1851 }, { "epoch": 1.7260018639328985, "grad_norm": 1.092648841417121, "learning_rate": 9.437348981705212e-06, "loss": 1.6971, "step": 1852 }, { "epoch": 1.7260018639328985, "loss_reasoning": 0.5355275869369507, "loss_utility": 1.4450013637542725, "step": 1852 }, { "epoch": 1.7269338303821062, "grad_norm": 1.3358263941259645, "learning_rate": 9.430445288229203e-06, "loss": 1.7145, "step": 1853 }, { "epoch": 1.7269338303821062, "loss_reasoning": 0.48745596408843994, "loss_utility": 1.30098557472229, "step": 1853 }, { "epoch": 1.727865796831314, "grad_norm": 0.9996788955336492, "learning_rate": 9.423541594753194e-06, "loss": 1.6089, "step": 1854 }, { "epoch": 1.727865796831314, "loss_reasoning": 0.44942256808280945, "loss_utility": 1.0567387342453003, "step": 1854 }, { "epoch": 1.728797763280522, "grad_norm": 1.3443926656285767, "learning_rate": 9.416637901277183e-06, "loss": 1.4934, "step": 1855 }, { "epoch": 1.728797763280522, "loss_reasoning": 0.492773175239563, "loss_utility": 1.1940035820007324, "step": 1855 }, { "epoch": 1.7297297297297298, "grad_norm": 1.0491113097170766, "learning_rate": 9.409734207801174e-06, "loss": 1.5272, "step": 1856 }, { "epoch": 1.7297297297297298, "loss_reasoning": 0.4953230023384094, "loss_utility": 0.9598903656005859, "step": 1856 }, { "epoch": 1.7306616961789376, "grad_norm": 1.1066387067524908, "learning_rate": 9.402830514325164e-06, "loss": 1.6106, "step": 1857 }, { "epoch": 1.7306616961789376, "loss_reasoning": 0.47621604800224304, "loss_utility": 2.1100573539733887, "step": 1857 }, { "epoch": 1.7315936626281454, "grad_norm": 1.1520514741529408, "learning_rate": 9.395926820849155e-06, "loss": 1.8317, "step": 1858 }, { "epoch": 1.7315936626281454, "loss_reasoning": 0.47620290517807007, "loss_utility": 1.6639035940170288, "step": 1858 }, { "epoch": 1.7325256290773532, "grad_norm": 1.1228677976310457, "learning_rate": 9.389023127373146e-06, "loss": 1.7571, "step": 1859 }, { "epoch": 1.7325256290773532, "loss_reasoning": 0.4290027618408203, "loss_utility": 1.313469409942627, "step": 1859 }, { "epoch": 1.733457595526561, "grad_norm": 1.235715040203356, "learning_rate": 9.382119433897135e-06, "loss": 1.9619, "step": 1860 }, { "epoch": 1.733457595526561, "loss_reasoning": 0.4858739376068115, "loss_utility": 1.3465344905853271, "step": 1860 }, { "epoch": 1.7343895619757688, "grad_norm": 1.2553315703065302, "learning_rate": 9.375215740421126e-06, "loss": 1.7486, "step": 1861 }, { "epoch": 1.7343895619757688, "loss_reasoning": 0.45958399772644043, "loss_utility": 1.0147969722747803, "step": 1861 }, { "epoch": 1.7353215284249766, "grad_norm": 1.0889918965861247, "learning_rate": 9.368312046945116e-06, "loss": 1.6176, "step": 1862 }, { "epoch": 1.7353215284249766, "loss_reasoning": 0.47583121061325073, "loss_utility": 0.9572612047195435, "step": 1862 }, { "epoch": 1.7362534948741846, "grad_norm": 1.1501664740898385, "learning_rate": 9.361408353469107e-06, "loss": 1.4891, "step": 1863 }, { "epoch": 1.7362534948741846, "loss_reasoning": 0.43889668583869934, "loss_utility": 1.3656271696090698, "step": 1863 }, { "epoch": 1.7371854613233924, "grad_norm": 1.4906508989160092, "learning_rate": 9.354504659993096e-06, "loss": 1.7167, "step": 1864 }, { "epoch": 1.7371854613233924, "loss_reasoning": 0.49108409881591797, "loss_utility": 1.2874234914779663, "step": 1864 }, { "epoch": 1.7381174277726001, "grad_norm": 1.1815898907084983, "learning_rate": 9.347600966517087e-06, "loss": 1.5323, "step": 1865 }, { "epoch": 1.7381174277726001, "loss_reasoning": 0.4987472593784332, "loss_utility": 1.5139586925506592, "step": 1865 }, { "epoch": 1.7390493942218082, "grad_norm": 1.4225017703305267, "learning_rate": 9.340697273041078e-06, "loss": 1.9217, "step": 1866 }, { "epoch": 1.7390493942218082, "loss_reasoning": 0.44393742084503174, "loss_utility": 1.6924577951431274, "step": 1866 }, { "epoch": 1.739981360671016, "grad_norm": 1.1067262286630932, "learning_rate": 9.333793579565068e-06, "loss": 1.9009, "step": 1867 }, { "epoch": 1.739981360671016, "loss_reasoning": 0.5371575355529785, "loss_utility": 1.036358118057251, "step": 1867 }, { "epoch": 1.7409133271202237, "grad_norm": 1.3302170262062172, "learning_rate": 9.326889886089059e-06, "loss": 1.6642, "step": 1868 }, { "epoch": 1.7409133271202237, "loss_reasoning": 0.5107525587081909, "loss_utility": 1.474942922592163, "step": 1868 }, { "epoch": 1.7418452935694315, "grad_norm": 1.0036643006516741, "learning_rate": 9.319986192613048e-06, "loss": 1.6468, "step": 1869 }, { "epoch": 1.7418452935694315, "loss_reasoning": 0.4848783016204834, "loss_utility": 1.2745740413665771, "step": 1869 }, { "epoch": 1.7427772600186393, "grad_norm": 1.606039185712268, "learning_rate": 9.313082499137039e-06, "loss": 1.9578, "step": 1870 }, { "epoch": 1.7427772600186393, "loss_reasoning": 0.5310996174812317, "loss_utility": 1.396036148071289, "step": 1870 }, { "epoch": 1.743709226467847, "grad_norm": 1.2201923128038406, "learning_rate": 9.30617880566103e-06, "loss": 1.9589, "step": 1871 }, { "epoch": 1.743709226467847, "loss_reasoning": 0.5052638053894043, "loss_utility": 1.2687721252441406, "step": 1871 }, { "epoch": 1.7446411929170549, "grad_norm": 1.2092413904305908, "learning_rate": 9.29927511218502e-06, "loss": 1.6897, "step": 1872 }, { "epoch": 1.7446411929170549, "loss_reasoning": 0.4901115894317627, "loss_utility": 0.7832213640213013, "step": 1872 }, { "epoch": 1.7455731593662627, "grad_norm": 1.334767611899715, "learning_rate": 9.29237141870901e-06, "loss": 2.1075, "step": 1873 }, { "epoch": 1.7455731593662627, "loss_reasoning": 0.5428292155265808, "loss_utility": 1.5183112621307373, "step": 1873 }, { "epoch": 1.7465051258154707, "grad_norm": 1.5489921307732122, "learning_rate": 9.285467725233e-06, "loss": 1.7729, "step": 1874 }, { "epoch": 1.7465051258154707, "loss_reasoning": 0.47643688321113586, "loss_utility": 1.3883293867111206, "step": 1874 }, { "epoch": 1.7474370922646785, "grad_norm": 1.3428867182095126, "learning_rate": 9.278564031756991e-06, "loss": 2.5786, "step": 1875 }, { "epoch": 1.7474370922646785, "loss_reasoning": 0.46116748452186584, "loss_utility": 1.1348297595977783, "step": 1875 }, { "epoch": 1.7483690587138863, "grad_norm": 1.1760325553347695, "learning_rate": 9.271660338280982e-06, "loss": 1.7363, "step": 1876 }, { "epoch": 1.7483690587138863, "loss_reasoning": 0.43073660135269165, "loss_utility": 1.5364371538162231, "step": 1876 }, { "epoch": 1.7493010251630943, "grad_norm": 1.2644816307529818, "learning_rate": 9.264756644804972e-06, "loss": 1.5782, "step": 1877 }, { "epoch": 1.7493010251630943, "loss_reasoning": 0.48415830731391907, "loss_utility": 2.2036070823669434, "step": 1877 }, { "epoch": 1.750232991612302, "grad_norm": 1.4065188386989487, "learning_rate": 9.257852951328961e-06, "loss": 1.9898, "step": 1878 }, { "epoch": 1.750232991612302, "loss_reasoning": 0.5443509221076965, "loss_utility": 1.3283679485321045, "step": 1878 }, { "epoch": 1.7511649580615098, "grad_norm": 1.2493039080987285, "learning_rate": 9.250949257852952e-06, "loss": 1.5775, "step": 1879 }, { "epoch": 1.7511649580615098, "loss_reasoning": 0.47290554642677307, "loss_utility": 1.2280439138412476, "step": 1879 }, { "epoch": 1.7520969245107176, "grad_norm": 1.1999636776269134, "learning_rate": 9.244045564376943e-06, "loss": 1.4154, "step": 1880 }, { "epoch": 1.7520969245107176, "loss_reasoning": 0.5568498373031616, "loss_utility": 1.409481167793274, "step": 1880 }, { "epoch": 1.7530288909599254, "grad_norm": 1.0540780384415622, "learning_rate": 9.237141870900934e-06, "loss": 1.7917, "step": 1881 }, { "epoch": 1.7530288909599254, "loss_reasoning": 0.486862450838089, "loss_utility": 1.1829019784927368, "step": 1881 }, { "epoch": 1.7539608574091332, "grad_norm": 1.1493455634654395, "learning_rate": 9.230238177424923e-06, "loss": 1.8356, "step": 1882 }, { "epoch": 1.7539608574091332, "loss_reasoning": 0.5359725952148438, "loss_utility": 1.5454990863800049, "step": 1882 }, { "epoch": 1.754892823858341, "grad_norm": 1.2116739053219725, "learning_rate": 9.223334483948913e-06, "loss": 1.4371, "step": 1883 }, { "epoch": 1.754892823858341, "loss_reasoning": 0.48458006978034973, "loss_utility": 1.80049467086792, "step": 1883 }, { "epoch": 1.7558247903075488, "grad_norm": 1.152179420746788, "learning_rate": 9.216430790472904e-06, "loss": 1.926, "step": 1884 }, { "epoch": 1.7558247903075488, "loss_reasoning": 0.47931522130966187, "loss_utility": 1.1592473983764648, "step": 1884 }, { "epoch": 1.7567567567567568, "grad_norm": 2.1226946236433366, "learning_rate": 9.209527096996895e-06, "loss": 1.5644, "step": 1885 }, { "epoch": 1.7567567567567568, "loss_reasoning": 0.5625582933425903, "loss_utility": 1.4037038087844849, "step": 1885 }, { "epoch": 1.7576887232059646, "grad_norm": 1.1861612255760554, "learning_rate": 9.202623403520884e-06, "loss": 1.9852, "step": 1886 }, { "epoch": 1.7576887232059646, "loss_reasoning": 0.5275931358337402, "loss_utility": 1.5162628889083862, "step": 1886 }, { "epoch": 1.7586206896551724, "grad_norm": 1.0606355113720067, "learning_rate": 9.195719710044875e-06, "loss": 1.9829, "step": 1887 }, { "epoch": 1.7586206896551724, "loss_reasoning": 0.4562743902206421, "loss_utility": 1.9448686838150024, "step": 1887 }, { "epoch": 1.7595526561043804, "grad_norm": 1.4037145848302952, "learning_rate": 9.188816016568865e-06, "loss": 2.0285, "step": 1888 }, { "epoch": 1.7595526561043804, "loss_reasoning": 0.4044937789440155, "loss_utility": 1.3543016910552979, "step": 1888 }, { "epoch": 1.7604846225535882, "grad_norm": 1.3228981789466785, "learning_rate": 9.181912323092856e-06, "loss": 1.8043, "step": 1889 }, { "epoch": 1.7604846225535882, "loss_reasoning": 0.5062585473060608, "loss_utility": 1.0959184169769287, "step": 1889 }, { "epoch": 1.761416589002796, "grad_norm": 1.160428289985441, "learning_rate": 9.175008629616847e-06, "loss": 1.6396, "step": 1890 }, { "epoch": 1.761416589002796, "loss_reasoning": 0.4568346440792084, "loss_utility": 2.2883100509643555, "step": 1890 }, { "epoch": 1.7623485554520038, "grad_norm": 1.6729440373857685, "learning_rate": 9.168104936140836e-06, "loss": 2.1351, "step": 1891 }, { "epoch": 1.7623485554520038, "loss_reasoning": 0.49385562539100647, "loss_utility": 2.341411590576172, "step": 1891 }, { "epoch": 1.7632805219012115, "grad_norm": 1.2062843601699607, "learning_rate": 9.161201242664827e-06, "loss": 1.9839, "step": 1892 }, { "epoch": 1.7632805219012115, "loss_reasoning": 0.4720214009284973, "loss_utility": 1.0859206914901733, "step": 1892 }, { "epoch": 1.7642124883504193, "grad_norm": 1.3707362114892345, "learning_rate": 9.154297549188816e-06, "loss": 1.5517, "step": 1893 }, { "epoch": 1.7642124883504193, "loss_reasoning": 0.44085925817489624, "loss_utility": 1.5540618896484375, "step": 1893 }, { "epoch": 1.7651444547996271, "grad_norm": 1.283615177876106, "learning_rate": 9.147393855712808e-06, "loss": 1.9973, "step": 1894 }, { "epoch": 1.7651444547996271, "loss_reasoning": 0.516065239906311, "loss_utility": 0.9748028516769409, "step": 1894 }, { "epoch": 1.766076421248835, "grad_norm": 1.3087222834488734, "learning_rate": 9.140490162236797e-06, "loss": 1.96, "step": 1895 }, { "epoch": 1.766076421248835, "loss_reasoning": 0.4176139831542969, "loss_utility": 1.522749423980713, "step": 1895 }, { "epoch": 1.767008387698043, "grad_norm": 1.5335146502660766, "learning_rate": 9.133586468760788e-06, "loss": 1.7825, "step": 1896 }, { "epoch": 1.767008387698043, "loss_reasoning": 0.4092109501361847, "loss_utility": 0.9381125569343567, "step": 1896 }, { "epoch": 1.7679403541472507, "grad_norm": 1.3921398913848493, "learning_rate": 9.126682775284779e-06, "loss": 1.8901, "step": 1897 }, { "epoch": 1.7679403541472507, "loss_reasoning": 0.5285532474517822, "loss_utility": 1.9510726928710938, "step": 1897 }, { "epoch": 1.7688723205964585, "grad_norm": 1.289139516254945, "learning_rate": 9.119779081808768e-06, "loss": 2.1419, "step": 1898 }, { "epoch": 1.7688723205964585, "loss_reasoning": 0.44349485635757446, "loss_utility": 1.3442120552062988, "step": 1898 }, { "epoch": 1.7698042870456665, "grad_norm": 1.1387611985476287, "learning_rate": 9.11287538833276e-06, "loss": 1.7552, "step": 1899 }, { "epoch": 1.7698042870456665, "loss_reasoning": 0.4738556742668152, "loss_utility": 0.9712603092193604, "step": 1899 }, { "epoch": 1.7707362534948743, "grad_norm": 1.367157135663649, "learning_rate": 9.105971694856749e-06, "loss": 1.6766, "step": 1900 }, { "epoch": 1.7707362534948743, "loss_reasoning": 0.4635477364063263, "loss_utility": 1.253631591796875, "step": 1900 }, { "epoch": 1.771668219944082, "grad_norm": 1.0920374883205932, "learning_rate": 9.09906800138074e-06, "loss": 1.6341, "step": 1901 }, { "epoch": 1.771668219944082, "loss_reasoning": 0.4681117534637451, "loss_utility": 0.6088799834251404, "step": 1901 }, { "epoch": 1.7726001863932899, "grad_norm": 0.9604753133936104, "learning_rate": 9.092164307904729e-06, "loss": 1.1824, "step": 1902 }, { "epoch": 1.7726001863932899, "loss_reasoning": 0.4419727921485901, "loss_utility": 1.7327202558517456, "step": 1902 }, { "epoch": 1.7735321528424977, "grad_norm": 1.1732850632816507, "learning_rate": 9.08526061442872e-06, "loss": 1.7643, "step": 1903 }, { "epoch": 1.7735321528424977, "loss_reasoning": 0.5260893106460571, "loss_utility": 1.2127501964569092, "step": 1903 }, { "epoch": 1.7744641192917054, "grad_norm": 1.1988531074930617, "learning_rate": 9.07835692095271e-06, "loss": 1.6654, "step": 1904 }, { "epoch": 1.7744641192917054, "loss_reasoning": 0.4676581919193268, "loss_utility": 1.6756634712219238, "step": 1904 }, { "epoch": 1.7753960857409132, "grad_norm": 1.2662923754424145, "learning_rate": 9.071453227476701e-06, "loss": 1.9924, "step": 1905 }, { "epoch": 1.7753960857409132, "loss_reasoning": 0.5013000965118408, "loss_utility": 1.0968899726867676, "step": 1905 }, { "epoch": 1.776328052190121, "grad_norm": 1.2028429396788956, "learning_rate": 9.06454953400069e-06, "loss": 1.5592, "step": 1906 }, { "epoch": 1.776328052190121, "loss_reasoning": 0.4969666004180908, "loss_utility": 1.6409149169921875, "step": 1906 }, { "epoch": 1.777260018639329, "grad_norm": 1.0555276924383192, "learning_rate": 9.05764584052468e-06, "loss": 1.7729, "step": 1907 }, { "epoch": 1.777260018639329, "loss_reasoning": 0.4840881824493408, "loss_utility": 1.7404533624649048, "step": 1907 }, { "epoch": 1.7781919850885368, "grad_norm": 1.0088908519945188, "learning_rate": 9.050742147048671e-06, "loss": 1.8002, "step": 1908 }, { "epoch": 1.7781919850885368, "loss_reasoning": 0.5102543830871582, "loss_utility": 1.421337604522705, "step": 1908 }, { "epoch": 1.7791239515377446, "grad_norm": 1.2087414649387065, "learning_rate": 9.043838453572662e-06, "loss": 1.9097, "step": 1909 }, { "epoch": 1.7791239515377446, "loss_reasoning": 0.49962347745895386, "loss_utility": 1.7473084926605225, "step": 1909 }, { "epoch": 1.7800559179869526, "grad_norm": 1.2654853747111907, "learning_rate": 9.036934760096653e-06, "loss": 1.8917, "step": 1910 }, { "epoch": 1.7800559179869526, "loss_reasoning": 0.5322952270507812, "loss_utility": 1.4966113567352295, "step": 1910 }, { "epoch": 1.7809878844361604, "grad_norm": 1.1525855214754306, "learning_rate": 9.030031066620642e-06, "loss": 1.9949, "step": 1911 }, { "epoch": 1.7809878844361604, "loss_reasoning": 0.4401395320892334, "loss_utility": 0.9922259449958801, "step": 1911 }, { "epoch": 1.7819198508853682, "grad_norm": 1.0039121765007126, "learning_rate": 9.023127373144633e-06, "loss": 1.3177, "step": 1912 }, { "epoch": 1.7819198508853682, "loss_reasoning": 0.5012624263763428, "loss_utility": 1.5354313850402832, "step": 1912 }, { "epoch": 1.782851817334576, "grad_norm": 1.0157868975145168, "learning_rate": 9.016223679668623e-06, "loss": 1.5419, "step": 1913 }, { "epoch": 1.782851817334576, "loss_reasoning": 0.5385214686393738, "loss_utility": 1.2787729501724243, "step": 1913 }, { "epoch": 1.7837837837837838, "grad_norm": 1.2176200958712742, "learning_rate": 9.009319986192614e-06, "loss": 1.4649, "step": 1914 }, { "epoch": 1.7837837837837838, "loss_reasoning": 0.5079900622367859, "loss_utility": 1.1722333431243896, "step": 1914 }, { "epoch": 1.7847157502329916, "grad_norm": 1.3260881503608695, "learning_rate": 9.002416292716603e-06, "loss": 1.6809, "step": 1915 }, { "epoch": 1.7847157502329916, "loss_reasoning": 0.5081678628921509, "loss_utility": 0.8227671384811401, "step": 1915 }, { "epoch": 1.7856477166821993, "grad_norm": 1.5114953212843896, "learning_rate": 8.995512599240594e-06, "loss": 1.7457, "step": 1916 }, { "epoch": 1.7856477166821993, "loss_reasoning": 0.5211949944496155, "loss_utility": 1.5578725337982178, "step": 1916 }, { "epoch": 1.7865796831314071, "grad_norm": 1.219433287435646, "learning_rate": 8.988608905764585e-06, "loss": 1.6143, "step": 1917 }, { "epoch": 1.7865796831314071, "loss_reasoning": 0.5338155627250671, "loss_utility": 1.7619999647140503, "step": 1917 }, { "epoch": 1.7875116495806151, "grad_norm": 1.3941760564272303, "learning_rate": 8.981705212288575e-06, "loss": 1.9681, "step": 1918 }, { "epoch": 1.7875116495806151, "loss_reasoning": 0.49493488669395447, "loss_utility": 1.2178328037261963, "step": 1918 }, { "epoch": 1.788443616029823, "grad_norm": 1.3003024202572124, "learning_rate": 8.974801518812566e-06, "loss": 1.6666, "step": 1919 }, { "epoch": 1.788443616029823, "loss_reasoning": 0.4937789738178253, "loss_utility": 1.132367730140686, "step": 1919 }, { "epoch": 1.7893755824790307, "grad_norm": 1.0815226554175985, "learning_rate": 8.967897825336555e-06, "loss": 1.5636, "step": 1920 }, { "epoch": 1.7893755824790307, "loss_reasoning": 0.49425050616264343, "loss_utility": 0.2762766182422638, "step": 1920 }, { "epoch": 1.7903075489282387, "grad_norm": 1.2667188380363945, "learning_rate": 8.960994131860546e-06, "loss": 1.5196, "step": 1921 }, { "epoch": 1.7903075489282387, "loss_reasoning": 0.44485005736351013, "loss_utility": 1.3760061264038086, "step": 1921 }, { "epoch": 1.7912395153774465, "grad_norm": 1.2636348692164918, "learning_rate": 8.954090438384537e-06, "loss": 1.6209, "step": 1922 }, { "epoch": 1.7912395153774465, "loss_reasoning": 0.49020063877105713, "loss_utility": 1.5755616426467896, "step": 1922 }, { "epoch": 1.7921714818266543, "grad_norm": 1.2181476258068658, "learning_rate": 8.947186744908527e-06, "loss": 1.7576, "step": 1923 }, { "epoch": 1.7921714818266543, "loss_reasoning": 0.4433548152446747, "loss_utility": 1.0780155658721924, "step": 1923 }, { "epoch": 1.793103448275862, "grad_norm": 1.1428522982190803, "learning_rate": 8.940283051432516e-06, "loss": 1.5502, "step": 1924 }, { "epoch": 1.793103448275862, "loss_reasoning": 0.45872628688812256, "loss_utility": 2.3164923191070557, "step": 1924 }, { "epoch": 1.7940354147250699, "grad_norm": 1.465265459428371, "learning_rate": 8.933379357956507e-06, "loss": 2.1209, "step": 1925 }, { "epoch": 1.7940354147250699, "loss_reasoning": 0.48785120248794556, "loss_utility": 1.1434264183044434, "step": 1925 }, { "epoch": 1.7949673811742777, "grad_norm": 1.048541728374822, "learning_rate": 8.926475664480498e-06, "loss": 1.4676, "step": 1926 }, { "epoch": 1.7949673811742777, "loss_reasoning": 0.4776042699813843, "loss_utility": 1.1838268041610718, "step": 1926 }, { "epoch": 1.7958993476234855, "grad_norm": 1.1540393659387804, "learning_rate": 8.919571971004489e-06, "loss": 1.6663, "step": 1927 }, { "epoch": 1.7958993476234855, "loss_reasoning": 0.48254451155662537, "loss_utility": 1.195828914642334, "step": 1927 }, { "epoch": 1.7968313140726933, "grad_norm": 1.0333410342144411, "learning_rate": 8.91266827752848e-06, "loss": 1.6013, "step": 1928 }, { "epoch": 1.7968313140726933, "loss_reasoning": 0.4081447720527649, "loss_utility": 0.811646044254303, "step": 1928 }, { "epoch": 1.7977632805219013, "grad_norm": 1.1496129517530587, "learning_rate": 8.905764584052468e-06, "loss": 1.8377, "step": 1929 }, { "epoch": 1.7977632805219013, "loss_reasoning": 0.5616958141326904, "loss_utility": 1.0821224451065063, "step": 1929 }, { "epoch": 1.798695246971109, "grad_norm": 1.0912104366654596, "learning_rate": 8.898860890576459e-06, "loss": 1.8598, "step": 1930 }, { "epoch": 1.798695246971109, "loss_reasoning": 0.5015807151794434, "loss_utility": 1.2515406608581543, "step": 1930 }, { "epoch": 1.7996272134203168, "grad_norm": 1.110044722419523, "learning_rate": 8.89195719710045e-06, "loss": 1.6997, "step": 1931 }, { "epoch": 1.7996272134203168, "loss_reasoning": 0.48134031891822815, "loss_utility": 0.41440513730049133, "step": 1931 }, { "epoch": 1.8005591798695249, "grad_norm": 1.043442555451199, "learning_rate": 8.88505350362444e-06, "loss": 1.6074, "step": 1932 }, { "epoch": 1.8005591798695249, "loss_reasoning": 0.4743509292602539, "loss_utility": 1.45964515209198, "step": 1932 }, { "epoch": 1.8014911463187326, "grad_norm": 1.265642622729561, "learning_rate": 8.87814981014843e-06, "loss": 1.7838, "step": 1933 }, { "epoch": 1.8014911463187326, "loss_reasoning": 0.44932425022125244, "loss_utility": 1.1098887920379639, "step": 1933 }, { "epoch": 1.8024231127679404, "grad_norm": 1.1911945171050513, "learning_rate": 8.87124611667242e-06, "loss": 1.7776, "step": 1934 }, { "epoch": 1.8024231127679404, "loss_reasoning": 0.5624804496765137, "loss_utility": 1.2922773361206055, "step": 1934 }, { "epoch": 1.8033550792171482, "grad_norm": 1.4529769127426686, "learning_rate": 8.864342423196411e-06, "loss": 1.5825, "step": 1935 }, { "epoch": 1.8033550792171482, "loss_reasoning": 0.5456920266151428, "loss_utility": 0.9211941957473755, "step": 1935 }, { "epoch": 1.804287045666356, "grad_norm": 1.1139571912558335, "learning_rate": 8.857438729720402e-06, "loss": 1.7768, "step": 1936 }, { "epoch": 1.804287045666356, "loss_reasoning": 0.5367616415023804, "loss_utility": 1.3662725687026978, "step": 1936 }, { "epoch": 1.8052190121155638, "grad_norm": 1.2632736988528952, "learning_rate": 8.850535036244391e-06, "loss": 1.7493, "step": 1937 }, { "epoch": 1.8052190121155638, "loss_reasoning": 0.43399778008461, "loss_utility": 0.9624567031860352, "step": 1937 }, { "epoch": 1.8061509785647716, "grad_norm": 1.0632289046111276, "learning_rate": 8.843631342768382e-06, "loss": 1.4599, "step": 1938 }, { "epoch": 1.8061509785647716, "loss_reasoning": 0.49408870935440063, "loss_utility": 0.46931350231170654, "step": 1938 }, { "epoch": 1.8070829450139794, "grad_norm": 1.241016180943228, "learning_rate": 8.836727649292372e-06, "loss": 1.3986, "step": 1939 }, { "epoch": 1.8070829450139794, "loss_reasoning": 0.48265135288238525, "loss_utility": 1.6987082958221436, "step": 1939 }, { "epoch": 1.8080149114631874, "grad_norm": 1.3152835993161378, "learning_rate": 8.829823955816363e-06, "loss": 2.0282, "step": 1940 }, { "epoch": 1.8080149114631874, "loss_reasoning": 0.49386322498321533, "loss_utility": 1.1694536209106445, "step": 1940 }, { "epoch": 1.8089468779123952, "grad_norm": 1.581478680129676, "learning_rate": 8.822920262340354e-06, "loss": 1.9017, "step": 1941 }, { "epoch": 1.8089468779123952, "loss_reasoning": 0.5481325387954712, "loss_utility": 1.3698129653930664, "step": 1941 }, { "epoch": 1.809878844361603, "grad_norm": 1.055080977094516, "learning_rate": 8.816016568864343e-06, "loss": 1.676, "step": 1942 }, { "epoch": 1.809878844361603, "loss_reasoning": 0.4517761170864105, "loss_utility": 0.6848657131195068, "step": 1942 }, { "epoch": 1.810810810810811, "grad_norm": 1.2392197958190003, "learning_rate": 8.809112875388334e-06, "loss": 1.5381, "step": 1943 }, { "epoch": 1.810810810810811, "loss_reasoning": 0.5147702693939209, "loss_utility": 1.0122374296188354, "step": 1943 }, { "epoch": 1.8117427772600188, "grad_norm": 1.0678609971053188, "learning_rate": 8.802209181912323e-06, "loss": 1.7135, "step": 1944 }, { "epoch": 1.8117427772600188, "loss_reasoning": 0.5468922853469849, "loss_utility": 2.5733284950256348, "step": 1944 }, { "epoch": 1.8126747437092265, "grad_norm": 1.306255417071724, "learning_rate": 8.795305488436315e-06, "loss": 1.966, "step": 1945 }, { "epoch": 1.8126747437092265, "loss_reasoning": 0.4908186197280884, "loss_utility": 0.7820706367492676, "step": 1945 }, { "epoch": 1.8136067101584343, "grad_norm": 1.1435137603601824, "learning_rate": 8.788401794960304e-06, "loss": 1.5619, "step": 1946 }, { "epoch": 1.8136067101584343, "loss_reasoning": 0.44361525774002075, "loss_utility": 1.47117018699646, "step": 1946 }, { "epoch": 1.8145386766076421, "grad_norm": 1.2456830389452862, "learning_rate": 8.781498101484295e-06, "loss": 1.7607, "step": 1947 }, { "epoch": 1.8145386766076421, "loss_reasoning": 0.47217655181884766, "loss_utility": 1.4532082080841064, "step": 1947 }, { "epoch": 1.81547064305685, "grad_norm": 1.7400658449025623, "learning_rate": 8.774594408008286e-06, "loss": 1.8396, "step": 1948 }, { "epoch": 1.81547064305685, "loss_reasoning": 0.47716888785362244, "loss_utility": 1.016968846321106, "step": 1948 }, { "epoch": 1.8164026095060577, "grad_norm": 1.0238316276253634, "learning_rate": 8.767690714532275e-06, "loss": 1.5378, "step": 1949 }, { "epoch": 1.8164026095060577, "loss_reasoning": 0.4918558597564697, "loss_utility": 0.6958029270172119, "step": 1949 }, { "epoch": 1.8173345759552655, "grad_norm": 1.09104910641385, "learning_rate": 8.760787021056267e-06, "loss": 1.4849, "step": 1950 }, { "epoch": 1.8173345759552655, "loss_reasoning": 0.4550497531890869, "loss_utility": 0.4531525671482086, "step": 1950 }, { "epoch": 1.8182665424044733, "grad_norm": 1.1520227496426412, "learning_rate": 8.753883327580256e-06, "loss": 1.5277, "step": 1951 }, { "epoch": 1.8182665424044733, "loss_reasoning": 0.5301910042762756, "loss_utility": 0.8914298415184021, "step": 1951 }, { "epoch": 1.8191985088536813, "grad_norm": 1.204599103029003, "learning_rate": 8.746979634104247e-06, "loss": 1.535, "step": 1952 }, { "epoch": 1.8191985088536813, "loss_reasoning": 0.49337679147720337, "loss_utility": 1.0500789880752563, "step": 1952 }, { "epoch": 1.820130475302889, "grad_norm": 1.525280999574343, "learning_rate": 8.740075940628236e-06, "loss": 1.6108, "step": 1953 }, { "epoch": 1.820130475302889, "loss_reasoning": 0.4540018141269684, "loss_utility": 2.0136570930480957, "step": 1953 }, { "epoch": 1.821062441752097, "grad_norm": 1.4161606634875628, "learning_rate": 8.733172247152227e-06, "loss": 1.8319, "step": 1954 }, { "epoch": 1.821062441752097, "loss_reasoning": 0.4828234016895294, "loss_utility": 1.0934157371520996, "step": 1954 }, { "epoch": 1.8219944082013049, "grad_norm": 1.1049354020885715, "learning_rate": 8.726268553676217e-06, "loss": 1.4769, "step": 1955 }, { "epoch": 1.8219944082013049, "loss_reasoning": 0.49474823474884033, "loss_utility": 0.9031672477722168, "step": 1955 }, { "epoch": 1.8229263746505127, "grad_norm": 1.3944776440301072, "learning_rate": 8.719364860200208e-06, "loss": 1.7443, "step": 1956 }, { "epoch": 1.8229263746505127, "loss_reasoning": 0.4870549142360687, "loss_utility": 1.3850455284118652, "step": 1956 }, { "epoch": 1.8238583410997204, "grad_norm": 1.2004915061878534, "learning_rate": 8.712461166724197e-06, "loss": 2.0453, "step": 1957 }, { "epoch": 1.8238583410997204, "loss_reasoning": 0.4918653070926666, "loss_utility": 0.996599555015564, "step": 1957 }, { "epoch": 1.8247903075489282, "grad_norm": 1.513660520726279, "learning_rate": 8.705557473248188e-06, "loss": 1.7404, "step": 1958 }, { "epoch": 1.8247903075489282, "loss_reasoning": 0.4763164520263672, "loss_utility": 1.51383376121521, "step": 1958 }, { "epoch": 1.825722273998136, "grad_norm": 1.2569137554099472, "learning_rate": 8.698653779772179e-06, "loss": 2.0432, "step": 1959 }, { "epoch": 1.825722273998136, "loss_reasoning": 0.5528664588928223, "loss_utility": 1.2904798984527588, "step": 1959 }, { "epoch": 1.8266542404473438, "grad_norm": 1.2337586516547154, "learning_rate": 8.69175008629617e-06, "loss": 2.0403, "step": 1960 }, { "epoch": 1.8266542404473438, "loss_reasoning": 0.4702712595462799, "loss_utility": 0.9146637916564941, "step": 1960 }, { "epoch": 1.8275862068965516, "grad_norm": 1.0146528220723314, "learning_rate": 8.68484639282016e-06, "loss": 1.6379, "step": 1961 }, { "epoch": 1.8275862068965516, "loss_reasoning": 0.49014636874198914, "loss_utility": 1.2614803314208984, "step": 1961 }, { "epoch": 1.8285181733457594, "grad_norm": 1.2041509113908675, "learning_rate": 8.677942699344149e-06, "loss": 2.0078, "step": 1962 }, { "epoch": 1.8285181733457594, "loss_reasoning": 0.45112311840057373, "loss_utility": 1.544705867767334, "step": 1962 }, { "epoch": 1.8294501397949674, "grad_norm": 1.3223630869249938, "learning_rate": 8.67103900586814e-06, "loss": 2.0474, "step": 1963 }, { "epoch": 1.8294501397949674, "loss_reasoning": 0.5400568842887878, "loss_utility": 0.7191222906112671, "step": 1963 }, { "epoch": 1.8303821062441752, "grad_norm": 1.1548008980483537, "learning_rate": 8.66413531239213e-06, "loss": 1.5364, "step": 1964 }, { "epoch": 1.8303821062441752, "loss_reasoning": 0.5225916504859924, "loss_utility": 1.464956283569336, "step": 1964 }, { "epoch": 1.8313140726933832, "grad_norm": 1.0699073672913577, "learning_rate": 8.657231618916121e-06, "loss": 1.8217, "step": 1965 }, { "epoch": 1.8313140726933832, "loss_reasoning": 0.4207400679588318, "loss_utility": 0.8544393181800842, "step": 1965 }, { "epoch": 1.832246039142591, "grad_norm": 1.213851705847965, "learning_rate": 8.65032792544011e-06, "loss": 1.4249, "step": 1966 }, { "epoch": 1.832246039142591, "loss_reasoning": 0.5030989050865173, "loss_utility": 1.1901386976242065, "step": 1966 }, { "epoch": 1.8331780055917988, "grad_norm": 1.135111986281968, "learning_rate": 8.643424231964101e-06, "loss": 1.8433, "step": 1967 }, { "epoch": 1.8331780055917988, "loss_reasoning": 0.47972381114959717, "loss_utility": 1.1403727531433105, "step": 1967 }, { "epoch": 1.8341099720410066, "grad_norm": 1.3396639721097674, "learning_rate": 8.636520538488092e-06, "loss": 1.8314, "step": 1968 }, { "epoch": 1.8341099720410066, "loss_reasoning": 0.48443061113357544, "loss_utility": 1.7387919425964355, "step": 1968 }, { "epoch": 1.8350419384902144, "grad_norm": 1.1571219293472021, "learning_rate": 8.629616845012082e-06, "loss": 1.9846, "step": 1969 }, { "epoch": 1.8350419384902144, "loss_reasoning": 0.472339391708374, "loss_utility": 1.112762212753296, "step": 1969 }, { "epoch": 1.8359739049394221, "grad_norm": 1.1433443763548345, "learning_rate": 8.622713151536073e-06, "loss": 1.6101, "step": 1970 }, { "epoch": 1.8359739049394221, "loss_reasoning": 0.5182310342788696, "loss_utility": 0.8736681342124939, "step": 1970 }, { "epoch": 1.83690587138863, "grad_norm": 1.1175879433827964, "learning_rate": 8.615809458060062e-06, "loss": 1.6404, "step": 1971 }, { "epoch": 1.83690587138863, "loss_reasoning": 0.5914585590362549, "loss_utility": 1.0174583196640015, "step": 1971 }, { "epoch": 1.8378378378378377, "grad_norm": 1.2742953485559552, "learning_rate": 8.608905764584053e-06, "loss": 1.6538, "step": 1972 }, { "epoch": 1.8378378378378377, "loss_reasoning": 0.49509045481681824, "loss_utility": 1.1399166584014893, "step": 1972 }, { "epoch": 1.8387698042870455, "grad_norm": 1.1900773360032177, "learning_rate": 8.602002071108044e-06, "loss": 1.7339, "step": 1973 }, { "epoch": 1.8387698042870455, "loss_reasoning": 0.566870391368866, "loss_utility": 1.8711512088775635, "step": 1973 }, { "epoch": 1.8397017707362535, "grad_norm": 1.3657677766703284, "learning_rate": 8.595098377632034e-06, "loss": 1.8385, "step": 1974 }, { "epoch": 1.8397017707362535, "loss_reasoning": 0.5312892198562622, "loss_utility": 1.302822470664978, "step": 1974 }, { "epoch": 1.8406337371854613, "grad_norm": 1.2989404053799496, "learning_rate": 8.588194684156023e-06, "loss": 1.6583, "step": 1975 }, { "epoch": 1.8406337371854613, "loss_reasoning": 0.4854894280433655, "loss_utility": 1.2487775087356567, "step": 1975 }, { "epoch": 1.8415657036346693, "grad_norm": 1.1408785216397883, "learning_rate": 8.581290990680014e-06, "loss": 1.4043, "step": 1976 }, { "epoch": 1.8415657036346693, "loss_reasoning": 0.49827536940574646, "loss_utility": 0.9817308187484741, "step": 1976 }, { "epoch": 1.842497670083877, "grad_norm": 1.1557274656571452, "learning_rate": 8.574387297204005e-06, "loss": 1.6456, "step": 1977 }, { "epoch": 1.842497670083877, "loss_reasoning": 0.5228250622749329, "loss_utility": 1.1893270015716553, "step": 1977 }, { "epoch": 1.843429636533085, "grad_norm": 1.1598424629185164, "learning_rate": 8.567483603727996e-06, "loss": 1.6451, "step": 1978 }, { "epoch": 1.843429636533085, "loss_reasoning": 0.49781477451324463, "loss_utility": 1.3582744598388672, "step": 1978 }, { "epoch": 1.8443616029822927, "grad_norm": 1.412881819493065, "learning_rate": 8.560579910251986e-06, "loss": 1.7673, "step": 1979 }, { "epoch": 1.8443616029822927, "loss_reasoning": 0.42976969480514526, "loss_utility": 1.7332603931427002, "step": 1979 }, { "epoch": 1.8452935694315005, "grad_norm": 1.1000643321382066, "learning_rate": 8.553676216775975e-06, "loss": 1.6635, "step": 1980 }, { "epoch": 1.8452935694315005, "loss_reasoning": 0.4532811641693115, "loss_utility": 1.2512000799179077, "step": 1980 }, { "epoch": 1.8462255358807083, "grad_norm": 1.2422511211508107, "learning_rate": 8.546772523299966e-06, "loss": 1.8212, "step": 1981 }, { "epoch": 1.8462255358807083, "loss_reasoning": 0.47642338275909424, "loss_utility": 0.8874200582504272, "step": 1981 }, { "epoch": 1.847157502329916, "grad_norm": 1.1216847113406578, "learning_rate": 8.539868829823957e-06, "loss": 1.6314, "step": 1982 }, { "epoch": 1.847157502329916, "loss_reasoning": 0.477393239736557, "loss_utility": 1.3043036460876465, "step": 1982 }, { "epoch": 1.8480894687791238, "grad_norm": 1.2175132609374342, "learning_rate": 8.532965136347948e-06, "loss": 1.6969, "step": 1983 }, { "epoch": 1.8480894687791238, "loss_reasoning": 0.4944772720336914, "loss_utility": 1.2073724269866943, "step": 1983 }, { "epoch": 1.8490214352283316, "grad_norm": 1.3094388927874856, "learning_rate": 8.526061442871937e-06, "loss": 1.6069, "step": 1984 }, { "epoch": 1.8490214352283316, "loss_reasoning": 0.49973487854003906, "loss_utility": 1.1464288234710693, "step": 1984 }, { "epoch": 1.8499534016775396, "grad_norm": 1.2277598533609149, "learning_rate": 8.519157749395927e-06, "loss": 1.451, "step": 1985 }, { "epoch": 1.8499534016775396, "loss_reasoning": 0.5548176169395447, "loss_utility": 1.4404053688049316, "step": 1985 }, { "epoch": 1.8508853681267474, "grad_norm": 1.4069318307164709, "learning_rate": 8.512254055919918e-06, "loss": 1.7234, "step": 1986 }, { "epoch": 1.8508853681267474, "loss_reasoning": 0.4445571303367615, "loss_utility": 0.9506056904792786, "step": 1986 }, { "epoch": 1.8518173345759554, "grad_norm": 1.0987388583124822, "learning_rate": 8.505350362443909e-06, "loss": 1.5822, "step": 1987 }, { "epoch": 1.8518173345759554, "loss_reasoning": 0.5217059850692749, "loss_utility": 1.375821828842163, "step": 1987 }, { "epoch": 1.8527493010251632, "grad_norm": 1.1877133335946128, "learning_rate": 8.4984466689679e-06, "loss": 1.5095, "step": 1988 }, { "epoch": 1.8527493010251632, "loss_reasoning": 0.5442067384719849, "loss_utility": 1.5605496168136597, "step": 1988 }, { "epoch": 1.853681267474371, "grad_norm": 1.1081009640694786, "learning_rate": 8.491542975491889e-06, "loss": 1.7803, "step": 1989 }, { "epoch": 1.853681267474371, "loss_reasoning": 0.49663567543029785, "loss_utility": 1.306501865386963, "step": 1989 }, { "epoch": 1.8546132339235788, "grad_norm": 1.3319032206340327, "learning_rate": 8.48463928201588e-06, "loss": 1.8382, "step": 1990 }, { "epoch": 1.8546132339235788, "loss_reasoning": 0.4565746784210205, "loss_utility": 1.5136699676513672, "step": 1990 }, { "epoch": 1.8555452003727866, "grad_norm": 1.0827037182430128, "learning_rate": 8.47773558853987e-06, "loss": 1.7118, "step": 1991 }, { "epoch": 1.8555452003727866, "loss_reasoning": 0.527300238609314, "loss_utility": 1.3039071559906006, "step": 1991 }, { "epoch": 1.8564771668219944, "grad_norm": 1.22111811767788, "learning_rate": 8.47083189506386e-06, "loss": 2.0945, "step": 1992 }, { "epoch": 1.8564771668219944, "loss_reasoning": 0.43613964319229126, "loss_utility": 1.5982789993286133, "step": 1992 }, { "epoch": 1.8574091332712022, "grad_norm": 1.293415043570198, "learning_rate": 8.46392820158785e-06, "loss": 1.9708, "step": 1993 }, { "epoch": 1.8574091332712022, "loss_reasoning": 0.48442262411117554, "loss_utility": 1.3333942890167236, "step": 1993 }, { "epoch": 1.85834109972041, "grad_norm": 1.1697488891262122, "learning_rate": 8.45702450811184e-06, "loss": 1.7774, "step": 1994 }, { "epoch": 1.85834109972041, "loss_reasoning": 0.5110911726951599, "loss_utility": 1.4071534872055054, "step": 1994 }, { "epoch": 1.8592730661696177, "grad_norm": 1.2827210094191834, "learning_rate": 8.45012081463583e-06, "loss": 1.798, "step": 1995 }, { "epoch": 1.8592730661696177, "loss_reasoning": 0.4677290618419647, "loss_utility": 1.1199363470077515, "step": 1995 }, { "epoch": 1.8602050326188257, "grad_norm": 1.1500599101123785, "learning_rate": 8.443217121159822e-06, "loss": 1.7255, "step": 1996 }, { "epoch": 1.8602050326188257, "loss_reasoning": 0.475294828414917, "loss_utility": 1.354026436805725, "step": 1996 }, { "epoch": 1.8611369990680335, "grad_norm": 1.1896348917784616, "learning_rate": 8.436313427683811e-06, "loss": 1.4996, "step": 1997 }, { "epoch": 1.8611369990680335, "loss_reasoning": 0.49535027146339417, "loss_utility": 1.2213616371154785, "step": 1997 }, { "epoch": 1.8620689655172413, "grad_norm": 1.247585844278387, "learning_rate": 8.429409734207802e-06, "loss": 1.6276, "step": 1998 }, { "epoch": 1.8620689655172413, "loss_reasoning": 0.4730224907398224, "loss_utility": 1.369384527206421, "step": 1998 }, { "epoch": 1.8630009319664493, "grad_norm": 1.181510679411698, "learning_rate": 8.422506040731793e-06, "loss": 1.9091, "step": 1999 }, { "epoch": 1.8630009319664493, "loss_reasoning": 0.4352494478225708, "loss_utility": 3.1003623008728027, "step": 1999 }, { "epoch": 1.8639328984156571, "grad_norm": 1.379582525936018, "learning_rate": 8.415602347255782e-06, "loss": 2.304, "step": 2000 }, { "epoch": 1.8639328984156571, "loss_reasoning": 0.5100979208946228, "loss_utility": 1.3658945560455322, "step": 2000 }, { "epoch": 1.864864864864865, "grad_norm": 1.3130963058253131, "learning_rate": 8.408698653779774e-06, "loss": 1.6358, "step": 2001 }, { "epoch": 1.864864864864865, "loss_reasoning": 0.5244476795196533, "loss_utility": 0.9838794469833374, "step": 2001 }, { "epoch": 1.8657968313140727, "grad_norm": 1.1960110500077736, "learning_rate": 8.401794960303763e-06, "loss": 1.8373, "step": 2002 }, { "epoch": 1.8657968313140727, "loss_reasoning": 0.4924951195716858, "loss_utility": 1.4394149780273438, "step": 2002 }, { "epoch": 1.8667287977632805, "grad_norm": 1.368514113711013, "learning_rate": 8.394891266827754e-06, "loss": 1.741, "step": 2003 }, { "epoch": 1.8667287977632805, "loss_reasoning": 0.5397895574569702, "loss_utility": 1.9199374914169312, "step": 2003 }, { "epoch": 1.8676607642124883, "grad_norm": 1.4969584761102857, "learning_rate": 8.387987573351743e-06, "loss": 2.034, "step": 2004 }, { "epoch": 1.8676607642124883, "loss_reasoning": 0.4990260601043701, "loss_utility": 0.8493919372558594, "step": 2004 }, { "epoch": 1.868592730661696, "grad_norm": 0.9123231594815784, "learning_rate": 8.381083879875734e-06, "loss": 1.4521, "step": 2005 }, { "epoch": 1.868592730661696, "loss_reasoning": 0.5734570026397705, "loss_utility": 1.459015130996704, "step": 2005 }, { "epoch": 1.8695246971109039, "grad_norm": 1.4467288319606413, "learning_rate": 8.374180186399724e-06, "loss": 1.9919, "step": 2006 }, { "epoch": 1.8695246971109039, "loss_reasoning": 0.4881105422973633, "loss_utility": 1.3846006393432617, "step": 2006 }, { "epoch": 1.8704566635601119, "grad_norm": 1.1842190972913025, "learning_rate": 8.367276492923715e-06, "loss": 1.7501, "step": 2007 }, { "epoch": 1.8704566635601119, "loss_reasoning": 0.5114502906799316, "loss_utility": 1.5641014575958252, "step": 2007 }, { "epoch": 1.8713886300093197, "grad_norm": 1.3826914988636918, "learning_rate": 8.360372799447706e-06, "loss": 1.9277, "step": 2008 }, { "epoch": 1.8713886300093197, "loss_reasoning": 0.5069124698638916, "loss_utility": 1.3730084896087646, "step": 2008 }, { "epoch": 1.8723205964585274, "grad_norm": 1.3542209178758675, "learning_rate": 8.353469105971695e-06, "loss": 1.9226, "step": 2009 }, { "epoch": 1.8723205964585274, "loss_reasoning": 0.5084777474403381, "loss_utility": 1.5714036226272583, "step": 2009 }, { "epoch": 1.8732525629077355, "grad_norm": 1.2072626307363254, "learning_rate": 8.346565412495686e-06, "loss": 1.6013, "step": 2010 }, { "epoch": 1.8732525629077355, "loss_reasoning": 0.42601630091667175, "loss_utility": 1.8513944149017334, "step": 2010 }, { "epoch": 1.8741845293569432, "grad_norm": 1.2700602528432574, "learning_rate": 8.339661719019676e-06, "loss": 1.6414, "step": 2011 }, { "epoch": 1.8741845293569432, "loss_reasoning": 0.43047890067100525, "loss_utility": 1.4448779821395874, "step": 2011 }, { "epoch": 1.875116495806151, "grad_norm": 1.3368399514029592, "learning_rate": 8.332758025543667e-06, "loss": 1.7318, "step": 2012 }, { "epoch": 1.875116495806151, "loss_reasoning": 0.5360170006752014, "loss_utility": 1.135664939880371, "step": 2012 }, { "epoch": 1.8760484622553588, "grad_norm": 0.9945943954051516, "learning_rate": 8.325854332067656e-06, "loss": 1.4823, "step": 2013 }, { "epoch": 1.8760484622553588, "loss_reasoning": 0.4794762134552002, "loss_utility": 0.8642305135726929, "step": 2013 }, { "epoch": 1.8769804287045666, "grad_norm": 1.2965041749011768, "learning_rate": 8.318950638591647e-06, "loss": 1.5811, "step": 2014 }, { "epoch": 1.8769804287045666, "loss_reasoning": 0.47374552488327026, "loss_utility": 0.7968896627426147, "step": 2014 }, { "epoch": 1.8779123951537744, "grad_norm": 1.1001235775207847, "learning_rate": 8.312046945115638e-06, "loss": 1.5016, "step": 2015 }, { "epoch": 1.8779123951537744, "loss_reasoning": 0.5294362306594849, "loss_utility": 0.9244770407676697, "step": 2015 }, { "epoch": 1.8788443616029822, "grad_norm": 1.031173690740194, "learning_rate": 8.305143251639628e-06, "loss": 1.4341, "step": 2016 }, { "epoch": 1.8788443616029822, "loss_reasoning": 0.4355055093765259, "loss_utility": 1.3777509927749634, "step": 2016 }, { "epoch": 1.87977632805219, "grad_norm": 1.4156103018603907, "learning_rate": 8.298239558163617e-06, "loss": 1.7941, "step": 2017 }, { "epoch": 1.87977632805219, "loss_reasoning": 0.47094231843948364, "loss_utility": 0.9381978511810303, "step": 2017 }, { "epoch": 1.880708294501398, "grad_norm": 1.1282589344948268, "learning_rate": 8.291335864687608e-06, "loss": 1.473, "step": 2018 }, { "epoch": 1.880708294501398, "loss_reasoning": 0.5083662271499634, "loss_utility": 1.3046162128448486, "step": 2018 }, { "epoch": 1.8816402609506058, "grad_norm": 1.3369754204828888, "learning_rate": 8.284432171211599e-06, "loss": 1.9491, "step": 2019 }, { "epoch": 1.8816402609506058, "loss_reasoning": 0.47756338119506836, "loss_utility": 0.8743101358413696, "step": 2019 }, { "epoch": 1.8825722273998136, "grad_norm": 1.1249328499105535, "learning_rate": 8.27752847773559e-06, "loss": 1.6705, "step": 2020 }, { "epoch": 1.8825722273998136, "loss_reasoning": 0.452289879322052, "loss_utility": 1.1317692995071411, "step": 2020 }, { "epoch": 1.8835041938490216, "grad_norm": 1.2573404983336383, "learning_rate": 8.27062478425958e-06, "loss": 1.8034, "step": 2021 }, { "epoch": 1.8835041938490216, "loss_reasoning": 0.4940444231033325, "loss_utility": 0.8498162031173706, "step": 2021 }, { "epoch": 1.8844361602982294, "grad_norm": 1.4498100296553775, "learning_rate": 8.26372109078357e-06, "loss": 1.7936, "step": 2022 }, { "epoch": 1.8844361602982294, "loss_reasoning": 0.516433596611023, "loss_utility": 0.8244739770889282, "step": 2022 }, { "epoch": 1.8853681267474371, "grad_norm": 1.3971194545095014, "learning_rate": 8.25681739730756e-06, "loss": 1.8516, "step": 2023 }, { "epoch": 1.8853681267474371, "loss_reasoning": 0.43641117215156555, "loss_utility": 1.6128156185150146, "step": 2023 }, { "epoch": 1.886300093196645, "grad_norm": 1.6780146302914278, "learning_rate": 8.24991370383155e-06, "loss": 1.8184, "step": 2024 }, { "epoch": 1.886300093196645, "loss_reasoning": 0.43894100189208984, "loss_utility": 0.9170196652412415, "step": 2024 }, { "epoch": 1.8872320596458527, "grad_norm": 1.2575203329637277, "learning_rate": 8.243010010355541e-06, "loss": 1.5431, "step": 2025 }, { "epoch": 1.8872320596458527, "loss_reasoning": 0.4709470570087433, "loss_utility": 1.3347110748291016, "step": 2025 }, { "epoch": 1.8881640260950605, "grad_norm": 1.1791994802463424, "learning_rate": 8.23610631687953e-06, "loss": 1.6875, "step": 2026 }, { "epoch": 1.8881640260950605, "loss_reasoning": 0.5237486362457275, "loss_utility": 1.3697487115859985, "step": 2026 }, { "epoch": 1.8890959925442683, "grad_norm": 1.2603664886571155, "learning_rate": 8.229202623403521e-06, "loss": 1.5082, "step": 2027 }, { "epoch": 1.8890959925442683, "loss_reasoning": 0.47907453775405884, "loss_utility": 1.4799915552139282, "step": 2027 }, { "epoch": 1.890027958993476, "grad_norm": 1.31728084652596, "learning_rate": 8.222298929927512e-06, "loss": 1.8963, "step": 2028 }, { "epoch": 1.890027958993476, "loss_reasoning": 0.5190674066543579, "loss_utility": 1.178466796875, "step": 2028 }, { "epoch": 1.890959925442684, "grad_norm": 1.0948708043345219, "learning_rate": 8.215395236451503e-06, "loss": 1.7344, "step": 2029 }, { "epoch": 1.890959925442684, "loss_reasoning": 0.527578592300415, "loss_utility": 1.2960007190704346, "step": 2029 }, { "epoch": 1.8918918918918919, "grad_norm": 1.128804419647351, "learning_rate": 8.208491542975493e-06, "loss": 1.5989, "step": 2030 }, { "epoch": 1.8918918918918919, "loss_reasoning": 0.46482813358306885, "loss_utility": 1.1797349452972412, "step": 2030 }, { "epoch": 1.8928238583410997, "grad_norm": 1.1533689363778885, "learning_rate": 8.201587849499482e-06, "loss": 1.6597, "step": 2031 }, { "epoch": 1.8928238583410997, "loss_reasoning": 0.5043432712554932, "loss_utility": 1.5039448738098145, "step": 2031 }, { "epoch": 1.8937558247903077, "grad_norm": 1.2180204678541373, "learning_rate": 8.194684156023473e-06, "loss": 1.6963, "step": 2032 }, { "epoch": 1.8937558247903077, "loss_reasoning": 0.47464197874069214, "loss_utility": 0.9807804822921753, "step": 2032 }, { "epoch": 1.8946877912395155, "grad_norm": 0.9654581752806861, "learning_rate": 8.187780462547464e-06, "loss": 1.2512, "step": 2033 }, { "epoch": 1.8946877912395155, "loss_reasoning": 0.47650784254074097, "loss_utility": 1.0838323831558228, "step": 2033 }, { "epoch": 1.8956197576887233, "grad_norm": 1.1786354900921627, "learning_rate": 8.180876769071455e-06, "loss": 1.4922, "step": 2034 }, { "epoch": 1.8956197576887233, "loss_reasoning": 0.5005524158477783, "loss_utility": 1.7512550354003906, "step": 2034 }, { "epoch": 1.896551724137931, "grad_norm": 1.2116466999260953, "learning_rate": 8.173973075595444e-06, "loss": 1.9358, "step": 2035 }, { "epoch": 1.896551724137931, "loss_reasoning": 0.516760528087616, "loss_utility": 1.5302250385284424, "step": 2035 }, { "epoch": 1.8974836905871388, "grad_norm": 1.352524341144101, "learning_rate": 8.167069382119434e-06, "loss": 1.7329, "step": 2036 }, { "epoch": 1.8974836905871388, "loss_reasoning": 0.4615558981895447, "loss_utility": 0.4507015347480774, "step": 2036 }, { "epoch": 1.8984156570363466, "grad_norm": 0.9975496872053404, "learning_rate": 8.160165688643425e-06, "loss": 1.38, "step": 2037 }, { "epoch": 1.8984156570363466, "loss_reasoning": 0.4976271390914917, "loss_utility": 1.1871044635772705, "step": 2037 }, { "epoch": 1.8993476234855544, "grad_norm": 1.2058636116594161, "learning_rate": 8.153261995167416e-06, "loss": 2.1116, "step": 2038 }, { "epoch": 1.8993476234855544, "loss_reasoning": 0.5678970813751221, "loss_utility": 1.3989384174346924, "step": 2038 }, { "epoch": 1.9002795899347622, "grad_norm": 1.3025320252433736, "learning_rate": 8.146358301691407e-06, "loss": 1.9363, "step": 2039 }, { "epoch": 1.9002795899347622, "loss_reasoning": 0.45440465211868286, "loss_utility": 0.5338732004165649, "step": 2039 }, { "epoch": 1.9012115563839702, "grad_norm": 1.1576298678706638, "learning_rate": 8.139454608215396e-06, "loss": 1.4472, "step": 2040 }, { "epoch": 1.9012115563839702, "loss_reasoning": 0.4948100447654724, "loss_utility": 1.1973483562469482, "step": 2040 }, { "epoch": 1.902143522833178, "grad_norm": 1.2688469565626874, "learning_rate": 8.132550914739386e-06, "loss": 1.8548, "step": 2041 }, { "epoch": 1.902143522833178, "loss_reasoning": 0.55795818567276, "loss_utility": 0.5986361503601074, "step": 2041 }, { "epoch": 1.9030754892823858, "grad_norm": 1.1885324448868293, "learning_rate": 8.125647221263377e-06, "loss": 1.8534, "step": 2042 }, { "epoch": 1.9030754892823858, "loss_reasoning": 0.5633201599121094, "loss_utility": 1.1719179153442383, "step": 2042 }, { "epoch": 1.9040074557315938, "grad_norm": 1.0481892234089072, "learning_rate": 8.118743527787368e-06, "loss": 1.4888, "step": 2043 }, { "epoch": 1.9040074557315938, "loss_reasoning": 0.5300619006156921, "loss_utility": 1.4884624481201172, "step": 2043 }, { "epoch": 1.9049394221808016, "grad_norm": 1.230041050261461, "learning_rate": 8.111839834311357e-06, "loss": 1.987, "step": 2044 }, { "epoch": 1.9049394221808016, "loss_reasoning": 0.47217780351638794, "loss_utility": 1.179131031036377, "step": 2044 }, { "epoch": 1.9058713886300094, "grad_norm": 1.3521155617100322, "learning_rate": 8.104936140835348e-06, "loss": 1.7893, "step": 2045 }, { "epoch": 1.9058713886300094, "loss_reasoning": 0.510735034942627, "loss_utility": 1.0362595319747925, "step": 2045 }, { "epoch": 1.9068033550792172, "grad_norm": 1.1926810000126482, "learning_rate": 8.098032447359337e-06, "loss": 1.722, "step": 2046 }, { "epoch": 1.9068033550792172, "loss_reasoning": 0.5108646154403687, "loss_utility": 1.1064159870147705, "step": 2046 }, { "epoch": 1.907735321528425, "grad_norm": 1.147697621958424, "learning_rate": 8.091128753883329e-06, "loss": 1.5829, "step": 2047 }, { "epoch": 1.907735321528425, "loss_reasoning": 0.5624079704284668, "loss_utility": 1.0419214963912964, "step": 2047 }, { "epoch": 1.9086672879776327, "grad_norm": 1.4507745528761355, "learning_rate": 8.084225060407318e-06, "loss": 1.8666, "step": 2048 }, { "epoch": 1.9086672879776327, "loss_reasoning": 0.46026694774627686, "loss_utility": 1.2174540758132935, "step": 2048 }, { "epoch": 1.9095992544268405, "grad_norm": 1.1485976671078053, "learning_rate": 8.077321366931309e-06, "loss": 1.8684, "step": 2049 }, { "epoch": 1.9095992544268405, "loss_reasoning": 0.4637131094932556, "loss_utility": 1.1092641353607178, "step": 2049 }, { "epoch": 1.9105312208760483, "grad_norm": 1.252938796660719, "learning_rate": 8.0704176734553e-06, "loss": 1.6637, "step": 2050 }, { "epoch": 1.9105312208760483, "loss_reasoning": 0.4772137999534607, "loss_utility": 1.360046148300171, "step": 2050 }, { "epoch": 1.9114631873252563, "grad_norm": 1.2219697688820699, "learning_rate": 8.063513979979289e-06, "loss": 1.8449, "step": 2051 }, { "epoch": 1.9114631873252563, "loss_reasoning": 0.4466732442378998, "loss_utility": 1.1992604732513428, "step": 2051 }, { "epoch": 1.9123951537744641, "grad_norm": 1.3295536564503707, "learning_rate": 8.056610286503281e-06, "loss": 1.6932, "step": 2052 }, { "epoch": 1.9123951537744641, "loss_reasoning": 0.5070199966430664, "loss_utility": 1.2560973167419434, "step": 2052 }, { "epoch": 1.913327120223672, "grad_norm": 1.3500506930409457, "learning_rate": 8.04970659302727e-06, "loss": 1.6453, "step": 2053 }, { "epoch": 1.913327120223672, "loss_reasoning": 0.5538057684898376, "loss_utility": 0.7496421337127686, "step": 2053 }, { "epoch": 1.91425908667288, "grad_norm": 1.1897175466408612, "learning_rate": 8.04280289955126e-06, "loss": 1.6477, "step": 2054 }, { "epoch": 1.91425908667288, "loss_reasoning": 0.4793500304222107, "loss_utility": 1.978391170501709, "step": 2054 }, { "epoch": 1.9151910531220877, "grad_norm": 1.1358076565259956, "learning_rate": 8.03589920607525e-06, "loss": 1.8721, "step": 2055 }, { "epoch": 1.9151910531220877, "loss_reasoning": 0.48457708954811096, "loss_utility": 1.507331371307373, "step": 2055 }, { "epoch": 1.9161230195712955, "grad_norm": 0.9603133914013644, "learning_rate": 8.02899551259924e-06, "loss": 1.5714, "step": 2056 }, { "epoch": 1.9161230195712955, "loss_reasoning": 0.4701691269874573, "loss_utility": 1.323815941810608, "step": 2056 }, { "epoch": 1.9170549860205033, "grad_norm": 1.2389395800653094, "learning_rate": 8.022091819123231e-06, "loss": 1.9747, "step": 2057 }, { "epoch": 1.9170549860205033, "loss_reasoning": 0.5344281196594238, "loss_utility": 1.454023838043213, "step": 2057 }, { "epoch": 1.917986952469711, "grad_norm": 1.161210778649028, "learning_rate": 8.015188125647222e-06, "loss": 1.995, "step": 2058 }, { "epoch": 1.917986952469711, "loss_reasoning": 0.5655463933944702, "loss_utility": 0.9357751607894897, "step": 2058 }, { "epoch": 1.9189189189189189, "grad_norm": 1.2215520649249025, "learning_rate": 8.008284432171213e-06, "loss": 1.8071, "step": 2059 }, { "epoch": 1.9189189189189189, "loss_reasoning": 0.5003251433372498, "loss_utility": 0.6486669182777405, "step": 2059 }, { "epoch": 1.9198508853681266, "grad_norm": 1.4578874518706533, "learning_rate": 8.001380738695202e-06, "loss": 1.6444, "step": 2060 }, { "epoch": 1.9198508853681266, "loss_reasoning": 0.5037456750869751, "loss_utility": 0.7473100423812866, "step": 2060 }, { "epoch": 1.9207828518173344, "grad_norm": 1.3885622181893524, "learning_rate": 7.994477045219193e-06, "loss": 1.8693, "step": 2061 }, { "epoch": 1.9207828518173344, "loss_reasoning": 0.4687501788139343, "loss_utility": 1.411841869354248, "step": 2061 }, { "epoch": 1.9217148182665424, "grad_norm": 1.002686960681185, "learning_rate": 7.987573351743183e-06, "loss": 1.7102, "step": 2062 }, { "epoch": 1.9217148182665424, "loss_reasoning": 0.4734174609184265, "loss_utility": 0.8241462707519531, "step": 2062 }, { "epoch": 1.9226467847157502, "grad_norm": 1.2671761037061242, "learning_rate": 7.980669658267174e-06, "loss": 1.8282, "step": 2063 }, { "epoch": 1.9226467847157502, "loss_reasoning": 0.493313729763031, "loss_utility": 1.136149525642395, "step": 2063 }, { "epoch": 1.923578751164958, "grad_norm": 1.0427027090880334, "learning_rate": 7.973765964791163e-06, "loss": 1.643, "step": 2064 }, { "epoch": 1.923578751164958, "loss_reasoning": 0.4877748191356659, "loss_utility": 1.226545810699463, "step": 2064 }, { "epoch": 1.924510717614166, "grad_norm": 1.3216087726616188, "learning_rate": 7.966862271315154e-06, "loss": 1.7197, "step": 2065 }, { "epoch": 1.924510717614166, "loss_reasoning": 0.5352862477302551, "loss_utility": 0.9169765114784241, "step": 2065 }, { "epoch": 1.9254426840633738, "grad_norm": 1.051784036852267, "learning_rate": 7.959958577839145e-06, "loss": 1.3358, "step": 2066 }, { "epoch": 1.9254426840633738, "loss_reasoning": 0.5114187598228455, "loss_utility": 0.9874136447906494, "step": 2066 }, { "epoch": 1.9263746505125816, "grad_norm": 1.3171969355777517, "learning_rate": 7.953054884363135e-06, "loss": 1.65, "step": 2067 }, { "epoch": 1.9263746505125816, "loss_reasoning": 0.5705252289772034, "loss_utility": 1.1458237171173096, "step": 2067 }, { "epoch": 1.9273066169617894, "grad_norm": 1.1589507002254693, "learning_rate": 7.946151190887124e-06, "loss": 1.7877, "step": 2068 }, { "epoch": 1.9273066169617894, "loss_reasoning": 0.503515899181366, "loss_utility": 2.017127513885498, "step": 2068 }, { "epoch": 1.9282385834109972, "grad_norm": 1.0145938173635658, "learning_rate": 7.939247497411115e-06, "loss": 1.5147, "step": 2069 }, { "epoch": 1.9282385834109972, "loss_reasoning": 0.4812929034233093, "loss_utility": 1.4014112949371338, "step": 2069 }, { "epoch": 1.929170549860205, "grad_norm": 1.0751833286497983, "learning_rate": 7.932343803935106e-06, "loss": 1.7889, "step": 2070 }, { "epoch": 1.929170549860205, "loss_reasoning": 0.5024557113647461, "loss_utility": 1.3364026546478271, "step": 2070 }, { "epoch": 1.9301025163094128, "grad_norm": 1.1280611291680136, "learning_rate": 7.925440110459096e-06, "loss": 1.7599, "step": 2071 }, { "epoch": 1.9301025163094128, "loss_reasoning": 0.4985908269882202, "loss_utility": 1.3256670236587524, "step": 2071 }, { "epoch": 1.9310344827586206, "grad_norm": 1.3515228077051642, "learning_rate": 7.918536416983087e-06, "loss": 1.7444, "step": 2072 }, { "epoch": 1.9310344827586206, "loss_reasoning": 0.4842062294483185, "loss_utility": 1.5324443578720093, "step": 2072 }, { "epoch": 1.9319664492078286, "grad_norm": 1.338706781477002, "learning_rate": 7.911632723507076e-06, "loss": 1.6395, "step": 2073 }, { "epoch": 1.9319664492078286, "loss_reasoning": 0.4901280403137207, "loss_utility": 1.4424172639846802, "step": 2073 }, { "epoch": 1.9328984156570364, "grad_norm": 1.2995772694868541, "learning_rate": 7.904729030031067e-06, "loss": 1.5457, "step": 2074 }, { "epoch": 1.9328984156570364, "loss_reasoning": 0.45592671632766724, "loss_utility": 0.9981111288070679, "step": 2074 }, { "epoch": 1.9338303821062441, "grad_norm": 1.1473268908444592, "learning_rate": 7.897825336555058e-06, "loss": 1.7272, "step": 2075 }, { "epoch": 1.9338303821062441, "loss_reasoning": 0.5188480019569397, "loss_utility": 0.8371078968048096, "step": 2075 }, { "epoch": 1.9347623485554521, "grad_norm": 1.2048385951235614, "learning_rate": 7.890921643079048e-06, "loss": 1.5593, "step": 2076 }, { "epoch": 1.9347623485554521, "loss_reasoning": 0.5266265869140625, "loss_utility": 1.4398293495178223, "step": 2076 }, { "epoch": 1.93569431500466, "grad_norm": 0.9732928474736146, "learning_rate": 7.884017949603038e-06, "loss": 1.4399, "step": 2077 }, { "epoch": 1.93569431500466, "loss_reasoning": 0.5461737513542175, "loss_utility": 1.4803417921066284, "step": 2077 }, { "epoch": 1.9366262814538677, "grad_norm": 1.0921578292136793, "learning_rate": 7.877114256127028e-06, "loss": 1.8098, "step": 2078 }, { "epoch": 1.9366262814538677, "loss_reasoning": 0.5055542588233948, "loss_utility": 0.3906170129776001, "step": 2078 }, { "epoch": 1.9375582479030755, "grad_norm": 1.2370280591312532, "learning_rate": 7.870210562651019e-06, "loss": 1.7252, "step": 2079 }, { "epoch": 1.9375582479030755, "loss_reasoning": 0.49772050976753235, "loss_utility": 0.9468809366226196, "step": 2079 }, { "epoch": 1.9384902143522833, "grad_norm": 1.1664597490265154, "learning_rate": 7.86330686917501e-06, "loss": 1.7317, "step": 2080 }, { "epoch": 1.9384902143522833, "loss_reasoning": 0.5083916187286377, "loss_utility": 1.3365520238876343, "step": 2080 }, { "epoch": 1.939422180801491, "grad_norm": 2.055309677650245, "learning_rate": 7.856403175699e-06, "loss": 1.7114, "step": 2081 }, { "epoch": 1.939422180801491, "loss_reasoning": 0.46484464406967163, "loss_utility": 0.8819279670715332, "step": 2081 }, { "epoch": 1.9403541472506989, "grad_norm": 1.0468504526015252, "learning_rate": 7.84949948222299e-06, "loss": 1.7233, "step": 2082 }, { "epoch": 1.9403541472506989, "loss_reasoning": 0.5420441031455994, "loss_utility": 1.3658701181411743, "step": 2082 }, { "epoch": 1.9412861136999067, "grad_norm": 1.2212922582532106, "learning_rate": 7.84259578874698e-06, "loss": 1.8273, "step": 2083 }, { "epoch": 1.9412861136999067, "loss_reasoning": 0.4858970046043396, "loss_utility": 1.1199299097061157, "step": 2083 }, { "epoch": 1.9422180801491147, "grad_norm": 1.121722359812092, "learning_rate": 7.835692095270971e-06, "loss": 1.4172, "step": 2084 }, { "epoch": 1.9422180801491147, "loss_reasoning": 0.5855814814567566, "loss_utility": 0.8600355982780457, "step": 2084 }, { "epoch": 1.9431500465983225, "grad_norm": 1.087101428012026, "learning_rate": 7.828788401794962e-06, "loss": 1.815, "step": 2085 }, { "epoch": 1.9431500465983225, "loss_reasoning": 0.4830821454524994, "loss_utility": 0.9120408296585083, "step": 2085 }, { "epoch": 1.9440820130475303, "grad_norm": 1.2253469665307763, "learning_rate": 7.82188470831895e-06, "loss": 1.5681, "step": 2086 }, { "epoch": 1.9440820130475303, "loss_reasoning": 0.47194379568099976, "loss_utility": 1.5333666801452637, "step": 2086 }, { "epoch": 1.9450139794967383, "grad_norm": 1.5287813185011643, "learning_rate": 7.814981014842941e-06, "loss": 2.1863, "step": 2087 }, { "epoch": 1.9450139794967383, "loss_reasoning": 0.4594530463218689, "loss_utility": 1.3828558921813965, "step": 2087 }, { "epoch": 1.945945945945946, "grad_norm": 1.141543082908782, "learning_rate": 7.808077321366932e-06, "loss": 1.8313, "step": 2088 }, { "epoch": 1.945945945945946, "loss_reasoning": 0.5113171339035034, "loss_utility": 1.4494165182113647, "step": 2088 }, { "epoch": 1.9468779123951538, "grad_norm": 1.2183937630344743, "learning_rate": 7.801173627890923e-06, "loss": 1.78, "step": 2089 }, { "epoch": 1.9468779123951538, "loss_reasoning": 0.5698955059051514, "loss_utility": 0.5928229093551636, "step": 2089 }, { "epoch": 1.9478098788443616, "grad_norm": 1.2149771371316633, "learning_rate": 7.794269934414914e-06, "loss": 1.6468, "step": 2090 }, { "epoch": 1.9478098788443616, "loss_reasoning": 0.445255845785141, "loss_utility": 0.5171786546707153, "step": 2090 }, { "epoch": 1.9487418452935694, "grad_norm": 0.9947993565492905, "learning_rate": 7.787366240938903e-06, "loss": 1.6732, "step": 2091 }, { "epoch": 1.9487418452935694, "loss_reasoning": 0.4834039807319641, "loss_utility": 1.1165566444396973, "step": 2091 }, { "epoch": 1.9496738117427772, "grad_norm": 1.091056210556366, "learning_rate": 7.780462547462893e-06, "loss": 1.5254, "step": 2092 }, { "epoch": 1.9496738117427772, "loss_reasoning": 0.49018657207489014, "loss_utility": 2.031582832336426, "step": 2092 }, { "epoch": 1.950605778191985, "grad_norm": 1.3112786492778672, "learning_rate": 7.773558853986884e-06, "loss": 1.9477, "step": 2093 }, { "epoch": 1.950605778191985, "loss_reasoning": 0.4522019028663635, "loss_utility": 1.0504231452941895, "step": 2093 }, { "epoch": 1.9515377446411928, "grad_norm": 1.0449387030936532, "learning_rate": 7.766655160510875e-06, "loss": 1.6828, "step": 2094 }, { "epoch": 1.9515377446411928, "loss_reasoning": 0.46270546317100525, "loss_utility": 1.0993417501449585, "step": 2094 }, { "epoch": 1.9524697110904008, "grad_norm": 1.1991834187855956, "learning_rate": 7.759751467034864e-06, "loss": 1.5669, "step": 2095 }, { "epoch": 1.9524697110904008, "loss_reasoning": 0.4905899167060852, "loss_utility": 1.2967777252197266, "step": 2095 }, { "epoch": 1.9534016775396086, "grad_norm": 1.1250061710796073, "learning_rate": 7.752847773558855e-06, "loss": 1.7213, "step": 2096 }, { "epoch": 1.9534016775396086, "loss_reasoning": 0.4967057406902313, "loss_utility": 1.1280347108840942, "step": 2096 }, { "epoch": 1.9543336439888164, "grad_norm": 0.9931532349843294, "learning_rate": 7.745944080082844e-06, "loss": 1.6372, "step": 2097 }, { "epoch": 1.9543336439888164, "loss_reasoning": 0.5056257843971252, "loss_utility": 1.3282760381698608, "step": 2097 }, { "epoch": 1.9552656104380244, "grad_norm": 1.2269823236301114, "learning_rate": 7.739040386606836e-06, "loss": 1.7902, "step": 2098 }, { "epoch": 1.9552656104380244, "loss_reasoning": 0.4881531298160553, "loss_utility": 1.3589249849319458, "step": 2098 }, { "epoch": 1.9561975768872322, "grad_norm": 1.0574300830799612, "learning_rate": 7.732136693130827e-06, "loss": 1.544, "step": 2099 }, { "epoch": 1.9561975768872322, "loss_reasoning": 0.4675259590148926, "loss_utility": 1.6014328002929688, "step": 2099 }, { "epoch": 1.95712954333644, "grad_norm": 1.1546335941715944, "learning_rate": 7.725232999654816e-06, "loss": 1.8271, "step": 2100 }, { "epoch": 1.95712954333644, "loss_reasoning": 0.4721565246582031, "loss_utility": 1.6847630739212036, "step": 2100 }, { "epoch": 1.9580615097856477, "grad_norm": 1.1580120478433877, "learning_rate": 7.718329306178807e-06, "loss": 1.8283, "step": 2101 }, { "epoch": 1.9580615097856477, "loss_reasoning": 0.5083716511726379, "loss_utility": 0.9052304029464722, "step": 2101 }, { "epoch": 1.9589934762348555, "grad_norm": 1.314892018358505, "learning_rate": 7.711425612702796e-06, "loss": 1.468, "step": 2102 }, { "epoch": 1.9589934762348555, "loss_reasoning": 0.45239219069480896, "loss_utility": 1.9129409790039062, "step": 2102 }, { "epoch": 1.9599254426840633, "grad_norm": 1.090567899423651, "learning_rate": 7.704521919226788e-06, "loss": 1.91, "step": 2103 }, { "epoch": 1.9599254426840633, "loss_reasoning": 0.48406288027763367, "loss_utility": 0.8581284284591675, "step": 2103 }, { "epoch": 1.9608574091332711, "grad_norm": 1.15412697527059, "learning_rate": 7.697618225750777e-06, "loss": 1.9069, "step": 2104 }, { "epoch": 1.9608574091332711, "loss_reasoning": 0.5026399493217468, "loss_utility": 0.9651056528091431, "step": 2104 }, { "epoch": 1.961789375582479, "grad_norm": 1.270908560168757, "learning_rate": 7.690714532274768e-06, "loss": 1.9328, "step": 2105 }, { "epoch": 1.961789375582479, "loss_reasoning": 0.4652945399284363, "loss_utility": 0.9499430656433105, "step": 2105 }, { "epoch": 1.962721342031687, "grad_norm": 1.1400737049160095, "learning_rate": 7.683810838798757e-06, "loss": 1.6148, "step": 2106 }, { "epoch": 1.962721342031687, "loss_reasoning": 0.5058283805847168, "loss_utility": 1.1257743835449219, "step": 2106 }, { "epoch": 1.9636533084808947, "grad_norm": 1.2375112894937124, "learning_rate": 7.676907145322748e-06, "loss": 1.6355, "step": 2107 }, { "epoch": 1.9636533084808947, "loss_reasoning": 0.5287209749221802, "loss_utility": 1.8580445051193237, "step": 2107 }, { "epoch": 1.9645852749301025, "grad_norm": 1.121516128665761, "learning_rate": 7.670003451846738e-06, "loss": 2.0544, "step": 2108 }, { "epoch": 1.9645852749301025, "loss_reasoning": 0.4828621745109558, "loss_utility": 1.3744943141937256, "step": 2108 }, { "epoch": 1.9655172413793105, "grad_norm": 1.1277982530624975, "learning_rate": 7.663099758370729e-06, "loss": 1.5646, "step": 2109 }, { "epoch": 1.9655172413793105, "loss_reasoning": 0.5407861471176147, "loss_utility": 1.2387914657592773, "step": 2109 }, { "epoch": 1.9664492078285183, "grad_norm": 1.462307297271021, "learning_rate": 7.65619606489472e-06, "loss": 1.6384, "step": 2110 }, { "epoch": 1.9664492078285183, "loss_reasoning": 0.49693989753723145, "loss_utility": 1.2584624290466309, "step": 2110 }, { "epoch": 1.967381174277726, "grad_norm": 1.173307921738643, "learning_rate": 7.649292371418709e-06, "loss": 1.7166, "step": 2111 }, { "epoch": 1.967381174277726, "loss_reasoning": 0.5111258625984192, "loss_utility": 2.160890579223633, "step": 2111 }, { "epoch": 1.9683131407269339, "grad_norm": 1.2911513311616445, "learning_rate": 7.6423886779427e-06, "loss": 1.9037, "step": 2112 }, { "epoch": 1.9683131407269339, "loss_reasoning": 0.47873327136039734, "loss_utility": 1.3836286067962646, "step": 2112 }, { "epoch": 1.9692451071761417, "grad_norm": 1.3503528801768068, "learning_rate": 7.63548498446669e-06, "loss": 1.8113, "step": 2113 }, { "epoch": 1.9692451071761417, "loss_reasoning": 0.47451499104499817, "loss_utility": 0.9471349716186523, "step": 2113 }, { "epoch": 1.9701770736253494, "grad_norm": 1.1484814791279099, "learning_rate": 7.628581290990681e-06, "loss": 1.5773, "step": 2114 }, { "epoch": 1.9701770736253494, "loss_reasoning": 0.46745049953460693, "loss_utility": 1.0077741146087646, "step": 2114 }, { "epoch": 1.9711090400745572, "grad_norm": 1.0453424923623478, "learning_rate": 7.621677597514671e-06, "loss": 1.5922, "step": 2115 }, { "epoch": 1.9711090400745572, "loss_reasoning": 0.4836566150188446, "loss_utility": 1.032791018486023, "step": 2115 }, { "epoch": 1.972041006523765, "grad_norm": 1.1509701664795078, "learning_rate": 7.614773904038662e-06, "loss": 1.7833, "step": 2116 }, { "epoch": 1.972041006523765, "loss_reasoning": 0.4720325469970703, "loss_utility": 1.120194435119629, "step": 2116 }, { "epoch": 1.972972972972973, "grad_norm": 1.180943171597875, "learning_rate": 7.6078702105626516e-06, "loss": 1.4791, "step": 2117 }, { "epoch": 1.972972972972973, "loss_reasoning": 0.5500317215919495, "loss_utility": 0.8840492963790894, "step": 2117 }, { "epoch": 1.9739049394221808, "grad_norm": 1.2577287867648232, "learning_rate": 7.600966517086642e-06, "loss": 1.5583, "step": 2118 }, { "epoch": 1.9739049394221808, "loss_reasoning": 0.4031049311161041, "loss_utility": 1.0547846555709839, "step": 2118 }, { "epoch": 1.9748369058713886, "grad_norm": 1.2911344982602773, "learning_rate": 7.594062823610633e-06, "loss": 1.8939, "step": 2119 }, { "epoch": 1.9748369058713886, "loss_reasoning": 0.4708578586578369, "loss_utility": 1.4989053010940552, "step": 2119 }, { "epoch": 1.9757688723205966, "grad_norm": 1.1974127504894194, "learning_rate": 7.587159130134623e-06, "loss": 1.735, "step": 2120 }, { "epoch": 1.9757688723205966, "loss_reasoning": 0.5464776754379272, "loss_utility": 1.0407109260559082, "step": 2120 }, { "epoch": 1.9767008387698044, "grad_norm": 1.4748900055047192, "learning_rate": 7.580255436658614e-06, "loss": 1.7423, "step": 2121 }, { "epoch": 1.9767008387698044, "loss_reasoning": 0.45141059160232544, "loss_utility": 1.4854607582092285, "step": 2121 }, { "epoch": 1.9776328052190122, "grad_norm": 1.3714838093906931, "learning_rate": 7.5733517431826035e-06, "loss": 1.7862, "step": 2122 }, { "epoch": 1.9776328052190122, "loss_reasoning": 0.45604464411735535, "loss_utility": 1.9565069675445557, "step": 2122 }, { "epoch": 1.97856477166822, "grad_norm": 1.1992770205732968, "learning_rate": 7.566448049706594e-06, "loss": 1.9065, "step": 2123 }, { "epoch": 1.97856477166822, "loss_reasoning": 0.5055491328239441, "loss_utility": 1.499828815460205, "step": 2123 }, { "epoch": 1.9794967381174278, "grad_norm": 1.331715454147992, "learning_rate": 7.559544356230583e-06, "loss": 1.7758, "step": 2124 }, { "epoch": 1.9794967381174278, "loss_reasoning": 0.4757586717605591, "loss_utility": 1.5887150764465332, "step": 2124 }, { "epoch": 1.9804287045666356, "grad_norm": 1.2513232800308394, "learning_rate": 7.552640662754575e-06, "loss": 1.9581, "step": 2125 }, { "epoch": 1.9804287045666356, "loss_reasoning": 0.4927470088005066, "loss_utility": 1.225780963897705, "step": 2125 }, { "epoch": 1.9813606710158433, "grad_norm": 1.1371117570536111, "learning_rate": 7.545736969278564e-06, "loss": 1.7887, "step": 2126 }, { "epoch": 1.9813606710158433, "loss_reasoning": 0.43706268072128296, "loss_utility": 1.332839846611023, "step": 2126 }, { "epoch": 1.9822926374650511, "grad_norm": 1.3634133036703329, "learning_rate": 7.5388332758025555e-06, "loss": 1.7775, "step": 2127 }, { "epoch": 1.9822926374650511, "loss_reasoning": 0.5869276523590088, "loss_utility": 1.4969210624694824, "step": 2127 }, { "epoch": 1.983224603914259, "grad_norm": 1.0744855071322699, "learning_rate": 7.5319295823265445e-06, "loss": 2.0109, "step": 2128 }, { "epoch": 1.983224603914259, "loss_reasoning": 0.464664101600647, "loss_utility": 1.1902945041656494, "step": 2128 }, { "epoch": 1.984156570363467, "grad_norm": 1.0610609987573039, "learning_rate": 7.525025888850535e-06, "loss": 1.7882, "step": 2129 }, { "epoch": 1.984156570363467, "loss_reasoning": 0.52778559923172, "loss_utility": 1.3938651084899902, "step": 2129 }, { "epoch": 1.9850885368126747, "grad_norm": 1.0713773321587197, "learning_rate": 7.518122195374527e-06, "loss": 1.7602, "step": 2130 }, { "epoch": 1.9850885368126747, "loss_reasoning": 0.46193403005599976, "loss_utility": 1.305365800857544, "step": 2130 }, { "epoch": 1.9860205032618827, "grad_norm": 1.0314594809932387, "learning_rate": 7.511218501898516e-06, "loss": 1.4972, "step": 2131 }, { "epoch": 1.9860205032618827, "loss_reasoning": 0.4762088656425476, "loss_utility": 1.3157539367675781, "step": 2131 }, { "epoch": 1.9869524697110905, "grad_norm": 1.1585592763988306, "learning_rate": 7.504314808422507e-06, "loss": 1.7063, "step": 2132 }, { "epoch": 1.9869524697110905, "loss_reasoning": 0.4045145511627197, "loss_utility": 1.2918282747268677, "step": 2132 }, { "epoch": 1.9878844361602983, "grad_norm": 1.0795399437595379, "learning_rate": 7.4974111149464965e-06, "loss": 1.5988, "step": 2133 }, { "epoch": 1.9878844361602983, "loss_reasoning": 0.5375102758407593, "loss_utility": 1.16033935546875, "step": 2133 }, { "epoch": 1.988816402609506, "grad_norm": 1.1145018772353377, "learning_rate": 7.490507421470487e-06, "loss": 1.6109, "step": 2134 }, { "epoch": 1.988816402609506, "loss_reasoning": 0.5169192552566528, "loss_utility": 1.7560877799987793, "step": 2134 }, { "epoch": 1.9897483690587139, "grad_norm": 1.0703468777807876, "learning_rate": 7.483603727994477e-06, "loss": 1.5903, "step": 2135 }, { "epoch": 1.9897483690587139, "loss_reasoning": 0.4031231105327606, "loss_utility": 1.2541766166687012, "step": 2135 }, { "epoch": 1.9906803355079217, "grad_norm": 1.146058698136012, "learning_rate": 7.476700034518468e-06, "loss": 1.7238, "step": 2136 }, { "epoch": 1.9906803355079217, "loss_reasoning": 0.5346508622169495, "loss_utility": 0.6537414789199829, "step": 2136 }, { "epoch": 1.9916123019571295, "grad_norm": 1.543943929407967, "learning_rate": 7.469796341042458e-06, "loss": 1.6139, "step": 2137 }, { "epoch": 1.9916123019571295, "loss_reasoning": 0.44585120677948, "loss_utility": 1.1570779085159302, "step": 2137 }, { "epoch": 1.9925442684063372, "grad_norm": 0.9861750668218108, "learning_rate": 7.4628926475664485e-06, "loss": 1.6462, "step": 2138 }, { "epoch": 1.9925442684063372, "loss_reasoning": 0.43587106466293335, "loss_utility": 1.063585638999939, "step": 2138 }, { "epoch": 1.993476234855545, "grad_norm": 1.0805422419639585, "learning_rate": 7.455988954090439e-06, "loss": 1.6856, "step": 2139 }, { "epoch": 1.993476234855545, "loss_reasoning": 0.5321640968322754, "loss_utility": 1.0066921710968018, "step": 2139 }, { "epoch": 1.994408201304753, "grad_norm": 1.0222285868358743, "learning_rate": 7.449085260614429e-06, "loss": 1.6163, "step": 2140 }, { "epoch": 1.994408201304753, "loss_reasoning": 0.4294750690460205, "loss_utility": 1.1615954637527466, "step": 2140 }, { "epoch": 1.9953401677539608, "grad_norm": 1.1806000430666383, "learning_rate": 7.44218156713842e-06, "loss": 1.6827, "step": 2141 }, { "epoch": 1.9953401677539608, "loss_reasoning": 0.5065244436264038, "loss_utility": 1.179842472076416, "step": 2141 }, { "epoch": 1.9962721342031688, "grad_norm": 1.0583522279041375, "learning_rate": 7.43527787366241e-06, "loss": 1.6446, "step": 2142 }, { "epoch": 1.9962721342031688, "loss_reasoning": 0.445306658744812, "loss_utility": 0.7104368805885315, "step": 2142 }, { "epoch": 1.9972041006523766, "grad_norm": 1.2247318403876568, "learning_rate": 7.4283741801864004e-06, "loss": 1.6762, "step": 2143 }, { "epoch": 1.9972041006523766, "loss_reasoning": 0.44452017545700073, "loss_utility": 0.9920967221260071, "step": 2143 }, { "epoch": 1.9981360671015844, "grad_norm": 1.0960276491530272, "learning_rate": 7.42147048671039e-06, "loss": 1.3891, "step": 2144 }, { "epoch": 1.9981360671015844, "loss_reasoning": 0.5577834844589233, "loss_utility": 1.0962917804718018, "step": 2144 }, { "epoch": 1.9990680335507922, "grad_norm": 1.1305649680651937, "learning_rate": 7.414566793234381e-06, "loss": 1.4421, "step": 2145 }, { "epoch": 1.9990680335507922, "loss_reasoning": 0.41788768768310547, "loss_utility": 0.8284657001495361, "step": 2145 }, { "epoch": 2.0, "grad_norm": 1.1396524235316356, "learning_rate": 7.407663099758371e-06, "loss": 1.4587, "step": 2146 }, { "epoch": 2.0, "loss_reasoning": 0.5002771615982056, "loss_utility": 1.260857105255127, "step": 2146 }, { "epoch": 2.000931966449208, "grad_norm": 1.0315326895142514, "learning_rate": 7.400759406282362e-06, "loss": 1.8535, "step": 2147 }, { "epoch": 2.000931966449208, "loss_reasoning": 0.5011879801750183, "loss_utility": 1.2868504524230957, "step": 2147 }, { "epoch": 2.0018639328984156, "grad_norm": 1.2996776372262313, "learning_rate": 7.3938557128063516e-06, "loss": 1.4999, "step": 2148 }, { "epoch": 2.0018639328984156, "loss_reasoning": 0.44640153646469116, "loss_utility": 0.9076786041259766, "step": 2148 }, { "epoch": 2.0027958993476234, "grad_norm": 1.0848310651889566, "learning_rate": 7.386952019330342e-06, "loss": 1.5641, "step": 2149 }, { "epoch": 2.0027958993476234, "loss_reasoning": 0.4490746855735779, "loss_utility": 0.7425609230995178, "step": 2149 }, { "epoch": 2.003727865796831, "grad_norm": 0.8924881859948548, "learning_rate": 7.380048325854333e-06, "loss": 1.449, "step": 2150 }, { "epoch": 2.003727865796831, "loss_reasoning": 0.4975321292877197, "loss_utility": 1.096443772315979, "step": 2150 }, { "epoch": 2.004659832246039, "grad_norm": 1.0678535083147047, "learning_rate": 7.373144632378323e-06, "loss": 1.4866, "step": 2151 }, { "epoch": 2.004659832246039, "loss_reasoning": 0.48298195004463196, "loss_utility": 0.730504035949707, "step": 2151 }, { "epoch": 2.005591798695247, "grad_norm": 1.133052743281638, "learning_rate": 7.366240938902314e-06, "loss": 1.2456, "step": 2152 }, { "epoch": 2.005591798695247, "loss_reasoning": 0.4745321571826935, "loss_utility": 0.9579606652259827, "step": 2152 }, { "epoch": 2.006523765144455, "grad_norm": 1.018457680007705, "learning_rate": 7.3593372454263035e-06, "loss": 1.3547, "step": 2153 }, { "epoch": 2.006523765144455, "loss_reasoning": 0.5075346231460571, "loss_utility": 1.0584847927093506, "step": 2153 }, { "epoch": 2.0074557315936628, "grad_norm": 0.8859446069497312, "learning_rate": 7.352433551950294e-06, "loss": 1.2257, "step": 2154 }, { "epoch": 2.0074557315936628, "loss_reasoning": 0.466166228055954, "loss_utility": 1.2644529342651367, "step": 2154 }, { "epoch": 2.0083876980428705, "grad_norm": 1.0772730442890381, "learning_rate": 7.345529858474284e-06, "loss": 1.45, "step": 2155 }, { "epoch": 2.0083876980428705, "loss_reasoning": 0.4817643463611603, "loss_utility": 1.027801752090454, "step": 2155 }, { "epoch": 2.0093196644920783, "grad_norm": 1.4594174400427404, "learning_rate": 7.338626164998275e-06, "loss": 1.5658, "step": 2156 }, { "epoch": 2.0093196644920783, "loss_reasoning": 0.4711132347583771, "loss_utility": 1.4121283292770386, "step": 2156 }, { "epoch": 2.010251630941286, "grad_norm": 1.3286113961923594, "learning_rate": 7.331722471522265e-06, "loss": 1.4555, "step": 2157 }, { "epoch": 2.010251630941286, "loss_reasoning": 0.42136529088020325, "loss_utility": 1.23878812789917, "step": 2157 }, { "epoch": 2.011183597390494, "grad_norm": 1.0824237408085906, "learning_rate": 7.3248187780462555e-06, "loss": 1.4387, "step": 2158 }, { "epoch": 2.011183597390494, "loss_reasoning": 0.47839850187301636, "loss_utility": 0.9215288162231445, "step": 2158 }, { "epoch": 2.0121155638397017, "grad_norm": 1.1555922464774901, "learning_rate": 7.317915084570245e-06, "loss": 1.4456, "step": 2159 }, { "epoch": 2.0121155638397017, "loss_reasoning": 0.5207388997077942, "loss_utility": 0.8739473819732666, "step": 2159 }, { "epoch": 2.0130475302889095, "grad_norm": 1.2090808384457767, "learning_rate": 7.311011391094236e-06, "loss": 1.3254, "step": 2160 }, { "epoch": 2.0130475302889095, "loss_reasoning": 0.45861995220184326, "loss_utility": 1.1277358531951904, "step": 2160 }, { "epoch": 2.0139794967381173, "grad_norm": 1.1198138213824786, "learning_rate": 7.304107697618227e-06, "loss": 1.6303, "step": 2161 }, { "epoch": 2.0139794967381173, "loss_reasoning": 0.4248705208301544, "loss_utility": 0.3291526436805725, "step": 2161 }, { "epoch": 2.014911463187325, "grad_norm": 1.2919078194663633, "learning_rate": 7.297204004142217e-06, "loss": 1.153, "step": 2162 }, { "epoch": 2.014911463187325, "loss_reasoning": 0.5523272752761841, "loss_utility": 1.2976157665252686, "step": 2162 }, { "epoch": 2.0158434296365333, "grad_norm": 1.087207671366923, "learning_rate": 7.2903003106662075e-06, "loss": 1.6474, "step": 2163 }, { "epoch": 2.0158434296365333, "loss_reasoning": 0.5265390872955322, "loss_utility": 1.2704477310180664, "step": 2163 }, { "epoch": 2.016775396085741, "grad_norm": 1.2464926946106831, "learning_rate": 7.283396617190197e-06, "loss": 1.732, "step": 2164 }, { "epoch": 2.016775396085741, "loss_reasoning": 0.4962180554866791, "loss_utility": 1.1150362491607666, "step": 2164 }, { "epoch": 2.017707362534949, "grad_norm": 1.03793508737422, "learning_rate": 7.276492923714188e-06, "loss": 1.365, "step": 2165 }, { "epoch": 2.017707362534949, "loss_reasoning": 0.5622146725654602, "loss_utility": 0.9291197061538696, "step": 2165 }, { "epoch": 2.0186393289841567, "grad_norm": 1.1424121573966486, "learning_rate": 7.269589230238178e-06, "loss": 1.3602, "step": 2166 }, { "epoch": 2.0186393289841567, "loss_reasoning": 0.5841368436813354, "loss_utility": 1.4694815874099731, "step": 2166 }, { "epoch": 2.0195712954333644, "grad_norm": 1.0938464660354459, "learning_rate": 7.262685536762169e-06, "loss": 1.9312, "step": 2167 }, { "epoch": 2.0195712954333644, "loss_reasoning": 0.5063197016716003, "loss_utility": 0.28122133016586304, "step": 2167 }, { "epoch": 2.0205032618825722, "grad_norm": 1.0823255035027946, "learning_rate": 7.255781843286159e-06, "loss": 1.4323, "step": 2168 }, { "epoch": 2.0205032618825722, "loss_reasoning": 0.4762534499168396, "loss_utility": 0.9210085868835449, "step": 2168 }, { "epoch": 2.02143522833178, "grad_norm": 1.0412979932926503, "learning_rate": 7.248878149810149e-06, "loss": 1.4197, "step": 2169 }, { "epoch": 2.02143522833178, "loss_reasoning": 0.4369620978832245, "loss_utility": 1.0757956504821777, "step": 2169 }, { "epoch": 2.022367194780988, "grad_norm": 1.422028697003004, "learning_rate": 7.24197445633414e-06, "loss": 1.7056, "step": 2170 }, { "epoch": 2.022367194780988, "loss_reasoning": 0.47835883498191833, "loss_utility": 1.0352303981781006, "step": 2170 }, { "epoch": 2.0232991612301956, "grad_norm": 1.1263105345509756, "learning_rate": 7.23507076285813e-06, "loss": 1.4394, "step": 2171 }, { "epoch": 2.0232991612301956, "loss_reasoning": 0.47688978910446167, "loss_utility": 1.0917608737945557, "step": 2171 }, { "epoch": 2.0242311276794034, "grad_norm": 1.054059383600796, "learning_rate": 7.228167069382121e-06, "loss": 1.4456, "step": 2172 }, { "epoch": 2.0242311276794034, "loss_reasoning": 0.5292195081710815, "loss_utility": 1.0432311296463013, "step": 2172 }, { "epoch": 2.025163094128611, "grad_norm": 0.8970264205629389, "learning_rate": 7.2212633759061106e-06, "loss": 1.3803, "step": 2173 }, { "epoch": 2.025163094128611, "loss_reasoning": 0.5222039222717285, "loss_utility": 1.4049341678619385, "step": 2173 }, { "epoch": 2.0260950605778194, "grad_norm": 1.1107995398696417, "learning_rate": 7.214359682430101e-06, "loss": 1.368, "step": 2174 }, { "epoch": 2.0260950605778194, "loss_reasoning": 0.4579598605632782, "loss_utility": 1.8463683128356934, "step": 2174 }, { "epoch": 2.027027027027027, "grad_norm": 1.2376752541209997, "learning_rate": 7.20745598895409e-06, "loss": 1.8297, "step": 2175 }, { "epoch": 2.027027027027027, "loss_reasoning": 0.537305474281311, "loss_utility": 0.7082968354225159, "step": 2175 }, { "epoch": 2.027958993476235, "grad_norm": 1.2196057517504548, "learning_rate": 7.200552295478082e-06, "loss": 1.4361, "step": 2176 }, { "epoch": 2.027958993476235, "loss_reasoning": 0.5088523626327515, "loss_utility": 1.6123058795928955, "step": 2176 }, { "epoch": 2.0288909599254428, "grad_norm": 1.1785882941911217, "learning_rate": 7.193648602002071e-06, "loss": 1.6936, "step": 2177 }, { "epoch": 2.0288909599254428, "loss_reasoning": 0.4887741804122925, "loss_utility": 1.132157802581787, "step": 2177 }, { "epoch": 2.0298229263746506, "grad_norm": 1.4654317502150107, "learning_rate": 7.1867449085260625e-06, "loss": 1.4988, "step": 2178 }, { "epoch": 2.0298229263746506, "loss_reasoning": 0.5322045683860779, "loss_utility": 1.1721320152282715, "step": 2178 }, { "epoch": 2.0307548928238583, "grad_norm": 1.2682464220600567, "learning_rate": 7.1798412150500516e-06, "loss": 1.4944, "step": 2179 }, { "epoch": 2.0307548928238583, "loss_reasoning": 0.5316858291625977, "loss_utility": 0.9650512337684631, "step": 2179 }, { "epoch": 2.031686859273066, "grad_norm": 0.9134887274819085, "learning_rate": 7.172937521574042e-06, "loss": 1.2657, "step": 2180 }, { "epoch": 2.031686859273066, "loss_reasoning": 0.49063271284103394, "loss_utility": 0.9515056610107422, "step": 2180 }, { "epoch": 2.032618825722274, "grad_norm": 1.10506628740562, "learning_rate": 7.166033828098034e-06, "loss": 1.4263, "step": 2181 }, { "epoch": 2.032618825722274, "loss_reasoning": 0.5145193338394165, "loss_utility": 0.46650439500808716, "step": 2181 }, { "epoch": 2.0335507921714817, "grad_norm": 1.0323632998960686, "learning_rate": 7.159130134622023e-06, "loss": 1.108, "step": 2182 }, { "epoch": 2.0335507921714817, "loss_reasoning": 0.48291027545928955, "loss_utility": 1.397615909576416, "step": 2182 }, { "epoch": 2.0344827586206895, "grad_norm": 1.4358612663086268, "learning_rate": 7.152226441146014e-06, "loss": 1.7331, "step": 2183 }, { "epoch": 2.0344827586206895, "loss_reasoning": 0.5034095048904419, "loss_utility": 1.0164768695831299, "step": 2183 }, { "epoch": 2.0354147250698973, "grad_norm": 1.9873357458410916, "learning_rate": 7.1453227476700035e-06, "loss": 1.6496, "step": 2184 }, { "epoch": 2.0354147250698973, "loss_reasoning": 0.4016393721103668, "loss_utility": 0.5420467853546143, "step": 2184 }, { "epoch": 2.0363466915191055, "grad_norm": 1.26132154869015, "learning_rate": 7.138419054193994e-06, "loss": 1.2841, "step": 2185 }, { "epoch": 2.0363466915191055, "loss_reasoning": 0.4761825203895569, "loss_utility": 1.4077386856079102, "step": 2185 }, { "epoch": 2.0372786579683133, "grad_norm": 1.2542100046406863, "learning_rate": 7.131515360717984e-06, "loss": 1.3785, "step": 2186 }, { "epoch": 2.0372786579683133, "loss_reasoning": 0.5215762257575989, "loss_utility": 0.7986088395118713, "step": 2186 }, { "epoch": 2.038210624417521, "grad_norm": 1.112056286095658, "learning_rate": 7.124611667241975e-06, "loss": 1.6065, "step": 2187 }, { "epoch": 2.038210624417521, "loss_reasoning": 0.5017585754394531, "loss_utility": 1.6659140586853027, "step": 2187 }, { "epoch": 2.039142590866729, "grad_norm": 1.2175378262348426, "learning_rate": 7.117707973765965e-06, "loss": 1.6312, "step": 2188 }, { "epoch": 2.039142590866729, "loss_reasoning": 0.4082995653152466, "loss_utility": 0.971910834312439, "step": 2188 }, { "epoch": 2.0400745573159367, "grad_norm": 0.9192947356931382, "learning_rate": 7.1108042802899555e-06, "loss": 1.2421, "step": 2189 }, { "epoch": 2.0400745573159367, "loss_reasoning": 0.503121018409729, "loss_utility": 1.166171908378601, "step": 2189 }, { "epoch": 2.0410065237651445, "grad_norm": 1.454715600791732, "learning_rate": 7.103900586813946e-06, "loss": 1.6042, "step": 2190 }, { "epoch": 2.0410065237651445, "loss_reasoning": 0.5606790781021118, "loss_utility": 1.7144848108291626, "step": 2190 }, { "epoch": 2.0419384902143523, "grad_norm": 1.2883530356145834, "learning_rate": 7.096996893337936e-06, "loss": 1.5436, "step": 2191 }, { "epoch": 2.0419384902143523, "loss_reasoning": 0.49484241008758545, "loss_utility": 1.1629798412322998, "step": 2191 }, { "epoch": 2.04287045666356, "grad_norm": 1.091988185936109, "learning_rate": 7.090093199861927e-06, "loss": 1.7889, "step": 2192 }, { "epoch": 2.04287045666356, "loss_reasoning": 0.4961091876029968, "loss_utility": 0.9659538269042969, "step": 2192 }, { "epoch": 2.043802423112768, "grad_norm": 1.1099154659675752, "learning_rate": 7.083189506385917e-06, "loss": 1.3619, "step": 2193 }, { "epoch": 2.043802423112768, "loss_reasoning": 0.48983049392700195, "loss_utility": 1.4930620193481445, "step": 2193 }, { "epoch": 2.0447343895619756, "grad_norm": 1.0303928171870653, "learning_rate": 7.0762858129099075e-06, "loss": 1.6736, "step": 2194 }, { "epoch": 2.0447343895619756, "loss_reasoning": 0.5578317046165466, "loss_utility": 0.643038272857666, "step": 2194 }, { "epoch": 2.0456663560111834, "grad_norm": 1.2262345410885216, "learning_rate": 7.069382119433897e-06, "loss": 1.7182, "step": 2195 }, { "epoch": 2.0456663560111834, "loss_reasoning": 0.5286166667938232, "loss_utility": 1.024545669555664, "step": 2195 }, { "epoch": 2.0465983224603916, "grad_norm": 1.0418067354668687, "learning_rate": 7.062478425957888e-06, "loss": 1.3581, "step": 2196 }, { "epoch": 2.0465983224603916, "loss_reasoning": 0.4837417006492615, "loss_utility": 1.3657264709472656, "step": 2196 }, { "epoch": 2.0475302889095994, "grad_norm": 1.1331527254919216, "learning_rate": 7.055574732481878e-06, "loss": 1.4521, "step": 2197 }, { "epoch": 2.0475302889095994, "loss_reasoning": 0.5931880474090576, "loss_utility": 0.9986658692359924, "step": 2197 }, { "epoch": 2.048462255358807, "grad_norm": 1.0659060193449834, "learning_rate": 7.048671039005869e-06, "loss": 1.1716, "step": 2198 }, { "epoch": 2.048462255358807, "loss_reasoning": 0.5578038692474365, "loss_utility": 0.8161787986755371, "step": 2198 }, { "epoch": 2.049394221808015, "grad_norm": 1.0910909079239055, "learning_rate": 7.041767345529859e-06, "loss": 1.4737, "step": 2199 }, { "epoch": 2.049394221808015, "loss_reasoning": 0.44308656454086304, "loss_utility": 1.6039648056030273, "step": 2199 }, { "epoch": 2.050326188257223, "grad_norm": 1.272143242689588, "learning_rate": 7.034863652053849e-06, "loss": 1.8301, "step": 2200 }, { "epoch": 2.050326188257223, "loss_reasoning": 0.5095379948616028, "loss_utility": 0.6921396255493164, "step": 2200 }, { "epoch": 2.0512581547064306, "grad_norm": 1.0985630848005867, "learning_rate": 7.02795995857784e-06, "loss": 1.1382, "step": 2201 }, { "epoch": 2.0512581547064306, "loss_reasoning": 0.49262696504592896, "loss_utility": 1.3444561958312988, "step": 2201 }, { "epoch": 2.0521901211556384, "grad_norm": 1.2953877673705008, "learning_rate": 7.02105626510183e-06, "loss": 1.5856, "step": 2202 }, { "epoch": 2.0521901211556384, "loss_reasoning": 0.4869158864021301, "loss_utility": 0.7056523561477661, "step": 2202 }, { "epoch": 2.053122087604846, "grad_norm": 1.0436088841914843, "learning_rate": 7.014152571625821e-06, "loss": 1.4098, "step": 2203 }, { "epoch": 2.053122087604846, "loss_reasoning": 0.42007339000701904, "loss_utility": 0.824142575263977, "step": 2203 }, { "epoch": 2.054054054054054, "grad_norm": 1.0512344152918547, "learning_rate": 7.0072488781498106e-06, "loss": 1.4099, "step": 2204 }, { "epoch": 2.054054054054054, "loss_reasoning": 0.5067924857139587, "loss_utility": 1.019271731376648, "step": 2204 }, { "epoch": 2.0549860205032617, "grad_norm": 1.291579955845228, "learning_rate": 7.000345184673801e-06, "loss": 1.3746, "step": 2205 }, { "epoch": 2.0549860205032617, "loss_reasoning": 0.47851502895355225, "loss_utility": 0.4606528878211975, "step": 2205 }, { "epoch": 2.0559179869524695, "grad_norm": 1.0335228163831722, "learning_rate": 6.993441491197791e-06, "loss": 1.4562, "step": 2206 }, { "epoch": 2.0559179869524695, "loss_reasoning": 0.46013402938842773, "loss_utility": 1.0823326110839844, "step": 2206 }, { "epoch": 2.0568499534016778, "grad_norm": 1.2918132655307624, "learning_rate": 6.986537797721782e-06, "loss": 1.5469, "step": 2207 }, { "epoch": 2.0568499534016778, "loss_reasoning": 0.44975799322128296, "loss_utility": 1.4955546855926514, "step": 2207 }, { "epoch": 2.0577819198508855, "grad_norm": 1.6674424390870255, "learning_rate": 6.979634104245772e-06, "loss": 1.5358, "step": 2208 }, { "epoch": 2.0577819198508855, "loss_reasoning": 0.46803152561187744, "loss_utility": 0.9034412503242493, "step": 2208 }, { "epoch": 2.0587138863000933, "grad_norm": 0.9675093400689287, "learning_rate": 6.9727304107697625e-06, "loss": 1.4156, "step": 2209 }, { "epoch": 2.0587138863000933, "loss_reasoning": 0.49849414825439453, "loss_utility": 1.802739143371582, "step": 2209 }, { "epoch": 2.059645852749301, "grad_norm": 0.9386591703397579, "learning_rate": 6.965826717293753e-06, "loss": 1.5104, "step": 2210 }, { "epoch": 2.059645852749301, "loss_reasoning": 0.49737170338630676, "loss_utility": 0.7046815156936646, "step": 2210 }, { "epoch": 2.060577819198509, "grad_norm": 1.2682592715163505, "learning_rate": 6.958923023817743e-06, "loss": 1.5277, "step": 2211 }, { "epoch": 2.060577819198509, "loss_reasoning": 0.5257436037063599, "loss_utility": 0.7976952791213989, "step": 2211 }, { "epoch": 2.0615097856477167, "grad_norm": 0.9573783593433723, "learning_rate": 6.952019330341734e-06, "loss": 1.2732, "step": 2212 }, { "epoch": 2.0615097856477167, "loss_reasoning": 0.46107128262519836, "loss_utility": 0.889610230922699, "step": 2212 }, { "epoch": 2.0624417520969245, "grad_norm": 1.3159894212293113, "learning_rate": 6.945115636865724e-06, "loss": 1.3916, "step": 2213 }, { "epoch": 2.0624417520969245, "loss_reasoning": 0.4885956346988678, "loss_utility": 0.37203317880630493, "step": 2213 }, { "epoch": 2.0633737185461323, "grad_norm": 1.151751478232489, "learning_rate": 6.9382119433897145e-06, "loss": 1.3565, "step": 2214 }, { "epoch": 2.0633737185461323, "loss_reasoning": 0.5092496871948242, "loss_utility": 0.7427061796188354, "step": 2214 }, { "epoch": 2.06430568499534, "grad_norm": 1.2038857159941663, "learning_rate": 6.931308249913704e-06, "loss": 1.4747, "step": 2215 }, { "epoch": 2.06430568499534, "loss_reasoning": 0.4084552526473999, "loss_utility": 0.3369526267051697, "step": 2215 }, { "epoch": 2.065237651444548, "grad_norm": 1.1478718180817158, "learning_rate": 6.924404556437695e-06, "loss": 1.2459, "step": 2216 }, { "epoch": 2.065237651444548, "loss_reasoning": 0.5067018270492554, "loss_utility": 0.8888627290725708, "step": 2216 }, { "epoch": 2.0661696178937556, "grad_norm": 1.2129422506618317, "learning_rate": 6.917500862961685e-06, "loss": 1.3336, "step": 2217 }, { "epoch": 2.0661696178937556, "loss_reasoning": 0.40546345710754395, "loss_utility": 0.7995815277099609, "step": 2217 }, { "epoch": 2.0671015843429634, "grad_norm": 1.106473534000129, "learning_rate": 6.910597169485676e-06, "loss": 1.6356, "step": 2218 }, { "epoch": 2.0671015843429634, "loss_reasoning": 0.521087646484375, "loss_utility": 1.3079216480255127, "step": 2218 }, { "epoch": 2.0680335507921717, "grad_norm": 1.5172002871354464, "learning_rate": 6.903693476009666e-06, "loss": 1.5594, "step": 2219 }, { "epoch": 2.0680335507921717, "loss_reasoning": 0.4729752540588379, "loss_utility": 1.1282628774642944, "step": 2219 }, { "epoch": 2.0689655172413794, "grad_norm": 1.2799760637896083, "learning_rate": 6.896789782533656e-06, "loss": 1.2556, "step": 2220 }, { "epoch": 2.0689655172413794, "loss_reasoning": 0.4634888768196106, "loss_utility": 1.3111896514892578, "step": 2220 }, { "epoch": 2.0698974836905872, "grad_norm": 1.1501420192121437, "learning_rate": 6.889886089057647e-06, "loss": 1.594, "step": 2221 }, { "epoch": 2.0698974836905872, "loss_reasoning": 0.465766966342926, "loss_utility": 0.9609297513961792, "step": 2221 }, { "epoch": 2.070829450139795, "grad_norm": 1.2667080413800478, "learning_rate": 6.882982395581637e-06, "loss": 1.5317, "step": 2222 }, { "epoch": 2.070829450139795, "loss_reasoning": 0.47091835737228394, "loss_utility": 0.39191097021102905, "step": 2222 }, { "epoch": 2.071761416589003, "grad_norm": 1.2226931500310332, "learning_rate": 6.876078702105628e-06, "loss": 1.4206, "step": 2223 }, { "epoch": 2.071761416589003, "loss_reasoning": 0.49012720584869385, "loss_utility": 1.0952515602111816, "step": 2223 }, { "epoch": 2.0726933830382106, "grad_norm": 1.30466770652418, "learning_rate": 6.869175008629618e-06, "loss": 1.7049, "step": 2224 }, { "epoch": 2.0726933830382106, "loss_reasoning": 0.42812579870224, "loss_utility": 0.7920087575912476, "step": 2224 }, { "epoch": 2.0736253494874184, "grad_norm": 1.1679823598736572, "learning_rate": 6.862271315153608e-06, "loss": 1.4573, "step": 2225 }, { "epoch": 2.0736253494874184, "loss_reasoning": 0.4570143222808838, "loss_utility": 1.4729506969451904, "step": 2225 }, { "epoch": 2.074557315936626, "grad_norm": 1.1550023000208378, "learning_rate": 6.855367621677597e-06, "loss": 1.6295, "step": 2226 }, { "epoch": 2.074557315936626, "loss_reasoning": 0.5606889128684998, "loss_utility": 0.9586862325668335, "step": 2226 }, { "epoch": 2.075489282385834, "grad_norm": 1.4216937625527157, "learning_rate": 6.848463928201589e-06, "loss": 1.523, "step": 2227 }, { "epoch": 2.075489282385834, "loss_reasoning": 0.4643593430519104, "loss_utility": 1.541623830795288, "step": 2227 }, { "epoch": 2.0764212488350418, "grad_norm": 0.9598213905166545, "learning_rate": 6.841560234725578e-06, "loss": 1.5118, "step": 2228 }, { "epoch": 2.0764212488350418, "loss_reasoning": 0.49131637811660767, "loss_utility": 0.41688698530197144, "step": 2228 }, { "epoch": 2.0773532152842495, "grad_norm": 1.5067747863402212, "learning_rate": 6.834656541249569e-06, "loss": 1.4563, "step": 2229 }, { "epoch": 2.0773532152842495, "loss_reasoning": 0.46842530369758606, "loss_utility": 0.5104073286056519, "step": 2229 }, { "epoch": 2.0782851817334578, "grad_norm": 1.113254562464855, "learning_rate": 6.82775284777356e-06, "loss": 1.2547, "step": 2230 }, { "epoch": 2.0782851817334578, "loss_reasoning": 0.5837060213088989, "loss_utility": 0.8810255527496338, "step": 2230 }, { "epoch": 2.0792171481826656, "grad_norm": 1.0179457538856675, "learning_rate": 6.820849154297549e-06, "loss": 1.3217, "step": 2231 }, { "epoch": 2.0792171481826656, "loss_reasoning": 0.5194177627563477, "loss_utility": 1.1393864154815674, "step": 2231 }, { "epoch": 2.0801491146318734, "grad_norm": 1.1928295250959369, "learning_rate": 6.813945460821541e-06, "loss": 1.5031, "step": 2232 }, { "epoch": 2.0801491146318734, "loss_reasoning": 0.4331117272377014, "loss_utility": 0.3996339440345764, "step": 2232 }, { "epoch": 2.081081081081081, "grad_norm": 1.0958907461573486, "learning_rate": 6.80704176734553e-06, "loss": 1.1465, "step": 2233 }, { "epoch": 2.081081081081081, "loss_reasoning": 0.4587620496749878, "loss_utility": 0.8300772905349731, "step": 2233 }, { "epoch": 2.082013047530289, "grad_norm": 1.089066653075503, "learning_rate": 6.800138073869521e-06, "loss": 1.5484, "step": 2234 }, { "epoch": 2.082013047530289, "loss_reasoning": 0.4436674416065216, "loss_utility": 0.6774601936340332, "step": 2234 }, { "epoch": 2.0829450139794967, "grad_norm": 1.1802526081029876, "learning_rate": 6.7932343803935106e-06, "loss": 1.4729, "step": 2235 }, { "epoch": 2.0829450139794967, "loss_reasoning": 0.4834752380847931, "loss_utility": 0.5126340985298157, "step": 2235 }, { "epoch": 2.0838769804287045, "grad_norm": 1.3240807201509102, "learning_rate": 6.786330686917501e-06, "loss": 1.3355, "step": 2236 }, { "epoch": 2.0838769804287045, "loss_reasoning": 0.5185756683349609, "loss_utility": 1.3062691688537598, "step": 2236 }, { "epoch": 2.0848089468779123, "grad_norm": 1.1840685934289223, "learning_rate": 6.779426993441491e-06, "loss": 1.6139, "step": 2237 }, { "epoch": 2.0848089468779123, "loss_reasoning": 0.4903625249862671, "loss_utility": 0.5645535588264465, "step": 2237 }, { "epoch": 2.08574091332712, "grad_norm": 1.2155410618005202, "learning_rate": 6.772523299965482e-06, "loss": 1.6203, "step": 2238 }, { "epoch": 2.08574091332712, "loss_reasoning": 0.5296554565429688, "loss_utility": 0.5534323453903198, "step": 2238 }, { "epoch": 2.086672879776328, "grad_norm": 1.0532224439544158, "learning_rate": 6.765619606489472e-06, "loss": 1.4084, "step": 2239 }, { "epoch": 2.086672879776328, "loss_reasoning": 0.49514254927635193, "loss_utility": 0.9533135294914246, "step": 2239 }, { "epoch": 2.0876048462255357, "grad_norm": 1.5154359574554357, "learning_rate": 6.7587159130134625e-06, "loss": 1.5888, "step": 2240 }, { "epoch": 2.0876048462255357, "loss_reasoning": 0.4841763973236084, "loss_utility": 1.4005682468414307, "step": 2240 }, { "epoch": 2.088536812674744, "grad_norm": 1.289965729276194, "learning_rate": 6.751812219537453e-06, "loss": 1.6221, "step": 2241 }, { "epoch": 2.088536812674744, "loss_reasoning": 0.4610024690628052, "loss_utility": 0.701576828956604, "step": 2241 }, { "epoch": 2.0894687791239517, "grad_norm": 1.2411705072291725, "learning_rate": 6.744908526061443e-06, "loss": 1.4244, "step": 2242 }, { "epoch": 2.0894687791239517, "loss_reasoning": 0.5030103921890259, "loss_utility": 1.0586645603179932, "step": 2242 }, { "epoch": 2.0904007455731595, "grad_norm": 1.038912159762885, "learning_rate": 6.738004832585434e-06, "loss": 1.2773, "step": 2243 }, { "epoch": 2.0904007455731595, "loss_reasoning": 0.5083227753639221, "loss_utility": 0.8523985147476196, "step": 2243 }, { "epoch": 2.0913327120223673, "grad_norm": 1.3077590693421828, "learning_rate": 6.731101139109424e-06, "loss": 1.3612, "step": 2244 }, { "epoch": 2.0913327120223673, "loss_reasoning": 0.5145977735519409, "loss_utility": 0.8106555342674255, "step": 2244 }, { "epoch": 2.092264678471575, "grad_norm": 0.9912334876017322, "learning_rate": 6.7241974456334145e-06, "loss": 1.2934, "step": 2245 }, { "epoch": 2.092264678471575, "loss_reasoning": 0.5124586820602417, "loss_utility": 1.0185531377792358, "step": 2245 }, { "epoch": 2.093196644920783, "grad_norm": 1.2953672566858578, "learning_rate": 6.717293752157404e-06, "loss": 1.4367, "step": 2246 }, { "epoch": 2.093196644920783, "loss_reasoning": 0.46594342589378357, "loss_utility": 1.2935194969177246, "step": 2246 }, { "epoch": 2.0941286113699906, "grad_norm": 1.4956772040788155, "learning_rate": 6.710390058681395e-06, "loss": 1.5654, "step": 2247 }, { "epoch": 2.0941286113699906, "loss_reasoning": 0.48811954259872437, "loss_utility": 0.8801812529563904, "step": 2247 }, { "epoch": 2.0950605778191984, "grad_norm": 0.9453814821136699, "learning_rate": 6.703486365205385e-06, "loss": 1.1872, "step": 2248 }, { "epoch": 2.0950605778191984, "loss_reasoning": 0.5348150730133057, "loss_utility": 1.086862325668335, "step": 2248 }, { "epoch": 2.095992544268406, "grad_norm": 1.0946252039919562, "learning_rate": 6.696582671729376e-06, "loss": 1.3128, "step": 2249 }, { "epoch": 2.095992544268406, "loss_reasoning": 0.47316116094589233, "loss_utility": 1.2207632064819336, "step": 2249 }, { "epoch": 2.096924510717614, "grad_norm": 1.426220588066636, "learning_rate": 6.6896789782533665e-06, "loss": 1.5955, "step": 2250 }, { "epoch": 2.096924510717614, "loss_reasoning": 0.4951670169830322, "loss_utility": 0.8808112740516663, "step": 2250 }, { "epoch": 2.0978564771668218, "grad_norm": 1.2640784120786583, "learning_rate": 6.682775284777356e-06, "loss": 1.4206, "step": 2251 }, { "epoch": 2.0978564771668218, "loss_reasoning": 0.49208909273147583, "loss_utility": 1.1659027338027954, "step": 2251 }, { "epoch": 2.09878844361603, "grad_norm": 1.169332487658693, "learning_rate": 6.675871591301347e-06, "loss": 1.8628, "step": 2252 }, { "epoch": 2.09878844361603, "loss_reasoning": 0.5002091526985168, "loss_utility": 1.4781124591827393, "step": 2252 }, { "epoch": 2.099720410065238, "grad_norm": 1.1656404461575207, "learning_rate": 6.668967897825337e-06, "loss": 1.4161, "step": 2253 }, { "epoch": 2.099720410065238, "loss_reasoning": 0.4460018277168274, "loss_utility": 1.11106276512146, "step": 2253 }, { "epoch": 2.1006523765144456, "grad_norm": 1.5014222801043293, "learning_rate": 6.662064204349328e-06, "loss": 1.7314, "step": 2254 }, { "epoch": 2.1006523765144456, "loss_reasoning": 0.5077154040336609, "loss_utility": 1.075198769569397, "step": 2254 }, { "epoch": 2.1015843429636534, "grad_norm": 0.9921211921257793, "learning_rate": 6.655160510873318e-06, "loss": 1.4091, "step": 2255 }, { "epoch": 2.1015843429636534, "loss_reasoning": 0.6003104448318481, "loss_utility": 0.8051571249961853, "step": 2255 }, { "epoch": 2.102516309412861, "grad_norm": 1.1948400485366013, "learning_rate": 6.648256817397308e-06, "loss": 1.9354, "step": 2256 }, { "epoch": 2.102516309412861, "loss_reasoning": 0.4535061717033386, "loss_utility": 1.6500898599624634, "step": 2256 }, { "epoch": 2.103448275862069, "grad_norm": 1.1789810097474014, "learning_rate": 6.641353123921298e-06, "loss": 1.877, "step": 2257 }, { "epoch": 2.103448275862069, "loss_reasoning": 0.4855008125305176, "loss_utility": 0.6525072455406189, "step": 2257 }, { "epoch": 2.1043802423112767, "grad_norm": 1.0561560924665245, "learning_rate": 6.634449430445289e-06, "loss": 1.328, "step": 2258 }, { "epoch": 2.1043802423112767, "loss_reasoning": 0.49430781602859497, "loss_utility": 0.731308102607727, "step": 2258 }, { "epoch": 2.1053122087604845, "grad_norm": 1.0177251572477306, "learning_rate": 6.627545736969279e-06, "loss": 1.6139, "step": 2259 }, { "epoch": 2.1053122087604845, "loss_reasoning": 0.433205783367157, "loss_utility": 0.4170919358730316, "step": 2259 }, { "epoch": 2.1062441752096923, "grad_norm": 1.1225070087763922, "learning_rate": 6.6206420434932696e-06, "loss": 1.2971, "step": 2260 }, { "epoch": 2.1062441752096923, "loss_reasoning": 0.4992079436779022, "loss_utility": 0.8599140644073486, "step": 2260 }, { "epoch": 2.1071761416589, "grad_norm": 1.1541821602978817, "learning_rate": 6.61373835001726e-06, "loss": 1.5951, "step": 2261 }, { "epoch": 2.1071761416589, "loss_reasoning": 0.44588395953178406, "loss_utility": 1.333539605140686, "step": 2261 }, { "epoch": 2.108108108108108, "grad_norm": 1.3037337042624053, "learning_rate": 6.60683465654125e-06, "loss": 1.6983, "step": 2262 }, { "epoch": 2.108108108108108, "loss_reasoning": 0.4653949737548828, "loss_utility": 0.31351780891418457, "step": 2262 }, { "epoch": 2.109040074557316, "grad_norm": 1.098999269561347, "learning_rate": 6.599930963065241e-06, "loss": 1.4845, "step": 2263 }, { "epoch": 2.109040074557316, "loss_reasoning": 0.4702725410461426, "loss_utility": 0.9501199722290039, "step": 2263 }, { "epoch": 2.109972041006524, "grad_norm": 1.1194518465089687, "learning_rate": 6.593027269589231e-06, "loss": 1.1852, "step": 2264 }, { "epoch": 2.109972041006524, "loss_reasoning": 0.5209318995475769, "loss_utility": 1.5364536046981812, "step": 2264 }, { "epoch": 2.1109040074557317, "grad_norm": 1.0605560053605583, "learning_rate": 6.5861235761132215e-06, "loss": 1.3883, "step": 2265 }, { "epoch": 2.1109040074557317, "loss_reasoning": 0.43268367648124695, "loss_utility": 0.9612001180648804, "step": 2265 }, { "epoch": 2.1118359739049395, "grad_norm": 0.9958889856966306, "learning_rate": 6.579219882637211e-06, "loss": 1.4158, "step": 2266 }, { "epoch": 2.1118359739049395, "loss_reasoning": 0.4648453891277313, "loss_utility": 0.8575276136398315, "step": 2266 }, { "epoch": 2.1127679403541473, "grad_norm": 1.3739369589324604, "learning_rate": 6.572316189161202e-06, "loss": 1.8047, "step": 2267 }, { "epoch": 2.1127679403541473, "loss_reasoning": 0.5113363265991211, "loss_utility": 0.7892987728118896, "step": 2267 }, { "epoch": 2.113699906803355, "grad_norm": 1.1400121666073224, "learning_rate": 6.565412495685192e-06, "loss": 1.6184, "step": 2268 }, { "epoch": 2.113699906803355, "loss_reasoning": 0.49377697706222534, "loss_utility": 1.0035146474838257, "step": 2268 }, { "epoch": 2.114631873252563, "grad_norm": 1.0193368558691598, "learning_rate": 6.558508802209183e-06, "loss": 1.4312, "step": 2269 }, { "epoch": 2.114631873252563, "loss_reasoning": 0.5570229887962341, "loss_utility": 0.9490026235580444, "step": 2269 }, { "epoch": 2.1155638397017706, "grad_norm": 1.2778509910901983, "learning_rate": 6.551605108733173e-06, "loss": 1.8054, "step": 2270 }, { "epoch": 2.1155638397017706, "loss_reasoning": 0.5710750818252563, "loss_utility": 0.7642506957054138, "step": 2270 }, { "epoch": 2.1164958061509784, "grad_norm": 1.15482481607739, "learning_rate": 6.544701415257163e-06, "loss": 1.4581, "step": 2271 }, { "epoch": 2.1164958061509784, "loss_reasoning": 0.5132529735565186, "loss_utility": 1.0394434928894043, "step": 2271 }, { "epoch": 2.117427772600186, "grad_norm": 1.0856914457492979, "learning_rate": 6.537797721781154e-06, "loss": 1.6112, "step": 2272 }, { "epoch": 2.117427772600186, "loss_reasoning": 0.4960869550704956, "loss_utility": 1.080674409866333, "step": 2272 }, { "epoch": 2.118359739049394, "grad_norm": 1.108832875036208, "learning_rate": 6.530894028305144e-06, "loss": 1.3374, "step": 2273 }, { "epoch": 2.118359739049394, "loss_reasoning": 0.49121132493019104, "loss_utility": 1.2192045450210571, "step": 2273 }, { "epoch": 2.1192917054986022, "grad_norm": 1.5328082694699825, "learning_rate": 6.523990334829135e-06, "loss": 1.5328, "step": 2274 }, { "epoch": 2.1192917054986022, "loss_reasoning": 0.4217795431613922, "loss_utility": 0.822717547416687, "step": 2274 }, { "epoch": 2.12022367194781, "grad_norm": 0.958950400353667, "learning_rate": 6.517086641353124e-06, "loss": 1.257, "step": 2275 }, { "epoch": 2.12022367194781, "loss_reasoning": 0.46204453706741333, "loss_utility": 0.8767894506454468, "step": 2275 }, { "epoch": 2.121155638397018, "grad_norm": 1.1764633224719, "learning_rate": 6.510182947877115e-06, "loss": 1.5428, "step": 2276 }, { "epoch": 2.121155638397018, "loss_reasoning": 0.5171429514884949, "loss_utility": 1.0342705249786377, "step": 2276 }, { "epoch": 2.1220876048462256, "grad_norm": 1.2247343901128078, "learning_rate": 6.503279254401104e-06, "loss": 1.4971, "step": 2277 }, { "epoch": 2.1220876048462256, "loss_reasoning": 0.5046870708465576, "loss_utility": 0.8753615021705627, "step": 2277 }, { "epoch": 2.1230195712954334, "grad_norm": 1.263534983130791, "learning_rate": 6.496375560925096e-06, "loss": 1.6133, "step": 2278 }, { "epoch": 2.1230195712954334, "loss_reasoning": 0.4743703603744507, "loss_utility": 0.4678727388381958, "step": 2278 }, { "epoch": 2.123951537744641, "grad_norm": 1.273140554473322, "learning_rate": 6.489471867449085e-06, "loss": 1.1892, "step": 2279 }, { "epoch": 2.123951537744641, "loss_reasoning": 0.5010650753974915, "loss_utility": 0.5231244564056396, "step": 2279 }, { "epoch": 2.124883504193849, "grad_norm": 0.895103831083046, "learning_rate": 6.482568173973076e-06, "loss": 1.1804, "step": 2280 }, { "epoch": 2.124883504193849, "loss_reasoning": 0.48234325647354126, "loss_utility": 0.6740903854370117, "step": 2280 }, { "epoch": 2.1258154706430568, "grad_norm": 1.3293885389568232, "learning_rate": 6.475664480497067e-06, "loss": 1.2986, "step": 2281 }, { "epoch": 2.1258154706430568, "loss_reasoning": 0.5415127277374268, "loss_utility": 1.0344107151031494, "step": 2281 }, { "epoch": 2.1267474370922645, "grad_norm": 1.3345372593990703, "learning_rate": 6.468760787021056e-06, "loss": 1.4277, "step": 2282 }, { "epoch": 2.1267474370922645, "loss_reasoning": 0.5103039741516113, "loss_utility": 1.6647415161132812, "step": 2282 }, { "epoch": 2.1276794035414723, "grad_norm": 1.3330341744592829, "learning_rate": 6.461857093545048e-06, "loss": 1.895, "step": 2283 }, { "epoch": 2.1276794035414723, "loss_reasoning": 0.4992626905441284, "loss_utility": 1.088787317276001, "step": 2283 }, { "epoch": 2.12861136999068, "grad_norm": 1.4345307013838273, "learning_rate": 6.454953400069037e-06, "loss": 1.5122, "step": 2284 }, { "epoch": 2.12861136999068, "loss_reasoning": 0.4632297158241272, "loss_utility": 1.1079895496368408, "step": 2284 }, { "epoch": 2.1295433364398884, "grad_norm": 1.0626212559603885, "learning_rate": 6.448049706593028e-06, "loss": 1.2179, "step": 2285 }, { "epoch": 2.1295433364398884, "loss_reasoning": 0.5239306688308716, "loss_utility": 1.3179155588150024, "step": 2285 }, { "epoch": 2.130475302889096, "grad_norm": 1.3471839847886486, "learning_rate": 6.441146013117018e-06, "loss": 1.8761, "step": 2286 }, { "epoch": 2.130475302889096, "loss_reasoning": 0.5148004293441772, "loss_utility": 1.4382396936416626, "step": 2286 }, { "epoch": 2.131407269338304, "grad_norm": 1.2247851000877719, "learning_rate": 6.434242319641008e-06, "loss": 1.5007, "step": 2287 }, { "epoch": 2.131407269338304, "loss_reasoning": 0.42569929361343384, "loss_utility": 0.5266630053520203, "step": 2287 }, { "epoch": 2.1323392357875117, "grad_norm": 1.2175692233828108, "learning_rate": 6.427338626164998e-06, "loss": 1.3311, "step": 2288 }, { "epoch": 2.1323392357875117, "loss_reasoning": 0.4543556571006775, "loss_utility": 1.7133934497833252, "step": 2288 }, { "epoch": 2.1332712022367195, "grad_norm": 1.2054785833927695, "learning_rate": 6.420434932688989e-06, "loss": 1.6388, "step": 2289 }, { "epoch": 2.1332712022367195, "loss_reasoning": 0.4609261453151703, "loss_utility": 1.0989855527877808, "step": 2289 }, { "epoch": 2.1342031686859273, "grad_norm": 1.3387670703491292, "learning_rate": 6.413531239212979e-06, "loss": 1.639, "step": 2290 }, { "epoch": 2.1342031686859273, "loss_reasoning": 0.5429699420928955, "loss_utility": 1.2700496912002563, "step": 2290 }, { "epoch": 2.135135135135135, "grad_norm": 1.1553105033535631, "learning_rate": 6.4066275457369696e-06, "loss": 1.4926, "step": 2291 }, { "epoch": 2.135135135135135, "loss_reasoning": 0.6116613745689392, "loss_utility": 0.704079806804657, "step": 2291 }, { "epoch": 2.136067101584343, "grad_norm": 1.1029175187412998, "learning_rate": 6.39972385226096e-06, "loss": 1.2475, "step": 2292 }, { "epoch": 2.136067101584343, "loss_reasoning": 0.4719087481498718, "loss_utility": 0.6242098808288574, "step": 2292 }, { "epoch": 2.1369990680335507, "grad_norm": 1.1655650630996301, "learning_rate": 6.39282015878495e-06, "loss": 1.6208, "step": 2293 }, { "epoch": 2.1369990680335507, "loss_reasoning": 0.4943462908267975, "loss_utility": 0.6704156398773193, "step": 2293 }, { "epoch": 2.1379310344827585, "grad_norm": 1.1291506207187274, "learning_rate": 6.385916465308941e-06, "loss": 1.417, "step": 2294 }, { "epoch": 2.1379310344827585, "loss_reasoning": 0.464575856924057, "loss_utility": 1.2433226108551025, "step": 2294 }, { "epoch": 2.1388630009319662, "grad_norm": 1.3083100723652705, "learning_rate": 6.379012771832931e-06, "loss": 1.706, "step": 2295 }, { "epoch": 2.1388630009319662, "loss_reasoning": 0.5113272666931152, "loss_utility": 0.7134554386138916, "step": 2295 }, { "epoch": 2.1397949673811745, "grad_norm": 1.2418825215387945, "learning_rate": 6.3721090783569215e-06, "loss": 1.366, "step": 2296 }, { "epoch": 2.1397949673811745, "loss_reasoning": 0.46683362126350403, "loss_utility": 1.1166750192642212, "step": 2296 }, { "epoch": 2.1407269338303823, "grad_norm": 1.5325545049085998, "learning_rate": 6.365205384880911e-06, "loss": 1.6787, "step": 2297 }, { "epoch": 2.1407269338303823, "loss_reasoning": 0.4880945682525635, "loss_utility": 0.5907540321350098, "step": 2297 }, { "epoch": 2.14165890027959, "grad_norm": 1.1368084434636638, "learning_rate": 6.358301691404902e-06, "loss": 1.7301, "step": 2298 }, { "epoch": 2.14165890027959, "loss_reasoning": 0.4646444022655487, "loss_utility": 1.0955065488815308, "step": 2298 }, { "epoch": 2.142590866728798, "grad_norm": 1.2833357902842149, "learning_rate": 6.351397997928892e-06, "loss": 1.4028, "step": 2299 }, { "epoch": 2.142590866728798, "loss_reasoning": 0.539604663848877, "loss_utility": 1.1411464214324951, "step": 2299 }, { "epoch": 2.1435228331780056, "grad_norm": 1.3988750302906205, "learning_rate": 6.344494304452883e-06, "loss": 1.4751, "step": 2300 }, { "epoch": 2.1435228331780056, "loss_reasoning": 0.4874235987663269, "loss_utility": 0.916019082069397, "step": 2300 }, { "epoch": 2.1444547996272134, "grad_norm": 1.3160993312988682, "learning_rate": 6.3375906109768735e-06, "loss": 1.3456, "step": 2301 }, { "epoch": 2.1444547996272134, "loss_reasoning": 0.46960994601249695, "loss_utility": 0.5616455674171448, "step": 2301 }, { "epoch": 2.145386766076421, "grad_norm": 1.1301000235417418, "learning_rate": 6.330686917500863e-06, "loss": 1.3791, "step": 2302 }, { "epoch": 2.145386766076421, "loss_reasoning": 0.5201430320739746, "loss_utility": 0.7055925726890564, "step": 2302 }, { "epoch": 2.146318732525629, "grad_norm": 1.0800130892291984, "learning_rate": 6.323783224024854e-06, "loss": 1.3791, "step": 2303 }, { "epoch": 2.146318732525629, "loss_reasoning": 0.4665859639644623, "loss_utility": 1.8565056324005127, "step": 2303 }, { "epoch": 2.147250698974837, "grad_norm": 1.0348605019602073, "learning_rate": 6.316879530548844e-06, "loss": 1.4262, "step": 2304 }, { "epoch": 2.147250698974837, "loss_reasoning": 0.47993701696395874, "loss_utility": 0.8752344846725464, "step": 2304 }, { "epoch": 2.1481826654240446, "grad_norm": 1.1971096055727115, "learning_rate": 6.309975837072835e-06, "loss": 1.6355, "step": 2305 }, { "epoch": 2.1481826654240446, "loss_reasoning": 0.4692026972770691, "loss_utility": 0.9238253831863403, "step": 2305 }, { "epoch": 2.1491146318732524, "grad_norm": 1.2981596662082724, "learning_rate": 6.303072143596825e-06, "loss": 1.36, "step": 2306 }, { "epoch": 2.1491146318732524, "loss_reasoning": 0.6211017370223999, "loss_utility": 0.9246199727058411, "step": 2306 }, { "epoch": 2.1500465983224606, "grad_norm": 1.0135419504527292, "learning_rate": 6.296168450120815e-06, "loss": 1.3684, "step": 2307 }, { "epoch": 2.1500465983224606, "loss_reasoning": 0.48749038577079773, "loss_utility": 1.5693771839141846, "step": 2307 }, { "epoch": 2.1509785647716684, "grad_norm": 1.2342761614167488, "learning_rate": 6.289264756644805e-06, "loss": 1.5872, "step": 2308 }, { "epoch": 2.1509785647716684, "loss_reasoning": 0.5113637447357178, "loss_utility": 1.1063276529312134, "step": 2308 }, { "epoch": 2.151910531220876, "grad_norm": 1.1831451516725666, "learning_rate": 6.282361063168796e-06, "loss": 1.4271, "step": 2309 }, { "epoch": 2.151910531220876, "loss_reasoning": 0.4090450406074524, "loss_utility": 0.9685585498809814, "step": 2309 }, { "epoch": 2.152842497670084, "grad_norm": 1.0880586428668173, "learning_rate": 6.275457369692786e-06, "loss": 1.4048, "step": 2310 }, { "epoch": 2.152842497670084, "loss_reasoning": 0.49100053310394287, "loss_utility": 0.9039806127548218, "step": 2310 }, { "epoch": 2.1537744641192917, "grad_norm": 1.2054136611414346, "learning_rate": 6.268553676216777e-06, "loss": 1.4662, "step": 2311 }, { "epoch": 2.1537744641192917, "loss_reasoning": 0.5372202396392822, "loss_utility": 1.0669634342193604, "step": 2311 }, { "epoch": 2.1547064305684995, "grad_norm": 1.3647600331420877, "learning_rate": 6.261649982740767e-06, "loss": 1.6455, "step": 2312 }, { "epoch": 2.1547064305684995, "loss_reasoning": 0.5549024343490601, "loss_utility": 1.56705904006958, "step": 2312 }, { "epoch": 2.1556383970177073, "grad_norm": 1.2185974017214967, "learning_rate": 6.254746289264757e-06, "loss": 1.7485, "step": 2313 }, { "epoch": 2.1556383970177073, "loss_reasoning": 0.47359588742256165, "loss_utility": 0.692544162273407, "step": 2313 }, { "epoch": 2.156570363466915, "grad_norm": 1.310072284528485, "learning_rate": 6.247842595788748e-06, "loss": 1.2256, "step": 2314 }, { "epoch": 2.156570363466915, "loss_reasoning": 0.4749091565608978, "loss_utility": 1.0947494506835938, "step": 2314 }, { "epoch": 2.157502329916123, "grad_norm": 1.1802571692493393, "learning_rate": 6.240938902312738e-06, "loss": 1.4307, "step": 2315 }, { "epoch": 2.157502329916123, "loss_reasoning": 0.45414790511131287, "loss_utility": 0.8677874803543091, "step": 2315 }, { "epoch": 2.1584342963653307, "grad_norm": 1.2420410031635203, "learning_rate": 6.2340352088367286e-06, "loss": 1.7621, "step": 2316 }, { "epoch": 2.1584342963653307, "loss_reasoning": 0.4359933137893677, "loss_utility": 0.9343351125717163, "step": 2316 }, { "epoch": 2.1593662628145385, "grad_norm": 1.2910182349853334, "learning_rate": 6.2271315153607184e-06, "loss": 1.4334, "step": 2317 }, { "epoch": 2.1593662628145385, "loss_reasoning": 0.47155529260635376, "loss_utility": 1.196366786956787, "step": 2317 }, { "epoch": 2.1602982292637467, "grad_norm": 1.1967009136902254, "learning_rate": 6.220227821884709e-06, "loss": 1.4645, "step": 2318 }, { "epoch": 2.1602982292637467, "loss_reasoning": 0.49070030450820923, "loss_utility": 0.7531790137290955, "step": 2318 }, { "epoch": 2.1612301957129545, "grad_norm": 1.1357232028975353, "learning_rate": 6.213324128408699e-06, "loss": 1.306, "step": 2319 }, { "epoch": 2.1612301957129545, "loss_reasoning": 0.47918233275413513, "loss_utility": 1.1028671264648438, "step": 2319 }, { "epoch": 2.1621621621621623, "grad_norm": 1.1313259628275103, "learning_rate": 6.20642043493269e-06, "loss": 1.4907, "step": 2320 }, { "epoch": 2.1621621621621623, "loss_reasoning": 0.46090441942214966, "loss_utility": 1.5109493732452393, "step": 2320 }, { "epoch": 2.16309412861137, "grad_norm": 1.5027595981599708, "learning_rate": 6.1995167414566805e-06, "loss": 1.8818, "step": 2321 }, { "epoch": 2.16309412861137, "loss_reasoning": 0.5251190662384033, "loss_utility": 0.633726179599762, "step": 2321 }, { "epoch": 2.164026095060578, "grad_norm": 1.141269387321304, "learning_rate": 6.19261304798067e-06, "loss": 1.5477, "step": 2322 }, { "epoch": 2.164026095060578, "loss_reasoning": 0.43203461170196533, "loss_utility": 0.8222651481628418, "step": 2322 }, { "epoch": 2.1649580615097856, "grad_norm": 1.275324736400795, "learning_rate": 6.185709354504661e-06, "loss": 2.1579, "step": 2323 }, { "epoch": 2.1649580615097856, "loss_reasoning": 0.5302155017852783, "loss_utility": 0.8418658971786499, "step": 2323 }, { "epoch": 2.1658900279589934, "grad_norm": 1.0742468345715452, "learning_rate": 6.178805661028651e-06, "loss": 1.3178, "step": 2324 }, { "epoch": 2.1658900279589934, "loss_reasoning": 0.5398658514022827, "loss_utility": 0.6655288934707642, "step": 2324 }, { "epoch": 2.1668219944082012, "grad_norm": 1.0246566763776548, "learning_rate": 6.171901967552642e-06, "loss": 1.3376, "step": 2325 }, { "epoch": 2.1668219944082012, "loss_reasoning": 0.5410468578338623, "loss_utility": 0.8655960559844971, "step": 2325 }, { "epoch": 2.167753960857409, "grad_norm": 1.321627660158231, "learning_rate": 6.164998274076631e-06, "loss": 1.5428, "step": 2326 }, { "epoch": 2.167753960857409, "loss_reasoning": 0.46647635102272034, "loss_utility": 1.2591288089752197, "step": 2326 }, { "epoch": 2.168685927306617, "grad_norm": 1.207043658021813, "learning_rate": 6.158094580600622e-06, "loss": 1.3996, "step": 2327 }, { "epoch": 2.168685927306617, "loss_reasoning": 0.5256195664405823, "loss_utility": 0.6315851211547852, "step": 2327 }, { "epoch": 2.1696178937558246, "grad_norm": 1.306301322189249, "learning_rate": 6.151190887124611e-06, "loss": 1.3192, "step": 2328 }, { "epoch": 2.1696178937558246, "loss_reasoning": 0.5466975569725037, "loss_utility": 1.040489912033081, "step": 2328 }, { "epoch": 2.170549860205033, "grad_norm": 1.1787645137551794, "learning_rate": 6.144287193648603e-06, "loss": 1.5914, "step": 2329 }, { "epoch": 2.170549860205033, "loss_reasoning": 0.45994943380355835, "loss_utility": 1.8857975006103516, "step": 2329 }, { "epoch": 2.1714818266542406, "grad_norm": 0.9908271945357974, "learning_rate": 6.137383500172592e-06, "loss": 1.6159, "step": 2330 }, { "epoch": 2.1714818266542406, "loss_reasoning": 0.5098881721496582, "loss_utility": 0.9551801681518555, "step": 2330 }, { "epoch": 2.1724137931034484, "grad_norm": 1.0968267863387422, "learning_rate": 6.130479806696583e-06, "loss": 1.7558, "step": 2331 }, { "epoch": 2.1724137931034484, "loss_reasoning": 0.4503839910030365, "loss_utility": 1.0763330459594727, "step": 2331 }, { "epoch": 2.173345759552656, "grad_norm": 1.2139526544730976, "learning_rate": 6.123576113220574e-06, "loss": 1.584, "step": 2332 }, { "epoch": 2.173345759552656, "loss_reasoning": 0.4501517415046692, "loss_utility": 0.8332337737083435, "step": 2332 }, { "epoch": 2.174277726001864, "grad_norm": 1.1664937787420278, "learning_rate": 6.116672419744563e-06, "loss": 1.4424, "step": 2333 }, { "epoch": 2.174277726001864, "loss_reasoning": 0.4829009771347046, "loss_utility": 1.3675001859664917, "step": 2333 }, { "epoch": 2.1752096924510718, "grad_norm": 1.1442364558396205, "learning_rate": 6.109768726268555e-06, "loss": 1.6157, "step": 2334 }, { "epoch": 2.1752096924510718, "loss_reasoning": 0.46861666440963745, "loss_utility": 0.8190953135490417, "step": 2334 }, { "epoch": 2.1761416589002796, "grad_norm": 1.2995924648489963, "learning_rate": 6.102865032792544e-06, "loss": 1.4232, "step": 2335 }, { "epoch": 2.1761416589002796, "loss_reasoning": 0.40149250626564026, "loss_utility": 0.8650026321411133, "step": 2335 }, { "epoch": 2.1770736253494873, "grad_norm": 1.2566989876498162, "learning_rate": 6.095961339316535e-06, "loss": 1.5996, "step": 2336 }, { "epoch": 2.1770736253494873, "loss_reasoning": 0.5178813934326172, "loss_utility": 1.0900237560272217, "step": 2336 }, { "epoch": 2.178005591798695, "grad_norm": 1.1735396334165984, "learning_rate": 6.089057645840525e-06, "loss": 1.349, "step": 2337 }, { "epoch": 2.178005591798695, "loss_reasoning": 0.4698495864868164, "loss_utility": 0.6915380954742432, "step": 2337 }, { "epoch": 2.178937558247903, "grad_norm": 1.0253600068412017, "learning_rate": 6.082153952364515e-06, "loss": 1.3246, "step": 2338 }, { "epoch": 2.178937558247903, "loss_reasoning": 0.4712699055671692, "loss_utility": 0.8359991312026978, "step": 2338 }, { "epoch": 2.1798695246971107, "grad_norm": 1.4369747440936516, "learning_rate": 6.075250258888505e-06, "loss": 1.4717, "step": 2339 }, { "epoch": 2.1798695246971107, "loss_reasoning": 0.44116920232772827, "loss_utility": 0.8285923600196838, "step": 2339 }, { "epoch": 2.180801491146319, "grad_norm": 1.0351150837790244, "learning_rate": 6.068346565412496e-06, "loss": 1.4629, "step": 2340 }, { "epoch": 2.180801491146319, "loss_reasoning": 0.4724685549736023, "loss_utility": 0.8790121078491211, "step": 2340 }, { "epoch": 2.1817334575955267, "grad_norm": 1.237243960430383, "learning_rate": 6.061442871936487e-06, "loss": 1.433, "step": 2341 }, { "epoch": 2.1817334575955267, "loss_reasoning": 0.5562352538108826, "loss_utility": 0.9214814901351929, "step": 2341 }, { "epoch": 2.1826654240447345, "grad_norm": 1.2766817226174902, "learning_rate": 6.054539178460477e-06, "loss": 1.5974, "step": 2342 }, { "epoch": 2.1826654240447345, "loss_reasoning": 0.5124905705451965, "loss_utility": 1.1921234130859375, "step": 2342 }, { "epoch": 2.1835973904939423, "grad_norm": 1.0563683872323315, "learning_rate": 6.047635484984467e-06, "loss": 1.7166, "step": 2343 }, { "epoch": 2.1835973904939423, "loss_reasoning": 0.5545016527175903, "loss_utility": 0.9723868370056152, "step": 2343 }, { "epoch": 2.18452935694315, "grad_norm": 1.103252326779507, "learning_rate": 6.040731791508457e-06, "loss": 1.6336, "step": 2344 }, { "epoch": 2.18452935694315, "loss_reasoning": 0.48083293437957764, "loss_utility": 1.1136142015457153, "step": 2344 }, { "epoch": 2.185461323392358, "grad_norm": 1.1592967835608203, "learning_rate": 6.033828098032448e-06, "loss": 1.5786, "step": 2345 }, { "epoch": 2.185461323392358, "loss_reasoning": 0.4379175007343292, "loss_utility": 0.9943023920059204, "step": 2345 }, { "epoch": 2.1863932898415657, "grad_norm": 1.1698253137354777, "learning_rate": 6.026924404556438e-06, "loss": 1.4898, "step": 2346 }, { "epoch": 2.1863932898415657, "loss_reasoning": 0.4419640302658081, "loss_utility": 0.876374363899231, "step": 2346 }, { "epoch": 2.1873252562907735, "grad_norm": 1.2732372052666443, "learning_rate": 6.0200207110804286e-06, "loss": 1.563, "step": 2347 }, { "epoch": 2.1873252562907735, "loss_reasoning": 0.4305450916290283, "loss_utility": 1.07802414894104, "step": 2347 }, { "epoch": 2.1882572227399812, "grad_norm": 1.0337279820267395, "learning_rate": 6.0131170176044184e-06, "loss": 1.4479, "step": 2348 }, { "epoch": 2.1882572227399812, "loss_reasoning": 0.459655225276947, "loss_utility": 1.1651023626327515, "step": 2348 }, { "epoch": 2.189189189189189, "grad_norm": 1.1118948912005473, "learning_rate": 6.006213324128409e-06, "loss": 1.5316, "step": 2349 }, { "epoch": 2.189189189189189, "loss_reasoning": 0.4975600242614746, "loss_utility": 0.8136420249938965, "step": 2349 }, { "epoch": 2.190121155638397, "grad_norm": 1.1944472367422228, "learning_rate": 5.999309630652399e-06, "loss": 1.3649, "step": 2350 }, { "epoch": 2.190121155638397, "loss_reasoning": 0.47807109355926514, "loss_utility": 1.4609148502349854, "step": 2350 }, { "epoch": 2.191053122087605, "grad_norm": 0.9671149109742458, "learning_rate": 5.99240593717639e-06, "loss": 1.6531, "step": 2351 }, { "epoch": 2.191053122087605, "loss_reasoning": 0.48047712445259094, "loss_utility": 0.9626643657684326, "step": 2351 }, { "epoch": 2.191985088536813, "grad_norm": 1.3186555735977825, "learning_rate": 5.9855022437003805e-06, "loss": 1.9859, "step": 2352 }, { "epoch": 2.191985088536813, "loss_reasoning": 0.5049135088920593, "loss_utility": 0.8127774000167847, "step": 2352 }, { "epoch": 2.1929170549860206, "grad_norm": 1.385309860987672, "learning_rate": 5.97859855022437e-06, "loss": 1.2561, "step": 2353 }, { "epoch": 2.1929170549860206, "loss_reasoning": 0.5738538503646851, "loss_utility": 1.2476449012756348, "step": 2353 }, { "epoch": 2.1938490214352284, "grad_norm": 1.14997573053919, "learning_rate": 5.971694856748361e-06, "loss": 1.5976, "step": 2354 }, { "epoch": 2.1938490214352284, "loss_reasoning": 0.48795655369758606, "loss_utility": 0.807603120803833, "step": 2354 }, { "epoch": 2.194780987884436, "grad_norm": 1.1377779473790661, "learning_rate": 5.964791163272351e-06, "loss": 1.3391, "step": 2355 }, { "epoch": 2.194780987884436, "loss_reasoning": 0.5019688606262207, "loss_utility": 0.8191148042678833, "step": 2355 }, { "epoch": 2.195712954333644, "grad_norm": 1.0668032751743495, "learning_rate": 5.957887469796342e-06, "loss": 1.3734, "step": 2356 }, { "epoch": 2.195712954333644, "loss_reasoning": 0.47366175055503845, "loss_utility": 1.4159427881240845, "step": 2356 }, { "epoch": 2.196644920782852, "grad_norm": 1.2136367899658949, "learning_rate": 5.950983776320332e-06, "loss": 1.8227, "step": 2357 }, { "epoch": 2.196644920782852, "loss_reasoning": 0.43436068296432495, "loss_utility": 1.2004568576812744, "step": 2357 }, { "epoch": 2.1975768872320596, "grad_norm": 1.0982429384883667, "learning_rate": 5.944080082844322e-06, "loss": 1.7061, "step": 2358 }, { "epoch": 2.1975768872320596, "loss_reasoning": 0.49358639121055603, "loss_utility": 0.8890564441680908, "step": 2358 }, { "epoch": 2.1985088536812674, "grad_norm": 1.1902726777483355, "learning_rate": 5.937176389368312e-06, "loss": 1.643, "step": 2359 }, { "epoch": 2.1985088536812674, "loss_reasoning": 0.5121949315071106, "loss_utility": 0.5832816362380981, "step": 2359 }, { "epoch": 2.199440820130475, "grad_norm": 1.3127150899171216, "learning_rate": 5.930272695892303e-06, "loss": 1.6292, "step": 2360 }, { "epoch": 2.199440820130475, "loss_reasoning": 0.4206252992153168, "loss_utility": 1.4181180000305176, "step": 2360 }, { "epoch": 2.200372786579683, "grad_norm": 1.1242364598536354, "learning_rate": 5.923369002416293e-06, "loss": 1.631, "step": 2361 }, { "epoch": 2.200372786579683, "loss_reasoning": 0.4282025992870331, "loss_utility": 0.9047642350196838, "step": 2361 }, { "epoch": 2.201304753028891, "grad_norm": 1.2785812754433363, "learning_rate": 5.916465308940284e-06, "loss": 1.3392, "step": 2362 }, { "epoch": 2.201304753028891, "loss_reasoning": 0.528775691986084, "loss_utility": 0.6353108882904053, "step": 2362 }, { "epoch": 2.202236719478099, "grad_norm": 1.1775266995667133, "learning_rate": 5.909561615464274e-06, "loss": 1.6459, "step": 2363 }, { "epoch": 2.202236719478099, "loss_reasoning": 0.45631808042526245, "loss_utility": 0.9800525307655334, "step": 2363 }, { "epoch": 2.2031686859273067, "grad_norm": 1.2396037863972162, "learning_rate": 5.902657921988264e-06, "loss": 1.608, "step": 2364 }, { "epoch": 2.2031686859273067, "loss_reasoning": 0.5276236534118652, "loss_utility": 0.6630904674530029, "step": 2364 }, { "epoch": 2.2041006523765145, "grad_norm": 1.2985785609693625, "learning_rate": 5.895754228512255e-06, "loss": 1.3675, "step": 2365 }, { "epoch": 2.2041006523765145, "loss_reasoning": 0.4735647737979889, "loss_utility": 0.809302568435669, "step": 2365 }, { "epoch": 2.2050326188257223, "grad_norm": 1.2020878710553098, "learning_rate": 5.888850535036245e-06, "loss": 1.3205, "step": 2366 }, { "epoch": 2.2050326188257223, "loss_reasoning": 0.4709378480911255, "loss_utility": 0.7098022103309631, "step": 2366 }, { "epoch": 2.20596458527493, "grad_norm": 1.2249271222677138, "learning_rate": 5.881946841560236e-06, "loss": 1.4171, "step": 2367 }, { "epoch": 2.20596458527493, "loss_reasoning": 0.5001182556152344, "loss_utility": 1.5692553520202637, "step": 2367 }, { "epoch": 2.206896551724138, "grad_norm": 1.3903652144935192, "learning_rate": 5.8750431480842255e-06, "loss": 1.7954, "step": 2368 }, { "epoch": 2.206896551724138, "loss_reasoning": 0.4715249538421631, "loss_utility": 0.8337379693984985, "step": 2368 }, { "epoch": 2.2078285181733457, "grad_norm": 1.131336982050379, "learning_rate": 5.868139454608216e-06, "loss": 1.2368, "step": 2369 }, { "epoch": 2.2078285181733457, "loss_reasoning": 0.5110853910446167, "loss_utility": 1.1659114360809326, "step": 2369 }, { "epoch": 2.2087604846225535, "grad_norm": 1.2933975581060213, "learning_rate": 5.861235761132206e-06, "loss": 1.5793, "step": 2370 }, { "epoch": 2.2087604846225535, "loss_reasoning": 0.523470401763916, "loss_utility": 0.7913123369216919, "step": 2370 }, { "epoch": 2.2096924510717613, "grad_norm": 1.1592689586646612, "learning_rate": 5.854332067656197e-06, "loss": 1.5079, "step": 2371 }, { "epoch": 2.2096924510717613, "loss_reasoning": 0.45548489689826965, "loss_utility": 0.4742899239063263, "step": 2371 }, { "epoch": 2.210624417520969, "grad_norm": 1.199818153427719, "learning_rate": 5.8474283741801875e-06, "loss": 1.2833, "step": 2372 }, { "epoch": 2.210624417520969, "loss_reasoning": 0.45854178071022034, "loss_utility": 0.8760480880737305, "step": 2372 }, { "epoch": 2.2115563839701773, "grad_norm": 1.16086835204241, "learning_rate": 5.8405246807041774e-06, "loss": 1.6723, "step": 2373 }, { "epoch": 2.2115563839701773, "loss_reasoning": 0.49062395095825195, "loss_utility": 1.1053004264831543, "step": 2373 }, { "epoch": 2.212488350419385, "grad_norm": 1.110299972692347, "learning_rate": 5.833620987228168e-06, "loss": 1.3512, "step": 2374 }, { "epoch": 2.212488350419385, "loss_reasoning": 0.4641997814178467, "loss_utility": 1.359131932258606, "step": 2374 }, { "epoch": 2.213420316868593, "grad_norm": 1.214659613929763, "learning_rate": 5.826717293752158e-06, "loss": 1.4225, "step": 2375 }, { "epoch": 2.213420316868593, "loss_reasoning": 0.46884045004844666, "loss_utility": 0.6667686700820923, "step": 2375 }, { "epoch": 2.2143522833178007, "grad_norm": 1.666079145448483, "learning_rate": 5.819813600276149e-06, "loss": 1.3718, "step": 2376 }, { "epoch": 2.2143522833178007, "loss_reasoning": 0.4943612813949585, "loss_utility": 0.7027885913848877, "step": 2376 }, { "epoch": 2.2152842497670084, "grad_norm": 1.0454032520896335, "learning_rate": 5.812909906800138e-06, "loss": 1.5091, "step": 2377 }, { "epoch": 2.2152842497670084, "loss_reasoning": 0.4400993287563324, "loss_utility": 0.8387097120285034, "step": 2377 }, { "epoch": 2.2162162162162162, "grad_norm": 1.115730104853595, "learning_rate": 5.806006213324129e-06, "loss": 1.3489, "step": 2378 }, { "epoch": 2.2162162162162162, "loss_reasoning": 0.4895252287387848, "loss_utility": 1.0378472805023193, "step": 2378 }, { "epoch": 2.217148182665424, "grad_norm": 1.165816510486978, "learning_rate": 5.7991025198481184e-06, "loss": 1.4983, "step": 2379 }, { "epoch": 2.217148182665424, "loss_reasoning": 0.4885386824607849, "loss_utility": 1.1856358051300049, "step": 2379 }, { "epoch": 2.218080149114632, "grad_norm": 1.0877320048075039, "learning_rate": 5.79219882637211e-06, "loss": 1.3729, "step": 2380 }, { "epoch": 2.218080149114632, "loss_reasoning": 0.49437856674194336, "loss_utility": 1.190138339996338, "step": 2380 }, { "epoch": 2.2190121155638396, "grad_norm": 1.0995577363658093, "learning_rate": 5.785295132896099e-06, "loss": 1.6567, "step": 2381 }, { "epoch": 2.2190121155638396, "loss_reasoning": 0.4617035686969757, "loss_utility": 0.982887327671051, "step": 2381 }, { "epoch": 2.2199440820130474, "grad_norm": 1.5562979131489323, "learning_rate": 5.77839143942009e-06, "loss": 1.7389, "step": 2382 }, { "epoch": 2.2199440820130474, "loss_reasoning": 0.4555831551551819, "loss_utility": 0.9583017230033875, "step": 2382 }, { "epoch": 2.220876048462255, "grad_norm": 1.0097508899896963, "learning_rate": 5.771487745944081e-06, "loss": 1.2288, "step": 2383 }, { "epoch": 2.220876048462255, "loss_reasoning": 0.463340699672699, "loss_utility": 0.7626979947090149, "step": 2383 }, { "epoch": 2.2218080149114634, "grad_norm": 1.3798373473535162, "learning_rate": 5.76458405246807e-06, "loss": 1.2041, "step": 2384 }, { "epoch": 2.2218080149114634, "loss_reasoning": 0.4444688558578491, "loss_utility": 0.8447332382202148, "step": 2384 }, { "epoch": 2.222739981360671, "grad_norm": 1.0866109796496959, "learning_rate": 5.757680358992062e-06, "loss": 1.3066, "step": 2385 }, { "epoch": 2.222739981360671, "loss_reasoning": 0.4166901409626007, "loss_utility": 0.9609568119049072, "step": 2385 }, { "epoch": 2.223671947809879, "grad_norm": 1.0790993858691615, "learning_rate": 5.750776665516051e-06, "loss": 1.6893, "step": 2386 }, { "epoch": 2.223671947809879, "loss_reasoning": 0.5158252120018005, "loss_utility": 1.2231111526489258, "step": 2386 }, { "epoch": 2.2246039142590868, "grad_norm": 1.2990563022822188, "learning_rate": 5.743872972040042e-06, "loss": 1.5743, "step": 2387 }, { "epoch": 2.2246039142590868, "loss_reasoning": 0.46750539541244507, "loss_utility": 1.1929658651351929, "step": 2387 }, { "epoch": 2.2255358807082946, "grad_norm": 1.1199519410940109, "learning_rate": 5.736969278564032e-06, "loss": 1.4601, "step": 2388 }, { "epoch": 2.2255358807082946, "loss_reasoning": 0.5218865871429443, "loss_utility": 1.1223840713500977, "step": 2388 }, { "epoch": 2.2264678471575023, "grad_norm": 1.125401647161579, "learning_rate": 5.730065585088022e-06, "loss": 1.5751, "step": 2389 }, { "epoch": 2.2264678471575023, "loss_reasoning": 0.48331865668296814, "loss_utility": 0.9976156949996948, "step": 2389 }, { "epoch": 2.22739981360671, "grad_norm": 1.0577876504951234, "learning_rate": 5.723161891612012e-06, "loss": 1.2455, "step": 2390 }, { "epoch": 2.22739981360671, "loss_reasoning": 0.4976612627506256, "loss_utility": 0.938386082649231, "step": 2390 }, { "epoch": 2.228331780055918, "grad_norm": 1.0551708406551827, "learning_rate": 5.716258198136003e-06, "loss": 1.3188, "step": 2391 }, { "epoch": 2.228331780055918, "loss_reasoning": 0.48663657903671265, "loss_utility": 0.748233437538147, "step": 2391 }, { "epoch": 2.2292637465051257, "grad_norm": 1.2234888459565332, "learning_rate": 5.709354504659994e-06, "loss": 1.4411, "step": 2392 }, { "epoch": 2.2292637465051257, "loss_reasoning": 0.5624076128005981, "loss_utility": 1.052659273147583, "step": 2392 }, { "epoch": 2.2301957129543335, "grad_norm": 1.276444882004187, "learning_rate": 5.702450811183984e-06, "loss": 1.5122, "step": 2393 }, { "epoch": 2.2301957129543335, "loss_reasoning": 0.48724061250686646, "loss_utility": 0.9542442560195923, "step": 2393 }, { "epoch": 2.2311276794035413, "grad_norm": 1.170503826703795, "learning_rate": 5.695547117707974e-06, "loss": 1.5244, "step": 2394 }, { "epoch": 2.2311276794035413, "loss_reasoning": 0.484033465385437, "loss_utility": 0.8353211879730225, "step": 2394 }, { "epoch": 2.2320596458527495, "grad_norm": 1.0470864875938106, "learning_rate": 5.688643424231964e-06, "loss": 1.2149, "step": 2395 }, { "epoch": 2.2320596458527495, "loss_reasoning": 0.4684339761734009, "loss_utility": 0.9629427790641785, "step": 2395 }, { "epoch": 2.2329916123019573, "grad_norm": 1.4039431029812601, "learning_rate": 5.681739730755955e-06, "loss": 1.3803, "step": 2396 }, { "epoch": 2.2329916123019573, "loss_reasoning": 0.45990872383117676, "loss_utility": 1.101984977722168, "step": 2396 }, { "epoch": 2.233923578751165, "grad_norm": 1.214815399393812, "learning_rate": 5.674836037279945e-06, "loss": 1.4259, "step": 2397 }, { "epoch": 2.233923578751165, "loss_reasoning": 0.47506940364837646, "loss_utility": 0.7873091101646423, "step": 2397 }, { "epoch": 2.234855545200373, "grad_norm": 1.0431157704083935, "learning_rate": 5.667932343803936e-06, "loss": 1.2718, "step": 2398 }, { "epoch": 2.234855545200373, "loss_reasoning": 0.5069454908370972, "loss_utility": 1.1485743522644043, "step": 2398 }, { "epoch": 2.2357875116495807, "grad_norm": 1.2943259080896432, "learning_rate": 5.6610286503279255e-06, "loss": 1.5809, "step": 2399 }, { "epoch": 2.2357875116495807, "loss_reasoning": 0.5136932134628296, "loss_utility": 1.2756445407867432, "step": 2399 }, { "epoch": 2.2367194780987885, "grad_norm": 1.180212053481029, "learning_rate": 5.654124956851916e-06, "loss": 1.7167, "step": 2400 }, { "epoch": 2.2367194780987885, "loss_reasoning": 0.4984762668609619, "loss_utility": 0.8482173681259155, "step": 2400 }, { "epoch": 2.2376514445479962, "grad_norm": 1.1206763564971158, "learning_rate": 5.647221263375906e-06, "loss": 1.7041, "step": 2401 }, { "epoch": 2.2376514445479962, "loss_reasoning": 0.42599689960479736, "loss_utility": 0.8920022249221802, "step": 2401 }, { "epoch": 2.238583410997204, "grad_norm": 1.0137993974912294, "learning_rate": 5.640317569899897e-06, "loss": 1.471, "step": 2402 }, { "epoch": 2.238583410997204, "loss_reasoning": 0.420845091342926, "loss_utility": 1.0362262725830078, "step": 2402 }, { "epoch": 2.239515377446412, "grad_norm": 1.213846583435403, "learning_rate": 5.6334138764238875e-06, "loss": 1.5155, "step": 2403 }, { "epoch": 2.239515377446412, "loss_reasoning": 0.43384039402008057, "loss_utility": 0.46842193603515625, "step": 2403 }, { "epoch": 2.2404473438956196, "grad_norm": 1.2706792688794633, "learning_rate": 5.6265101829478774e-06, "loss": 1.1349, "step": 2404 }, { "epoch": 2.2404473438956196, "loss_reasoning": 0.44105708599090576, "loss_utility": 1.2112419605255127, "step": 2404 }, { "epoch": 2.2413793103448274, "grad_norm": 1.2844882877391286, "learning_rate": 5.619606489471868e-06, "loss": 1.4231, "step": 2405 }, { "epoch": 2.2413793103448274, "loss_reasoning": 0.505223274230957, "loss_utility": 0.5403293371200562, "step": 2405 }, { "epoch": 2.2423112767940356, "grad_norm": 1.1586440498371517, "learning_rate": 5.612702795995858e-06, "loss": 1.3206, "step": 2406 }, { "epoch": 2.2423112767940356, "loss_reasoning": 0.502918004989624, "loss_utility": 1.5707143545150757, "step": 2406 }, { "epoch": 2.2432432432432434, "grad_norm": 1.1551574854786222, "learning_rate": 5.605799102519849e-06, "loss": 1.9034, "step": 2407 }, { "epoch": 2.2432432432432434, "loss_reasoning": 0.4932039678096771, "loss_utility": 1.0409910678863525, "step": 2407 }, { "epoch": 2.244175209692451, "grad_norm": 1.35385212468694, "learning_rate": 5.598895409043839e-06, "loss": 1.3621, "step": 2408 }, { "epoch": 2.244175209692451, "loss_reasoning": 0.5185886025428772, "loss_utility": 1.0767961740493774, "step": 2408 }, { "epoch": 2.245107176141659, "grad_norm": 1.6019969388943749, "learning_rate": 5.591991715567829e-06, "loss": 1.4261, "step": 2409 }, { "epoch": 2.245107176141659, "loss_reasoning": 0.45608043670654297, "loss_utility": 0.5368191599845886, "step": 2409 }, { "epoch": 2.246039142590867, "grad_norm": 1.232443832367544, "learning_rate": 5.585088022091819e-06, "loss": 1.4664, "step": 2410 }, { "epoch": 2.246039142590867, "loss_reasoning": 0.5326711535453796, "loss_utility": 1.2976500988006592, "step": 2410 }, { "epoch": 2.2469711090400746, "grad_norm": 1.5321063916164708, "learning_rate": 5.57818432861581e-06, "loss": 1.4953, "step": 2411 }, { "epoch": 2.2469711090400746, "loss_reasoning": 0.43589892983436584, "loss_utility": 1.5331346988677979, "step": 2411 }, { "epoch": 2.2479030754892824, "grad_norm": 1.115834448403868, "learning_rate": 5.571280635139801e-06, "loss": 1.4513, "step": 2412 }, { "epoch": 2.2479030754892824, "loss_reasoning": 0.48930636048316956, "loss_utility": 1.2131338119506836, "step": 2412 }, { "epoch": 2.24883504193849, "grad_norm": 1.0345261359246294, "learning_rate": 5.564376941663791e-06, "loss": 1.3887, "step": 2413 }, { "epoch": 2.24883504193849, "loss_reasoning": 0.5002294778823853, "loss_utility": 1.0443912744522095, "step": 2413 }, { "epoch": 2.249767008387698, "grad_norm": 1.213289092009116, "learning_rate": 5.557473248187781e-06, "loss": 1.4357, "step": 2414 }, { "epoch": 2.249767008387698, "loss_reasoning": 0.4804479479789734, "loss_utility": 1.0872454643249512, "step": 2414 }, { "epoch": 2.2506989748369057, "grad_norm": 0.9795413332822123, "learning_rate": 5.550569554711771e-06, "loss": 1.4688, "step": 2415 }, { "epoch": 2.2506989748369057, "loss_reasoning": 0.47769951820373535, "loss_utility": 1.051156997680664, "step": 2415 }, { "epoch": 2.2516309412861135, "grad_norm": 1.6843168685519645, "learning_rate": 5.543665861235762e-06, "loss": 1.6223, "step": 2416 }, { "epoch": 2.2516309412861135, "loss_reasoning": 0.5162310004234314, "loss_utility": 1.021318793296814, "step": 2416 }, { "epoch": 2.2525629077353218, "grad_norm": 1.2400129606557735, "learning_rate": 5.536762167759752e-06, "loss": 1.3933, "step": 2417 }, { "epoch": 2.2525629077353218, "loss_reasoning": 0.514495849609375, "loss_utility": 1.1109951734542847, "step": 2417 }, { "epoch": 2.2534948741845295, "grad_norm": 1.363660114595498, "learning_rate": 5.529858474283743e-06, "loss": 1.6275, "step": 2418 }, { "epoch": 2.2534948741845295, "loss_reasoning": 0.414736807346344, "loss_utility": 1.6204062700271606, "step": 2418 }, { "epoch": 2.2544268406337373, "grad_norm": 1.0754824908535052, "learning_rate": 5.5229547808077325e-06, "loss": 1.4221, "step": 2419 }, { "epoch": 2.2544268406337373, "loss_reasoning": 0.44681698083877563, "loss_utility": 1.244074821472168, "step": 2419 }, { "epoch": 2.255358807082945, "grad_norm": 1.2853960222181984, "learning_rate": 5.516051087331723e-06, "loss": 1.3548, "step": 2420 }, { "epoch": 2.255358807082945, "loss_reasoning": 0.5544393062591553, "loss_utility": 0.6061506271362305, "step": 2420 }, { "epoch": 2.256290773532153, "grad_norm": 1.340470200943614, "learning_rate": 5.509147393855713e-06, "loss": 1.2683, "step": 2421 }, { "epoch": 2.256290773532153, "loss_reasoning": 0.49391645193099976, "loss_utility": 0.6770050525665283, "step": 2421 }, { "epoch": 2.2572227399813607, "grad_norm": 1.044615433335498, "learning_rate": 5.502243700379704e-06, "loss": 1.3113, "step": 2422 }, { "epoch": 2.2572227399813607, "loss_reasoning": 0.4975840151309967, "loss_utility": 0.6309880018234253, "step": 2422 }, { "epoch": 2.2581547064305685, "grad_norm": 1.2617497098921955, "learning_rate": 5.4953400069036946e-06, "loss": 1.3123, "step": 2423 }, { "epoch": 2.2581547064305685, "loss_reasoning": 0.5035828351974487, "loss_utility": 1.2398407459259033, "step": 2423 }, { "epoch": 2.2590866728797763, "grad_norm": 1.3313191644958702, "learning_rate": 5.4884363134276845e-06, "loss": 1.5139, "step": 2424 }, { "epoch": 2.2590866728797763, "loss_reasoning": 0.4822203516960144, "loss_utility": 1.040717363357544, "step": 2424 }, { "epoch": 2.260018639328984, "grad_norm": 1.0633580053156695, "learning_rate": 5.481532619951675e-06, "loss": 1.4, "step": 2425 }, { "epoch": 2.260018639328984, "loss_reasoning": 0.45758187770843506, "loss_utility": 1.1971852779388428, "step": 2425 }, { "epoch": 2.260950605778192, "grad_norm": 1.4585923006026522, "learning_rate": 5.474628926475665e-06, "loss": 1.691, "step": 2426 }, { "epoch": 2.260950605778192, "loss_reasoning": 0.4334638714790344, "loss_utility": 1.688934087753296, "step": 2426 }, { "epoch": 2.2618825722273996, "grad_norm": 1.1946832418266657, "learning_rate": 5.467725232999656e-06, "loss": 1.6621, "step": 2427 }, { "epoch": 2.2618825722273996, "loss_reasoning": 0.4977288246154785, "loss_utility": 0.47670453786849976, "step": 2427 }, { "epoch": 2.262814538676608, "grad_norm": 1.120495371629869, "learning_rate": 5.460821539523645e-06, "loss": 1.4931, "step": 2428 }, { "epoch": 2.262814538676608, "loss_reasoning": 0.4704696834087372, "loss_utility": 1.105143427848816, "step": 2428 }, { "epoch": 2.2637465051258157, "grad_norm": 1.2181200321380659, "learning_rate": 5.4539178460476364e-06, "loss": 1.421, "step": 2429 }, { "epoch": 2.2637465051258157, "loss_reasoning": 0.48419755697250366, "loss_utility": 0.6590495109558105, "step": 2429 }, { "epoch": 2.2646784715750234, "grad_norm": 1.053214642341764, "learning_rate": 5.4470141525716255e-06, "loss": 1.1634, "step": 2430 }, { "epoch": 2.2646784715750234, "loss_reasoning": 0.4275347590446472, "loss_utility": 1.2657750844955444, "step": 2430 }, { "epoch": 2.2656104380242312, "grad_norm": 1.0865126682646915, "learning_rate": 5.440110459095617e-06, "loss": 1.376, "step": 2431 }, { "epoch": 2.2656104380242312, "loss_reasoning": 0.5317966938018799, "loss_utility": 0.5886818170547485, "step": 2431 }, { "epoch": 2.266542404473439, "grad_norm": 1.0444688720573798, "learning_rate": 5.433206765619608e-06, "loss": 1.5087, "step": 2432 }, { "epoch": 2.266542404473439, "loss_reasoning": 0.527813196182251, "loss_utility": 1.223527431488037, "step": 2432 }, { "epoch": 2.267474370922647, "grad_norm": 1.1623335605171745, "learning_rate": 5.426303072143597e-06, "loss": 1.4137, "step": 2433 }, { "epoch": 2.267474370922647, "loss_reasoning": 0.5083783268928528, "loss_utility": 1.0530104637145996, "step": 2433 }, { "epoch": 2.2684063373718546, "grad_norm": 1.1867107132188333, "learning_rate": 5.419399378667588e-06, "loss": 1.3777, "step": 2434 }, { "epoch": 2.2684063373718546, "loss_reasoning": 0.5006544589996338, "loss_utility": 1.0564286708831787, "step": 2434 }, { "epoch": 2.2693383038210624, "grad_norm": 1.2151815530534387, "learning_rate": 5.4124956851915774e-06, "loss": 1.568, "step": 2435 }, { "epoch": 2.2693383038210624, "loss_reasoning": 0.44735127687454224, "loss_utility": 1.1417394876480103, "step": 2435 }, { "epoch": 2.27027027027027, "grad_norm": 1.1956396164808596, "learning_rate": 5.405591991715568e-06, "loss": 1.246, "step": 2436 }, { "epoch": 2.27027027027027, "loss_reasoning": 0.4925880432128906, "loss_utility": 1.2180590629577637, "step": 2436 }, { "epoch": 2.271202236719478, "grad_norm": 1.2174282511287566, "learning_rate": 5.398688298239558e-06, "loss": 1.5895, "step": 2437 }, { "epoch": 2.271202236719478, "loss_reasoning": 0.4972629249095917, "loss_utility": 0.48159587383270264, "step": 2437 }, { "epoch": 2.2721342031686858, "grad_norm": 1.0606847875625254, "learning_rate": 5.391784604763549e-06, "loss": 1.1974, "step": 2438 }, { "epoch": 2.2721342031686858, "loss_reasoning": 0.4784505069255829, "loss_utility": 0.9400714635848999, "step": 2438 }, { "epoch": 2.273066169617894, "grad_norm": 1.0545005647076406, "learning_rate": 5.384880911287539e-06, "loss": 1.3025, "step": 2439 }, { "epoch": 2.273066169617894, "loss_reasoning": 0.4979560375213623, "loss_utility": 1.0990527868270874, "step": 2439 }, { "epoch": 2.2739981360671018, "grad_norm": 1.1174739401624916, "learning_rate": 5.377977217811529e-06, "loss": 1.6119, "step": 2440 }, { "epoch": 2.2739981360671018, "loss_reasoning": 0.4683549702167511, "loss_utility": 0.6428400278091431, "step": 2440 }, { "epoch": 2.2749301025163096, "grad_norm": 1.1801999084358945, "learning_rate": 5.371073524335519e-06, "loss": 1.3341, "step": 2441 }, { "epoch": 2.2749301025163096, "loss_reasoning": 0.5199252367019653, "loss_utility": 1.5695658922195435, "step": 2441 }, { "epoch": 2.2758620689655173, "grad_norm": 1.015624478683218, "learning_rate": 5.36416983085951e-06, "loss": 1.5392, "step": 2442 }, { "epoch": 2.2758620689655173, "loss_reasoning": 0.5014446377754211, "loss_utility": 0.8548175096511841, "step": 2442 }, { "epoch": 2.276794035414725, "grad_norm": 1.2218455929467906, "learning_rate": 5.357266137383501e-06, "loss": 1.3555, "step": 2443 }, { "epoch": 2.276794035414725, "loss_reasoning": 0.5302061438560486, "loss_utility": 1.2050137519836426, "step": 2443 }, { "epoch": 2.277726001863933, "grad_norm": 1.0721719420132034, "learning_rate": 5.350362443907491e-06, "loss": 1.5827, "step": 2444 }, { "epoch": 2.277726001863933, "loss_reasoning": 0.4938802421092987, "loss_utility": 1.5372226238250732, "step": 2444 }, { "epoch": 2.2786579683131407, "grad_norm": 1.1472570894736394, "learning_rate": 5.343458750431481e-06, "loss": 1.7505, "step": 2445 }, { "epoch": 2.2786579683131407, "loss_reasoning": 0.46588969230651855, "loss_utility": 1.0855952501296997, "step": 2445 }, { "epoch": 2.2795899347623485, "grad_norm": 1.183443502777934, "learning_rate": 5.336555056955471e-06, "loss": 1.6312, "step": 2446 }, { "epoch": 2.2795899347623485, "loss_reasoning": 0.5262787342071533, "loss_utility": 1.6739156246185303, "step": 2446 }, { "epoch": 2.2805219012115563, "grad_norm": 1.280625840686807, "learning_rate": 5.329651363479462e-06, "loss": 1.7683, "step": 2447 }, { "epoch": 2.2805219012115563, "loss_reasoning": 0.44366905093193054, "loss_utility": 1.2499580383300781, "step": 2447 }, { "epoch": 2.281453867660764, "grad_norm": 1.2500894674450593, "learning_rate": 5.322747670003452e-06, "loss": 1.7442, "step": 2448 }, { "epoch": 2.281453867660764, "loss_reasoning": 0.46890705823898315, "loss_utility": 0.9836519956588745, "step": 2448 }, { "epoch": 2.282385834109972, "grad_norm": 1.4924247763817633, "learning_rate": 5.315843976527443e-06, "loss": 1.5136, "step": 2449 }, { "epoch": 2.282385834109972, "loss_reasoning": 0.5564375519752502, "loss_utility": 2.13985538482666, "step": 2449 }, { "epoch": 2.28331780055918, "grad_norm": 1.247390976081946, "learning_rate": 5.3089402830514325e-06, "loss": 1.744, "step": 2450 }, { "epoch": 2.28331780055918, "loss_reasoning": 0.5349990725517273, "loss_utility": 1.164125680923462, "step": 2450 }, { "epoch": 2.284249767008388, "grad_norm": 1.1256754318120736, "learning_rate": 5.302036589575423e-06, "loss": 1.5832, "step": 2451 }, { "epoch": 2.284249767008388, "loss_reasoning": 0.5235418081283569, "loss_utility": 0.738011360168457, "step": 2451 }, { "epoch": 2.2851817334575957, "grad_norm": 1.216461051510226, "learning_rate": 5.295132896099414e-06, "loss": 1.4293, "step": 2452 }, { "epoch": 2.2851817334575957, "loss_reasoning": 0.504177451133728, "loss_utility": 0.537437379360199, "step": 2452 }, { "epoch": 2.2861136999068035, "grad_norm": 1.0213520914981058, "learning_rate": 5.288229202623404e-06, "loss": 1.3657, "step": 2453 }, { "epoch": 2.2861136999068035, "loss_reasoning": 0.5502578020095825, "loss_utility": 1.0982942581176758, "step": 2453 }, { "epoch": 2.2870456663560113, "grad_norm": 1.1658435352069534, "learning_rate": 5.281325509147395e-06, "loss": 1.6529, "step": 2454 }, { "epoch": 2.2870456663560113, "loss_reasoning": 0.5069083571434021, "loss_utility": 1.382467269897461, "step": 2454 }, { "epoch": 2.287977632805219, "grad_norm": 1.30156896059539, "learning_rate": 5.2744218156713845e-06, "loss": 1.6925, "step": 2455 }, { "epoch": 2.287977632805219, "loss_reasoning": 0.4731481671333313, "loss_utility": 1.0247265100479126, "step": 2455 }, { "epoch": 2.288909599254427, "grad_norm": 1.1056736265339693, "learning_rate": 5.267518122195375e-06, "loss": 1.5802, "step": 2456 }, { "epoch": 2.288909599254427, "loss_reasoning": 0.4937843084335327, "loss_utility": 1.5931191444396973, "step": 2456 }, { "epoch": 2.2898415657036346, "grad_norm": 1.1512528390166168, "learning_rate": 5.260614428719365e-06, "loss": 1.6525, "step": 2457 }, { "epoch": 2.2898415657036346, "loss_reasoning": 0.48151376843452454, "loss_utility": 0.9725824594497681, "step": 2457 }, { "epoch": 2.2907735321528424, "grad_norm": 1.115276139485306, "learning_rate": 5.253710735243356e-06, "loss": 1.3514, "step": 2458 }, { "epoch": 2.2907735321528424, "loss_reasoning": 0.46432775259017944, "loss_utility": 1.3556188344955444, "step": 2458 }, { "epoch": 2.29170549860205, "grad_norm": 1.2847779344515262, "learning_rate": 5.246807041767346e-06, "loss": 1.3528, "step": 2459 }, { "epoch": 2.29170549860205, "loss_reasoning": 0.5331754684448242, "loss_utility": 0.606774091720581, "step": 2459 }, { "epoch": 2.292637465051258, "grad_norm": 1.2208193517937367, "learning_rate": 5.2399033482913364e-06, "loss": 1.3977, "step": 2460 }, { "epoch": 2.292637465051258, "loss_reasoning": 0.5095016956329346, "loss_utility": 0.9851001501083374, "step": 2460 }, { "epoch": 2.293569431500466, "grad_norm": 1.041014526853801, "learning_rate": 5.232999654815326e-06, "loss": 1.4739, "step": 2461 }, { "epoch": 2.293569431500466, "loss_reasoning": 0.4687238335609436, "loss_utility": 0.9322364926338196, "step": 2461 }, { "epoch": 2.294501397949674, "grad_norm": 1.2630098320570307, "learning_rate": 5.226095961339317e-06, "loss": 1.3238, "step": 2462 }, { "epoch": 2.294501397949674, "loss_reasoning": 0.4984278082847595, "loss_utility": 0.6498964428901672, "step": 2462 }, { "epoch": 2.295433364398882, "grad_norm": 1.303799406734973, "learning_rate": 5.219192267863308e-06, "loss": 1.3391, "step": 2463 }, { "epoch": 2.295433364398882, "loss_reasoning": 0.4626286029815674, "loss_utility": 1.0109241008758545, "step": 2463 }, { "epoch": 2.2963653308480896, "grad_norm": 1.0684809696940667, "learning_rate": 5.212288574387298e-06, "loss": 1.4278, "step": 2464 }, { "epoch": 2.2963653308480896, "loss_reasoning": 0.45395979285240173, "loss_utility": 1.7497659921646118, "step": 2464 }, { "epoch": 2.2972972972972974, "grad_norm": 1.1759539842089601, "learning_rate": 5.205384880911288e-06, "loss": 1.7959, "step": 2465 }, { "epoch": 2.2972972972972974, "loss_reasoning": 0.48625102639198303, "loss_utility": 1.040220022201538, "step": 2465 }, { "epoch": 2.298229263746505, "grad_norm": 1.0882834079757242, "learning_rate": 5.198481187435278e-06, "loss": 1.4367, "step": 2466 }, { "epoch": 2.298229263746505, "loss_reasoning": 0.4456250071525574, "loss_utility": 1.253899335861206, "step": 2466 }, { "epoch": 2.299161230195713, "grad_norm": 1.2834936237264882, "learning_rate": 5.191577493959269e-06, "loss": 1.3618, "step": 2467 }, { "epoch": 2.299161230195713, "loss_reasoning": 0.4640769958496094, "loss_utility": 1.0650947093963623, "step": 2467 }, { "epoch": 2.3000931966449207, "grad_norm": 1.2604938947101436, "learning_rate": 5.184673800483259e-06, "loss": 1.6156, "step": 2468 }, { "epoch": 2.3000931966449207, "loss_reasoning": 0.46291816234588623, "loss_utility": 0.41808071732521057, "step": 2468 }, { "epoch": 2.3010251630941285, "grad_norm": 1.0160793457639954, "learning_rate": 5.17777010700725e-06, "loss": 1.4313, "step": 2469 }, { "epoch": 2.3010251630941285, "loss_reasoning": 0.5508185625076294, "loss_utility": 1.1646316051483154, "step": 2469 }, { "epoch": 2.3019571295433363, "grad_norm": 1.2628513061707936, "learning_rate": 5.1708664135312395e-06, "loss": 1.576, "step": 2470 }, { "epoch": 2.3019571295433363, "loss_reasoning": 0.48313820362091064, "loss_utility": 1.0783157348632812, "step": 2470 }, { "epoch": 2.302889095992544, "grad_norm": 1.1298586502916093, "learning_rate": 5.16396272005523e-06, "loss": 1.6088, "step": 2471 }, { "epoch": 2.302889095992544, "loss_reasoning": 0.4969647228717804, "loss_utility": 0.6330310106277466, "step": 2471 }, { "epoch": 2.3038210624417523, "grad_norm": 1.1217986385621963, "learning_rate": 5.15705902657922e-06, "loss": 1.6007, "step": 2472 }, { "epoch": 2.3038210624417523, "loss_reasoning": 0.4995437264442444, "loss_utility": 1.0384600162506104, "step": 2472 }, { "epoch": 2.3047530288909597, "grad_norm": 1.0567894250160899, "learning_rate": 5.150155333103211e-06, "loss": 1.4706, "step": 2473 }, { "epoch": 2.3047530288909597, "loss_reasoning": 0.4785103499889374, "loss_utility": 1.2415916919708252, "step": 2473 }, { "epoch": 2.305684995340168, "grad_norm": 1.278392026088174, "learning_rate": 5.143251639627202e-06, "loss": 1.4298, "step": 2474 }, { "epoch": 2.305684995340168, "loss_reasoning": 0.45856618881225586, "loss_utility": 1.5042979717254639, "step": 2474 }, { "epoch": 2.3066169617893757, "grad_norm": 1.062884245969137, "learning_rate": 5.1363479461511915e-06, "loss": 1.5415, "step": 2475 }, { "epoch": 2.3066169617893757, "loss_reasoning": 0.44289207458496094, "loss_utility": 0.8634268045425415, "step": 2475 }, { "epoch": 2.3075489282385835, "grad_norm": 0.9911390626703592, "learning_rate": 5.129444252675182e-06, "loss": 1.5407, "step": 2476 }, { "epoch": 2.3075489282385835, "loss_reasoning": 0.46459734439849854, "loss_utility": 0.7458148002624512, "step": 2476 }, { "epoch": 2.3084808946877913, "grad_norm": 1.237258667069597, "learning_rate": 5.122540559199172e-06, "loss": 1.4302, "step": 2477 }, { "epoch": 2.3084808946877913, "loss_reasoning": 0.5140770673751831, "loss_utility": 1.1732141971588135, "step": 2477 }, { "epoch": 2.309412861136999, "grad_norm": 1.2388680268567307, "learning_rate": 5.115636865723163e-06, "loss": 1.5137, "step": 2478 }, { "epoch": 2.309412861136999, "loss_reasoning": 0.4944639801979065, "loss_utility": 1.3723219633102417, "step": 2478 }, { "epoch": 2.310344827586207, "grad_norm": 1.2443267252761496, "learning_rate": 5.108733172247152e-06, "loss": 1.4823, "step": 2479 }, { "epoch": 2.310344827586207, "loss_reasoning": 0.5651142597198486, "loss_utility": 0.8270998001098633, "step": 2479 }, { "epoch": 2.3112767940354146, "grad_norm": 1.1431215710036002, "learning_rate": 5.1018294787711435e-06, "loss": 1.1722, "step": 2480 }, { "epoch": 2.3112767940354146, "loss_reasoning": 0.5161172747612, "loss_utility": 1.5220627784729004, "step": 2480 }, { "epoch": 2.3122087604846224, "grad_norm": 1.3293999299749013, "learning_rate": 5.0949257852951325e-06, "loss": 1.7551, "step": 2481 }, { "epoch": 2.3122087604846224, "loss_reasoning": 0.4974629580974579, "loss_utility": 0.597038209438324, "step": 2481 }, { "epoch": 2.31314072693383, "grad_norm": 1.0793195857512716, "learning_rate": 5.088022091819124e-06, "loss": 1.3379, "step": 2482 }, { "epoch": 2.31314072693383, "loss_reasoning": 0.505044162273407, "loss_utility": 0.6151441931724548, "step": 2482 }, { "epoch": 2.3140726933830384, "grad_norm": 1.2250804829954698, "learning_rate": 5.081118398343115e-06, "loss": 1.4234, "step": 2483 }, { "epoch": 2.3140726933830384, "loss_reasoning": 0.45391038060188293, "loss_utility": 0.9294559955596924, "step": 2483 }, { "epoch": 2.315004659832246, "grad_norm": 1.265731871461078, "learning_rate": 5.074214704867104e-06, "loss": 1.4411, "step": 2484 }, { "epoch": 2.315004659832246, "loss_reasoning": 0.4526064991950989, "loss_utility": 1.7609410285949707, "step": 2484 }, { "epoch": 2.315936626281454, "grad_norm": 1.1333272353322368, "learning_rate": 5.0673110113910954e-06, "loss": 1.5694, "step": 2485 }, { "epoch": 2.315936626281454, "loss_reasoning": 0.4581325054168701, "loss_utility": 0.3174476623535156, "step": 2485 }, { "epoch": 2.316868592730662, "grad_norm": 1.2300415295445029, "learning_rate": 5.0604073179150845e-06, "loss": 1.4332, "step": 2486 }, { "epoch": 2.316868592730662, "loss_reasoning": 0.52911776304245, "loss_utility": 0.8152068257331848, "step": 2486 }, { "epoch": 2.3178005591798696, "grad_norm": 1.1115677629784269, "learning_rate": 5.053503624439075e-06, "loss": 1.5853, "step": 2487 }, { "epoch": 2.3178005591798696, "loss_reasoning": 0.4158427119255066, "loss_utility": 0.536098062992096, "step": 2487 }, { "epoch": 2.3187325256290774, "grad_norm": 1.4183765664554209, "learning_rate": 5.046599930963065e-06, "loss": 1.5876, "step": 2488 }, { "epoch": 2.3187325256290774, "loss_reasoning": 0.5171223878860474, "loss_utility": 0.8154398202896118, "step": 2488 }, { "epoch": 2.319664492078285, "grad_norm": 1.3019330407092444, "learning_rate": 5.039696237487056e-06, "loss": 1.4687, "step": 2489 }, { "epoch": 2.319664492078285, "loss_reasoning": 0.46805107593536377, "loss_utility": 1.7172389030456543, "step": 2489 }, { "epoch": 2.320596458527493, "grad_norm": 1.2443518627094716, "learning_rate": 5.032792544011046e-06, "loss": 1.6442, "step": 2490 }, { "epoch": 2.320596458527493, "loss_reasoning": 0.5279219150543213, "loss_utility": 1.3862426280975342, "step": 2490 }, { "epoch": 2.3215284249767008, "grad_norm": 1.191539236090136, "learning_rate": 5.0258888505350364e-06, "loss": 1.9571, "step": 2491 }, { "epoch": 2.3215284249767008, "loss_reasoning": 0.4969901442527771, "loss_utility": 1.1564676761627197, "step": 2491 }, { "epoch": 2.3224603914259085, "grad_norm": 1.1592454869061295, "learning_rate": 5.018985157059026e-06, "loss": 1.6653, "step": 2492 }, { "epoch": 2.3224603914259085, "loss_reasoning": 0.545976996421814, "loss_utility": 1.3134040832519531, "step": 2492 }, { "epoch": 2.3233923578751163, "grad_norm": 1.2073141190174892, "learning_rate": 5.012081463583017e-06, "loss": 1.7126, "step": 2493 }, { "epoch": 2.3233923578751163, "loss_reasoning": 0.4745727777481079, "loss_utility": 0.9872607588768005, "step": 2493 }, { "epoch": 2.3243243243243246, "grad_norm": 1.2124989443047565, "learning_rate": 5.005177770107008e-06, "loss": 1.6642, "step": 2494 }, { "epoch": 2.3243243243243246, "loss_reasoning": 0.495571494102478, "loss_utility": 0.585930585861206, "step": 2494 }, { "epoch": 2.325256290773532, "grad_norm": 1.0645437974726402, "learning_rate": 4.998274076630998e-06, "loss": 1.5612, "step": 2495 }, { "epoch": 2.325256290773532, "loss_reasoning": 0.4931586682796478, "loss_utility": 1.1349940299987793, "step": 2495 }, { "epoch": 2.32618825722274, "grad_norm": 1.1721463373455248, "learning_rate": 4.991370383154988e-06, "loss": 1.3997, "step": 2496 }, { "epoch": 2.32618825722274, "loss_reasoning": 0.452234148979187, "loss_utility": 0.9104220867156982, "step": 2496 }, { "epoch": 2.327120223671948, "grad_norm": 1.2000670285480277, "learning_rate": 4.984466689678978e-06, "loss": 1.3358, "step": 2497 }, { "epoch": 2.327120223671948, "loss_reasoning": 0.5355735421180725, "loss_utility": 1.160886287689209, "step": 2497 }, { "epoch": 2.3280521901211557, "grad_norm": 1.3593692738275815, "learning_rate": 4.977562996202969e-06, "loss": 1.6754, "step": 2498 }, { "epoch": 2.3280521901211557, "loss_reasoning": 0.4772282838821411, "loss_utility": 0.780486524105072, "step": 2498 }, { "epoch": 2.3289841565703635, "grad_norm": 1.37146491574934, "learning_rate": 4.97065930272696e-06, "loss": 1.4303, "step": 2499 }, { "epoch": 2.3289841565703635, "loss_reasoning": 0.4669400453567505, "loss_utility": 0.807898759841919, "step": 2499 }, { "epoch": 2.3299161230195713, "grad_norm": 1.3111322265843428, "learning_rate": 4.96375560925095e-06, "loss": 1.4266, "step": 2500 }, { "epoch": 2.3299161230195713, "loss_reasoning": 0.5562127232551575, "loss_utility": 1.1382853984832764, "step": 2500 }, { "epoch": 2.330848089468779, "grad_norm": 1.4500943461139093, "learning_rate": 4.95685191577494e-06, "loss": 1.4549, "step": 2501 }, { "epoch": 2.330848089468779, "loss_reasoning": 0.44111397862434387, "loss_utility": 0.9099377393722534, "step": 2501 }, { "epoch": 2.331780055917987, "grad_norm": 1.381912465376404, "learning_rate": 4.94994822229893e-06, "loss": 1.5052, "step": 2502 }, { "epoch": 2.331780055917987, "loss_reasoning": 0.47803550958633423, "loss_utility": 0.96115642786026, "step": 2502 }, { "epoch": 2.3327120223671947, "grad_norm": 1.2654574962896035, "learning_rate": 4.943044528822921e-06, "loss": 1.5392, "step": 2503 }, { "epoch": 2.3327120223671947, "loss_reasoning": 0.416581392288208, "loss_utility": 0.8746098279953003, "step": 2503 }, { "epoch": 2.3336439888164024, "grad_norm": 1.4841040505049725, "learning_rate": 4.936140835346911e-06, "loss": 1.3189, "step": 2504 }, { "epoch": 2.3336439888164024, "loss_reasoning": 0.5239876508712769, "loss_utility": 0.8424785137176514, "step": 2504 }, { "epoch": 2.3345759552656107, "grad_norm": 1.1140781667004398, "learning_rate": 4.929237141870902e-06, "loss": 1.443, "step": 2505 }, { "epoch": 2.3345759552656107, "loss_reasoning": 0.5036479234695435, "loss_utility": 1.097888469696045, "step": 2505 }, { "epoch": 2.335507921714818, "grad_norm": 1.3082186918029717, "learning_rate": 4.9223334483948915e-06, "loss": 1.375, "step": 2506 }, { "epoch": 2.335507921714818, "loss_reasoning": 0.4451856315135956, "loss_utility": 1.065748929977417, "step": 2506 }, { "epoch": 2.3364398881640263, "grad_norm": 1.2438173473685556, "learning_rate": 4.915429754918881e-06, "loss": 1.4819, "step": 2507 }, { "epoch": 2.3364398881640263, "loss_reasoning": 0.5485469102859497, "loss_utility": 0.7798258066177368, "step": 2507 }, { "epoch": 2.337371854613234, "grad_norm": 1.340747991799731, "learning_rate": 4.908526061442873e-06, "loss": 1.5384, "step": 2508 }, { "epoch": 2.337371854613234, "loss_reasoning": 0.4984910488128662, "loss_utility": 1.0379676818847656, "step": 2508 }, { "epoch": 2.338303821062442, "grad_norm": 1.1446759907580042, "learning_rate": 4.901622367966863e-06, "loss": 1.7875, "step": 2509 }, { "epoch": 2.338303821062442, "loss_reasoning": 0.519218385219574, "loss_utility": 0.8460686206817627, "step": 2509 }, { "epoch": 2.3392357875116496, "grad_norm": 1.1335553780499283, "learning_rate": 4.894718674490853e-06, "loss": 1.2879, "step": 2510 }, { "epoch": 2.3392357875116496, "loss_reasoning": 0.5003407001495361, "loss_utility": 1.278641700744629, "step": 2510 }, { "epoch": 2.3401677539608574, "grad_norm": 1.3103107788770378, "learning_rate": 4.8878149810148435e-06, "loss": 1.5763, "step": 2511 }, { "epoch": 2.3401677539608574, "loss_reasoning": 0.4926774501800537, "loss_utility": 0.5412858724594116, "step": 2511 }, { "epoch": 2.341099720410065, "grad_norm": 1.143526563757796, "learning_rate": 4.880911287538833e-06, "loss": 1.3787, "step": 2512 }, { "epoch": 2.341099720410065, "loss_reasoning": 0.5011473298072815, "loss_utility": 0.7947349548339844, "step": 2512 }, { "epoch": 2.342031686859273, "grad_norm": 1.1584509177893634, "learning_rate": 4.874007594062824e-06, "loss": 1.2972, "step": 2513 }, { "epoch": 2.342031686859273, "loss_reasoning": 0.5024776458740234, "loss_utility": 0.8194969296455383, "step": 2513 }, { "epoch": 2.3429636533084808, "grad_norm": 1.0444834847846627, "learning_rate": 4.867103900586814e-06, "loss": 1.4529, "step": 2514 }, { "epoch": 2.3429636533084808, "loss_reasoning": 0.5555809736251831, "loss_utility": 1.0430858135223389, "step": 2514 }, { "epoch": 2.3438956197576886, "grad_norm": 1.2830151129606804, "learning_rate": 4.860200207110805e-06, "loss": 1.5188, "step": 2515 }, { "epoch": 2.3438956197576886, "loss_reasoning": 0.4996504485607147, "loss_utility": 1.1550581455230713, "step": 2515 }, { "epoch": 2.344827586206897, "grad_norm": 1.4200278103252764, "learning_rate": 4.853296513634795e-06, "loss": 1.4981, "step": 2516 }, { "epoch": 2.344827586206897, "loss_reasoning": 0.4487534165382385, "loss_utility": 1.30763840675354, "step": 2516 }, { "epoch": 2.345759552656104, "grad_norm": 1.0825737458564175, "learning_rate": 4.846392820158785e-06, "loss": 1.702, "step": 2517 }, { "epoch": 2.345759552656104, "loss_reasoning": 0.5073651075363159, "loss_utility": 1.0872375965118408, "step": 2517 }, { "epoch": 2.3466915191053124, "grad_norm": 1.1068372765486516, "learning_rate": 4.839489126682776e-06, "loss": 1.3608, "step": 2518 }, { "epoch": 2.3466915191053124, "loss_reasoning": 0.47340822219848633, "loss_utility": 0.8858627080917358, "step": 2518 }, { "epoch": 2.34762348555452, "grad_norm": 1.0283444283105931, "learning_rate": 4.832585433206766e-06, "loss": 1.3826, "step": 2519 }, { "epoch": 2.34762348555452, "loss_reasoning": 0.5354960560798645, "loss_utility": 0.7198834419250488, "step": 2519 }, { "epoch": 2.348555452003728, "grad_norm": 1.2260377226967547, "learning_rate": 4.825681739730757e-06, "loss": 1.3504, "step": 2520 }, { "epoch": 2.348555452003728, "loss_reasoning": 0.4290722608566284, "loss_utility": 1.1713457107543945, "step": 2520 }, { "epoch": 2.3494874184529357, "grad_norm": 1.2629650182524954, "learning_rate": 4.8187780462547465e-06, "loss": 1.4633, "step": 2521 }, { "epoch": 2.3494874184529357, "loss_reasoning": 0.46599170565605164, "loss_utility": 0.45056137442588806, "step": 2521 }, { "epoch": 2.3504193849021435, "grad_norm": 1.2296576898177072, "learning_rate": 4.811874352778737e-06, "loss": 1.0784, "step": 2522 }, { "epoch": 2.3504193849021435, "loss_reasoning": 0.4927924573421478, "loss_utility": 1.6363062858581543, "step": 2522 }, { "epoch": 2.3513513513513513, "grad_norm": 1.0406667162896137, "learning_rate": 4.804970659302727e-06, "loss": 1.82, "step": 2523 }, { "epoch": 2.3513513513513513, "loss_reasoning": 0.48427510261535645, "loss_utility": 1.7315963506698608, "step": 2523 }, { "epoch": 2.352283317800559, "grad_norm": 1.2467149021662456, "learning_rate": 4.798066965826718e-06, "loss": 1.7728, "step": 2524 }, { "epoch": 2.352283317800559, "loss_reasoning": 0.5181625485420227, "loss_utility": 1.219838261604309, "step": 2524 }, { "epoch": 2.353215284249767, "grad_norm": 1.0969521030044365, "learning_rate": 4.791163272350708e-06, "loss": 1.6591, "step": 2525 }, { "epoch": 2.353215284249767, "loss_reasoning": 0.4848858416080475, "loss_utility": 1.2133674621582031, "step": 2525 }, { "epoch": 2.3541472506989747, "grad_norm": 1.1368486887310445, "learning_rate": 4.7842595788746985e-06, "loss": 1.3972, "step": 2526 }, { "epoch": 2.3541472506989747, "loss_reasoning": 0.5200018882751465, "loss_utility": 0.7525546550750732, "step": 2526 }, { "epoch": 2.355079217148183, "grad_norm": 1.1405466887319322, "learning_rate": 4.777355885398688e-06, "loss": 1.5503, "step": 2527 }, { "epoch": 2.355079217148183, "loss_reasoning": 0.4685787558555603, "loss_utility": 0.9761059284210205, "step": 2527 }, { "epoch": 2.3560111835973903, "grad_norm": 1.2619848664420925, "learning_rate": 4.770452191922679e-06, "loss": 1.5115, "step": 2528 }, { "epoch": 2.3560111835973903, "loss_reasoning": 0.4559257924556732, "loss_utility": 1.1461149454116821, "step": 2528 }, { "epoch": 2.3569431500465985, "grad_norm": 1.5234999934048636, "learning_rate": 4.76354849844667e-06, "loss": 1.6616, "step": 2529 }, { "epoch": 2.3569431500465985, "loss_reasoning": 0.550538182258606, "loss_utility": 1.2234926223754883, "step": 2529 }, { "epoch": 2.3578751164958063, "grad_norm": 1.3952091995345803, "learning_rate": 4.75664480497066e-06, "loss": 1.6507, "step": 2530 }, { "epoch": 2.3578751164958063, "loss_reasoning": 0.5449852347373962, "loss_utility": 1.074812889099121, "step": 2530 }, { "epoch": 2.358807082945014, "grad_norm": 1.0039389428265322, "learning_rate": 4.7497411114946505e-06, "loss": 1.3386, "step": 2531 }, { "epoch": 2.358807082945014, "loss_reasoning": 0.5172023177146912, "loss_utility": 1.010504126548767, "step": 2531 }, { "epoch": 2.359739049394222, "grad_norm": 1.1723271195924425, "learning_rate": 4.74283741801864e-06, "loss": 1.4676, "step": 2532 }, { "epoch": 2.359739049394222, "loss_reasoning": 0.4672827124595642, "loss_utility": 1.0884451866149902, "step": 2532 }, { "epoch": 2.3606710158434296, "grad_norm": 1.1565201933793845, "learning_rate": 4.73593372454263e-06, "loss": 1.405, "step": 2533 }, { "epoch": 2.3606710158434296, "loss_reasoning": 0.4977337121963501, "loss_utility": 1.522871971130371, "step": 2533 }, { "epoch": 2.3616029822926374, "grad_norm": 1.2801131908872099, "learning_rate": 4.729030031066621e-06, "loss": 1.5942, "step": 2534 }, { "epoch": 2.3616029822926374, "loss_reasoning": 0.4907792806625366, "loss_utility": 0.7462068200111389, "step": 2534 }, { "epoch": 2.362534948741845, "grad_norm": 1.3445180806813886, "learning_rate": 4.722126337590611e-06, "loss": 1.2393, "step": 2535 }, { "epoch": 2.362534948741845, "loss_reasoning": 0.48272791504859924, "loss_utility": 1.1702734231948853, "step": 2535 }, { "epoch": 2.363466915191053, "grad_norm": 1.1746041740663773, "learning_rate": 4.715222644114602e-06, "loss": 1.5336, "step": 2536 }, { "epoch": 2.363466915191053, "loss_reasoning": 0.5037727952003479, "loss_utility": 1.309156894683838, "step": 2536 }, { "epoch": 2.364398881640261, "grad_norm": 1.295069509939006, "learning_rate": 4.7083189506385915e-06, "loss": 1.3723, "step": 2537 }, { "epoch": 2.364398881640261, "loss_reasoning": 0.5125863552093506, "loss_utility": 1.135870337486267, "step": 2537 }, { "epoch": 2.3653308480894686, "grad_norm": 1.4598632710357973, "learning_rate": 4.701415257162582e-06, "loss": 1.4182, "step": 2538 }, { "epoch": 2.3653308480894686, "loss_reasoning": 0.3767363727092743, "loss_utility": 1.5641261339187622, "step": 2538 }, { "epoch": 2.3662628145386764, "grad_norm": 1.1213502751146964, "learning_rate": 4.694511563686573e-06, "loss": 1.5342, "step": 2539 }, { "epoch": 2.3662628145386764, "loss_reasoning": 0.490065336227417, "loss_utility": 1.3423792123794556, "step": 2539 }, { "epoch": 2.3671947809878846, "grad_norm": 1.0674639181119485, "learning_rate": 4.687607870210563e-06, "loss": 1.5435, "step": 2540 }, { "epoch": 2.3671947809878846, "loss_reasoning": 0.4944671392440796, "loss_utility": 1.1848304271697998, "step": 2540 }, { "epoch": 2.3681267474370924, "grad_norm": 1.1266207007267635, "learning_rate": 4.6807041767345536e-06, "loss": 2.0702, "step": 2541 }, { "epoch": 2.3681267474370924, "loss_reasoning": 0.5550961494445801, "loss_utility": 0.8368930816650391, "step": 2541 }, { "epoch": 2.3690587138863, "grad_norm": 1.1958089736409558, "learning_rate": 4.6738004832585435e-06, "loss": 1.2429, "step": 2542 }, { "epoch": 2.3690587138863, "loss_reasoning": 0.5158903002738953, "loss_utility": 1.1680259704589844, "step": 2542 }, { "epoch": 2.369990680335508, "grad_norm": 1.4225150552121377, "learning_rate": 4.666896789782534e-06, "loss": 1.6622, "step": 2543 }, { "epoch": 2.369990680335508, "loss_reasoning": 0.5534125566482544, "loss_utility": 1.1362042427062988, "step": 2543 }, { "epoch": 2.3709226467847158, "grad_norm": 1.134944850051928, "learning_rate": 4.659993096306524e-06, "loss": 1.5158, "step": 2544 }, { "epoch": 2.3709226467847158, "loss_reasoning": 0.5094673037528992, "loss_utility": 0.9261491894721985, "step": 2544 }, { "epoch": 2.3718546132339235, "grad_norm": 1.2018608142756921, "learning_rate": 4.653089402830515e-06, "loss": 1.2685, "step": 2545 }, { "epoch": 2.3718546132339235, "loss_reasoning": 0.43452751636505127, "loss_utility": 1.136096477508545, "step": 2545 }, { "epoch": 2.3727865796831313, "grad_norm": 1.2772953542309167, "learning_rate": 4.646185709354505e-06, "loss": 1.4755, "step": 2546 }, { "epoch": 2.3727865796831313, "loss_reasoning": 0.5401829481124878, "loss_utility": 0.8800625205039978, "step": 2546 }, { "epoch": 2.373718546132339, "grad_norm": 1.1307365939647367, "learning_rate": 4.6392820158784954e-06, "loss": 1.298, "step": 2547 }, { "epoch": 2.373718546132339, "loss_reasoning": 0.4554719924926758, "loss_utility": 1.2372642755508423, "step": 2547 }, { "epoch": 2.374650512581547, "grad_norm": 1.0388820177298659, "learning_rate": 4.632378322402486e-06, "loss": 1.2542, "step": 2548 }, { "epoch": 2.374650512581547, "loss_reasoning": 0.4787488877773285, "loss_utility": 0.9181590676307678, "step": 2548 }, { "epoch": 2.3755824790307547, "grad_norm": 1.2772245521258396, "learning_rate": 4.625474628926476e-06, "loss": 1.5779, "step": 2549 }, { "epoch": 2.3755824790307547, "loss_reasoning": 0.4567049741744995, "loss_utility": 0.9175147414207458, "step": 2549 }, { "epoch": 2.3765144454799625, "grad_norm": 1.069986309706842, "learning_rate": 4.618570935450467e-06, "loss": 1.2084, "step": 2550 }, { "epoch": 2.3765144454799625, "loss_reasoning": 0.47420331835746765, "loss_utility": 1.1260216236114502, "step": 2550 }, { "epoch": 2.3774464119291707, "grad_norm": 1.2196673982922257, "learning_rate": 4.611667241974457e-06, "loss": 1.4703, "step": 2551 }, { "epoch": 2.3774464119291707, "loss_reasoning": 0.45700550079345703, "loss_utility": 0.7101070284843445, "step": 2551 }, { "epoch": 2.3783783783783785, "grad_norm": 1.3093394539896839, "learning_rate": 4.604763548498447e-06, "loss": 1.3332, "step": 2552 }, { "epoch": 2.3783783783783785, "loss_reasoning": 0.4976423978805542, "loss_utility": 0.641834020614624, "step": 2552 }, { "epoch": 2.3793103448275863, "grad_norm": 1.0844494080357452, "learning_rate": 4.597859855022437e-06, "loss": 1.5046, "step": 2553 }, { "epoch": 2.3793103448275863, "loss_reasoning": 0.4626990258693695, "loss_utility": 1.642187476158142, "step": 2553 }, { "epoch": 2.380242311276794, "grad_norm": 1.2469142928492531, "learning_rate": 4.590956161546428e-06, "loss": 1.5842, "step": 2554 }, { "epoch": 2.380242311276794, "loss_reasoning": 0.5072648525238037, "loss_utility": 0.9832566976547241, "step": 2554 }, { "epoch": 2.381174277726002, "grad_norm": 1.3044734868321408, "learning_rate": 4.584052468070418e-06, "loss": 1.484, "step": 2555 }, { "epoch": 2.381174277726002, "loss_reasoning": 0.514059841632843, "loss_utility": 0.8968271017074585, "step": 2555 }, { "epoch": 2.3821062441752097, "grad_norm": 1.11202543958233, "learning_rate": 4.577148774594408e-06, "loss": 1.4436, "step": 2556 }, { "epoch": 2.3821062441752097, "loss_reasoning": 0.547681450843811, "loss_utility": 1.3733524084091187, "step": 2556 }, { "epoch": 2.3830382106244175, "grad_norm": 1.2603807845640207, "learning_rate": 4.5702450811183985e-06, "loss": 1.699, "step": 2557 }, { "epoch": 2.3830382106244175, "loss_reasoning": 0.49285000562667847, "loss_utility": 0.7431966066360474, "step": 2557 }, { "epoch": 2.3839701770736252, "grad_norm": 1.3440445844976934, "learning_rate": 4.563341387642389e-06, "loss": 1.411, "step": 2558 }, { "epoch": 2.3839701770736252, "loss_reasoning": 0.5514662265777588, "loss_utility": 0.934539794921875, "step": 2558 }, { "epoch": 2.384902143522833, "grad_norm": 1.207243829143245, "learning_rate": 4.55643769416638e-06, "loss": 1.3619, "step": 2559 }, { "epoch": 2.384902143522833, "loss_reasoning": 0.533916711807251, "loss_utility": 1.2270139455795288, "step": 2559 }, { "epoch": 2.385834109972041, "grad_norm": 1.2974234634376463, "learning_rate": 4.54953400069037e-06, "loss": 1.4015, "step": 2560 }, { "epoch": 2.385834109972041, "loss_reasoning": 0.5805685520172119, "loss_utility": 0.738688588142395, "step": 2560 }, { "epoch": 2.3867660764212486, "grad_norm": 1.347696034549636, "learning_rate": 4.54263030721436e-06, "loss": 1.5969, "step": 2561 }, { "epoch": 2.3867660764212486, "loss_reasoning": 0.4707726538181305, "loss_utility": 1.2620470523834229, "step": 2561 }, { "epoch": 2.387698042870457, "grad_norm": 1.0624602787441826, "learning_rate": 4.5357266137383505e-06, "loss": 1.2283, "step": 2562 }, { "epoch": 2.387698042870457, "loss_reasoning": 0.5137765407562256, "loss_utility": 1.6728622913360596, "step": 2562 }, { "epoch": 2.3886300093196646, "grad_norm": 1.143559276573376, "learning_rate": 4.52882292026234e-06, "loss": 1.5048, "step": 2563 }, { "epoch": 2.3886300093196646, "loss_reasoning": 0.5079482197761536, "loss_utility": 1.0539605617523193, "step": 2563 }, { "epoch": 2.3895619757688724, "grad_norm": 1.305773723453341, "learning_rate": 4.521919226786331e-06, "loss": 1.5729, "step": 2564 }, { "epoch": 2.3895619757688724, "loss_reasoning": 0.49168288707733154, "loss_utility": 1.1388936042785645, "step": 2564 }, { "epoch": 2.39049394221808, "grad_norm": 1.1621225335313021, "learning_rate": 4.515015533310321e-06, "loss": 1.466, "step": 2565 }, { "epoch": 2.39049394221808, "loss_reasoning": 0.4850132465362549, "loss_utility": 1.6384295225143433, "step": 2565 }, { "epoch": 2.391425908667288, "grad_norm": 1.1489802397453066, "learning_rate": 4.508111839834312e-06, "loss": 1.7169, "step": 2566 }, { "epoch": 2.391425908667288, "loss_reasoning": 0.5114678144454956, "loss_utility": 0.7305351495742798, "step": 2566 }, { "epoch": 2.392357875116496, "grad_norm": 1.0335531623752887, "learning_rate": 4.501208146358302e-06, "loss": 1.2123, "step": 2567 }, { "epoch": 2.392357875116496, "loss_reasoning": 0.49379026889801025, "loss_utility": 1.1418812274932861, "step": 2567 }, { "epoch": 2.3932898415657036, "grad_norm": 1.2609735631158403, "learning_rate": 4.494304452882292e-06, "loss": 1.5081, "step": 2568 }, { "epoch": 2.3932898415657036, "loss_reasoning": 0.5520813465118408, "loss_utility": 0.7308909893035889, "step": 2568 }, { "epoch": 2.3942218080149114, "grad_norm": 1.29756577634993, "learning_rate": 4.487400759406283e-06, "loss": 1.6445, "step": 2569 }, { "epoch": 2.3942218080149114, "loss_reasoning": 0.4949260652065277, "loss_utility": 0.4797379970550537, "step": 2569 }, { "epoch": 2.395153774464119, "grad_norm": 1.1587550174297936, "learning_rate": 4.480497065930273e-06, "loss": 1.3619, "step": 2570 }, { "epoch": 2.395153774464119, "loss_reasoning": 0.4566725492477417, "loss_utility": 0.8695467114448547, "step": 2570 }, { "epoch": 2.396085740913327, "grad_norm": 1.1376358933744581, "learning_rate": 4.473593372454264e-06, "loss": 1.5527, "step": 2571 }, { "epoch": 2.396085740913327, "loss_reasoning": 0.46387168765068054, "loss_utility": 0.8441210985183716, "step": 2571 }, { "epoch": 2.3970177073625347, "grad_norm": 1.1730623615403324, "learning_rate": 4.466689678978254e-06, "loss": 1.7016, "step": 2572 }, { "epoch": 2.3970177073625347, "loss_reasoning": 0.5188227891921997, "loss_utility": 0.4168241620063782, "step": 2572 }, { "epoch": 2.397949673811743, "grad_norm": 1.404105514016671, "learning_rate": 4.459785985502244e-06, "loss": 1.2191, "step": 2573 }, { "epoch": 2.397949673811743, "loss_reasoning": 0.44793105125427246, "loss_utility": 0.9171174764633179, "step": 2573 }, { "epoch": 2.3988816402609507, "grad_norm": 1.3292522773465432, "learning_rate": 4.452882292026234e-06, "loss": 1.385, "step": 2574 }, { "epoch": 2.3988816402609507, "loss_reasoning": 0.48806464672088623, "loss_utility": 1.0964596271514893, "step": 2574 }, { "epoch": 2.3998136067101585, "grad_norm": 1.267634965362115, "learning_rate": 4.445978598550225e-06, "loss": 1.53, "step": 2575 }, { "epoch": 2.3998136067101585, "loss_reasoning": 0.567277729511261, "loss_utility": 0.8832889795303345, "step": 2575 }, { "epoch": 2.4007455731593663, "grad_norm": 1.0908704254911568, "learning_rate": 4.439074905074215e-06, "loss": 1.4415, "step": 2576 }, { "epoch": 2.4007455731593663, "loss_reasoning": 0.4872877895832062, "loss_utility": 1.0594635009765625, "step": 2576 }, { "epoch": 2.401677539608574, "grad_norm": 1.1835265698611885, "learning_rate": 4.4321712115982055e-06, "loss": 1.8175, "step": 2577 }, { "epoch": 2.401677539608574, "loss_reasoning": 0.49167680740356445, "loss_utility": 1.0897676944732666, "step": 2577 }, { "epoch": 2.402609506057782, "grad_norm": 1.2501589134794124, "learning_rate": 4.4252675181221954e-06, "loss": 1.4369, "step": 2578 }, { "epoch": 2.402609506057782, "loss_reasoning": 0.4949447512626648, "loss_utility": 0.956682562828064, "step": 2578 }, { "epoch": 2.4035414725069897, "grad_norm": 1.264210781065913, "learning_rate": 4.418363824646186e-06, "loss": 1.476, "step": 2579 }, { "epoch": 2.4035414725069897, "loss_reasoning": 0.4623398780822754, "loss_utility": 0.7474273443222046, "step": 2579 }, { "epoch": 2.4044734389561975, "grad_norm": 1.3546762737069762, "learning_rate": 4.411460131170177e-06, "loss": 1.6412, "step": 2580 }, { "epoch": 2.4044734389561975, "loss_reasoning": 0.552714467048645, "loss_utility": 0.6858121156692505, "step": 2580 }, { "epoch": 2.4054054054054053, "grad_norm": 1.0269770761208339, "learning_rate": 4.404556437694167e-06, "loss": 1.1335, "step": 2581 }, { "epoch": 2.4054054054054053, "loss_reasoning": 0.45734986662864685, "loss_utility": 0.844044029712677, "step": 2581 }, { "epoch": 2.406337371854613, "grad_norm": 1.1086956554472431, "learning_rate": 4.3976527442181575e-06, "loss": 1.3071, "step": 2582 }, { "epoch": 2.406337371854613, "loss_reasoning": 0.5250961780548096, "loss_utility": 1.008692979812622, "step": 2582 }, { "epoch": 2.407269338303821, "grad_norm": 1.2501747154959635, "learning_rate": 4.390749050742147e-06, "loss": 1.4722, "step": 2583 }, { "epoch": 2.407269338303821, "loss_reasoning": 0.4963870346546173, "loss_utility": 0.711496114730835, "step": 2583 }, { "epoch": 2.408201304753029, "grad_norm": 1.470196251213617, "learning_rate": 4.383845357266137e-06, "loss": 1.4243, "step": 2584 }, { "epoch": 2.408201304753029, "loss_reasoning": 0.5170688629150391, "loss_utility": 0.8490813970565796, "step": 2584 }, { "epoch": 2.409133271202237, "grad_norm": 1.2609765803642303, "learning_rate": 4.376941663790128e-06, "loss": 1.5161, "step": 2585 }, { "epoch": 2.409133271202237, "loss_reasoning": 0.5144667029380798, "loss_utility": 0.987678050994873, "step": 2585 }, { "epoch": 2.4100652376514446, "grad_norm": 1.3104886547238532, "learning_rate": 4.370037970314118e-06, "loss": 1.4202, "step": 2586 }, { "epoch": 2.4100652376514446, "loss_reasoning": 0.42351487278938293, "loss_utility": 0.7072133421897888, "step": 2586 }, { "epoch": 2.4109972041006524, "grad_norm": 1.1139269945722934, "learning_rate": 4.363134276838109e-06, "loss": 1.572, "step": 2587 }, { "epoch": 2.4109972041006524, "loss_reasoning": 0.4999883770942688, "loss_utility": 0.6502177715301514, "step": 2587 }, { "epoch": 2.4119291705498602, "grad_norm": 1.101428487838841, "learning_rate": 4.3562305833620985e-06, "loss": 1.3275, "step": 2588 }, { "epoch": 2.4119291705498602, "loss_reasoning": 0.49216294288635254, "loss_utility": 0.8840352892875671, "step": 2588 }, { "epoch": 2.412861136999068, "grad_norm": 1.1033492576589754, "learning_rate": 4.349326889886089e-06, "loss": 1.4309, "step": 2589 }, { "epoch": 2.412861136999068, "loss_reasoning": 0.48409706354141235, "loss_utility": 0.594814121723175, "step": 2589 }, { "epoch": 2.413793103448276, "grad_norm": 1.258706380819311, "learning_rate": 4.34242319641008e-06, "loss": 1.2642, "step": 2590 }, { "epoch": 2.413793103448276, "loss_reasoning": 0.43696945905685425, "loss_utility": 1.223867654800415, "step": 2590 }, { "epoch": 2.4147250698974836, "grad_norm": 1.358886516313383, "learning_rate": 4.33551950293407e-06, "loss": 1.6035, "step": 2591 }, { "epoch": 2.4147250698974836, "loss_reasoning": 0.4308582544326782, "loss_utility": 1.1420645713806152, "step": 2591 }, { "epoch": 2.4156570363466914, "grad_norm": 1.2402103947464924, "learning_rate": 4.328615809458061e-06, "loss": 1.3342, "step": 2592 }, { "epoch": 2.4156570363466914, "loss_reasoning": 0.5325602889060974, "loss_utility": 1.4031524658203125, "step": 2592 }, { "epoch": 2.416589002795899, "grad_norm": 1.1123474735750734, "learning_rate": 4.3217121159820505e-06, "loss": 1.6626, "step": 2593 }, { "epoch": 2.416589002795899, "loss_reasoning": 0.5150107145309448, "loss_utility": 1.1087243556976318, "step": 2593 }, { "epoch": 2.417520969245107, "grad_norm": 1.082500477277367, "learning_rate": 4.314808422506041e-06, "loss": 1.315, "step": 2594 }, { "epoch": 2.417520969245107, "loss_reasoning": 0.43513545393943787, "loss_utility": 1.7624133825302124, "step": 2594 }, { "epoch": 2.418452935694315, "grad_norm": 1.2780148306075707, "learning_rate": 4.307904729030031e-06, "loss": 1.8, "step": 2595 }, { "epoch": 2.418452935694315, "loss_reasoning": 0.535961389541626, "loss_utility": 0.790107011795044, "step": 2595 }, { "epoch": 2.419384902143523, "grad_norm": 1.1098573416308672, "learning_rate": 4.301001035554022e-06, "loss": 1.2661, "step": 2596 }, { "epoch": 2.419384902143523, "loss_reasoning": 0.5638278722763062, "loss_utility": 0.8605947494506836, "step": 2596 }, { "epoch": 2.4203168685927308, "grad_norm": 1.0141635071714428, "learning_rate": 4.294097342078012e-06, "loss": 1.5362, "step": 2597 }, { "epoch": 2.4203168685927308, "loss_reasoning": 0.508808970451355, "loss_utility": 1.309438943862915, "step": 2597 }, { "epoch": 2.4212488350419386, "grad_norm": 1.343221058211101, "learning_rate": 4.2871936486020025e-06, "loss": 1.6244, "step": 2598 }, { "epoch": 2.4212488350419386, "loss_reasoning": 0.4321097135543823, "loss_utility": 0.8106111288070679, "step": 2598 }, { "epoch": 2.4221808014911463, "grad_norm": 1.264625877880273, "learning_rate": 4.280289955125993e-06, "loss": 1.4073, "step": 2599 }, { "epoch": 2.4221808014911463, "loss_reasoning": 0.46587586402893066, "loss_utility": 1.1452581882476807, "step": 2599 }, { "epoch": 2.423112767940354, "grad_norm": 1.2185790862988335, "learning_rate": 4.273386261649983e-06, "loss": 1.5048, "step": 2600 }, { "epoch": 2.423112767940354, "loss_reasoning": 0.48553645610809326, "loss_utility": 0.6032605767250061, "step": 2600 }, { "epoch": 2.424044734389562, "grad_norm": 1.8435513005848108, "learning_rate": 4.266482568173974e-06, "loss": 1.7383, "step": 2601 }, { "epoch": 2.424044734389562, "loss_reasoning": 0.48380762338638306, "loss_utility": 1.223702073097229, "step": 2601 }, { "epoch": 2.4249767008387697, "grad_norm": 1.2458860506579332, "learning_rate": 4.259578874697964e-06, "loss": 1.4734, "step": 2602 }, { "epoch": 2.4249767008387697, "loss_reasoning": 0.5036787986755371, "loss_utility": 1.190819501876831, "step": 2602 }, { "epoch": 2.4259086672879775, "grad_norm": 1.0558442456487573, "learning_rate": 4.2526751812219544e-06, "loss": 1.2815, "step": 2603 }, { "epoch": 2.4259086672879775, "loss_reasoning": 0.504837691783905, "loss_utility": 0.908928394317627, "step": 2603 }, { "epoch": 2.4268406337371853, "grad_norm": 1.145508588515986, "learning_rate": 4.245771487745944e-06, "loss": 1.3684, "step": 2604 }, { "epoch": 2.4268406337371853, "loss_reasoning": 0.5009395480155945, "loss_utility": 1.4755853414535522, "step": 2604 }, { "epoch": 2.427772600186393, "grad_norm": 1.1202126918613473, "learning_rate": 4.238867794269935e-06, "loss": 1.5393, "step": 2605 }, { "epoch": 2.427772600186393, "loss_reasoning": 0.5465750694274902, "loss_utility": 1.5730788707733154, "step": 2605 }, { "epoch": 2.4287045666356013, "grad_norm": 1.2177896832776482, "learning_rate": 4.231964100793925e-06, "loss": 1.6489, "step": 2606 }, { "epoch": 2.4287045666356013, "loss_reasoning": 0.46980178356170654, "loss_utility": 1.3599615097045898, "step": 2606 }, { "epoch": 2.429636533084809, "grad_norm": 1.1872208170928547, "learning_rate": 4.225060407317915e-06, "loss": 1.5036, "step": 2607 }, { "epoch": 2.429636533084809, "loss_reasoning": 0.49923837184906006, "loss_utility": 1.2583367824554443, "step": 2607 }, { "epoch": 2.430568499534017, "grad_norm": 1.4468956202190428, "learning_rate": 4.2181567138419055e-06, "loss": 1.6599, "step": 2608 }, { "epoch": 2.430568499534017, "loss_reasoning": 0.45299452543258667, "loss_utility": 0.6761304140090942, "step": 2608 }, { "epoch": 2.4315004659832247, "grad_norm": 0.9903036980905938, "learning_rate": 4.211253020365896e-06, "loss": 1.3104, "step": 2609 }, { "epoch": 2.4315004659832247, "loss_reasoning": 0.477797269821167, "loss_utility": 0.5371672511100769, "step": 2609 }, { "epoch": 2.4324324324324325, "grad_norm": 1.3130504863144288, "learning_rate": 4.204349326889887e-06, "loss": 1.6226, "step": 2610 }, { "epoch": 2.4324324324324325, "loss_reasoning": 0.47349897027015686, "loss_utility": 2.11080265045166, "step": 2610 }, { "epoch": 2.4333643988816402, "grad_norm": 1.1591659592024497, "learning_rate": 4.197445633413877e-06, "loss": 1.6444, "step": 2611 }, { "epoch": 2.4333643988816402, "loss_reasoning": 0.5100679397583008, "loss_utility": 0.8168165683746338, "step": 2611 }, { "epoch": 2.434296365330848, "grad_norm": 1.2192043594475201, "learning_rate": 4.190541939937867e-06, "loss": 1.4946, "step": 2612 }, { "epoch": 2.434296365330848, "loss_reasoning": 0.42554783821105957, "loss_utility": 0.5393787622451782, "step": 2612 }, { "epoch": 2.435228331780056, "grad_norm": 1.1068896909647747, "learning_rate": 4.1836382464618575e-06, "loss": 1.2304, "step": 2613 }, { "epoch": 2.435228331780056, "loss_reasoning": 0.49931347370147705, "loss_utility": 0.7594591975212097, "step": 2613 }, { "epoch": 2.4361602982292636, "grad_norm": 1.13082577869441, "learning_rate": 4.176734552985847e-06, "loss": 1.3257, "step": 2614 }, { "epoch": 2.4361602982292636, "loss_reasoning": 0.5214176774024963, "loss_utility": 0.9882293939590454, "step": 2614 }, { "epoch": 2.4370922646784714, "grad_norm": 1.2330556703268032, "learning_rate": 4.169830859509838e-06, "loss": 1.4664, "step": 2615 }, { "epoch": 2.4370922646784714, "loss_reasoning": 0.5053625106811523, "loss_utility": 0.6667697429656982, "step": 2615 }, { "epoch": 2.438024231127679, "grad_norm": 1.1655491549224999, "learning_rate": 4.162927166033828e-06, "loss": 1.2018, "step": 2616 }, { "epoch": 2.438024231127679, "loss_reasoning": 0.4265976846218109, "loss_utility": 1.263250470161438, "step": 2616 }, { "epoch": 2.4389561975768874, "grad_norm": 1.3535841618142883, "learning_rate": 4.156023472557819e-06, "loss": 1.6412, "step": 2617 }, { "epoch": 2.4389561975768874, "loss_reasoning": 0.4756677746772766, "loss_utility": 0.7144763469696045, "step": 2617 }, { "epoch": 2.439888164026095, "grad_norm": 1.222698420196986, "learning_rate": 4.149119779081809e-06, "loss": 1.6078, "step": 2618 }, { "epoch": 2.439888164026095, "loss_reasoning": 0.5044302344322205, "loss_utility": 0.5499176383018494, "step": 2618 }, { "epoch": 2.440820130475303, "grad_norm": 1.306280387588891, "learning_rate": 4.142216085605799e-06, "loss": 1.5803, "step": 2619 }, { "epoch": 2.440820130475303, "loss_reasoning": 0.5021287202835083, "loss_utility": 0.9208494424819946, "step": 2619 }, { "epoch": 2.441752096924511, "grad_norm": 1.3022078015431238, "learning_rate": 4.13531239212979e-06, "loss": 1.4523, "step": 2620 }, { "epoch": 2.441752096924511, "loss_reasoning": 0.5037083625793457, "loss_utility": 0.5424047708511353, "step": 2620 }, { "epoch": 2.4426840633737186, "grad_norm": 1.114697891788754, "learning_rate": 4.12840869865378e-06, "loss": 1.4589, "step": 2621 }, { "epoch": 2.4426840633737186, "loss_reasoning": 0.46244797110557556, "loss_utility": 1.0908292531967163, "step": 2621 }, { "epoch": 2.4436160298229264, "grad_norm": 1.1557851861219002, "learning_rate": 4.121505005177771e-06, "loss": 1.4271, "step": 2622 }, { "epoch": 2.4436160298229264, "loss_reasoning": 0.45345237851142883, "loss_utility": 0.6986063122749329, "step": 2622 }, { "epoch": 2.444547996272134, "grad_norm": 1.1759318461155812, "learning_rate": 4.114601311701761e-06, "loss": 1.4931, "step": 2623 }, { "epoch": 2.444547996272134, "loss_reasoning": 0.5191074013710022, "loss_utility": 0.564383327960968, "step": 2623 }, { "epoch": 2.445479962721342, "grad_norm": 1.0477976447979316, "learning_rate": 4.107697618225751e-06, "loss": 1.2233, "step": 2624 }, { "epoch": 2.445479962721342, "loss_reasoning": 0.47395583987236023, "loss_utility": 1.509088397026062, "step": 2624 }, { "epoch": 2.4464119291705497, "grad_norm": 1.2240441145203893, "learning_rate": 4.100793924749741e-06, "loss": 1.6398, "step": 2625 }, { "epoch": 2.4464119291705497, "loss_reasoning": 0.45623549818992615, "loss_utility": 0.9529010057449341, "step": 2625 }, { "epoch": 2.4473438956197575, "grad_norm": 1.077723458658078, "learning_rate": 4.093890231273732e-06, "loss": 1.4091, "step": 2626 }, { "epoch": 2.4473438956197575, "loss_reasoning": 0.4928222894668579, "loss_utility": 1.172088384628296, "step": 2626 }, { "epoch": 2.4482758620689653, "grad_norm": 1.3479806950419888, "learning_rate": 4.086986537797722e-06, "loss": 1.5768, "step": 2627 }, { "epoch": 2.4482758620689653, "loss_reasoning": 0.5306291580200195, "loss_utility": 0.888891339302063, "step": 2627 }, { "epoch": 2.4492078285181735, "grad_norm": 1.2354944050947545, "learning_rate": 4.0800828443217126e-06, "loss": 1.3103, "step": 2628 }, { "epoch": 2.4492078285181735, "loss_reasoning": 0.434635192155838, "loss_utility": 0.489699125289917, "step": 2628 }, { "epoch": 2.4501397949673813, "grad_norm": 1.1222881638511122, "learning_rate": 4.073179150845703e-06, "loss": 1.2815, "step": 2629 }, { "epoch": 2.4501397949673813, "loss_reasoning": 0.482199102640152, "loss_utility": 0.6328914761543274, "step": 2629 }, { "epoch": 2.451071761416589, "grad_norm": 1.266310959028195, "learning_rate": 4.066275457369693e-06, "loss": 1.6486, "step": 2630 }, { "epoch": 2.451071761416589, "loss_reasoning": 0.5238840579986572, "loss_utility": 1.824249267578125, "step": 2630 }, { "epoch": 2.452003727865797, "grad_norm": 1.115810566004046, "learning_rate": 4.059371763893684e-06, "loss": 1.6353, "step": 2631 }, { "epoch": 2.452003727865797, "loss_reasoning": 0.45039063692092896, "loss_utility": 1.123892903327942, "step": 2631 }, { "epoch": 2.4529356943150047, "grad_norm": 1.1178974407414954, "learning_rate": 4.052468070417674e-06, "loss": 1.3305, "step": 2632 }, { "epoch": 2.4529356943150047, "loss_reasoning": 0.518867015838623, "loss_utility": 1.2363131046295166, "step": 2632 }, { "epoch": 2.4538676607642125, "grad_norm": 1.2507269448178242, "learning_rate": 4.0455643769416645e-06, "loss": 1.5428, "step": 2633 }, { "epoch": 2.4538676607642125, "loss_reasoning": 0.5150942802429199, "loss_utility": 0.9426010251045227, "step": 2633 }, { "epoch": 2.4547996272134203, "grad_norm": 1.0216652312081795, "learning_rate": 4.0386606834656544e-06, "loss": 1.4364, "step": 2634 }, { "epoch": 2.4547996272134203, "loss_reasoning": 0.5077718496322632, "loss_utility": 1.022294521331787, "step": 2634 }, { "epoch": 2.455731593662628, "grad_norm": 1.2393391007911838, "learning_rate": 4.031756989989644e-06, "loss": 1.4874, "step": 2635 }, { "epoch": 2.455731593662628, "loss_reasoning": 0.4181564450263977, "loss_utility": 1.8050405979156494, "step": 2635 }, { "epoch": 2.456663560111836, "grad_norm": 1.228644947468823, "learning_rate": 4.024853296513635e-06, "loss": 1.7384, "step": 2636 }, { "epoch": 2.456663560111836, "loss_reasoning": 0.47379255294799805, "loss_utility": 1.176064372062683, "step": 2636 }, { "epoch": 2.4575955265610436, "grad_norm": 1.3404553408782909, "learning_rate": 4.017949603037625e-06, "loss": 1.5247, "step": 2637 }, { "epoch": 2.4575955265610436, "loss_reasoning": 0.555799126625061, "loss_utility": 1.0554020404815674, "step": 2637 }, { "epoch": 2.4585274930102514, "grad_norm": 1.514701555201612, "learning_rate": 4.011045909561616e-06, "loss": 1.319, "step": 2638 }, { "epoch": 2.4585274930102514, "loss_reasoning": 0.5221245884895325, "loss_utility": 0.8380944132804871, "step": 2638 }, { "epoch": 2.4594594594594597, "grad_norm": 1.0211074420291386, "learning_rate": 4.004142216085606e-06, "loss": 1.4419, "step": 2639 }, { "epoch": 2.4594594594594597, "loss_reasoning": 0.4222759008407593, "loss_utility": 1.1851208209991455, "step": 2639 }, { "epoch": 2.4603914259086674, "grad_norm": 1.4017980062650577, "learning_rate": 3.997238522609596e-06, "loss": 1.4311, "step": 2640 }, { "epoch": 2.4603914259086674, "loss_reasoning": 0.4801224172115326, "loss_utility": 0.46192800998687744, "step": 2640 }, { "epoch": 2.4613233923578752, "grad_norm": 1.1631493083474709, "learning_rate": 3.990334829133587e-06, "loss": 1.5564, "step": 2641 }, { "epoch": 2.4613233923578752, "loss_reasoning": 0.4746457040309906, "loss_utility": 0.6774663925170898, "step": 2641 }, { "epoch": 2.462255358807083, "grad_norm": 1.1862577350925476, "learning_rate": 3.983431135657577e-06, "loss": 1.2937, "step": 2642 }, { "epoch": 2.462255358807083, "loss_reasoning": 0.45315489172935486, "loss_utility": 1.0432631969451904, "step": 2642 }, { "epoch": 2.463187325256291, "grad_norm": 1.0436941044609611, "learning_rate": 3.976527442181568e-06, "loss": 1.47, "step": 2643 }, { "epoch": 2.463187325256291, "loss_reasoning": 0.563163697719574, "loss_utility": 1.0281163454055786, "step": 2643 }, { "epoch": 2.4641192917054986, "grad_norm": 1.0815870622081312, "learning_rate": 3.9696237487055575e-06, "loss": 1.4712, "step": 2644 }, { "epoch": 2.4641192917054986, "loss_reasoning": 0.39690646529197693, "loss_utility": 1.2029294967651367, "step": 2644 }, { "epoch": 2.4650512581547064, "grad_norm": 1.3175121799280334, "learning_rate": 3.962720055229548e-06, "loss": 1.4776, "step": 2645 }, { "epoch": 2.4650512581547064, "loss_reasoning": 0.4971380829811096, "loss_utility": 0.6754223108291626, "step": 2645 }, { "epoch": 2.465983224603914, "grad_norm": 1.2627931147439375, "learning_rate": 3.955816361753538e-06, "loss": 1.3927, "step": 2646 }, { "epoch": 2.465983224603914, "loss_reasoning": 0.49354100227355957, "loss_utility": 0.7305111289024353, "step": 2646 }, { "epoch": 2.466915191053122, "grad_norm": 0.9973593017100236, "learning_rate": 3.948912668277529e-06, "loss": 1.3685, "step": 2647 }, { "epoch": 2.466915191053122, "loss_reasoning": 0.5186005234718323, "loss_utility": 0.9825685620307922, "step": 2647 }, { "epoch": 2.4678471575023297, "grad_norm": 1.297315668061093, "learning_rate": 3.942008974801519e-06, "loss": 1.3068, "step": 2648 }, { "epoch": 2.4678471575023297, "loss_reasoning": 0.47145065665245056, "loss_utility": 0.9749908447265625, "step": 2648 }, { "epoch": 2.4687791239515375, "grad_norm": 1.099473440243206, "learning_rate": 3.9351052813255095e-06, "loss": 1.4236, "step": 2649 }, { "epoch": 2.4687791239515375, "loss_reasoning": 0.5423617362976074, "loss_utility": 0.49142512679100037, "step": 2649 }, { "epoch": 2.4697110904007458, "grad_norm": 1.0670823464657178, "learning_rate": 3.9282015878495e-06, "loss": 1.2456, "step": 2650 }, { "epoch": 2.4697110904007458, "loss_reasoning": 0.45141124725341797, "loss_utility": 1.3474546670913696, "step": 2650 }, { "epoch": 2.4706430568499536, "grad_norm": 1.3284596255536305, "learning_rate": 3.92129789437349e-06, "loss": 1.4677, "step": 2651 }, { "epoch": 2.4706430568499536, "loss_reasoning": 0.5125772953033447, "loss_utility": 1.297008752822876, "step": 2651 }, { "epoch": 2.4715750232991613, "grad_norm": 1.0976119309630163, "learning_rate": 3.914394200897481e-06, "loss": 1.5116, "step": 2652 }, { "epoch": 2.4715750232991613, "loss_reasoning": 0.5057238340377808, "loss_utility": 0.5208536386489868, "step": 2652 }, { "epoch": 2.472506989748369, "grad_norm": 1.159423904975747, "learning_rate": 3.907490507421471e-06, "loss": 1.5321, "step": 2653 }, { "epoch": 2.472506989748369, "loss_reasoning": 0.4968550205230713, "loss_utility": 0.8782402873039246, "step": 2653 }, { "epoch": 2.473438956197577, "grad_norm": 1.2611065888901596, "learning_rate": 3.9005868139454615e-06, "loss": 1.4046, "step": 2654 }, { "epoch": 2.473438956197577, "loss_reasoning": 0.45210814476013184, "loss_utility": 0.8293066024780273, "step": 2654 }, { "epoch": 2.4743709226467847, "grad_norm": 1.1121123933266548, "learning_rate": 3.893683120469451e-06, "loss": 1.7812, "step": 2655 }, { "epoch": 2.4743709226467847, "loss_reasoning": 0.4879489243030548, "loss_utility": 1.2579823732376099, "step": 2655 }, { "epoch": 2.4753028890959925, "grad_norm": 1.2818677979638613, "learning_rate": 3.886779426993442e-06, "loss": 2.0669, "step": 2656 }, { "epoch": 2.4753028890959925, "loss_reasoning": 0.44248324632644653, "loss_utility": 0.6897482872009277, "step": 2656 }, { "epoch": 2.4762348555452003, "grad_norm": 1.4677924435803416, "learning_rate": 3.879875733517432e-06, "loss": 1.1779, "step": 2657 }, { "epoch": 2.4762348555452003, "loss_reasoning": 0.4627622067928314, "loss_utility": 0.7498475313186646, "step": 2657 }, { "epoch": 2.477166821994408, "grad_norm": 1.3440190279690338, "learning_rate": 3.872972040041422e-06, "loss": 1.5105, "step": 2658 }, { "epoch": 2.477166821994408, "loss_reasoning": 0.47496020793914795, "loss_utility": 0.7342349886894226, "step": 2658 }, { "epoch": 2.478098788443616, "grad_norm": 1.2155828899611876, "learning_rate": 3.866068346565413e-06, "loss": 1.4551, "step": 2659 }, { "epoch": 2.478098788443616, "loss_reasoning": 0.5109537839889526, "loss_utility": 2.1174755096435547, "step": 2659 }, { "epoch": 2.4790307548928237, "grad_norm": 1.1545132148751256, "learning_rate": 3.859164653089403e-06, "loss": 1.6346, "step": 2660 }, { "epoch": 2.4790307548928237, "loss_reasoning": 0.5327140092849731, "loss_utility": 0.7292211651802063, "step": 2660 }, { "epoch": 2.479962721342032, "grad_norm": 1.246012886657352, "learning_rate": 3.852260959613394e-06, "loss": 1.4546, "step": 2661 }, { "epoch": 2.479962721342032, "loss_reasoning": 0.4380721151828766, "loss_utility": 1.0684434175491333, "step": 2661 }, { "epoch": 2.4808946877912397, "grad_norm": 1.6014649072349763, "learning_rate": 3.845357266137384e-06, "loss": 1.5579, "step": 2662 }, { "epoch": 2.4808946877912397, "loss_reasoning": 0.5215083360671997, "loss_utility": 1.3202636241912842, "step": 2662 }, { "epoch": 2.4818266542404475, "grad_norm": 1.4065174004136831, "learning_rate": 3.838453572661374e-06, "loss": 1.3957, "step": 2663 }, { "epoch": 2.4818266542404475, "loss_reasoning": 0.4550400376319885, "loss_utility": 0.9591301679611206, "step": 2663 }, { "epoch": 2.4827586206896552, "grad_norm": 1.137801716375774, "learning_rate": 3.8315498791853645e-06, "loss": 1.5086, "step": 2664 }, { "epoch": 2.4827586206896552, "loss_reasoning": 0.46582016348838806, "loss_utility": 0.6533517837524414, "step": 2664 }, { "epoch": 2.483690587138863, "grad_norm": 1.0270645304628891, "learning_rate": 3.8246461857093544e-06, "loss": 1.1864, "step": 2665 }, { "epoch": 2.483690587138863, "loss_reasoning": 0.47964727878570557, "loss_utility": 0.581100583076477, "step": 2665 }, { "epoch": 2.484622553588071, "grad_norm": 1.3182399732589352, "learning_rate": 3.817742492233345e-06, "loss": 1.4664, "step": 2666 }, { "epoch": 2.484622553588071, "loss_reasoning": 0.48844850063323975, "loss_utility": 0.8558787107467651, "step": 2666 }, { "epoch": 2.4855545200372786, "grad_norm": 1.2934321300853637, "learning_rate": 3.8108387987573355e-06, "loss": 1.4366, "step": 2667 }, { "epoch": 2.4855545200372786, "loss_reasoning": 0.4385870695114136, "loss_utility": 1.54026198387146, "step": 2667 }, { "epoch": 2.4864864864864864, "grad_norm": 1.1288491233139268, "learning_rate": 3.8039351052813258e-06, "loss": 1.4459, "step": 2668 }, { "epoch": 2.4864864864864864, "loss_reasoning": 0.4231942296028137, "loss_utility": 2.022404432296753, "step": 2668 }, { "epoch": 2.487418452935694, "grad_norm": 1.227492405876395, "learning_rate": 3.7970314118053165e-06, "loss": 1.6126, "step": 2669 }, { "epoch": 2.487418452935694, "loss_reasoning": 0.48461753129959106, "loss_utility": 1.2793748378753662, "step": 2669 }, { "epoch": 2.488350419384902, "grad_norm": 1.014711494823363, "learning_rate": 3.790127718329307e-06, "loss": 1.4031, "step": 2670 }, { "epoch": 2.488350419384902, "loss_reasoning": 0.5595571994781494, "loss_utility": 0.8835980892181396, "step": 2670 }, { "epoch": 2.4892823858341098, "grad_norm": 1.1320024560837512, "learning_rate": 3.783224024853297e-06, "loss": 1.5844, "step": 2671 }, { "epoch": 2.4892823858341098, "loss_reasoning": 0.5013367533683777, "loss_utility": 0.5587217807769775, "step": 2671 }, { "epoch": 2.490214352283318, "grad_norm": 1.033648103681425, "learning_rate": 3.7763203313772874e-06, "loss": 1.2401, "step": 2672 }, { "epoch": 2.490214352283318, "loss_reasoning": 0.5623624324798584, "loss_utility": 0.7873297929763794, "step": 2672 }, { "epoch": 2.491146318732526, "grad_norm": 1.207291781489137, "learning_rate": 3.7694166379012777e-06, "loss": 1.6672, "step": 2673 }, { "epoch": 2.491146318732526, "loss_reasoning": 0.4343254864215851, "loss_utility": 0.7142618894577026, "step": 2673 }, { "epoch": 2.4920782851817336, "grad_norm": 1.1147664400734756, "learning_rate": 3.7625129444252676e-06, "loss": 1.4671, "step": 2674 }, { "epoch": 2.4920782851817336, "loss_reasoning": 0.5161176919937134, "loss_utility": 0.9758065938949585, "step": 2674 }, { "epoch": 2.4930102516309414, "grad_norm": 1.2813968078167126, "learning_rate": 3.755609250949258e-06, "loss": 1.521, "step": 2675 }, { "epoch": 2.4930102516309414, "loss_reasoning": 0.4487941563129425, "loss_utility": 0.8736283183097839, "step": 2675 }, { "epoch": 2.493942218080149, "grad_norm": 1.293456837282232, "learning_rate": 3.7487055574732483e-06, "loss": 1.5958, "step": 2676 }, { "epoch": 2.493942218080149, "loss_reasoning": 0.5352649688720703, "loss_utility": 1.6332011222839355, "step": 2676 }, { "epoch": 2.494874184529357, "grad_norm": 1.1624748277258057, "learning_rate": 3.7418018639972386e-06, "loss": 1.5779, "step": 2677 }, { "epoch": 2.494874184529357, "loss_reasoning": 0.44523829221725464, "loss_utility": 0.6944320201873779, "step": 2677 }, { "epoch": 2.4958061509785647, "grad_norm": 1.069136212652783, "learning_rate": 3.734898170521229e-06, "loss": 1.149, "step": 2678 }, { "epoch": 2.4958061509785647, "loss_reasoning": 0.4665018916130066, "loss_utility": 1.1177048683166504, "step": 2678 }, { "epoch": 2.4967381174277725, "grad_norm": 0.9924006937181743, "learning_rate": 3.7279944770452196e-06, "loss": 1.6375, "step": 2679 }, { "epoch": 2.4967381174277725, "loss_reasoning": 0.558539867401123, "loss_utility": 0.8456410765647888, "step": 2679 }, { "epoch": 2.4976700838769803, "grad_norm": 1.1991262054041998, "learning_rate": 3.72109078356921e-06, "loss": 1.3911, "step": 2680 }, { "epoch": 2.4976700838769803, "loss_reasoning": 0.4761424660682678, "loss_utility": 0.6538905501365662, "step": 2680 }, { "epoch": 2.498602050326188, "grad_norm": 1.2597345482415923, "learning_rate": 3.7141870900932002e-06, "loss": 1.4383, "step": 2681 }, { "epoch": 2.498602050326188, "loss_reasoning": 0.4702039361000061, "loss_utility": 1.067782998085022, "step": 2681 }, { "epoch": 2.499534016775396, "grad_norm": 1.1345591115283045, "learning_rate": 3.7072833966171905e-06, "loss": 1.5818, "step": 2682 }, { "epoch": 2.499534016775396, "loss_reasoning": 0.5609228014945984, "loss_utility": 1.5198559761047363, "step": 2682 }, { "epoch": 2.500465983224604, "grad_norm": 1.1981747158475595, "learning_rate": 3.700379703141181e-06, "loss": 1.6085, "step": 2683 }, { "epoch": 2.500465983224604, "loss_reasoning": 0.43913576006889343, "loss_utility": 1.3870165348052979, "step": 2683 }, { "epoch": 2.501397949673812, "grad_norm": 1.2360856706299062, "learning_rate": 3.693476009665171e-06, "loss": 1.5373, "step": 2684 }, { "epoch": 2.501397949673812, "loss_reasoning": 0.5457977652549744, "loss_utility": 0.7915269732475281, "step": 2684 }, { "epoch": 2.5023299161230197, "grad_norm": 1.0723480794217992, "learning_rate": 3.6865723161891615e-06, "loss": 1.6167, "step": 2685 }, { "epoch": 2.5023299161230197, "loss_reasoning": 0.5285013914108276, "loss_utility": 0.6345059275627136, "step": 2685 }, { "epoch": 2.5032618825722275, "grad_norm": 1.0474929785178655, "learning_rate": 3.6796686227131518e-06, "loss": 1.4105, "step": 2686 }, { "epoch": 2.5032618825722275, "loss_reasoning": 0.43876877427101135, "loss_utility": 0.9615138173103333, "step": 2686 }, { "epoch": 2.5041938490214353, "grad_norm": 1.3691798787931264, "learning_rate": 3.672764929237142e-06, "loss": 1.6856, "step": 2687 }, { "epoch": 2.5041938490214353, "loss_reasoning": 0.4736475348472595, "loss_utility": 0.6944074630737305, "step": 2687 }, { "epoch": 2.505125815470643, "grad_norm": 1.1031524426770185, "learning_rate": 3.6658612357611324e-06, "loss": 1.3641, "step": 2688 }, { "epoch": 2.505125815470643, "loss_reasoning": 0.4940153658390045, "loss_utility": 0.157899409532547, "step": 2688 }, { "epoch": 2.506057781919851, "grad_norm": 1.0722097403449655, "learning_rate": 3.6589575422851227e-06, "loss": 1.292, "step": 2689 }, { "epoch": 2.506057781919851, "loss_reasoning": 0.41607171297073364, "loss_utility": 0.9766888618469238, "step": 2689 }, { "epoch": 2.5069897483690586, "grad_norm": 1.226292734403617, "learning_rate": 3.6520538488091134e-06, "loss": 1.5966, "step": 2690 }, { "epoch": 2.5069897483690586, "loss_reasoning": 0.4881228804588318, "loss_utility": 1.2881526947021484, "step": 2690 }, { "epoch": 2.5079217148182664, "grad_norm": 1.2080452901082495, "learning_rate": 3.6451501553331037e-06, "loss": 1.6209, "step": 2691 }, { "epoch": 2.5079217148182664, "loss_reasoning": 0.5318396687507629, "loss_utility": 0.850806713104248, "step": 2691 }, { "epoch": 2.508853681267474, "grad_norm": 1.3218889348439684, "learning_rate": 3.638246461857094e-06, "loss": 1.6302, "step": 2692 }, { "epoch": 2.508853681267474, "loss_reasoning": 0.44772809743881226, "loss_utility": 1.1607213020324707, "step": 2692 }, { "epoch": 2.509785647716682, "grad_norm": 1.186055258046292, "learning_rate": 3.6313427683810844e-06, "loss": 1.5311, "step": 2693 }, { "epoch": 2.509785647716682, "loss_reasoning": 0.5156736373901367, "loss_utility": 1.0513594150543213, "step": 2693 }, { "epoch": 2.5107176141658902, "grad_norm": 1.1534908633722074, "learning_rate": 3.6244390749050747e-06, "loss": 1.4514, "step": 2694 }, { "epoch": 2.5107176141658902, "loss_reasoning": 0.4736314117908478, "loss_utility": 0.8490614891052246, "step": 2694 }, { "epoch": 2.511649580615098, "grad_norm": 1.1470183957312752, "learning_rate": 3.617535381429065e-06, "loss": 1.7513, "step": 2695 }, { "epoch": 2.511649580615098, "loss_reasoning": 0.503360390663147, "loss_utility": 0.47923508286476135, "step": 2695 }, { "epoch": 2.512581547064306, "grad_norm": 1.0778342107684826, "learning_rate": 3.6106316879530553e-06, "loss": 1.543, "step": 2696 }, { "epoch": 2.512581547064306, "loss_reasoning": 0.4972577691078186, "loss_utility": 0.6376833915710449, "step": 2696 }, { "epoch": 2.5135135135135136, "grad_norm": 1.2675910687676826, "learning_rate": 3.603727994477045e-06, "loss": 1.1957, "step": 2697 }, { "epoch": 2.5135135135135136, "loss_reasoning": 0.5150281190872192, "loss_utility": 1.33695387840271, "step": 2697 }, { "epoch": 2.5144454799627214, "grad_norm": 1.80611416765505, "learning_rate": 3.5968243010010355e-06, "loss": 1.821, "step": 2698 }, { "epoch": 2.5144454799627214, "loss_reasoning": 0.4487997889518738, "loss_utility": 0.7310650944709778, "step": 2698 }, { "epoch": 2.515377446411929, "grad_norm": 1.1184676156852498, "learning_rate": 3.5899206075250258e-06, "loss": 1.27, "step": 2699 }, { "epoch": 2.515377446411929, "loss_reasoning": 0.4960319399833679, "loss_utility": 0.9624170660972595, "step": 2699 }, { "epoch": 2.516309412861137, "grad_norm": 1.5890236619134426, "learning_rate": 3.583016914049017e-06, "loss": 1.4279, "step": 2700 }, { "epoch": 2.516309412861137, "loss_reasoning": 0.45453834533691406, "loss_utility": 1.3653147220611572, "step": 2700 }, { "epoch": 2.5172413793103448, "grad_norm": 1.3563594888128896, "learning_rate": 3.576113220573007e-06, "loss": 1.5774, "step": 2701 }, { "epoch": 2.5172413793103448, "loss_reasoning": 0.5227342844009399, "loss_utility": 0.7358105182647705, "step": 2701 }, { "epoch": 2.5181733457595525, "grad_norm": 1.111961493295764, "learning_rate": 3.569209527096997e-06, "loss": 1.4535, "step": 2702 }, { "epoch": 2.5181733457595525, "loss_reasoning": 0.4673692584037781, "loss_utility": 1.1573879718780518, "step": 2702 }, { "epoch": 2.5191053122087603, "grad_norm": 1.256791549467912, "learning_rate": 3.5623058336209874e-06, "loss": 1.5378, "step": 2703 }, { "epoch": 2.5191053122087603, "loss_reasoning": 0.4443117678165436, "loss_utility": 0.981438159942627, "step": 2703 }, { "epoch": 2.520037278657968, "grad_norm": 1.1983481338687565, "learning_rate": 3.5554021401449777e-06, "loss": 1.3189, "step": 2704 }, { "epoch": 2.520037278657968, "loss_reasoning": 0.5006767511367798, "loss_utility": 1.147669792175293, "step": 2704 }, { "epoch": 2.5209692451071763, "grad_norm": 1.4160605994725286, "learning_rate": 3.548498446668968e-06, "loss": 1.3911, "step": 2705 }, { "epoch": 2.5209692451071763, "loss_reasoning": 0.5390129685401917, "loss_utility": 1.347160816192627, "step": 2705 }, { "epoch": 2.5219012115563837, "grad_norm": 1.3533485046696156, "learning_rate": 3.5415947531929584e-06, "loss": 1.7756, "step": 2706 }, { "epoch": 2.5219012115563837, "loss_reasoning": 0.44520485401153564, "loss_utility": 1.4756954908370972, "step": 2706 }, { "epoch": 2.522833178005592, "grad_norm": 1.0755444717258589, "learning_rate": 3.5346910597169487e-06, "loss": 1.3813, "step": 2707 }, { "epoch": 2.522833178005592, "loss_reasoning": 0.4859968423843384, "loss_utility": 1.110227346420288, "step": 2707 }, { "epoch": 2.5237651444547997, "grad_norm": 1.1904765356122335, "learning_rate": 3.527787366240939e-06, "loss": 1.4905, "step": 2708 }, { "epoch": 2.5237651444547997, "loss_reasoning": 0.49444013833999634, "loss_utility": 1.6601483821868896, "step": 2708 }, { "epoch": 2.5246971109040075, "grad_norm": 1.26074046045879, "learning_rate": 3.5208836727649293e-06, "loss": 1.5608, "step": 2709 }, { "epoch": 2.5246971109040075, "loss_reasoning": 0.46529167890548706, "loss_utility": 1.35420823097229, "step": 2709 }, { "epoch": 2.5256290773532153, "grad_norm": 1.424788680206619, "learning_rate": 3.51397997928892e-06, "loss": 1.6288, "step": 2710 }, { "epoch": 2.5256290773532153, "loss_reasoning": 0.491061270236969, "loss_utility": 1.0101743936538696, "step": 2710 }, { "epoch": 2.526561043802423, "grad_norm": 1.6681047453624391, "learning_rate": 3.5070762858129103e-06, "loss": 1.7329, "step": 2711 }, { "epoch": 2.526561043802423, "loss_reasoning": 0.4408310055732727, "loss_utility": 0.5132197737693787, "step": 2711 }, { "epoch": 2.527493010251631, "grad_norm": 1.2216573433651845, "learning_rate": 3.5001725923369006e-06, "loss": 1.3179, "step": 2712 }, { "epoch": 2.527493010251631, "loss_reasoning": 0.5234030485153198, "loss_utility": 1.354726791381836, "step": 2712 }, { "epoch": 2.5284249767008387, "grad_norm": 1.1578762580186164, "learning_rate": 3.493268898860891e-06, "loss": 1.7133, "step": 2713 }, { "epoch": 2.5284249767008387, "loss_reasoning": 0.4879263937473297, "loss_utility": 0.5699957609176636, "step": 2713 }, { "epoch": 2.5293569431500464, "grad_norm": 1.2876255941875296, "learning_rate": 3.4863652053848813e-06, "loss": 1.7266, "step": 2714 }, { "epoch": 2.5293569431500464, "loss_reasoning": 0.5053273439407349, "loss_utility": 1.4844610691070557, "step": 2714 }, { "epoch": 2.5302889095992542, "grad_norm": 1.2440398324979238, "learning_rate": 3.4794615119088716e-06, "loss": 1.4995, "step": 2715 }, { "epoch": 2.5302889095992542, "loss_reasoning": 0.4485645890235901, "loss_utility": 0.8689144849777222, "step": 2715 }, { "epoch": 2.5312208760484625, "grad_norm": 1.1330632275974393, "learning_rate": 3.472557818432862e-06, "loss": 1.6315, "step": 2716 }, { "epoch": 2.5312208760484625, "loss_reasoning": 0.5004563331604004, "loss_utility": 1.224834680557251, "step": 2716 }, { "epoch": 2.53215284249767, "grad_norm": 1.2140473811624055, "learning_rate": 3.465654124956852e-06, "loss": 1.4356, "step": 2717 }, { "epoch": 2.53215284249767, "loss_reasoning": 0.5114662647247314, "loss_utility": 0.5143051147460938, "step": 2717 }, { "epoch": 2.533084808946878, "grad_norm": 1.2617770929521452, "learning_rate": 3.4587504314808425e-06, "loss": 1.4644, "step": 2718 }, { "epoch": 2.533084808946878, "loss_reasoning": 0.49351900815963745, "loss_utility": 1.0714880228042603, "step": 2718 }, { "epoch": 2.534016775396086, "grad_norm": 1.172170898193716, "learning_rate": 3.451846738004833e-06, "loss": 1.3375, "step": 2719 }, { "epoch": 2.534016775396086, "loss_reasoning": 0.4542016386985779, "loss_utility": 1.604689121246338, "step": 2719 }, { "epoch": 2.5349487418452936, "grad_norm": 1.0273629559273123, "learning_rate": 3.4449430445288235e-06, "loss": 1.467, "step": 2720 }, { "epoch": 2.5349487418452936, "loss_reasoning": 0.4346570372581482, "loss_utility": 0.90834641456604, "step": 2720 }, { "epoch": 2.5358807082945014, "grad_norm": 1.465384384127295, "learning_rate": 3.438039351052814e-06, "loss": 1.4975, "step": 2721 }, { "epoch": 2.5358807082945014, "loss_reasoning": 0.501512885093689, "loss_utility": 0.9076589345932007, "step": 2721 }, { "epoch": 2.536812674743709, "grad_norm": 1.1840525528761567, "learning_rate": 3.431135657576804e-06, "loss": 1.2562, "step": 2722 }, { "epoch": 2.536812674743709, "loss_reasoning": 0.443534791469574, "loss_utility": 1.5680392980575562, "step": 2722 }, { "epoch": 2.537744641192917, "grad_norm": 1.2266139937045732, "learning_rate": 3.4242319641007945e-06, "loss": 1.5491, "step": 2723 }, { "epoch": 2.537744641192917, "loss_reasoning": 0.5221495032310486, "loss_utility": 0.9508843421936035, "step": 2723 }, { "epoch": 2.5386766076421248, "grad_norm": 1.2122084645382214, "learning_rate": 3.4173282706247844e-06, "loss": 1.4256, "step": 2724 }, { "epoch": 2.5386766076421248, "loss_reasoning": 0.4492183327674866, "loss_utility": 0.790095329284668, "step": 2724 }, { "epoch": 2.5396085740913326, "grad_norm": 1.1614617871796704, "learning_rate": 3.4104245771487747e-06, "loss": 1.2828, "step": 2725 }, { "epoch": 2.5396085740913326, "loss_reasoning": 0.47286033630371094, "loss_utility": 0.829459547996521, "step": 2725 }, { "epoch": 2.5405405405405403, "grad_norm": 1.3804014743702793, "learning_rate": 3.403520883672765e-06, "loss": 1.6058, "step": 2726 }, { "epoch": 2.5405405405405403, "loss_reasoning": 0.4641839861869812, "loss_utility": 0.9925626516342163, "step": 2726 }, { "epoch": 2.5414725069897486, "grad_norm": 1.134405511757854, "learning_rate": 3.3966171901967553e-06, "loss": 1.6145, "step": 2727 }, { "epoch": 2.5414725069897486, "loss_reasoning": 0.47626614570617676, "loss_utility": 1.680797815322876, "step": 2727 }, { "epoch": 2.542404473438956, "grad_norm": 1.1857396855620874, "learning_rate": 3.3897134967207456e-06, "loss": 1.8433, "step": 2728 }, { "epoch": 2.542404473438956, "loss_reasoning": 0.47159284353256226, "loss_utility": 0.7033230066299438, "step": 2728 }, { "epoch": 2.543336439888164, "grad_norm": 1.5277038387416952, "learning_rate": 3.382809803244736e-06, "loss": 1.5096, "step": 2729 }, { "epoch": 2.543336439888164, "loss_reasoning": 0.44134366512298584, "loss_utility": 0.9901764392852783, "step": 2729 }, { "epoch": 2.544268406337372, "grad_norm": 1.1849307217049339, "learning_rate": 3.3759061097687266e-06, "loss": 1.5212, "step": 2730 }, { "epoch": 2.544268406337372, "loss_reasoning": 0.5202727913856506, "loss_utility": 0.7767860889434814, "step": 2730 }, { "epoch": 2.5452003727865797, "grad_norm": 1.3866063885562407, "learning_rate": 3.369002416292717e-06, "loss": 1.7208, "step": 2731 }, { "epoch": 2.5452003727865797, "loss_reasoning": 0.4564913511276245, "loss_utility": 0.6989491581916809, "step": 2731 }, { "epoch": 2.5461323392357875, "grad_norm": 1.2007991999139274, "learning_rate": 3.3620987228167072e-06, "loss": 1.4539, "step": 2732 }, { "epoch": 2.5461323392357875, "loss_reasoning": 0.5584740042686462, "loss_utility": 0.8447936177253723, "step": 2732 }, { "epoch": 2.5470643056849953, "grad_norm": 1.011214438229992, "learning_rate": 3.3551950293406976e-06, "loss": 1.3775, "step": 2733 }, { "epoch": 2.5470643056849953, "loss_reasoning": 0.4825425148010254, "loss_utility": 0.8020853996276855, "step": 2733 }, { "epoch": 2.547996272134203, "grad_norm": 1.3548582846283839, "learning_rate": 3.348291335864688e-06, "loss": 1.4716, "step": 2734 }, { "epoch": 2.547996272134203, "loss_reasoning": 0.46143481135368347, "loss_utility": 0.983077883720398, "step": 2734 }, { "epoch": 2.548928238583411, "grad_norm": 1.106075822471997, "learning_rate": 3.341387642388678e-06, "loss": 1.5005, "step": 2735 }, { "epoch": 2.548928238583411, "loss_reasoning": 0.45635074377059937, "loss_utility": 1.2150983810424805, "step": 2735 }, { "epoch": 2.5498602050326187, "grad_norm": 1.2265429266537022, "learning_rate": 3.3344839489126685e-06, "loss": 1.3739, "step": 2736 }, { "epoch": 2.5498602050326187, "loss_reasoning": 0.4542003273963928, "loss_utility": 2.1583759784698486, "step": 2736 }, { "epoch": 2.5507921714818265, "grad_norm": 1.3624938656931556, "learning_rate": 3.327580255436659e-06, "loss": 1.7356, "step": 2737 }, { "epoch": 2.5507921714818265, "loss_reasoning": 0.47649726271629333, "loss_utility": 1.262636661529541, "step": 2737 }, { "epoch": 2.5517241379310347, "grad_norm": 0.9886028866053642, "learning_rate": 3.320676561960649e-06, "loss": 1.3391, "step": 2738 }, { "epoch": 2.5517241379310347, "loss_reasoning": 0.5390408039093018, "loss_utility": 1.1029515266418457, "step": 2738 }, { "epoch": 2.552656104380242, "grad_norm": 0.9941857095601909, "learning_rate": 3.3137728684846394e-06, "loss": 1.3791, "step": 2739 }, { "epoch": 2.552656104380242, "loss_reasoning": 0.46683812141418457, "loss_utility": 0.6719949841499329, "step": 2739 }, { "epoch": 2.5535880708294503, "grad_norm": 1.4128971638067906, "learning_rate": 3.30686917500863e-06, "loss": 1.5108, "step": 2740 }, { "epoch": 2.5535880708294503, "loss_reasoning": 0.5508387684822083, "loss_utility": 1.2950987815856934, "step": 2740 }, { "epoch": 2.554520037278658, "grad_norm": 1.0648749787392113, "learning_rate": 3.2999654815326205e-06, "loss": 1.513, "step": 2741 }, { "epoch": 2.554520037278658, "loss_reasoning": 0.39267611503601074, "loss_utility": 1.1067888736724854, "step": 2741 }, { "epoch": 2.555452003727866, "grad_norm": 1.171182371215897, "learning_rate": 3.2930617880566108e-06, "loss": 1.3555, "step": 2742 }, { "epoch": 2.555452003727866, "loss_reasoning": 0.49943041801452637, "loss_utility": 0.9484673738479614, "step": 2742 }, { "epoch": 2.5563839701770736, "grad_norm": 1.142284576182841, "learning_rate": 3.286158094580601e-06, "loss": 1.3985, "step": 2743 }, { "epoch": 2.5563839701770736, "loss_reasoning": 0.4850093126296997, "loss_utility": 0.8426896929740906, "step": 2743 }, { "epoch": 2.5573159366262814, "grad_norm": 1.2122684450134418, "learning_rate": 3.2792544011045914e-06, "loss": 1.5919, "step": 2744 }, { "epoch": 2.5573159366262814, "loss_reasoning": 0.5086733102798462, "loss_utility": 0.7429749965667725, "step": 2744 }, { "epoch": 2.558247903075489, "grad_norm": 1.1159601310065306, "learning_rate": 3.2723507076285817e-06, "loss": 1.2675, "step": 2745 }, { "epoch": 2.558247903075489, "loss_reasoning": 0.4190101623535156, "loss_utility": 0.8600261211395264, "step": 2745 }, { "epoch": 2.559179869524697, "grad_norm": 1.281682595872071, "learning_rate": 3.265447014152572e-06, "loss": 1.6824, "step": 2746 }, { "epoch": 2.559179869524697, "loss_reasoning": 0.46703028678894043, "loss_utility": 0.8582026958465576, "step": 2746 }, { "epoch": 2.560111835973905, "grad_norm": 1.284172860397728, "learning_rate": 3.258543320676562e-06, "loss": 1.6924, "step": 2747 }, { "epoch": 2.560111835973905, "loss_reasoning": 0.5147014856338501, "loss_utility": 1.4542429447174072, "step": 2747 }, { "epoch": 2.5610438024231126, "grad_norm": 0.9862044656811022, "learning_rate": 3.251639627200552e-06, "loss": 1.4688, "step": 2748 }, { "epoch": 2.5610438024231126, "loss_reasoning": 0.49557238817214966, "loss_utility": 1.5636756420135498, "step": 2748 }, { "epoch": 2.561975768872321, "grad_norm": 1.2016298834672596, "learning_rate": 3.2447359337245425e-06, "loss": 1.5063, "step": 2749 }, { "epoch": 2.561975768872321, "loss_reasoning": 0.4955865442752838, "loss_utility": 0.7379698753356934, "step": 2749 }, { "epoch": 2.562907735321528, "grad_norm": 1.1362102630123667, "learning_rate": 3.2378322402485337e-06, "loss": 1.4424, "step": 2750 }, { "epoch": 2.562907735321528, "loss_reasoning": 0.4781309962272644, "loss_utility": 1.0099685192108154, "step": 2750 }, { "epoch": 2.5638397017707364, "grad_norm": 0.9715535635791215, "learning_rate": 3.230928546772524e-06, "loss": 1.3161, "step": 2751 }, { "epoch": 2.5638397017707364, "loss_reasoning": 0.43104374408721924, "loss_utility": 0.7909444570541382, "step": 2751 }, { "epoch": 2.564771668219944, "grad_norm": 1.2431624426810723, "learning_rate": 3.224024853296514e-06, "loss": 1.2747, "step": 2752 }, { "epoch": 2.564771668219944, "loss_reasoning": 0.4686954915523529, "loss_utility": 0.6643375158309937, "step": 2752 }, { "epoch": 2.565703634669152, "grad_norm": 1.0741846486537354, "learning_rate": 3.217121159820504e-06, "loss": 1.4859, "step": 2753 }, { "epoch": 2.565703634669152, "loss_reasoning": 0.5223178863525391, "loss_utility": 1.2170929908752441, "step": 2753 }, { "epoch": 2.5666356011183598, "grad_norm": 1.1289172979008122, "learning_rate": 3.2102174663444945e-06, "loss": 1.4896, "step": 2754 }, { "epoch": 2.5666356011183598, "loss_reasoning": 0.4925810694694519, "loss_utility": 1.056037425994873, "step": 2754 }, { "epoch": 2.5675675675675675, "grad_norm": 1.0911910829675642, "learning_rate": 3.2033137728684848e-06, "loss": 1.4086, "step": 2755 }, { "epoch": 2.5675675675675675, "loss_reasoning": 0.5366069078445435, "loss_utility": 0.9676751494407654, "step": 2755 }, { "epoch": 2.5684995340167753, "grad_norm": 1.3656165628308172, "learning_rate": 3.196410079392475e-06, "loss": 1.5769, "step": 2756 }, { "epoch": 2.5684995340167753, "loss_reasoning": 0.49393224716186523, "loss_utility": 0.54408860206604, "step": 2756 }, { "epoch": 2.569431500465983, "grad_norm": 1.1316530643986398, "learning_rate": 3.1895063859164654e-06, "loss": 1.2496, "step": 2757 }, { "epoch": 2.569431500465983, "loss_reasoning": 0.4548971652984619, "loss_utility": 0.8006751537322998, "step": 2757 }, { "epoch": 2.570363466915191, "grad_norm": 1.3570049493717282, "learning_rate": 3.1826026924404557e-06, "loss": 1.415, "step": 2758 }, { "epoch": 2.570363466915191, "loss_reasoning": 0.4757186770439148, "loss_utility": 1.0775933265686035, "step": 2758 }, { "epoch": 2.5712954333643987, "grad_norm": 1.060782361201835, "learning_rate": 3.175698998964446e-06, "loss": 1.341, "step": 2759 }, { "epoch": 2.5712954333643987, "loss_reasoning": 0.4986695945262909, "loss_utility": 0.615235447883606, "step": 2759 }, { "epoch": 2.572227399813607, "grad_norm": 1.1578856579772836, "learning_rate": 3.1687953054884367e-06, "loss": 1.2745, "step": 2760 }, { "epoch": 2.572227399813607, "loss_reasoning": 0.5058436989784241, "loss_utility": 0.5381358861923218, "step": 2760 }, { "epoch": 2.5731593662628143, "grad_norm": 1.296188462805449, "learning_rate": 3.161891612012427e-06, "loss": 1.3892, "step": 2761 }, { "epoch": 2.5731593662628143, "loss_reasoning": 0.519020676612854, "loss_utility": 0.44226914644241333, "step": 2761 }, { "epoch": 2.5740913327120225, "grad_norm": 1.291971588466328, "learning_rate": 3.1549879185364174e-06, "loss": 1.5383, "step": 2762 }, { "epoch": 2.5740913327120225, "loss_reasoning": 0.49895280599594116, "loss_utility": 0.8746985197067261, "step": 2762 }, { "epoch": 2.5750232991612303, "grad_norm": 1.064335217513516, "learning_rate": 3.1480842250604077e-06, "loss": 1.3278, "step": 2763 }, { "epoch": 2.5750232991612303, "loss_reasoning": 0.5356144309043884, "loss_utility": 0.9957942366600037, "step": 2763 }, { "epoch": 2.575955265610438, "grad_norm": 1.160916909112929, "learning_rate": 3.141180531584398e-06, "loss": 1.579, "step": 2764 }, { "epoch": 2.575955265610438, "loss_reasoning": 0.5001895427703857, "loss_utility": 1.9962022304534912, "step": 2764 }, { "epoch": 2.576887232059646, "grad_norm": 1.5964208858770386, "learning_rate": 3.1342768381083883e-06, "loss": 1.841, "step": 2765 }, { "epoch": 2.576887232059646, "loss_reasoning": 0.5058311223983765, "loss_utility": 0.8253679275512695, "step": 2765 }, { "epoch": 2.5778191985088537, "grad_norm": 1.1147236291677174, "learning_rate": 3.1273731446323786e-06, "loss": 1.178, "step": 2766 }, { "epoch": 2.5778191985088537, "loss_reasoning": 0.44246524572372437, "loss_utility": 1.0916991233825684, "step": 2766 }, { "epoch": 2.5787511649580614, "grad_norm": 1.005698122269395, "learning_rate": 3.120469451156369e-06, "loss": 1.1042, "step": 2767 }, { "epoch": 2.5787511649580614, "loss_reasoning": 0.4571678638458252, "loss_utility": 1.0150119066238403, "step": 2767 }, { "epoch": 2.5796831314072692, "grad_norm": 1.3650869452508785, "learning_rate": 3.1135657576803592e-06, "loss": 1.493, "step": 2768 }, { "epoch": 2.5796831314072692, "loss_reasoning": 0.4947388768196106, "loss_utility": 0.9112151265144348, "step": 2768 }, { "epoch": 2.580615097856477, "grad_norm": 1.1602425691666292, "learning_rate": 3.1066620642043495e-06, "loss": 1.5003, "step": 2769 }, { "epoch": 2.580615097856477, "loss_reasoning": 0.42720651626586914, "loss_utility": 1.3732225894927979, "step": 2769 }, { "epoch": 2.581547064305685, "grad_norm": 1.1572094305227671, "learning_rate": 3.0997583707283403e-06, "loss": 1.6059, "step": 2770 }, { "epoch": 2.581547064305685, "loss_reasoning": 0.5295553207397461, "loss_utility": 1.0447137355804443, "step": 2770 }, { "epoch": 2.582479030754893, "grad_norm": 1.1367442951483862, "learning_rate": 3.0928546772523306e-06, "loss": 1.4651, "step": 2771 }, { "epoch": 2.582479030754893, "loss_reasoning": 0.4662615656852722, "loss_utility": 0.900166928768158, "step": 2771 }, { "epoch": 2.5834109972041004, "grad_norm": 1.0778583665248351, "learning_rate": 3.085950983776321e-06, "loss": 1.5128, "step": 2772 }, { "epoch": 2.5834109972041004, "loss_reasoning": 0.4798518419265747, "loss_utility": 1.14640474319458, "step": 2772 }, { "epoch": 2.5843429636533086, "grad_norm": 0.9273381215866789, "learning_rate": 3.079047290300311e-06, "loss": 1.3506, "step": 2773 }, { "epoch": 2.5843429636533086, "loss_reasoning": 0.531781017780304, "loss_utility": 0.774297833442688, "step": 2773 }, { "epoch": 2.5852749301025164, "grad_norm": 1.0708811778481506, "learning_rate": 3.0721435968243015e-06, "loss": 1.2503, "step": 2774 }, { "epoch": 2.5852749301025164, "loss_reasoning": 0.4483589828014374, "loss_utility": 0.797080397605896, "step": 2774 }, { "epoch": 2.586206896551724, "grad_norm": 1.0550504869719666, "learning_rate": 3.0652399033482914e-06, "loss": 1.2983, "step": 2775 }, { "epoch": 2.586206896551724, "loss_reasoning": 0.4460079073905945, "loss_utility": 0.6555898785591125, "step": 2775 }, { "epoch": 2.587138863000932, "grad_norm": 0.997592485430252, "learning_rate": 3.0583362098722817e-06, "loss": 1.3698, "step": 2776 }, { "epoch": 2.587138863000932, "loss_reasoning": 0.4701733887195587, "loss_utility": 1.6278808116912842, "step": 2776 }, { "epoch": 2.5880708294501398, "grad_norm": 1.2638272868120881, "learning_rate": 3.051432516396272e-06, "loss": 1.5612, "step": 2777 }, { "epoch": 2.5880708294501398, "loss_reasoning": 0.4552556872367859, "loss_utility": 1.1049244403839111, "step": 2777 }, { "epoch": 2.5890027958993476, "grad_norm": 1.2487302746954534, "learning_rate": 3.0445288229202623e-06, "loss": 1.5108, "step": 2778 }, { "epoch": 2.5890027958993476, "loss_reasoning": 0.5280842781066895, "loss_utility": 1.3796995878219604, "step": 2778 }, { "epoch": 2.5899347623485554, "grad_norm": 1.135822631256417, "learning_rate": 3.0376251294442526e-06, "loss": 1.4475, "step": 2779 }, { "epoch": 2.5899347623485554, "loss_reasoning": 0.49810683727264404, "loss_utility": 1.222477674484253, "step": 2779 }, { "epoch": 2.590866728797763, "grad_norm": 0.9974107968615307, "learning_rate": 3.0307214359682433e-06, "loss": 1.3873, "step": 2780 }, { "epoch": 2.590866728797763, "loss_reasoning": 0.5361716747283936, "loss_utility": 1.366015076637268, "step": 2780 }, { "epoch": 2.591798695246971, "grad_norm": 1.415360188163994, "learning_rate": 3.0238177424922337e-06, "loss": 1.5627, "step": 2781 }, { "epoch": 2.591798695246971, "loss_reasoning": 0.44187986850738525, "loss_utility": 0.8960331678390503, "step": 2781 }, { "epoch": 2.592730661696179, "grad_norm": 1.254637643941195, "learning_rate": 3.016914049016224e-06, "loss": 1.3304, "step": 2782 }, { "epoch": 2.592730661696179, "loss_reasoning": 0.4399558901786804, "loss_utility": 1.2088634967803955, "step": 2782 }, { "epoch": 2.5936626281453865, "grad_norm": 1.1173230591247678, "learning_rate": 3.0100103555402143e-06, "loss": 1.6362, "step": 2783 }, { "epoch": 2.5936626281453865, "loss_reasoning": 0.4436225891113281, "loss_utility": 0.8750499486923218, "step": 2783 }, { "epoch": 2.5945945945945947, "grad_norm": 1.2368174897961788, "learning_rate": 3.0031066620642046e-06, "loss": 1.5758, "step": 2784 }, { "epoch": 2.5945945945945947, "loss_reasoning": 0.452603280544281, "loss_utility": 0.8235999941825867, "step": 2784 }, { "epoch": 2.5955265610438025, "grad_norm": 1.6266601447173574, "learning_rate": 2.996202968588195e-06, "loss": 1.3132, "step": 2785 }, { "epoch": 2.5955265610438025, "loss_reasoning": 0.5540603399276733, "loss_utility": 1.8434510231018066, "step": 2785 }, { "epoch": 2.5964585274930103, "grad_norm": 1.3043088839085728, "learning_rate": 2.989299275112185e-06, "loss": 1.6078, "step": 2786 }, { "epoch": 2.5964585274930103, "loss_reasoning": 0.5222610235214233, "loss_utility": 1.2184269428253174, "step": 2786 }, { "epoch": 2.597390493942218, "grad_norm": 1.0628259698632692, "learning_rate": 2.9823955816361755e-06, "loss": 1.6107, "step": 2787 }, { "epoch": 2.597390493942218, "loss_reasoning": 0.5244482159614563, "loss_utility": 0.704961359500885, "step": 2787 }, { "epoch": 2.598322460391426, "grad_norm": 0.9854734425008508, "learning_rate": 2.975491888160166e-06, "loss": 1.4698, "step": 2788 }, { "epoch": 2.598322460391426, "loss_reasoning": 0.45281630754470825, "loss_utility": 0.8659007549285889, "step": 2788 }, { "epoch": 2.5992544268406337, "grad_norm": 1.4496084932362925, "learning_rate": 2.968588194684156e-06, "loss": 1.4912, "step": 2789 }, { "epoch": 2.5992544268406337, "loss_reasoning": 0.47606736421585083, "loss_utility": 0.7927722930908203, "step": 2789 }, { "epoch": 2.6001863932898415, "grad_norm": 1.3337350282670861, "learning_rate": 2.9616845012081464e-06, "loss": 1.4371, "step": 2790 }, { "epoch": 2.6001863932898415, "loss_reasoning": 0.46974340081214905, "loss_utility": 0.9136008024215698, "step": 2790 }, { "epoch": 2.6011183597390493, "grad_norm": 1.16653040727061, "learning_rate": 2.954780807732137e-06, "loss": 1.4795, "step": 2791 }, { "epoch": 2.6011183597390493, "loss_reasoning": 0.5247169137001038, "loss_utility": 1.616041660308838, "step": 2791 }, { "epoch": 2.602050326188257, "grad_norm": 1.2108493150063293, "learning_rate": 2.9478771142561275e-06, "loss": 1.5258, "step": 2792 }, { "epoch": 2.602050326188257, "loss_reasoning": 0.45708751678466797, "loss_utility": 1.3744139671325684, "step": 2792 }, { "epoch": 2.6029822926374653, "grad_norm": 1.3067075016941887, "learning_rate": 2.940973420780118e-06, "loss": 1.6229, "step": 2793 }, { "epoch": 2.6029822926374653, "loss_reasoning": 0.4802667498588562, "loss_utility": 0.7967141270637512, "step": 2793 }, { "epoch": 2.6039142590866726, "grad_norm": 1.38967470090495, "learning_rate": 2.934069727304108e-06, "loss": 1.4658, "step": 2794 }, { "epoch": 2.6039142590866726, "loss_reasoning": 0.5007531046867371, "loss_utility": 0.9030482172966003, "step": 2794 }, { "epoch": 2.604846225535881, "grad_norm": 1.0730824705824311, "learning_rate": 2.9271660338280984e-06, "loss": 1.3942, "step": 2795 }, { "epoch": 2.604846225535881, "loss_reasoning": 0.4650513231754303, "loss_utility": 1.7278940677642822, "step": 2795 }, { "epoch": 2.6057781919850886, "grad_norm": 1.459296438959333, "learning_rate": 2.9202623403520887e-06, "loss": 1.6421, "step": 2796 }, { "epoch": 2.6057781919850886, "loss_reasoning": 0.5107582211494446, "loss_utility": 0.7381712198257446, "step": 2796 }, { "epoch": 2.6067101584342964, "grad_norm": 0.9905881083098407, "learning_rate": 2.913358646876079e-06, "loss": 1.4483, "step": 2797 }, { "epoch": 2.6067101584342964, "loss_reasoning": 0.4332652688026428, "loss_utility": 0.9276401996612549, "step": 2797 }, { "epoch": 2.607642124883504, "grad_norm": 1.2796418414566904, "learning_rate": 2.906454953400069e-06, "loss": 1.7117, "step": 2798 }, { "epoch": 2.607642124883504, "loss_reasoning": 0.5194847583770752, "loss_utility": 0.5528521537780762, "step": 2798 }, { "epoch": 2.608574091332712, "grad_norm": 1.348392063114455, "learning_rate": 2.8995512599240592e-06, "loss": 1.3558, "step": 2799 }, { "epoch": 2.608574091332712, "loss_reasoning": 0.5045268535614014, "loss_utility": 0.7423412799835205, "step": 2799 }, { "epoch": 2.60950605778192, "grad_norm": 1.155949998347795, "learning_rate": 2.8926475664480495e-06, "loss": 1.4716, "step": 2800 }, { "epoch": 2.60950605778192, "loss_reasoning": 0.47274458408355713, "loss_utility": 1.3287525177001953, "step": 2800 }, { "epoch": 2.6104380242311276, "grad_norm": 1.171519412010478, "learning_rate": 2.8857438729720407e-06, "loss": 1.531, "step": 2801 }, { "epoch": 2.6104380242311276, "loss_reasoning": 0.5489492416381836, "loss_utility": 0.3124638795852661, "step": 2801 }, { "epoch": 2.6113699906803354, "grad_norm": 1.3654877877637464, "learning_rate": 2.878840179496031e-06, "loss": 1.3673, "step": 2802 }, { "epoch": 2.6113699906803354, "loss_reasoning": 0.46085095405578613, "loss_utility": 1.2166624069213867, "step": 2802 }, { "epoch": 2.612301957129543, "grad_norm": 1.209567946345436, "learning_rate": 2.871936486020021e-06, "loss": 1.3588, "step": 2803 }, { "epoch": 2.612301957129543, "loss_reasoning": 0.4484992027282715, "loss_utility": 0.6588109731674194, "step": 2803 }, { "epoch": 2.6132339235787514, "grad_norm": 1.3113460968900799, "learning_rate": 2.865032792544011e-06, "loss": 1.4851, "step": 2804 }, { "epoch": 2.6132339235787514, "loss_reasoning": 0.48802104592323303, "loss_utility": 0.9310811758041382, "step": 2804 }, { "epoch": 2.6141658900279587, "grad_norm": 1.1360560369669543, "learning_rate": 2.8581290990680015e-06, "loss": 1.4178, "step": 2805 }, { "epoch": 2.6141658900279587, "loss_reasoning": 0.463981568813324, "loss_utility": 0.7503829002380371, "step": 2805 }, { "epoch": 2.615097856477167, "grad_norm": 1.2422317271449856, "learning_rate": 2.851225405591992e-06, "loss": 1.9344, "step": 2806 }, { "epoch": 2.615097856477167, "loss_reasoning": 0.44693613052368164, "loss_utility": 0.921097457408905, "step": 2806 }, { "epoch": 2.6160298229263748, "grad_norm": 1.0386037166284707, "learning_rate": 2.844321712115982e-06, "loss": 1.3199, "step": 2807 }, { "epoch": 2.6160298229263748, "loss_reasoning": 0.4949423670768738, "loss_utility": 1.2315733432769775, "step": 2807 }, { "epoch": 2.6169617893755825, "grad_norm": 1.1686577731048693, "learning_rate": 2.8374180186399724e-06, "loss": 1.4866, "step": 2808 }, { "epoch": 2.6169617893755825, "loss_reasoning": 0.5590404272079468, "loss_utility": 1.0881417989730835, "step": 2808 }, { "epoch": 2.6178937558247903, "grad_norm": 1.0465335072162993, "learning_rate": 2.8305143251639627e-06, "loss": 1.7252, "step": 2809 }, { "epoch": 2.6178937558247903, "loss_reasoning": 0.44596290588378906, "loss_utility": 0.3981170654296875, "step": 2809 }, { "epoch": 2.618825722273998, "grad_norm": 1.0478338993801546, "learning_rate": 2.823610631687953e-06, "loss": 1.1613, "step": 2810 }, { "epoch": 2.618825722273998, "loss_reasoning": 0.46679991483688354, "loss_utility": 1.0427889823913574, "step": 2810 }, { "epoch": 2.619757688723206, "grad_norm": 1.4625408723823092, "learning_rate": 2.8167069382119438e-06, "loss": 1.3595, "step": 2811 }, { "epoch": 2.619757688723206, "loss_reasoning": 0.5417388677597046, "loss_utility": 0.4986152946949005, "step": 2811 }, { "epoch": 2.6206896551724137, "grad_norm": 1.0588618809391612, "learning_rate": 2.809803244735934e-06, "loss": 1.2455, "step": 2812 }, { "epoch": 2.6206896551724137, "loss_reasoning": 0.5065410137176514, "loss_utility": 0.29071253538131714, "step": 2812 }, { "epoch": 2.6216216216216215, "grad_norm": 1.2192398552683683, "learning_rate": 2.8028995512599244e-06, "loss": 1.0531, "step": 2813 }, { "epoch": 2.6216216216216215, "loss_reasoning": 0.5205833315849304, "loss_utility": 0.6735507249832153, "step": 2813 }, { "epoch": 2.6225535880708293, "grad_norm": 1.0753364033928376, "learning_rate": 2.7959958577839147e-06, "loss": 1.5477, "step": 2814 }, { "epoch": 2.6225535880708293, "loss_reasoning": 0.48286014795303345, "loss_utility": 0.28598612546920776, "step": 2814 }, { "epoch": 2.6234855545200375, "grad_norm": 1.0490082066010833, "learning_rate": 2.789092164307905e-06, "loss": 1.3989, "step": 2815 }, { "epoch": 2.6234855545200375, "loss_reasoning": 0.4424865245819092, "loss_utility": 0.8886294364929199, "step": 2815 }, { "epoch": 2.624417520969245, "grad_norm": 1.0198982407979527, "learning_rate": 2.7821884708318953e-06, "loss": 1.3233, "step": 2816 }, { "epoch": 2.624417520969245, "loss_reasoning": 0.49223142862319946, "loss_utility": 1.1866295337677002, "step": 2816 }, { "epoch": 2.625349487418453, "grad_norm": 1.1814538119899656, "learning_rate": 2.7752847773558856e-06, "loss": 1.5863, "step": 2817 }, { "epoch": 2.625349487418453, "loss_reasoning": 0.45229387283325195, "loss_utility": 1.0063939094543457, "step": 2817 }, { "epoch": 2.626281453867661, "grad_norm": 1.2185426940122128, "learning_rate": 2.768381083879876e-06, "loss": 1.4799, "step": 2818 }, { "epoch": 2.626281453867661, "loss_reasoning": 0.5298290848731995, "loss_utility": 0.6552421450614929, "step": 2818 }, { "epoch": 2.6272134203168687, "grad_norm": 1.0554094872608266, "learning_rate": 2.7614773904038662e-06, "loss": 1.2781, "step": 2819 }, { "epoch": 2.6272134203168687, "loss_reasoning": 0.4848949909210205, "loss_utility": 0.5931350588798523, "step": 2819 }, { "epoch": 2.6281453867660765, "grad_norm": 1.267229086343475, "learning_rate": 2.7545736969278566e-06, "loss": 1.2926, "step": 2820 }, { "epoch": 2.6281453867660765, "loss_reasoning": 0.5290756821632385, "loss_utility": 1.1492407321929932, "step": 2820 }, { "epoch": 2.6290773532152842, "grad_norm": 1.361176054527441, "learning_rate": 2.7476700034518473e-06, "loss": 1.758, "step": 2821 }, { "epoch": 2.6290773532152842, "loss_reasoning": 0.5263150930404663, "loss_utility": 0.8406526446342468, "step": 2821 }, { "epoch": 2.630009319664492, "grad_norm": 1.2247541855875088, "learning_rate": 2.7407663099758376e-06, "loss": 1.5788, "step": 2822 }, { "epoch": 2.630009319664492, "loss_reasoning": 0.48088401556015015, "loss_utility": 0.6571146249771118, "step": 2822 }, { "epoch": 2.6309412861137, "grad_norm": 1.2097477815679918, "learning_rate": 2.733862616499828e-06, "loss": 1.7508, "step": 2823 }, { "epoch": 2.6309412861137, "loss_reasoning": 0.4618769586086273, "loss_utility": 0.7704195380210876, "step": 2823 }, { "epoch": 2.6318732525629076, "grad_norm": 1.228691204605878, "learning_rate": 2.7269589230238182e-06, "loss": 1.5855, "step": 2824 }, { "epoch": 2.6318732525629076, "loss_reasoning": 0.4989089369773865, "loss_utility": 0.3559648394584656, "step": 2824 }, { "epoch": 2.6328052190121154, "grad_norm": 1.1512059313545466, "learning_rate": 2.7200552295478085e-06, "loss": 1.3724, "step": 2825 }, { "epoch": 2.6328052190121154, "loss_reasoning": 0.4424394369125366, "loss_utility": 0.8052055835723877, "step": 2825 }, { "epoch": 2.6337371854613236, "grad_norm": 1.152352228359927, "learning_rate": 2.7131515360717984e-06, "loss": 1.4452, "step": 2826 }, { "epoch": 2.6337371854613236, "loss_reasoning": 0.4900186359882355, "loss_utility": 1.2321858406066895, "step": 2826 }, { "epoch": 2.634669151910531, "grad_norm": 1.1747358671824293, "learning_rate": 2.7062478425957887e-06, "loss": 1.696, "step": 2827 }, { "epoch": 2.634669151910531, "loss_reasoning": 0.4969269037246704, "loss_utility": 1.4579286575317383, "step": 2827 }, { "epoch": 2.635601118359739, "grad_norm": 1.3700826177005774, "learning_rate": 2.699344149119779e-06, "loss": 1.629, "step": 2828 }, { "epoch": 2.635601118359739, "loss_reasoning": 0.5641753077507019, "loss_utility": 0.8108032941818237, "step": 2828 }, { "epoch": 2.636533084808947, "grad_norm": 1.4702747032689236, "learning_rate": 2.6924404556437693e-06, "loss": 1.2974, "step": 2829 }, { "epoch": 2.636533084808947, "loss_reasoning": 0.5329114198684692, "loss_utility": 1.0428379774093628, "step": 2829 }, { "epoch": 2.637465051258155, "grad_norm": 1.3114392097797798, "learning_rate": 2.6855367621677596e-06, "loss": 1.8636, "step": 2830 }, { "epoch": 2.637465051258155, "loss_reasoning": 0.5074747204780579, "loss_utility": 1.0304049253463745, "step": 2830 }, { "epoch": 2.6383970177073626, "grad_norm": 1.245789670255607, "learning_rate": 2.6786330686917504e-06, "loss": 1.407, "step": 2831 }, { "epoch": 2.6383970177073626, "loss_reasoning": 0.515624463558197, "loss_utility": 0.6881819367408752, "step": 2831 }, { "epoch": 2.6393289841565704, "grad_norm": 1.5194202229905551, "learning_rate": 2.6717293752157407e-06, "loss": 1.5343, "step": 2832 }, { "epoch": 2.6393289841565704, "loss_reasoning": 0.46511855721473694, "loss_utility": 0.8243316411972046, "step": 2832 }, { "epoch": 2.640260950605778, "grad_norm": 1.0644352404530342, "learning_rate": 2.664825681739731e-06, "loss": 1.2822, "step": 2833 }, { "epoch": 2.640260950605778, "loss_reasoning": 0.5153641104698181, "loss_utility": 0.9162650108337402, "step": 2833 }, { "epoch": 2.641192917054986, "grad_norm": 1.116335173828611, "learning_rate": 2.6579219882637213e-06, "loss": 1.7037, "step": 2834 }, { "epoch": 2.641192917054986, "loss_reasoning": 0.5476046204566956, "loss_utility": 0.9105408787727356, "step": 2834 }, { "epoch": 2.6421248835041937, "grad_norm": 1.094261900316238, "learning_rate": 2.6510182947877116e-06, "loss": 1.2889, "step": 2835 }, { "epoch": 2.6421248835041937, "loss_reasoning": 0.5009143948554993, "loss_utility": 1.1797014474868774, "step": 2835 }, { "epoch": 2.6430568499534015, "grad_norm": 1.1804766327950447, "learning_rate": 2.644114601311702e-06, "loss": 1.257, "step": 2836 }, { "epoch": 2.6430568499534015, "loss_reasoning": 0.5445961952209473, "loss_utility": 1.7479586601257324, "step": 2836 }, { "epoch": 2.6439888164026097, "grad_norm": 1.1707267537239838, "learning_rate": 2.6372109078356922e-06, "loss": 1.6814, "step": 2837 }, { "epoch": 2.6439888164026097, "loss_reasoning": 0.5818622708320618, "loss_utility": 0.644032895565033, "step": 2837 }, { "epoch": 2.644920782851817, "grad_norm": 1.0204424712074136, "learning_rate": 2.6303072143596825e-06, "loss": 1.5273, "step": 2838 }, { "epoch": 2.644920782851817, "loss_reasoning": 0.43586552143096924, "loss_utility": 0.8965134024620056, "step": 2838 }, { "epoch": 2.6458527493010253, "grad_norm": 1.1816376604428063, "learning_rate": 2.623403520883673e-06, "loss": 1.3345, "step": 2839 }, { "epoch": 2.6458527493010253, "loss_reasoning": 0.45422235131263733, "loss_utility": 1.3219265937805176, "step": 2839 }, { "epoch": 2.646784715750233, "grad_norm": 1.1250141746914883, "learning_rate": 2.616499827407663e-06, "loss": 1.4656, "step": 2840 }, { "epoch": 2.646784715750233, "loss_reasoning": 0.4621230959892273, "loss_utility": 1.1607413291931152, "step": 2840 }, { "epoch": 2.647716682199441, "grad_norm": 1.1032944843282595, "learning_rate": 2.609596133931654e-06, "loss": 1.4036, "step": 2841 }, { "epoch": 2.647716682199441, "loss_reasoning": 0.5229777097702026, "loss_utility": 1.5938549041748047, "step": 2841 }, { "epoch": 2.6486486486486487, "grad_norm": 1.2554862676391736, "learning_rate": 2.602692440455644e-06, "loss": 1.9015, "step": 2842 }, { "epoch": 2.6486486486486487, "loss_reasoning": 0.4497549533843994, "loss_utility": 1.0378845930099487, "step": 2842 }, { "epoch": 2.6495806150978565, "grad_norm": 1.093842361368383, "learning_rate": 2.5957887469796345e-06, "loss": 1.488, "step": 2843 }, { "epoch": 2.6495806150978565, "loss_reasoning": 0.48258185386657715, "loss_utility": 0.8173166513442993, "step": 2843 }, { "epoch": 2.6505125815470643, "grad_norm": 1.282130898548632, "learning_rate": 2.588885053503625e-06, "loss": 1.3424, "step": 2844 }, { "epoch": 2.6505125815470643, "loss_reasoning": 0.4914080798625946, "loss_utility": 1.072872519493103, "step": 2844 }, { "epoch": 2.651444547996272, "grad_norm": 1.085889238780529, "learning_rate": 2.581981360027615e-06, "loss": 1.4425, "step": 2845 }, { "epoch": 2.651444547996272, "loss_reasoning": 0.5071074962615967, "loss_utility": 1.6365458965301514, "step": 2845 }, { "epoch": 2.65237651444548, "grad_norm": 1.1493357402178495, "learning_rate": 2.5750776665516054e-06, "loss": 1.5692, "step": 2846 }, { "epoch": 2.65237651444548, "loss_reasoning": 0.5098258256912231, "loss_utility": 1.5485708713531494, "step": 2846 }, { "epoch": 2.6533084808946876, "grad_norm": 1.234791484967253, "learning_rate": 2.5681739730755957e-06, "loss": 1.6894, "step": 2847 }, { "epoch": 2.6533084808946876, "loss_reasoning": 0.48746049404144287, "loss_utility": 0.6201645731925964, "step": 2847 }, { "epoch": 2.654240447343896, "grad_norm": 1.1513724346916658, "learning_rate": 2.561270279599586e-06, "loss": 1.4532, "step": 2848 }, { "epoch": 2.654240447343896, "loss_reasoning": 0.4745657444000244, "loss_utility": 0.7645272016525269, "step": 2848 }, { "epoch": 2.655172413793103, "grad_norm": 1.1888849795761507, "learning_rate": 2.554366586123576e-06, "loss": 1.7301, "step": 2849 }, { "epoch": 2.655172413793103, "loss_reasoning": 0.5132155418395996, "loss_utility": 0.6152679324150085, "step": 2849 }, { "epoch": 2.6561043802423114, "grad_norm": 1.28588995050656, "learning_rate": 2.5474628926475662e-06, "loss": 1.6051, "step": 2850 }, { "epoch": 2.6561043802423114, "loss_reasoning": 0.49000924825668335, "loss_utility": 0.5853188633918762, "step": 2850 }, { "epoch": 2.6570363466915192, "grad_norm": 1.3633151642637102, "learning_rate": 2.5405591991715574e-06, "loss": 1.3667, "step": 2851 }, { "epoch": 2.6570363466915192, "loss_reasoning": 0.4707440733909607, "loss_utility": 0.9763067960739136, "step": 2851 }, { "epoch": 2.657968313140727, "grad_norm": 1.143498517487835, "learning_rate": 2.5336555056955477e-06, "loss": 1.2813, "step": 2852 }, { "epoch": 2.657968313140727, "loss_reasoning": 0.5421923398971558, "loss_utility": 1.2677606344223022, "step": 2852 }, { "epoch": 2.658900279589935, "grad_norm": 1.3655581741024725, "learning_rate": 2.5267518122195376e-06, "loss": 1.4722, "step": 2853 }, { "epoch": 2.658900279589935, "loss_reasoning": 0.526283323764801, "loss_utility": 1.6655187606811523, "step": 2853 }, { "epoch": 2.6598322460391426, "grad_norm": 1.2041585518235938, "learning_rate": 2.519848118743528e-06, "loss": 1.5876, "step": 2854 }, { "epoch": 2.6598322460391426, "loss_reasoning": 0.445200651884079, "loss_utility": 0.8248904943466187, "step": 2854 }, { "epoch": 2.6607642124883504, "grad_norm": 1.088137427449827, "learning_rate": 2.5129444252675182e-06, "loss": 1.4796, "step": 2855 }, { "epoch": 2.6607642124883504, "loss_reasoning": 0.5052751302719116, "loss_utility": 1.0157909393310547, "step": 2855 }, { "epoch": 2.661696178937558, "grad_norm": 0.9257505382330857, "learning_rate": 2.5060407317915085e-06, "loss": 1.5532, "step": 2856 }, { "epoch": 2.661696178937558, "loss_reasoning": 0.5395486354827881, "loss_utility": 0.6451313495635986, "step": 2856 }, { "epoch": 2.662628145386766, "grad_norm": 1.176037572864467, "learning_rate": 2.499137038315499e-06, "loss": 1.4085, "step": 2857 }, { "epoch": 2.662628145386766, "loss_reasoning": 0.43920427560806274, "loss_utility": 1.6321543455123901, "step": 2857 }, { "epoch": 2.6635601118359737, "grad_norm": 1.1597725495292386, "learning_rate": 2.492233344839489e-06, "loss": 1.4486, "step": 2858 }, { "epoch": 2.6635601118359737, "loss_reasoning": 0.4716108441352844, "loss_utility": 1.0048390626907349, "step": 2858 }, { "epoch": 2.664492078285182, "grad_norm": 1.4691286973275752, "learning_rate": 2.48532965136348e-06, "loss": 1.5784, "step": 2859 }, { "epoch": 2.664492078285182, "loss_reasoning": 0.5584235191345215, "loss_utility": 0.9219200611114502, "step": 2859 }, { "epoch": 2.6654240447343893, "grad_norm": 1.2186436240687621, "learning_rate": 2.47842595788747e-06, "loss": 1.6004, "step": 2860 }, { "epoch": 2.6654240447343893, "loss_reasoning": 0.4843728840351105, "loss_utility": 0.38708052039146423, "step": 2860 }, { "epoch": 2.6663560111835976, "grad_norm": 1.430389729308493, "learning_rate": 2.4715222644114605e-06, "loss": 1.2674, "step": 2861 }, { "epoch": 2.6663560111835976, "loss_reasoning": 0.5407331585884094, "loss_utility": 1.0669376850128174, "step": 2861 }, { "epoch": 2.6672879776328053, "grad_norm": 1.2079248855043692, "learning_rate": 2.464618570935451e-06, "loss": 1.5636, "step": 2862 }, { "epoch": 2.6672879776328053, "loss_reasoning": 0.4561603367328644, "loss_utility": 1.0785588026046753, "step": 2862 }, { "epoch": 2.668219944082013, "grad_norm": 1.1351587775354142, "learning_rate": 2.4577148774594407e-06, "loss": 1.5596, "step": 2863 }, { "epoch": 2.668219944082013, "loss_reasoning": 0.49262723326683044, "loss_utility": 0.9631412029266357, "step": 2863 }, { "epoch": 2.669151910531221, "grad_norm": 1.0835911969508587, "learning_rate": 2.4508111839834314e-06, "loss": 1.3847, "step": 2864 }, { "epoch": 2.669151910531221, "loss_reasoning": 0.5023736357688904, "loss_utility": 1.4685685634613037, "step": 2864 }, { "epoch": 2.6700838769804287, "grad_norm": 1.2791226908203772, "learning_rate": 2.4439074905074217e-06, "loss": 1.9711, "step": 2865 }, { "epoch": 2.6700838769804287, "loss_reasoning": 0.4657626450061798, "loss_utility": 1.2819390296936035, "step": 2865 }, { "epoch": 2.6710158434296365, "grad_norm": 1.1176735710606889, "learning_rate": 2.437003797031412e-06, "loss": 1.6879, "step": 2866 }, { "epoch": 2.6710158434296365, "loss_reasoning": 0.4933582544326782, "loss_utility": 1.035191297531128, "step": 2866 }, { "epoch": 2.6719478098788443, "grad_norm": 1.1557104534659914, "learning_rate": 2.4301001035554023e-06, "loss": 1.6117, "step": 2867 }, { "epoch": 2.6719478098788443, "loss_reasoning": 0.46946555376052856, "loss_utility": 0.7772473096847534, "step": 2867 }, { "epoch": 2.672879776328052, "grad_norm": 1.107333463986856, "learning_rate": 2.4231964100793927e-06, "loss": 1.3504, "step": 2868 }, { "epoch": 2.672879776328052, "loss_reasoning": 0.4783565402030945, "loss_utility": 1.1586449146270752, "step": 2868 }, { "epoch": 2.67381174277726, "grad_norm": 1.0124230766480342, "learning_rate": 2.416292716603383e-06, "loss": 1.1539, "step": 2869 }, { "epoch": 2.67381174277726, "loss_reasoning": 0.44852378964424133, "loss_utility": 1.0468645095825195, "step": 2869 }, { "epoch": 2.674743709226468, "grad_norm": 1.112742982884396, "learning_rate": 2.4093890231273733e-06, "loss": 1.3987, "step": 2870 }, { "epoch": 2.674743709226468, "loss_reasoning": 0.5693726539611816, "loss_utility": 0.7677852511405945, "step": 2870 }, { "epoch": 2.6756756756756754, "grad_norm": 1.2464635081459259, "learning_rate": 2.4024853296513636e-06, "loss": 1.5582, "step": 2871 }, { "epoch": 2.6756756756756754, "loss_reasoning": 0.47049859166145325, "loss_utility": 0.8464400172233582, "step": 2871 }, { "epoch": 2.6766076421248837, "grad_norm": 1.1182860878764278, "learning_rate": 2.395581636175354e-06, "loss": 1.1372, "step": 2872 }, { "epoch": 2.6766076421248837, "loss_reasoning": 0.47192609310150146, "loss_utility": 1.060120701789856, "step": 2872 }, { "epoch": 2.6775396085740915, "grad_norm": 1.2573825359965252, "learning_rate": 2.388677942699344e-06, "loss": 1.687, "step": 2873 }, { "epoch": 2.6775396085740915, "loss_reasoning": 0.485805481672287, "loss_utility": 1.201417326927185, "step": 2873 }, { "epoch": 2.6784715750232992, "grad_norm": 1.304134814639042, "learning_rate": 2.381774249223335e-06, "loss": 1.6934, "step": 2874 }, { "epoch": 2.6784715750232992, "loss_reasoning": 0.5132309198379517, "loss_utility": 1.055405855178833, "step": 2874 }, { "epoch": 2.679403541472507, "grad_norm": 1.1567130152277876, "learning_rate": 2.3748705557473252e-06, "loss": 1.5837, "step": 2875 }, { "epoch": 2.679403541472507, "loss_reasoning": 0.48826664686203003, "loss_utility": 0.9969849586486816, "step": 2875 }, { "epoch": 2.680335507921715, "grad_norm": 1.7611857490498048, "learning_rate": 2.367966862271315e-06, "loss": 1.4526, "step": 2876 }, { "epoch": 2.680335507921715, "loss_reasoning": 0.5184494256973267, "loss_utility": 0.925068199634552, "step": 2876 }, { "epoch": 2.6812674743709226, "grad_norm": 1.111978277794046, "learning_rate": 2.3610631687953054e-06, "loss": 1.2265, "step": 2877 }, { "epoch": 2.6812674743709226, "loss_reasoning": 0.5117291212081909, "loss_utility": 0.7945775985717773, "step": 2877 }, { "epoch": 2.6821994408201304, "grad_norm": 1.085186485569005, "learning_rate": 2.3541594753192957e-06, "loss": 1.4244, "step": 2878 }, { "epoch": 2.6821994408201304, "loss_reasoning": 0.4698728024959564, "loss_utility": 0.8050220012664795, "step": 2878 }, { "epoch": 2.683131407269338, "grad_norm": 1.1252200389136913, "learning_rate": 2.3472557818432865e-06, "loss": 1.2976, "step": 2879 }, { "epoch": 2.683131407269338, "loss_reasoning": 0.4711233377456665, "loss_utility": 0.7975051403045654, "step": 2879 }, { "epoch": 2.684063373718546, "grad_norm": 1.099612122396079, "learning_rate": 2.3403520883672768e-06, "loss": 1.4459, "step": 2880 }, { "epoch": 2.684063373718546, "loss_reasoning": 0.5063846707344055, "loss_utility": 1.0172709226608276, "step": 2880 }, { "epoch": 2.684995340167754, "grad_norm": 1.1654646279173557, "learning_rate": 2.333448394891267e-06, "loss": 1.6936, "step": 2881 }, { "epoch": 2.684995340167754, "loss_reasoning": 0.5220657587051392, "loss_utility": 0.7140916585922241, "step": 2881 }, { "epoch": 2.6859273066169616, "grad_norm": 1.2546103801321782, "learning_rate": 2.3265447014152574e-06, "loss": 1.5338, "step": 2882 }, { "epoch": 2.6859273066169616, "loss_reasoning": 0.46290963888168335, "loss_utility": 1.5969884395599365, "step": 2882 }, { "epoch": 2.68685927306617, "grad_norm": 1.0329832750099341, "learning_rate": 2.3196410079392477e-06, "loss": 1.6962, "step": 2883 }, { "epoch": 2.68685927306617, "loss_reasoning": 0.49503350257873535, "loss_utility": 1.0128060579299927, "step": 2883 }, { "epoch": 2.6877912395153776, "grad_norm": 1.3575479436067963, "learning_rate": 2.312737314463238e-06, "loss": 1.4676, "step": 2884 }, { "epoch": 2.6877912395153776, "loss_reasoning": 0.4987252354621887, "loss_utility": 0.63723224401474, "step": 2884 }, { "epoch": 2.6887232059645854, "grad_norm": 0.9928845259055448, "learning_rate": 2.3058336209872283e-06, "loss": 1.2916, "step": 2885 }, { "epoch": 2.6887232059645854, "loss_reasoning": 0.4844578504562378, "loss_utility": 0.7069246768951416, "step": 2885 }, { "epoch": 2.689655172413793, "grad_norm": 1.0457553576142953, "learning_rate": 2.2989299275112186e-06, "loss": 1.4155, "step": 2886 }, { "epoch": 2.689655172413793, "loss_reasoning": 0.5541187524795532, "loss_utility": 1.1181182861328125, "step": 2886 }, { "epoch": 2.690587138863001, "grad_norm": 1.11759810380569, "learning_rate": 2.292026234035209e-06, "loss": 1.3207, "step": 2887 }, { "epoch": 2.690587138863001, "loss_reasoning": 0.46378839015960693, "loss_utility": 0.6812944412231445, "step": 2887 }, { "epoch": 2.6915191053122087, "grad_norm": 1.2489136325707206, "learning_rate": 2.2851225405591993e-06, "loss": 1.3503, "step": 2888 }, { "epoch": 2.6915191053122087, "loss_reasoning": 0.5157457590103149, "loss_utility": 0.7931252121925354, "step": 2888 }, { "epoch": 2.6924510717614165, "grad_norm": 1.1333773172931372, "learning_rate": 2.27821884708319e-06, "loss": 1.3727, "step": 2889 }, { "epoch": 2.6924510717614165, "loss_reasoning": 0.4913308620452881, "loss_utility": 1.1265252828598022, "step": 2889 }, { "epoch": 2.6933830382106243, "grad_norm": 1.073876183733953, "learning_rate": 2.27131515360718e-06, "loss": 1.4957, "step": 2890 }, { "epoch": 2.6933830382106243, "loss_reasoning": 0.5325099229812622, "loss_utility": 1.1431171894073486, "step": 2890 }, { "epoch": 2.694315004659832, "grad_norm": 1.1015952657328332, "learning_rate": 2.26441146013117e-06, "loss": 1.3268, "step": 2891 }, { "epoch": 2.694315004659832, "loss_reasoning": 0.46676310896873474, "loss_utility": 0.853472113609314, "step": 2891 }, { "epoch": 2.6952469711090403, "grad_norm": 1.1457249749203746, "learning_rate": 2.2575077666551605e-06, "loss": 1.4255, "step": 2892 }, { "epoch": 2.6952469711090403, "loss_reasoning": 0.4999622404575348, "loss_utility": 0.9330625534057617, "step": 2892 }, { "epoch": 2.6961789375582477, "grad_norm": 1.0595566541726154, "learning_rate": 2.250604073179151e-06, "loss": 1.442, "step": 2893 }, { "epoch": 2.6961789375582477, "loss_reasoning": 0.5011311173439026, "loss_utility": 1.2559133768081665, "step": 2893 }, { "epoch": 2.697110904007456, "grad_norm": 1.3193296989761298, "learning_rate": 2.2437003797031415e-06, "loss": 1.4634, "step": 2894 }, { "epoch": 2.697110904007456, "loss_reasoning": 0.46688199043273926, "loss_utility": 0.9653741717338562, "step": 2894 }, { "epoch": 2.6980428704566637, "grad_norm": 1.0528836103035781, "learning_rate": 2.236796686227132e-06, "loss": 1.4186, "step": 2895 }, { "epoch": 2.6980428704566637, "loss_reasoning": 0.46126729249954224, "loss_utility": 1.3775858879089355, "step": 2895 }, { "epoch": 2.6989748369058715, "grad_norm": 1.2503261123758993, "learning_rate": 2.229892992751122e-06, "loss": 1.961, "step": 2896 }, { "epoch": 2.6989748369058715, "loss_reasoning": 0.47738003730773926, "loss_utility": 1.3559629917144775, "step": 2896 }, { "epoch": 2.6999068033550793, "grad_norm": 1.0479060972713938, "learning_rate": 2.2229892992751125e-06, "loss": 1.3488, "step": 2897 }, { "epoch": 2.6999068033550793, "loss_reasoning": 0.5203611850738525, "loss_utility": 1.8522472381591797, "step": 2897 }, { "epoch": 2.700838769804287, "grad_norm": 1.334956562166077, "learning_rate": 2.2160856057991028e-06, "loss": 1.7649, "step": 2898 }, { "epoch": 2.700838769804287, "loss_reasoning": 0.4784064292907715, "loss_utility": 0.5625213980674744, "step": 2898 }, { "epoch": 2.701770736253495, "grad_norm": 1.1263185550166073, "learning_rate": 2.209181912323093e-06, "loss": 1.3964, "step": 2899 }, { "epoch": 2.701770736253495, "loss_reasoning": 0.508213460445404, "loss_utility": 0.6588548421859741, "step": 2899 }, { "epoch": 2.7027027027027026, "grad_norm": 1.1019172839919484, "learning_rate": 2.2022782188470834e-06, "loss": 1.4524, "step": 2900 }, { "epoch": 2.7027027027027026, "loss_reasoning": 0.5001736879348755, "loss_utility": 0.8364787101745605, "step": 2900 }, { "epoch": 2.7036346691519104, "grad_norm": 1.1705074263046564, "learning_rate": 2.1953745253710737e-06, "loss": 1.2887, "step": 2901 }, { "epoch": 2.7036346691519104, "loss_reasoning": 0.43140751123428345, "loss_utility": 0.7475816011428833, "step": 2901 }, { "epoch": 2.704566635601118, "grad_norm": 1.1354172552495607, "learning_rate": 2.188470831895064e-06, "loss": 1.4304, "step": 2902 }, { "epoch": 2.704566635601118, "loss_reasoning": 0.494737833738327, "loss_utility": 0.7202049493789673, "step": 2902 }, { "epoch": 2.7054986020503264, "grad_norm": 1.1265197905479876, "learning_rate": 2.1815671384190543e-06, "loss": 1.1917, "step": 2903 }, { "epoch": 2.7054986020503264, "loss_reasoning": 0.5324453711509705, "loss_utility": 2.0537109375, "step": 2903 }, { "epoch": 2.706430568499534, "grad_norm": 1.3416789234122535, "learning_rate": 2.1746634449430446e-06, "loss": 1.8172, "step": 2904 }, { "epoch": 2.706430568499534, "loss_reasoning": 0.47080934047698975, "loss_utility": 0.614726185798645, "step": 2904 }, { "epoch": 2.707362534948742, "grad_norm": 1.0685668032300495, "learning_rate": 2.167759751467035e-06, "loss": 1.3733, "step": 2905 }, { "epoch": 2.707362534948742, "loss_reasoning": 0.5779355764389038, "loss_utility": 1.0256679058074951, "step": 2905 }, { "epoch": 2.70829450139795, "grad_norm": 1.2901228429462634, "learning_rate": 2.1608560579910252e-06, "loss": 1.5695, "step": 2906 }, { "epoch": 2.70829450139795, "loss_reasoning": 0.5033103823661804, "loss_utility": 1.4843165874481201, "step": 2906 }, { "epoch": 2.7092264678471576, "grad_norm": 1.6606784014295244, "learning_rate": 2.1539523645150156e-06, "loss": 1.5205, "step": 2907 }, { "epoch": 2.7092264678471576, "loss_reasoning": 0.4545767903327942, "loss_utility": 0.7351754903793335, "step": 2907 }, { "epoch": 2.7101584342963654, "grad_norm": 1.2927220695305586, "learning_rate": 2.147048671039006e-06, "loss": 1.2717, "step": 2908 }, { "epoch": 2.7101584342963654, "loss_reasoning": 0.5040253400802612, "loss_utility": 0.877896249294281, "step": 2908 }, { "epoch": 2.711090400745573, "grad_norm": 1.142119781153136, "learning_rate": 2.1401449775629966e-06, "loss": 1.3198, "step": 2909 }, { "epoch": 2.711090400745573, "loss_reasoning": 0.49633634090423584, "loss_utility": 1.287714958190918, "step": 2909 }, { "epoch": 2.712022367194781, "grad_norm": 1.489349998205745, "learning_rate": 2.133241284086987e-06, "loss": 1.6573, "step": 2910 }, { "epoch": 2.712022367194781, "loss_reasoning": 0.4759790897369385, "loss_utility": 0.7000206708908081, "step": 2910 }, { "epoch": 2.7129543336439887, "grad_norm": 1.1266266781783831, "learning_rate": 2.1263375906109772e-06, "loss": 1.4629, "step": 2911 }, { "epoch": 2.7129543336439887, "loss_reasoning": 0.4340283274650574, "loss_utility": 0.9417881369590759, "step": 2911 }, { "epoch": 2.7138863000931965, "grad_norm": 1.2338669782834193, "learning_rate": 2.1194338971349675e-06, "loss": 1.4609, "step": 2912 }, { "epoch": 2.7138863000931965, "loss_reasoning": 0.48639553785324097, "loss_utility": 0.5183067321777344, "step": 2912 }, { "epoch": 2.7148182665424043, "grad_norm": 1.041270744132083, "learning_rate": 2.1125302036589574e-06, "loss": 1.351, "step": 2913 }, { "epoch": 2.7148182665424043, "loss_reasoning": 0.458793580532074, "loss_utility": 0.550898015499115, "step": 2913 }, { "epoch": 2.7157502329916126, "grad_norm": 0.9364714172348189, "learning_rate": 2.105626510182948e-06, "loss": 1.1747, "step": 2914 }, { "epoch": 2.7157502329916126, "loss_reasoning": 0.48332679271698, "loss_utility": 1.3347837924957275, "step": 2914 }, { "epoch": 2.71668219944082, "grad_norm": 1.5189806490457842, "learning_rate": 2.0987228167069384e-06, "loss": 1.5225, "step": 2915 }, { "epoch": 2.71668219944082, "loss_reasoning": 0.4860362410545349, "loss_utility": 0.5585808753967285, "step": 2915 }, { "epoch": 2.717614165890028, "grad_norm": 1.0999922574339547, "learning_rate": 2.0918191232309288e-06, "loss": 1.4157, "step": 2916 }, { "epoch": 2.717614165890028, "loss_reasoning": 0.45637011528015137, "loss_utility": 0.7886319756507874, "step": 2916 }, { "epoch": 2.718546132339236, "grad_norm": 1.1809394697823523, "learning_rate": 2.084915429754919e-06, "loss": 1.3635, "step": 2917 }, { "epoch": 2.718546132339236, "loss_reasoning": 0.44345730543136597, "loss_utility": 1.262876272201538, "step": 2917 }, { "epoch": 2.7194780987884437, "grad_norm": 1.1753827440694307, "learning_rate": 2.0780117362789094e-06, "loss": 1.7006, "step": 2918 }, { "epoch": 2.7194780987884437, "loss_reasoning": 0.4655756652355194, "loss_utility": 1.2639377117156982, "step": 2918 }, { "epoch": 2.7204100652376515, "grad_norm": 1.1776452305828422, "learning_rate": 2.0711080428028997e-06, "loss": 1.5269, "step": 2919 }, { "epoch": 2.7204100652376515, "loss_reasoning": 0.4584694504737854, "loss_utility": 1.1518759727478027, "step": 2919 }, { "epoch": 2.7213420316868593, "grad_norm": 1.4519550398692673, "learning_rate": 2.06420434932689e-06, "loss": 1.576, "step": 2920 }, { "epoch": 2.7213420316868593, "loss_reasoning": 0.4462078809738159, "loss_utility": 0.9624526500701904, "step": 2920 }, { "epoch": 2.722273998136067, "grad_norm": 1.2220410915087307, "learning_rate": 2.0573006558508803e-06, "loss": 1.4144, "step": 2921 }, { "epoch": 2.722273998136067, "loss_reasoning": 0.45382407307624817, "loss_utility": 0.43605679273605347, "step": 2921 }, { "epoch": 2.723205964585275, "grad_norm": 1.1794919256984986, "learning_rate": 2.0503969623748706e-06, "loss": 1.4151, "step": 2922 }, { "epoch": 2.723205964585275, "loss_reasoning": 0.4287702441215515, "loss_utility": 0.8143327236175537, "step": 2922 }, { "epoch": 2.7241379310344827, "grad_norm": 1.2955433657721673, "learning_rate": 2.043493268898861e-06, "loss": 1.492, "step": 2923 }, { "epoch": 2.7241379310344827, "loss_reasoning": 0.5040600895881653, "loss_utility": 0.7777401208877563, "step": 2923 }, { "epoch": 2.7250698974836904, "grad_norm": 1.3194067589483947, "learning_rate": 2.0365895754228517e-06, "loss": 1.4641, "step": 2924 }, { "epoch": 2.7250698974836904, "loss_reasoning": 0.48293349146842957, "loss_utility": 0.9914988279342651, "step": 2924 }, { "epoch": 2.7260018639328987, "grad_norm": 1.2183199016504995, "learning_rate": 2.029685881946842e-06, "loss": 1.5086, "step": 2925 }, { "epoch": 2.7260018639328987, "loss_reasoning": 0.4428315758705139, "loss_utility": 0.3132869005203247, "step": 2925 }, { "epoch": 2.726933830382106, "grad_norm": 1.0702531209018218, "learning_rate": 2.0227821884708323e-06, "loss": 1.3009, "step": 2926 }, { "epoch": 2.726933830382106, "loss_reasoning": 0.5120832920074463, "loss_utility": 1.3353798389434814, "step": 2926 }, { "epoch": 2.7278657968313142, "grad_norm": 1.45203502099727, "learning_rate": 2.015878494994822e-06, "loss": 1.4786, "step": 2927 }, { "epoch": 2.7278657968313142, "loss_reasoning": 0.432314932346344, "loss_utility": 0.8648946285247803, "step": 2927 }, { "epoch": 2.728797763280522, "grad_norm": 1.0138249851282157, "learning_rate": 2.0089748015188125e-06, "loss": 1.3173, "step": 2928 }, { "epoch": 2.728797763280522, "loss_reasoning": 0.48531341552734375, "loss_utility": 0.6381804943084717, "step": 2928 }, { "epoch": 2.72972972972973, "grad_norm": 1.1886651107256634, "learning_rate": 2.002071108042803e-06, "loss": 1.2377, "step": 2929 }, { "epoch": 2.72972972972973, "loss_reasoning": 0.4539368450641632, "loss_utility": 1.0890991687774658, "step": 2929 }, { "epoch": 2.7306616961789376, "grad_norm": 1.0891191976945032, "learning_rate": 1.9951674145667935e-06, "loss": 1.4902, "step": 2930 }, { "epoch": 2.7306616961789376, "loss_reasoning": 0.43522903323173523, "loss_utility": 1.3582892417907715, "step": 2930 }, { "epoch": 2.7315936626281454, "grad_norm": 1.2691031618531563, "learning_rate": 1.988263721090784e-06, "loss": 1.613, "step": 2931 }, { "epoch": 2.7315936626281454, "loss_reasoning": 0.5271478891372681, "loss_utility": 1.0601646900177002, "step": 2931 }, { "epoch": 2.732525629077353, "grad_norm": 1.019277256500948, "learning_rate": 1.981360027614774e-06, "loss": 1.2233, "step": 2932 }, { "epoch": 2.732525629077353, "loss_reasoning": 0.48385190963745117, "loss_utility": 1.03212308883667, "step": 2932 }, { "epoch": 2.733457595526561, "grad_norm": 1.5447950998204012, "learning_rate": 1.9744563341387644e-06, "loss": 1.8172, "step": 2933 }, { "epoch": 2.733457595526561, "loss_reasoning": 0.46518635749816895, "loss_utility": 1.9698541164398193, "step": 2933 }, { "epoch": 2.7343895619757688, "grad_norm": 1.4916906420420586, "learning_rate": 1.9675526406627547e-06, "loss": 1.9114, "step": 2934 }, { "epoch": 2.7343895619757688, "loss_reasoning": 0.4638862609863281, "loss_utility": 0.6210991144180298, "step": 2934 }, { "epoch": 2.7353215284249766, "grad_norm": 1.0123667607011217, "learning_rate": 1.960648947186745e-06, "loss": 1.1246, "step": 2935 }, { "epoch": 2.7353215284249766, "loss_reasoning": 0.48563480377197266, "loss_utility": 1.350672960281372, "step": 2935 }, { "epoch": 2.736253494874185, "grad_norm": 1.2638086175147085, "learning_rate": 1.9537452537107354e-06, "loss": 1.6344, "step": 2936 }, { "epoch": 2.736253494874185, "loss_reasoning": 0.5161011219024658, "loss_utility": 0.6032114028930664, "step": 2936 }, { "epoch": 2.737185461323392, "grad_norm": 0.9737365827984324, "learning_rate": 1.9468415602347257e-06, "loss": 1.2768, "step": 2937 }, { "epoch": 2.737185461323392, "loss_reasoning": 0.4521843194961548, "loss_utility": 1.1734297275543213, "step": 2937 }, { "epoch": 2.7381174277726004, "grad_norm": 1.2943707346588258, "learning_rate": 1.939937866758716e-06, "loss": 1.643, "step": 2938 }, { "epoch": 2.7381174277726004, "loss_reasoning": 0.4810030460357666, "loss_utility": 0.7698622345924377, "step": 2938 }, { "epoch": 2.739049394221808, "grad_norm": 1.165817699730714, "learning_rate": 1.9330341732827067e-06, "loss": 1.5021, "step": 2939 }, { "epoch": 2.739049394221808, "loss_reasoning": 0.48748844861984253, "loss_utility": 0.49228835105895996, "step": 2939 }, { "epoch": 2.739981360671016, "grad_norm": 0.9131021464217164, "learning_rate": 1.926130479806697e-06, "loss": 1.1553, "step": 2940 }, { "epoch": 2.739981360671016, "loss_reasoning": 0.4611959457397461, "loss_utility": 1.1356428861618042, "step": 2940 }, { "epoch": 2.7409133271202237, "grad_norm": 1.310439031636727, "learning_rate": 1.919226786330687e-06, "loss": 1.5735, "step": 2941 }, { "epoch": 2.7409133271202237, "loss_reasoning": 0.5251374840736389, "loss_utility": 0.6398044228553772, "step": 2941 }, { "epoch": 2.7418452935694315, "grad_norm": 2.4185220248432584, "learning_rate": 1.9123230928546772e-06, "loss": 1.3643, "step": 2942 }, { "epoch": 2.7418452935694315, "loss_reasoning": 0.4608045220375061, "loss_utility": 0.9390655159950256, "step": 2942 }, { "epoch": 2.7427772600186393, "grad_norm": 1.1453872104926626, "learning_rate": 1.9054193993786677e-06, "loss": 1.5009, "step": 2943 }, { "epoch": 2.7427772600186393, "loss_reasoning": 0.4668406546115875, "loss_utility": 0.7047552466392517, "step": 2943 }, { "epoch": 2.743709226467847, "grad_norm": 1.0746896585910044, "learning_rate": 1.8985157059026583e-06, "loss": 1.4037, "step": 2944 }, { "epoch": 2.743709226467847, "loss_reasoning": 0.47499212622642517, "loss_utility": 1.092130422592163, "step": 2944 }, { "epoch": 2.744641192917055, "grad_norm": 1.0976624710246912, "learning_rate": 1.8916120124266486e-06, "loss": 1.1785, "step": 2945 }, { "epoch": 2.744641192917055, "loss_reasoning": 0.4976266324520111, "loss_utility": 0.9966756701469421, "step": 2945 }, { "epoch": 2.7455731593662627, "grad_norm": 1.2188202075996808, "learning_rate": 1.8847083189506389e-06, "loss": 1.5504, "step": 2946 }, { "epoch": 2.7455731593662627, "loss_reasoning": 0.5207245349884033, "loss_utility": 0.7121434211730957, "step": 2946 }, { "epoch": 2.746505125815471, "grad_norm": 1.2056472607531081, "learning_rate": 1.877804625474629e-06, "loss": 1.5026, "step": 2947 }, { "epoch": 2.746505125815471, "loss_reasoning": 0.5188835263252258, "loss_utility": 0.8791528940200806, "step": 2947 }, { "epoch": 2.7474370922646782, "grad_norm": 1.0331244047688288, "learning_rate": 1.8709009319986193e-06, "loss": 1.3417, "step": 2948 }, { "epoch": 2.7474370922646782, "loss_reasoning": 0.506182074546814, "loss_utility": 0.7229231595993042, "step": 2948 }, { "epoch": 2.7483690587138865, "grad_norm": 1.0041874236882948, "learning_rate": 1.8639972385226098e-06, "loss": 1.3627, "step": 2949 }, { "epoch": 2.7483690587138865, "loss_reasoning": 0.5100111961364746, "loss_utility": 0.5654563903808594, "step": 2949 }, { "epoch": 2.7493010251630943, "grad_norm": 1.0202397627556339, "learning_rate": 1.8570935450466001e-06, "loss": 1.3623, "step": 2950 }, { "epoch": 2.7493010251630943, "loss_reasoning": 0.42276668548583984, "loss_utility": 0.2873220443725586, "step": 2950 }, { "epoch": 2.750232991612302, "grad_norm": 1.183356168410165, "learning_rate": 1.8501898515705904e-06, "loss": 1.382, "step": 2951 }, { "epoch": 2.750232991612302, "loss_reasoning": 0.450957715511322, "loss_utility": 0.688818097114563, "step": 2951 }, { "epoch": 2.75116495806151, "grad_norm": 1.1453239407913127, "learning_rate": 1.8432861580945807e-06, "loss": 1.2558, "step": 2952 }, { "epoch": 2.75116495806151, "loss_reasoning": 0.5463386178016663, "loss_utility": 0.6657124161720276, "step": 2952 }, { "epoch": 2.7520969245107176, "grad_norm": 1.0750867709873753, "learning_rate": 1.836382464618571e-06, "loss": 1.4064, "step": 2953 }, { "epoch": 2.7520969245107176, "loss_reasoning": 0.4701448976993561, "loss_utility": 0.7719773054122925, "step": 2953 }, { "epoch": 2.7530288909599254, "grad_norm": 1.2978860564712662, "learning_rate": 1.8294787711425613e-06, "loss": 1.5485, "step": 2954 }, { "epoch": 2.7530288909599254, "loss_reasoning": 0.44804614782333374, "loss_utility": 0.872647762298584, "step": 2954 }, { "epoch": 2.753960857409133, "grad_norm": 1.3856900872431226, "learning_rate": 1.8225750776665519e-06, "loss": 1.5246, "step": 2955 }, { "epoch": 2.753960857409133, "loss_reasoning": 0.4440910220146179, "loss_utility": 0.8404051065444946, "step": 2955 }, { "epoch": 2.754892823858341, "grad_norm": 1.1137743773692894, "learning_rate": 1.8156713841905422e-06, "loss": 1.5507, "step": 2956 }, { "epoch": 2.754892823858341, "loss_reasoning": 0.46542301774024963, "loss_utility": 0.6390071511268616, "step": 2956 }, { "epoch": 2.755824790307549, "grad_norm": 1.3149146037835344, "learning_rate": 1.8087676907145325e-06, "loss": 1.2718, "step": 2957 }, { "epoch": 2.755824790307549, "loss_reasoning": 0.4805232286453247, "loss_utility": 0.966684877872467, "step": 2957 }, { "epoch": 2.756756756756757, "grad_norm": 1.3068676496868163, "learning_rate": 1.8018639972385226e-06, "loss": 1.5451, "step": 2958 }, { "epoch": 2.756756756756757, "loss_reasoning": 0.5024267435073853, "loss_utility": 0.8249235153198242, "step": 2958 }, { "epoch": 2.7576887232059644, "grad_norm": 1.3617836158983982, "learning_rate": 1.7949603037625129e-06, "loss": 1.5448, "step": 2959 }, { "epoch": 2.7576887232059644, "loss_reasoning": 0.45720016956329346, "loss_utility": 0.8822161555290222, "step": 2959 }, { "epoch": 2.7586206896551726, "grad_norm": 1.3497032569440548, "learning_rate": 1.7880566102865034e-06, "loss": 1.3821, "step": 2960 }, { "epoch": 2.7586206896551726, "loss_reasoning": 0.4770476818084717, "loss_utility": 0.8620278239250183, "step": 2960 }, { "epoch": 2.7595526561043804, "grad_norm": 1.4048203466766784, "learning_rate": 1.7811529168104937e-06, "loss": 1.6831, "step": 2961 }, { "epoch": 2.7595526561043804, "loss_reasoning": 0.5339357852935791, "loss_utility": 0.9512431621551514, "step": 2961 }, { "epoch": 2.760484622553588, "grad_norm": 1.2125784856691064, "learning_rate": 1.774249223334484e-06, "loss": 1.5345, "step": 2962 }, { "epoch": 2.760484622553588, "loss_reasoning": 0.4659600257873535, "loss_utility": 1.1971763372421265, "step": 2962 }, { "epoch": 2.761416589002796, "grad_norm": 1.2450748327018786, "learning_rate": 1.7673455298584743e-06, "loss": 1.4687, "step": 2963 }, { "epoch": 2.761416589002796, "loss_reasoning": 0.4949162006378174, "loss_utility": 0.9336687326431274, "step": 2963 }, { "epoch": 2.7623485554520038, "grad_norm": 1.223546075977728, "learning_rate": 1.7604418363824646e-06, "loss": 1.6422, "step": 2964 }, { "epoch": 2.7623485554520038, "loss_reasoning": 0.5106064081192017, "loss_utility": 0.8400276303291321, "step": 2964 }, { "epoch": 2.7632805219012115, "grad_norm": 1.142978772838023, "learning_rate": 1.7535381429064552e-06, "loss": 1.4756, "step": 2965 }, { "epoch": 2.7632805219012115, "loss_reasoning": 0.5397804975509644, "loss_utility": 0.979375958442688, "step": 2965 }, { "epoch": 2.7642124883504193, "grad_norm": 1.0903622847266636, "learning_rate": 1.7466344494304455e-06, "loss": 1.5306, "step": 2966 }, { "epoch": 2.7642124883504193, "loss_reasoning": 0.4677666425704956, "loss_utility": 0.45227885246276855, "step": 2966 }, { "epoch": 2.765144454799627, "grad_norm": 1.0839785812469473, "learning_rate": 1.7397307559544358e-06, "loss": 1.1007, "step": 2967 }, { "epoch": 2.765144454799627, "loss_reasoning": 0.5320950746536255, "loss_utility": 0.6700797080993652, "step": 2967 }, { "epoch": 2.766076421248835, "grad_norm": 1.2292953088714307, "learning_rate": 1.732827062478426e-06, "loss": 1.4958, "step": 2968 }, { "epoch": 2.766076421248835, "loss_reasoning": 0.47462332248687744, "loss_utility": 1.3174865245819092, "step": 2968 }, { "epoch": 2.767008387698043, "grad_norm": 1.0107697534010613, "learning_rate": 1.7259233690024164e-06, "loss": 1.6501, "step": 2969 }, { "epoch": 2.767008387698043, "loss_reasoning": 0.48139694333076477, "loss_utility": 0.7905222177505493, "step": 2969 }, { "epoch": 2.7679403541472505, "grad_norm": 1.0273306874110795, "learning_rate": 1.719019675526407e-06, "loss": 1.2002, "step": 2970 }, { "epoch": 2.7679403541472505, "loss_reasoning": 0.5031364560127258, "loss_utility": 0.6986020803451538, "step": 2970 }, { "epoch": 2.7688723205964587, "grad_norm": 1.1828789556442203, "learning_rate": 1.7121159820503972e-06, "loss": 1.3878, "step": 2971 }, { "epoch": 2.7688723205964587, "loss_reasoning": 0.41489362716674805, "loss_utility": 1.3393747806549072, "step": 2971 }, { "epoch": 2.7698042870456665, "grad_norm": 1.1544171039659166, "learning_rate": 1.7052122885743873e-06, "loss": 2.0135, "step": 2972 }, { "epoch": 2.7698042870456665, "loss_reasoning": 0.5341699123382568, "loss_utility": 1.6682543754577637, "step": 2972 }, { "epoch": 2.7707362534948743, "grad_norm": 1.2208313859828854, "learning_rate": 1.6983085950983776e-06, "loss": 1.686, "step": 2973 }, { "epoch": 2.7707362534948743, "loss_reasoning": 0.47867900133132935, "loss_utility": 0.6671826839447021, "step": 2973 }, { "epoch": 2.771668219944082, "grad_norm": 0.9889823760141574, "learning_rate": 1.691404901622368e-06, "loss": 1.4936, "step": 2974 }, { "epoch": 2.771668219944082, "loss_reasoning": 0.42785489559173584, "loss_utility": 1.0722507238388062, "step": 2974 }, { "epoch": 2.77260018639329, "grad_norm": 1.1124753235453648, "learning_rate": 1.6845012081463585e-06, "loss": 1.3559, "step": 2975 }, { "epoch": 2.77260018639329, "loss_reasoning": 0.5401639342308044, "loss_utility": 0.6802589297294617, "step": 2975 }, { "epoch": 2.7735321528424977, "grad_norm": 1.174300256344605, "learning_rate": 1.6775975146703488e-06, "loss": 1.325, "step": 2976 }, { "epoch": 2.7735321528424977, "loss_reasoning": 0.5239858627319336, "loss_utility": 0.5366213321685791, "step": 2976 }, { "epoch": 2.7744641192917054, "grad_norm": 1.1079077339508905, "learning_rate": 1.670693821194339e-06, "loss": 1.4594, "step": 2977 }, { "epoch": 2.7744641192917054, "loss_reasoning": 0.46330779790878296, "loss_utility": 1.1845512390136719, "step": 2977 }, { "epoch": 2.7753960857409132, "grad_norm": 1.4704209797679348, "learning_rate": 1.6637901277183294e-06, "loss": 1.7537, "step": 2978 }, { "epoch": 2.7753960857409132, "loss_reasoning": 0.4603751003742218, "loss_utility": 0.324770450592041, "step": 2978 }, { "epoch": 2.776328052190121, "grad_norm": 1.2010077785641187, "learning_rate": 1.6568864342423197e-06, "loss": 1.3903, "step": 2979 }, { "epoch": 2.776328052190121, "loss_reasoning": 0.47633591294288635, "loss_utility": 1.123650074005127, "step": 2979 }, { "epoch": 2.7772600186393293, "grad_norm": 1.3089037304520958, "learning_rate": 1.6499827407663102e-06, "loss": 1.5711, "step": 2980 }, { "epoch": 2.7772600186393293, "loss_reasoning": 0.5097631216049194, "loss_utility": 0.8880433440208435, "step": 2980 }, { "epoch": 2.7781919850885366, "grad_norm": 1.2737987237606179, "learning_rate": 1.6430790472903005e-06, "loss": 1.6068, "step": 2981 }, { "epoch": 2.7781919850885366, "loss_reasoning": 0.656024694442749, "loss_utility": 1.39677095413208, "step": 2981 }, { "epoch": 2.779123951537745, "grad_norm": 2.165356222778703, "learning_rate": 1.6361753538142908e-06, "loss": 1.9606, "step": 2982 }, { "epoch": 2.779123951537745, "loss_reasoning": 0.4604035019874573, "loss_utility": 0.48785847425460815, "step": 2982 }, { "epoch": 2.7800559179869526, "grad_norm": 1.05073010770304, "learning_rate": 1.629271660338281e-06, "loss": 1.1409, "step": 2983 }, { "epoch": 2.7800559179869526, "loss_reasoning": 0.5109918117523193, "loss_utility": 1.0218695402145386, "step": 2983 }, { "epoch": 2.7809878844361604, "grad_norm": 1.08485812341923, "learning_rate": 1.6223679668622713e-06, "loss": 1.5395, "step": 2984 }, { "epoch": 2.7809878844361604, "loss_reasoning": 0.4419977068901062, "loss_utility": 0.8122888803482056, "step": 2984 }, { "epoch": 2.781919850885368, "grad_norm": 1.1204423450950005, "learning_rate": 1.615464273386262e-06, "loss": 1.4315, "step": 2985 }, { "epoch": 2.781919850885368, "loss_reasoning": 0.5246937274932861, "loss_utility": 0.9053352475166321, "step": 2985 }, { "epoch": 2.782851817334576, "grad_norm": 1.3710050854116465, "learning_rate": 1.608560579910252e-06, "loss": 1.2429, "step": 2986 }, { "epoch": 2.782851817334576, "loss_reasoning": 0.49542659521102905, "loss_utility": 0.9236125349998474, "step": 2986 }, { "epoch": 2.7837837837837838, "grad_norm": 1.2695619785505932, "learning_rate": 1.6016568864342424e-06, "loss": 1.4039, "step": 2987 }, { "epoch": 2.7837837837837838, "loss_reasoning": 0.5141045451164246, "loss_utility": 0.6276190280914307, "step": 2987 }, { "epoch": 2.7847157502329916, "grad_norm": 1.1585433752401937, "learning_rate": 1.5947531929582327e-06, "loss": 1.5842, "step": 2988 }, { "epoch": 2.7847157502329916, "loss_reasoning": 0.43281328678131104, "loss_utility": 0.9552302360534668, "step": 2988 }, { "epoch": 2.7856477166821993, "grad_norm": 1.1711125386630323, "learning_rate": 1.587849499482223e-06, "loss": 1.3682, "step": 2989 }, { "epoch": 2.7856477166821993, "loss_reasoning": 0.5862225294113159, "loss_utility": 0.7360630035400391, "step": 2989 }, { "epoch": 2.786579683131407, "grad_norm": 1.2199891865365866, "learning_rate": 1.5809458060062135e-06, "loss": 1.347, "step": 2990 }, { "epoch": 2.786579683131407, "loss_reasoning": 0.45419374108314514, "loss_utility": 0.3286830186843872, "step": 2990 }, { "epoch": 2.7875116495806154, "grad_norm": 1.0222911923456357, "learning_rate": 1.5740421125302038e-06, "loss": 1.166, "step": 2991 }, { "epoch": 2.7875116495806154, "loss_reasoning": 0.4362039566040039, "loss_utility": 0.882712185382843, "step": 2991 }, { "epoch": 2.7884436160298227, "grad_norm": 1.0834029385286221, "learning_rate": 1.5671384190541941e-06, "loss": 1.376, "step": 2992 }, { "epoch": 2.7884436160298227, "loss_reasoning": 0.48629462718963623, "loss_utility": 0.9070010185241699, "step": 2992 }, { "epoch": 2.789375582479031, "grad_norm": 1.180618059340675, "learning_rate": 1.5602347255781845e-06, "loss": 1.4957, "step": 2993 }, { "epoch": 2.789375582479031, "loss_reasoning": 0.4598899781703949, "loss_utility": 0.7294796705245972, "step": 2993 }, { "epoch": 2.7903075489282387, "grad_norm": 1.2559988239087607, "learning_rate": 1.5533310321021748e-06, "loss": 1.3725, "step": 2994 }, { "epoch": 2.7903075489282387, "loss_reasoning": 0.45427584648132324, "loss_utility": 1.252101182937622, "step": 2994 }, { "epoch": 2.7912395153774465, "grad_norm": 1.1180530739194845, "learning_rate": 1.5464273386261653e-06, "loss": 1.5029, "step": 2995 }, { "epoch": 2.7912395153774465, "loss_reasoning": 0.5093640089035034, "loss_utility": 0.8087407350540161, "step": 2995 }, { "epoch": 2.7921714818266543, "grad_norm": 1.251895080657959, "learning_rate": 1.5395236451501556e-06, "loss": 1.2985, "step": 2996 }, { "epoch": 2.7921714818266543, "loss_reasoning": 0.4589642584323883, "loss_utility": 0.22647035121917725, "step": 2996 }, { "epoch": 2.793103448275862, "grad_norm": 1.1097443644379963, "learning_rate": 1.5326199516741457e-06, "loss": 1.2554, "step": 2997 }, { "epoch": 2.793103448275862, "loss_reasoning": 0.4743484556674957, "loss_utility": 1.1122734546661377, "step": 2997 }, { "epoch": 2.79403541472507, "grad_norm": 1.172477133287276, "learning_rate": 1.525716258198136e-06, "loss": 1.4852, "step": 2998 }, { "epoch": 2.79403541472507, "loss_reasoning": 0.4943199157714844, "loss_utility": 1.3292944431304932, "step": 2998 }, { "epoch": 2.7949673811742777, "grad_norm": 1.1842388145758926, "learning_rate": 1.5188125647221263e-06, "loss": 1.7186, "step": 2999 }, { "epoch": 2.7949673811742777, "loss_reasoning": 0.44718605279922485, "loss_utility": 1.3827850818634033, "step": 2999 }, { "epoch": 2.7958993476234855, "grad_norm": 1.0345641955014946, "learning_rate": 1.5119088712461168e-06, "loss": 1.4694, "step": 3000 }, { "epoch": 2.7958993476234855, "loss_reasoning": 0.627802312374115, "loss_utility": 0.37065815925598145, "step": 3000 }, { "epoch": 2.7968313140726933, "grad_norm": 1.0903654688046027, "learning_rate": 1.5050051777701071e-06, "loss": 1.3116, "step": 3001 }, { "epoch": 2.7968313140726933, "loss_reasoning": 0.56316077709198, "loss_utility": 0.6022759675979614, "step": 3001 }, { "epoch": 2.7977632805219015, "grad_norm": 1.0037141110359429, "learning_rate": 1.4981014842940974e-06, "loss": 1.5374, "step": 3002 }, { "epoch": 2.7977632805219015, "loss_reasoning": 0.5096836090087891, "loss_utility": 1.109354019165039, "step": 3002 }, { "epoch": 2.798695246971109, "grad_norm": 1.1399705554167094, "learning_rate": 1.4911977908180878e-06, "loss": 1.348, "step": 3003 }, { "epoch": 2.798695246971109, "loss_reasoning": 0.48144423961639404, "loss_utility": 0.3808731436729431, "step": 3003 }, { "epoch": 2.799627213420317, "grad_norm": 1.2732874979550313, "learning_rate": 1.484294097342078e-06, "loss": 1.3232, "step": 3004 }, { "epoch": 2.799627213420317, "loss_reasoning": 0.5074878334999084, "loss_utility": 0.9720852375030518, "step": 3004 }, { "epoch": 2.800559179869525, "grad_norm": 1.194027692862487, "learning_rate": 1.4773904038660686e-06, "loss": 1.4005, "step": 3005 }, { "epoch": 2.800559179869525, "loss_reasoning": 0.5208885073661804, "loss_utility": 0.8544920682907104, "step": 3005 }, { "epoch": 2.8014911463187326, "grad_norm": 1.1365480139969706, "learning_rate": 1.470486710390059e-06, "loss": 1.4745, "step": 3006 }, { "epoch": 2.8014911463187326, "loss_reasoning": 0.5484871864318848, "loss_utility": 1.465983510017395, "step": 3006 }, { "epoch": 2.8024231127679404, "grad_norm": 1.0572394119196664, "learning_rate": 1.4635830169140492e-06, "loss": 1.583, "step": 3007 }, { "epoch": 2.8024231127679404, "loss_reasoning": 0.4677931070327759, "loss_utility": 1.6179900169372559, "step": 3007 }, { "epoch": 2.803355079217148, "grad_norm": 1.1969886943422956, "learning_rate": 1.4566793234380395e-06, "loss": 1.7426, "step": 3008 }, { "epoch": 2.803355079217148, "loss_reasoning": 0.45091742277145386, "loss_utility": 1.1021382808685303, "step": 3008 }, { "epoch": 2.804287045666356, "grad_norm": 1.1998276056594923, "learning_rate": 1.4497756299620296e-06, "loss": 1.6057, "step": 3009 }, { "epoch": 2.804287045666356, "loss_reasoning": 0.4578651189804077, "loss_utility": 0.765915036201477, "step": 3009 }, { "epoch": 2.805219012115564, "grad_norm": 1.1409172687547082, "learning_rate": 1.4428719364860203e-06, "loss": 1.4594, "step": 3010 }, { "epoch": 2.805219012115564, "loss_reasoning": 0.47494062781333923, "loss_utility": 1.129004955291748, "step": 3010 }, { "epoch": 2.8061509785647716, "grad_norm": 1.109756695323266, "learning_rate": 1.4359682430100104e-06, "loss": 1.4508, "step": 3011 }, { "epoch": 2.8061509785647716, "loss_reasoning": 0.45908474922180176, "loss_utility": 0.9995938539505005, "step": 3011 }, { "epoch": 2.8070829450139794, "grad_norm": 0.8974489571546261, "learning_rate": 1.4290645495340007e-06, "loss": 1.1563, "step": 3012 }, { "epoch": 2.8070829450139794, "loss_reasoning": 0.4869639575481415, "loss_utility": 0.9592272043228149, "step": 3012 }, { "epoch": 2.8080149114631876, "grad_norm": 1.2108446639045953, "learning_rate": 1.422160856057991e-06, "loss": 1.5488, "step": 3013 }, { "epoch": 2.8080149114631876, "loss_reasoning": 0.4753507971763611, "loss_utility": 0.8609464168548584, "step": 3013 }, { "epoch": 2.808946877912395, "grad_norm": 1.3325060989483408, "learning_rate": 1.4152571625819814e-06, "loss": 1.3502, "step": 3014 }, { "epoch": 2.808946877912395, "loss_reasoning": 0.45959073305130005, "loss_utility": 0.845100998878479, "step": 3014 }, { "epoch": 2.809878844361603, "grad_norm": 1.203174932948164, "learning_rate": 1.4083534691059719e-06, "loss": 1.5554, "step": 3015 }, { "epoch": 2.809878844361603, "loss_reasoning": 0.48751452565193176, "loss_utility": 1.270201563835144, "step": 3015 }, { "epoch": 2.810810810810811, "grad_norm": 1.1609977409246273, "learning_rate": 1.4014497756299622e-06, "loss": 1.2836, "step": 3016 }, { "epoch": 2.810810810810811, "loss_reasoning": 0.4691693186759949, "loss_utility": 0.5794448852539062, "step": 3016 }, { "epoch": 2.8117427772600188, "grad_norm": 1.131593008868582, "learning_rate": 1.3945460821539525e-06, "loss": 1.426, "step": 3017 }, { "epoch": 2.8117427772600188, "loss_reasoning": 0.4642411768436432, "loss_utility": 1.013963222503662, "step": 3017 }, { "epoch": 2.8126747437092265, "grad_norm": 1.5537089731669764, "learning_rate": 1.3876423886779428e-06, "loss": 1.6943, "step": 3018 }, { "epoch": 2.8126747437092265, "loss_reasoning": 0.46040499210357666, "loss_utility": 0.7435826659202576, "step": 3018 }, { "epoch": 2.8136067101584343, "grad_norm": 1.0039484995231576, "learning_rate": 1.3807386952019331e-06, "loss": 1.4154, "step": 3019 }, { "epoch": 2.8136067101584343, "loss_reasoning": 0.49067801237106323, "loss_utility": 0.8113395571708679, "step": 3019 }, { "epoch": 2.814538676607642, "grad_norm": 1.2876306113804126, "learning_rate": 1.3738350017259236e-06, "loss": 1.3701, "step": 3020 }, { "epoch": 2.814538676607642, "loss_reasoning": 0.46931761503219604, "loss_utility": 1.3209673166275024, "step": 3020 }, { "epoch": 2.81547064305685, "grad_norm": 1.4686201704635387, "learning_rate": 1.366931308249914e-06, "loss": 1.6432, "step": 3021 }, { "epoch": 2.81547064305685, "loss_reasoning": 0.4824032187461853, "loss_utility": 0.9457855820655823, "step": 3021 }, { "epoch": 2.8164026095060577, "grad_norm": 1.2589568939374132, "learning_rate": 1.3600276147739043e-06, "loss": 1.5312, "step": 3022 }, { "epoch": 2.8164026095060577, "loss_reasoning": 0.5251350402832031, "loss_utility": 0.5764126777648926, "step": 3022 }, { "epoch": 2.8173345759552655, "grad_norm": 1.0280988938069702, "learning_rate": 1.3531239212978944e-06, "loss": 1.2869, "step": 3023 }, { "epoch": 2.8173345759552655, "loss_reasoning": 0.4458402991294861, "loss_utility": 0.519379198551178, "step": 3023 }, { "epoch": 2.8182665424044733, "grad_norm": 1.1430123938467753, "learning_rate": 1.3462202278218847e-06, "loss": 1.2739, "step": 3024 }, { "epoch": 2.8182665424044733, "loss_reasoning": 0.5109078288078308, "loss_utility": 1.0561747550964355, "step": 3024 }, { "epoch": 2.819198508853681, "grad_norm": 1.364292187157888, "learning_rate": 1.3393165343458752e-06, "loss": 1.6672, "step": 3025 }, { "epoch": 2.819198508853681, "loss_reasoning": 0.5125213861465454, "loss_utility": 0.9067248106002808, "step": 3025 }, { "epoch": 2.8201304753028893, "grad_norm": 1.306688709944688, "learning_rate": 1.3324128408698655e-06, "loss": 1.3785, "step": 3026 }, { "epoch": 2.8201304753028893, "loss_reasoning": 0.453082799911499, "loss_utility": 1.004835605621338, "step": 3026 }, { "epoch": 2.821062441752097, "grad_norm": 1.3309652663334675, "learning_rate": 1.3255091473938558e-06, "loss": 1.757, "step": 3027 }, { "epoch": 2.821062441752097, "loss_reasoning": 0.4994722008705139, "loss_utility": 0.7387232780456543, "step": 3027 }, { "epoch": 2.821994408201305, "grad_norm": 1.295518398657246, "learning_rate": 1.3186054539178461e-06, "loss": 1.4341, "step": 3028 }, { "epoch": 2.821994408201305, "loss_reasoning": 0.4799728989601135, "loss_utility": 1.3741495609283447, "step": 3028 }, { "epoch": 2.8229263746505127, "grad_norm": 1.0199214828383947, "learning_rate": 1.3117017604418364e-06, "loss": 1.3583, "step": 3029 }, { "epoch": 2.8229263746505127, "loss_reasoning": 0.49816644191741943, "loss_utility": 0.9788063168525696, "step": 3029 }, { "epoch": 2.8238583410997204, "grad_norm": 1.2317537426473848, "learning_rate": 1.304798066965827e-06, "loss": 1.4007, "step": 3030 }, { "epoch": 2.8238583410997204, "loss_reasoning": 0.48115068674087524, "loss_utility": 1.3058271408081055, "step": 3030 }, { "epoch": 2.8247903075489282, "grad_norm": 1.1892757077519547, "learning_rate": 1.2978943734898173e-06, "loss": 1.3412, "step": 3031 }, { "epoch": 2.8247903075489282, "loss_reasoning": 0.5040590763092041, "loss_utility": 0.650951623916626, "step": 3031 }, { "epoch": 2.825722273998136, "grad_norm": 1.2919780302972335, "learning_rate": 1.2909906800138076e-06, "loss": 1.4433, "step": 3032 }, { "epoch": 2.825722273998136, "loss_reasoning": 0.44489774107933044, "loss_utility": 0.8923473954200745, "step": 3032 }, { "epoch": 2.826654240447344, "grad_norm": 1.1734115367341758, "learning_rate": 1.2840869865377979e-06, "loss": 1.3027, "step": 3033 }, { "epoch": 2.826654240447344, "loss_reasoning": 0.4599149227142334, "loss_utility": 0.9155024290084839, "step": 3033 }, { "epoch": 2.8275862068965516, "grad_norm": 1.100620986125146, "learning_rate": 1.277183293061788e-06, "loss": 1.2459, "step": 3034 }, { "epoch": 2.8275862068965516, "loss_reasoning": 0.49604785442352295, "loss_utility": 1.3269344568252563, "step": 3034 }, { "epoch": 2.8285181733457594, "grad_norm": 1.6690826140186605, "learning_rate": 1.2702795995857787e-06, "loss": 1.6948, "step": 3035 }, { "epoch": 2.8285181733457594, "loss_reasoning": 0.5053510665893555, "loss_utility": 0.6396387219429016, "step": 3035 }, { "epoch": 2.829450139794967, "grad_norm": 1.2009065093998674, "learning_rate": 1.2633759061097688e-06, "loss": 1.3578, "step": 3036 }, { "epoch": 2.829450139794967, "loss_reasoning": 0.49522289633750916, "loss_utility": 0.3219785690307617, "step": 3036 }, { "epoch": 2.8303821062441754, "grad_norm": 1.7570878906394016, "learning_rate": 1.2564722126337591e-06, "loss": 1.6434, "step": 3037 }, { "epoch": 2.8303821062441754, "loss_reasoning": 0.465320885181427, "loss_utility": 1.1284620761871338, "step": 3037 }, { "epoch": 2.831314072693383, "grad_norm": 1.2392373042053324, "learning_rate": 1.2495685191577494e-06, "loss": 1.5811, "step": 3038 }, { "epoch": 2.831314072693383, "loss_reasoning": 0.496320515871048, "loss_utility": 0.6145251393318176, "step": 3038 }, { "epoch": 2.832246039142591, "grad_norm": 1.2868268636905547, "learning_rate": 1.24266482568174e-06, "loss": 1.4808, "step": 3039 }, { "epoch": 2.832246039142591, "loss_reasoning": 0.48610764741897583, "loss_utility": 0.6978825926780701, "step": 3039 }, { "epoch": 2.8331780055917988, "grad_norm": 1.132017193350829, "learning_rate": 1.2357611322057302e-06, "loss": 1.4598, "step": 3040 }, { "epoch": 2.8331780055917988, "loss_reasoning": 0.4794856607913971, "loss_utility": 1.244117021560669, "step": 3040 }, { "epoch": 2.8341099720410066, "grad_norm": 1.1688660278110523, "learning_rate": 1.2288574387297203e-06, "loss": 1.3542, "step": 3041 }, { "epoch": 2.8341099720410066, "loss_reasoning": 0.44743093848228455, "loss_utility": 0.38039079308509827, "step": 3041 }, { "epoch": 2.8350419384902144, "grad_norm": 1.0812246233824008, "learning_rate": 1.2219537452537109e-06, "loss": 1.2936, "step": 3042 }, { "epoch": 2.8350419384902144, "loss_reasoning": 0.45192810893058777, "loss_utility": 0.8608909249305725, "step": 3042 }, { "epoch": 2.835973904939422, "grad_norm": 1.0065635033522315, "learning_rate": 1.2150500517777012e-06, "loss": 1.4627, "step": 3043 }, { "epoch": 2.835973904939422, "loss_reasoning": 0.4887845516204834, "loss_utility": 0.8212243318557739, "step": 3043 }, { "epoch": 2.83690587138863, "grad_norm": 1.253677201222485, "learning_rate": 1.2081463583016915e-06, "loss": 1.3255, "step": 3044 }, { "epoch": 2.83690587138863, "loss_reasoning": 0.4700760543346405, "loss_utility": 0.7355124950408936, "step": 3044 }, { "epoch": 2.8378378378378377, "grad_norm": 1.1730065059873223, "learning_rate": 1.2012426648256818e-06, "loss": 1.3605, "step": 3045 }, { "epoch": 2.8378378378378377, "loss_reasoning": 0.5059856176376343, "loss_utility": 0.9929594397544861, "step": 3045 }, { "epoch": 2.8387698042870455, "grad_norm": 1.0446379150601357, "learning_rate": 1.194338971349672e-06, "loss": 1.5464, "step": 3046 }, { "epoch": 2.8387698042870455, "loss_reasoning": 0.47388893365859985, "loss_utility": 0.9834898114204407, "step": 3046 }, { "epoch": 2.8397017707362533, "grad_norm": 1.0452875073803005, "learning_rate": 1.1874352778736626e-06, "loss": 1.3629, "step": 3047 }, { "epoch": 2.8397017707362533, "loss_reasoning": 0.4645553529262543, "loss_utility": 0.7356818914413452, "step": 3047 }, { "epoch": 2.8406337371854615, "grad_norm": 1.22223939746232, "learning_rate": 1.1805315843976527e-06, "loss": 1.1617, "step": 3048 }, { "epoch": 2.8406337371854615, "loss_reasoning": 0.5499037504196167, "loss_utility": 0.62320876121521, "step": 3048 }, { "epoch": 2.8415657036346693, "grad_norm": 1.3608937940567716, "learning_rate": 1.1736278909216432e-06, "loss": 1.4085, "step": 3049 }, { "epoch": 2.8415657036346693, "loss_reasoning": 0.5418730974197388, "loss_utility": 1.327483892440796, "step": 3049 }, { "epoch": 2.842497670083877, "grad_norm": 1.3440934950054373, "learning_rate": 1.1667241974456335e-06, "loss": 1.3814, "step": 3050 }, { "epoch": 2.842497670083877, "loss_reasoning": 0.46384957432746887, "loss_utility": 1.211103916168213, "step": 3050 }, { "epoch": 2.843429636533085, "grad_norm": 1.437829197147113, "learning_rate": 1.1598205039696239e-06, "loss": 1.3851, "step": 3051 }, { "epoch": 2.843429636533085, "loss_reasoning": 0.5716668963432312, "loss_utility": 1.1047192811965942, "step": 3051 }, { "epoch": 2.8443616029822927, "grad_norm": 1.4039956674806005, "learning_rate": 1.1529168104936142e-06, "loss": 1.3575, "step": 3052 }, { "epoch": 2.8443616029822927, "loss_reasoning": 0.4675513803958893, "loss_utility": 0.5552394986152649, "step": 3052 }, { "epoch": 2.8452935694315005, "grad_norm": 1.1508192314815728, "learning_rate": 1.1460131170176045e-06, "loss": 1.3422, "step": 3053 }, { "epoch": 2.8452935694315005, "loss_reasoning": 0.48008859157562256, "loss_utility": 0.8653793334960938, "step": 3053 }, { "epoch": 2.8462255358807083, "grad_norm": 1.2122169761080153, "learning_rate": 1.139109423541595e-06, "loss": 1.4091, "step": 3054 }, { "epoch": 2.8462255358807083, "loss_reasoning": 0.45041221380233765, "loss_utility": 0.9546570181846619, "step": 3054 }, { "epoch": 2.847157502329916, "grad_norm": 1.5320633533040113, "learning_rate": 1.132205730065585e-06, "loss": 1.5555, "step": 3055 }, { "epoch": 2.847157502329916, "loss_reasoning": 0.48967695236206055, "loss_utility": 1.127980351448059, "step": 3055 }, { "epoch": 2.848089468779124, "grad_norm": 1.2445358992723092, "learning_rate": 1.1253020365895754e-06, "loss": 1.4829, "step": 3056 }, { "epoch": 2.848089468779124, "loss_reasoning": 0.46854764223098755, "loss_utility": 1.3679227828979492, "step": 3056 }, { "epoch": 2.8490214352283316, "grad_norm": 1.2197035095657731, "learning_rate": 1.118398343113566e-06, "loss": 1.6543, "step": 3057 }, { "epoch": 2.8490214352283316, "loss_reasoning": 0.5317995548248291, "loss_utility": 0.8769342303276062, "step": 3057 }, { "epoch": 2.8499534016775394, "grad_norm": 1.4088772483158505, "learning_rate": 1.1114946496375562e-06, "loss": 1.5556, "step": 3058 }, { "epoch": 2.8499534016775394, "loss_reasoning": 0.4607912302017212, "loss_utility": 1.0976431369781494, "step": 3058 }, { "epoch": 2.8508853681267476, "grad_norm": 1.2859957073131334, "learning_rate": 1.1045909561615465e-06, "loss": 1.6016, "step": 3059 }, { "epoch": 2.8508853681267476, "loss_reasoning": 0.4716227650642395, "loss_utility": 0.5167189240455627, "step": 3059 }, { "epoch": 2.8518173345759554, "grad_norm": 1.2703082346084704, "learning_rate": 1.0976872626855368e-06, "loss": 1.4108, "step": 3060 }, { "epoch": 2.8518173345759554, "loss_reasoning": 0.47614774107933044, "loss_utility": 0.7825953960418701, "step": 3060 }, { "epoch": 2.852749301025163, "grad_norm": 1.1796555793359633, "learning_rate": 1.0907835692095272e-06, "loss": 1.3388, "step": 3061 }, { "epoch": 2.852749301025163, "loss_reasoning": 0.5204927921295166, "loss_utility": 0.8082786202430725, "step": 3061 }, { "epoch": 2.853681267474371, "grad_norm": 1.2548403323268764, "learning_rate": 1.0838798757335175e-06, "loss": 1.4035, "step": 3062 }, { "epoch": 2.853681267474371, "loss_reasoning": 0.5142292380332947, "loss_utility": 1.5765695571899414, "step": 3062 }, { "epoch": 2.854613233923579, "grad_norm": 1.3280602651364986, "learning_rate": 1.0769761822575078e-06, "loss": 1.5486, "step": 3063 }, { "epoch": 2.854613233923579, "loss_reasoning": 0.556858479976654, "loss_utility": 0.875453770160675, "step": 3063 }, { "epoch": 2.8555452003727866, "grad_norm": 1.1320780302592488, "learning_rate": 1.0700724887814983e-06, "loss": 1.4356, "step": 3064 }, { "epoch": 2.8555452003727866, "loss_reasoning": 0.48235464096069336, "loss_utility": 0.9175893068313599, "step": 3064 }, { "epoch": 2.8564771668219944, "grad_norm": 1.3247111777038445, "learning_rate": 1.0631687953054886e-06, "loss": 1.4154, "step": 3065 }, { "epoch": 2.8564771668219944, "loss_reasoning": 0.4809999465942383, "loss_utility": 0.9433885812759399, "step": 3065 }, { "epoch": 2.857409133271202, "grad_norm": 1.3306881711195566, "learning_rate": 1.0562651018294787e-06, "loss": 1.6255, "step": 3066 }, { "epoch": 2.857409133271202, "loss_reasoning": 0.4764537811279297, "loss_utility": 0.6521978378295898, "step": 3066 }, { "epoch": 2.85834109972041, "grad_norm": 1.178721375659992, "learning_rate": 1.0493614083534692e-06, "loss": 1.5256, "step": 3067 }, { "epoch": 2.85834109972041, "loss_reasoning": 0.49747195839881897, "loss_utility": 0.5404994487762451, "step": 3067 }, { "epoch": 2.8592730661696177, "grad_norm": 1.1515306657285702, "learning_rate": 1.0424577148774595e-06, "loss": 1.3556, "step": 3068 }, { "epoch": 2.8592730661696177, "loss_reasoning": 0.46441420912742615, "loss_utility": 0.9067106246948242, "step": 3068 }, { "epoch": 2.8602050326188255, "grad_norm": 0.9823423882336532, "learning_rate": 1.0355540214014498e-06, "loss": 1.2105, "step": 3069 }, { "epoch": 2.8602050326188255, "loss_reasoning": 0.48377320170402527, "loss_utility": 1.07657790184021, "step": 3069 }, { "epoch": 2.8611369990680338, "grad_norm": 1.3693235896351366, "learning_rate": 1.0286503279254402e-06, "loss": 1.8998, "step": 3070 }, { "epoch": 2.8611369990680338, "loss_reasoning": 0.46916526556015015, "loss_utility": 0.8217675685882568, "step": 3070 }, { "epoch": 2.862068965517241, "grad_norm": 1.3051258254441915, "learning_rate": 1.0217466344494305e-06, "loss": 1.5734, "step": 3071 }, { "epoch": 2.862068965517241, "loss_reasoning": 0.5631362199783325, "loss_utility": 1.3281641006469727, "step": 3071 }, { "epoch": 2.8630009319664493, "grad_norm": 1.2111138216850874, "learning_rate": 1.014842940973421e-06, "loss": 1.5206, "step": 3072 }, { "epoch": 2.8630009319664493, "loss_reasoning": 0.4674679636955261, "loss_utility": 1.346513271331787, "step": 3072 }, { "epoch": 2.863932898415657, "grad_norm": 1.4854404747448549, "learning_rate": 1.007939247497411e-06, "loss": 1.7173, "step": 3073 }, { "epoch": 2.863932898415657, "loss_reasoning": 0.525749683380127, "loss_utility": 0.883324921131134, "step": 3073 }, { "epoch": 2.864864864864865, "grad_norm": 1.2745646056668916, "learning_rate": 1.0010355540214016e-06, "loss": 1.3693, "step": 3074 }, { "epoch": 2.864864864864865, "loss_reasoning": 0.48073771595954895, "loss_utility": 1.2990922927856445, "step": 3074 }, { "epoch": 2.8657968313140727, "grad_norm": 1.1685083534979643, "learning_rate": 9.94131860545392e-07, "loss": 1.4329, "step": 3075 }, { "epoch": 2.8657968313140727, "loss_reasoning": 0.48650383949279785, "loss_utility": 0.7937753200531006, "step": 3075 }, { "epoch": 2.8667287977632805, "grad_norm": 1.168232110649334, "learning_rate": 9.872281670693822e-07, "loss": 1.5187, "step": 3076 }, { "epoch": 2.8667287977632805, "loss_reasoning": 0.5001188516616821, "loss_utility": 0.6416950821876526, "step": 3076 }, { "epoch": 2.8676607642124883, "grad_norm": 1.1410784319222327, "learning_rate": 9.803244735933725e-07, "loss": 1.5296, "step": 3077 }, { "epoch": 2.8676607642124883, "loss_reasoning": 0.5025418996810913, "loss_utility": 1.1593704223632812, "step": 3077 }, { "epoch": 2.868592730661696, "grad_norm": 1.2208255620851984, "learning_rate": 9.734207801173628e-07, "loss": 1.7004, "step": 3078 }, { "epoch": 2.868592730661696, "loss_reasoning": 0.4879533052444458, "loss_utility": 0.758364200592041, "step": 3078 }, { "epoch": 2.869524697110904, "grad_norm": 0.9990664597751041, "learning_rate": 9.665170866413534e-07, "loss": 1.1279, "step": 3079 }, { "epoch": 2.869524697110904, "loss_reasoning": 0.5056272745132446, "loss_utility": 1.3739933967590332, "step": 3079 }, { "epoch": 2.8704566635601116, "grad_norm": 1.2239485315518124, "learning_rate": 9.596133931653435e-07, "loss": 1.6833, "step": 3080 }, { "epoch": 2.8704566635601116, "loss_reasoning": 0.4978383481502533, "loss_utility": 1.0565345287322998, "step": 3080 }, { "epoch": 2.87138863000932, "grad_norm": 1.2296888828511616, "learning_rate": 9.527096996893339e-07, "loss": 1.5546, "step": 3081 }, { "epoch": 2.87138863000932, "loss_reasoning": 0.44298994541168213, "loss_utility": 0.30545276403427124, "step": 3081 }, { "epoch": 2.872320596458527, "grad_norm": 1.1948828177202295, "learning_rate": 9.458060062133243e-07, "loss": 1.2698, "step": 3082 }, { "epoch": 2.872320596458527, "loss_reasoning": 0.47338318824768066, "loss_utility": 0.7883907556533813, "step": 3082 }, { "epoch": 2.8732525629077355, "grad_norm": 1.2399029675048292, "learning_rate": 9.389023127373145e-07, "loss": 1.343, "step": 3083 }, { "epoch": 2.8732525629077355, "loss_reasoning": 0.491748183965683, "loss_utility": 1.4891810417175293, "step": 3083 }, { "epoch": 2.8741845293569432, "grad_norm": 1.0867961605174725, "learning_rate": 9.319986192613049e-07, "loss": 1.4626, "step": 3084 }, { "epoch": 2.8741845293569432, "loss_reasoning": 0.4878265857696533, "loss_utility": 0.35223907232284546, "step": 3084 }, { "epoch": 2.875116495806151, "grad_norm": 1.0080032217293302, "learning_rate": 9.250949257852952e-07, "loss": 1.1779, "step": 3085 }, { "epoch": 2.875116495806151, "loss_reasoning": 0.47713667154312134, "loss_utility": 0.9917442798614502, "step": 3085 }, { "epoch": 2.876048462255359, "grad_norm": 1.3992958529360424, "learning_rate": 9.181912323092855e-07, "loss": 1.5417, "step": 3086 }, { "epoch": 2.876048462255359, "loss_reasoning": 0.4435279369354248, "loss_utility": 0.5705242156982422, "step": 3086 }, { "epoch": 2.8769804287045666, "grad_norm": 1.1980068076392945, "learning_rate": 9.112875388332759e-07, "loss": 1.2782, "step": 3087 }, { "epoch": 2.8769804287045666, "loss_reasoning": 0.46584171056747437, "loss_utility": 0.6768686771392822, "step": 3087 }, { "epoch": 2.8779123951537744, "grad_norm": 1.0595583252670469, "learning_rate": 9.043838453572662e-07, "loss": 1.5582, "step": 3088 }, { "epoch": 2.8779123951537744, "loss_reasoning": 0.45547398924827576, "loss_utility": 0.8834458589553833, "step": 3088 }, { "epoch": 2.878844361602982, "grad_norm": 1.2259442907252296, "learning_rate": 8.974801518812564e-07, "loss": 1.2923, "step": 3089 }, { "epoch": 2.878844361602982, "loss_reasoning": 0.47426024079322815, "loss_utility": 1.0542168617248535, "step": 3089 }, { "epoch": 2.87977632805219, "grad_norm": 1.145594714555123, "learning_rate": 8.905764584052469e-07, "loss": 1.3413, "step": 3090 }, { "epoch": 2.87977632805219, "loss_reasoning": 0.4437997341156006, "loss_utility": 0.8692529797554016, "step": 3090 }, { "epoch": 2.8807082945013978, "grad_norm": 1.0691158130429828, "learning_rate": 8.836727649292372e-07, "loss": 1.4635, "step": 3091 }, { "epoch": 2.8807082945013978, "loss_reasoning": 0.48019349575042725, "loss_utility": 0.7333806753158569, "step": 3091 }, { "epoch": 2.881640260950606, "grad_norm": 1.2361905477683817, "learning_rate": 8.767690714532276e-07, "loss": 1.3123, "step": 3092 }, { "epoch": 2.881640260950606, "loss_reasoning": 0.5168765187263489, "loss_utility": 0.7574863433837891, "step": 3092 }, { "epoch": 2.8825722273998133, "grad_norm": 1.07168280473482, "learning_rate": 8.698653779772179e-07, "loss": 1.3549, "step": 3093 }, { "epoch": 2.8825722273998133, "loss_reasoning": 0.4684644639492035, "loss_utility": 0.9380813241004944, "step": 3093 }, { "epoch": 2.8835041938490216, "grad_norm": 1.0604123940685433, "learning_rate": 8.629616845012082e-07, "loss": 1.2593, "step": 3094 }, { "epoch": 2.8835041938490216, "loss_reasoning": 0.42575186491012573, "loss_utility": 1.4568589925765991, "step": 3094 }, { "epoch": 2.8844361602982294, "grad_norm": 1.1044894929523974, "learning_rate": 8.560579910251986e-07, "loss": 1.4902, "step": 3095 }, { "epoch": 2.8844361602982294, "loss_reasoning": 0.49408668279647827, "loss_utility": 0.8187387585639954, "step": 3095 }, { "epoch": 2.885368126747437, "grad_norm": 1.2505014818157634, "learning_rate": 8.491542975491888e-07, "loss": 1.2976, "step": 3096 }, { "epoch": 2.885368126747437, "loss_reasoning": 0.5258463621139526, "loss_utility": 1.207653284072876, "step": 3096 }, { "epoch": 2.886300093196645, "grad_norm": 1.39109025212369, "learning_rate": 8.422506040731792e-07, "loss": 1.5801, "step": 3097 }, { "epoch": 2.886300093196645, "loss_reasoning": 0.4840274155139923, "loss_utility": 0.6672377586364746, "step": 3097 }, { "epoch": 2.8872320596458527, "grad_norm": 1.0326254408715068, "learning_rate": 8.353469105971695e-07, "loss": 1.3967, "step": 3098 }, { "epoch": 2.8872320596458527, "loss_reasoning": 0.4296787977218628, "loss_utility": 0.9491032361984253, "step": 3098 }, { "epoch": 2.8881640260950605, "grad_norm": 1.1557225412547711, "learning_rate": 8.284432171211599e-07, "loss": 1.371, "step": 3099 }, { "epoch": 2.8881640260950605, "loss_reasoning": 0.5195832252502441, "loss_utility": 0.9578717947006226, "step": 3099 }, { "epoch": 2.8890959925442683, "grad_norm": 1.1929653506751852, "learning_rate": 8.215395236451503e-07, "loss": 1.4041, "step": 3100 }, { "epoch": 2.8890959925442683, "loss_reasoning": 0.48592162132263184, "loss_utility": 0.8026846051216125, "step": 3100 }, { "epoch": 2.890027958993476, "grad_norm": 1.2380503477019755, "learning_rate": 8.146358301691405e-07, "loss": 1.1917, "step": 3101 }, { "epoch": 2.890027958993476, "loss_reasoning": 0.49773746728897095, "loss_utility": 0.7007358074188232, "step": 3101 }, { "epoch": 2.890959925442684, "grad_norm": 1.5393899561578848, "learning_rate": 8.07732136693131e-07, "loss": 1.5406, "step": 3102 }, { "epoch": 2.890959925442684, "loss_reasoning": 0.443164587020874, "loss_utility": 0.6054132580757141, "step": 3102 }, { "epoch": 2.891891891891892, "grad_norm": 1.1692328718384244, "learning_rate": 8.008284432171212e-07, "loss": 1.3076, "step": 3103 }, { "epoch": 2.891891891891892, "loss_reasoning": 0.4370339512825012, "loss_utility": 1.2788515090942383, "step": 3103 }, { "epoch": 2.8928238583410995, "grad_norm": 1.3298769421734018, "learning_rate": 7.939247497411115e-07, "loss": 1.513, "step": 3104 }, { "epoch": 2.8928238583410995, "loss_reasoning": 0.5167684555053711, "loss_utility": 0.924497127532959, "step": 3104 }, { "epoch": 2.8937558247903077, "grad_norm": 0.8623894922320142, "learning_rate": 7.870210562651019e-07, "loss": 1.2945, "step": 3105 }, { "epoch": 2.8937558247903077, "loss_reasoning": 0.49345123767852783, "loss_utility": 0.7321885824203491, "step": 3105 }, { "epoch": 2.8946877912395155, "grad_norm": 1.12601818768712, "learning_rate": 7.801173627890922e-07, "loss": 1.4407, "step": 3106 }, { "epoch": 2.8946877912395155, "loss_reasoning": 0.4394923448562622, "loss_utility": 0.41235774755477905, "step": 3106 }, { "epoch": 2.8956197576887233, "grad_norm": 0.9245402052168452, "learning_rate": 7.732136693130826e-07, "loss": 1.2254, "step": 3107 }, { "epoch": 2.8956197576887233, "loss_reasoning": 0.5031344890594482, "loss_utility": 1.2763521671295166, "step": 3107 }, { "epoch": 2.896551724137931, "grad_norm": 1.3386665975677108, "learning_rate": 7.663099758370728e-07, "loss": 1.7694, "step": 3108 }, { "epoch": 2.896551724137931, "loss_reasoning": 0.45515671372413635, "loss_utility": 0.8621218204498291, "step": 3108 }, { "epoch": 2.897483690587139, "grad_norm": 1.2089884984416064, "learning_rate": 7.594062823610632e-07, "loss": 1.5035, "step": 3109 }, { "epoch": 2.897483690587139, "loss_reasoning": 0.46439796686172485, "loss_utility": 0.7812806963920593, "step": 3109 }, { "epoch": 2.8984156570363466, "grad_norm": 1.0422762816771085, "learning_rate": 7.525025888850536e-07, "loss": 1.4328, "step": 3110 }, { "epoch": 2.8984156570363466, "loss_reasoning": 0.5050030946731567, "loss_utility": 0.26248085498809814, "step": 3110 }, { "epoch": 2.8993476234855544, "grad_norm": 1.0919366868601168, "learning_rate": 7.455988954090439e-07, "loss": 1.2505, "step": 3111 }, { "epoch": 2.8993476234855544, "loss_reasoning": 0.4593014717102051, "loss_utility": 0.5831692218780518, "step": 3111 }, { "epoch": 2.900279589934762, "grad_norm": 1.045052413423801, "learning_rate": 7.386952019330343e-07, "loss": 1.0588, "step": 3112 }, { "epoch": 2.900279589934762, "loss_reasoning": 0.43935152888298035, "loss_utility": 0.8867943286895752, "step": 3112 }, { "epoch": 2.90121155638397, "grad_norm": 1.2590312463769124, "learning_rate": 7.317915084570246e-07, "loss": 1.5457, "step": 3113 }, { "epoch": 2.90121155638397, "loss_reasoning": 0.43915191292762756, "loss_utility": 0.9409950971603394, "step": 3113 }, { "epoch": 2.9021435228331782, "grad_norm": 1.1752512946801272, "learning_rate": 7.248878149810148e-07, "loss": 1.2655, "step": 3114 }, { "epoch": 2.9021435228331782, "loss_reasoning": 0.48054438829421997, "loss_utility": 1.5323225259780884, "step": 3114 }, { "epoch": 2.9030754892823856, "grad_norm": 1.1882480973569585, "learning_rate": 7.179841215050052e-07, "loss": 1.4913, "step": 3115 }, { "epoch": 2.9030754892823856, "loss_reasoning": 0.5511963367462158, "loss_utility": 0.8821791410446167, "step": 3115 }, { "epoch": 2.904007455731594, "grad_norm": 1.0518299259043078, "learning_rate": 7.110804280289955e-07, "loss": 1.5546, "step": 3116 }, { "epoch": 2.904007455731594, "loss_reasoning": 0.5347863435745239, "loss_utility": 1.0324407815933228, "step": 3116 }, { "epoch": 2.9049394221808016, "grad_norm": 1.7502047162760166, "learning_rate": 7.041767345529859e-07, "loss": 1.3788, "step": 3117 }, { "epoch": 2.9049394221808016, "loss_reasoning": 0.5206148624420166, "loss_utility": 0.6763327121734619, "step": 3117 }, { "epoch": 2.9058713886300094, "grad_norm": 1.096421529244451, "learning_rate": 6.972730410769763e-07, "loss": 1.5074, "step": 3118 }, { "epoch": 2.9058713886300094, "loss_reasoning": 0.557024359703064, "loss_utility": 0.6030124425888062, "step": 3118 }, { "epoch": 2.906803355079217, "grad_norm": 1.0896517792661575, "learning_rate": 6.903693476009666e-07, "loss": 1.4044, "step": 3119 }, { "epoch": 2.906803355079217, "loss_reasoning": 0.45912840962409973, "loss_utility": 0.713356077671051, "step": 3119 }, { "epoch": 2.907735321528425, "grad_norm": 1.3180621248861466, "learning_rate": 6.83465654124957e-07, "loss": 1.5526, "step": 3120 }, { "epoch": 2.907735321528425, "loss_reasoning": 0.5273133516311646, "loss_utility": 1.3260470628738403, "step": 3120 }, { "epoch": 2.9086672879776327, "grad_norm": 1.2868061727874234, "learning_rate": 6.765619606489472e-07, "loss": 1.5127, "step": 3121 }, { "epoch": 2.9086672879776327, "loss_reasoning": 0.48139703273773193, "loss_utility": 0.9684551954269409, "step": 3121 }, { "epoch": 2.9095992544268405, "grad_norm": 1.1664601841648736, "learning_rate": 6.696582671729376e-07, "loss": 1.3389, "step": 3122 }, { "epoch": 2.9095992544268405, "loss_reasoning": 0.5045649409294128, "loss_utility": 0.9397475719451904, "step": 3122 }, { "epoch": 2.9105312208760483, "grad_norm": 1.2328270339467344, "learning_rate": 6.627545736969279e-07, "loss": 1.483, "step": 3123 }, { "epoch": 2.9105312208760483, "loss_reasoning": 0.5029791593551636, "loss_utility": 0.9922726154327393, "step": 3123 }, { "epoch": 2.911463187325256, "grad_norm": 1.2013120510760258, "learning_rate": 6.558508802209182e-07, "loss": 1.3969, "step": 3124 }, { "epoch": 2.911463187325256, "loss_reasoning": 0.444337397813797, "loss_utility": 0.6032148599624634, "step": 3124 }, { "epoch": 2.9123951537744643, "grad_norm": 1.3014638877324354, "learning_rate": 6.489471867449086e-07, "loss": 1.3586, "step": 3125 }, { "epoch": 2.9123951537744643, "loss_reasoning": 0.4307212829589844, "loss_utility": 0.933741569519043, "step": 3125 }, { "epoch": 2.9133271202236717, "grad_norm": 1.0417887541007194, "learning_rate": 6.420434932688989e-07, "loss": 1.1727, "step": 3126 }, { "epoch": 2.9133271202236717, "loss_reasoning": 0.4361470341682434, "loss_utility": 0.9426543116569519, "step": 3126 }, { "epoch": 2.91425908667288, "grad_norm": 1.1836335763147736, "learning_rate": 6.351397997928894e-07, "loss": 1.5789, "step": 3127 }, { "epoch": 2.91425908667288, "loss_reasoning": 0.47410106658935547, "loss_utility": 1.0385019779205322, "step": 3127 }, { "epoch": 2.9151910531220877, "grad_norm": 1.2235922649647717, "learning_rate": 6.282361063168796e-07, "loss": 1.4141, "step": 3128 }, { "epoch": 2.9151910531220877, "loss_reasoning": 0.4308074414730072, "loss_utility": 0.7590376138687134, "step": 3128 }, { "epoch": 2.9161230195712955, "grad_norm": 1.4307184138002007, "learning_rate": 6.2133241284087e-07, "loss": 1.3146, "step": 3129 }, { "epoch": 2.9161230195712955, "loss_reasoning": 0.4958169460296631, "loss_utility": 0.9653366804122925, "step": 3129 }, { "epoch": 2.9170549860205033, "grad_norm": 1.2216246896142176, "learning_rate": 6.144287193648602e-07, "loss": 1.5656, "step": 3130 }, { "epoch": 2.9170549860205033, "loss_reasoning": 0.4765363037586212, "loss_utility": 0.33053070306777954, "step": 3130 }, { "epoch": 2.917986952469711, "grad_norm": 1.2864639365313224, "learning_rate": 6.075250258888506e-07, "loss": 1.2997, "step": 3131 }, { "epoch": 2.917986952469711, "loss_reasoning": 0.4841996729373932, "loss_utility": 0.8874467611312866, "step": 3131 }, { "epoch": 2.918918918918919, "grad_norm": 1.1616693730505248, "learning_rate": 6.006213324128409e-07, "loss": 1.51, "step": 3132 }, { "epoch": 2.918918918918919, "loss_reasoning": 0.46506863832473755, "loss_utility": 1.2348058223724365, "step": 3132 }, { "epoch": 2.9198508853681266, "grad_norm": 1.1929155469659511, "learning_rate": 5.937176389368313e-07, "loss": 1.6073, "step": 3133 }, { "epoch": 2.9198508853681266, "loss_reasoning": 0.47837164998054504, "loss_utility": 0.9357797503471375, "step": 3133 }, { "epoch": 2.9207828518173344, "grad_norm": 1.2468318209564666, "learning_rate": 5.868139454608216e-07, "loss": 1.3581, "step": 3134 }, { "epoch": 2.9207828518173344, "loss_reasoning": 0.4515094757080078, "loss_utility": 1.389890432357788, "step": 3134 }, { "epoch": 2.9217148182665422, "grad_norm": 1.209444114657819, "learning_rate": 5.799102519848119e-07, "loss": 1.4396, "step": 3135 }, { "epoch": 2.9217148182665422, "loss_reasoning": 0.48675212264060974, "loss_utility": 0.8693847060203552, "step": 3135 }, { "epoch": 2.9226467847157505, "grad_norm": 1.0407306095409559, "learning_rate": 5.730065585088022e-07, "loss": 1.0598, "step": 3136 }, { "epoch": 2.9226467847157505, "loss_reasoning": 0.4554646909236908, "loss_utility": 0.42726385593414307, "step": 3136 }, { "epoch": 2.923578751164958, "grad_norm": 1.0628875149896564, "learning_rate": 5.661028650327925e-07, "loss": 1.1298, "step": 3137 }, { "epoch": 2.923578751164958, "loss_reasoning": 0.47257909178733826, "loss_utility": 1.0564460754394531, "step": 3137 }, { "epoch": 2.924510717614166, "grad_norm": 1.1816699140283473, "learning_rate": 5.59199171556783e-07, "loss": 1.3889, "step": 3138 }, { "epoch": 2.924510717614166, "loss_reasoning": 0.44270509481430054, "loss_utility": 0.6794419884681702, "step": 3138 }, { "epoch": 2.925442684063374, "grad_norm": 0.9960204085699726, "learning_rate": 5.522954780807733e-07, "loss": 1.3817, "step": 3139 }, { "epoch": 2.925442684063374, "loss_reasoning": 0.4897741675376892, "loss_utility": 0.7071965932846069, "step": 3139 }, { "epoch": 2.9263746505125816, "grad_norm": 1.097642759781928, "learning_rate": 5.453917846047636e-07, "loss": 1.2157, "step": 3140 }, { "epoch": 2.9263746505125816, "loss_reasoning": 0.5068343877792358, "loss_utility": 0.9562487602233887, "step": 3140 }, { "epoch": 2.9273066169617894, "grad_norm": 1.1460170442588398, "learning_rate": 5.384880911287539e-07, "loss": 1.5943, "step": 3141 }, { "epoch": 2.9273066169617894, "loss_reasoning": 0.527256190776825, "loss_utility": 1.1430096626281738, "step": 3141 }, { "epoch": 2.928238583410997, "grad_norm": 1.310456960515377, "learning_rate": 5.315843976527443e-07, "loss": 1.641, "step": 3142 }, { "epoch": 2.928238583410997, "loss_reasoning": 0.472125381231308, "loss_utility": 0.4967987835407257, "step": 3142 }, { "epoch": 2.929170549860205, "grad_norm": 1.0166650075408352, "learning_rate": 5.246807041767346e-07, "loss": 1.2825, "step": 3143 }, { "epoch": 2.929170549860205, "loss_reasoning": 0.5257306098937988, "loss_utility": 1.0370535850524902, "step": 3143 }, { "epoch": 2.9301025163094128, "grad_norm": 1.261910794230777, "learning_rate": 5.177770107007249e-07, "loss": 1.5717, "step": 3144 }, { "epoch": 2.9301025163094128, "loss_reasoning": 0.47100257873535156, "loss_utility": 1.4148330688476562, "step": 3144 }, { "epoch": 2.9310344827586206, "grad_norm": 1.3844568313039118, "learning_rate": 5.108733172247152e-07, "loss": 1.6456, "step": 3145 }, { "epoch": 2.9310344827586206, "loss_reasoning": 0.5138661861419678, "loss_utility": 1.3359293937683105, "step": 3145 }, { "epoch": 2.9319664492078283, "grad_norm": 1.235971017923136, "learning_rate": 5.039696237487055e-07, "loss": 1.6508, "step": 3146 }, { "epoch": 2.9319664492078283, "loss_reasoning": 0.5189318060874939, "loss_utility": 1.0380949974060059, "step": 3146 }, { "epoch": 2.9328984156570366, "grad_norm": 1.1058565547665182, "learning_rate": 4.97065930272696e-07, "loss": 1.3012, "step": 3147 }, { "epoch": 2.9328984156570366, "loss_reasoning": 0.43307003378868103, "loss_utility": 1.9914202690124512, "step": 3147 }, { "epoch": 2.933830382106244, "grad_norm": 1.0361230535922823, "learning_rate": 4.901622367966863e-07, "loss": 1.6903, "step": 3148 }, { "epoch": 2.933830382106244, "loss_reasoning": 0.49221253395080566, "loss_utility": 0.3701748251914978, "step": 3148 }, { "epoch": 2.934762348555452, "grad_norm": 1.018490676711685, "learning_rate": 4.832585433206767e-07, "loss": 1.5448, "step": 3149 }, { "epoch": 2.934762348555452, "loss_reasoning": 0.49237608909606934, "loss_utility": 0.8223962783813477, "step": 3149 }, { "epoch": 2.93569431500466, "grad_norm": 1.3359168344092458, "learning_rate": 4.7635484984466693e-07, "loss": 1.5006, "step": 3150 }, { "epoch": 2.93569431500466, "loss_reasoning": 0.4783933162689209, "loss_utility": 0.9199367761611938, "step": 3150 }, { "epoch": 2.9366262814538677, "grad_norm": 1.0984974209143765, "learning_rate": 4.6945115636865724e-07, "loss": 1.4378, "step": 3151 }, { "epoch": 2.9366262814538677, "loss_reasoning": 0.5012020468711853, "loss_utility": 1.0734741687774658, "step": 3151 }, { "epoch": 2.9375582479030755, "grad_norm": 1.2899243003227463, "learning_rate": 4.625474628926476e-07, "loss": 1.2614, "step": 3152 }, { "epoch": 2.9375582479030755, "loss_reasoning": 0.4602234363555908, "loss_utility": 1.4444226026535034, "step": 3152 }, { "epoch": 2.9384902143522833, "grad_norm": 1.5820828990000915, "learning_rate": 4.5564376941663797e-07, "loss": 1.6277, "step": 3153 }, { "epoch": 2.9384902143522833, "loss_reasoning": 0.4722004234790802, "loss_utility": 0.5154001712799072, "step": 3153 }, { "epoch": 2.939422180801491, "grad_norm": 1.1408072277484553, "learning_rate": 4.487400759406282e-07, "loss": 1.1861, "step": 3154 }, { "epoch": 2.939422180801491, "loss_reasoning": 0.4748319387435913, "loss_utility": 0.9765446186065674, "step": 3154 }, { "epoch": 2.940354147250699, "grad_norm": 1.1444926168947593, "learning_rate": 4.418363824646186e-07, "loss": 1.6966, "step": 3155 }, { "epoch": 2.940354147250699, "loss_reasoning": 0.4208441972732544, "loss_utility": 0.9992344379425049, "step": 3155 }, { "epoch": 2.9412861136999067, "grad_norm": 1.2877195780901516, "learning_rate": 4.3493268898860895e-07, "loss": 1.2925, "step": 3156 }, { "epoch": 2.9412861136999067, "loss_reasoning": 0.4708130955696106, "loss_utility": 1.3691493272781372, "step": 3156 }, { "epoch": 2.9422180801491145, "grad_norm": 1.4838406873058274, "learning_rate": 4.280289955125993e-07, "loss": 1.7773, "step": 3157 }, { "epoch": 2.9422180801491145, "loss_reasoning": 0.442414790391922, "loss_utility": 0.8280864953994751, "step": 3157 }, { "epoch": 2.9431500465983227, "grad_norm": 1.0797371532746187, "learning_rate": 4.211253020365896e-07, "loss": 1.3572, "step": 3158 }, { "epoch": 2.9431500465983227, "loss_reasoning": 0.49126631021499634, "loss_utility": 1.479495644569397, "step": 3158 }, { "epoch": 2.94408201304753, "grad_norm": 1.0852053370225765, "learning_rate": 4.142216085605799e-07, "loss": 1.6058, "step": 3159 }, { "epoch": 2.94408201304753, "loss_reasoning": 0.45444780588150024, "loss_utility": 0.6045312285423279, "step": 3159 }, { "epoch": 2.9450139794967383, "grad_norm": 1.3528467995810516, "learning_rate": 4.0731791508457024e-07, "loss": 1.2921, "step": 3160 }, { "epoch": 2.9450139794967383, "loss_reasoning": 0.5101872086524963, "loss_utility": 1.0512738227844238, "step": 3160 }, { "epoch": 2.945945945945946, "grad_norm": 1.2985345081541304, "learning_rate": 4.004142216085606e-07, "loss": 1.4926, "step": 3161 }, { "epoch": 2.945945945945946, "loss_reasoning": 0.46620696783065796, "loss_utility": 0.796310544013977, "step": 3161 }, { "epoch": 2.946877912395154, "grad_norm": 1.2718814935000908, "learning_rate": 3.9351052813255096e-07, "loss": 1.3627, "step": 3162 }, { "epoch": 2.946877912395154, "loss_reasoning": 0.48761865496635437, "loss_utility": 0.8978451490402222, "step": 3162 }, { "epoch": 2.9478098788443616, "grad_norm": 1.270882688665512, "learning_rate": 3.866068346565413e-07, "loss": 1.427, "step": 3163 }, { "epoch": 2.9478098788443616, "loss_reasoning": 0.4664490222930908, "loss_utility": 1.226426124572754, "step": 3163 }, { "epoch": 2.9487418452935694, "grad_norm": 1.4858582774838842, "learning_rate": 3.797031411805316e-07, "loss": 1.6139, "step": 3164 }, { "epoch": 2.9487418452935694, "loss_reasoning": 0.44282788038253784, "loss_utility": 0.8336156010627747, "step": 3164 }, { "epoch": 2.949673811742777, "grad_norm": 1.2560371658287492, "learning_rate": 3.7279944770452194e-07, "loss": 1.4439, "step": 3165 }, { "epoch": 2.949673811742777, "loss_reasoning": 0.45994269847869873, "loss_utility": 0.8839614391326904, "step": 3165 }, { "epoch": 2.950605778191985, "grad_norm": 1.1562923957911224, "learning_rate": 3.658957542285123e-07, "loss": 1.3383, "step": 3166 }, { "epoch": 2.950605778191985, "loss_reasoning": 0.5092771649360657, "loss_utility": 0.4840952455997467, "step": 3166 }, { "epoch": 2.951537744641193, "grad_norm": 1.1259776019982994, "learning_rate": 3.589920607525026e-07, "loss": 1.4261, "step": 3167 }, { "epoch": 2.951537744641193, "loss_reasoning": 0.4894355535507202, "loss_utility": 1.4140392541885376, "step": 3167 }, { "epoch": 2.9524697110904006, "grad_norm": 1.1068808752432093, "learning_rate": 3.5208836727649297e-07, "loss": 1.5653, "step": 3168 }, { "epoch": 2.9524697110904006, "loss_reasoning": 0.47511571645736694, "loss_utility": 1.527514934539795, "step": 3168 }, { "epoch": 2.953401677539609, "grad_norm": 1.1704368963271223, "learning_rate": 3.451846738004833e-07, "loss": 1.6146, "step": 3169 }, { "epoch": 2.953401677539609, "loss_reasoning": 0.5185478329658508, "loss_utility": 0.8025870323181152, "step": 3169 }, { "epoch": 2.954333643988816, "grad_norm": 1.145668950420968, "learning_rate": 3.382809803244736e-07, "loss": 1.5866, "step": 3170 }, { "epoch": 2.954333643988816, "loss_reasoning": 0.528316855430603, "loss_utility": 0.7025830745697021, "step": 3170 }, { "epoch": 2.9552656104380244, "grad_norm": 1.044957212720469, "learning_rate": 3.3137728684846395e-07, "loss": 1.3826, "step": 3171 }, { "epoch": 2.9552656104380244, "loss_reasoning": 0.46947452425956726, "loss_utility": 0.7253926992416382, "step": 3171 }, { "epoch": 2.956197576887232, "grad_norm": 1.0563826493808617, "learning_rate": 3.244735933724543e-07, "loss": 1.4107, "step": 3172 }, { "epoch": 2.956197576887232, "loss_reasoning": 0.5527459383010864, "loss_utility": 0.6669877767562866, "step": 3172 }, { "epoch": 2.95712954333644, "grad_norm": 1.042490489160785, "learning_rate": 3.175698998964447e-07, "loss": 1.576, "step": 3173 }, { "epoch": 2.95712954333644, "loss_reasoning": 0.42287302017211914, "loss_utility": 0.8977538347244263, "step": 3173 }, { "epoch": 2.9580615097856477, "grad_norm": 1.2549826195361569, "learning_rate": 3.10666206420435e-07, "loss": 1.7906, "step": 3174 }, { "epoch": 2.9580615097856477, "loss_reasoning": 0.513994038105011, "loss_utility": 0.8780009150505066, "step": 3174 }, { "epoch": 2.9589934762348555, "grad_norm": 1.0034096029003614, "learning_rate": 3.037625129444253e-07, "loss": 1.2434, "step": 3175 }, { "epoch": 2.9589934762348555, "loss_reasoning": 0.4735473692417145, "loss_utility": 0.9689948558807373, "step": 3175 }, { "epoch": 2.9599254426840633, "grad_norm": 1.0053425196042345, "learning_rate": 2.9685881946841566e-07, "loss": 1.2021, "step": 3176 }, { "epoch": 2.9599254426840633, "loss_reasoning": 0.45674747228622437, "loss_utility": 0.8319709300994873, "step": 3176 }, { "epoch": 2.960857409133271, "grad_norm": 1.0822343568902455, "learning_rate": 2.8995512599240596e-07, "loss": 1.4128, "step": 3177 }, { "epoch": 2.960857409133271, "loss_reasoning": 0.4718060791492462, "loss_utility": 1.8007047176361084, "step": 3177 }, { "epoch": 2.961789375582479, "grad_norm": 1.367267383512733, "learning_rate": 2.8305143251639627e-07, "loss": 1.4992, "step": 3178 }, { "epoch": 2.961789375582479, "loss_reasoning": 0.5040687322616577, "loss_utility": 1.1408896446228027, "step": 3178 }, { "epoch": 2.9627213420316867, "grad_norm": 1.4200112324305436, "learning_rate": 2.7614773904038664e-07, "loss": 1.6092, "step": 3179 }, { "epoch": 2.9627213420316867, "loss_reasoning": 0.4453427493572235, "loss_utility": 0.7087688446044922, "step": 3179 }, { "epoch": 2.963653308480895, "grad_norm": 1.1186849076073233, "learning_rate": 2.6924404556437694e-07, "loss": 1.4361, "step": 3180 }, { "epoch": 2.963653308480895, "loss_reasoning": 0.49856770038604736, "loss_utility": 1.3964195251464844, "step": 3180 }, { "epoch": 2.9645852749301023, "grad_norm": 1.6510633748082189, "learning_rate": 2.623403520883673e-07, "loss": 1.7212, "step": 3181 }, { "epoch": 2.9645852749301023, "loss_reasoning": 0.4640982449054718, "loss_utility": 1.7456715106964111, "step": 3181 }, { "epoch": 2.9655172413793105, "grad_norm": 1.066622800919258, "learning_rate": 2.554366586123576e-07, "loss": 1.4769, "step": 3182 }, { "epoch": 2.9655172413793105, "loss_reasoning": 0.4956223964691162, "loss_utility": 0.692937970161438, "step": 3182 }, { "epoch": 2.9664492078285183, "grad_norm": 1.2109731486750068, "learning_rate": 2.48532965136348e-07, "loss": 1.3488, "step": 3183 }, { "epoch": 2.9664492078285183, "loss_reasoning": 0.5094659328460693, "loss_utility": 1.1321969032287598, "step": 3183 }, { "epoch": 2.967381174277726, "grad_norm": 1.1957125697308217, "learning_rate": 2.4162927166033834e-07, "loss": 1.6463, "step": 3184 }, { "epoch": 2.967381174277726, "loss_reasoning": 0.4314778745174408, "loss_utility": 0.8058649301528931, "step": 3184 }, { "epoch": 2.968313140726934, "grad_norm": 1.185277752091032, "learning_rate": 2.3472557818432862e-07, "loss": 1.3125, "step": 3185 }, { "epoch": 2.968313140726934, "loss_reasoning": 0.44475024938583374, "loss_utility": 1.0634461641311646, "step": 3185 }, { "epoch": 2.9692451071761417, "grad_norm": 1.486983539033034, "learning_rate": 2.2782188470831898e-07, "loss": 1.563, "step": 3186 }, { "epoch": 2.9692451071761417, "loss_reasoning": 0.469540536403656, "loss_utility": 1.0674917697906494, "step": 3186 }, { "epoch": 2.9701770736253494, "grad_norm": 1.462523737525399, "learning_rate": 2.209181912323093e-07, "loss": 1.4741, "step": 3187 }, { "epoch": 2.9701770736253494, "loss_reasoning": 0.4939785599708557, "loss_utility": 0.9016091823577881, "step": 3187 }, { "epoch": 2.9711090400745572, "grad_norm": 1.213823087587125, "learning_rate": 2.1401449775629965e-07, "loss": 1.5011, "step": 3188 }, { "epoch": 2.9711090400745572, "loss_reasoning": 0.4590492844581604, "loss_utility": 1.2545088529586792, "step": 3188 }, { "epoch": 2.972041006523765, "grad_norm": 1.333207546390383, "learning_rate": 2.0711080428028996e-07, "loss": 1.3396, "step": 3189 }, { "epoch": 2.972041006523765, "loss_reasoning": 0.4254246950149536, "loss_utility": 0.7223878502845764, "step": 3189 }, { "epoch": 2.972972972972973, "grad_norm": 1.1185569601866607, "learning_rate": 2.002071108042803e-07, "loss": 1.2295, "step": 3190 }, { "epoch": 2.972972972972973, "loss_reasoning": 0.4891923666000366, "loss_utility": 1.101898193359375, "step": 3190 }, { "epoch": 2.973904939422181, "grad_norm": 1.6089649550993907, "learning_rate": 1.9330341732827066e-07, "loss": 1.5304, "step": 3191 }, { "epoch": 2.973904939422181, "loss_reasoning": 0.5508183240890503, "loss_utility": 1.237500786781311, "step": 3191 }, { "epoch": 2.9748369058713884, "grad_norm": 1.27120348687006, "learning_rate": 1.8639972385226097e-07, "loss": 1.55, "step": 3192 }, { "epoch": 2.9748369058713884, "loss_reasoning": 0.49839305877685547, "loss_utility": 1.4700489044189453, "step": 3192 }, { "epoch": 2.9757688723205966, "grad_norm": 1.0671614593716288, "learning_rate": 1.794960303762513e-07, "loss": 1.4301, "step": 3193 }, { "epoch": 2.9757688723205966, "loss_reasoning": 0.504202127456665, "loss_utility": 0.7236015796661377, "step": 3193 }, { "epoch": 2.9767008387698044, "grad_norm": 1.1623829950599016, "learning_rate": 1.7259233690024164e-07, "loss": 1.5199, "step": 3194 }, { "epoch": 2.9767008387698044, "loss_reasoning": 0.5179492831230164, "loss_utility": 0.7934588193893433, "step": 3194 }, { "epoch": 2.977632805219012, "grad_norm": 1.189985002838833, "learning_rate": 1.6568864342423198e-07, "loss": 1.2986, "step": 3195 }, { "epoch": 2.977632805219012, "loss_reasoning": 0.4831523001194, "loss_utility": 1.3340319395065308, "step": 3195 }, { "epoch": 2.97856477166822, "grad_norm": 1.2480331346040332, "learning_rate": 1.5878494994822234e-07, "loss": 1.5159, "step": 3196 }, { "epoch": 2.97856477166822, "loss_reasoning": 0.4971458315849304, "loss_utility": 0.7734905481338501, "step": 3196 }, { "epoch": 2.9794967381174278, "grad_norm": 1.1353067849151297, "learning_rate": 1.5188125647221265e-07, "loss": 1.4704, "step": 3197 }, { "epoch": 2.9794967381174278, "loss_reasoning": 0.4861734211444855, "loss_utility": 1.1721656322479248, "step": 3197 }, { "epoch": 2.9804287045666356, "grad_norm": 1.379709582185985, "learning_rate": 1.4497756299620298e-07, "loss": 1.5542, "step": 3198 }, { "epoch": 2.9804287045666356, "loss_reasoning": 0.47416752576828003, "loss_utility": 1.186203122138977, "step": 3198 }, { "epoch": 2.9813606710158433, "grad_norm": 1.152876036552653, "learning_rate": 1.3807386952019332e-07, "loss": 1.367, "step": 3199 }, { "epoch": 2.9813606710158433, "loss_reasoning": 0.48086172342300415, "loss_utility": 0.9406228065490723, "step": 3199 }, { "epoch": 2.982292637465051, "grad_norm": 1.2598576062812348, "learning_rate": 1.3117017604418365e-07, "loss": 1.5809, "step": 3200 }, { "epoch": 2.982292637465051, "loss_reasoning": 0.5669618844985962, "loss_utility": 0.8449477553367615, "step": 3200 }, { "epoch": 2.983224603914259, "grad_norm": 1.2547899345412261, "learning_rate": 1.24266482568174e-07, "loss": 1.4777, "step": 3201 }, { "epoch": 2.983224603914259, "loss_reasoning": 0.4472874104976654, "loss_utility": 0.8937463164329529, "step": 3201 }, { "epoch": 2.984156570363467, "grad_norm": 1.3030575331147187, "learning_rate": 1.1736278909216431e-07, "loss": 1.5319, "step": 3202 }, { "epoch": 2.984156570363467, "loss_reasoning": 0.49158698320388794, "loss_utility": 0.9696317911148071, "step": 3202 }, { "epoch": 2.9850885368126745, "grad_norm": 1.179704200441749, "learning_rate": 1.1045909561615465e-07, "loss": 1.6118, "step": 3203 }, { "epoch": 2.9850885368126745, "loss_reasoning": 0.4724789261817932, "loss_utility": 1.1805744171142578, "step": 3203 }, { "epoch": 2.9860205032618827, "grad_norm": 1.0352196941423342, "learning_rate": 1.0355540214014498e-07, "loss": 1.3142, "step": 3204 }, { "epoch": 2.9860205032618827, "loss_reasoning": 0.4809569716453552, "loss_utility": 1.3760390281677246, "step": 3204 }, { "epoch": 2.9869524697110905, "grad_norm": 1.3422856610628449, "learning_rate": 9.665170866413533e-08, "loss": 1.6408, "step": 3205 }, { "epoch": 2.9869524697110905, "loss_reasoning": 0.4671713709831238, "loss_utility": 0.8159922957420349, "step": 3205 }, { "epoch": 2.9878844361602983, "grad_norm": 1.118226194367613, "learning_rate": 8.974801518812565e-08, "loss": 1.147, "step": 3206 }, { "epoch": 2.9878844361602983, "loss_reasoning": 0.5452754497528076, "loss_utility": 0.47278332710266113, "step": 3206 }, { "epoch": 2.988816402609506, "grad_norm": 1.0021382590819752, "learning_rate": 8.284432171211599e-08, "loss": 1.3237, "step": 3207 }, { "epoch": 2.988816402609506, "loss_reasoning": 0.44311437010765076, "loss_utility": 1.2117252349853516, "step": 3207 }, { "epoch": 2.989748369058714, "grad_norm": 1.3111979047435414, "learning_rate": 7.594062823610632e-08, "loss": 1.604, "step": 3208 }, { "epoch": 2.989748369058714, "loss_reasoning": 0.48166829347610474, "loss_utility": 0.5354286432266235, "step": 3208 }, { "epoch": 2.9906803355079217, "grad_norm": 1.3500525838704407, "learning_rate": 6.903693476009666e-08, "loss": 1.4997, "step": 3209 }, { "epoch": 2.9906803355079217, "loss_reasoning": 0.4896896183490753, "loss_utility": 0.6925499439239502, "step": 3209 }, { "epoch": 2.9916123019571295, "grad_norm": 1.0181252193743746, "learning_rate": 6.2133241284087e-08, "loss": 1.4171, "step": 3210 }, { "epoch": 2.9916123019571295, "loss_reasoning": 0.48846209049224854, "loss_utility": 1.341889500617981, "step": 3210 }, { "epoch": 2.9925442684063372, "grad_norm": 1.3467845226786936, "learning_rate": 5.522954780807732e-08, "loss": 1.7582, "step": 3211 }, { "epoch": 2.9925442684063372, "loss_reasoning": 0.49037298560142517, "loss_utility": 1.1350924968719482, "step": 3211 }, { "epoch": 2.993476234855545, "grad_norm": 1.4229092123972746, "learning_rate": 4.8325854332067665e-08, "loss": 1.3503, "step": 3212 }, { "epoch": 2.993476234855545, "loss_reasoning": 0.5098397731781006, "loss_utility": 1.4775607585906982, "step": 3212 }, { "epoch": 2.9944082013047533, "grad_norm": 1.252369626370556, "learning_rate": 4.1422160856057994e-08, "loss": 1.5817, "step": 3213 }, { "epoch": 2.9944082013047533, "loss_reasoning": 0.47908374667167664, "loss_utility": 0.6991057395935059, "step": 3213 }, { "epoch": 2.9953401677539606, "grad_norm": 1.104007448089532, "learning_rate": 3.451846738004833e-08, "loss": 1.5342, "step": 3214 }, { "epoch": 2.9953401677539606, "loss_reasoning": 0.4956129491329193, "loss_utility": 1.020118236541748, "step": 3214 }, { "epoch": 2.996272134203169, "grad_norm": 1.049036325748085, "learning_rate": 2.761477390403866e-08, "loss": 1.2185, "step": 3215 }, { "epoch": 2.996272134203169, "loss_reasoning": 0.5333962440490723, "loss_utility": 0.977138876914978, "step": 3215 }, { "epoch": 2.9972041006523766, "grad_norm": 1.5576148254821995, "learning_rate": 2.0711080428028997e-08, "loss": 1.2271, "step": 3216 }, { "epoch": 2.9972041006523766, "loss_reasoning": 0.5170373320579529, "loss_utility": 1.143272042274475, "step": 3216 }, { "epoch": 2.9981360671015844, "grad_norm": 1.1684906618918147, "learning_rate": 1.380738695201933e-08, "loss": 1.3625, "step": 3217 }, { "epoch": 2.9981360671015844, "loss_reasoning": 0.48319774866104126, "loss_utility": 0.7790474891662598, "step": 3217 }, { "epoch": 2.999068033550792, "grad_norm": 1.1637152369223014, "learning_rate": 6.903693476009665e-09, "loss": 1.417, "step": 3218 }, { "epoch": 2.999068033550792, "loss_reasoning": 0.5190764665603638, "loss_utility": 0.47238510847091675, "step": 3218 }, { "epoch": 3.0, "grad_norm": 0.9364084332688037, "learning_rate": 0.0, "loss": 1.0492, "step": 3219 }, { "epoch": 3.0, "step": 3219, "total_flos": 0.0, "train_loss": 1.8987820319930897, "train_runtime": 118018.8932, "train_samples_per_second": 0.436, "train_steps_per_second": 0.027 } ], "logging_steps": 1, "max_steps": 3219, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }