| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 14.66467958271237, |
| "eval_steps": 500, |
| "global_step": 615, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12, |
| "grad_norm": 50.82564606274215, |
| "learning_rate": 4.032258064516129e-07, |
| "loss": 2.6296, |
| "sft_loss": 0.2952887358143926, |
| "step": 5, |
| "total_loss": 0.3225633257534355, |
| "value_loss": 0.27274589324370024, |
| "value_loss_search": 0.8215309411287308, |
| "value_loss_thought": 1.3604361996985972 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 43.36034486559219, |
| "learning_rate": 8.064516129032258e-07, |
| "loss": 2.4575, |
| "sft_loss": 0.28640273250639436, |
| "step": 10, |
| "total_loss": 0.3117025727406144, |
| "value_loss": 0.25299841817468405, |
| "value_loss_search": 0.8143498097546399, |
| "value_loss_thought": 1.2096375316381454 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 36.31265974182755, |
| "learning_rate": 1.2096774193548388e-06, |
| "loss": 2.1814, |
| "sft_loss": 0.24496446046978235, |
| "step": 15, |
| "total_loss": 0.27037954148254356, |
| "value_loss": 0.2541508126072586, |
| "value_loss_search": 0.8789042549207806, |
| "value_loss_thought": 1.15430224314332 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 23.885147220945996, |
| "learning_rate": 1.6129032258064516e-06, |
| "loss": 1.5974, |
| "sft_loss": 0.18549970276653766, |
| "step": 20, |
| "total_loss": 0.20942154704825952, |
| "value_loss": 0.239218432828784, |
| "value_loss_search": 0.8069580754265189, |
| "value_loss_thought": 1.1067893739789725 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 14.605157217566555, |
| "learning_rate": 2.0161290322580646e-06, |
| "loss": 1.1409, |
| "sft_loss": 0.11383889233693481, |
| "step": 25, |
| "total_loss": 0.13655262757092715, |
| "value_loss": 0.22713735280558467, |
| "value_loss_search": 0.8525716276839376, |
| "value_loss_thought": 0.9645271969959139 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 19.32238588959413, |
| "learning_rate": 2.4193548387096776e-06, |
| "loss": 0.9755, |
| "sft_loss": 0.10292870132252574, |
| "step": 30, |
| "total_loss": 0.12329704709118232, |
| "value_loss": 0.20368346022441983, |
| "value_loss_search": 0.6985976113937795, |
| "value_loss_thought": 0.9308700620196759 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 11.366176943904033, |
| "learning_rate": 2.822580645161291e-06, |
| "loss": 0.8166, |
| "sft_loss": 0.08234395189210772, |
| "step": 35, |
| "total_loss": 0.102398702444043, |
| "value_loss": 0.2005474975332618, |
| "value_loss_search": 0.73103982496541, |
| "value_loss_thought": 0.8733401508070529 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 18.523228091154312, |
| "learning_rate": 3.225806451612903e-06, |
| "loss": 0.7801, |
| "sft_loss": 0.0764334655366838, |
| "step": 40, |
| "total_loss": 0.09517455038730986, |
| "value_loss": 0.18741084402427077, |
| "value_loss_search": 0.5712150579318405, |
| "value_loss_thought": 0.9280716905370354 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 13.717307975492233, |
| "learning_rate": 3.6290322580645166e-06, |
| "loss": 0.7302, |
| "sft_loss": 0.06878535831347107, |
| "step": 45, |
| "total_loss": 0.09124175217002631, |
| "value_loss": 0.22456393418833615, |
| "value_loss_search": 0.7111891292035579, |
| "value_loss_thought": 1.0853223511949182 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 19.25629488379356, |
| "learning_rate": 4.032258064516129e-06, |
| "loss": 0.722, |
| "sft_loss": 0.07188704321160913, |
| "step": 50, |
| "total_loss": 0.09479074196424335, |
| "value_loss": 0.22903698151931168, |
| "value_loss_search": 0.9242108400911093, |
| "value_loss_thought": 0.9080850033089518 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 16.515271675316733, |
| "learning_rate": 4.435483870967742e-06, |
| "loss": 0.7237, |
| "sft_loss": 0.07221840480342508, |
| "step": 55, |
| "total_loss": 0.09306601642165332, |
| "value_loss": 0.20847611278295516, |
| "value_loss_search": 0.7067324505187571, |
| "value_loss_thought": 0.9610764627344907 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 13.559730992843452, |
| "learning_rate": 4.838709677419355e-06, |
| "loss": 0.6799, |
| "sft_loss": 0.06844400409609079, |
| "step": 60, |
| "total_loss": 0.08777883652364835, |
| "value_loss": 0.1933483243919909, |
| "value_loss_search": 0.6386921301018447, |
| "value_loss_thought": 0.90809445703635 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 7.838247236141987, |
| "learning_rate": 4.999636929057196e-06, |
| "loss": 0.6609, |
| "sft_loss": 0.06545158205553889, |
| "step": 65, |
| "total_loss": 0.08412742811487987, |
| "value_loss": 0.1867584578692913, |
| "value_loss_search": 0.6354834865778685, |
| "value_loss_thought": 0.8585841765627265 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 8.945716463803295, |
| "learning_rate": 4.997418544072742e-06, |
| "loss": 0.6528, |
| "sft_loss": 0.06201667059212923, |
| "step": 70, |
| "total_loss": 0.07894517340464517, |
| "value_loss": 0.1692850286606699, |
| "value_loss_search": 0.49887247155420483, |
| "value_loss_thought": 0.8554077588021756 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 8.604383259697785, |
| "learning_rate": 4.993185267783142e-06, |
| "loss": 0.6129, |
| "sft_loss": 0.0569519879296422, |
| "step": 75, |
| "total_loss": 0.07289455400314182, |
| "value_loss": 0.1594256573356688, |
| "value_loss_search": 0.43842247435823084, |
| "value_loss_thought": 0.8369827844202519 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 9.195694446581905, |
| "learning_rate": 4.986940515551676e-06, |
| "loss": 0.6197, |
| "sft_loss": 0.06148028993047774, |
| "step": 80, |
| "total_loss": 0.0772681548143737, |
| "value_loss": 0.1578786402475089, |
| "value_loss_search": 0.4949493734864518, |
| "value_loss_thought": 0.7680797455832362 |
| }, |
| { |
| "epoch": 2.03, |
| "grad_norm": 8.949095878160305, |
| "learning_rate": 4.978689325579491e-06, |
| "loss": 0.5908, |
| "sft_loss": 0.06064098924398422, |
| "step": 85, |
| "total_loss": 0.07585813322220929, |
| "value_loss": 0.15217143492773175, |
| "value_loss_search": 0.47303855791687965, |
| "value_loss_thought": 0.7443329165223986 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 5.832639914877802, |
| "learning_rate": 4.968438354840834e-06, |
| "loss": 0.5485, |
| "sft_loss": 0.05426874342374503, |
| "step": 90, |
| "total_loss": 0.06687840489903465, |
| "value_loss": 0.1260966133326292, |
| "value_loss_search": 0.43105568194878285, |
| "value_loss_thought": 0.5777172191534191 |
| }, |
| { |
| "epoch": 2.27, |
| "grad_norm": 7.605084631748654, |
| "learning_rate": 4.956195873712274e-06, |
| "loss": 0.5201, |
| "sft_loss": 0.056156763760373, |
| "step": 95, |
| "total_loss": 0.06784167702426203, |
| "value_loss": 0.1168491319520399, |
| "value_loss_search": 0.34044701922684906, |
| "value_loss_thought": 0.5943460309877991 |
| }, |
| { |
| "epoch": 2.38, |
| "grad_norm": 5.404770654306174, |
| "learning_rate": 4.941971759300249e-06, |
| "loss": 0.5264, |
| "sft_loss": 0.05927520957775414, |
| "step": 100, |
| "total_loss": 0.07008651044452563, |
| "value_loss": 0.10811300831846893, |
| "value_loss_search": 0.30770739456638696, |
| "value_loss_thought": 0.5571966758929193 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 4.381257694207405, |
| "learning_rate": 4.925777487472318e-06, |
| "loss": 0.5074, |
| "sft_loss": 0.05194715983234346, |
| "step": 105, |
| "total_loss": 0.061547464787145144, |
| "value_loss": 0.09600305040366948, |
| "value_loss_search": 0.29806660558097064, |
| "value_loss_thought": 0.46995780025608835 |
| }, |
| { |
| "epoch": 2.62, |
| "grad_norm": 7.428215594764233, |
| "learning_rate": 4.907626123598552e-06, |
| "loss": 0.5066, |
| "sft_loss": 0.05760289076715708, |
| "step": 110, |
| "total_loss": 0.06731230580771808, |
| "value_loss": 0.09709415114484728, |
| "value_loss_search": 0.30406282742042096, |
| "value_loss_thought": 0.4726903848350048 |
| }, |
| { |
| "epoch": 2.74, |
| "grad_norm": 3.2888847744957492, |
| "learning_rate": 4.8875323120105275e-06, |
| "loss": 0.5131, |
| "sft_loss": 0.057099767681211236, |
| "step": 115, |
| "total_loss": 0.06571883515571245, |
| "value_loss": 0.0861906715668738, |
| "value_loss_search": 0.29786290233023466, |
| "value_loss_thought": 0.3916624684818089 |
| }, |
| { |
| "epoch": 2.86, |
| "grad_norm": 4.869835996951379, |
| "learning_rate": 4.8655122641864335e-06, |
| "loss": 0.5126, |
| "sft_loss": 0.056119235185906294, |
| "step": 120, |
| "total_loss": 0.06504482553282287, |
| "value_loss": 0.08925589976133778, |
| "value_loss_search": 0.25328616083133965, |
| "value_loss_thought": 0.4607610349543393 |
| }, |
| { |
| "epoch": 2.98, |
| "grad_norm": 3.5856824633030433, |
| "learning_rate": 4.84158374567182e-06, |
| "loss": 0.4804, |
| "sft_loss": 0.046175889065489174, |
| "step": 125, |
| "total_loss": 0.054164890843094324, |
| "value_loss": 0.0798900181078352, |
| "value_loss_search": 0.2606613096548244, |
| "value_loss_thought": 0.3784588351845741 |
| }, |
| { |
| "epoch": 3.1, |
| "grad_norm": 2.393170606173336, |
| "learning_rate": 4.815766061746538e-06, |
| "loss": 0.4603, |
| "sft_loss": 0.05217493660748005, |
| "step": 130, |
| "total_loss": 0.05943963085010182, |
| "value_loss": 0.0726469449698925, |
| "value_loss_search": 0.21297869782429188, |
| "value_loss_thought": 0.3681968664750457 |
| }, |
| { |
| "epoch": 3.22, |
| "grad_norm": 1.9679581522016634, |
| "learning_rate": 4.788080041849443e-06, |
| "loss": 0.4278, |
| "sft_loss": 0.052470245282165706, |
| "step": 135, |
| "total_loss": 0.05776963674434228, |
| "value_loss": 0.05299391018925235, |
| "value_loss_search": 0.17309838664950802, |
| "value_loss_thought": 0.2508528954349458 |
| }, |
| { |
| "epoch": 3.34, |
| "grad_norm": 3.1066245128680725, |
| "learning_rate": 4.7585480227734175e-06, |
| "loss": 0.434, |
| "sft_loss": 0.049476837972179055, |
| "step": 140, |
| "total_loss": 0.055079690403363205, |
| "value_loss": 0.056028524460271, |
| "value_loss_search": 0.18748179799877107, |
| "value_loss_thought": 0.2607463992317207 |
| }, |
| { |
| "epoch": 3.46, |
| "grad_norm": 3.4261087057869335, |
| "learning_rate": 4.7271938306442855e-06, |
| "loss": 0.4274, |
| "sft_loss": 0.04518961615394801, |
| "step": 145, |
| "total_loss": 0.05061705713451374, |
| "value_loss": 0.054274410346988586, |
| "value_loss_search": 0.15429693737823982, |
| "value_loss_thought": 0.27989834365434946 |
| }, |
| { |
| "epoch": 3.58, |
| "grad_norm": 3.0813091887098563, |
| "learning_rate": 4.694042761698135e-06, |
| "loss": 0.438, |
| "sft_loss": 0.055581874679774046, |
| "step": 150, |
| "total_loss": 0.06076982853701338, |
| "value_loss": 0.051879540382651614, |
| "value_loss_search": 0.14725855304568541, |
| "value_loss_thought": 0.26777777075767517 |
| }, |
| { |
| "epoch": 3.7, |
| "grad_norm": 2.0023308919779734, |
| "learning_rate": 4.6591215618725775e-06, |
| "loss": 0.4328, |
| "sft_loss": 0.04702084113378078, |
| "step": 155, |
| "total_loss": 0.05245317818043986, |
| "value_loss": 0.054323368283803575, |
| "value_loss_search": 0.15814204575144686, |
| "value_loss_thought": 0.27644489823142065 |
| }, |
| { |
| "epoch": 3.82, |
| "grad_norm": 1.977551344543693, |
| "learning_rate": 4.622458405228411e-06, |
| "loss": 0.4244, |
| "sft_loss": 0.05086941795889288, |
| "step": 160, |
| "total_loss": 0.0561610649638169, |
| "value_loss": 0.05291647011763416, |
| "value_loss_search": 0.15665370621718466, |
| "value_loss_thought": 0.26667805360630153 |
| }, |
| { |
| "epoch": 3.93, |
| "grad_norm": 1.9529739672122766, |
| "learning_rate": 4.5840828712190725e-06, |
| "loss": 0.4032, |
| "sft_loss": 0.04604445670265704, |
| "step": 165, |
| "total_loss": 0.05026301761099603, |
| "value_loss": 0.0421856101078447, |
| "value_loss_search": 0.12487082706647926, |
| "value_loss_thought": 0.21261405241675674 |
| }, |
| { |
| "epoch": 4.05, |
| "grad_norm": 1.5029470328755332, |
| "learning_rate": 4.54402592082625e-06, |
| "loss": 0.3912, |
| "sft_loss": 0.0438391676871106, |
| "step": 170, |
| "total_loss": 0.046969617460126754, |
| "value_loss": 0.031304498872486874, |
| "value_loss_search": 0.08991801288211718, |
| "value_loss_thought": 0.1605179784586653 |
| }, |
| { |
| "epoch": 4.17, |
| "grad_norm": 1.5958208749346106, |
| "learning_rate": 4.502319871580879e-06, |
| "loss": 0.3559, |
| "sft_loss": 0.0380060717696324, |
| "step": 175, |
| "total_loss": 0.040836501114608834, |
| "value_loss": 0.028304292738903315, |
| "value_loss_search": 0.08076013361569494, |
| "value_loss_thought": 0.1456742096459493 |
| }, |
| { |
| "epoch": 4.29, |
| "grad_norm": 1.1597766780252663, |
| "learning_rate": 4.458998371489695e-06, |
| "loss": 0.3542, |
| "sft_loss": 0.04091373980045319, |
| "step": 180, |
| "total_loss": 0.04352424936296302, |
| "value_loss": 0.02610509568476118, |
| "value_loss_search": 0.08407636135234497, |
| "value_loss_thought": 0.12476440471946262 |
| }, |
| { |
| "epoch": 4.41, |
| "grad_norm": 1.1887589876167461, |
| "learning_rate": 4.414096371888378e-06, |
| "loss": 0.3573, |
| "sft_loss": 0.039319761795923114, |
| "step": 185, |
| "total_loss": 0.04204170083394274, |
| "value_loss": 0.027219387918012216, |
| "value_loss_search": 0.07986954482039436, |
| "value_loss_thought": 0.13788555894279853 |
| }, |
| { |
| "epoch": 4.53, |
| "grad_norm": 1.1787290016300027, |
| "learning_rate": 4.367650099243167e-06, |
| "loss": 0.3568, |
| "sft_loss": 0.04122671156655997, |
| "step": 190, |
| "total_loss": 0.04415785642413539, |
| "value_loss": 0.029311446825158783, |
| "value_loss_search": 0.07832561889081262, |
| "value_loss_thought": 0.15616595644678455 |
| }, |
| { |
| "epoch": 4.65, |
| "grad_norm": 1.6029965120643992, |
| "learning_rate": 4.319697025923736e-06, |
| "loss": 0.3598, |
| "sft_loss": 0.04112560288049281, |
| "step": 195, |
| "total_loss": 0.043696090345474656, |
| "value_loss": 0.02570487381599378, |
| "value_loss_search": 0.08383175324415788, |
| "value_loss_thought": 0.12180723798810504 |
| }, |
| { |
| "epoch": 4.77, |
| "grad_norm": 1.4381210449396125, |
| "learning_rate": 4.270275839970868e-06, |
| "loss": 0.3515, |
| "sft_loss": 0.039879024308174846, |
| "step": 200, |
| "total_loss": 0.04217200499406317, |
| "value_loss": 0.022929807018954307, |
| "value_loss_search": 0.07454460244625807, |
| "value_loss_thought": 0.10889385400805622 |
| }, |
| { |
| "epoch": 4.89, |
| "grad_norm": 1.2614274454432255, |
| "learning_rate": 4.219426413883348e-06, |
| "loss": 0.3625, |
| "sft_loss": 0.04709821604192257, |
| "step": 205, |
| "total_loss": 0.0502521290894947, |
| "value_loss": 0.03153913400019519, |
| "value_loss_search": 0.08359810820547864, |
| "value_loss_thought": 0.1687149630743079 |
| }, |
| { |
| "epoch": 5.01, |
| "grad_norm": 1.1506878122900264, |
| "learning_rate": 4.167189772449248e-06, |
| "loss": 0.3432, |
| "sft_loss": 0.04020570595748722, |
| "step": 210, |
| "total_loss": 0.04252870589734812, |
| "value_loss": 0.023229998056194745, |
| "value_loss_search": 0.07497004266479053, |
| "value_loss_thought": 0.11086994202341885 |
| }, |
| { |
| "epoch": 5.13, |
| "grad_norm": 0.9862059900461185, |
| "learning_rate": 4.113608059647553e-06, |
| "loss": 0.2994, |
| "sft_loss": 0.0355574628803879, |
| "step": 215, |
| "total_loss": 0.037456610312801786, |
| "value_loss": 0.018991474900394677, |
| "value_loss_search": 0.058882090356200935, |
| "value_loss_thought": 0.09304970969678834 |
| }, |
| { |
| "epoch": 5.25, |
| "grad_norm": 0.9601247633138389, |
| "learning_rate": 4.058724504646834e-06, |
| "loss": 0.288, |
| "sft_loss": 0.03504389475565404, |
| "step": 220, |
| "total_loss": 0.03674481045381981, |
| "value_loss": 0.017009155871346594, |
| "value_loss_search": 0.050329163245623934, |
| "value_loss_thought": 0.08574408317217604 |
| }, |
| { |
| "epoch": 5.37, |
| "grad_norm": 1.3509709560128629, |
| "learning_rate": 4.00258338692841e-06, |
| "loss": 0.2933, |
| "sft_loss": 0.03573095325846225, |
| "step": 225, |
| "total_loss": 0.037422025205160026, |
| "value_loss": 0.016910719190491363, |
| "value_loss_search": 0.05178567884140648, |
| "value_loss_thought": 0.08350007485714742 |
| }, |
| { |
| "epoch": 5.48, |
| "grad_norm": 0.9625163641024351, |
| "learning_rate": 3.945230000562121e-06, |
| "loss": 0.2842, |
| "sft_loss": 0.03569462758023292, |
| "step": 230, |
| "total_loss": 0.03722189712352701, |
| "value_loss": 0.015272694976010826, |
| "value_loss_search": 0.04224359960644506, |
| "value_loss_thought": 0.07993796134978766 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 1.2461981914737617, |
| "learning_rate": 3.8867106176635405e-06, |
| "loss": 0.2944, |
| "sft_loss": 0.034198284102603796, |
| "step": 235, |
| "total_loss": 0.03592255775074591, |
| "value_loss": 0.017242736062326004, |
| "value_loss_search": 0.052543783394503406, |
| "value_loss_thought": 0.08539810547372326 |
| }, |
| { |
| "epoch": 5.72, |
| "grad_norm": 1.0230143635639728, |
| "learning_rate": 3.827072451062118e-06, |
| "loss": 0.3004, |
| "sft_loss": 0.037929134699516, |
| "step": 240, |
| "total_loss": 0.03954664655320812, |
| "value_loss": 0.016175120149273425, |
| "value_loss_search": 0.053255315756541674, |
| "value_loss_thought": 0.076145645219367 |
| }, |
| { |
| "epoch": 5.84, |
| "grad_norm": 1.1409535328456384, |
| "learning_rate": 3.7663636162103583e-06, |
| "loss": 0.2922, |
| "sft_loss": 0.034704937925562264, |
| "step": 245, |
| "total_loss": 0.03651254886699462, |
| "value_loss": 0.018076109029061628, |
| "value_loss_search": 0.05161890290983138, |
| "value_loss_thought": 0.09298996879952029 |
| }, |
| { |
| "epoch": 5.96, |
| "grad_norm": 0.9301923066699931, |
| "learning_rate": 3.7046330923647733e-06, |
| "loss": 0.2982, |
| "sft_loss": 0.03279960451181978, |
| "step": 250, |
| "total_loss": 0.03438955684650864, |
| "value_loss": 0.01589952201466076, |
| "value_loss_search": 0.04750958961667493, |
| "value_loss_thought": 0.07968658691388555 |
| }, |
| { |
| "epoch": 6.08, |
| "grad_norm": 0.97678387780499, |
| "learning_rate": 3.6419306830699275e-06, |
| "loss": 0.2545, |
| "sft_loss": 0.031023622630164026, |
| "step": 255, |
| "total_loss": 0.03264989823801443, |
| "value_loss": 0.016262755844218192, |
| "value_loss_search": 0.05106269954121671, |
| "value_loss_thought": 0.07903934685018613 |
| }, |
| { |
| "epoch": 6.2, |
| "grad_norm": 0.8822926753721482, |
| "learning_rate": 3.578306975977459e-06, |
| "loss": 0.2328, |
| "sft_loss": 0.024921502592042087, |
| "step": 260, |
| "total_loss": 0.026061263032170247, |
| "value_loss": 0.011397602935903705, |
| "value_loss_search": 0.03485325619985815, |
| "value_loss_thought": 0.05632756747363601 |
| }, |
| { |
| "epoch": 6.32, |
| "grad_norm": 0.9577185704540809, |
| "learning_rate": 3.513813302032485e-06, |
| "loss": 0.232, |
| "sft_loss": 0.030204204528126867, |
| "step": 265, |
| "total_loss": 0.03123898759331496, |
| "value_loss": 0.010347830413957126, |
| "value_loss_search": 0.03173550037899986, |
| "value_loss_thought": 0.05104714238696033 |
| }, |
| { |
| "epoch": 6.44, |
| "grad_norm": 1.0377123471406164, |
| "learning_rate": 3.448501694060332e-06, |
| "loss": 0.2229, |
| "sft_loss": 0.02486703696195036, |
| "step": 270, |
| "total_loss": 0.0260224458801531, |
| "value_loss": 0.011554089462151751, |
| "value_loss_search": 0.03946099675667938, |
| "value_loss_thought": 0.05297171908459859 |
| }, |
| { |
| "epoch": 6.56, |
| "grad_norm": 0.9525897324775634, |
| "learning_rate": 3.38242484478699e-06, |
| "loss": 0.2427, |
| "sft_loss": 0.028131387650500984, |
| "step": 275, |
| "total_loss": 0.02923572239851637, |
| "value_loss": 0.011043349133251468, |
| "value_loss_search": 0.03660392903257161, |
| "value_loss_thought": 0.051742863486288114 |
| }, |
| { |
| "epoch": 6.68, |
| "grad_norm": 0.8743581706882085, |
| "learning_rate": 3.315636064327174e-06, |
| "loss": 0.2321, |
| "sft_loss": 0.028024015191476792, |
| "step": 280, |
| "total_loss": 0.02930470963474363, |
| "value_loss": 0.012806941750750412, |
| "value_loss_search": 0.04088172500487417, |
| "value_loss_thought": 0.061573809065157546 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 0.8925599129064181, |
| "learning_rate": 3.248189237174273e-06, |
| "loss": 0.2333, |
| "sft_loss": 0.028793911496177316, |
| "step": 285, |
| "total_loss": 0.030005147381052664, |
| "value_loss": 0.01211236050730804, |
| "value_loss_search": 0.03714065319800284, |
| "value_loss_thought": 0.05975823083135765 |
| }, |
| { |
| "epoch": 6.92, |
| "grad_norm": 0.9363754532616637, |
| "learning_rate": 3.1801387787269043e-06, |
| "loss": 0.2354, |
| "sft_loss": 0.03100782575784251, |
| "step": 290, |
| "total_loss": 0.032353814740054075, |
| "value_loss": 0.013459890354715754, |
| "value_loss_search": 0.04140264603483956, |
| "value_loss_thought": 0.0662764773296658 |
| }, |
| { |
| "epoch": 7.03, |
| "grad_norm": 0.9366809408523523, |
| "learning_rate": 3.1115395913871355e-06, |
| "loss": 0.2125, |
| "sft_loss": 0.026877322501968594, |
| "step": 295, |
| "total_loss": 0.028246402372678857, |
| "value_loss": 0.013690798578318208, |
| "value_loss_search": 0.03995930703094928, |
| "value_loss_thought": 0.06956708127399906 |
| }, |
| { |
| "epoch": 7.15, |
| "grad_norm": 1.119490178115649, |
| "learning_rate": 3.0424470202657953e-06, |
| "loss": 0.1853, |
| "sft_loss": 0.02365317102521658, |
| "step": 300, |
| "total_loss": 0.02459036890468269, |
| "value_loss": 0.009371978462149854, |
| "value_loss_search": 0.028495647068484686, |
| "value_loss_thought": 0.0464801803114824 |
| }, |
| { |
| "epoch": 7.27, |
| "grad_norm": 1.1048820754695519, |
| "learning_rate": 2.972916808530619e-06, |
| "loss": 0.1726, |
| "sft_loss": 0.019329519721213727, |
| "step": 305, |
| "total_loss": 0.020249521383448155, |
| "value_loss": 0.00920001688064076, |
| "value_loss_search": 0.03524856393923983, |
| "value_loss_thought": 0.03835157056746539 |
| }, |
| { |
| "epoch": 7.39, |
| "grad_norm": 1.009889999878668, |
| "learning_rate": 2.903005052433234e-06, |
| "loss": 0.182, |
| "sft_loss": 0.020777616742998363, |
| "step": 310, |
| "total_loss": 0.021737158278847347, |
| "value_loss": 0.009595414497016463, |
| "value_loss_search": 0.031585555197671054, |
| "value_loss_thought": 0.0451777604612289 |
| }, |
| { |
| "epoch": 7.51, |
| "grad_norm": 1.2183303716757812, |
| "learning_rate": 2.832768156051293e-06, |
| "loss": 0.1824, |
| "sft_loss": 0.021867655974347144, |
| "step": 315, |
| "total_loss": 0.0228094671114377, |
| "value_loss": 0.009418110789556521, |
| "value_loss_search": 0.028446125249320175, |
| "value_loss_thought": 0.04689876097254455 |
| }, |
| { |
| "epoch": 7.63, |
| "grad_norm": 0.9682548506977025, |
| "learning_rate": 2.7622627857822453e-06, |
| "loss": 0.1794, |
| "sft_loss": 0.02051793959690258, |
| "step": 320, |
| "total_loss": 0.021331546243163756, |
| "value_loss": 0.008136065979488193, |
| "value_loss_search": 0.028256760543445125, |
| "value_loss_thought": 0.03683176791091682 |
| }, |
| { |
| "epoch": 7.75, |
| "grad_norm": 1.008229209327857, |
| "learning_rate": 2.691545824625483e-06, |
| "loss": 0.1853, |
| "sft_loss": 0.02085896288044751, |
| "step": 325, |
| "total_loss": 0.021939672244116082, |
| "value_loss": 0.01080709469388239, |
| "value_loss_search": 0.02821317232446745, |
| "value_loss_thought": 0.05824358468817081 |
| }, |
| { |
| "epoch": 7.87, |
| "grad_norm": 0.9801384650684389, |
| "learning_rate": 2.620674326289725e-06, |
| "loss": 0.1792, |
| "sft_loss": 0.021865303337108344, |
| "step": 330, |
| "total_loss": 0.0226608935457989, |
| "value_loss": 0.007955901384411846, |
| "value_loss_search": 0.02793408685174654, |
| "value_loss_thought": 0.03571312382337055 |
| }, |
| { |
| "epoch": 7.99, |
| "grad_norm": 0.9957117603261937, |
| "learning_rate": 2.5497054691626754e-06, |
| "loss": 0.1838, |
| "sft_loss": 0.023318158509209753, |
| "step": 335, |
| "total_loss": 0.024168919044313952, |
| "value_loss": 0.008507605516933836, |
| "value_loss_search": 0.026497727730020414, |
| "value_loss_thought": 0.04156311670230935 |
| }, |
| { |
| "epoch": 8.11, |
| "grad_norm": 0.9806341509152543, |
| "learning_rate": 2.478696510180105e-06, |
| "loss": 0.149, |
| "sft_loss": 0.0160996129445266, |
| "step": 340, |
| "total_loss": 0.016797228009818355, |
| "value_loss": 0.006976150353148114, |
| "value_loss_search": 0.023582903065835124, |
| "value_loss_thought": 0.03222629976226017 |
| }, |
| { |
| "epoch": 8.23, |
| "grad_norm": 1.0333112441972616, |
| "learning_rate": 2.4077047386315375e-06, |
| "loss": 0.146, |
| "sft_loss": 0.01726848309626803, |
| "step": 345, |
| "total_loss": 0.01786828497188253, |
| "value_loss": 0.005998019542312249, |
| "value_loss_search": 0.018190549310384087, |
| "value_loss_thought": 0.029793607082683592 |
| }, |
| { |
| "epoch": 8.35, |
| "grad_norm": 1.1009437866195484, |
| "learning_rate": 2.3367874299398587e-06, |
| "loss": 0.1382, |
| "sft_loss": 0.017719452880555764, |
| "step": 350, |
| "total_loss": 0.018400812321306147, |
| "value_loss": 0.006813595366838854, |
| "value_loss_search": 0.022706839033344296, |
| "value_loss_thought": 0.031801923821331 |
| }, |
| { |
| "epoch": 8.46, |
| "grad_norm": 1.1026584990458597, |
| "learning_rate": 2.266001799452094e-06, |
| "loss": 0.1346, |
| "sft_loss": 0.01671770153916441, |
| "step": 355, |
| "total_loss": 0.017422528978204354, |
| "value_loss": 0.007048274046974256, |
| "value_loss_search": 0.023480392742203548, |
| "value_loss_thought": 0.032905799969739746 |
| }, |
| { |
| "epoch": 8.58, |
| "grad_norm": 0.9942201314268132, |
| "learning_rate": 2.195404956278663e-06, |
| "loss": 0.1399, |
| "sft_loss": 0.01536920148646459, |
| "step": 360, |
| "total_loss": 0.01604004146747684, |
| "value_loss": 0.0067083996378642045, |
| "value_loss_search": 0.02136720167036401, |
| "value_loss_thought": 0.03229999518080149 |
| }, |
| { |
| "epoch": 8.7, |
| "grad_norm": 1.0200716562222094, |
| "learning_rate": 2.125053857218346e-06, |
| "loss": 0.1367, |
| "sft_loss": 0.01647848271531984, |
| "step": 365, |
| "total_loss": 0.017283534408488777, |
| "value_loss": 0.008050516255025286, |
| "value_loss_search": 0.025714086863445118, |
| "value_loss_thought": 0.038690043007954956 |
| }, |
| { |
| "epoch": 8.82, |
| "grad_norm": 0.9746017587073934, |
| "learning_rate": 2.055005260806125e-06, |
| "loss": 0.1386, |
| "sft_loss": 0.016943779814755543, |
| "step": 370, |
| "total_loss": 0.017788732309236367, |
| "value_loss": 0.008449525324977003, |
| "value_loss_search": 0.027701811920269392, |
| "value_loss_thought": 0.03989439076831332 |
| }, |
| { |
| "epoch": 8.94, |
| "grad_norm": 1.013640616346488, |
| "learning_rate": 1.9853156815209955e-06, |
| "loss": 0.1376, |
| "sft_loss": 0.01664973153383471, |
| "step": 375, |
| "total_loss": 0.017237521769857267, |
| "value_loss": 0.005877902866632212, |
| "value_loss_search": 0.020167693262919784, |
| "value_loss_thought": 0.026855529422755355 |
| }, |
| { |
| "epoch": 9.06, |
| "grad_norm": 0.8410872065316937, |
| "learning_rate": 1.9160413441906667e-06, |
| "loss": 0.1288, |
| "sft_loss": 0.01438085613772273, |
| "step": 380, |
| "total_loss": 0.0149352347310014, |
| "value_loss": 0.005543786693306174, |
| "value_loss_search": 0.021038004008005373, |
| "value_loss_thought": 0.02331228948896751 |
| }, |
| { |
| "epoch": 9.18, |
| "grad_norm": 1.1267722685163901, |
| "learning_rate": 1.8472381386299597e-06, |
| "loss": 0.1118, |
| "sft_loss": 0.013145256665302441, |
| "step": 385, |
| "total_loss": 0.013753409318451305, |
| "value_loss": 0.0060815261575044135, |
| "value_loss_search": 0.018587293970631434, |
| "value_loss_thought": 0.030064915464026853 |
| }, |
| { |
| "epoch": 9.3, |
| "grad_norm": 1.0416216404779595, |
| "learning_rate": 1.7789615745494842e-06, |
| "loss": 0.1081, |
| "sft_loss": 0.011737877724226565, |
| "step": 390, |
| "total_loss": 0.012277404680207837, |
| "value_loss": 0.005395268987922463, |
| "value_loss_search": 0.014393719820509432, |
| "value_loss_thought": 0.028768431980279275 |
| }, |
| { |
| "epoch": 9.42, |
| "grad_norm": 1.0705185902916405, |
| "learning_rate": 1.7112667367709784e-06, |
| "loss": 0.1076, |
| "sft_loss": 0.011471840902231634, |
| "step": 395, |
| "total_loss": 0.012079083354365139, |
| "value_loss": 0.006072424399462761, |
| "value_loss_search": 0.01874679435568396, |
| "value_loss_thought": 0.029832600799272767 |
| }, |
| { |
| "epoch": 9.54, |
| "grad_norm": 0.9769935708025238, |
| "learning_rate": 1.644208240785454e-06, |
| "loss": 0.1092, |
| "sft_loss": 0.012640924844890832, |
| "step": 400, |
| "total_loss": 0.013220903444744182, |
| "value_loss": 0.005799786370334914, |
| "value_loss_search": 0.017371999034367036, |
| "value_loss_thought": 0.02902629199961666 |
| }, |
| { |
| "epoch": 9.66, |
| "grad_norm": 0.8732612511216603, |
| "learning_rate": 1.5778401886899808e-06, |
| "loss": 0.1101, |
| "sft_loss": 0.011417516821529717, |
| "step": 405, |
| "total_loss": 0.011904582896204375, |
| "value_loss": 0.004870661003951682, |
| "value_loss_search": 0.017166581230412703, |
| "value_loss_thought": 0.021798706852132456 |
| }, |
| { |
| "epoch": 9.78, |
| "grad_norm": 0.9695707300264521, |
| "learning_rate": 1.5122161255386815e-06, |
| "loss": 0.1094, |
| "sft_loss": 0.01428213594481349, |
| "step": 410, |
| "total_loss": 0.014828245776698168, |
| "value_loss": 0.0054610981664154675, |
| "value_loss_search": 0.0202188741764985, |
| "value_loss_thought": 0.02346991123922635 |
| }, |
| { |
| "epoch": 9.9, |
| "grad_norm": 0.8746087026209299, |
| "learning_rate": 1.4473889961431342e-06, |
| "loss": 0.1039, |
| "sft_loss": 0.014117157692089677, |
| "step": 415, |
| "total_loss": 0.01462695918216923, |
| "value_loss": 0.005098015529256372, |
| "value_loss_search": 0.01615281879567192, |
| "value_loss_thought": 0.024631305370348855 |
| }, |
| { |
| "epoch": 10.01, |
| "grad_norm": 0.8874682684740957, |
| "learning_rate": 1.3834111023570557e-06, |
| "loss": 0.1081, |
| "sft_loss": 0.012744372192537412, |
| "step": 420, |
| "total_loss": 0.01325692782020269, |
| "value_loss": 0.005125556160055567, |
| "value_loss_search": 0.019138199927692768, |
| "value_loss_thought": 0.021866249560844153 |
| }, |
| { |
| "epoch": 10.13, |
| "grad_norm": 0.8207501590556279, |
| "learning_rate": 1.320334060879702e-06, |
| "loss": 0.0882, |
| "sft_loss": 0.01002211165614426, |
| "step": 425, |
| "total_loss": 0.010463051758233633, |
| "value_loss": 0.004409400749136694, |
| "value_loss_search": 0.015294310338504147, |
| "value_loss_thought": 0.01998089556072955 |
| }, |
| { |
| "epoch": 10.25, |
| "grad_norm": 0.9646575581607053, |
| "learning_rate": 1.258208761612061e-06, |
| "loss": 0.0881, |
| "sft_loss": 0.01025077059166506, |
| "step": 430, |
| "total_loss": 0.010693130262006889, |
| "value_loss": 0.004423596604101476, |
| "value_loss_search": 0.014729893395269755, |
| "value_loss_thought": 0.02065887938551896 |
| }, |
| { |
| "epoch": 10.37, |
| "grad_norm": 0.81964071156398, |
| "learning_rate": 1.1970853265994008e-06, |
| "loss": 0.0842, |
| "sft_loss": 0.010936628474155441, |
| "step": 435, |
| "total_loss": 0.011318862752523274, |
| "value_loss": 0.0038223424373427404, |
| "value_loss_search": 0.01328829516278347, |
| "value_loss_thought": 0.01729044433013769 |
| }, |
| { |
| "epoch": 10.49, |
| "grad_norm": 0.8384988992049195, |
| "learning_rate": 1.1370130695933317e-06, |
| "loss": 0.0843, |
| "sft_loss": 0.010979409719584509, |
| "step": 440, |
| "total_loss": 0.011374782725033583, |
| "value_loss": 0.003953730190551142, |
| "value_loss_search": 0.013550824504636693, |
| "value_loss_thought": 0.018079017029958778 |
| }, |
| { |
| "epoch": 10.61, |
| "grad_norm": 0.8335588319297724, |
| "learning_rate": 1.07804045626598e-06, |
| "loss": 0.0855, |
| "sft_loss": 0.009162665629992262, |
| "step": 445, |
| "total_loss": 0.009644234287043219, |
| "value_loss": 0.004815686383517459, |
| "value_loss_search": 0.014363445951312314, |
| "value_loss_thought": 0.024162045223056337 |
| }, |
| { |
| "epoch": 10.73, |
| "grad_norm": 0.9120796553660501, |
| "learning_rate": 1.020215065108393e-06, |
| "loss": 0.0872, |
| "sft_loss": 0.011613176169339568, |
| "step": 450, |
| "total_loss": 0.01204290009372926, |
| "value_loss": 0.004297239889274352, |
| "value_loss_search": 0.013511335075600072, |
| "value_loss_thought": 0.020866583984752652 |
| }, |
| { |
| "epoch": 10.85, |
| "grad_norm": 0.9330832254283613, |
| "learning_rate": 9.635835490446993e-07, |
| "loss": 0.0867, |
| "sft_loss": 0.009662232839036732, |
| "step": 455, |
| "total_loss": 0.010051346268846828, |
| "value_loss": 0.00389113405108219, |
| "value_loss_search": 0.013773129394394346, |
| "value_loss_thought": 0.017355942977883388 |
| }, |
| { |
| "epoch": 10.97, |
| "grad_norm": 0.8158067315783223, |
| "learning_rate": 9.081915977930242e-07, |
| "loss": 0.0897, |
| "sft_loss": 0.010187394253443926, |
| "step": 460, |
| "total_loss": 0.010594041376316454, |
| "value_loss": 0.004066470502220909, |
| "value_loss_search": 0.013818205476309232, |
| "value_loss_thought": 0.018713558375020513 |
| }, |
| { |
| "epoch": 11.09, |
| "grad_norm": 0.7146761362099704, |
| "learning_rate": 8.54083901003492e-07, |
| "loss": 0.077, |
| "sft_loss": 0.008773002325324342, |
| "step": 465, |
| "total_loss": 0.009241018544889812, |
| "value_loss": 0.004680162535805721, |
| "value_loss_search": 0.013033845103927888, |
| "value_loss_thought": 0.02440745545027312 |
| }, |
| { |
| "epoch": 11.21, |
| "grad_norm": 0.7385000543408173, |
| "learning_rate": 8.013041122030857e-07, |
| "loss": 0.0729, |
| "sft_loss": 0.008190885756630451, |
| "step": 470, |
| "total_loss": 0.008555836805862782, |
| "value_loss": 0.003649510513059795, |
| "value_loss_search": 0.012406850125989877, |
| "value_loss_thought": 0.016789233975578098 |
| }, |
| { |
| "epoch": 11.33, |
| "grad_norm": 0.7633334519531002, |
| "learning_rate": 7.49894813576437e-07, |
| "loss": 0.0746, |
| "sft_loss": 0.008580005669500679, |
| "step": 475, |
| "total_loss": 0.008921884518167644, |
| "value_loss": 0.003418788455019239, |
| "value_loss_search": 0.010919666264089755, |
| "value_loss_thought": 0.01643064135714667 |
| }, |
| { |
| "epoch": 11.45, |
| "grad_norm": 0.9909650038252632, |
| "learning_rate": 6.998974816109566e-07, |
| "loss": 0.0715, |
| "sft_loss": 0.007525585388066247, |
| "step": 480, |
| "total_loss": 0.007853847992191732, |
| "value_loss": 0.0032826256348926107, |
| "value_loss_search": 0.010486412676982582, |
| "value_loss_thought": 0.01577459230902605 |
| }, |
| { |
| "epoch": 11.56, |
| "grad_norm": 0.8129682389354967, |
| "learning_rate": 6.513524536340412e-07, |
| "loss": 0.0712, |
| "sft_loss": 0.008444644045084715, |
| "step": 485, |
| "total_loss": 0.008825799487021868, |
| "value_loss": 0.0038115545008622575, |
| "value_loss_search": 0.010597189843247179, |
| "value_loss_thought": 0.019895246106898412 |
| }, |
| { |
| "epoch": 11.68, |
| "grad_norm": 0.8775605329407913, |
| "learning_rate": 6.04298895269334e-07, |
| "loss": 0.0703, |
| "sft_loss": 0.008042437641415745, |
| "step": 490, |
| "total_loss": 0.008418237919249805, |
| "value_loss": 0.003758002670656424, |
| "value_loss_search": 0.011610074328200427, |
| "value_loss_thought": 0.018453947085072288 |
| }, |
| { |
| "epoch": 11.8, |
| "grad_norm": 0.8428362204023381, |
| "learning_rate": 5.5877476883831e-07, |
| "loss": 0.072, |
| "sft_loss": 0.00909191146492958, |
| "step": 495, |
| "total_loss": 0.009458713585991064, |
| "value_loss": 0.00366802109128912, |
| "value_loss_search": 0.012498747254721821, |
| "value_loss_thought": 0.016845421569450993 |
| }, |
| { |
| "epoch": 11.92, |
| "grad_norm": 0.778559570796605, |
| "learning_rate": 5.148168027326672e-07, |
| "loss": 0.07, |
| "sft_loss": 0.008272510836832225, |
| "step": 500, |
| "total_loss": 0.008611040847790718, |
| "value_loss": 0.0033853001063107514, |
| "value_loss_search": 0.01196195939264726, |
| "value_loss_thought": 0.015120441405451856 |
| }, |
| { |
| "epoch": 12.04, |
| "grad_norm": 0.6559608657124244, |
| "learning_rate": 4.724604617822429e-07, |
| "loss": 0.069, |
| "sft_loss": 0.008424899209057913, |
| "step": 505, |
| "total_loss": 0.00873491317679509, |
| "value_loss": 0.003100138959416654, |
| "value_loss_search": 0.009996878827223554, |
| "value_loss_thought": 0.014804233009635936 |
| }, |
| { |
| "epoch": 12.16, |
| "grad_norm": 0.6496917858411566, |
| "learning_rate": 4.317399186423574e-07, |
| "loss": 0.0674, |
| "sft_loss": 0.008423441136255861, |
| "step": 510, |
| "total_loss": 0.008760591896862024, |
| "value_loss": 0.003371507487099734, |
| "value_loss_search": 0.009991695114877075, |
| "value_loss_thought": 0.01698036474554101 |
| }, |
| { |
| "epoch": 12.28, |
| "grad_norm": 0.6988165213179325, |
| "learning_rate": 3.926880262236724e-07, |
| "loss": 0.0611, |
| "sft_loss": 0.007937616313574836, |
| "step": 515, |
| "total_loss": 0.008260040113054857, |
| "value_loss": 0.0032242384204437258, |
| "value_loss_search": 0.010769415664253757, |
| "value_loss_thought": 0.015024491625081283 |
| }, |
| { |
| "epoch": 12.4, |
| "grad_norm": 0.7528021544771036, |
| "learning_rate": 3.5533629118680443e-07, |
| "loss": 0.0643, |
| "sft_loss": 0.008419578592292965, |
| "step": 520, |
| "total_loss": 0.008726798217958276, |
| "value_loss": 0.003072196059838461, |
| "value_loss_search": 0.010191632094210945, |
| "value_loss_thought": 0.01438593643833883 |
| }, |
| { |
| "epoch": 12.52, |
| "grad_norm": 0.7891775732477346, |
| "learning_rate": 3.1971484852307694e-07, |
| "loss": 0.0611, |
| "sft_loss": 0.00729263544199057, |
| "step": 525, |
| "total_loss": 0.0076024448298994685, |
| "value_loss": 0.0030980941948655525, |
| "value_loss_search": 0.010019748501508729, |
| "value_loss_thought": 0.014765005164372269 |
| }, |
| { |
| "epoch": 12.64, |
| "grad_norm": 0.6907626742145749, |
| "learning_rate": 2.8585243724192466e-07, |
| "loss": 0.0601, |
| "sft_loss": 0.007823836395982654, |
| "step": 530, |
| "total_loss": 0.008138071943994874, |
| "value_loss": 0.0031423555094079346, |
| "value_loss_search": 0.010012611195270438, |
| "value_loss_thought": 0.015126232872717083 |
| }, |
| { |
| "epoch": 12.76, |
| "grad_norm": 0.7429135356582753, |
| "learning_rate": 2.5377637718455887e-07, |
| "loss": 0.0659, |
| "sft_loss": 0.00822238313849084, |
| "step": 535, |
| "total_loss": 0.008556838914773834, |
| "value_loss": 0.0033445580851548585, |
| "value_loss_search": 0.011346992234757635, |
| "value_loss_thought": 0.01540947253961349 |
| }, |
| { |
| "epoch": 12.88, |
| "grad_norm": 0.7774598016373955, |
| "learning_rate": 2.23512546982603e-07, |
| "loss": 0.0645, |
| "sft_loss": 0.0073554387083277105, |
| "step": 540, |
| "total_loss": 0.007690619602180959, |
| "value_loss": 0.0033518082374939693, |
| "value_loss_search": 0.010812847971101292, |
| "value_loss_thought": 0.016001617997244467 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.9294156718635098, |
| "learning_rate": 1.9508536317948358e-07, |
| "loss": 0.0647, |
| "sft_loss": 0.008502764551667496, |
| "step": 545, |
| "total_loss": 0.008804986921677482, |
| "value_loss": 0.0030222234428947557, |
| "value_loss_search": 0.010061871179641458, |
| "value_loss_thought": 0.014115916420996655 |
| }, |
| { |
| "epoch": 13.11, |
| "grad_norm": 0.6263794814598377, |
| "learning_rate": 1.6851776053141505e-07, |
| "loss": 0.0568, |
| "sft_loss": 0.006252302648499608, |
| "step": 550, |
| "total_loss": 0.006542081882616913, |
| "value_loss": 0.002897792073781602, |
| "value_loss_search": 0.008962227043593885, |
| "value_loss_thought": 0.014220109464076813 |
| }, |
| { |
| "epoch": 13.23, |
| "grad_norm": 0.5596990915451534, |
| "learning_rate": 1.438311735038783e-07, |
| "loss": 0.056, |
| "sft_loss": 0.00756752782908734, |
| "step": 555, |
| "total_loss": 0.007857268174484488, |
| "value_loss": 0.0028974040556931867, |
| "value_loss_search": 0.00920752819874906, |
| "value_loss_thought": 0.013971704215146019 |
| }, |
| { |
| "epoch": 13.35, |
| "grad_norm": 0.6205386318885459, |
| "learning_rate": 1.2104551897851645e-07, |
| "loss": 0.0623, |
| "sft_loss": 0.008255193088552914, |
| "step": 560, |
| "total_loss": 0.008591093667018868, |
| "value_loss": 0.0033590062324947213, |
| "value_loss_search": 0.010873512069520076, |
| "value_loss_thought": 0.01599853788575274 |
| }, |
| { |
| "epoch": 13.47, |
| "grad_norm": 0.61362058421556, |
| "learning_rate": 1.0017918018440182e-07, |
| "loss": 0.0572, |
| "sft_loss": 0.006953836599132046, |
| "step": 565, |
| "total_loss": 0.007285526537361875, |
| "value_loss": 0.003316899410856422, |
| "value_loss_search": 0.010180625680368394, |
| "value_loss_thought": 0.01635456970980158 |
| }, |
| { |
| "epoch": 13.59, |
| "grad_norm": 0.672535364687818, |
| "learning_rate": 8.124899186663816e-08, |
| "loss": 0.0589, |
| "sft_loss": 0.007610985788051039, |
| "step": 570, |
| "total_loss": 0.007938396774943613, |
| "value_loss": 0.003274109651101753, |
| "value_loss_search": 0.009723765701346565, |
| "value_loss_thought": 0.01646911151619861 |
| }, |
| { |
| "epoch": 13.71, |
| "grad_norm": 0.5999988951381204, |
| "learning_rate": 6.42702267042633e-08, |
| "loss": 0.0576, |
| "sft_loss": 0.006899624702055007, |
| "step": 575, |
| "total_loss": 0.0071674311737297105, |
| "value_loss": 0.0026780645912367618, |
| "value_loss_search": 0.007902318794367601, |
| "value_loss_thought": 0.013522197940619662 |
| }, |
| { |
| "epoch": 13.83, |
| "grad_norm": 0.5907898559307843, |
| "learning_rate": 4.9256582988409795e-08, |
| "loss": 0.0592, |
| "sft_loss": 0.008203773334389552, |
| "step": 580, |
| "total_loss": 0.008553845428650675, |
| "value_loss": 0.003500720789452316, |
| "value_loss_search": 0.01093327165726805, |
| "value_loss_thought": 0.017072494645253754 |
| }, |
| { |
| "epoch": 13.95, |
| "grad_norm": 0.5970917111715769, |
| "learning_rate": 3.6220173570667426e-08, |
| "loss": 0.0618, |
| "sft_loss": 0.006927466951310635, |
| "step": 585, |
| "total_loss": 0.0071999496663920585, |
| "value_loss": 0.0027248271569987994, |
| "value_loss_search": 0.009161660186873633, |
| "value_loss_thought": 0.012636957273934967 |
| }, |
| { |
| "epoch": 14.07, |
| "grad_norm": 0.6162881999842252, |
| "learning_rate": 2.5171516090559955e-08, |
| "loss": 0.0589, |
| "sft_loss": 0.007042083889245987, |
| "step": 590, |
| "total_loss": 0.007351792247754929, |
| "value_loss": 0.003097083197644679, |
| "value_loss_search": 0.009042211448104354, |
| "value_loss_thought": 0.015734454097400884 |
| }, |
| { |
| "epoch": 14.19, |
| "grad_norm": 0.6339769311529202, |
| "learning_rate": 1.6119524490022796e-08, |
| "loss": 0.0573, |
| "sft_loss": 0.006979101125034503, |
| "step": 595, |
| "total_loss": 0.007320650927067618, |
| "value_loss": 0.003415498048707377, |
| "value_loss_search": 0.00965559176474926, |
| "value_loss_thought": 0.01766839253687067 |
| }, |
| { |
| "epoch": 14.31, |
| "grad_norm": 0.6862289179498278, |
| "learning_rate": 9.07150182172717e-09, |
| "loss": 0.0565, |
| "sft_loss": 0.006115725569543429, |
| "step": 600, |
| "total_loss": 0.006396417736686999, |
| "value_loss": 0.0028069213738490363, |
| "value_loss_search": 0.0084622835922346, |
| "value_loss_thought": 0.013993087414564797 |
| }, |
| { |
| "epoch": 14.43, |
| "grad_norm": 0.6965073779773722, |
| "learning_rate": 4.033134357054047e-09, |
| "loss": 0.0586, |
| "sft_loss": 0.005957965517882258, |
| "step": 605, |
| "total_loss": 0.0062250764058262575, |
| "value_loss": 0.0026711090009484905, |
| "value_loss_search": 0.009071938186025363, |
| "value_loss_thought": 0.012296933901961894 |
| }, |
| { |
| "epoch": 14.55, |
| "grad_norm": 0.5697750508884436, |
| "learning_rate": 1.0084869984686163e-09, |
| "loss": 0.0568, |
| "sft_loss": 0.007345160163822584, |
| "step": 610, |
| "total_loss": 0.007681814903298801, |
| "value_loss": 0.003366547104815254, |
| "value_loss_search": 0.011690776431350969, |
| "value_loss_thought": 0.015241600351873785 |
| }, |
| { |
| "epoch": 14.66, |
| "grad_norm": 0.585078337634092, |
| "learning_rate": 0.0, |
| "loss": 0.0602, |
| "sft_loss": 0.008140892942901701, |
| "step": 615, |
| "total_loss": 0.00840054679711102, |
| "value_loss": 0.002596538521174807, |
| "value_loss_search": 0.008188147976397886, |
| "value_loss_thought": 0.012584160236292519 |
| }, |
| { |
| "epoch": 14.66, |
| "step": 615, |
| "total_flos": 0.0, |
| "train_loss": 0.3256297383851152, |
| "train_runtime": 44647.5676, |
| "train_samples_per_second": 7.206, |
| "train_steps_per_second": 0.014 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 615, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 15, |
| "save_steps": 350, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|