| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9850746268656716, | |
| "eval_steps": 100, | |
| "global_step": 132, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 823.762321472168, | |
| "epoch": 0.014925373134328358, | |
| "grad_norm": 0.17830021679401398, | |
| "learning_rate": 7.142857142857142e-08, | |
| "loss": 0.0919, | |
| "num_tokens": 865267.0, | |
| "reward": 2.096768334507942, | |
| "reward_std": 0.32721264474093914, | |
| "rewards/accuracy_reward": 0.7645089328289032, | |
| "rewards/format_reward": 0.9899553433060646, | |
| "rewards/log_scaled_reward": 0.3423039447516203, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 465.7957763671875, | |
| "epoch": 0.029850746268656716, | |
| "grad_norm": 0.5716150403022766, | |
| "learning_rate": 1.4285714285714285e-07, | |
| "loss": 0.1518, | |
| "num_tokens": 1414092.0, | |
| "reward": 0.1841081934981048, | |
| "reward_std": 0.6422095634043217, | |
| "rewards/accuracy_reward": 0.23437499813735485, | |
| "rewards/format_reward": 0.05022321571595967, | |
| "rewards/log_scaled_reward": -0.10049002850428224, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 482.07144927978516, | |
| "epoch": 0.04477611940298507, | |
| "grad_norm": 0.532157301902771, | |
| "learning_rate": 2.1428571428571426e-07, | |
| "loss": 0.1335, | |
| "num_tokens": 1992932.0, | |
| "reward": 0.10916334297508001, | |
| "reward_std": 0.6189606413245201, | |
| "rewards/accuracy_reward": 0.20647321455180645, | |
| "rewards/format_reward": 0.053571428754366934, | |
| "rewards/log_scaled_reward": -0.1508813016116619, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 529.5971221923828, | |
| "epoch": 0.05970149253731343, | |
| "grad_norm": 0.42384153604507446, | |
| "learning_rate": 2.857142857142857e-07, | |
| "loss": 0.2014, | |
| "num_tokens": 2596963.0, | |
| "reward": 0.05200400925241411, | |
| "reward_std": 0.582377951592207, | |
| "rewards/accuracy_reward": 0.18191964272409678, | |
| "rewards/format_reward": 0.04464285809081048, | |
| "rewards/log_scaled_reward": -0.17455849051475525, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 490.2690010070801, | |
| "epoch": 0.07462686567164178, | |
| "grad_norm": 0.42382651567459106, | |
| "learning_rate": 3.5714285714285716e-07, | |
| "loss": 0.1441, | |
| "num_tokens": 3173748.0, | |
| "reward": 0.10420671524479985, | |
| "reward_std": 0.5982032977044582, | |
| "rewards/accuracy_reward": 0.18750000279396772, | |
| "rewards/format_reward": 0.07366071455180645, | |
| "rewards/log_scaled_reward": -0.15695400722324848, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 450.69086837768555, | |
| "epoch": 0.08955223880597014, | |
| "grad_norm": 0.5930183529853821, | |
| "learning_rate": 4.285714285714285e-07, | |
| "loss": 0.12, | |
| "num_tokens": 3702055.0, | |
| "reward": 0.16118104895576835, | |
| "reward_std": 0.6403544321656227, | |
| "rewards/accuracy_reward": 0.20982143096625805, | |
| "rewards/format_reward": 0.07142857182770967, | |
| "rewards/log_scaled_reward": -0.12006895546801388, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 469.174129486084, | |
| "epoch": 0.1044776119402985, | |
| "grad_norm": 0.5980175137519836, | |
| "learning_rate": 5e-07, | |
| "loss": 0.1409, | |
| "num_tokens": 4251483.0, | |
| "reward": 0.18134657479822636, | |
| "reward_std": 0.680236354470253, | |
| "rewards/accuracy_reward": 0.20982142724096775, | |
| "rewards/format_reward": 0.09821428637951612, | |
| "rewards/log_scaled_reward": -0.12668914068490267, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 501.63505935668945, | |
| "epoch": 0.11940298507462686, | |
| "grad_norm": 0.7299549579620361, | |
| "learning_rate": 5.714285714285714e-07, | |
| "loss": 0.1195, | |
| "num_tokens": 4818596.0, | |
| "reward": 0.20046980120241642, | |
| "reward_std": 0.67672199010849, | |
| "rewards/accuracy_reward": 0.2120535708963871, | |
| "rewards/format_reward": 0.1183035708963871, | |
| "rewards/log_scaled_reward": -0.1298873471096158, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 495.338191986084, | |
| "epoch": 0.13432835820895522, | |
| "grad_norm": 0.4346173405647278, | |
| "learning_rate": 6.428571428571429e-07, | |
| "loss": 0.1516, | |
| "num_tokens": 5387755.0, | |
| "reward": 0.2097643855959177, | |
| "reward_std": 0.6800287291407585, | |
| "rewards/accuracy_reward": 0.2220982164144516, | |
| "rewards/format_reward": 0.10937500093132257, | |
| "rewards/log_scaled_reward": -0.12170884059742093, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 477.33930587768555, | |
| "epoch": 0.14925373134328357, | |
| "grad_norm": 3.780118942260742, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 0.0884, | |
| "num_tokens": 5940091.0, | |
| "reward": 0.31957440078258514, | |
| "reward_std": 0.7821567356586456, | |
| "rewards/accuracy_reward": 0.2500000037252903, | |
| "rewards/format_reward": 0.16183035727590322, | |
| "rewards/log_scaled_reward": -0.092255964060314, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 529.9196739196777, | |
| "epoch": 0.16417910447761194, | |
| "grad_norm": 0.7230793833732605, | |
| "learning_rate": 7.857142857142856e-07, | |
| "loss": 0.0843, | |
| "num_tokens": 6544955.0, | |
| "reward": 0.2673153153154999, | |
| "reward_std": 0.7341821119189262, | |
| "rewards/accuracy_reward": 0.21316964086145163, | |
| "rewards/format_reward": 0.1897321417927742, | |
| "rewards/log_scaled_reward": -0.13558648666366935, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 498.72323989868164, | |
| "epoch": 0.1791044776119403, | |
| "grad_norm": 0.9150818586349487, | |
| "learning_rate": 8.57142857142857e-07, | |
| "loss": 0.1105, | |
| "num_tokens": 7121891.0, | |
| "reward": 0.34613738395273685, | |
| "reward_std": 0.7445657253265381, | |
| "rewards/accuracy_reward": 0.2388392873108387, | |
| "rewards/format_reward": 0.22544642630964518, | |
| "rewards/log_scaled_reward": -0.11814834456890821, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 483.24221420288086, | |
| "epoch": 0.19402985074626866, | |
| "grad_norm": 2.0404834747314453, | |
| "learning_rate": 9.285714285714285e-07, | |
| "loss": 0.0535, | |
| "num_tokens": 7678060.0, | |
| "reward": 0.5936555862426758, | |
| "reward_std": 0.7919039279222488, | |
| "rewards/accuracy_reward": 0.2667410708963871, | |
| "rewards/format_reward": 0.4084821417927742, | |
| "rewards/log_scaled_reward": -0.08156766439788043, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 416.2042541503906, | |
| "epoch": 0.208955223880597, | |
| "grad_norm": 3.1881885528564453, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0238, | |
| "num_tokens": 8174851.0, | |
| "reward": 0.6864497661590576, | |
| "reward_std": 0.8334432542324066, | |
| "rewards/accuracy_reward": 0.2354910708963871, | |
| "rewards/format_reward": 0.5345982126891613, | |
| "rewards/log_scaled_reward": -0.0836395358783193, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 402.8460006713867, | |
| "epoch": 0.22388059701492538, | |
| "grad_norm": 1.9057585000991821, | |
| "learning_rate": 9.998286624877785e-07, | |
| "loss": 0.0362, | |
| "num_tokens": 8650305.0, | |
| "reward": 0.7661240547895432, | |
| "reward_std": 0.8178009614348412, | |
| "rewards/accuracy_reward": 0.22656249813735485, | |
| "rewards/format_reward": 0.6272321343421936, | |
| "rewards/log_scaled_reward": -0.08767060440732166, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 452.4196662902832, | |
| "epoch": 0.23880597014925373, | |
| "grad_norm": 25.345657348632812, | |
| "learning_rate": 9.99314767377287e-07, | |
| "loss": 0.0285, | |
| "num_tokens": 9179041.0, | |
| "reward": 0.875191293656826, | |
| "reward_std": 0.7929042428731918, | |
| "rewards/accuracy_reward": 0.25781249813735485, | |
| "rewards/format_reward": 0.6964285671710968, | |
| "rewards/log_scaled_reward": -0.07904981379397213, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 416.35605239868164, | |
| "epoch": 0.2537313432835821, | |
| "grad_norm": 1.1184340715408325, | |
| "learning_rate": 9.98458666866564e-07, | |
| "loss": 0.0563, | |
| "num_tokens": 9701832.0, | |
| "reward": 0.9071941375732422, | |
| "reward_std": 0.8025857880711555, | |
| "rewards/accuracy_reward": 0.24999999813735485, | |
| "rewards/format_reward": 0.7265625, | |
| "rewards/log_scaled_reward": -0.06936839601257816, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 415.1797065734863, | |
| "epoch": 0.26865671641791045, | |
| "grad_norm": 0.4797329604625702, | |
| "learning_rate": 9.972609476841365e-07, | |
| "loss": 0.1162, | |
| "num_tokens": 10210017.0, | |
| "reward": 0.9835792705416679, | |
| "reward_std": 0.7636988162994385, | |
| "rewards/accuracy_reward": 0.24665178544819355, | |
| "rewards/format_reward": 0.8147321343421936, | |
| "rewards/log_scaled_reward": -0.07780469593126327, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 441.59042739868164, | |
| "epoch": 0.2835820895522388, | |
| "grad_norm": 0.4748988151550293, | |
| "learning_rate": 9.957224306869053e-07, | |
| "loss": 0.0578, | |
| "num_tokens": 10730474.0, | |
| "reward": 1.0904420465230942, | |
| "reward_std": 0.80109953135252, | |
| "rewards/accuracy_reward": 0.300223208963871, | |
| "rewards/format_reward": 0.8158482164144516, | |
| "rewards/log_scaled_reward": -0.025629449490224943, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 426.41743087768555, | |
| "epoch": 0.29850746268656714, | |
| "grad_norm": 0.47011756896972656, | |
| "learning_rate": 9.938441702975689e-07, | |
| "loss": 0.0503, | |
| "num_tokens": 11239824.0, | |
| "reward": 1.2500263825058937, | |
| "reward_std": 0.8551982864737511, | |
| "rewards/accuracy_reward": 0.3671875, | |
| "rewards/format_reward": 0.8526785746216774, | |
| "rewards/log_scaled_reward": 0.030160245776642114, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 459.67078399658203, | |
| "epoch": 0.31343283582089554, | |
| "grad_norm": 0.37291309237480164, | |
| "learning_rate": 9.916274537819773e-07, | |
| "loss": 0.0366, | |
| "num_tokens": 11776161.0, | |
| "reward": 1.3390378654003143, | |
| "reward_std": 0.8277674093842506, | |
| "rewards/accuracy_reward": 0.4196428544819355, | |
| "rewards/format_reward": 0.8526785746216774, | |
| "rewards/log_scaled_reward": 0.06671636505052447, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 416.93639755249023, | |
| "epoch": 0.3283582089552239, | |
| "grad_norm": 0.4895838499069214, | |
| "learning_rate": 9.890738003669027e-07, | |
| "loss": 0.0473, | |
| "num_tokens": 12279200.0, | |
| "reward": 1.5237962007522583, | |
| "reward_std": 0.8290813863277435, | |
| "rewards/accuracy_reward": 0.4810267835855484, | |
| "rewards/format_reward": 0.8950892835855484, | |
| "rewards/log_scaled_reward": 0.14768002880737185, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 409.1294860839844, | |
| "epoch": 0.34328358208955223, | |
| "grad_norm": 0.4449058175086975, | |
| "learning_rate": 9.861849601988383e-07, | |
| "loss": 0.0255, | |
| "num_tokens": 12776356.0, | |
| "reward": 1.5605345666408539, | |
| "reward_std": 0.8202421888709068, | |
| "rewards/accuracy_reward": 0.488839291036129, | |
| "rewards/format_reward": 0.9151785746216774, | |
| "rewards/log_scaled_reward": 0.15651662228628993, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 487.7277069091797, | |
| "epoch": 0.3582089552238806, | |
| "grad_norm": 0.33160600066185, | |
| "learning_rate": 9.82962913144534e-07, | |
| "loss": 0.0846, | |
| "num_tokens": 13349480.0, | |
| "reward": 1.634689912199974, | |
| "reward_std": 0.7854569926857948, | |
| "rewards/accuracy_reward": 0.5301339291036129, | |
| "rewards/format_reward": 0.9151785746216774, | |
| "rewards/log_scaled_reward": 0.18937731813639402, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 451.1272506713867, | |
| "epoch": 0.373134328358209, | |
| "grad_norm": 0.3235984444618225, | |
| "learning_rate": 9.794098674340966e-07, | |
| "loss": 0.0424, | |
| "num_tokens": 13868850.0, | |
| "reward": 1.9291264861822128, | |
| "reward_std": 0.6902804151177406, | |
| "rewards/accuracy_reward": 0.6618303433060646, | |
| "rewards/format_reward": 0.9441964328289032, | |
| "rewards/log_scaled_reward": 0.32309958525002, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 516.2678871154785, | |
| "epoch": 0.3880597014925373, | |
| "grad_norm": 2.3632936477661133, | |
| "learning_rate": 9.755282581475767e-07, | |
| "loss": 0.1035, | |
| "num_tokens": 14469026.0, | |
| "reward": 1.7230691313743591, | |
| "reward_std": 0.6944096386432648, | |
| "rewards/accuracy_reward": 0.5714285708963871, | |
| "rewards/format_reward": 0.9285714253783226, | |
| "rewards/log_scaled_reward": 0.22306904755532742, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 504.8772506713867, | |
| "epoch": 0.40298507462686567, | |
| "grad_norm": 0.28064557909965515, | |
| "learning_rate": 9.713207455460892e-07, | |
| "loss": 0.0575, | |
| "num_tokens": 15048084.0, | |
| "reward": 1.8045607656240463, | |
| "reward_std": 0.6639576852321625, | |
| "rewards/accuracy_reward": 0.6127232238650322, | |
| "rewards/format_reward": 0.9274553582072258, | |
| "rewards/log_scaled_reward": 0.2643821220844984, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 491.07703018188477, | |
| "epoch": 0.417910447761194, | |
| "grad_norm": 0.29867812991142273, | |
| "learning_rate": 9.667902132486008e-07, | |
| "loss": 0.0641, | |
| "num_tokens": 15607481.0, | |
| "reward": 1.9283190667629242, | |
| "reward_std": 0.627920113503933, | |
| "rewards/accuracy_reward": 0.6529017835855484, | |
| "rewards/format_reward": 0.9575892761349678, | |
| "rewards/log_scaled_reward": 0.3178279069252312, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 559.9810523986816, | |
| "epoch": 0.43283582089552236, | |
| "grad_norm": 0.2834532558917999, | |
| "learning_rate": 9.619397662556433e-07, | |
| "loss": 0.0735, | |
| "num_tokens": 16248464.0, | |
| "reward": 1.7917230874300003, | |
| "reward_std": 0.527396660298109, | |
| "rewards/accuracy_reward": 0.6037946455180645, | |
| "rewards/format_reward": 0.954241082072258, | |
| "rewards/log_scaled_reward": 0.23368733504321426, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 510.0111885070801, | |
| "epoch": 0.44776119402985076, | |
| "grad_norm": 0.25292134284973145, | |
| "learning_rate": 9.567727288213004e-07, | |
| "loss": 0.0778, | |
| "num_tokens": 16828154.0, | |
| "reward": 2.033374920487404, | |
| "reward_std": 0.5300325341522694, | |
| "rewards/accuracy_reward": 0.7008928507566452, | |
| "rewards/format_reward": 0.967633917927742, | |
| "rewards/log_scaled_reward": 0.36484804935753345, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 505.8872985839844, | |
| "epoch": 0.4626865671641791, | |
| "grad_norm": 0.2629943788051605, | |
| "learning_rate": 9.512926421749303e-07, | |
| "loss": 0.0917, | |
| "num_tokens": 17405221.0, | |
| "reward": 1.9693890661001205, | |
| "reward_std": 0.4761252626776695, | |
| "rewards/accuracy_reward": 0.6595982164144516, | |
| "rewards/format_reward": 0.9743303507566452, | |
| "rewards/log_scaled_reward": 0.33546042814850807, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 468.31809997558594, | |
| "epoch": 0.47761194029850745, | |
| "grad_norm": 0.25987353920936584, | |
| "learning_rate": 9.455032620941839e-07, | |
| "loss": 0.116, | |
| "num_tokens": 17953570.0, | |
| "reward": 2.0616614371538162, | |
| "reward_std": 0.49665234982967377, | |
| "rewards/accuracy_reward": 0.7075892761349678, | |
| "rewards/format_reward": 0.962053582072258, | |
| "rewards/log_scaled_reward": 0.39201846718788147, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 504.4788246154785, | |
| "epoch": 0.4925373134328358, | |
| "grad_norm": 0.27474886178970337, | |
| "learning_rate": 9.394085563309826e-07, | |
| "loss": 0.1112, | |
| "num_tokens": 18531095.0, | |
| "reward": 1.9522841572761536, | |
| "reward_std": 0.5375584103167057, | |
| "rewards/accuracy_reward": 0.6607142835855484, | |
| "rewards/format_reward": 0.965401791036129, | |
| "rewards/log_scaled_reward": 0.32616803981363773, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 498.3616371154785, | |
| "epoch": 0.5074626865671642, | |
| "grad_norm": 2.524815320968628, | |
| "learning_rate": 9.330127018922193e-07, | |
| "loss": 0.1055, | |
| "num_tokens": 19108059.0, | |
| "reward": 1.9061091989278793, | |
| "reward_std": 0.5573387667536736, | |
| "rewards/accuracy_reward": 0.6339285708963871, | |
| "rewards/format_reward": 0.9642857164144516, | |
| "rewards/log_scaled_reward": 0.30789486039429903, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 444.20873260498047, | |
| "epoch": 0.5223880597014925, | |
| "grad_norm": 0.3097524046897888, | |
| "learning_rate": 9.26320082177046e-07, | |
| "loss": 0.1342, | |
| "num_tokens": 19646062.0, | |
| "reward": 1.9290964603424072, | |
| "reward_std": 0.5330292023718357, | |
| "rewards/accuracy_reward": 0.6272321492433548, | |
| "rewards/format_reward": 0.9765625, | |
| "rewards/log_scaled_reward": 0.325301731005311, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 420.0848388671875, | |
| "epoch": 0.5373134328358209, | |
| "grad_norm": 0.3493054211139679, | |
| "learning_rate": 9.19335283972712e-07, | |
| "loss": 0.1097, | |
| "num_tokens": 20165994.0, | |
| "reward": 1.937290906906128, | |
| "reward_std": 0.5374783836305141, | |
| "rewards/accuracy_reward": 0.621651791036129, | |
| "rewards/format_reward": 0.9732142835855484, | |
| "rewards/log_scaled_reward": 0.34242471773177385, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 367.64064025878906, | |
| "epoch": 0.5522388059701493, | |
| "grad_norm": 0.39517220854759216, | |
| "learning_rate": 9.120630943110077e-07, | |
| "loss": 0.1309, | |
| "num_tokens": 20622824.0, | |
| "reward": 1.9518826305866241, | |
| "reward_std": 0.5057090371847153, | |
| "rewards/accuracy_reward": 0.6294642947614193, | |
| "rewards/format_reward": 0.9765625074505806, | |
| "rewards/log_scaled_reward": 0.34585576388053596, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 344.1038112640381, | |
| "epoch": 0.5671641791044776, | |
| "grad_norm": 1.3338432312011719, | |
| "learning_rate": 9.045084971874737e-07, | |
| "loss": 0.1333, | |
| "num_tokens": 21076925.0, | |
| "reward": 1.9407142996788025, | |
| "reward_std": 0.5814780332148075, | |
| "rewards/accuracy_reward": 0.6127232126891613, | |
| "rewards/format_reward": 0.9709821417927742, | |
| "rewards/log_scaled_reward": 0.35700881760567427, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 277.19309425354004, | |
| "epoch": 0.582089552238806, | |
| "grad_norm": 0.4697703421115875, | |
| "learning_rate": 8.966766701456176e-07, | |
| "loss": 0.1089, | |
| "num_tokens": 21451954.0, | |
| "reward": 1.8939976394176483, | |
| "reward_std": 0.531686820089817, | |
| "rewards/accuracy_reward": 0.5703125037252903, | |
| "rewards/format_reward": 0.984375, | |
| "rewards/log_scaled_reward": 0.33931003510951996, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 292.2109489440918, | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.42749401926994324, | |
| "learning_rate": 8.885729807284854e-07, | |
| "loss": 0.1051, | |
| "num_tokens": 21837471.0, | |
| "reward": 1.7810039222240448, | |
| "reward_std": 0.5695139020681381, | |
| "rewards/accuracy_reward": 0.5212053544819355, | |
| "rewards/format_reward": 0.9832589253783226, | |
| "rewards/log_scaled_reward": 0.2765395335154608, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 227.70425033569336, | |
| "epoch": 0.6119402985074627, | |
| "grad_norm": 0.8301727771759033, | |
| "learning_rate": 8.802029828000155e-07, | |
| "loss": 0.1365, | |
| "num_tokens": 22182102.0, | |
| "reward": 1.7540639638900757, | |
| "reward_std": 0.504084050655365, | |
| "rewards/accuracy_reward": 0.4899553582072258, | |
| "rewards/format_reward": 0.9754464253783226, | |
| "rewards/log_scaled_reward": 0.2886621206998825, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.12500953674316, | |
| "epoch": 0.6268656716417911, | |
| "grad_norm": 0.6249210238456726, | |
| "learning_rate": 8.71572412738697e-07, | |
| "loss": 0.1336, | |
| "num_tokens": 22471350.0, | |
| "reward": 1.9936908185482025, | |
| "reward_std": 0.6450418382883072, | |
| "rewards/accuracy_reward": 0.5926339402794838, | |
| "rewards/format_reward": 0.9888392761349678, | |
| "rewards/log_scaled_reward": 0.41221751645207405, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 169.09710693359375, | |
| "epoch": 0.6417910447761194, | |
| "grad_norm": 0.7566676139831543, | |
| "learning_rate": 8.626871855061437e-07, | |
| "loss": 0.1662, | |
| "num_tokens": 22758477.0, | |
| "reward": 1.8480805903673172, | |
| "reward_std": 0.5597276613116264, | |
| "rewards/accuracy_reward": 0.511160708963871, | |
| "rewards/format_reward": 0.9944196343421936, | |
| "rewards/log_scaled_reward": 0.3425001185387373, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 152.94978141784668, | |
| "epoch": 0.6567164179104478, | |
| "grad_norm": 0.9348928332328796, | |
| "learning_rate": 8.535533905932737e-07, | |
| "loss": 0.1287, | |
| "num_tokens": 23022432.0, | |
| "reward": 1.8545437455177307, | |
| "reward_std": 0.5482046529650688, | |
| "rewards/accuracy_reward": 0.5078125037252903, | |
| "rewards/format_reward": 0.9921874925494194, | |
| "rewards/log_scaled_reward": 0.35454366356134415, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 128.6517915725708, | |
| "epoch": 0.6716417910447762, | |
| "grad_norm": 1.1114394664764404, | |
| "learning_rate": 8.441772878468769e-07, | |
| "loss": 0.1287, | |
| "num_tokens": 23263912.0, | |
| "reward": 1.7271955758333206, | |
| "reward_std": 0.5110182501375675, | |
| "rewards/accuracy_reward": 0.4330357164144516, | |
| "rewards/format_reward": 0.9977678507566452, | |
| "rewards/log_scaled_reward": 0.29639193043112755, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 143.97545337677002, | |
| "epoch": 0.6865671641791045, | |
| "grad_norm": 1.1403673887252808, | |
| "learning_rate": 8.34565303179429e-07, | |
| "loss": 0.1742, | |
| "num_tokens": 23526706.0, | |
| "reward": 1.6235045939683914, | |
| "reward_std": 0.5148132182657719, | |
| "rewards/accuracy_reward": 0.386160708963871, | |
| "rewards/format_reward": 0.987723208963871, | |
| "rewards/log_scaled_reward": 0.24962060060352087, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 104.42187976837158, | |
| "epoch": 0.7014925373134329, | |
| "grad_norm": 1.6832914352416992, | |
| "learning_rate": 8.247240241650917e-07, | |
| "loss": 0.1296, | |
| "num_tokens": 23736324.0, | |
| "reward": 1.690138816833496, | |
| "reward_std": 0.5133109800517559, | |
| "rewards/accuracy_reward": 0.4051339328289032, | |
| "rewards/format_reward": 0.9933035671710968, | |
| "rewards/log_scaled_reward": 0.29170125164091587, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 92.65848731994629, | |
| "epoch": 0.7164179104477612, | |
| "grad_norm": 1.6501548290252686, | |
| "learning_rate": 8.146601955249187e-07, | |
| "loss": 0.1411, | |
| "num_tokens": 23946162.0, | |
| "reward": 1.6304273456335068, | |
| "reward_std": 0.49451132118701935, | |
| "rewards/accuracy_reward": 0.36941964365541935, | |
| "rewards/format_reward": 0.995535708963871, | |
| "rewards/log_scaled_reward": 0.26547193340957165, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 78.72098636627197, | |
| "epoch": 0.7313432835820896, | |
| "grad_norm": 2.571906566619873, | |
| "learning_rate": 8.043807145043603e-07, | |
| "loss": 0.1793, | |
| "num_tokens": 24153096.0, | |
| "reward": 1.581173524260521, | |
| "reward_std": 0.43424950167536736, | |
| "rewards/accuracy_reward": 0.33748282864689827, | |
| "rewards/format_reward": 0.9966517835855484, | |
| "rewards/log_scaled_reward": 0.24970022030174732, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 64.70759201049805, | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 2.7052981853485107, | |
| "learning_rate": 7.938926261462365e-07, | |
| "loss": 0.1791, | |
| "num_tokens": 24349226.0, | |
| "reward": 1.5854334235191345, | |
| "reward_std": 0.3977060168981552, | |
| "rewards/accuracy_reward": 0.33593750186264515, | |
| "rewards/format_reward": 0.9955357015132904, | |
| "rewards/log_scaled_reward": 0.2539601270109415, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 46.40067148208618, | |
| "epoch": 0.7611940298507462, | |
| "grad_norm": 6.834611415863037, | |
| "learning_rate": 7.832031184624164e-07, | |
| "loss": 0.1055, | |
| "num_tokens": 24528585.0, | |
| "reward": 1.5368833392858505, | |
| "reward_std": 0.47740813344717026, | |
| "rewards/accuracy_reward": 0.30133928544819355, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": 0.2366600539535284, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 38.36384057998657, | |
| "epoch": 0.7761194029850746, | |
| "grad_norm": 4.969581127166748, | |
| "learning_rate": 7.723195175075135e-07, | |
| "loss": 0.1206, | |
| "num_tokens": 24691607.0, | |
| "reward": 1.415985830128193, | |
| "reward_std": 0.29450324457138777, | |
| "rewards/accuracy_reward": 0.2377232169965282, | |
| "rewards/format_reward": 0.9977678507566452, | |
| "rewards/log_scaled_reward": 0.18049467215314507, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 33.99218940734863, | |
| "epoch": 0.7910447761194029, | |
| "grad_norm": 4.195903778076172, | |
| "learning_rate": 7.612492823579744e-07, | |
| "loss": 0.145, | |
| "num_tokens": 24849736.0, | |
| "reward": 1.5035328567028046, | |
| "reward_std": 0.34667503647506237, | |
| "rewards/accuracy_reward": 0.27790178544819355, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": 0.2267470918595791, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 26.474331378936768, | |
| "epoch": 0.8059701492537313, | |
| "grad_norm": 24.98171615600586, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.0613, | |
| "num_tokens": 25008289.0, | |
| "reward": 1.5673803389072418, | |
| "reward_std": 0.28740744665265083, | |
| "rewards/accuracy_reward": 0.3069196417927742, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": 0.26046060863882303, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 25.195313692092896, | |
| "epoch": 0.8208955223880597, | |
| "grad_norm": 5.9815216064453125, | |
| "learning_rate": 7.385793801298042e-07, | |
| "loss": 0.0376, | |
| "num_tokens": 25169984.0, | |
| "reward": 1.4668240398168564, | |
| "reward_std": 0.3536365833133459, | |
| "rewards/accuracy_reward": 0.25369161926209927, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": 0.2157079027965665, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 23.91294765472412, | |
| "epoch": 0.835820895522388, | |
| "grad_norm": 7.0327372550964355, | |
| "learning_rate": 7.269952498697734e-07, | |
| "loss": 0.0386, | |
| "num_tokens": 25322466.0, | |
| "reward": 1.4718168079853058, | |
| "reward_std": 0.3211175389587879, | |
| "rewards/accuracy_reward": 0.2578124962747097, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": 0.21400425024330616, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 23.36049222946167, | |
| "epoch": 0.8507462686567164, | |
| "grad_norm": 5.700937271118164, | |
| "learning_rate": 7.152555484041475e-07, | |
| "loss": 0.0265, | |
| "num_tokens": 25466397.0, | |
| "reward": 1.5502240508794785, | |
| "reward_std": 0.2749287262558937, | |
| "rewards/accuracy_reward": 0.296875, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": 0.25446509197354317, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 23.949777364730835, | |
| "epoch": 0.8656716417910447, | |
| "grad_norm": 5.249198913574219, | |
| "learning_rate": 7.033683215379002e-07, | |
| "loss": 0.023, | |
| "num_tokens": 25613824.0, | |
| "reward": 1.5308541655540466, | |
| "reward_std": 0.26417338382452726, | |
| "rewards/accuracy_reward": 0.28794642724096775, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": 0.2440237421542406, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 23.375000953674316, | |
| "epoch": 0.8805970149253731, | |
| "grad_norm": 8.684850692749023, | |
| "learning_rate": 6.913417161825449e-07, | |
| "loss": 0.0289, | |
| "num_tokens": 25760176.0, | |
| "reward": 1.566646233201027, | |
| "reward_std": 0.22551130689680576, | |
| "rewards/accuracy_reward": 0.30691963620483875, | |
| "rewards/format_reward": 0.9966517761349678, | |
| "rewards/log_scaled_reward": 0.26307476311922073, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 21.989956378936768, | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 34.08607864379883, | |
| "learning_rate": 6.7918397477265e-07, | |
| "loss": 0.0144, | |
| "num_tokens": 25914911.0, | |
| "reward": 1.628076210618019, | |
| "reward_std": 0.3200679961591959, | |
| "rewards/accuracy_reward": 0.33705356903374195, | |
| "rewards/format_reward": 0.9966517761349678, | |
| "rewards/log_scaled_reward": 0.2943707942031324, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 22.574777841567993, | |
| "epoch": 0.9104477611940298, | |
| "grad_norm": 5.27999210357666, | |
| "learning_rate": 6.669034296168854e-07, | |
| "loss": 0.0217, | |
| "num_tokens": 26077314.0, | |
| "reward": 1.6822472661733627, | |
| "reward_std": 0.2845423389226198, | |
| "rewards/accuracy_reward": 0.3649553582072258, | |
| "rewards/format_reward": 0.995535708963871, | |
| "rewards/log_scaled_reward": 0.3217560853809118, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 22.198662042617798, | |
| "epoch": 0.9253731343283582, | |
| "grad_norm": 4.5566277503967285, | |
| "learning_rate": 6.545084971874736e-07, | |
| "loss": 0.0064, | |
| "num_tokens": 26222620.0, | |
| "reward": 1.5729791224002838, | |
| "reward_std": 0.22709419997408986, | |
| "rewards/accuracy_reward": 0.31138393096625805, | |
| "rewards/format_reward": 0.9921874925494194, | |
| "rewards/log_scaled_reward": 0.2694076579064131, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 21.32924246788025, | |
| "epoch": 0.9402985074626866, | |
| "grad_norm": 3.140645980834961, | |
| "learning_rate": 6.420076723519614e-07, | |
| "loss": 0.0068, | |
| "num_tokens": 26371243.0, | |
| "reward": 1.6745910048484802, | |
| "reward_std": 0.20502757839858532, | |
| "rewards/accuracy_reward": 0.3582589291036129, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": 0.3163319919258356, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 21.34709930419922, | |
| "epoch": 0.9552238805970149, | |
| "grad_norm": 4.071537494659424, | |
| "learning_rate": 6.294095225512604e-07, | |
| "loss": 0.0055, | |
| "num_tokens": 26527050.0, | |
| "reward": 1.5537814646959305, | |
| "reward_std": 0.30234235525131226, | |
| "rewards/accuracy_reward": 0.2979910681024194, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": 0.25579037982970476, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 21.24776864051819, | |
| "epoch": 0.9701492537313433, | |
| "grad_norm": 4.297541618347168, | |
| "learning_rate": 6.167226819279527e-07, | |
| "loss": 0.0114, | |
| "num_tokens": 26679280.0, | |
| "reward": 1.5483618080615997, | |
| "reward_std": 0.1530774086713791, | |
| "rewards/accuracy_reward": 0.2968749953433871, | |
| "rewards/format_reward": 0.9966517835855484, | |
| "rewards/log_scaled_reward": 0.25483495742082596, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 21.180555820465088, | |
| "epoch": 0.9850746268656716, | |
| "grad_norm": 3.6126675605773926, | |
| "learning_rate": 6.039558454088795e-07, | |
| "loss": 0.009, | |
| "num_tokens": 26828211.0, | |
| "reward": 1.6490006893873215, | |
| "reward_std": 0.29192496836185455, | |
| "rewards/accuracy_reward": 0.3459821417927742, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": 0.304134588688612, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 20.77455449104309, | |
| "epoch": 1.0149253731343284, | |
| "grad_norm": 4.067701816558838, | |
| "learning_rate": 5.911177627460738e-07, | |
| "loss": 0.0094, | |
| "num_tokens": 26965209.0, | |
| "reward": 1.7030873149633408, | |
| "reward_std": 0.2512226551771164, | |
| "rewards/accuracy_reward": 0.37388391979038715, | |
| "rewards/format_reward": 0.9966517761349678, | |
| "rewards/log_scaled_reward": 0.3325514607131481, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 20.35267925262451, | |
| "epoch": 1.0298507462686568, | |
| "grad_norm": 3.4311537742614746, | |
| "learning_rate": 5.782172325201155e-07, | |
| "loss": 0.0126, | |
| "num_tokens": 27116397.0, | |
| "reward": 1.5380910784006119, | |
| "reward_std": 0.20529233757406473, | |
| "rewards/accuracy_reward": 0.2901785708963871, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": 0.24902847222983837, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 20.400670528411865, | |
| "epoch": 1.044776119402985, | |
| "grad_norm": 4.965053081512451, | |
| "learning_rate": 5.652630961100258e-07, | |
| "loss": 0.0182, | |
| "num_tokens": 27260012.0, | |
| "reward": 1.4891109764575958, | |
| "reward_std": 0.2253081511007622, | |
| "rewards/accuracy_reward": 0.26562499813735485, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": 0.22460200637578964, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 19.54799175262451, | |
| "epoch": 1.0597014925373134, | |
| "grad_norm": 9.666987419128418, | |
| "learning_rate": 5.522642316338268e-07, | |
| "loss": 0.0249, | |
| "num_tokens": 27404823.0, | |
| "reward": 1.5430727303028107, | |
| "reward_std": 0.2664187829941511, | |
| "rewards/accuracy_reward": 0.2924107164144516, | |
| "rewards/format_reward": 0.9977678507566452, | |
| "rewards/log_scaled_reward": 0.25289412308484316, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 18.02120590209961, | |
| "epoch": 1.0746268656716418, | |
| "grad_norm": 15.713820457458496, | |
| "learning_rate": 5.392295478639225e-07, | |
| "loss": 0.0167, | |
| "num_tokens": 27555962.0, | |
| "reward": 1.1339266449213028, | |
| "reward_std": 0.34662946686148643, | |
| "rewards/accuracy_reward": 0.1026785708963871, | |
| "rewards/format_reward": 0.9676339328289032, | |
| "rewards/log_scaled_reward": 0.06361408122756984, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 19.39732265472412, | |
| "epoch": 1.0895522388059702, | |
| "grad_norm": 21.441844940185547, | |
| "learning_rate": 5.26167978121472e-07, | |
| "loss": 0.0653, | |
| "num_tokens": 27696134.0, | |
| "reward": 1.0387937128543854, | |
| "reward_std": 0.14305981155484915, | |
| "rewards/accuracy_reward": 0.04464285704307258, | |
| "rewards/format_reward": 0.9866071417927742, | |
| "rewards/log_scaled_reward": 0.0075436777296999935, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.95089340209961, | |
| "epoch": 1.1044776119402986, | |
| "grad_norm": 12.856225967407227, | |
| "learning_rate": 5.130884741539366e-07, | |
| "loss": 0.018, | |
| "num_tokens": 27843106.0, | |
| "reward": 0.9690398126840591, | |
| "reward_std": 0.01615892370318761, | |
| "rewards/accuracy_reward": 0.0022321429569274187, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": -0.03207633784040809, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.233259677886963, | |
| "epoch": 1.1194029850746268, | |
| "grad_norm": 1.9851570129394531, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0015, | |
| "num_tokens": 27969987.0, | |
| "reward": 0.9649922177195549, | |
| "reward_std": 0.0031741062664423225, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": -0.03389178216457367, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.01897418498993, | |
| "epoch": 1.1343283582089552, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.869115258460634e-07, | |
| "loss": 0.0, | |
| "num_tokens": 28107828.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 14.996652722358704, | |
| "epoch": 1.1492537313432836, | |
| "grad_norm": 1.0442914962768555, | |
| "learning_rate": 4.7383202187852804e-07, | |
| "loss": -0.0006, | |
| "num_tokens": 28248081.0, | |
| "reward": 0.9638938158750534, | |
| "reward_std": 0.006288029253482819, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.9977678507566452, | |
| "rewards/log_scaled_reward": -0.03387411683797836, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.00334918498993, | |
| "epoch": 1.164179104477612, | |
| "grad_norm": 1.5838154554367065, | |
| "learning_rate": 4.6077045213607755e-07, | |
| "loss": 0.0005, | |
| "num_tokens": 28386956.0, | |
| "reward": 0.9627697318792343, | |
| "reward_std": 0.009467384777963161, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.9966517835855484, | |
| "rewards/log_scaled_reward": -0.0338821173645556, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.1791044776119404, | |
| "grad_norm": 1.337672472000122, | |
| "learning_rate": 4.477357683661733e-07, | |
| "loss": -0.0, | |
| "num_tokens": 28527204.0, | |
| "reward": 0.9638830795884132, | |
| "reward_std": 0.006318369880318642, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.9977678582072258, | |
| "rewards/log_scaled_reward": -0.033884843811392784, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.1940298507462686, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.347369038899743e-07, | |
| "loss": 0.0, | |
| "num_tokens": 28671788.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.208955223880597, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.2178276747988444e-07, | |
| "loss": 0.0, | |
| "num_tokens": 28811644.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.2238805970149254, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.0888223725392624e-07, | |
| "loss": 0.0, | |
| "num_tokens": 28949996.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.2388059701492538, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.960441545911204e-07, | |
| "loss": 0.0, | |
| "num_tokens": 29100540.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.2537313432835822, | |
| "grad_norm": 0.9363570213317871, | |
| "learning_rate": 3.8327731807204744e-07, | |
| "loss": -0.0, | |
| "num_tokens": 29242748.0, | |
| "reward": 0.9650017619132996, | |
| "reward_std": 0.0031542566139250994, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": -0.03388223238289356, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.2686567164179103, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.7059047744873955e-07, | |
| "loss": 0.0, | |
| "num_tokens": 29394812.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.2835820895522387, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.5799232764803867e-07, | |
| "loss": 0.0, | |
| "num_tokens": 29526756.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.2985074626865671, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.454915028125263e-07, | |
| "loss": 0.0, | |
| "num_tokens": 29662892.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.3134328358208955, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.330965703831146e-07, | |
| "loss": 0.0, | |
| "num_tokens": 29808492.0, | |
| "reward": 0.975348062813282, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.024652006570249796, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.328358208955224, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.2081602522734985e-07, | |
| "loss": 0.0, | |
| "num_tokens": 29960316.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.3432835820895521, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.086582838174551e-07, | |
| "loss": 0.0, | |
| "num_tokens": 30099644.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.005581259727478, | |
| "epoch": 1.3582089552238805, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.9663167846209996e-07, | |
| "loss": 0.0, | |
| "num_tokens": 30246217.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.373134328358209, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.847444515958523e-07, | |
| "loss": 0.0, | |
| "num_tokens": 30391785.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.3880597014925373, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.730047501302266e-07, | |
| "loss": 0.0, | |
| "num_tokens": 30531289.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.4029850746268657, | |
| "grad_norm": 3.6358273029327393, | |
| "learning_rate": 2.6142061987019574e-07, | |
| "loss": -0.0, | |
| "num_tokens": 30664633.0, | |
| "reward": 0.966103158891201, | |
| "reward_std": 1.4753467439732049e-05, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.033896906301379204, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.417910447761194, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.500000000000001e-07, | |
| "loss": 0.0, | |
| "num_tokens": 30820657.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.4328358208955223, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.387507176420256e-07, | |
| "loss": 0.0, | |
| "num_tokens": 30965737.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.00111699104309, | |
| "epoch": 1.4477611940298507, | |
| "grad_norm": 32.66771697998047, | |
| "learning_rate": 2.2768048249248644e-07, | |
| "loss": -0.0002, | |
| "num_tokens": 31108402.0, | |
| "reward": 0.9649775922298431, | |
| "reward_std": 0.0031557143665850163, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 0.9988839253783226, | |
| "rewards/log_scaled_reward": -0.03390640066936612, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.462686567164179, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.167968815375837e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31257634.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.4776119402985075, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.0610737385376348e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31412970.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.4925373134328357, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9561928549563966e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31564498.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.5074626865671643, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8533980447508135e-07, | |
| "loss": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.5074626865671643, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 15.000150587305676, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 31697802.0, | |
| "eval_reward": 0.9661169648170471, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward": 0.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/log_scaled_reward": -0.03388310596346855, | |
| "eval_runtime": 724.0402, | |
| "eval_samples_per_second": 6.906, | |
| "eval_steps_per_second": 0.062, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.5223880597014925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7527597583490823e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31837018.0, | |
| "reward": 0.9707325138151646, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.029267556266859174, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.537313432835821, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6543469682057104e-07, | |
| "loss": 0.0, | |
| "num_tokens": 31986642.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.5522388059701493, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5582271215312293e-07, | |
| "loss": 0.0, | |
| "num_tokens": 32126642.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.5671641791044775, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4644660940672627e-07, | |
| "loss": 0.0, | |
| "num_tokens": 32276154.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.582089552238806, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3731281449385628e-07, | |
| "loss": 0.0, | |
| "num_tokens": 32426898.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.5970149253731343, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.284275872613028e-07, | |
| "loss": 0.0, | |
| "num_tokens": 32571226.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.6119402985074627, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1979701719998454e-07, | |
| "loss": 0.0, | |
| "num_tokens": 32719426.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.626865671641791, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1142701927151454e-07, | |
| "loss": 0.0, | |
| "num_tokens": 32852794.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.6417910447761193, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0332332985438247e-07, | |
| "loss": 0.0, | |
| "num_tokens": 33000498.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.6567164179104479, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.549150281252632e-08, | |
| "loss": 0.0, | |
| "num_tokens": 33139850.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.671641791044776, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.793690568899215e-08, | |
| "loss": 0.0, | |
| "num_tokens": 33276866.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.6865671641791045, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.066471602728803e-08, | |
| "loss": 0.0, | |
| "num_tokens": 33407970.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.7014925373134329, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.36799178229539e-08, | |
| "loss": 0.0, | |
| "num_tokens": 33549218.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.716417910447761, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.698729810778064e-08, | |
| "loss": 0.0, | |
| "num_tokens": 33694498.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.7313432835820897, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.059144366901736e-08, | |
| "loss": 0.0, | |
| "num_tokens": 33836482.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.7462686567164178, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.44967379058161e-08, | |
| "loss": 0.0, | |
| "num_tokens": 33970506.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.002233028411865, | |
| "epoch": 1.7611940298507462, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.870735782506979e-08, | |
| "loss": 0.0, | |
| "num_tokens": 34139396.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.7761194029850746, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.322727117869951e-08, | |
| "loss": 0.0, | |
| "num_tokens": 34279244.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.7910447761194028, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.806023374435663e-08, | |
| "loss": 0.0, | |
| "num_tokens": 34418092.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.8059701492537314, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.3209786751399184e-08, | |
| "loss": 0.0, | |
| "num_tokens": 34570476.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.8208955223880596, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.8679254453910785e-08, | |
| "loss": 0.0, | |
| "num_tokens": 34714220.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.835820895522388, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.4471741852423233e-08, | |
| "loss": 0.0, | |
| "num_tokens": 34874452.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.8507462686567164, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.0590132565903473e-08, | |
| "loss": 0.0, | |
| "num_tokens": 35020932.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.8656716417910446, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7037086855465898e-08, | |
| "loss": 0.0, | |
| "num_tokens": 35158836.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.8805970149253732, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.3815039801161722e-08, | |
| "loss": 0.0, | |
| "num_tokens": 35299044.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.8955223880597014, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.0926199633097154e-08, | |
| "loss": 0.0, | |
| "num_tokens": 35438660.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.9104477611940298, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.372546218022746e-09, | |
| "loss": 0.0, | |
| "num_tokens": 35589036.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.9253731343283582, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.15582970243117e-09, | |
| "loss": 0.0, | |
| "num_tokens": 35732844.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.9402985074626866, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.277569313094809e-09, | |
| "loss": 0.0, | |
| "num_tokens": 35869612.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.955223880597015, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.739052315863355e-09, | |
| "loss": 0.0, | |
| "num_tokens": 36024796.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.000000953674316, | |
| "epoch": 1.9701492537313432, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.541333133436018e-09, | |
| "loss": 0.0, | |
| "num_tokens": 36165404.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 15.0, | |
| "epoch": 1.9850746268656716, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.852326227130833e-10, | |
| "loss": 0.0, | |
| "num_tokens": 36316348.0, | |
| "reward": 0.9661169648170471, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_scaled_reward": -0.03388310596346855, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.9850746268656716, | |
| "step": 132, | |
| "total_flos": 0.0, | |
| "train_loss": 0.04579740530018937, | |
| "train_runtime": 16666.94, | |
| "train_samples_per_second": 0.9, | |
| "train_steps_per_second": 0.008 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 134, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |