| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 40, |
| "global_step": 201, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "completion_length": 547.1242248535157, |
| "epoch": 0.07462686567164178, |
| "grad_norm": 1.2733972072601318, |
| "kl": 0.00047130584716796874, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 0.0, |
| "reward": 0.5517856992781163, |
| "reward_std": 0.27616733238101004, |
| "rewards/accuracy_reward": 0.3724489748477936, |
| "rewards/format_reward": 0.17933673106599599, |
| "step": 5, |
| "success_rate": 0.37244899198412895 |
| }, |
| { |
| "completion_length": 460.88060302734374, |
| "epoch": 0.14925373134328357, |
| "grad_norm": 4.886548042297363, |
| "kl": 0.024450111389160156, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 0.001, |
| "reward": 0.6818877421319485, |
| "reward_std": 0.2652753606438637, |
| "rewards/accuracy_reward": 0.3150510139763355, |
| "rewards/format_reward": 0.36683672592043876, |
| "step": 10, |
| "success_rate": 0.3150510285049677 |
| }, |
| { |
| "completion_length": 488.90407409667966, |
| "epoch": 0.22388059701492538, |
| "grad_norm": 0.48002028465270996, |
| "kl": 0.018406105041503907, |
| "learning_rate": 2.142857142857143e-06, |
| "loss": 0.0007, |
| "reward": 0.6711734563112259, |
| "reward_std": 0.24843686558306216, |
| "rewards/accuracy_reward": 0.3446428500115871, |
| "rewards/format_reward": 0.32653060741722584, |
| "step": 15, |
| "success_rate": 0.34464287348091605 |
| }, |
| { |
| "completion_length": 490.06836166381834, |
| "epoch": 0.29850746268656714, |
| "grad_norm": 0.5253956317901611, |
| "kl": 0.009164047241210938, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 0.0004, |
| "reward": 0.7178571283817291, |
| "reward_std": 0.26088091973215344, |
| "rewards/accuracy_reward": 0.40714284889400004, |
| "rewards/format_reward": 0.31071427930146456, |
| "step": 20, |
| "success_rate": 0.40714286640286446 |
| }, |
| { |
| "completion_length": 342.0505027770996, |
| "epoch": 0.373134328358209, |
| "grad_norm": 0.4979659616947174, |
| "kl": 0.0306243896484375, |
| "learning_rate": 2.9963460753897363e-06, |
| "loss": 0.0012, |
| "reward": 0.9278060972690583, |
| "reward_std": 0.24459648579359056, |
| "rewards/accuracy_reward": 0.18979591503739357, |
| "rewards/format_reward": 0.7380101881921292, |
| "step": 25, |
| "success_rate": 0.1897959278896451 |
| }, |
| { |
| "completion_length": 323.44387130737306, |
| "epoch": 0.44776119402985076, |
| "grad_norm": 0.9019961357116699, |
| "kl": 0.0350311279296875, |
| "learning_rate": 2.981532510892707e-06, |
| "loss": 0.0014, |
| "reward": 0.9882652848958969, |
| "reward_std": 0.2664802324026823, |
| "rewards/accuracy_reward": 0.15943877436220646, |
| "rewards/format_reward": 0.8288265079259872, |
| "step": 30, |
| "success_rate": 0.15943878153339028 |
| }, |
| { |
| "completion_length": 281.84744491577146, |
| "epoch": 0.5223880597014925, |
| "grad_norm": 0.42541709542274475, |
| "kl": 0.046868896484375, |
| "learning_rate": 2.9554435894139947e-06, |
| "loss": 0.0019, |
| "reward": 1.116326493024826, |
| "reward_std": 0.28026170562952757, |
| "rewards/accuracy_reward": 0.19030611962080002, |
| "rewards/format_reward": 0.9260203883051872, |
| "step": 35, |
| "success_rate": 0.19030613116919995 |
| }, |
| { |
| "completion_length": 280.4670871734619, |
| "epoch": 0.5970149253731343, |
| "grad_norm": 0.786392331123352, |
| "kl": 0.054290771484375, |
| "learning_rate": 2.9182778633989753e-06, |
| "loss": 0.0022, |
| "reward": 1.1989795714616776, |
| "reward_std": 0.3117813114076853, |
| "rewards/accuracy_reward": 0.24183672983199359, |
| "rewards/format_reward": 0.957142835855484, |
| "step": 40, |
| "success_rate": 0.24183674417436124 |
| }, |
| { |
| "epoch": 0.5970149253731343, |
| "eval_completion_length": 256.7967397167696, |
| "eval_kl": 0.06267662687674581, |
| "eval_loss": 0.002505573211237788, |
| "eval_reward": 1.2279956347449532, |
| "eval_reward_std": 0.283521942211596, |
| "eval_rewards/accuracy_reward": 0.2540759276182458, |
| "eval_rewards/format_reward": 0.9739197237864553, |
| "eval_runtime": 5454.0827, |
| "eval_samples_per_second": 0.917, |
| "eval_steps_per_second": 0.066, |
| "eval_success_rate": 0.2544749851166869, |
| "step": 40 |
| }, |
| { |
| "completion_length": 237.62269897460936, |
| "epoch": 0.6716417910447762, |
| "grad_norm": 0.7149950861930847, |
| "kl": 0.0769775390625, |
| "learning_rate": 2.8703181864639013e-06, |
| "loss": 0.0031, |
| "reward": 1.2594387471675872, |
| "reward_std": 0.28514467738568783, |
| "rewards/accuracy_reward": 0.2785714233294129, |
| "rewards/format_reward": 0.9808673411607742, |
| "step": 45, |
| "success_rate": 0.2785714427009225 |
| }, |
| { |
| "completion_length": 309.43800468444823, |
| "epoch": 0.746268656716418, |
| "grad_norm": 2.802035093307495, |
| "kl": 0.081781005859375, |
| "learning_rate": 2.811929560709094e-06, |
| "loss": 0.0033, |
| "reward": 1.3290816009044648, |
| "reward_std": 0.3365088116377592, |
| "rewards/accuracy_reward": 0.3614795859903097, |
| "rewards/format_reward": 0.9676020219922066, |
| "step": 50, |
| "success_rate": 0.3596938900649548 |
| }, |
| { |
| "completion_length": 303.08392372131345, |
| "epoch": 0.8208955223880597, |
| "grad_norm": 0.3051517605781555, |
| "kl": 0.075457763671875, |
| "learning_rate": 2.7435563588325624e-06, |
| "loss": 0.003, |
| "reward": 1.3415815979242325, |
| "reward_std": 0.3468840003013611, |
| "rewards/accuracy_reward": 0.3770408075302839, |
| "rewards/format_reward": 0.9645407989621162, |
| "step": 55, |
| "success_rate": 0.3752551130950451 |
| }, |
| { |
| "completion_length": 294.10815811157227, |
| "epoch": 0.8955223880597015, |
| "grad_norm": 0.33523619174957275, |
| "kl": 0.097998046875, |
| "learning_rate": 2.6657189421854562e-06, |
| "loss": 0.0039, |
| "reward": 1.3653060972690583, |
| "reward_std": 0.33864556923508643, |
| "rewards/accuracy_reward": 0.38443876840174196, |
| "rewards/format_reward": 0.9808673366904259, |
| "step": 60, |
| "success_rate": 0.3844387885183096 |
| }, |
| { |
| "completion_length": 304.046932220459, |
| "epoch": 0.9701492537313433, |
| "grad_norm": 0.3474382162094116, |
| "kl": 0.096484375, |
| "learning_rate": 2.5790097005079765e-06, |
| "loss": 0.0039, |
| "reward": 1.4303571164608002, |
| "reward_std": 0.32698816806077957, |
| "rewards/accuracy_reward": 0.454846927523613, |
| "rewards/format_reward": 0.9755101934075355, |
| "step": 65, |
| "success_rate": 0.4548469439148903 |
| }, |
| { |
| "completion_length": 348.61254768371583, |
| "epoch": 1.044776119402985, |
| "grad_norm": 0.28319722414016724, |
| "kl": 0.0887451171875, |
| "learning_rate": 2.484088543485761e-06, |
| "loss": 0.0035, |
| "reward": 1.4494387701153755, |
| "reward_std": 0.34128306433558464, |
| "rewards/accuracy_reward": 0.48418366685509684, |
| "rewards/format_reward": 0.9652550905942917, |
| "step": 70, |
| "success_rate": 0.5017857238650322 |
| }, |
| { |
| "completion_length": 374.4933601379395, |
| "epoch": 1.1194029850746268, |
| "grad_norm": 0.27081194519996643, |
| "kl": 25395.28706665039, |
| "learning_rate": 2.3816778784387097e-06, |
| "loss": 1014.577, |
| "reward": 1.4698979407548904, |
| "reward_std": 0.3440066184848547, |
| "rewards/accuracy_reward": 0.5155612148344517, |
| "rewards/format_reward": 0.9543367087841034, |
| "step": 75, |
| "success_rate": 0.5155612342059612 |
| }, |
| { |
| "completion_length": 361.42677841186526, |
| "epoch": 1.1940298507462686, |
| "grad_norm": 0.31254705786705017, |
| "kl": 0.087762451171875, |
| "learning_rate": 2.2725571123650813e-06, |
| "loss": 0.0035, |
| "reward": 1.5135203808546067, |
| "reward_std": 0.35050575956702235, |
| "rewards/accuracy_reward": 0.566326516866684, |
| "rewards/format_reward": 0.947193855047226, |
| "step": 80, |
| "success_rate": 0.5645408242940902 |
| }, |
| { |
| "epoch": 1.1940298507462686, |
| "eval_completion_length": 375.1230107738985, |
| "eval_kl": 0.2485719819308659, |
| "eval_loss": 0.009942025877535343, |
| "eval_reward": 1.4252650491352188, |
| "eval_reward_std": 0.35333527712016133, |
| "eval_rewards/accuracy_reward": 0.4842093172769307, |
| "eval_rewards/format_reward": 0.9410557287365364, |
| "eval_runtime": 6454.9469, |
| "eval_samples_per_second": 0.775, |
| "eval_steps_per_second": 0.055, |
| "eval_success_rate": 0.485406461291473, |
| "step": 80 |
| }, |
| { |
| "completion_length": 378.264786529541, |
| "epoch": 1.2686567164179103, |
| "grad_norm": 0.25367024540901184, |
| "kl": 0.08892822265625, |
| "learning_rate": 2.157556720183616e-06, |
| "loss": 0.0036, |
| "reward": 1.4543367117643355, |
| "reward_std": 0.3721345618367195, |
| "rewards/accuracy_reward": 0.5137754999101162, |
| "rewards/format_reward": 0.9405611962080002, |
| "step": 85, |
| "success_rate": 0.5137755192816258 |
| }, |
| { |
| "completion_length": 377.1609634399414, |
| "epoch": 1.3432835820895521, |
| "grad_norm": 0.2705287039279938, |
| "kl": 0.30084228515625, |
| "learning_rate": 2.03755192431795e-06, |
| "loss": 0.012, |
| "reward": 1.5015305757522583, |
| "reward_std": 0.34179753065109253, |
| "rewards/accuracy_reward": 0.5466836676001549, |
| "rewards/format_reward": 0.954846915602684, |
| "step": 90, |
| "success_rate": 0.5466836795210839 |
| }, |
| { |
| "completion_length": 395.2339202880859, |
| "epoch": 1.417910447761194, |
| "grad_norm": 0.2481299340724945, |
| "kl": 0.08546142578125, |
| "learning_rate": 1.9134560337254986e-06, |
| "loss": 0.0034, |
| "reward": 1.5122448593378066, |
| "reward_std": 0.32668328285217285, |
| "rewards/accuracy_reward": 0.5599489718675613, |
| "rewards/format_reward": 0.9522958919405937, |
| "step": 95, |
| "success_rate": 0.559948992729187 |
| }, |
| { |
| "completion_length": 404.2803482055664, |
| "epoch": 1.4925373134328357, |
| "grad_norm": 0.22587481141090393, |
| "kl": 0.08536376953125, |
| "learning_rate": 1.7862134930648174e-06, |
| "loss": 0.0034, |
| "reward": 1.5109693586826325, |
| "reward_std": 0.3110779445618391, |
| "rewards/accuracy_reward": 0.5604591690003872, |
| "rewards/format_reward": 0.9505101799964905, |
| "step": 100, |
| "success_rate": 0.560459190607071 |
| }, |
| { |
| "completion_length": 446.25024871826173, |
| "epoch": 1.5671641791044775, |
| "grad_norm": 0.2071794718503952, |
| "kl": 0.077789306640625, |
| "learning_rate": 1.6567926949014804e-06, |
| "loss": 0.0031, |
| "reward": 1.521683645248413, |
| "reward_std": 0.32725758776068686, |
| "rewards/accuracy_reward": 0.5826530493795872, |
| "rewards/format_reward": 0.9390305906534195, |
| "step": 105, |
| "success_rate": 0.5826530683785677 |
| }, |
| { |
| "completion_length": 444.8515205383301, |
| "epoch": 1.6417910447761193, |
| "grad_norm": 0.2457750141620636, |
| "kl": 0.081201171875, |
| "learning_rate": 1.5261786096559255e-06, |
| "loss": 0.0032, |
| "reward": 1.5280611962080002, |
| "reward_std": 0.33711482025682926, |
| "rewards/accuracy_reward": 0.5818877436220646, |
| "rewards/format_reward": 0.9461734414100647, |
| "step": 110, |
| "success_rate": 0.5818877592682838 |
| }, |
| { |
| "completion_length": 428.1119789123535, |
| "epoch": 1.716417910447761, |
| "grad_norm": 0.8355852365493774, |
| "kl": 0.08641357421875, |
| "learning_rate": 1.395365289383812e-06, |
| "loss": 0.0035, |
| "reward": 1.5198979318141936, |
| "reward_std": 0.33952501937747004, |
| "rewards/accuracy_reward": 0.5668367221951485, |
| "rewards/format_reward": 0.9530612006783485, |
| "step": 115, |
| "success_rate": 0.5668367445468903 |
| }, |
| { |
| "completion_length": 416.2086639404297, |
| "epoch": 1.7910447761194028, |
| "grad_norm": 0.24691729247570038, |
| "kl": 0.0869140625, |
| "learning_rate": 1.2653483024396534e-06, |
| "loss": 0.0035, |
| "reward": 1.5033163011074067, |
| "reward_std": 0.33274373821914194, |
| "rewards/accuracy_reward": 0.5446428425610066, |
| "rewards/format_reward": 0.9586734384298324, |
| "step": 120, |
| "success_rate": 0.5446428678929806 |
| }, |
| { |
| "epoch": 1.7910447761194028, |
| "eval_completion_length": 416.68637263974665, |
| "eval_kl": 0.08460700178945531, |
| "eval_loss": 0.0033832318149507046, |
| "eval_reward": 1.4522859107848651, |
| "eval_reward_std": 0.33377148831190345, |
| "eval_rewards/accuracy_reward": 0.4971211837739918, |
| "eval_rewards/format_reward": 0.9551647285509376, |
| "eval_runtime": 6690.2624, |
| "eval_samples_per_second": 0.747, |
| "eval_steps_per_second": 0.054, |
| "eval_success_rate": 0.49851784963348056, |
| "step": 120 |
| }, |
| { |
| "completion_length": 399.90585861206057, |
| "epoch": 1.8656716417910446, |
| "grad_norm": 0.2619114816188812, |
| "kl": 0.08975830078125, |
| "learning_rate": 1.1371171566004986e-06, |
| "loss": 0.0036, |
| "reward": 1.4979591608047484, |
| "reward_std": 0.32781863324344157, |
| "rewards/accuracy_reward": 0.5415816225111485, |
| "rewards/format_reward": 0.9563775300979614, |
| "step": 125, |
| "success_rate": 0.5415816411376 |
| }, |
| { |
| "completion_length": 413.0073921203613, |
| "epoch": 1.9402985074626866, |
| "grad_norm": 0.31122443079948425, |
| "kl": 0.0883056640625, |
| "learning_rate": 1.0116477683142654e-06, |
| "loss": 0.0035, |
| "reward": 1.5224489539861679, |
| "reward_std": 0.32416500747203825, |
| "rewards/accuracy_reward": 0.5683673366904258, |
| "rewards/format_reward": 0.954081603884697, |
| "step": 130, |
| "success_rate": 0.568367350846529 |
| }, |
| { |
| "completion_length": 425.88346633911135, |
| "epoch": 2.014925373134328, |
| "grad_norm": 0.20976552367210388, |
| "kl": 0.080792236328125, |
| "learning_rate": 8.898950353863e-07, |
| "loss": 0.0032, |
| "reward": 1.5138265073299408, |
| "reward_std": 0.3204653847962618, |
| "rewards/accuracy_reward": 0.5654081603512168, |
| "rewards/format_reward": 0.9484183505177498, |
| "step": 135, |
| "success_rate": 0.5627551212906837 |
| }, |
| { |
| "completion_length": 431.6691268920898, |
| "epoch": 2.08955223880597, |
| "grad_norm": 0.24750804901123047, |
| "kl": 0.082379150390625, |
| "learning_rate": 7.727855696304945e-07, |
| "loss": 0.0033, |
| "reward": 1.506122413277626, |
| "reward_std": 0.32942725978791715, |
| "rewards/accuracy_reward": 0.5604591719806195, |
| "rewards/format_reward": 0.9456632405519485, |
| "step": 140, |
| "success_rate": 0.5586734853684903 |
| }, |
| { |
| "completion_length": 438.63060150146487, |
| "epoch": 2.1641791044776117, |
| "grad_norm": 0.25727197527885437, |
| "kl": 0.0776123046875, |
| "learning_rate": 6.6121064479388e-07, |
| "loss": 0.0031, |
| "reward": 1.4864795625209808, |
| "reward_std": 0.328120681270957, |
| "rewards/accuracy_reward": 0.5380101919174194, |
| "rewards/format_reward": 0.9484693706035614, |
| "step": 145, |
| "success_rate": 0.5380102179944515 |
| }, |
| { |
| "completion_length": 403.204328918457, |
| "epoch": 2.2388059701492535, |
| "grad_norm": 0.24068014323711395, |
| "kl": 0.08330078125, |
| "learning_rate": 5.560194134252441e-07, |
| "loss": 0.0033, |
| "reward": 1.5372448682785034, |
| "reward_std": 0.3228706333786249, |
| "rewards/accuracy_reward": 0.5795918248593808, |
| "rewards/format_reward": 0.9576530396938324, |
| "step": 150, |
| "success_rate": 0.5795918427407741 |
| }, |
| { |
| "completion_length": 407.8196342468262, |
| "epoch": 2.3134328358208958, |
| "grad_norm": 0.2765465974807739, |
| "kl": 0.0848876953125, |
| "learning_rate": 4.5801244431150397e-07, |
| "loss": 0.0034, |
| "reward": 1.5224489510059356, |
| "reward_std": 0.313472930341959, |
| "rewards/accuracy_reward": 0.5660714194178581, |
| "rewards/format_reward": 0.9563775330781936, |
| "step": 155, |
| "success_rate": 0.5660714313387871 |
| }, |
| { |
| "completion_length": 392.9127471923828, |
| "epoch": 2.388059701492537, |
| "grad_norm": 0.28657880425453186, |
| "kl": 0.09124755859375, |
| "learning_rate": 3.67935629665842e-07, |
| "loss": 0.0036, |
| "reward": 1.561479565501213, |
| "reward_std": 0.3107341818511486, |
| "rewards/accuracy_reward": 0.5979591712355614, |
| "rewards/format_reward": 0.9635203838348388, |
| "step": 160, |
| "success_rate": 0.5979591906070709 |
| }, |
| { |
| "epoch": 2.388059701492537, |
| "eval_completion_length": 415.15222543045127, |
| "eval_kl": 0.08421828626920391, |
| "eval_loss": 0.003366992576047778, |
| "eval_reward": 1.4807034238090728, |
| "eval_reward_std": 0.3203208099780136, |
| "eval_rewards/accuracy_reward": 0.5245125879788531, |
| "eval_rewards/format_reward": 0.956190837162167, |
| "eval_runtime": 6690.7517, |
| "eval_samples_per_second": 0.747, |
| "eval_steps_per_second": 0.054, |
| "eval_success_rate": 0.5258094906890193, |
| "step": 160 |
| }, |
| { |
| "completion_length": 432.94667587280276, |
| "epoch": 2.4626865671641793, |
| "grad_norm": 0.23113694787025452, |
| "kl": 0.0843994140625, |
| "learning_rate": 2.86474508437579e-07, |
| "loss": 0.0034, |
| "reward": 1.4971938461065293, |
| "reward_std": 0.3238256432116032, |
| "rewards/accuracy_reward": 0.5499999865889549, |
| "rewards/format_reward": 0.9471938535571098, |
| "step": 165, |
| "success_rate": 0.5499999992549419 |
| }, |
| { |
| "completion_length": 414.23238906860354, |
| "epoch": 2.5373134328358207, |
| "grad_norm": 0.2992941439151764, |
| "kl": 0.087384033203125, |
| "learning_rate": 2.1424904894683168e-07, |
| "loss": 0.0035, |
| "reward": 1.5561224222183228, |
| "reward_std": 0.3134998256340623, |
| "rewards/accuracy_reward": 0.600255086272955, |
| "rewards/format_reward": 0.9558673143386841, |
| "step": 170, |
| "success_rate": 0.5984694063663483 |
| }, |
| { |
| "completion_length": 438.1839202880859, |
| "epoch": 2.611940298507463, |
| "grad_norm": 0.2166847288608551, |
| "kl": 0.085552978515625, |
| "learning_rate": 1.5180893055124977e-07, |
| "loss": 0.0034, |
| "reward": 1.5079081356525421, |
| "reward_std": 0.33949046954512596, |
| "rewards/accuracy_reward": 0.5670918263494968, |
| "rewards/format_reward": 0.9408163040876388, |
| "step": 175, |
| "success_rate": 0.5670918501913548 |
| }, |
| { |
| "completion_length": 424.2168281555176, |
| "epoch": 2.6865671641791042, |
| "grad_norm": 0.21476835012435913, |
| "kl": 0.08350830078125, |
| "learning_rate": 9.962936025419756e-08, |
| "loss": 0.0033, |
| "reward": 1.5306122213602067, |
| "reward_std": 0.31606815941631794, |
| "rewards/accuracy_reward": 0.5747448861598968, |
| "rewards/format_reward": 0.9558673217892647, |
| "step": 180, |
| "success_rate": 0.5747449062764645 |
| }, |
| { |
| "completion_length": 418.3198890686035, |
| "epoch": 2.7611940298507465, |
| "grad_norm": 0.2472531795501709, |
| "kl": 0.081756591796875, |
| "learning_rate": 5.810745609252166e-08, |
| "loss": 0.0033, |
| "reward": 1.5512754768133163, |
| "reward_std": 0.29889940060675146, |
| "rewards/accuracy_reward": 0.5910714209079743, |
| "rewards/format_reward": 0.9602040618658065, |
| "step": 185, |
| "success_rate": 0.5910714328289032 |
| }, |
| { |
| "completion_length": 431.39743728637694, |
| "epoch": 2.835820895522388, |
| "grad_norm": 0.2179958075284958, |
| "kl": 0.0802490234375, |
| "learning_rate": 2.7559224828504036e-08, |
| "loss": 0.0032, |
| "reward": 1.5283162951469422, |
| "reward_std": 0.3320562928915024, |
| "rewards/accuracy_reward": 0.5762754924595356, |
| "rewards/format_reward": 0.9520407900214195, |
| "step": 190, |
| "success_rate": 0.5762755177915097 |
| }, |
| { |
| "completion_length": 427.4851936340332, |
| "epoch": 2.91044776119403, |
| "grad_norm": 0.235799178481102, |
| "kl": 0.0825927734375, |
| "learning_rate": 8.217156947590065e-09, |
| "loss": 0.0033, |
| "reward": 1.5198979258537293, |
| "reward_std": 0.30454444214701654, |
| "rewards/accuracy_reward": 0.5729591682553291, |
| "rewards/format_reward": 0.9469387531280518, |
| "step": 195, |
| "success_rate": 0.5729591898620129 |
| }, |
| { |
| "completion_length": 437.469376373291, |
| "epoch": 2.9850746268656714, |
| "grad_norm": 0.22140200436115265, |
| "kl": 0.079302978515625, |
| "learning_rate": 2.2845726541309565e-10, |
| "loss": 0.0032, |
| "reward": 1.5232142567634583, |
| "reward_std": 0.32379055954515934, |
| "rewards/accuracy_reward": 0.5744897864758969, |
| "rewards/format_reward": 0.9487244680523872, |
| "step": 200, |
| "success_rate": 0.5744898058474064 |
| }, |
| { |
| "epoch": 2.9850746268656714, |
| "eval_completion_length": 427.15005846929284, |
| "eval_kl": 0.10578449611557263, |
| "eval_loss": 0.004237522836774588, |
| "eval_reward": 1.480674920468357, |
| "eval_reward_std": 0.32418302708830915, |
| "eval_rewards/accuracy_reward": 0.528075465443414, |
| "eval_rewards/format_reward": 0.9525994578553312, |
| "eval_runtime": 6859.1519, |
| "eval_samples_per_second": 0.729, |
| "eval_steps_per_second": 0.052, |
| "eval_success_rate": 0.5293723670505611, |
| "step": 200 |
| }, |
| { |
| "completion_length": 396.71250343322754, |
| "epoch": 3.0, |
| "kl": 0.073760986328125, |
| "reward": 1.6749999970197678, |
| "reward_std": 0.246222835034132, |
| "rewards/accuracy_reward": 0.6750000044703484, |
| "rewards/format_reward": 1.0, |
| "step": 201, |
| "success_rate": 0.5982142873108387, |
| "total_flos": 0.0, |
| "train_loss": 25.241294363718474, |
| "train_runtime": 70314.3192, |
| "train_samples_per_second": 0.32, |
| "train_steps_per_second": 0.003 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 201, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|