| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.05596753882748006, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 624.28125, |
| "epoch": 0.00011193507765496012, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.3333333333333335e-07, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 1 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 458.90625, |
| "epoch": 0.00022387015530992023, |
| "grad_norm": 0.7040169045969362, |
| "kl": 0.0, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 0.0601, |
| "reward": 0.007812500116415322, |
| "reward_std": 0.01743034040555358, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.078125, |
| "step": 2 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 444.75, |
| "epoch": 0.00033580523296488035, |
| "grad_norm": 0.8629215889276903, |
| "kl": 0.00027060508728027344, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.0038, |
| "reward": 0.004687500069849193, |
| "reward_std": 0.018750000279396772, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.046875, |
| "step": 3 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 443.078125, |
| "epoch": 0.00044774031061984047, |
| "grad_norm": 0.7016431850799195, |
| "kl": 0.00026345252990722656, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": 0.0435, |
| "reward": 0.006250000209547579, |
| "reward_std": 0.016327823046594858, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.0625, |
| "step": 4 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 466.0, |
| "epoch": 0.0005596753882748006, |
| "grad_norm": 0.4574220548610273, |
| "kl": 0.0002818107604980469, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.007, |
| "reward": 0.0031250000465661287, |
| "reward_std": 0.008539125323295593, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.03125, |
| "step": 5 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 495.09375, |
| "epoch": 0.0006716104659297607, |
| "grad_norm": 0.5434000185158782, |
| "kl": 0.0002865791320800781, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.0427, |
| "reward": 0.0031250000465661287, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.03125, |
| "step": 6 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 509.765625, |
| "epoch": 0.0007835455435847208, |
| "grad_norm": 7.737527191229261, |
| "kl": 0.0010457038879394531, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": -0.0273, |
| "reward": 0.0031250000465661287, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.03125, |
| "step": 7 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 401.4375, |
| "epoch": 0.0008954806212396809, |
| "grad_norm": 0.9652984425394716, |
| "kl": 0.000743865966796875, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.0031, |
| "reward": 0.0062500000931322575, |
| "reward_std": 0.021039125509560108, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.0625, |
| "step": 8 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 631.203125, |
| "epoch": 0.0010074156988946412, |
| "grad_norm": 0.565508681156182, |
| "kl": 0.001064300537109375, |
| "learning_rate": 3e-06, |
| "loss": 0.0314, |
| "reward": 0.004687500069849193, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.046875, |
| "step": 9 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 509.109375, |
| "epoch": 0.0011193507765496012, |
| "grad_norm": 29.074260549966585, |
| "kl": 0.7770309448242188, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.0532, |
| "reward": 0.02031250041909516, |
| "reward_std": 0.04097762983292341, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.203125, |
| "step": 10 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 306.96875, |
| "epoch": 0.0012312858542045614, |
| "grad_norm": 1.335155005624375, |
| "kl": 0.03216552734375, |
| "learning_rate": 3.6666666666666666e-06, |
| "loss": 0.0838, |
| "reward": 0.025000000605359674, |
| "reward_std": 0.043084788136184216, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.25, |
| "step": 11 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 469.265625, |
| "epoch": 0.0013432209318595214, |
| "grad_norm": 0.9728533904128814, |
| "kl": 0.11004638671875, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0547, |
| "reward": 0.02187500020954758, |
| "reward_std": 0.04057852132245898, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.21875, |
| "step": 12 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 411.71875, |
| "epoch": 0.0014551560095144816, |
| "grad_norm": 3.1109091298399707, |
| "kl": 0.34326171875, |
| "learning_rate": 4.333333333333334e-06, |
| "loss": 0.1651, |
| "reward": 0.05625000037252903, |
| "reward_std": 0.04977653082460165, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.5625, |
| "step": 13 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 437.4375, |
| "epoch": 0.0015670910871694416, |
| "grad_norm": 1.4333106697160516, |
| "kl": 0.2135009765625, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 0.0872, |
| "reward": 0.06718750111758709, |
| "reward_std": 0.04840352013707161, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.671875, |
| "step": 14 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 459.671875, |
| "epoch": 0.0016790261648244019, |
| "grad_norm": 1.1323714811109955, |
| "kl": 0.0590972900390625, |
| "learning_rate": 5e-06, |
| "loss": 0.0902, |
| "reward": 0.07343750260770321, |
| "reward_std": 0.04493850376456976, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.734375, |
| "step": 15 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 430.921875, |
| "epoch": 0.0017909612424793619, |
| "grad_norm": 1.1229606735516884, |
| "kl": 0.0349578857421875, |
| "learning_rate": 4.999952797253148e-06, |
| "loss": 0.0231, |
| "reward": 0.07031250186264515, |
| "reward_std": 0.04625816363841295, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.703125, |
| "step": 16 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 361.84375, |
| "epoch": 0.001902896320134322, |
| "grad_norm": 1.3958687154501264, |
| "kl": 0.040618896484375, |
| "learning_rate": 4.9998111909931225e-06, |
| "loss": 0.0841, |
| "reward": 0.07656250149011612, |
| "reward_std": 0.04255262762308121, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.765625, |
| "step": 17 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.265625, |
| "epoch": 0.0020148313977892823, |
| "grad_norm": 1.2810159881634564, |
| "kl": 0.058685302734375, |
| "learning_rate": 4.999575187161439e-06, |
| "loss": -0.0583, |
| "reward": 0.08125000260770321, |
| "reward_std": 0.038373483810573816, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.8125, |
| "step": 18 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 544.984375, |
| "epoch": 0.0021267664754442426, |
| "grad_norm": 0.6496120974985506, |
| "kl": 0.0307464599609375, |
| "learning_rate": 4.9992447956603455e-06, |
| "loss": 0.0475, |
| "reward": 0.09218750149011612, |
| "reward_std": 0.01743034040555358, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 19 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 470.90625, |
| "epoch": 0.0022387015530992023, |
| "grad_norm": 1.1640288172191966, |
| "kl": 0.016815185546875, |
| "learning_rate": 4.998820030352409e-06, |
| "loss": 0.12, |
| "reward": 0.09375000186264515, |
| "reward_std": 0.021039125509560108, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 20 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 295.90625, |
| "epoch": 0.0023506366307541626, |
| "grad_norm": 2.092394327511984, |
| "kl": 0.4071197509765625, |
| "learning_rate": 4.998300909059929e-06, |
| "loss": 0.077, |
| "reward": 0.08906250260770321, |
| "reward_std": 0.025969465728849173, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.890625, |
| "step": 21 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 466.609375, |
| "epoch": 0.002462571708409123, |
| "grad_norm": 1.7353937284994216, |
| "kl": 0.130462646484375, |
| "learning_rate": 4.997687453564198e-06, |
| "loss": 0.0013, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 22 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 364.75, |
| "epoch": 0.002574506786064083, |
| "grad_norm": 0.6464548918875196, |
| "kl": 0.0576171875, |
| "learning_rate": 4.9969796896045775e-06, |
| "loss": -0.0217, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 23 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 342.421875, |
| "epoch": 0.002686441863719043, |
| "grad_norm": 116.68805529495698, |
| "kl": 6.03173828125, |
| "learning_rate": 4.996177646877426e-06, |
| "loss": 0.0415, |
| "reward": 0.09218750149011612, |
| "reward_std": 0.017430341336876154, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 24 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.5625, |
| "epoch": 0.002798376941374003, |
| "grad_norm": 3.181683689923044, |
| "kl": 0.4976806640625, |
| "learning_rate": 4.995281359034851e-06, |
| "loss": -0.0548, |
| "reward": 0.09218750335276127, |
| "reward_std": 0.023328250739723444, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 25 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 395.578125, |
| "epoch": 0.0029103120190289633, |
| "grad_norm": 0.9693325036669225, |
| "kl": 0.07562255859375, |
| "learning_rate": 4.994290863683296e-06, |
| "loss": 0.0118, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 26 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 505.640625, |
| "epoch": 0.0030222470966839235, |
| "grad_norm": 0.762738499506801, |
| "kl": 0.1248779296875, |
| "learning_rate": 4.99320620238196e-06, |
| "loss": 0.0572, |
| "reward": 0.09375000186264515, |
| "reward_std": 0.016327822115272284, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 27 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.65625, |
| "epoch": 0.0031341821743388833, |
| "grad_norm": 0.08544860970511584, |
| "kl": 0.04632568359375, |
| "learning_rate": 4.99202742064106e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 28 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.015625, |
| "epoch": 0.0032461172519938435, |
| "grad_norm": 1.1227892135326387, |
| "kl": 0.03948974609375, |
| "learning_rate": 4.990754567919917e-06, |
| "loss": -0.0086, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 29 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 413.34375, |
| "epoch": 0.0033580523296488037, |
| "grad_norm": 0.8427884883196753, |
| "kl": 0.03790283203125, |
| "learning_rate": 4.989387697624881e-06, |
| "loss": 0.0001, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 30 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 290.09375, |
| "epoch": 0.003469987407303764, |
| "grad_norm": 0.916292516898012, |
| "kl": 0.03955078125, |
| "learning_rate": 4.987926867107095e-06, |
| "loss": -0.0193, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 31 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.28125, |
| "epoch": 0.0035819224849587238, |
| "grad_norm": 0.5282928671535011, |
| "kl": 0.043212890625, |
| "learning_rate": 4.986372137660078e-06, |
| "loss": -0.0401, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 32 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 376.0, |
| "epoch": 0.003693857562613684, |
| "grad_norm": 0.8076561222873071, |
| "kl": 0.06463623046875, |
| "learning_rate": 4.984723574517165e-06, |
| "loss": 0.0103, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 33 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.015625, |
| "epoch": 0.003805792640268644, |
| "grad_norm": 0.5820201891858064, |
| "kl": 0.04254150390625, |
| "learning_rate": 4.9829812468487655e-06, |
| "loss": -0.0118, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 34 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.765625, |
| "epoch": 0.0039177277179236044, |
| "grad_norm": 1.30271756609257, |
| "kl": 0.07342529296875, |
| "learning_rate": 4.981145227759457e-06, |
| "loss": 0.0033, |
| "reward": 0.09375000186264515, |
| "reward_std": 0.021039125509560108, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 35 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.328125, |
| "epoch": 0.004029662795578565, |
| "grad_norm": 0.5253064587253158, |
| "kl": 0.0435791015625, |
| "learning_rate": 4.979215594284924e-06, |
| "loss": -0.004, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 36 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.046875, |
| "epoch": 0.004141597873233525, |
| "grad_norm": 0.03440107214885444, |
| "kl": 0.03900146484375, |
| "learning_rate": 4.977192427388722e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 37 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 446.390625, |
| "epoch": 0.004253532950888485, |
| "grad_norm": 0.03871583904571488, |
| "kl": 0.033447265625, |
| "learning_rate": 4.9750758119588824e-06, |
| "loss": 0.0003, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 38 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 407.453125, |
| "epoch": 0.0043654680285434445, |
| "grad_norm": 0.14679688464116405, |
| "kl": 0.05303955078125, |
| "learning_rate": 4.972865836804349e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 39 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.578125, |
| "epoch": 0.004477403106198405, |
| "grad_norm": 1.1845417806658105, |
| "kl": 0.06744384765625, |
| "learning_rate": 4.970562594651254e-06, |
| "loss": 0.0676, |
| "reward": 0.09531250037252903, |
| "reward_std": 0.018750000279396772, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 40 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 411.359375, |
| "epoch": 0.004589338183853365, |
| "grad_norm": 0.6033536519344826, |
| "kl": 0.05682373046875, |
| "learning_rate": 4.968166182139026e-06, |
| "loss": 0.1634, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 41 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 419.03125, |
| "epoch": 0.004701273261508325, |
| "grad_norm": 0.03504335311347305, |
| "kl": 0.042327880859375, |
| "learning_rate": 4.9656766998163306e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 42 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.453125, |
| "epoch": 0.004813208339163285, |
| "grad_norm": 0.11226404372767065, |
| "kl": 0.0537109375, |
| "learning_rate": 4.963094252136865e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 43 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 470.296875, |
| "epoch": 0.004925143416818246, |
| "grad_norm": 1.682711207148907, |
| "kl": 0.2684326171875, |
| "learning_rate": 4.960418947454958e-06, |
| "loss": 0.0222, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 44 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 405.625, |
| "epoch": 0.005037078494473206, |
| "grad_norm": 0.04103769366987546, |
| "kl": 0.0435791015625, |
| "learning_rate": 4.957650898021038e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 45 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 422.765625, |
| "epoch": 0.005149013572128166, |
| "grad_norm": 0.524235834468468, |
| "kl": 0.065673828125, |
| "learning_rate": 4.954790219976915e-06, |
| "loss": -0.0335, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 46 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 443.984375, |
| "epoch": 0.005260948649783125, |
| "grad_norm": 105.38307149631456, |
| "kl": 0.32183837890625, |
| "learning_rate": 4.95183703335091e-06, |
| "loss": 0.0819, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 47 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 385.65625, |
| "epoch": 0.005372883727438086, |
| "grad_norm": 0.6435525752943102, |
| "kl": 0.1368408203125, |
| "learning_rate": 4.948791462052819e-06, |
| "loss": 0.0042, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 48 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 630.0, |
| "epoch": 0.005484818805093046, |
| "grad_norm": 0.5344456298032659, |
| "kl": 0.060546875, |
| "learning_rate": 4.945653633868716e-06, |
| "loss": 0.0254, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 49 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 422.75, |
| "epoch": 0.005596753882748006, |
| "grad_norm": 0.7155667213796403, |
| "kl": 0.08978271484375, |
| "learning_rate": 4.942423680455584e-06, |
| "loss": 0.0132, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 50 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 596.296875, |
| "epoch": 0.005708688960402966, |
| "grad_norm": 0.6239904143882911, |
| "kl": 0.04815673828125, |
| "learning_rate": 4.939101737335802e-06, |
| "loss": 0.0135, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 51 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 402.71875, |
| "epoch": 0.0058206240380579265, |
| "grad_norm": 1.104725268564954, |
| "kl": 0.145263671875, |
| "learning_rate": 4.935687943891447e-06, |
| "loss": 0.0015, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 52 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 477.328125, |
| "epoch": 0.005932559115712887, |
| "grad_norm": 0.5835454079488682, |
| "kl": 0.075927734375, |
| "learning_rate": 4.932182443358458e-06, |
| "loss": 0.1512, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 53 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 580.578125, |
| "epoch": 0.006044494193367847, |
| "grad_norm": 0.49184303981459476, |
| "kl": 0.05926513671875, |
| "learning_rate": 4.928585382820616e-06, |
| "loss": 0.0194, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 54 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 392.046875, |
| "epoch": 0.006156429271022806, |
| "grad_norm": 1.0291704424132635, |
| "kl": 0.1390380859375, |
| "learning_rate": 4.924896913203376e-06, |
| "loss": 0.0102, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 55 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 465.0625, |
| "epoch": 0.006268364348677767, |
| "grad_norm": 0.5613069680179144, |
| "kl": 0.08880615234375, |
| "learning_rate": 4.921117189267535e-06, |
| "loss": 0.0121, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 56 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 480.359375, |
| "epoch": 0.006380299426332727, |
| "grad_norm": 0.7905247960482367, |
| "kl": 0.06884765625, |
| "learning_rate": 4.917246369602742e-06, |
| "loss": 0.0134, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 57 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 393.28125, |
| "epoch": 0.006492234503987687, |
| "grad_norm": 18.99182061239259, |
| "kl": 1.30133056640625, |
| "learning_rate": 4.9132846166208355e-06, |
| "loss": -0.0248, |
| "reward": 0.09375000186264515, |
| "reward_std": 0.021039125509560108, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 58 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 509.59375, |
| "epoch": 0.006604169581642647, |
| "grad_norm": 1.0573356685875819, |
| "kl": 0.06280517578125, |
| "learning_rate": 4.9092320965490365e-06, |
| "loss": 0.0153, |
| "reward": 0.09375, |
| "reward_std": 0.02500000037252903, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 59 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 415.171875, |
| "epoch": 0.0067161046592976075, |
| "grad_norm": 0.5498420295992223, |
| "kl": 0.070556640625, |
| "learning_rate": 4.905088979422971e-06, |
| "loss": -0.0324, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 60 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 374.109375, |
| "epoch": 0.006828039736952568, |
| "grad_norm": 0.4563712143777335, |
| "kl": 0.065673828125, |
| "learning_rate": 4.900855439079536e-06, |
| "loss": -0.0263, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 61 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 470.03125, |
| "epoch": 0.006939974814607528, |
| "grad_norm": 0.022676570314052416, |
| "kl": 0.0540771484375, |
| "learning_rate": 4.8965316531496055e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 62 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 442.109375, |
| "epoch": 0.007051909892262488, |
| "grad_norm": 0.37277836211463244, |
| "kl": 0.0537109375, |
| "learning_rate": 4.892117803050578e-06, |
| "loss": -0.0112, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 63 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 407.3125, |
| "epoch": 0.0071638449699174475, |
| "grad_norm": 0.6045549634649411, |
| "kl": 0.06939697265625, |
| "learning_rate": 4.887614073978761e-06, |
| "loss": 0.0378, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 64 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 490.53125, |
| "epoch": 0.007275780047572408, |
| "grad_norm": 0.4804494114616987, |
| "kl": 0.0440673828125, |
| "learning_rate": 4.883020654901609e-06, |
| "loss": 0.0326, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 65 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.625, |
| "epoch": 0.007387715125227368, |
| "grad_norm": 0.02779607160337418, |
| "kl": 0.04901123046875, |
| "learning_rate": 4.878337738549785e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 66 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 391.34375, |
| "epoch": 0.007499650202882328, |
| "grad_norm": 0.019327395350080524, |
| "kl": 0.05145263671875, |
| "learning_rate": 4.873565521409082e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 67 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 408.140625, |
| "epoch": 0.007611585280537288, |
| "grad_norm": 0.7217823002699405, |
| "kl": 0.07281494140625, |
| "learning_rate": 4.868704203712173e-06, |
| "loss": 0.0201, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 68 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 488.171875, |
| "epoch": 0.007723520358192249, |
| "grad_norm": 0.01773957052813651, |
| "kl": 0.04571533203125, |
| "learning_rate": 4.86375398943021e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 69 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 405.109375, |
| "epoch": 0.007835455435847209, |
| "grad_norm": 0.5495053845625697, |
| "kl": 0.05712890625, |
| "learning_rate": 4.858715086264274e-06, |
| "loss": 0.0313, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 70 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 340.9375, |
| "epoch": 0.007947390513502168, |
| "grad_norm": 221.95709261316097, |
| "kl": 29.68072509765625, |
| "learning_rate": 4.853587705636646e-06, |
| "loss": 0.4784, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 71 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 328.046875, |
| "epoch": 0.00805932559115713, |
| "grad_norm": 0.6777938636001378, |
| "kl": 0.0565185546875, |
| "learning_rate": 4.84837206268195e-06, |
| "loss": -0.034, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 72 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 283.0, |
| "epoch": 0.008171260668812089, |
| "grad_norm": 0.03603965081169223, |
| "kl": 0.07452392578125, |
| "learning_rate": 4.8430683762381195e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 73 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 357.4375, |
| "epoch": 0.00828319574646705, |
| "grad_norm": 0.024934251266286088, |
| "kl": 0.06591796875, |
| "learning_rate": 4.837676868837213e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 74 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 395.796875, |
| "epoch": 0.008395130824122009, |
| "grad_norm": 0.025440481130695015, |
| "kl": 0.05645751953125, |
| "learning_rate": 4.832197766696085e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 75 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 448.171875, |
| "epoch": 0.00850706590177697, |
| "grad_norm": 0.053611239059879696, |
| "kl": 0.056640625, |
| "learning_rate": 4.826631299706887e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 76 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 498.859375, |
| "epoch": 0.00861900097943193, |
| "grad_norm": 0.6813369559573024, |
| "kl": 0.05133056640625, |
| "learning_rate": 4.820977701427424e-06, |
| "loss": 0.1191, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 77 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.421875, |
| "epoch": 0.008730936057086889, |
| "grad_norm": 0.7460088285353335, |
| "kl": 0.0655517578125, |
| "learning_rate": 4.81523720907136e-06, |
| "loss": 0.0598, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 78 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 374.640625, |
| "epoch": 0.00884287113474185, |
| "grad_norm": 0.6085630747158598, |
| "kl": 0.06109619140625, |
| "learning_rate": 4.809410063498254e-06, |
| "loss": 0.0091, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 79 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 373.125, |
| "epoch": 0.00895480621239681, |
| "grad_norm": 0.05060284374443874, |
| "kl": 0.06451416015625, |
| "learning_rate": 4.8034965092034656e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 80 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 479.15625, |
| "epoch": 0.00906674129005177, |
| "grad_norm": 0.020553083133118145, |
| "kl": 0.05169677734375, |
| "learning_rate": 4.797496794307889e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 81 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 383.921875, |
| "epoch": 0.00917867636770673, |
| "grad_norm": 0.02331309838539552, |
| "kl": 0.05242919921875, |
| "learning_rate": 4.791411170547545e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 82 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 470.296875, |
| "epoch": 0.009290611445361691, |
| "grad_norm": 0.8366878780204882, |
| "kl": 0.0616455078125, |
| "learning_rate": 4.785239893263017e-06, |
| "loss": 0.019, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 83 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.609375, |
| "epoch": 0.00940254652301665, |
| "grad_norm": 0.4621690087605406, |
| "kl": 0.07269287109375, |
| "learning_rate": 4.778983221388742e-06, |
| "loss": -0.0043, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 84 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 323.453125, |
| "epoch": 0.00951448160067161, |
| "grad_norm": 0.7196439160838366, |
| "kl": 0.07952880859375, |
| "learning_rate": 4.77264141744214e-06, |
| "loss": 0.0222, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 85 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.40625, |
| "epoch": 0.00962641667832657, |
| "grad_norm": 0.42630497015032043, |
| "kl": 0.10479736328125, |
| "learning_rate": 4.766214747512603e-06, |
| "loss": -0.0067, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 86 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 301.90625, |
| "epoch": 0.00973835175598153, |
| "grad_norm": 0.0717989075808753, |
| "kl": 0.06146240234375, |
| "learning_rate": 4.759703481250331e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 87 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 514.953125, |
| "epoch": 0.009850286833636491, |
| "grad_norm": 0.5199995504833264, |
| "kl": 0.0433349609375, |
| "learning_rate": 4.753107891855015e-06, |
| "loss": -0.0066, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 88 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 270.765625, |
| "epoch": 0.00996222191129145, |
| "grad_norm": 0.04715436027273259, |
| "kl": 0.07342529296875, |
| "learning_rate": 4.746428256064375e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 89 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 313.0, |
| "epoch": 0.010074156988946412, |
| "grad_norm": 0.03668445511393964, |
| "kl": 0.05902099609375, |
| "learning_rate": 4.7396648541425534e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 90 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 343.015625, |
| "epoch": 0.010186092066601371, |
| "grad_norm": 0.025197578254458317, |
| "kl": 0.080322265625, |
| "learning_rate": 4.732817969868348e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 91 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 337.984375, |
| "epoch": 0.010298027144256332, |
| "grad_norm": 0.027850466760737828, |
| "kl": 0.057373046875, |
| "learning_rate": 4.7258878905233095e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 92 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 340.03125, |
| "epoch": 0.010409962221911291, |
| "grad_norm": 0.8302898332353863, |
| "kl": 0.06591796875, |
| "learning_rate": 4.718874906879688e-06, |
| "loss": 0.2943, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 93 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 412.859375, |
| "epoch": 0.01052189729956625, |
| "grad_norm": 0.030863660874464384, |
| "kl": 0.052978515625, |
| "learning_rate": 4.711779313188231e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 94 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 377.265625, |
| "epoch": 0.010633832377221212, |
| "grad_norm": 0.04564269693547498, |
| "kl": 0.06561279296875, |
| "learning_rate": 4.70460140716584e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 95 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 372.734375, |
| "epoch": 0.010745767454876171, |
| "grad_norm": 1.0393244090787366, |
| "kl": 0.06671142578125, |
| "learning_rate": 4.697341489983076e-06, |
| "loss": 0.0811, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 96 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 362.25, |
| "epoch": 0.010857702532531132, |
| "grad_norm": 0.023478872872016283, |
| "kl": 0.0535888671875, |
| "learning_rate": 4.6899998662515215e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 97 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 373.90625, |
| "epoch": 0.010969637610186092, |
| "grad_norm": 0.019758461356285777, |
| "kl": 0.0531005859375, |
| "learning_rate": 4.682576844011007e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 98 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 395.90625, |
| "epoch": 0.011081572687841053, |
| "grad_norm": 0.01976638212707949, |
| "kl": 0.0645751953125, |
| "learning_rate": 4.675072734716678e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 99 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.859375, |
| "epoch": 0.011193507765496012, |
| "grad_norm": 2.002878750503418, |
| "kl": 0.0760498046875, |
| "learning_rate": 4.667487853225931e-06, |
| "loss": 0.3922, |
| "reward": 0.09531250037252903, |
| "reward_std": 0.018750000279396772, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 100 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 477.859375, |
| "epoch": 0.011305442843150973, |
| "grad_norm": 0.6672407190007655, |
| "kl": 0.05157470703125, |
| "learning_rate": 4.659822517785203e-06, |
| "loss": 0.1641, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 101 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.8125, |
| "epoch": 0.011417377920805933, |
| "grad_norm": 0.9439619070630538, |
| "kl": 0.061767578125, |
| "learning_rate": 4.6520770500166165e-06, |
| "loss": 0.3783, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 102 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 539.90625, |
| "epoch": 0.011529312998460892, |
| "grad_norm": 0.7519670307168462, |
| "kl": 0.05535888671875, |
| "learning_rate": 4.644251774904487e-06, |
| "loss": 0.0952, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 103 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 396.53125, |
| "epoch": 0.011641248076115853, |
| "grad_norm": 1.2637256595006654, |
| "kl": 0.160400390625, |
| "learning_rate": 4.636347020781684e-06, |
| "loss": -0.0378, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 104 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.40625, |
| "epoch": 0.011753183153770812, |
| "grad_norm": 0.9414284114713096, |
| "kl": 0.0699462890625, |
| "learning_rate": 4.6283631193158605e-06, |
| "loss": -0.0089, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 105 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 404.9375, |
| "epoch": 0.011865118231425774, |
| "grad_norm": 0.15424717287769374, |
| "kl": 0.0926513671875, |
| "learning_rate": 4.620300405495532e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 106 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 376.765625, |
| "epoch": 0.011977053309080733, |
| "grad_norm": 0.6366716085256641, |
| "kl": 0.0911865234375, |
| "learning_rate": 4.612159217616022e-06, |
| "loss": 0.0133, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 107 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 385.484375, |
| "epoch": 0.012088988386735694, |
| "grad_norm": 0.020907253695270293, |
| "kl": 0.06060791015625, |
| "learning_rate": 4.603939897265268e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 108 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 381.453125, |
| "epoch": 0.012200923464390653, |
| "grad_norm": 0.024537257651566672, |
| "kl": 0.06396484375, |
| "learning_rate": 4.595642789309492e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 109 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 344.421875, |
| "epoch": 0.012312858542045613, |
| "grad_norm": 0.02438549022281726, |
| "kl": 0.06048583984375, |
| "learning_rate": 4.587268241878724e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 110 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 338.796875, |
| "epoch": 0.012424793619700574, |
| "grad_norm": 0.5942167965684412, |
| "kl": 0.0833740234375, |
| "learning_rate": 4.578816606352205e-06, |
| "loss": 0.0065, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 111 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.953125, |
| "epoch": 0.012536728697355533, |
| "grad_norm": 0.023256524966985128, |
| "kl": 0.05865478515625, |
| "learning_rate": 4.570288237343632e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 112 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 344.203125, |
| "epoch": 0.012648663775010494, |
| "grad_norm": 0.4917785489739923, |
| "kl": 0.0616455078125, |
| "learning_rate": 4.561683492686289e-06, |
| "loss": 0.0131, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 113 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 323.609375, |
| "epoch": 0.012760598852665454, |
| "grad_norm": 0.4189078075428464, |
| "kl": 0.06109619140625, |
| "learning_rate": 4.5530027334180285e-06, |
| "loss": -0.0367, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 114 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 415.71875, |
| "epoch": 0.012872533930320415, |
| "grad_norm": 0.03244601571119012, |
| "kl": 0.0718994140625, |
| "learning_rate": 4.544246323766122e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 115 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 294.453125, |
| "epoch": 0.012984469007975374, |
| "grad_norm": 0.04077561657409853, |
| "kl": 0.0740966796875, |
| "learning_rate": 4.535414631131983e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 116 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 259.078125, |
| "epoch": 0.013096404085630335, |
| "grad_norm": 0.04561395129255693, |
| "kl": 0.0780029296875, |
| "learning_rate": 4.526508026075746e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 117 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 353.875, |
| "epoch": 0.013208339163285295, |
| "grad_norm": 0.032666442461546756, |
| "kl": 0.071533203125, |
| "learning_rate": 4.517526882300721e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 118 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 445.484375, |
| "epoch": 0.013320274240940254, |
| "grad_norm": 0.030745081226969093, |
| "kl": 0.0496826171875, |
| "learning_rate": 4.508471576637713e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 119 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 410.46875, |
| "epoch": 0.013432209318595215, |
| "grad_norm": 0.025825744807750583, |
| "kl": 0.0751953125, |
| "learning_rate": 4.499342489029211e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 120 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 261.15625, |
| "epoch": 0.013544144396250174, |
| "grad_norm": 0.027055040661668105, |
| "kl": 0.071533203125, |
| "learning_rate": 4.490140002513449e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 121 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 503.46875, |
| "epoch": 0.013656079473905135, |
| "grad_norm": 0.9837824917503732, |
| "kl": 0.05841064453125, |
| "learning_rate": 4.48086450320833e-06, |
| "loss": 0.156, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 122 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 551.9375, |
| "epoch": 0.013768014551560095, |
| "grad_norm": 0.36909382007632124, |
| "kl": 0.0616455078125, |
| "learning_rate": 4.4715163802952266e-06, |
| "loss": 0.034, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 123 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 311.875, |
| "epoch": 0.013879949629215056, |
| "grad_norm": 0.02289685144542213, |
| "kl": 0.0621337890625, |
| "learning_rate": 4.462096026002655e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 124 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 349.609375, |
| "epoch": 0.013991884706870015, |
| "grad_norm": 0.5288400884276316, |
| "kl": 0.06207275390625, |
| "learning_rate": 4.4526038355898144e-06, |
| "loss": -0.0128, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 125 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 376.234375, |
| "epoch": 0.014103819784524976, |
| "grad_norm": 0.019666228568501372, |
| "kl": 0.056396484375, |
| "learning_rate": 4.4430402073300035e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 126 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 338.671875, |
| "epoch": 0.014215754862179936, |
| "grad_norm": 0.022162440604770836, |
| "kl": 0.0638427734375, |
| "learning_rate": 4.433405542493909e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 127 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.328125, |
| "epoch": 0.014327689939834895, |
| "grad_norm": 0.5613568768967913, |
| "kl": 0.06121826171875, |
| "learning_rate": 4.4237002453327734e-06, |
| "loss": -0.0001, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 128 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 375.328125, |
| "epoch": 0.014439625017489856, |
| "grad_norm": 0.025068232916270288, |
| "kl": 0.06744384765625, |
| "learning_rate": 4.4139247230614245e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 129 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 407.34375, |
| "epoch": 0.014551560095144815, |
| "grad_norm": 0.018723191179922716, |
| "kl": 0.05828857421875, |
| "learning_rate": 4.404079385841201e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 130 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 265.3125, |
| "epoch": 0.014663495172799777, |
| "grad_norm": 0.16972024878699354, |
| "kl": 0.09710693359375, |
| "learning_rate": 4.394164646762734e-06, |
| "loss": 0.001, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 131 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.984375, |
| "epoch": 0.014775430250454736, |
| "grad_norm": 0.0237436227671808, |
| "kl": 0.06884765625, |
| "learning_rate": 4.384180921828618e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 132 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 419.296875, |
| "epoch": 0.014887365328109697, |
| "grad_norm": 0.6392088951882646, |
| "kl": 0.05865478515625, |
| "learning_rate": 4.374128629935955e-06, |
| "loss": 0.1335, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 133 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 371.859375, |
| "epoch": 0.014999300405764656, |
| "grad_norm": 0.0258451541588282, |
| "kl": 0.0711669921875, |
| "learning_rate": 4.364008192858781e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 134 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.5625, |
| "epoch": 0.015111235483419617, |
| "grad_norm": 0.023614977303173818, |
| "kl": 0.0672607421875, |
| "learning_rate": 4.353820035230366e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 135 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 466.734375, |
| "epoch": 0.015223170561074577, |
| "grad_norm": 0.019070024346192215, |
| "kl": 0.066162109375, |
| "learning_rate": 4.3435645845254e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 136 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 504.890625, |
| "epoch": 0.015335105638729536, |
| "grad_norm": 0.37696582370006476, |
| "kl": 0.06463623046875, |
| "learning_rate": 4.333242271042054e-06, |
| "loss": -0.0004, |
| "reward": 0.09687500260770321, |
| "reward_std": 0.008539125323295593, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 137 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 567.234375, |
| "epoch": 0.015447040716384497, |
| "grad_norm": 3.0390479634412357, |
| "kl": 0.071533203125, |
| "learning_rate": 4.32285352788393e-06, |
| "loss": 0.2188, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 138 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 457.21875, |
| "epoch": 0.015558975794039457, |
| "grad_norm": 1.8150343506782198, |
| "kl": 0.106689453125, |
| "learning_rate": 4.312398790941882e-06, |
| "loss": 0.3003, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 139 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 609.28125, |
| "epoch": 0.015670910871694418, |
| "grad_norm": 0.03462843424405208, |
| "kl": 0.05792236328125, |
| "learning_rate": 4.301878498875735e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 140 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 473.421875, |
| "epoch": 0.015782845949349377, |
| "grad_norm": 1.7729469082974438, |
| "kl": 0.06402587890625, |
| "learning_rate": 4.291293093095873e-06, |
| "loss": 0.1156, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 141 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 440.90625, |
| "epoch": 0.015894781027004336, |
| "grad_norm": 0.678889245969432, |
| "kl": 0.0787353515625, |
| "learning_rate": 4.280643017744723e-06, |
| "loss": 0.0363, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 142 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 615.828125, |
| "epoch": 0.0160067161046593, |
| "grad_norm": 3.1600108504474265, |
| "kl": 0.103759765625, |
| "learning_rate": 4.269928719678117e-06, |
| "loss": 0.2578, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 143 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 383.375, |
| "epoch": 0.01611865118231426, |
| "grad_norm": 0.13214155840655448, |
| "kl": 0.101318359375, |
| "learning_rate": 4.2591506484465426e-06, |
| "loss": 0.001, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 144 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.90625, |
| "epoch": 0.016230586259969218, |
| "grad_norm": 0.7194283098737337, |
| "kl": 0.08380126953125, |
| "learning_rate": 4.248309256276283e-06, |
| "loss": -0.0069, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 145 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 480.046875, |
| "epoch": 0.016342521337624177, |
| "grad_norm": 0.10784588867048772, |
| "kl": 0.0704345703125, |
| "learning_rate": 4.23740499805044e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 146 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.234375, |
| "epoch": 0.016454456415279137, |
| "grad_norm": 1.2992200606773725, |
| "kl": 0.1204833984375, |
| "learning_rate": 4.22643833128985e-06, |
| "loss": 0.0012, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 147 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 430.171875, |
| "epoch": 0.0165663914929341, |
| "grad_norm": 16.491496062528398, |
| "kl": 0.4547119140625, |
| "learning_rate": 4.215409716133885e-06, |
| "loss": 0.131, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 148 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 471.171875, |
| "epoch": 0.01667832657058906, |
| "grad_norm": 4.096997318208475, |
| "kl": 0.2073974609375, |
| "learning_rate": 4.204319615321151e-06, |
| "loss": 0.0021, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 149 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 423.59375, |
| "epoch": 0.016790261648244018, |
| "grad_norm": 3.910798929312304, |
| "kl": 0.2662353515625, |
| "learning_rate": 4.193168494170065e-06, |
| "loss": 0.2077, |
| "reward": 0.09218750335276127, |
| "reward_std": 0.01861694734543562, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 150 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 388.78125, |
| "epoch": 0.016902196725898978, |
| "grad_norm": 13.951120691377671, |
| "kl": 0.37646484375, |
| "learning_rate": 4.181956820559339e-06, |
| "loss": 0.5985, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 151 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 348.0, |
| "epoch": 0.01701413180355394, |
| "grad_norm": 5.2519598128920455, |
| "kl": 0.1241455078125, |
| "learning_rate": 4.170685064908342e-06, |
| "loss": 0.2291, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 152 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 459.453125, |
| "epoch": 0.0171260668812089, |
| "grad_norm": 10.132405471868221, |
| "kl": 0.1990966796875, |
| "learning_rate": 4.159353700157365e-06, |
| "loss": 0.1752, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 153 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.171875, |
| "epoch": 0.01723800195886386, |
| "grad_norm": 0.784730424677537, |
| "kl": 0.0968017578125, |
| "learning_rate": 4.14796320174778e-06, |
| "loss": 0.001, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 154 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 368.65625, |
| "epoch": 0.01734993703651882, |
| "grad_norm": 27.02725934627477, |
| "kl": 0.109619140625, |
| "learning_rate": 4.136514047602087e-06, |
| "loss": 0.1772, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 155 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 477.296875, |
| "epoch": 0.017461872114173778, |
| "grad_norm": 14.370055706061692, |
| "kl": 0.1593017578125, |
| "learning_rate": 4.1250067181038635e-06, |
| "loss": 0.2029, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 156 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 480.53125, |
| "epoch": 0.01757380719182874, |
| "grad_norm": 13.084686861766809, |
| "kl": 0.1204833984375, |
| "learning_rate": 4.113441696077608e-06, |
| "loss": 0.1918, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 157 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.359375, |
| "epoch": 0.0176857422694837, |
| "grad_norm": 0.6843422318132463, |
| "kl": 0.07861328125, |
| "learning_rate": 4.101819466768484e-06, |
| "loss": 0.017, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 158 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.34375, |
| "epoch": 0.01779767734713866, |
| "grad_norm": 6.203392924309637, |
| "kl": 0.2252197265625, |
| "learning_rate": 4.0901405178219535e-06, |
| "loss": -0.0466, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 159 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 404.71875, |
| "epoch": 0.01790961242479362, |
| "grad_norm": 0.7482548079571155, |
| "kl": 0.15234375, |
| "learning_rate": 4.078405339263326e-06, |
| "loss": 0.0015, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 160 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 392.734375, |
| "epoch": 0.018021547502448578, |
| "grad_norm": 0.821487756754832, |
| "kl": 0.095458984375, |
| "learning_rate": 4.06661442347719e-06, |
| "loss": 0.008, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 161 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 370.734375, |
| "epoch": 0.01813348258010354, |
| "grad_norm": 0.25693644980051783, |
| "kl": 0.1165771484375, |
| "learning_rate": 4.054768265186758e-06, |
| "loss": 0.0012, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 162 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 336.859375, |
| "epoch": 0.0182454176577585, |
| "grad_norm": 0.3151740457382109, |
| "kl": 0.0853271484375, |
| "learning_rate": 4.0428673614331036e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 163 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 354.203125, |
| "epoch": 0.01835735273541346, |
| "grad_norm": 0.2872706706321094, |
| "kl": 0.090087890625, |
| "learning_rate": 4.030912211554316e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 164 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 337.90625, |
| "epoch": 0.01846928781306842, |
| "grad_norm": 0.11020779139062825, |
| "kl": 0.0782470703125, |
| "learning_rate": 4.018903317164539e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 165 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 346.25, |
| "epoch": 0.018581222890723382, |
| "grad_norm": 0.045653419133126164, |
| "kl": 0.0740966796875, |
| "learning_rate": 4.006841182132932e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 166 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 343.359375, |
| "epoch": 0.01869315796837834, |
| "grad_norm": 0.021075436862415513, |
| "kl": 0.06011962890625, |
| "learning_rate": 3.9947263125625195e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 167 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 263.21875, |
| "epoch": 0.0188050930460333, |
| "grad_norm": 0.04494777486555804, |
| "kl": 0.07147216796875, |
| "learning_rate": 3.982559216768967e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 168 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 339.40625, |
| "epoch": 0.01891702812368826, |
| "grad_norm": 0.018822857218736996, |
| "kl": 0.0582275390625, |
| "learning_rate": 3.970340405259245e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 169 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 350.8125, |
| "epoch": 0.01902896320134322, |
| "grad_norm": 0.023628578386486077, |
| "kl": 0.07000732421875, |
| "learning_rate": 3.958070390710214e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 170 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 367.921875, |
| "epoch": 0.019140898278998182, |
| "grad_norm": 0.023811978335883294, |
| "kl": 0.0609130859375, |
| "learning_rate": 3.945749687947109e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 171 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 300.046875, |
| "epoch": 0.01925283335665314, |
| "grad_norm": 0.8750934577120364, |
| "kl": 0.07550048828125, |
| "learning_rate": 3.933378813921942e-06, |
| "loss": 0.013, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 172 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 335.546875, |
| "epoch": 0.0193647684343081, |
| "grad_norm": 0.6477727504447495, |
| "kl": 0.071044921875, |
| "learning_rate": 3.920958287691811e-06, |
| "loss": -0.0026, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 173 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 268.5, |
| "epoch": 0.01947670351196306, |
| "grad_norm": 27.871539574475218, |
| "kl": 0.44085693359375, |
| "learning_rate": 3.908488630397121e-06, |
| "loss": -0.0071, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 174 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 328.78125, |
| "epoch": 0.019588638589618023, |
| "grad_norm": 0.057425424775596354, |
| "kl": 0.06646728515625, |
| "learning_rate": 3.8959703652397175e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 175 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 359.21875, |
| "epoch": 0.019700573667272982, |
| "grad_norm": 0.02368246574314423, |
| "kl": 0.055419921875, |
| "learning_rate": 3.883404017460935e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 176 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 320.234375, |
| "epoch": 0.019812508744927942, |
| "grad_norm": 0.03358527901376269, |
| "kl": 0.05792236328125, |
| "learning_rate": 3.870790114319559e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 177 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 249.78125, |
| "epoch": 0.0199244438225829, |
| "grad_norm": 0.5392466815020299, |
| "kl": 0.06982421875, |
| "learning_rate": 3.858129185069701e-06, |
| "loss": -0.0209, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 178 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 322.09375, |
| "epoch": 0.02003637890023786, |
| "grad_norm": 0.17046312215041995, |
| "kl": 0.08056640625, |
| "learning_rate": 3.845421760938597e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 179 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 343.234375, |
| "epoch": 0.020148313977892823, |
| "grad_norm": 0.05778936711697761, |
| "kl": 0.05181884765625, |
| "learning_rate": 3.832668375104312e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 180 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 328.75, |
| "epoch": 0.020260249055547783, |
| "grad_norm": 0.9368586862857481, |
| "kl": 0.0615234375, |
| "learning_rate": 3.8198695626733725e-06, |
| "loss": -0.0006, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 181 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 303.6875, |
| "epoch": 0.020372184133202742, |
| "grad_norm": 0.1463135701740714, |
| "kl": 0.05364990234375, |
| "learning_rate": 3.8070258606583156e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 182 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 268.671875, |
| "epoch": 0.0204841192108577, |
| "grad_norm": 24.196429667374314, |
| "kl": 0.20501708984375, |
| "learning_rate": 3.7941378079551544e-06, |
| "loss": 0.0021, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 183 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 350.625, |
| "epoch": 0.020596054288512664, |
| "grad_norm": 0.7575728537579188, |
| "kl": 0.067138671875, |
| "learning_rate": 3.7812059453207677e-06, |
| "loss": -0.0088, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 184 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 381.71875, |
| "epoch": 0.020707989366167624, |
| "grad_norm": 1.5781775679619348, |
| "kl": 0.129150390625, |
| "learning_rate": 3.768230815350213e-06, |
| "loss": -0.0091, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 384.6875, |
| "epoch": 0.020819924443822583, |
| "grad_norm": 3.7258182746280184, |
| "kl": 0.70550537109375, |
| "learning_rate": 3.7552129624539557e-06, |
| "loss": 0.2283, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 186 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 449.1875, |
| "epoch": 0.020931859521477542, |
| "grad_norm": 23.600980161646945, |
| "kl": 2.8865966796875, |
| "learning_rate": 3.7421529328350316e-06, |
| "loss": 0.2557, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 187 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.265625, |
| "epoch": 0.0210437945991325, |
| "grad_norm": 36.37207086101098, |
| "kl": 1.830078125, |
| "learning_rate": 3.7290512744661274e-06, |
| "loss": 0.3201, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 188 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 396.90625, |
| "epoch": 0.021155729676787464, |
| "grad_norm": 20.154618516530974, |
| "kl": 0.8580322265625, |
| "learning_rate": 3.715908537066589e-06, |
| "loss": 0.236, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 189 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 371.265625, |
| "epoch": 0.021267664754442424, |
| "grad_norm": 1.0506630322756636, |
| "kl": 0.18212890625, |
| "learning_rate": 3.7027252720793538e-06, |
| "loss": -0.0025, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 190 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 290.59375, |
| "epoch": 0.021379599832097383, |
| "grad_norm": 0.10894834856423846, |
| "kl": 0.0850830078125, |
| "learning_rate": 3.689502032647817e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 191 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 406.421875, |
| "epoch": 0.021491534909752343, |
| "grad_norm": 0.070012885417872, |
| "kl": 0.0955810546875, |
| "learning_rate": 3.6762393735926245e-06, |
| "loss": 0.001, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 192 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 288.953125, |
| "epoch": 0.021603469987407305, |
| "grad_norm": 0.055813531691409256, |
| "kl": 0.068603515625, |
| "learning_rate": 3.6629378513883852e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 193 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 279.171875, |
| "epoch": 0.021715405065062265, |
| "grad_norm": 52.57320558278293, |
| "kl": 0.1983642578125, |
| "learning_rate": 3.6495980241403307e-06, |
| "loss": 0.3357, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 194 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 291.78125, |
| "epoch": 0.021827340142717224, |
| "grad_norm": 0.04396489484628073, |
| "kl": 0.0579833984375, |
| "learning_rate": 3.636220451560896e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 195 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 390.78125, |
| "epoch": 0.021939275220372183, |
| "grad_norm": 1.0092735457153308, |
| "kl": 0.08642578125, |
| "learning_rate": 3.622805694946235e-06, |
| "loss": 0.0163, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 196 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 307.9375, |
| "epoch": 0.022051210298027143, |
| "grad_norm": 0.11501833373142471, |
| "kl": 0.07763671875, |
| "learning_rate": 3.609354317152667e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 197 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 354.546875, |
| "epoch": 0.022163145375682106, |
| "grad_norm": 0.24707486499522355, |
| "kl": 0.0819091796875, |
| "learning_rate": 3.595866882573063e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 198 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 307.640625, |
| "epoch": 0.022275080453337065, |
| "grad_norm": 27.72743469845055, |
| "kl": 0.6578369140625, |
| "learning_rate": 3.5823439571131675e-06, |
| "loss": 0.0387, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 199 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 297.953125, |
| "epoch": 0.022387015530992024, |
| "grad_norm": 4.257281648957348, |
| "kl": 0.2021484375, |
| "learning_rate": 3.5687861081678477e-06, |
| "loss": 0.002, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 200 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 406.703125, |
| "epoch": 0.022498950608646984, |
| "grad_norm": 1.991901860771057, |
| "kl": 0.127197265625, |
| "learning_rate": 3.555193904597291e-06, |
| "loss": -0.0177, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 201 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 360.75, |
| "epoch": 0.022610885686301947, |
| "grad_norm": 22.923764220941024, |
| "kl": 0.1807861328125, |
| "learning_rate": 3.541567916703138e-06, |
| "loss": 0.1058, |
| "reward": 0.09687500260770321, |
| "reward_std": 0.008539125323295593, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 202 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 361.609375, |
| "epoch": 0.022722820763956906, |
| "grad_norm": 148.13942760303817, |
| "kl": 10.0704345703125, |
| "learning_rate": 3.5279087162045517e-06, |
| "loss": 0.3985, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 203 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 312.625, |
| "epoch": 0.022834755841611865, |
| "grad_norm": 5.396743744887109, |
| "kl": 0.3883056640625, |
| "learning_rate": 3.5142168762142265e-06, |
| "loss": -0.0111, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 204 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 409.84375, |
| "epoch": 0.022946690919266825, |
| "grad_norm": 2770.9102363815387, |
| "kl": 340.0640869140625, |
| "learning_rate": 3.500492971214347e-06, |
| "loss": 3.6234, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 205 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 254.1875, |
| "epoch": 0.023058625996921784, |
| "grad_norm": 11.015389916640636, |
| "kl": 1.8922119140625, |
| "learning_rate": 3.48673757703248e-06, |
| "loss": -0.0028, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 206 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 350.78125, |
| "epoch": 0.023170561074576747, |
| "grad_norm": 1.8419760739328712, |
| "kl": 0.1317138671875, |
| "learning_rate": 3.472951270817418e-06, |
| "loss": -0.0237, |
| "reward": 0.09062500298023224, |
| "reward_std": 0.024866947438567877, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.90625, |
| "step": 207 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 384.078125, |
| "epoch": 0.023282496152231706, |
| "grad_norm": 1.681815519531453, |
| "kl": 0.16943359375, |
| "learning_rate": 3.4591346310149578e-06, |
| "loss": -0.0704, |
| "reward": 0.08906250260770321, |
| "reward_std": 0.025969466660171747, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.890625, |
| "step": 208 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 265.984375, |
| "epoch": 0.023394431229886666, |
| "grad_norm": 4.149140370325476, |
| "kl": 0.343994140625, |
| "learning_rate": 3.445288237343632e-06, |
| "loss": 0.0316, |
| "reward": 0.08906250260770321, |
| "reward_std": 0.031116947531700134, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.890625, |
| "step": 209 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 323.3125, |
| "epoch": 0.023506366307541625, |
| "grad_norm": 7.97862155393176, |
| "kl": 0.59326171875, |
| "learning_rate": 3.4314126707703895e-06, |
| "loss": -0.0088, |
| "reward": 0.0937500037252903, |
| "reward_std": 0.017078250646591187, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 210 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 410.84375, |
| "epoch": 0.023618301385196588, |
| "grad_norm": 37.00032182098118, |
| "kl": 3.810546875, |
| "learning_rate": 3.4175085134862128e-06, |
| "loss": 0.1349, |
| "reward": 0.08281250484287739, |
| "reward_std": 0.03833641391247511, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.828125, |
| "step": 211 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 348.96875, |
| "epoch": 0.023730236462851547, |
| "grad_norm": 78.79622548097791, |
| "kl": 3.765380859375, |
| "learning_rate": 3.4035763488816953e-06, |
| "loss": 0.2076, |
| "reward": 0.09375000186264515, |
| "reward_std": 0.021039125509560108, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 212 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.921875, |
| "epoch": 0.023842171540506506, |
| "grad_norm": 10.816304901178587, |
| "kl": 3.8818359375, |
| "learning_rate": 3.3896167615225594e-06, |
| "loss": 0.1445, |
| "reward": 0.08906250260770321, |
| "reward_std": 0.025969465728849173, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.890625, |
| "step": 213 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 375.6875, |
| "epoch": 0.023954106618161466, |
| "grad_norm": 13.522641035708572, |
| "kl": 4.187744140625, |
| "learning_rate": 3.375630337125133e-06, |
| "loss": 0.0886, |
| "reward": 0.09062500111758709, |
| "reward_std": 0.020155644044280052, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.90625, |
| "step": 214 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 292.984375, |
| "epoch": 0.024066041695816425, |
| "grad_norm": 9.482318398692025, |
| "kl": 5.662109375, |
| "learning_rate": 3.361617662531772e-06, |
| "loss": 0.135, |
| "reward": 0.09062500298023224, |
| "reward_std": 0.0295782508328557, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.90625, |
| "step": 215 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 330.234375, |
| "epoch": 0.024177976773471388, |
| "grad_norm": 9.298647150834725, |
| "kl": 1.287109375, |
| "learning_rate": 3.347579325686237e-06, |
| "loss": -0.0025, |
| "reward": 0.09218750335276127, |
| "reward_std": 0.01861694734543562, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 216 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 321.375, |
| "epoch": 0.024289911851126347, |
| "grad_norm": 9.33505130284713, |
| "kl": 2.80908203125, |
| "learning_rate": 3.333515915609027e-06, |
| "loss": 0.0542, |
| "reward": 0.09218750335276127, |
| "reward_std": 0.01861694734543562, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 217 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 364.453125, |
| "epoch": 0.024401846928781307, |
| "grad_norm": 46.939949673345936, |
| "kl": 7.6962890625, |
| "learning_rate": 3.3194280223726616e-06, |
| "loss": 0.1244, |
| "reward": 0.09375000186264515, |
| "reward_std": 0.021039125509560108, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 218 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 389.140625, |
| "epoch": 0.024513782006436266, |
| "grad_norm": 8.965046605455813, |
| "kl": 1.474365234375, |
| "learning_rate": 3.305316237076927e-06, |
| "loss": 0.0928, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 219 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.921875, |
| "epoch": 0.024625717084091225, |
| "grad_norm": 30.26772960669773, |
| "kl": 3.83154296875, |
| "learning_rate": 3.291181151824071e-06, |
| "loss": 0.0081, |
| "reward": 0.09375000186264515, |
| "reward_std": 0.011180340312421322, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 220 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 431.640625, |
| "epoch": 0.024737652161746188, |
| "grad_norm": 9.50042126895199, |
| "kl": 0.977294921875, |
| "learning_rate": 3.27702335969396e-06, |
| "loss": -0.0179, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 221 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.90625, |
| "epoch": 0.024849587239401148, |
| "grad_norm": 8.200151360167519, |
| "kl": 0.697509765625, |
| "learning_rate": 3.2628434547191985e-06, |
| "loss": -0.037, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 222 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 305.859375, |
| "epoch": 0.024961522317056107, |
| "grad_norm": 18.01832349548423, |
| "kl": 0.634033203125, |
| "learning_rate": 3.2486420318601973e-06, |
| "loss": -0.0207, |
| "reward": 0.09218750335276127, |
| "reward_std": 0.01861694734543562, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 223 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 368.5, |
| "epoch": 0.025073457394711066, |
| "grad_norm": 99.84487944919411, |
| "kl": 4.774169921875, |
| "learning_rate": 3.2344196869802187e-06, |
| "loss": -0.0292, |
| "reward": 0.09218750149011612, |
| "reward_std": 0.02257782220840454, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 224 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 553.921875, |
| "epoch": 0.02518539247236603, |
| "grad_norm": 4.540807623552518, |
| "kl": 0.2640380859375, |
| "learning_rate": 3.2201770168203694e-06, |
| "loss": 0.0409, |
| "reward": 0.09531250037252903, |
| "reward_std": 0.018750000279396772, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 225 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 470.421875, |
| "epoch": 0.02529732755002099, |
| "grad_norm": 1.9006028901018341, |
| "kl": 0.2882080078125, |
| "learning_rate": 3.205914618974563e-06, |
| "loss": -0.0101, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 226 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 359.5, |
| "epoch": 0.025409262627675948, |
| "grad_norm": 8.497076561424455, |
| "kl": 1.46826171875, |
| "learning_rate": 3.1916330918644496e-06, |
| "loss": 0.0076, |
| "reward": 0.0937500037252903, |
| "reward_std": 0.017078250646591187, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 227 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 398.203125, |
| "epoch": 0.025521197705330907, |
| "grad_norm": 6.49286525909613, |
| "kl": 0.5, |
| "learning_rate": 3.177333034714303e-06, |
| "loss": 0.0436, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 228 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 340.09375, |
| "epoch": 0.025633132782985867, |
| "grad_norm": 99.57124601550895, |
| "kl": 13.4130859375, |
| "learning_rate": 3.1630150475258813e-06, |
| "loss": 0.1554, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 229 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.78125, |
| "epoch": 0.02574506786064083, |
| "grad_norm": 49.128639521298574, |
| "kl": 7.0673828125, |
| "learning_rate": 3.148679731053252e-06, |
| "loss": 0.0762, |
| "reward": 0.09218750335276127, |
| "reward_std": 0.023328250739723444, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.921875, |
| "step": 230 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 351.09375, |
| "epoch": 0.02585700293829579, |
| "grad_norm": 367.82367126923293, |
| "kl": 26.0625, |
| "learning_rate": 3.1343276867775805e-06, |
| "loss": 0.19, |
| "reward": 0.09531250037252903, |
| "reward_std": 0.018750000279396772, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 231 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.828125, |
| "epoch": 0.025968938015950748, |
| "grad_norm": 2760.034913740409, |
| "kl": 32.2548828125, |
| "learning_rate": 3.1199595168819043e-06, |
| "loss": 0.4045, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 232 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 421.625, |
| "epoch": 0.026080873093605707, |
| "grad_norm": 24.759326550765813, |
| "kl": 2.56591796875, |
| "learning_rate": 3.105575824225852e-06, |
| "loss": 0.1236, |
| "reward": 0.09531250037252903, |
| "reward_std": 0.018750000279396772, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 233 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.453125, |
| "epoch": 0.02619280817126067, |
| "grad_norm": 11.271203444155871, |
| "kl": 0.311767578125, |
| "learning_rate": 3.091177212320363e-06, |
| "loss": 0.0506, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 234 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 294.984375, |
| "epoch": 0.02630474324891563, |
| "grad_norm": 60.98662683249761, |
| "kl": 6.624267578125, |
| "learning_rate": 3.0767642853023538e-06, |
| "loss": 0.0659, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 235 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 387.59375, |
| "epoch": 0.02641667832657059, |
| "grad_norm": 8.237085005507922, |
| "kl": 0.9478759765625, |
| "learning_rate": 3.062337647909376e-06, |
| "loss": -0.0391, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 236 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 260.09375, |
| "epoch": 0.02652861340422555, |
| "grad_norm": 18.57477238550014, |
| "kl": 2.2236328125, |
| "learning_rate": 3.04789790545424e-06, |
| "loss": 0.0015, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 237 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 322.015625, |
| "epoch": 0.026640548481880508, |
| "grad_norm": 16.627244952129402, |
| "kl": 0.564453125, |
| "learning_rate": 3.033445663799621e-06, |
| "loss": 0.0793, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 238 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 315.96875, |
| "epoch": 0.02675248355953547, |
| "grad_norm": 54.633519018974916, |
| "kl": 4.1083984375, |
| "learning_rate": 3.018981529332633e-06, |
| "loss": 0.1508, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 239 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.375, |
| "epoch": 0.02686441863719043, |
| "grad_norm": 8.038755997888481, |
| "kl": 3.0703125, |
| "learning_rate": 3.00450610893939e-06, |
| "loss": 0.0033, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 240 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.46875, |
| "epoch": 0.02697635371484539, |
| "grad_norm": 12.489323661336277, |
| "kl": 1.4534912109375, |
| "learning_rate": 2.9900200099795396e-06, |
| "loss": 0.0417, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 241 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.390625, |
| "epoch": 0.02708828879250035, |
| "grad_norm": 5.250434377988738, |
| "kl": 1.822265625, |
| "learning_rate": 2.9755238402607826e-06, |
| "loss": -0.0469, |
| "reward": 0.0937500037252903, |
| "reward_std": 0.017078250646591187, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 242 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 369.125, |
| "epoch": 0.02720022387015531, |
| "grad_norm": 57.97881368696394, |
| "kl": 5.73095703125, |
| "learning_rate": 2.961018208013367e-06, |
| "loss": 0.2336, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 243 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 264.375, |
| "epoch": 0.02731215894781027, |
| "grad_norm": 49.75089750933376, |
| "kl": 4.38427734375, |
| "learning_rate": 2.9465037218645694e-06, |
| "loss": 0.0314, |
| "reward": 0.09375000186264515, |
| "reward_std": 0.021039125509560108, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.9375, |
| "step": 244 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 322.0625, |
| "epoch": 0.02742409402546523, |
| "grad_norm": 1.707430633827994, |
| "kl": 0.204833984375, |
| "learning_rate": 2.9319809908131604e-06, |
| "loss": 0.002, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 245 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 376.109375, |
| "epoch": 0.02753602910312019, |
| "grad_norm": 13.47781256474575, |
| "kl": 0.590576171875, |
| "learning_rate": 2.917450624203847e-06, |
| "loss": 0.0719, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 246 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 376.0625, |
| "epoch": 0.02764796418077515, |
| "grad_norm": 1680.1301953567038, |
| "kl": 79.1229248046875, |
| "learning_rate": 2.9029132317017118e-06, |
| "loss": 0.7462, |
| "reward": 0.09687500260770321, |
| "reward_std": 0.008539125323295593, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 247 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.234375, |
| "epoch": 0.02775989925843011, |
| "grad_norm": 0.6798016263852162, |
| "kl": 0.1409912109375, |
| "learning_rate": 2.888369423266629e-06, |
| "loss": 0.0014, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 248 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.953125, |
| "epoch": 0.02787183433608507, |
| "grad_norm": 5.926518387479333, |
| "kl": 0.63525390625, |
| "learning_rate": 2.8738198091276712e-06, |
| "loss": -0.0057, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 249 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 400.359375, |
| "epoch": 0.02798376941374003, |
| "grad_norm": 73.88428710060259, |
| "kl": 6.923828125, |
| "learning_rate": 2.859264999757509e-06, |
| "loss": 0.2651, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 250 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 413.0625, |
| "epoch": 0.02809570449139499, |
| "grad_norm": 19.57610821275727, |
| "kl": 1.18603515625, |
| "learning_rate": 2.8447056058467928e-06, |
| "loss": -0.0419, |
| "reward": 0.09531250223517418, |
| "reward_std": 0.01478912541642785, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.953125, |
| "step": 251 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 357.59375, |
| "epoch": 0.028207639569049953, |
| "grad_norm": 15.02657989678683, |
| "kl": 0.35986328125, |
| "learning_rate": 2.830142238278531e-06, |
| "loss": 0.0012, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 252 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 381.203125, |
| "epoch": 0.028319574646704912, |
| "grad_norm": 2.875721583070933, |
| "kl": 0.475341796875, |
| "learning_rate": 2.81557550810246e-06, |
| "loss": -0.0231, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 253 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 371.046875, |
| "epoch": 0.02843150972435987, |
| "grad_norm": 0.6092524986743804, |
| "kl": 0.1376953125, |
| "learning_rate": 2.8010060265094026e-06, |
| "loss": 0.0014, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 254 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 375.78125, |
| "epoch": 0.02854344480201483, |
| "grad_norm": 1.3831215185283958, |
| "kl": 0.130615234375, |
| "learning_rate": 2.786434404805629e-06, |
| "loss": -0.032, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 255 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 394.375, |
| "epoch": 0.02865537987966979, |
| "grad_norm": 0.29391488966565826, |
| "kl": 0.1136474609375, |
| "learning_rate": 2.771861254387199e-06, |
| "loss": 0.0011, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 256 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 418.015625, |
| "epoch": 0.028767314957324753, |
| "grad_norm": 8.597539761339826, |
| "kl": 0.3472900390625, |
| "learning_rate": 2.7572871867143204e-06, |
| "loss": 0.0113, |
| "reward": 0.09687500260770321, |
| "reward_std": 0.008539125323295593, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 257 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 425.578125, |
| "epoch": 0.028879250034979712, |
| "grad_norm": 0.5239580359655661, |
| "kl": 0.1436767578125, |
| "learning_rate": 2.742712813285681e-06, |
| "loss": 0.0014, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 258 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 341.625, |
| "epoch": 0.02899118511263467, |
| "grad_norm": 1.9555271298208972, |
| "kl": 0.2032470703125, |
| "learning_rate": 2.7281387456128017e-06, |
| "loss": 0.002, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 259 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 368.65625, |
| "epoch": 0.02910312019028963, |
| "grad_norm": 1.252836717471123, |
| "kl": 0.2208251953125, |
| "learning_rate": 2.7135655951943716e-06, |
| "loss": 0.0022, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 260 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 375.59375, |
| "epoch": 0.029215055267944594, |
| "grad_norm": 0.8719765743586952, |
| "kl": 0.1676025390625, |
| "learning_rate": 2.698993973490598e-06, |
| "loss": 0.0017, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 261 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 333.0625, |
| "epoch": 0.029326990345599553, |
| "grad_norm": 0.13713063096829928, |
| "kl": 0.10205078125, |
| "learning_rate": 2.6844244918975416e-06, |
| "loss": 0.001, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 262 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 320.5, |
| "epoch": 0.029438925423254513, |
| "grad_norm": 1.2802535059679794, |
| "kl": 0.09423828125, |
| "learning_rate": 2.66985776172147e-06, |
| "loss": 0.0411, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 263 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 434.359375, |
| "epoch": 0.029550860500909472, |
| "grad_norm": 3.6751799791380084, |
| "kl": 0.1397705078125, |
| "learning_rate": 2.6552943941532088e-06, |
| "loss": -0.0156, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 264 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.90625, |
| "epoch": 0.02966279557856443, |
| "grad_norm": 0.07806269511080617, |
| "kl": 0.0855712890625, |
| "learning_rate": 2.6407350002424927e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 265 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 298.84375, |
| "epoch": 0.029774730656219394, |
| "grad_norm": 0.06731730411902957, |
| "kl": 0.087646484375, |
| "learning_rate": 2.626180190872329e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 266 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 295.734375, |
| "epoch": 0.029886665733874353, |
| "grad_norm": 0.13087790730798726, |
| "kl": 0.0902099609375, |
| "learning_rate": 2.611630576733372e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 267 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 273.078125, |
| "epoch": 0.029998600811529313, |
| "grad_norm": 5.554675473835038, |
| "kl": 0.14794921875, |
| "learning_rate": 2.5970867682982885e-06, |
| "loss": -0.051, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 268 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 344.1875, |
| "epoch": 0.030110535889184272, |
| "grad_norm": 0.14583524682041712, |
| "kl": 0.0927734375, |
| "learning_rate": 2.582549375796154e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 269 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 290.578125, |
| "epoch": 0.030222470966839235, |
| "grad_norm": 0.058225539376494065, |
| "kl": 0.077880859375, |
| "learning_rate": 2.568019009186841e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 270 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 361.6875, |
| "epoch": 0.030334406044494194, |
| "grad_norm": 0.04344087707852648, |
| "kl": 0.0789794921875, |
| "learning_rate": 2.5534962781354317e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 271 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 305.953125, |
| "epoch": 0.030446341122149154, |
| "grad_norm": 0.07717630877616868, |
| "kl": 0.07293701171875, |
| "learning_rate": 2.538981791986634e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 272 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 419.171875, |
| "epoch": 0.030558276199804113, |
| "grad_norm": 0.22582461693188216, |
| "kl": 0.0968017578125, |
| "learning_rate": 2.524476159739218e-06, |
| "loss": 0.001, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 273 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 299.953125, |
| "epoch": 0.030670211277459072, |
| "grad_norm": 27.999475574336614, |
| "kl": 0.5250244140625, |
| "learning_rate": 2.5099799900204607e-06, |
| "loss": 0.0169, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 274 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 357.21875, |
| "epoch": 0.030782146355114035, |
| "grad_norm": 0.034837371039734215, |
| "kl": 0.07232666015625, |
| "learning_rate": 2.4954938910606108e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 275 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.265625, |
| "epoch": 0.030894081432768995, |
| "grad_norm": 18.866980558523725, |
| "kl": 1.3262939453125, |
| "learning_rate": 2.481018470667368e-06, |
| "loss": 0.0199, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 276 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 375.71875, |
| "epoch": 0.031006016510423954, |
| "grad_norm": 0.08715091318862284, |
| "kl": 0.0843505859375, |
| "learning_rate": 2.4665543362003802e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 277 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 383.90625, |
| "epoch": 0.031117951588078913, |
| "grad_norm": 18.97786056185319, |
| "kl": 0.388671875, |
| "learning_rate": 2.4521020945457615e-06, |
| "loss": 0.0333, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 278 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 292.890625, |
| "epoch": 0.031229886665733873, |
| "grad_norm": 0.3777705304986896, |
| "kl": 0.09979248046875, |
| "learning_rate": 2.4376623520906255e-06, |
| "loss": 0.001, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 279 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 306.59375, |
| "epoch": 0.031341821743388835, |
| "grad_norm": 23.98251858869545, |
| "kl": 5.38348388671875, |
| "learning_rate": 2.4232357146976478e-06, |
| "loss": 0.0275, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 280 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.40625, |
| "epoch": 0.031453756821043795, |
| "grad_norm": 0.09304733402356336, |
| "kl": 0.0772705078125, |
| "learning_rate": 2.408822787679637e-06, |
| "loss": 0.0008, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 281 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 354.25, |
| "epoch": 0.031565691898698754, |
| "grad_norm": 0.14440303776607324, |
| "kl": 0.0906982421875, |
| "learning_rate": 2.3944241757741475e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 282 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 323.765625, |
| "epoch": 0.031677626976353714, |
| "grad_norm": 2.359216668730623, |
| "kl": 0.90911865234375, |
| "learning_rate": 2.380040483118097e-06, |
| "loss": -0.0461, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 283 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 269.328125, |
| "epoch": 0.03178956205400867, |
| "grad_norm": 0.045061338534023075, |
| "kl": 0.05657958984375, |
| "learning_rate": 2.365672313222419e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 284 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 281.828125, |
| "epoch": 0.03190149713166363, |
| "grad_norm": 0.21772012539025168, |
| "kl": 0.0853271484375, |
| "learning_rate": 2.351320268946749e-06, |
| "loss": 0.0009, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 285 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 314.703125, |
| "epoch": 0.0320134322093186, |
| "grad_norm": 0.6734360714201397, |
| "kl": 0.1312255859375, |
| "learning_rate": 2.336984952474119e-06, |
| "loss": 0.0013, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 286 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 415.15625, |
| "epoch": 0.03212536728697356, |
| "grad_norm": 4.987286255355766, |
| "kl": 0.12139892578125, |
| "learning_rate": 2.322666965285697e-06, |
| "loss": -0.0532, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 287 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 400.9375, |
| "epoch": 0.03223730236462852, |
| "grad_norm": 0.6096977245482182, |
| "kl": 0.09234619140625, |
| "learning_rate": 2.3083669081355507e-06, |
| "loss": -0.0601, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 288 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 502.640625, |
| "epoch": 0.03234923744228348, |
| "grad_norm": 0.04237782613357625, |
| "kl": 0.06573486328125, |
| "learning_rate": 2.2940853810254377e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 289 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 324.53125, |
| "epoch": 0.032461172519938436, |
| "grad_norm": 0.029403412259251353, |
| "kl": 0.06024169921875, |
| "learning_rate": 2.2798229831796313e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 290 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 413.109375, |
| "epoch": 0.032573107597593395, |
| "grad_norm": 0.08363337386352669, |
| "kl": 0.0560302734375, |
| "learning_rate": 2.2655803130197816e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 291 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 318.734375, |
| "epoch": 0.032685042675248355, |
| "grad_norm": 0.023053884997018263, |
| "kl": 0.06512451171875, |
| "learning_rate": 2.2513579681398034e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 292 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 460.546875, |
| "epoch": 0.032796977752903314, |
| "grad_norm": 1.460556497688483, |
| "kl": 0.15765380859375, |
| "learning_rate": 2.237156545280803e-06, |
| "loss": -0.026, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 293 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 282.609375, |
| "epoch": 0.03290891283055827, |
| "grad_norm": 7.935071492001548, |
| "kl": 1.8072509765625, |
| "learning_rate": 2.2229766403060403e-06, |
| "loss": -0.0278, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 294 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 385.125, |
| "epoch": 0.03302084790821324, |
| "grad_norm": 0.01681019430958899, |
| "kl": 0.0477294921875, |
| "learning_rate": 2.2088188481759305e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 295 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.484375, |
| "epoch": 0.0331327829858682, |
| "grad_norm": 0.01847832809783982, |
| "kl": 0.04632568359375, |
| "learning_rate": 2.194683762923073e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 296 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 443.78125, |
| "epoch": 0.03324471806352316, |
| "grad_norm": 0.02629542045427332, |
| "kl": 0.0531005859375, |
| "learning_rate": 2.1805719776273387e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 297 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 371.09375, |
| "epoch": 0.03335665314117812, |
| "grad_norm": 0.039099364449013255, |
| "kl": 0.0689697265625, |
| "learning_rate": 2.166484084390974e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 298 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 348.171875, |
| "epoch": 0.03346858821883308, |
| "grad_norm": 0.054692981575624876, |
| "kl": 0.067138671875, |
| "learning_rate": 2.1524206743137636e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 299 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 453.71875, |
| "epoch": 0.033580523296488037, |
| "grad_norm": 0.020024754177952568, |
| "kl": 0.04815673828125, |
| "learning_rate": 2.1383823374682287e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 300 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 397.609375, |
| "epoch": 0.033692458374142996, |
| "grad_norm": 0.7962215451972806, |
| "kl": 0.0548095703125, |
| "learning_rate": 2.124369662874868e-06, |
| "loss": 0.0015, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 301 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 439.640625, |
| "epoch": 0.033804393451797955, |
| "grad_norm": 0.03594032335785641, |
| "kl": 0.04754638671875, |
| "learning_rate": 2.110383238477441e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 302 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 382.828125, |
| "epoch": 0.033916328529452915, |
| "grad_norm": 0.02676423646625272, |
| "kl": 0.0552978515625, |
| "learning_rate": 2.096423651118305e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 303 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 364.1875, |
| "epoch": 0.03402826360710788, |
| "grad_norm": 0.021001193681821163, |
| "kl": 0.04876708984375, |
| "learning_rate": 2.082491486513788e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 304 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 353.9375, |
| "epoch": 0.03414019868476284, |
| "grad_norm": 0.015515949438366839, |
| "kl": 0.0455322265625, |
| "learning_rate": 2.0685873292296116e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 305 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 502.484375, |
| "epoch": 0.0342521337624178, |
| "grad_norm": 0.10070236191895302, |
| "kl": 0.042633056640625, |
| "learning_rate": 2.054711762656369e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 306 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 423.890625, |
| "epoch": 0.03436406884007276, |
| "grad_norm": 8.449530301304263, |
| "kl": 0.17718505859375, |
| "learning_rate": 2.040865368985044e-06, |
| "loss": 0.0072, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 307 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 365.953125, |
| "epoch": 0.03447600391772772, |
| "grad_norm": 4.554366640679164, |
| "kl": 0.10430908203125, |
| "learning_rate": 2.027048729182583e-06, |
| "loss": 0.0096, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 308 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 400.78125, |
| "epoch": 0.03458793899538268, |
| "grad_norm": 1.4037888132598981, |
| "kl": 0.065673828125, |
| "learning_rate": 2.0132624229675205e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 309 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.390625, |
| "epoch": 0.03469987407303764, |
| "grad_norm": 0.08573893743799793, |
| "kl": 0.04962158203125, |
| "learning_rate": 1.9995070287856546e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 310 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 412.375, |
| "epoch": 0.034811809150692596, |
| "grad_norm": 14.764228813015494, |
| "kl": 0.43438720703125, |
| "learning_rate": 1.985783123785774e-06, |
| "loss": -0.03, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 311 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 279.03125, |
| "epoch": 0.034923744228347556, |
| "grad_norm": 0.2193000836614737, |
| "kl": 0.067138671875, |
| "learning_rate": 1.9720912837954486e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 312 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 385.421875, |
| "epoch": 0.035035679306002515, |
| "grad_norm": 0.1633478285227259, |
| "kl": 0.05523681640625, |
| "learning_rate": 1.958432083296862e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 313 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 388.4375, |
| "epoch": 0.03514761438365748, |
| "grad_norm": 0.145955357953582, |
| "kl": 0.051025390625, |
| "learning_rate": 1.9448060954027093e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 314 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 378.703125, |
| "epoch": 0.03525954946131244, |
| "grad_norm": 7.683536847345325, |
| "kl": 0.1790771484375, |
| "learning_rate": 1.931213891832153e-06, |
| "loss": -0.0279, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 315 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 410.09375, |
| "epoch": 0.0353714845389674, |
| "grad_norm": 0.19129137636650337, |
| "kl": 0.05731201171875, |
| "learning_rate": 1.9176560428868336e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 316 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 390.3125, |
| "epoch": 0.03548341961662236, |
| "grad_norm": 6.515657611937849, |
| "kl": 1.45611572265625, |
| "learning_rate": 1.9041331174269373e-06, |
| "loss": -0.0073, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 317 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 454.234375, |
| "epoch": 0.03559535469427732, |
| "grad_norm": 0.2025260783774205, |
| "kl": 0.05340576171875, |
| "learning_rate": 1.8906456828473341e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 318 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 407.5625, |
| "epoch": 0.03570728977193228, |
| "grad_norm": 4.576351957328261, |
| "kl": 1.09912109375, |
| "learning_rate": 1.8771943050537656e-06, |
| "loss": -0.0441, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 319 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 414.34375, |
| "epoch": 0.03581922484958724, |
| "grad_norm": 0.04886630482006485, |
| "kl": 0.0460205078125, |
| "learning_rate": 1.8637795484391046e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 320 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 456.3125, |
| "epoch": 0.0359311599272422, |
| "grad_norm": 0.21065449469813613, |
| "kl": 0.0640869140625, |
| "learning_rate": 1.8504019758596698e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 321 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 353.09375, |
| "epoch": 0.036043095004897156, |
| "grad_norm": 4.883286184793087, |
| "kl": 1.3126220703125, |
| "learning_rate": 1.8370621486116163e-06, |
| "loss": -0.0417, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 322 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 396.03125, |
| "epoch": 0.03615503008255212, |
| "grad_norm": 2.4312943229919903, |
| "kl": 0.7298583984375, |
| "learning_rate": 1.823760626407377e-06, |
| "loss": -0.0482, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 323 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.21875, |
| "epoch": 0.03626696516020708, |
| "grad_norm": 0.023794241531636316, |
| "kl": 0.0516357421875, |
| "learning_rate": 1.8104979673521838e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 324 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 338.34375, |
| "epoch": 0.03637890023786204, |
| "grad_norm": 0.06501484692075148, |
| "kl": 0.0458984375, |
| "learning_rate": 1.7972747279206482e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 325 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 392.140625, |
| "epoch": 0.036490835315517, |
| "grad_norm": 0.0554933350153533, |
| "kl": 0.045013427734375, |
| "learning_rate": 1.7840914629334122e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 326 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 361.5, |
| "epoch": 0.03660277039317196, |
| "grad_norm": 0.034907067888093626, |
| "kl": 0.05291748046875, |
| "learning_rate": 1.7709487255338731e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 327 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 375.453125, |
| "epoch": 0.03671470547082692, |
| "grad_norm": 0.04733189685853122, |
| "kl": 0.0457763671875, |
| "learning_rate": 1.7578470671649684e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 328 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 337.875, |
| "epoch": 0.03682664054848188, |
| "grad_norm": 8.191477588254072, |
| "kl": 0.31414794921875, |
| "learning_rate": 1.744787037546045e-06, |
| "loss": -0.0328, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 329 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.625, |
| "epoch": 0.03693857562613684, |
| "grad_norm": 4.485682653488296, |
| "kl": 0.06427001953125, |
| "learning_rate": 1.731769184649788e-06, |
| "loss": -0.0303, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 330 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 421.796875, |
| "epoch": 0.0370505107037918, |
| "grad_norm": 0.15001206134528802, |
| "kl": 0.054931640625, |
| "learning_rate": 1.7187940546792325e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 331 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 431.671875, |
| "epoch": 0.037162445781446764, |
| "grad_norm": 0.05596585517144101, |
| "kl": 0.04620361328125, |
| "learning_rate": 1.7058621920448465e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 332 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 374.171875, |
| "epoch": 0.03727438085910172, |
| "grad_norm": 0.21063855683262964, |
| "kl": 0.05145263671875, |
| "learning_rate": 1.6929741393416855e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 333 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 401.03125, |
| "epoch": 0.03738631593675668, |
| "grad_norm": 14.703670642314776, |
| "kl": 0.6478271484375, |
| "learning_rate": 1.6801304373266286e-06, |
| "loss": -0.0265, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 334 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 405.28125, |
| "epoch": 0.03749825101441164, |
| "grad_norm": 3.2693646713839533, |
| "kl": 0.2021484375, |
| "learning_rate": 1.667331624895689e-06, |
| "loss": -0.0538, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 335 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 464.15625, |
| "epoch": 0.0376101860920666, |
| "grad_norm": 0.0326789044313002, |
| "kl": 0.06781005859375, |
| "learning_rate": 1.6545782390614037e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 336 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.953125, |
| "epoch": 0.03772212116972156, |
| "grad_norm": 3.381374096168454, |
| "kl": 0.24603271484375, |
| "learning_rate": 1.6418708149302992e-06, |
| "loss": -0.0531, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 337 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 350.0625, |
| "epoch": 0.03783405624737652, |
| "grad_norm": 4.256284487475119, |
| "kl": 0.145263671875, |
| "learning_rate": 1.6292098856804423e-06, |
| "loss": -0.0518, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 338 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 244.484375, |
| "epoch": 0.03794599132503148, |
| "grad_norm": 4.758319146409537, |
| "kl": 0.52178955078125, |
| "learning_rate": 1.6165959825390661e-06, |
| "loss": -0.0436, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 339 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.828125, |
| "epoch": 0.03805792640268644, |
| "grad_norm": 0.06125517244847344, |
| "kl": 0.0491943359375, |
| "learning_rate": 1.604029634760284e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 340 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 363.828125, |
| "epoch": 0.038169861480341405, |
| "grad_norm": 0.680605704729485, |
| "kl": 0.10308837890625, |
| "learning_rate": 1.59151136960288e-06, |
| "loss": 0.001, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 341 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 393.265625, |
| "epoch": 0.038281796557996364, |
| "grad_norm": 0.06326323223836276, |
| "kl": 0.04736328125, |
| "learning_rate": 1.5790417123081903e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 342 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 381.671875, |
| "epoch": 0.038393731635651324, |
| "grad_norm": 0.049854259040433335, |
| "kl": 0.048828125, |
| "learning_rate": 1.5666211860780583e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 343 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.125, |
| "epoch": 0.03850566671330628, |
| "grad_norm": 0.07874363283266796, |
| "kl": 0.05059814453125, |
| "learning_rate": 1.5542503120528918e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 344 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 413.3125, |
| "epoch": 0.03861760179096124, |
| "grad_norm": 0.12008769303035174, |
| "kl": 0.0606689453125, |
| "learning_rate": 1.5419296092897866e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 345 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 445.28125, |
| "epoch": 0.0387295368686162, |
| "grad_norm": 0.10834525189023946, |
| "kl": 0.0592041015625, |
| "learning_rate": 1.529659594740755e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 346 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 378.25, |
| "epoch": 0.03884147194627116, |
| "grad_norm": 0.05706678172063102, |
| "kl": 0.040863037109375, |
| "learning_rate": 1.5174407832310338e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 347 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 316.15625, |
| "epoch": 0.03895340702392612, |
| "grad_norm": 0.2260930410291458, |
| "kl": 0.0599365234375, |
| "learning_rate": 1.5052736874374815e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 348 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 412.671875, |
| "epoch": 0.03906534210158108, |
| "grad_norm": 0.03574155761474703, |
| "kl": 0.0439453125, |
| "learning_rate": 1.4931588178670695e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 349 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 325.40625, |
| "epoch": 0.039177277179236046, |
| "grad_norm": 0.06765325310501516, |
| "kl": 0.05303955078125, |
| "learning_rate": 1.4810966828354605e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 350 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 382.625, |
| "epoch": 0.039289212256891005, |
| "grad_norm": 0.45741242496960755, |
| "kl": 0.063720703125, |
| "learning_rate": 1.469087788445684e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 351 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 415.0625, |
| "epoch": 0.039401147334545965, |
| "grad_norm": 0.059362152677566817, |
| "kl": 0.044921875, |
| "learning_rate": 1.4571326385668965e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 352 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 409.265625, |
| "epoch": 0.039513082412200924, |
| "grad_norm": 0.03486583265069237, |
| "kl": 0.04302978515625, |
| "learning_rate": 1.4452317348132434e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 353 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 549.171875, |
| "epoch": 0.039625017489855884, |
| "grad_norm": 0.044710017702799705, |
| "kl": 0.04583740234375, |
| "learning_rate": 1.4333855765228104e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 354 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 374.359375, |
| "epoch": 0.03973695256751084, |
| "grad_norm": 0.0864511831454649, |
| "kl": 0.04925537109375, |
| "learning_rate": 1.421594660736675e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 355 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 361.8125, |
| "epoch": 0.0398488876451658, |
| "grad_norm": 0.03699683675596586, |
| "kl": 0.04742431640625, |
| "learning_rate": 1.4098594821780476e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 356 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 362.8125, |
| "epoch": 0.03996082272282076, |
| "grad_norm": 0.07941048515036817, |
| "kl": 0.05633544921875, |
| "learning_rate": 1.3981805332315174e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 357 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 293.390625, |
| "epoch": 0.04007275780047572, |
| "grad_norm": 12.642109265209918, |
| "kl": 1.81060791015625, |
| "learning_rate": 1.3865583039223929e-06, |
| "loss": -0.0185, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 358 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 357.390625, |
| "epoch": 0.04018469287813069, |
| "grad_norm": 0.08034212517091562, |
| "kl": 0.0445556640625, |
| "learning_rate": 1.374993281896137e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 359 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 367.171875, |
| "epoch": 0.04029662795578565, |
| "grad_norm": 0.09767901214575864, |
| "kl": 0.04425048828125, |
| "learning_rate": 1.3634859523979134e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 360 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 392.578125, |
| "epoch": 0.040408563033440606, |
| "grad_norm": 0.07629861701658103, |
| "kl": 0.048583984375, |
| "learning_rate": 1.3520367982522208e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 361 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 359.375, |
| "epoch": 0.040520498111095565, |
| "grad_norm": 2.436909257977141, |
| "kl": 0.3397216796875, |
| "learning_rate": 1.3406462998426358e-06, |
| "loss": -0.0575, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 362 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.640625, |
| "epoch": 0.040632433188750525, |
| "grad_norm": 1.7488785493981298, |
| "kl": 0.41937255859375, |
| "learning_rate": 1.3293149350916595e-06, |
| "loss": -0.0315, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 363 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 340.265625, |
| "epoch": 0.040744368266405484, |
| "grad_norm": 0.042176241580247326, |
| "kl": 0.03741455078125, |
| "learning_rate": 1.3180431794406623e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 364 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 438.40625, |
| "epoch": 0.04085630334406044, |
| "grad_norm": 0.06954183102024396, |
| "kl": 0.0513916015625, |
| "learning_rate": 1.3068315058299358e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 365 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 384.453125, |
| "epoch": 0.0409682384217154, |
| "grad_norm": 0.0585988602116313, |
| "kl": 0.03924560546875, |
| "learning_rate": 1.2956803846788503e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 366 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 369.34375, |
| "epoch": 0.04108017349937036, |
| "grad_norm": 0.029227802328528226, |
| "kl": 0.05963134765625, |
| "learning_rate": 1.284590283866116e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 367 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 468.015625, |
| "epoch": 0.04119210857702533, |
| "grad_norm": 0.024030066071591895, |
| "kl": 0.039306640625, |
| "learning_rate": 1.2735616687101518e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 368 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.578125, |
| "epoch": 0.04130404365468029, |
| "grad_norm": 0.022956778080715768, |
| "kl": 0.04571533203125, |
| "learning_rate": 1.2625950019495614e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 369 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 419.140625, |
| "epoch": 0.04141597873233525, |
| "grad_norm": 0.03031369908455225, |
| "kl": 0.04296875, |
| "learning_rate": 1.251690743723718e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 370 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 327.125, |
| "epoch": 0.041527913809990206, |
| "grad_norm": 1.4619752022004908, |
| "kl": 0.42498779296875, |
| "learning_rate": 1.2408493515534581e-06, |
| "loss": -0.0518, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 371 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 403.390625, |
| "epoch": 0.041639848887645166, |
| "grad_norm": 0.06787211806577707, |
| "kl": 0.04547119140625, |
| "learning_rate": 1.2300712803218834e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 372 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 384.984375, |
| "epoch": 0.041751783965300125, |
| "grad_norm": 0.844949833560224, |
| "kl": 0.04034423828125, |
| "learning_rate": 1.2193569822552772e-06, |
| "loss": 0.0324, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 373 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 469.625, |
| "epoch": 0.041863719042955085, |
| "grad_norm": 0.5718530387307899, |
| "kl": 0.034820556640625, |
| "learning_rate": 1.2087069069041268e-06, |
| "loss": 0.014, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 374 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 408.0625, |
| "epoch": 0.041975654120610044, |
| "grad_norm": 0.03780725899196792, |
| "kl": 0.0430908203125, |
| "learning_rate": 1.1981215011242654e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 375 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 394.140625, |
| "epoch": 0.042087589198265, |
| "grad_norm": 0.022995562401170407, |
| "kl": 0.05279541015625, |
| "learning_rate": 1.1876012090581184e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 376 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 411.75, |
| "epoch": 0.04219952427591997, |
| "grad_norm": 0.10984965670222785, |
| "kl": 0.04351806640625, |
| "learning_rate": 1.177146472116071e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 377 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 400.6875, |
| "epoch": 0.04231145935357493, |
| "grad_norm": 3.2788406827242613, |
| "kl": 0.182952880859375, |
| "learning_rate": 1.1667577289579462e-06, |
| "loss": -0.0157, |
| "reward": 0.09687500260770321, |
| "reward_std": 0.008539125323295593, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 378 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.484375, |
| "epoch": 0.04242339443122989, |
| "grad_norm": 0.04631375660525243, |
| "kl": 0.0482177734375, |
| "learning_rate": 1.1564354154746007e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 379 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 361.921875, |
| "epoch": 0.04253532950888485, |
| "grad_norm": 0.5029931057300824, |
| "kl": 0.04144287109375, |
| "learning_rate": 1.146179964769635e-06, |
| "loss": -0.0096, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 380 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 408.53125, |
| "epoch": 0.04264726458653981, |
| "grad_norm": 2.062066313854755, |
| "kl": 0.10888671875, |
| "learning_rate": 1.1359918071412195e-06, |
| "loss": 0.0137, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 381 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 473.390625, |
| "epoch": 0.042759199664194766, |
| "grad_norm": 5.689014120759552, |
| "kl": 0.19818115234375, |
| "learning_rate": 1.1258713700640456e-06, |
| "loss": 0.002, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 382 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 378.40625, |
| "epoch": 0.042871134741849726, |
| "grad_norm": 5.456471510055751, |
| "kl": 0.1583251953125, |
| "learning_rate": 1.115819078171383e-06, |
| "loss": -0.0152, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 383 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 405.59375, |
| "epoch": 0.042983069819504685, |
| "grad_norm": 0.01860238339511713, |
| "kl": 0.04095458984375, |
| "learning_rate": 1.1058353532372667e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 384 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 382.15625, |
| "epoch": 0.043095004897159644, |
| "grad_norm": 0.13949154395909993, |
| "kl": 0.0496826171875, |
| "learning_rate": 1.0959206141587998e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 385 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 315.453125, |
| "epoch": 0.04320693997481461, |
| "grad_norm": 1.107285367889642, |
| "kl": 0.3203125, |
| "learning_rate": 1.0860752769385766e-06, |
| "loss": -0.0542, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 386 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 304.203125, |
| "epoch": 0.04331887505246957, |
| "grad_norm": 14.993173199026563, |
| "kl": 1.521728515625, |
| "learning_rate": 1.0762997546672279e-06, |
| "loss": -0.0433, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 387 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 456.78125, |
| "epoch": 0.04343081013012453, |
| "grad_norm": 0.10799334625634623, |
| "kl": 0.041534423828125, |
| "learning_rate": 1.0665944575060914e-06, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 388 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 410.546875, |
| "epoch": 0.04354274520777949, |
| "grad_norm": 0.37423665656457383, |
| "kl": 0.061767578125, |
| "learning_rate": 1.056959792669997e-06, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 389 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 373.703125, |
| "epoch": 0.04365468028543445, |
| "grad_norm": 0.19089367747123068, |
| "kl": 0.05108642578125, |
| "learning_rate": 1.0473961644101856e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 390 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 358.71875, |
| "epoch": 0.04376661536308941, |
| "grad_norm": 1.5778323232854403, |
| "kl": 0.1815185546875, |
| "learning_rate": 1.037903973997345e-06, |
| "loss": -0.0537, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 391 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 326.71875, |
| "epoch": 0.04387855044074437, |
| "grad_norm": 0.31904972301610846, |
| "kl": 0.052734375, |
| "learning_rate": 1.0284836197047737e-06, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 392 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 266.0625, |
| "epoch": 0.043990485518399326, |
| "grad_norm": 2.0851570445818868, |
| "kl": 0.2457275390625, |
| "learning_rate": 1.0191354967916712e-06, |
| "loss": 0.0287, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 393 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 415.109375, |
| "epoch": 0.044102420596054286, |
| "grad_norm": 0.3993597476384507, |
| "kl": 0.07476806640625, |
| "learning_rate": 1.0098599974865515e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 394 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 378.15625, |
| "epoch": 0.04421435567370925, |
| "grad_norm": 0.3569068198951939, |
| "kl": 0.06988525390625, |
| "learning_rate": 1.0006575109707898e-06, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 395 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 314.234375, |
| "epoch": 0.04432629075136421, |
| "grad_norm": 0.13286047664045245, |
| "kl": 0.04833984375, |
| "learning_rate": 9.915284233622877e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 396 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 416.9375, |
| "epoch": 0.04443822582901917, |
| "grad_norm": 2.2497661432685447, |
| "kl": 0.05743408203125, |
| "learning_rate": 9.824731176992796e-07, |
| "loss": -0.002, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 397 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 365.140625, |
| "epoch": 0.04455016090667413, |
| "grad_norm": 3.30119576808313, |
| "kl": 1.076416015625, |
| "learning_rate": 9.734919739242543e-07, |
| "loss": -0.0157, |
| "reward": 0.09687500260770321, |
| "reward_std": 0.008539125323295593, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 398 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 277.28125, |
| "epoch": 0.04466209598432909, |
| "grad_norm": 0.08032954719670685, |
| "kl": 0.06231689453125, |
| "learning_rate": 9.645853688680177e-07, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 399 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 379.8125, |
| "epoch": 0.04477403106198405, |
| "grad_norm": 0.30125779125635027, |
| "kl": 0.06890869140625, |
| "learning_rate": 9.557536762338786e-07, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 400 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 408.609375, |
| "epoch": 0.04488596613963901, |
| "grad_norm": 0.10410594418892839, |
| "kl": 0.04498291015625, |
| "learning_rate": 9.46997266581973e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 401 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 388.671875, |
| "epoch": 0.04499790121729397, |
| "grad_norm": 0.21768463357305143, |
| "kl": 0.04937744140625, |
| "learning_rate": 9.383165073137115e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 402 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 340.828125, |
| "epoch": 0.04510983629494893, |
| "grad_norm": 0.24943422959107125, |
| "kl": 0.054443359375, |
| "learning_rate": 9.297117626563687e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 403 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 398.4375, |
| "epoch": 0.04522177137260389, |
| "grad_norm": 0.15214769860694116, |
| "kl": 0.05352783203125, |
| "learning_rate": 9.211833936477957e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 404 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 309.265625, |
| "epoch": 0.04533370645025885, |
| "grad_norm": 0.12737968723084875, |
| "kl": 0.04833984375, |
| "learning_rate": 9.127317581212753e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 405 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 367.078125, |
| "epoch": 0.04544564152791381, |
| "grad_norm": 0.12491707622355717, |
| "kl": 0.0419921875, |
| "learning_rate": 9.043572106905084e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 406 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 347.171875, |
| "epoch": 0.04555757660556877, |
| "grad_norm": 0.2782478324098304, |
| "kl": 0.04632568359375, |
| "learning_rate": 8.960601027347321e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 407 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 344.59375, |
| "epoch": 0.04566951168322373, |
| "grad_norm": 1.707651816399516, |
| "kl": 0.17364501953125, |
| "learning_rate": 8.878407823839788e-07, |
| "loss": 0.0017, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 408 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 258.625, |
| "epoch": 0.04578144676087869, |
| "grad_norm": 0.18198728965219338, |
| "kl": 0.05511474609375, |
| "learning_rate": 8.796995945044689e-07, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 409 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 329.171875, |
| "epoch": 0.04589338183853365, |
| "grad_norm": 3.0116035887758312, |
| "kl": 0.07513427734375, |
| "learning_rate": 8.716368806841405e-07, |
| "loss": 0.0028, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 410 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 370.46875, |
| "epoch": 0.04600531691618861, |
| "grad_norm": 0.036581584760347785, |
| "kl": 0.04638671875, |
| "learning_rate": 8.636529792183171e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 411 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 412.40625, |
| "epoch": 0.04611725199384357, |
| "grad_norm": 1.7675486622831695, |
| "kl": 0.08441162109375, |
| "learning_rate": 8.557482250955144e-07, |
| "loss": 0.0507, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 412 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 376.046875, |
| "epoch": 0.046229187071498534, |
| "grad_norm": 0.8339879341516419, |
| "kl": 0.06793212890625, |
| "learning_rate": 8.479229499833844e-07, |
| "loss": -0.0386, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 413 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 269.328125, |
| "epoch": 0.046341122149153494, |
| "grad_norm": 0.03758254954126514, |
| "kl": 0.046630859375, |
| "learning_rate": 8.401774822147976e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 414 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 426.90625, |
| "epoch": 0.04645305722680845, |
| "grad_norm": 0.03884055882244416, |
| "kl": 0.0457763671875, |
| "learning_rate": 8.325121467740695e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 415 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 327.15625, |
| "epoch": 0.04656499230446341, |
| "grad_norm": 0.13058568836650014, |
| "kl": 0.06011962890625, |
| "learning_rate": 8.249272652833226e-07, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 416 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 389.59375, |
| "epoch": 0.04667692738211837, |
| "grad_norm": 0.05421556382240225, |
| "kl": 0.0401611328125, |
| "learning_rate": 8.174231559889931e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 417 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 266.25, |
| "epoch": 0.04678886245977333, |
| "grad_norm": 2.135742456611555, |
| "kl": 0.55340576171875, |
| "learning_rate": 8.100001337484787e-07, |
| "loss": -0.052, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 418 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 301.5625, |
| "epoch": 0.04690079753742829, |
| "grad_norm": 27.79993671743234, |
| "kl": 2.71258544921875, |
| "learning_rate": 8.026585100169251e-07, |
| "loss": 0.1087, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 419 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 381.640625, |
| "epoch": 0.04701273261508325, |
| "grad_norm": 0.3229458185249445, |
| "kl": 0.0728759765625, |
| "learning_rate": 7.953985928341601e-07, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 420 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 352.0625, |
| "epoch": 0.04712466769273821, |
| "grad_norm": 0.18899120451502113, |
| "kl": 0.04962158203125, |
| "learning_rate": 7.882206868117693e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 421 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 285.9375, |
| "epoch": 0.047236602770393175, |
| "grad_norm": 0.05378027086791007, |
| "kl": 0.0703125, |
| "learning_rate": 7.81125093120313e-07, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 422 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 366.609375, |
| "epoch": 0.047348537848048135, |
| "grad_norm": 0.275422779661784, |
| "kl": 0.05328369140625, |
| "learning_rate": 7.741121094766916e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 423 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.015625, |
| "epoch": 0.047460472925703094, |
| "grad_norm": 0.02946053453290786, |
| "kl": 0.04119873046875, |
| "learning_rate": 7.671820301316532e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 424 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 381.890625, |
| "epoch": 0.047572408003358053, |
| "grad_norm": 0.02258693420380951, |
| "kl": 0.0445556640625, |
| "learning_rate": 7.603351458574474e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 425 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 416.9375, |
| "epoch": 0.04768434308101301, |
| "grad_norm": 1.383085540888027, |
| "kl": 0.811279296875, |
| "learning_rate": 7.535717439356255e-07, |
| "loss": -0.0196, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 426 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 368.796875, |
| "epoch": 0.04779627815866797, |
| "grad_norm": 3.7777508498427164, |
| "kl": 0.77886962890625, |
| "learning_rate": 7.46892108144986e-07, |
| "loss": -0.0481, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 427 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 276.875, |
| "epoch": 0.04790821323632293, |
| "grad_norm": 0.10955470879553754, |
| "kl": 0.05078125, |
| "learning_rate": 7.402965187496697e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 428 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 380.671875, |
| "epoch": 0.04802014831397789, |
| "grad_norm": 0.04113851640884478, |
| "kl": 0.05523681640625, |
| "learning_rate": 7.337852524873974e-07, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 429 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 420.03125, |
| "epoch": 0.04813208339163285, |
| "grad_norm": 0.018684245104689922, |
| "kl": 0.03826904296875, |
| "learning_rate": 7.273585825578608e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 430 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 345.5, |
| "epoch": 0.04824401846928781, |
| "grad_norm": 8.259759861418749, |
| "kl": 1.716339111328125, |
| "learning_rate": 7.21016778611259e-07, |
| "loss": -0.0347, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 431 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 402.265625, |
| "epoch": 0.048355953546942776, |
| "grad_norm": 0.020343976644658462, |
| "kl": 0.033447265625, |
| "learning_rate": 7.147601067369835e-07, |
| "loss": 0.0003, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 432 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 351.609375, |
| "epoch": 0.048467888624597735, |
| "grad_norm": 1.0066422924478642, |
| "kl": 0.75701904296875, |
| "learning_rate": 7.085888294524561e-07, |
| "loss": -0.0467, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 433 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 415.34375, |
| "epoch": 0.048579823702252695, |
| "grad_norm": 0.026562111650317053, |
| "kl": 0.04150390625, |
| "learning_rate": 7.025032056921117e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 434 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 308.921875, |
| "epoch": 0.048691758779907654, |
| "grad_norm": 0.03150175672968521, |
| "kl": 0.03955078125, |
| "learning_rate": 6.965034907965349e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 435 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 343.296875, |
| "epoch": 0.04880369385756261, |
| "grad_norm": 0.030310737599258293, |
| "kl": 0.04107666015625, |
| "learning_rate": 6.905899365017462e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 436 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 319.328125, |
| "epoch": 0.04891562893521757, |
| "grad_norm": 1.8528337687242826, |
| "kl": 1.40911865234375, |
| "learning_rate": 6.847627909286409e-07, |
| "loss": -0.0344, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 437 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 328.515625, |
| "epoch": 0.04902756401287253, |
| "grad_norm": 0.028586652468959688, |
| "kl": 0.04345703125, |
| "learning_rate": 6.790222985725761e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 438 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 335.4375, |
| "epoch": 0.04913949909052749, |
| "grad_norm": 0.08766284200366595, |
| "kl": 0.0438232421875, |
| "learning_rate": 6.733687002931141e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 439 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 305.109375, |
| "epoch": 0.04925143416818245, |
| "grad_norm": 2.391400552951245, |
| "kl": 0.09228515625, |
| "learning_rate": 6.678022333039158e-07, |
| "loss": -0.0495, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 440 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 322.40625, |
| "epoch": 0.04936336924583742, |
| "grad_norm": 0.02480090522832702, |
| "kl": 0.0423583984375, |
| "learning_rate": 6.623231311627876e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 441 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 395.546875, |
| "epoch": 0.049475304323492376, |
| "grad_norm": 0.019725004180409796, |
| "kl": 0.0413818359375, |
| "learning_rate": 6.569316237618811e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 442 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 359.765625, |
| "epoch": 0.049587239401147336, |
| "grad_norm": 0.04162342966221699, |
| "kl": 0.0498046875, |
| "learning_rate": 6.516279373180499e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 443 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 344.640625, |
| "epoch": 0.049699174478802295, |
| "grad_norm": 0.02391594067619232, |
| "kl": 0.0406494140625, |
| "learning_rate": 6.464122943633543e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 444 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 369.609375, |
| "epoch": 0.049811109556457255, |
| "grad_norm": 0.026401154596888455, |
| "kl": 0.03631591796875, |
| "learning_rate": 6.412849137357271e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 445 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 378.40625, |
| "epoch": 0.049923044634112214, |
| "grad_norm": 0.02165150509733936, |
| "kl": 0.04083251953125, |
| "learning_rate": 6.3624601056979e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 446 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 424.734375, |
| "epoch": 0.05003497971176717, |
| "grad_norm": 0.056253725649908846, |
| "kl": 0.04132080078125, |
| "learning_rate": 6.312957962878278e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 447 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 332.90625, |
| "epoch": 0.05014691478942213, |
| "grad_norm": 0.026148724794812197, |
| "kl": 0.05267333984375, |
| "learning_rate": 6.264344785909181e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 448 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 310.484375, |
| "epoch": 0.05025884986707709, |
| "grad_norm": 0.1390653311408599, |
| "kl": 0.049560546875, |
| "learning_rate": 6.216622614502149e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 449 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 388.671875, |
| "epoch": 0.05037078494473206, |
| "grad_norm": 0.07042561536180422, |
| "kl": 0.048095703125, |
| "learning_rate": 6.169793450983916e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 450 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 368.046875, |
| "epoch": 0.05048272002238702, |
| "grad_norm": 0.8824459143035142, |
| "kl": 0.163818359375, |
| "learning_rate": 6.123859260212393e-07, |
| "loss": 0.0016, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 451 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 328.421875, |
| "epoch": 0.05059465510004198, |
| "grad_norm": 0.9135028043813113, |
| "kl": 0.13751220703125, |
| "learning_rate": 6.07882196949423e-07, |
| "loss": -0.0596, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 452 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 396.40625, |
| "epoch": 0.050706590177696936, |
| "grad_norm": 0.02488219762931012, |
| "kl": 0.0374755859375, |
| "learning_rate": 6.034683468503948e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 453 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 339.765625, |
| "epoch": 0.050818525255351896, |
| "grad_norm": 0.07775771681049337, |
| "kl": 0.040863037109375, |
| "learning_rate": 5.991445609204641e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 454 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 323.953125, |
| "epoch": 0.050930460333006855, |
| "grad_norm": 0.23941770812751062, |
| "kl": 0.07257080078125, |
| "learning_rate": 5.949110205770292e-07, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 455 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 377.90625, |
| "epoch": 0.051042395410661814, |
| "grad_norm": 0.838372427480566, |
| "kl": 0.10174560546875, |
| "learning_rate": 5.90767903450964e-07, |
| "loss": -0.0595, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 456 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 354.3125, |
| "epoch": 0.051154330488316774, |
| "grad_norm": 0.02696775344751943, |
| "kl": 0.0364990234375, |
| "learning_rate": 5.867153833791652e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 457 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 293.890625, |
| "epoch": 0.05126626556597173, |
| "grad_norm": 2.593781001852504, |
| "kl": 0.27880859375, |
| "learning_rate": 5.827536303972587e-07, |
| "loss": 0.0028, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 458 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 440.28125, |
| "epoch": 0.0513782006436267, |
| "grad_norm": 0.09378689742071065, |
| "kl": 0.0506591796875, |
| "learning_rate": 5.78882810732465e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 459 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 286.09375, |
| "epoch": 0.05149013572128166, |
| "grad_norm": 3.1614265708752236, |
| "kl": 0.16015625, |
| "learning_rate": 5.75103086796625e-07, |
| "loss": 0.0016, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 460 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 311.296875, |
| "epoch": 0.05160207079893662, |
| "grad_norm": 1.7687368403392676, |
| "kl": 0.1033935546875, |
| "learning_rate": 5.714146171793846e-07, |
| "loss": -0.0578, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 461 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 377.265625, |
| "epoch": 0.05171400587659158, |
| "grad_norm": 0.049362194712489586, |
| "kl": 0.035614013671875, |
| "learning_rate": 5.678175566415422e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 462 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 364.171875, |
| "epoch": 0.05182594095424654, |
| "grad_norm": 0.02337468550122759, |
| "kl": 0.0418701171875, |
| "learning_rate": 5.643120561085528e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 463 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 286.125, |
| "epoch": 0.051937876031901496, |
| "grad_norm": 0.19211609005932148, |
| "kl": 0.06243896484375, |
| "learning_rate": 5.608982626641991e-07, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 464 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 460.9375, |
| "epoch": 0.052049811109556456, |
| "grad_norm": 0.02527496415068125, |
| "kl": 0.0364990234375, |
| "learning_rate": 5.575763195444166e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 465 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 337.75, |
| "epoch": 0.052161746187211415, |
| "grad_norm": 0.02483601186922771, |
| "kl": 0.0379638671875, |
| "learning_rate": 5.543463661312847e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 466 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 382.203125, |
| "epoch": 0.052273681264866374, |
| "grad_norm": 0.030569824177275738, |
| "kl": 0.036956787109375, |
| "learning_rate": 5.512085379471808e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 467 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 372.96875, |
| "epoch": 0.05238561634252134, |
| "grad_norm": 0.870995271718928, |
| "kl": 0.053680419921875, |
| "learning_rate": 5.481629666490903e-07, |
| "loss": -0.0283, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 468 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 255.546875, |
| "epoch": 0.0524975514201763, |
| "grad_norm": 0.054911233572157374, |
| "kl": 0.04248046875, |
| "learning_rate": 5.452097800230853e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 469 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 365.671875, |
| "epoch": 0.05260948649783126, |
| "grad_norm": 0.9189343613920824, |
| "kl": 0.35272216796875, |
| "learning_rate": 5.423491019789623e-07, |
| "loss": -0.0551, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 470 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 364.390625, |
| "epoch": 0.05272142157548622, |
| "grad_norm": 0.0633802951576789, |
| "kl": 0.03961181640625, |
| "learning_rate": 5.395810525450425e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 471 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 332.03125, |
| "epoch": 0.05283335665314118, |
| "grad_norm": 0.1011785822244599, |
| "kl": 0.053466796875, |
| "learning_rate": 5.369057478631359e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 472 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 287.609375, |
| "epoch": 0.05294529173079614, |
| "grad_norm": 0.13769887874885445, |
| "kl": 0.0439453125, |
| "learning_rate": 5.343233001836694e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 473 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 387.34375, |
| "epoch": 0.0530572268084511, |
| "grad_norm": 0.09302862947626754, |
| "kl": 0.03985595703125, |
| "learning_rate": 5.318338178609754e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 474 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 374.40625, |
| "epoch": 0.053169161886106056, |
| "grad_norm": 0.1263278332387542, |
| "kl": 0.062255859375, |
| "learning_rate": 5.294374053487459e-07, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 475 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 358.015625, |
| "epoch": 0.053281096963761015, |
| "grad_norm": 0.020700901911474265, |
| "kl": 0.0355224609375, |
| "learning_rate": 5.271341631956511e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 476 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 401.046875, |
| "epoch": 0.05339303204141598, |
| "grad_norm": 0.027139069654941325, |
| "kl": 0.0396728515625, |
| "learning_rate": 5.249241880411181e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 477 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 515.703125, |
| "epoch": 0.05350496711907094, |
| "grad_norm": 0.04265200650936088, |
| "kl": 0.060302734375, |
| "learning_rate": 5.228075726112785e-07, |
| "loss": 0.0006, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 478 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.640625, |
| "epoch": 0.0536169021967259, |
| "grad_norm": 0.025504104198776033, |
| "kl": 0.03570556640625, |
| "learning_rate": 5.207844057150768e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 479 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 213.78125, |
| "epoch": 0.05372883727438086, |
| "grad_norm": 18.185082196534506, |
| "kl": 1.0419921875, |
| "learning_rate": 5.188547722405437e-07, |
| "loss": 0.0419, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 480 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 390.96875, |
| "epoch": 0.05384077235203582, |
| "grad_norm": 0.03327137868690443, |
| "kl": 0.0408935546875, |
| "learning_rate": 5.170187531512351e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 481 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 426.703125, |
| "epoch": 0.05395270742969078, |
| "grad_norm": 1.7420279438246644, |
| "kl": 1.31329345703125, |
| "learning_rate": 5.152764254828348e-07, |
| "loss": -0.0354, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 482 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 432.5, |
| "epoch": 0.05406464250734574, |
| "grad_norm": 0.33787614688903167, |
| "kl": 0.06884765625, |
| "learning_rate": 5.136278623399225e-07, |
| "loss": 0.0007, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 483 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 291.515625, |
| "epoch": 0.0541765775850007, |
| "grad_norm": 0.023809496275349114, |
| "kl": 0.05426025390625, |
| "learning_rate": 5.120731328929058e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 484 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 383.3125, |
| "epoch": 0.05428851266265566, |
| "grad_norm": 0.015058815890629395, |
| "kl": 0.0362548828125, |
| "learning_rate": 5.106123023751187e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 485 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 373.515625, |
| "epoch": 0.05440044774031062, |
| "grad_norm": 0.019845886834378008, |
| "kl": 0.03704833984375, |
| "learning_rate": 5.092454320800833e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 486 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 336.0, |
| "epoch": 0.05451238281796558, |
| "grad_norm": 0.05683684336283696, |
| "kl": 0.0426025390625, |
| "learning_rate": 5.079725793589405e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 487 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 286.71875, |
| "epoch": 0.05462431789562054, |
| "grad_norm": 0.03418640906474549, |
| "kl": 0.04443359375, |
| "learning_rate": 5.067937976180407e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 488 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 318.0625, |
| "epoch": 0.0547362529732755, |
| "grad_norm": 0.036114039198282856, |
| "kl": 0.05450439453125, |
| "learning_rate": 5.057091363167046e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 489 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 376.90625, |
| "epoch": 0.05484818805093046, |
| "grad_norm": 0.023332365324867536, |
| "kl": 0.03631591796875, |
| "learning_rate": 5.047186409651489e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 490 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 388.671875, |
| "epoch": 0.05496012312858542, |
| "grad_norm": 13.172841203218344, |
| "kl": 2.97564697265625, |
| "learning_rate": 5.038223531225742e-07, |
| "loss": 0.0339, |
| "reward": 0.09687500074505806, |
| "reward_std": 0.012500000186264515, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.96875, |
| "step": 491 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 310.796875, |
| "epoch": 0.05507205820624038, |
| "grad_norm": 9.652533322003624, |
| "kl": 0.4891357421875, |
| "learning_rate": 5.030203103954232e-07, |
| "loss": -0.0404, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 492 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 302.296875, |
| "epoch": 0.05518399328389534, |
| "grad_norm": 0.030140393405088423, |
| "kl": 0.0369873046875, |
| "learning_rate": 5.023125464358026e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 493 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 353.0, |
| "epoch": 0.0552959283615503, |
| "grad_norm": 0.02422588912502369, |
| "kl": 0.037200927734375, |
| "learning_rate": 5.016990909400709e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 494 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 370.34375, |
| "epoch": 0.055407863439205264, |
| "grad_norm": 0.0372886708561457, |
| "kl": 0.05389404296875, |
| "learning_rate": 5.011799696475915e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 495 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 384.703125, |
| "epoch": 0.05551979851686022, |
| "grad_norm": 0.01359500582951308, |
| "kl": 0.035980224609375, |
| "learning_rate": 5.007552043396547e-07, |
| "loss": 0.0004, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 496 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 400.46875, |
| "epoch": 0.05563173359451518, |
| "grad_norm": 0.018893589493420345, |
| "kl": 0.04736328125, |
| "learning_rate": 5.004248128385618e-07, |
| "loss": 0.0005, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 497 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 356.9375, |
| "epoch": 0.05574366867217014, |
| "grad_norm": 2.740738937008755, |
| "kl": 0.7550048828125, |
| "learning_rate": 5.001888090068784e-07, |
| "loss": -0.0421, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 498 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 386.421875, |
| "epoch": 0.0558556037498251, |
| "grad_norm": 0.9492236255194394, |
| "kl": 0.24884033203125, |
| "learning_rate": 5.000472027468528e-07, |
| "loss": -0.0556, |
| "reward": 0.09843750111758709, |
| "reward_std": 0.0062500000931322575, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 0.984375, |
| "step": 499 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 344.65625, |
| "epoch": 0.05596753882748006, |
| "grad_norm": 0.024972922874043218, |
| "kl": 0.033905029296875, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.0003, |
| "reward": 0.10000000149011612, |
| "reward_std": 0.0, |
| "rewards/code_reward": 0.0, |
| "rewards/format_reward": 1.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05596753882748006, |
| "step": 500, |
| "total_flos": 0.0, |
| "train_loss": 0.028706138839246706, |
| "train_runtime": 9937.8104, |
| "train_samples_per_second": 3.22, |
| "train_steps_per_second": 0.05 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|