| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 500, |
| "global_step": 156, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.0064, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.25e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 1 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.0128, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 2 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.0192, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 3 |
| }, |
| { |
| "completion_length": 1020.34375, |
| "epoch": 0.0256, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 4 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.032, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 6.25e-06, |
| "loss": 0.0, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 5 |
| }, |
| { |
| "completion_length": 1019.09375, |
| "epoch": 0.0384, |
| "grad_norm": 0.11545927077531815, |
| "kl": 0.0, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.0, |
| "reward": 0.046875, |
| "reward_std": 0.06733439117670059, |
| "rewards/accuracy_reward": 0.046875, |
| "rewards/format_reward": 0.0, |
| "step": 6 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.0448, |
| "grad_norm": 0.0010597483487799764, |
| "kl": 9.465217590332031e-05, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 7 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.0512, |
| "grad_norm": 0.0010805391939356923, |
| "kl": 9.632110595703125e-05, |
| "learning_rate": 1e-05, |
| "loss": 0.0, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 8 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.0576, |
| "grad_norm": 0.0016811740351840854, |
| "kl": 0.00012564659118652344, |
| "learning_rate": 1.125e-05, |
| "loss": 0.0, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 9 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.064, |
| "grad_norm": 0.0018933276878669858, |
| "kl": 0.00016236305236816406, |
| "learning_rate": 1.25e-05, |
| "loss": 0.0, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 10 |
| }, |
| { |
| "completion_length": 1022.078125, |
| "epoch": 0.0704, |
| "grad_norm": 0.11562428623437881, |
| "kl": 0.0006074905395507812, |
| "learning_rate": 1.375e-05, |
| "loss": 0.0, |
| "reward": 0.09375, |
| "reward_std": 0.0625, |
| "rewards/accuracy_reward": 0.09375, |
| "rewards/format_reward": 0.0, |
| "step": 11 |
| }, |
| { |
| "completion_length": 1018.828125, |
| "epoch": 0.0768, |
| "grad_norm": 0.003929761704057455, |
| "kl": 0.0005993843078613281, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 12 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.0832, |
| "grad_norm": 0.03520556539297104, |
| "kl": 0.002246856689453125, |
| "learning_rate": 1.6250000000000002e-05, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 13 |
| }, |
| { |
| "completion_length": 1019.796875, |
| "epoch": 0.0896, |
| "grad_norm": 0.007564186584204435, |
| "kl": 0.00232696533203125, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 14 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.096, |
| "grad_norm": 0.08798696100711823, |
| "kl": 0.00222015380859375, |
| "learning_rate": 1.8750000000000002e-05, |
| "loss": 0.0001, |
| "reward": 0.140625, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.140625, |
| "rewards/format_reward": 0.0, |
| "step": 15 |
| }, |
| { |
| "completion_length": 1019.109375, |
| "epoch": 0.1024, |
| "grad_norm": 0.09157592058181763, |
| "kl": 0.00362396240234375, |
| "learning_rate": 2e-05, |
| "loss": 0.0001, |
| "reward": 0.046875, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.046875, |
| "rewards/format_reward": 0.0, |
| "step": 16 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1088, |
| "grad_norm": 0.010128885507583618, |
| "kl": 0.00473785400390625, |
| "learning_rate": 1.999748234942507e-05, |
| "loss": 0.0002, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 17 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1152, |
| "grad_norm": 0.08485038578510284, |
| "kl": 0.008880615234375, |
| "learning_rate": 1.9989930665413148e-05, |
| "loss": 0.0004, |
| "reward": 0.03125, |
| "reward_std": 0.03608439117670059, |
| "rewards/accuracy_reward": 0.03125, |
| "rewards/format_reward": 0.0, |
| "step": 18 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1216, |
| "grad_norm": 0.013642659410834312, |
| "kl": 0.00960540771484375, |
| "learning_rate": 1.997734875046456e-05, |
| "loss": 0.0004, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 19 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.128, |
| "grad_norm": 0.01660318858921528, |
| "kl": 0.0130615234375, |
| "learning_rate": 1.9959742939952393e-05, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 20 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1344, |
| "grad_norm": 0.01704811304807663, |
| "kl": 0.0155792236328125, |
| "learning_rate": 1.9937122098932428e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 21 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1408, |
| "grad_norm": 0.014823434874415398, |
| "kl": 0.013824462890625, |
| "learning_rate": 1.990949761767935e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 22 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1472, |
| "grad_norm": 0.08114675432443619, |
| "kl": 0.0141143798828125, |
| "learning_rate": 1.9876883405951378e-05, |
| "loss": 0.0006, |
| "reward": 0.015625, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.015625, |
| "rewards/format_reward": 0.0, |
| "step": 23 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1536, |
| "grad_norm": 0.015050478279590607, |
| "kl": 0.0153656005859375, |
| "learning_rate": 1.98392958859863e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 24 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.16, |
| "grad_norm": 0.014087294228374958, |
| "kl": 0.014617919921875, |
| "learning_rate": 1.9796753984232357e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 25 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1664, |
| "grad_norm": 0.012405358254909515, |
| "kl": 0.0152130126953125, |
| "learning_rate": 1.9749279121818235e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 26 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1728, |
| "grad_norm": 0.012326586991548538, |
| "kl": 0.01531982421875, |
| "learning_rate": 1.969689520376687e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 27 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1792, |
| "grad_norm": 0.011726093478500843, |
| "kl": 0.014434814453125, |
| "learning_rate": 1.9639628606958535e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 28 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1856, |
| "grad_norm": 0.01226333249360323, |
| "kl": 0.0152740478515625, |
| "learning_rate": 1.9577508166849308e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 29 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.192, |
| "grad_norm": 0.085689015686512, |
| "kl": 0.0193023681640625, |
| "learning_rate": 1.9510565162951538e-05, |
| "loss": 0.0008, |
| "reward": 0.015625, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.015625, |
| "rewards/format_reward": 0.0, |
| "step": 30 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.1984, |
| "grad_norm": 0.013328597880899906, |
| "kl": 0.0130462646484375, |
| "learning_rate": 1.9438833303083677e-05, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 31 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2048, |
| "grad_norm": 0.013332940638065338, |
| "kl": 0.01568603515625, |
| "learning_rate": 1.9362348706397374e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 32 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2112, |
| "grad_norm": 0.013586850836873055, |
| "kl": 0.0170440673828125, |
| "learning_rate": 1.928114988519039e-05, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 33 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2176, |
| "grad_norm": 0.014487874694168568, |
| "kl": 0.0222625732421875, |
| "learning_rate": 1.919527772551451e-05, |
| "loss": 0.0009, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 34 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.224, |
| "grad_norm": 0.014178161509335041, |
| "kl": 0.021728515625, |
| "learning_rate": 1.9104775466588162e-05, |
| "loss": 0.0009, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 35 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2304, |
| "grad_norm": 0.012242796830832958, |
| "kl": 0.018096923828125, |
| "learning_rate": 1.900968867902419e-05, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 36 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2368, |
| "grad_norm": 0.08061395585536957, |
| "kl": 0.0169525146484375, |
| "learning_rate": 1.891006524188368e-05, |
| "loss": 0.0007, |
| "reward": 0.015625, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.015625, |
| "rewards/format_reward": 0.0, |
| "step": 37 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2432, |
| "grad_norm": 0.011660503223538399, |
| "kl": 0.01715087890625, |
| "learning_rate": 1.880595531856738e-05, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 38 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2496, |
| "grad_norm": 0.012358421459794044, |
| "kl": 0.0159454345703125, |
| "learning_rate": 1.8697411331556958e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 39 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.256, |
| "grad_norm": 0.013630127534270287, |
| "kl": 0.019073486328125, |
| "learning_rate": 1.8584487936018663e-05, |
| "loss": 0.0008, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 40 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2624, |
| "grad_norm": 0.012698043137788773, |
| "kl": 0.0185699462890625, |
| "learning_rate": 1.8467241992282842e-05, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 41 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2688, |
| "grad_norm": 0.10450158268213272, |
| "kl": 0.01519775390625, |
| "learning_rate": 1.834573253721303e-05, |
| "loss": 0.0006, |
| "reward": 0.03125, |
| "reward_std": 0.0625, |
| "rewards/accuracy_reward": 0.03125, |
| "rewards/format_reward": 0.0, |
| "step": 42 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2752, |
| "grad_norm": 0.011531068943440914, |
| "kl": 0.014739990234375, |
| "learning_rate": 1.8220020754479104e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 43 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2816, |
| "grad_norm": 0.010796881280839443, |
| "kl": 0.0131988525390625, |
| "learning_rate": 1.8090169943749477e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 44 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.288, |
| "grad_norm": 0.013068209402263165, |
| "kl": 0.0175018310546875, |
| "learning_rate": 1.795624548881781e-05, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 45 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.2944, |
| "grad_norm": 0.011865634471178055, |
| "kl": 0.0152435302734375, |
| "learning_rate": 1.78183148246803e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 46 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3008, |
| "grad_norm": 0.010992134921252728, |
| "kl": 0.0125885009765625, |
| "learning_rate": 1.7676447403580114e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 47 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3072, |
| "grad_norm": 0.01211047638207674, |
| "kl": 0.0198211669921875, |
| "learning_rate": 1.7530714660036112e-05, |
| "loss": 0.0008, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 48 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3136, |
| "grad_norm": 0.011479598470032215, |
| "kl": 0.0194091796875, |
| "learning_rate": 1.738118997487341e-05, |
| "loss": 0.0008, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 49 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.32, |
| "grad_norm": 0.012375111691653728, |
| "kl": 0.0240020751953125, |
| "learning_rate": 1.7227948638273918e-05, |
| "loss": 0.001, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 50 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3264, |
| "grad_norm": 0.010566718876361847, |
| "kl": 0.0144195556640625, |
| "learning_rate": 1.7071067811865477e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 51 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3328, |
| "grad_norm": 0.010176180861890316, |
| "kl": 0.01507568359375, |
| "learning_rate": 1.691062648986865e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 52 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3392, |
| "grad_norm": 0.0095510957762599, |
| "kl": 0.0172271728515625, |
| "learning_rate": 1.6746705459320746e-05, |
| "loss": 0.0007, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 53 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3456, |
| "grad_norm": 0.008464411832392216, |
| "kl": 0.0118865966796875, |
| "learning_rate": 1.657938725939713e-05, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 54 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.352, |
| "grad_norm": 0.010119668208062649, |
| "kl": 0.0164337158203125, |
| "learning_rate": 1.6408756139850243e-05, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 55 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3584, |
| "grad_norm": 0.013174059800803661, |
| "kl": 0.015625, |
| "learning_rate": 1.6234898018587336e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 56 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3648, |
| "grad_norm": 0.011142551898956299, |
| "kl": 0.019500732421875, |
| "learning_rate": 1.60579004384082e-05, |
| "loss": 0.0008, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 57 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3712, |
| "grad_norm": 0.011646406725049019, |
| "kl": 0.015411376953125, |
| "learning_rate": 1.5877852522924733e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 58 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3776, |
| "grad_norm": 0.009249154478311539, |
| "kl": 0.012603759765625, |
| "learning_rate": 1.569484493168452e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 59 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.384, |
| "grad_norm": 0.07278093695640564, |
| "kl": 0.012664794921875, |
| "learning_rate": 1.5508969814521026e-05, |
| "loss": 0.0005, |
| "reward": 0.015625, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.015625, |
| "rewards/format_reward": 0.0, |
| "step": 60 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3904, |
| "grad_norm": 0.009469371289014816, |
| "kl": 0.011566162109375, |
| "learning_rate": 1.5320320765153367e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 61 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.3968, |
| "grad_norm": 0.010748224332928658, |
| "kl": 0.013092041015625, |
| "learning_rate": 1.5128992774059063e-05, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 62 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4032, |
| "grad_norm": 0.01076967641711235, |
| "kl": 0.0132904052734375, |
| "learning_rate": 1.493508218064347e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 63 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4096, |
| "grad_norm": 0.07990572601556778, |
| "kl": 0.0186309814453125, |
| "learning_rate": 1.4738686624729987e-05, |
| "loss": 0.0007, |
| "reward": 0.09375, |
| "reward_std": 0.03608439117670059, |
| "rewards/accuracy_reward": 0.09375, |
| "rewards/format_reward": 0.0, |
| "step": 64 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.416, |
| "grad_norm": 0.010965151712298393, |
| "kl": 0.01556396484375, |
| "learning_rate": 1.4539904997395468e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 65 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4224, |
| "grad_norm": 0.008827170357108116, |
| "kl": 0.0107879638671875, |
| "learning_rate": 1.4338837391175582e-05, |
| "loss": 0.0004, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 66 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4288, |
| "grad_norm": 0.009334170259535313, |
| "kl": 0.0099334716796875, |
| "learning_rate": 1.4135585049665207e-05, |
| "loss": 0.0004, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 67 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4352, |
| "grad_norm": 0.009643997065722942, |
| "kl": 0.01165771484375, |
| "learning_rate": 1.3930250316539237e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 68 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4416, |
| "grad_norm": 0.010741900652647018, |
| "kl": 0.01458740234375, |
| "learning_rate": 1.3722936584019453e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 69 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.448, |
| "grad_norm": 0.010083486326038837, |
| "kl": 0.0158233642578125, |
| "learning_rate": 1.3513748240813429e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 70 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4544, |
| "grad_norm": 0.011387434788048267, |
| "kl": 0.0134124755859375, |
| "learning_rate": 1.3302790619551673e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 71 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4608, |
| "grad_norm": 0.009537500329315662, |
| "kl": 0.0139312744140625, |
| "learning_rate": 1.3090169943749475e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 72 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4672, |
| "grad_norm": 0.010195745155215263, |
| "kl": 0.0109100341796875, |
| "learning_rate": 1.2875993274320173e-05, |
| "loss": 0.0004, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 73 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4736, |
| "grad_norm": 0.009297964163124561, |
| "kl": 0.0154876708984375, |
| "learning_rate": 1.2660368455666752e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 74 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.48, |
| "grad_norm": 0.08940494805574417, |
| "kl": 0.0132598876953125, |
| "learning_rate": 1.2443404061378941e-05, |
| "loss": 0.0005, |
| "reward": 0.03125, |
| "reward_std": 0.03608439117670059, |
| "rewards/accuracy_reward": 0.03125, |
| "rewards/format_reward": 0.0, |
| "step": 75 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4864, |
| "grad_norm": 0.082063689827919, |
| "kl": 0.01239013671875, |
| "learning_rate": 1.2225209339563144e-05, |
| "loss": 0.0005, |
| "reward": 0.078125, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.078125, |
| "rewards/format_reward": 0.0, |
| "step": 76 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4928, |
| "grad_norm": 0.09806675463914871, |
| "kl": 0.010894775390625, |
| "learning_rate": 1.200589415783273e-05, |
| "loss": 0.0004, |
| "reward": 0.015625, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.015625, |
| "rewards/format_reward": 0.0, |
| "step": 77 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.4992, |
| "grad_norm": 0.009176390245556831, |
| "kl": 0.011138916015625, |
| "learning_rate": 1.1785568947986368e-05, |
| "loss": 0.0004, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 78 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5056, |
| "grad_norm": 0.010212006978690624, |
| "kl": 0.0101165771484375, |
| "learning_rate": 1.156434465040231e-05, |
| "loss": 0.0004, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 79 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.512, |
| "grad_norm": 0.01116965152323246, |
| "kl": 0.012969970703125, |
| "learning_rate": 1.1342332658176556e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 80 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5184, |
| "grad_norm": 0.013367475010454655, |
| "kl": 0.015472412109375, |
| "learning_rate": 1.1119644761033079e-05, |
| "loss": 0.0006, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 81 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5248, |
| "grad_norm": 0.012096280232071877, |
| "kl": 0.0129547119140625, |
| "learning_rate": 1.0896393089034336e-05, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 82 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5312, |
| "grad_norm": 0.012920072302222252, |
| "kl": 0.0158538818359375, |
| "learning_rate": 1.0672690056120398e-05, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 83 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5376, |
| "grad_norm": 0.08177845180034637, |
| "kl": 0.01666259765625, |
| "learning_rate": 1.044864830350515e-05, |
| "loss": 0.0007, |
| "reward": 0.015625, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.015625, |
| "rewards/format_reward": 0.0, |
| "step": 84 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.544, |
| "grad_norm": 0.010724878869950771, |
| "kl": 0.014251708984375, |
| "learning_rate": 1.0224380642958052e-05, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 85 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5504, |
| "grad_norm": 0.011316907592117786, |
| "kl": 0.0172119140625, |
| "learning_rate": 1e-05, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 86 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5568, |
| "grad_norm": 0.010359718464314938, |
| "kl": 0.01251220703125, |
| "learning_rate": 9.775619357041952e-06, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 87 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5632, |
| "grad_norm": 0.012716464698314667, |
| "kl": 0.017181396484375, |
| "learning_rate": 9.551351696494854e-06, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 88 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5696, |
| "grad_norm": 0.012020590715110302, |
| "kl": 0.019500732421875, |
| "learning_rate": 9.327309943879604e-06, |
| "loss": 0.0008, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 89 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.576, |
| "grad_norm": 0.01044081524014473, |
| "kl": 0.0130157470703125, |
| "learning_rate": 9.103606910965666e-06, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 90 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5824, |
| "grad_norm": 0.012544355355203152, |
| "kl": 0.017578125, |
| "learning_rate": 8.880355238966923e-06, |
| "loss": 0.0007, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 91 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5888, |
| "grad_norm": 0.0739496573805809, |
| "kl": 0.0176849365234375, |
| "learning_rate": 8.657667341823449e-06, |
| "loss": 0.0007, |
| "reward": 0.15625, |
| "reward_std": 0.03608439117670059, |
| "rewards/accuracy_reward": 0.15625, |
| "rewards/format_reward": 0.0, |
| "step": 92 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.5952, |
| "grad_norm": 0.009553548879921436, |
| "kl": 0.0131378173828125, |
| "learning_rate": 8.43565534959769e-06, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 93 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6016, |
| "grad_norm": 0.7742120623588562, |
| "kl": 0.056976318359375, |
| "learning_rate": 8.214431052013636e-06, |
| "loss": 0.0023, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 94 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.608, |
| "grad_norm": 0.008847479708492756, |
| "kl": 0.0131378173828125, |
| "learning_rate": 7.994105842167274e-06, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 95 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6144, |
| "grad_norm": 0.00984633807092905, |
| "kl": 0.014739990234375, |
| "learning_rate": 7.774790660436857e-06, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 96 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6208, |
| "grad_norm": 0.009609334170818329, |
| "kl": 0.015411376953125, |
| "learning_rate": 7.556595938621058e-06, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 97 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6272, |
| "grad_norm": 0.01706218160688877, |
| "kl": 0.01641845703125, |
| "learning_rate": 7.33963154433325e-06, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 98 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6336, |
| "grad_norm": 0.08538887649774551, |
| "kl": 0.0143585205078125, |
| "learning_rate": 7.124006725679828e-06, |
| "loss": 0.0006, |
| "reward": 0.078125, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.078125, |
| "rewards/format_reward": 0.0, |
| "step": 99 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.64, |
| "grad_norm": 0.009256948716938496, |
| "kl": 0.0128173828125, |
| "learning_rate": 6.909830056250527e-06, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 100 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6464, |
| "grad_norm": 0.009882168844342232, |
| "kl": 0.0126190185546875, |
| "learning_rate": 6.697209380448333e-06, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 101 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6528, |
| "grad_norm": 0.01010966207832098, |
| "kl": 0.016387939453125, |
| "learning_rate": 6.486251759186573e-06, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 102 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6592, |
| "grad_norm": 0.010379007086157799, |
| "kl": 0.0193939208984375, |
| "learning_rate": 6.277063415980549e-06, |
| "loss": 0.0008, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 103 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6656, |
| "grad_norm": 0.010923578403890133, |
| "kl": 0.0181732177734375, |
| "learning_rate": 6.069749683460765e-06, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 104 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.672, |
| "grad_norm": 0.009636354632675648, |
| "kl": 0.016082763671875, |
| "learning_rate": 5.864414950334796e-06, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 105 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6784, |
| "grad_norm": 0.010779432021081448, |
| "kl": 0.013214111328125, |
| "learning_rate": 5.66116260882442e-06, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 106 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6848, |
| "grad_norm": 0.01111359242349863, |
| "kl": 0.0189971923828125, |
| "learning_rate": 5.460095002604533e-06, |
| "loss": 0.0008, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 107 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6912, |
| "grad_norm": 0.010898874141275883, |
| "kl": 0.0147247314453125, |
| "learning_rate": 5.2613133752700145e-06, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 108 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.6976, |
| "grad_norm": 0.009893275797367096, |
| "kl": 0.016815185546875, |
| "learning_rate": 5.064917819356532e-06, |
| "loss": 0.0007, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 109 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.704, |
| "grad_norm": 0.010735229589045048, |
| "kl": 0.0157470703125, |
| "learning_rate": 4.87100722594094e-06, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 110 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7104, |
| "grad_norm": 0.0866052657365799, |
| "kl": 0.025604248046875, |
| "learning_rate": 4.679679234846636e-06, |
| "loss": 0.001, |
| "reward": 0.078125, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.078125, |
| "rewards/format_reward": 0.0, |
| "step": 111 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7168, |
| "grad_norm": 0.008755321614444256, |
| "kl": 0.0163421630859375, |
| "learning_rate": 4.491030185478976e-06, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 112 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7232, |
| "grad_norm": 0.008368578739464283, |
| "kl": 0.01385498046875, |
| "learning_rate": 4.305155068315481e-06, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 113 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7296, |
| "grad_norm": 0.010693593882024288, |
| "kl": 0.0185546875, |
| "learning_rate": 4.12214747707527e-06, |
| "loss": 0.0007, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 114 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.736, |
| "grad_norm": 0.07237356901168823, |
| "kl": 0.01715087890625, |
| "learning_rate": 3.942099561591802e-06, |
| "loss": 0.0007, |
| "reward": 0.140625, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.140625, |
| "rewards/format_reward": 0.0, |
| "step": 115 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7424, |
| "grad_norm": 0.008473976515233517, |
| "kl": 0.012969970703125, |
| "learning_rate": 3.7651019814126656e-06, |
| "loss": 0.0005, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 116 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7488, |
| "grad_norm": 0.008722187951207161, |
| "kl": 0.015594482421875, |
| "learning_rate": 3.591243860149759e-06, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 117 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7552, |
| "grad_norm": 0.08019968122243881, |
| "kl": 0.0188140869140625, |
| "learning_rate": 3.4206127406028744e-06, |
| "loss": 0.0008, |
| "reward": 0.078125, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.078125, |
| "rewards/format_reward": 0.0, |
| "step": 118 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7616, |
| "grad_norm": 0.008491527289152145, |
| "kl": 0.01312255859375, |
| "learning_rate": 3.2532945406792573e-06, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 119 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.768, |
| "grad_norm": 0.08431468904018402, |
| "kl": 0.0164947509765625, |
| "learning_rate": 3.089373510131354e-06, |
| "loss": 0.0007, |
| "reward": 0.078125, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.078125, |
| "rewards/format_reward": 0.0, |
| "step": 120 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7744, |
| "grad_norm": 0.07306572049856186, |
| "kl": 0.0167236328125, |
| "learning_rate": 2.9289321881345257e-06, |
| "loss": 0.0007, |
| "reward": 0.09375, |
| "reward_std": 0.03608439117670059, |
| "rewards/accuracy_reward": 0.09375, |
| "rewards/format_reward": 0.0, |
| "step": 121 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7808, |
| "grad_norm": 0.010250038467347622, |
| "kl": 0.0156707763671875, |
| "learning_rate": 2.7720513617260857e-06, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 122 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7872, |
| "grad_norm": 0.00793982483446598, |
| "kl": 0.013214111328125, |
| "learning_rate": 2.6188100251265947e-06, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 123 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.7936, |
| "grad_norm": 0.008872825652360916, |
| "kl": 0.014404296875, |
| "learning_rate": 2.469285339963892e-06, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 124 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8, |
| "grad_norm": 0.009312779642641544, |
| "kl": 0.0134124755859375, |
| "learning_rate": 2.323552596419889e-06, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 125 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8064, |
| "grad_norm": 0.012824698351323605, |
| "kl": 0.025543212890625, |
| "learning_rate": 2.1816851753197023e-06, |
| "loss": 0.001, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 126 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8128, |
| "grad_norm": 0.009544136002659798, |
| "kl": 0.01507568359375, |
| "learning_rate": 2.043754511182191e-06, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 127 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8192, |
| "grad_norm": 0.010160642676055431, |
| "kl": 0.0191650390625, |
| "learning_rate": 1.9098300562505266e-06, |
| "loss": 0.0008, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 128 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8256, |
| "grad_norm": 0.01088634692132473, |
| "kl": 0.018035888671875, |
| "learning_rate": 1.7799792455209019e-06, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 129 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.832, |
| "grad_norm": 0.00801018439233303, |
| "kl": 0.0134735107421875, |
| "learning_rate": 1.6542674627869738e-06, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 130 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8384, |
| "grad_norm": 0.008347578346729279, |
| "kl": 0.012359619140625, |
| "learning_rate": 1.5327580077171589e-06, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 131 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8448, |
| "grad_norm": 0.009452232159674168, |
| "kl": 0.01336669921875, |
| "learning_rate": 1.4155120639813392e-06, |
| "loss": 0.0005, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 132 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8512, |
| "grad_norm": 0.009305072017014027, |
| "kl": 0.014617919921875, |
| "learning_rate": 1.3025886684430467e-06, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 133 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8576, |
| "grad_norm": 0.008793935179710388, |
| "kl": 0.0133514404296875, |
| "learning_rate": 1.19404468143262e-06, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 134 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.864, |
| "grad_norm": 0.12133859097957611, |
| "kl": 0.0153350830078125, |
| "learning_rate": 1.0899347581163222e-06, |
| "loss": 0.0006, |
| "reward": 0.046875, |
| "reward_std": 0.06733439117670059, |
| "rewards/accuracy_reward": 0.046875, |
| "rewards/format_reward": 0.0, |
| "step": 135 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8704, |
| "grad_norm": 0.009324206039309502, |
| "kl": 0.018707275390625, |
| "learning_rate": 9.903113209758098e-07, |
| "loss": 0.0007, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 136 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8768, |
| "grad_norm": 0.009810159914195538, |
| "kl": 0.01409912109375, |
| "learning_rate": 8.952245334118415e-07, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 137 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8832, |
| "grad_norm": 0.008117611519992352, |
| "kl": 0.015106201171875, |
| "learning_rate": 8.047222744854943e-07, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 138 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.8896, |
| "grad_norm": 0.007806665264070034, |
| "kl": 0.0112762451171875, |
| "learning_rate": 7.188501148096117e-07, |
| "loss": 0.0005, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 139 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.896, |
| "grad_norm": 0.009384059347212315, |
| "kl": 0.0153961181640625, |
| "learning_rate": 6.37651293602628e-07, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 140 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9024, |
| "grad_norm": 0.012416942976415157, |
| "kl": 0.017059326171875, |
| "learning_rate": 5.611666969163243e-07, |
| "loss": 0.0007, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 141 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9088, |
| "grad_norm": 0.010456549935042858, |
| "kl": 0.0162200927734375, |
| "learning_rate": 4.894348370484648e-07, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 142 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9152, |
| "grad_norm": 0.008049213327467442, |
| "kl": 0.01568603515625, |
| "learning_rate": 4.224918331506955e-07, |
| "loss": 0.0006, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 143 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9216, |
| "grad_norm": 0.0871976688504219, |
| "kl": 0.0141754150390625, |
| "learning_rate": 3.603713930414676e-07, |
| "loss": 0.0006, |
| "reward": 0.15625, |
| "reward_std": 0.03608439117670059, |
| "rewards/accuracy_reward": 0.15625, |
| "rewards/format_reward": 0.0, |
| "step": 144 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.928, |
| "grad_norm": 0.007249566726386547, |
| "kl": 0.011077880859375, |
| "learning_rate": 3.0310479623313125e-07, |
| "loss": 0.0004, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 145 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9344, |
| "grad_norm": 0.008691670373082161, |
| "kl": 0.0131988525390625, |
| "learning_rate": 2.507208781817638e-07, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 146 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9408, |
| "grad_norm": 0.08003751933574677, |
| "kl": 0.0145416259765625, |
| "learning_rate": 2.0324601576764525e-07, |
| "loss": 0.0006, |
| "reward": 0.078125, |
| "reward_std": 0.03125, |
| "rewards/accuracy_reward": 0.078125, |
| "rewards/format_reward": 0.0, |
| "step": 147 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9472, |
| "grad_norm": 0.007675584405660629, |
| "kl": 0.01171875, |
| "learning_rate": 1.6070411401370335e-07, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 148 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9536, |
| "grad_norm": 0.007958381436765194, |
| "kl": 0.0104522705078125, |
| "learning_rate": 1.231165940486234e-07, |
| "loss": 0.0004, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 149 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.96, |
| "grad_norm": 0.010069821961224079, |
| "kl": 0.0141448974609375, |
| "learning_rate": 9.0502382320653e-08, |
| "loss": 0.0006, |
| "reward": 0.1875, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.1875, |
| "rewards/format_reward": 0.0, |
| "step": 150 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9664, |
| "grad_norm": 0.008058452978730202, |
| "kl": 0.0142822265625, |
| "learning_rate": 6.287790106757396e-08, |
| "loss": 0.0006, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 151 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9728, |
| "grad_norm": 0.00876203365623951, |
| "kl": 0.0171051025390625, |
| "learning_rate": 4.025706004760932e-08, |
| "loss": 0.0007, |
| "reward": 0.125, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.125, |
| "rewards/format_reward": 0.0, |
| "step": 152 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9792, |
| "grad_norm": 0.009895979426801205, |
| "kl": 0.0211944580078125, |
| "learning_rate": 2.265124953543918e-08, |
| "loss": 0.0008, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 153 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9856, |
| "grad_norm": 0.008299603126943111, |
| "kl": 0.0113067626953125, |
| "learning_rate": 1.0069334586854106e-08, |
| "loss": 0.0005, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0, |
| "rewards/format_reward": 0.0, |
| "step": 154 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.992, |
| "grad_norm": 0.007576911710202694, |
| "kl": 0.0111236572265625, |
| "learning_rate": 2.5176505749346937e-09, |
| "loss": 0.0004, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 155 |
| }, |
| { |
| "completion_length": 1024.0, |
| "epoch": 0.9984, |
| "grad_norm": 0.008262473158538342, |
| "kl": 0.015533447265625, |
| "learning_rate": 0.0, |
| "loss": 0.0006, |
| "reward": 0.0625, |
| "reward_std": 0.0, |
| "rewards/accuracy_reward": 0.0625, |
| "rewards/format_reward": 0.0, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.9984, |
| "step": 156, |
| "total_flos": 0.0, |
| "train_loss": 0.0, |
| "train_runtime": 1.7346, |
| "train_samples_per_second": 1440.706, |
| "train_steps_per_second": 89.936 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|