{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.2727272727272725, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 0.42830961220897734, "epoch": 0.007272727272727273, "frac_reward_zero_std": 0.46875, "grad_norm": 0.646968424320221, "learning_rate": 1.4091988636363636e-05, "loss": -0.0, "num_tokens": 197304.0, "reward": 0.63671875, "reward_std": 0.23533933330327272, "rewards/phased_json_reward/mean": 0.63671875, "rewards/phased_json_reward/std": 0.3294768799096346, "step": 16, "step_time": 6.092447507195175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 0.5094600756419823, "epoch": 0.014545454545454545, "frac_reward_zero_std": 0.890625, "grad_norm": 1.981223702430725, "learning_rate": 1.4083443181818183e-05, "loss": 0.0, "num_tokens": 394552.0, "reward": 0.931640625, "reward_std": 0.04123233538120985, "rewards/phased_json_reward/mean": 0.931640625, "rewards/phased_json_reward/std": 0.07807412650436163, "step": 32, "step_time": 6.241545998607762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 4.857936299405992, "epoch": 0.02181818181818182, "frac_reward_zero_std": 0.296875, "grad_norm": 4.860863208770752, "learning_rate": 1.4074897727272727e-05, "loss": -0.0, "num_tokens": 593872.0, "reward": 0.330078125, "reward_std": 0.3210840136744082, "rewards/phased_json_reward/mean": 0.330078125, "rewards/phased_json_reward/std": 0.3847697665914893, "step": 48, "step_time": 6.55877190240426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.998046875, "completions/max_length": 256.0, "completions/max_terminated_length": 9.875, "completions/mean_length": 255.80859375, "completions/mean_terminated_length": 9.875, "completions/min_length": 249.875, "completions/min_terminated_length": 9.875, "entropy": 7.90592198446393, "epoch": 0.02909090909090909, "frac_reward_zero_std": 0.953125, "grad_norm": 0.0, "learning_rate": 1.4066352272727272e-05, "loss": -0.0, "num_tokens": 791974.0, "reward": 0.0078125, "reward_std": 0.01828151335939765, "rewards/phased_json_reward/mean": 0.0078125, "rewards/phased_json_reward/std": 0.029557579196989536, "step": 64, "step_time": 6.82581608381588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.98046875, "completions/max_length": 256.0, "completions/max_terminated_length": 47.75, "completions/mean_length": 253.859375, "completions/mean_terminated_length": 39.197916984558105, "completions/min_length": 220.125, "completions/min_terminated_length": 28.125, "entropy": 8.064402721822262, "epoch": 0.03636363636363636, "frac_reward_zero_std": 0.953125, "grad_norm": 0.0, "learning_rate": 1.4057806818181818e-05, "loss": -0.0, "num_tokens": 989270.0, "reward": 0.005859375, "reward_std": 0.016572814900428057, "rewards/phased_json_reward/mean": 0.005859375, "rewards/phased_json_reward/std": 0.026419460773468018, "step": 80, "step_time": 6.864816441084258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.943359375, "completions/max_length": 256.0, "completions/max_terminated_length": 181.0625, "completions/mean_length": 251.91015625, "completions/mean_terminated_length": 154.4166669845581, "completions/min_length": 160.5, "completions/min_terminated_length": 128.5, "entropy": 11.036133036017418, "epoch": 0.04363636363636364, "frac_reward_zero_std": 0.796875, "grad_norm": 0.0, "learning_rate": 1.4049261363636365e-05, "loss": 0.0024, "num_tokens": 1186272.0, "reward": 0.02734375, "reward_std": 0.07352422922849655, "rewards/phased_json_reward/mean": 0.02734375, "rewards/phased_json_reward/std": 0.11195407994091511, "step": 96, "step_time": 6.840786207292695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 6.178761022165418, "epoch": 0.05090909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.404071590909091e-05, "loss": 0.0, "num_tokens": 1384584.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 112, "step_time": 6.850926381011959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3679739739745855, "epoch": 0.05818181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.4032170454545456e-05, "loss": 0.0, "num_tokens": 1582080.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 128, "step_time": 6.848654073488433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.056913835927844, "epoch": 0.06545454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.4023625e-05, "loss": 0.0, "num_tokens": 1778904.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 144, "step_time": 6.8916237821104005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.074340298771858, "epoch": 0.07272727272727272, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.4015079545454547e-05, "loss": 0.0, "num_tokens": 1977336.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 160, "step_time": 6.8557646567351185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.200064916163683, "epoch": 0.08, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.4006534090909092e-05, "loss": 0.0, "num_tokens": 2176672.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 176, "step_time": 6.845825185824651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3241069186478853, "epoch": 0.08727272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3997988636363637e-05, "loss": 0.0, "num_tokens": 2372864.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 192, "step_time": 6.798604555544443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.972260655835271, "epoch": 0.09454545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3989443181818183e-05, "loss": 0.0, "num_tokens": 2571040.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 208, "step_time": 6.8837419137125835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.5393687821924686, "epoch": 0.10181818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3980897727272728e-05, "loss": 0.0, "num_tokens": 2766736.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 224, "step_time": 6.887793117028195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.8803269220516086, "epoch": 0.10909090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3972352272727274e-05, "loss": 0.0, "num_tokens": 2963744.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 240, "step_time": 6.842434576828964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1804261477664113, "epoch": 0.11636363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3963806818181819e-05, "loss": 0.0, "num_tokens": 3163200.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 256, "step_time": 6.885555928805843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1913011521101, "epoch": 0.12363636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3955261363636364e-05, "loss": 0.0, "num_tokens": 3362632.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 272, "step_time": 6.890110642649233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4572390522807837, "epoch": 0.13090909090909092, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.394671590909091e-05, "loss": 0.0, "num_tokens": 3557872.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 288, "step_time": 6.849147691507824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1283934861421585, "epoch": 0.13818181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3938170454545455e-05, "loss": 0.0, "num_tokens": 3758016.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 304, "step_time": 6.893458035716321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.025675481185317, "epoch": 0.14545454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3929625000000001e-05, "loss": 0.0, "num_tokens": 3957072.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 320, "step_time": 6.88795695907902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.401237423531711, "epoch": 0.15272727272727274, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3921079545454546e-05, "loss": 0.0, "num_tokens": 4157008.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 336, "step_time": 6.975196688377764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2594400718808174, "epoch": 0.16, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.391253409090909e-05, "loss": 0.0, "num_tokens": 4353360.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 352, "step_time": 6.774752923694905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0049337027594447, "epoch": 0.16727272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3903988636363637e-05, "loss": 0.0, "num_tokens": 4550704.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 368, "step_time": 6.776181065128185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.057218801230192, "epoch": 0.17454545454545456, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3895443181818182e-05, "loss": 0.0, "num_tokens": 4751440.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 384, "step_time": 6.834552641317714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.084166483953595, "epoch": 0.18181818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3886897727272727e-05, "loss": 0.0, "num_tokens": 4954720.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 400, "step_time": 6.851137411780655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2319228379055858, "epoch": 0.1890909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3878352272727273e-05, "loss": 0.0, "num_tokens": 5152656.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 416, "step_time": 6.775262774550356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.087355736643076, "epoch": 0.19636363636363635, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3869806818181818e-05, "loss": 0.0, "num_tokens": 5352128.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 432, "step_time": 6.806187288369983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1158332098275423, "epoch": 0.20363636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3861261363636364e-05, "loss": 0.0, "num_tokens": 5551104.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 448, "step_time": 6.791217401507311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.370364161208272, "epoch": 0.2109090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3852715909090909e-05, "loss": 0.0, "num_tokens": 5745648.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 464, "step_time": 6.827869251719676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2214455530047417, "epoch": 0.21818181818181817, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3844170454545454e-05, "loss": 0.0, "num_tokens": 5942184.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 480, "step_time": 6.780751351150684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4838474420830607, "epoch": 0.22545454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3835625e-05, "loss": 0.0, "num_tokens": 6139936.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 496, "step_time": 6.812705361051485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2406448647379875, "epoch": 0.23272727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3827079545454545e-05, "loss": 0.0, "num_tokens": 6338696.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 512, "step_time": 6.824429966392927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.337201427668333, "epoch": 0.24, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3818534090909091e-05, "loss": 0.0, "num_tokens": 6538192.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 528, "step_time": 6.816003460087813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2296589333564043, "epoch": 0.24727272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3809988636363638e-05, "loss": 0.0, "num_tokens": 6738168.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 544, "step_time": 6.777038000465836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3775191511958838, "epoch": 0.2545454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3801443181818183e-05, "loss": 0.0, "num_tokens": 6935200.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 560, "step_time": 6.80725452414481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2904440509155393, "epoch": 0.26181818181818184, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3792897727272729e-05, "loss": 0.0, "num_tokens": 7134664.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 576, "step_time": 6.837113229208626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3340134397149086, "epoch": 0.2690909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3784352272727274e-05, "loss": 0.0, "num_tokens": 7332544.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 592, "step_time": 6.751015797315631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.114750794135034, "epoch": 0.27636363636363637, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3775806818181818e-05, "loss": 0.0, "num_tokens": 7530032.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 608, "step_time": 6.808422329369932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9973993664607406, "epoch": 0.28363636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3767261363636365e-05, "loss": 0.0, "num_tokens": 7729976.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 624, "step_time": 6.8062825029483065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.138758448883891, "epoch": 0.2909090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.375871590909091e-05, "loss": 0.0, "num_tokens": 7931648.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 640, "step_time": 6.747273121669423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4777450608089566, "epoch": 0.29818181818181816, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3750170454545456e-05, "loss": 0.0, "num_tokens": 8129520.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 656, "step_time": 6.801122222619597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3086934061720967, "epoch": 0.3054545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3741625e-05, "loss": 0.0, "num_tokens": 8325576.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 672, "step_time": 6.7866363351931795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0702755441889167, "epoch": 0.31272727272727274, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3733079545454545e-05, "loss": 0.0, "num_tokens": 8525608.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 688, "step_time": 6.792434763687197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1070512793958187, "epoch": 0.32, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3724534090909092e-05, "loss": 0.0, "num_tokens": 8727528.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 704, "step_time": 6.834328093973454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.5615911297500134, "epoch": 0.32727272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3715988636363637e-05, "loss": 0.0, "num_tokens": 8925104.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 720, "step_time": 6.751501765102148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1564355148002505, "epoch": 0.33454545454545453, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3707443181818183e-05, "loss": 0.0, "num_tokens": 9123336.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 736, "step_time": 6.769851226534229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4383005276322365, "epoch": 0.3418181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3698897727272728e-05, "loss": 0.0, "num_tokens": 9318184.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 752, "step_time": 6.735914218821563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3242766577750444, "epoch": 0.3490909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3690352272727273e-05, "loss": 0.0, "num_tokens": 9516768.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 768, "step_time": 6.764547313621733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.198952840641141, "epoch": 0.3563636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3681806818181819e-05, "loss": 0.0, "num_tokens": 9716792.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 784, "step_time": 6.795565195614472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.269397678785026, "epoch": 0.36363636363636365, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3673261363636364e-05, "loss": 0.0, "num_tokens": 9910896.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 800, "step_time": 6.826461906952318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.652379145845771, "epoch": 0.3709090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.366471590909091e-05, "loss": 0.0, "num_tokens": 10104888.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 816, "step_time": 6.726004549302161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1831045616418123, "epoch": 0.3781818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3656170454545455e-05, "loss": 0.0, "num_tokens": 10306424.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 832, "step_time": 6.786695882154163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.074679055251181, "epoch": 0.38545454545454544, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3647625e-05, "loss": 0.0, "num_tokens": 10505688.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 848, "step_time": 6.809287193464115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1155389808118343, "epoch": 0.3927272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3639079545454546e-05, "loss": 0.0, "num_tokens": 10705760.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 864, "step_time": 6.785546476719901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.333749057725072, "epoch": 0.4, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.363053409090909e-05, "loss": 0.0, "num_tokens": 10904320.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 880, "step_time": 6.80823243613122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4373437957838178, "epoch": 0.4072727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3621988636363637e-05, "loss": 0.0, "num_tokens": 11102216.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 896, "step_time": 6.764123514120001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1984064765274525, "epoch": 0.41454545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3613443181818182e-05, "loss": 0.0, "num_tokens": 11301192.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 912, "step_time": 6.791246214008424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2342392737045884, "epoch": 0.4218181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3604897727272727e-05, "loss": 0.0, "num_tokens": 11499160.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 928, "step_time": 6.8632384678930975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3517434494569898, "epoch": 0.4290909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3596352272727273e-05, "loss": 0.0, "num_tokens": 11699704.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 944, "step_time": 6.8105885534896515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.096210742369294, "epoch": 0.43636363636363634, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3587806818181818e-05, "loss": 0.0, "num_tokens": 11903248.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 960, "step_time": 6.8464043863350525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2437717225402594, "epoch": 0.44363636363636366, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3579261363636363e-05, "loss": 0.0, "num_tokens": 12101112.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 976, "step_time": 6.825504575506784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0635962896049023, "epoch": 0.4509090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3570715909090909e-05, "loss": 0.0, "num_tokens": 12299056.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 992, "step_time": 6.814487671887036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3715159576386213, "epoch": 0.4581818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3562170454545455e-05, "loss": 0.0, "num_tokens": 12498240.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1008, "step_time": 6.855824781639967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.249473374336958, "epoch": 0.46545454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3553625000000002e-05, "loss": 0.0, "num_tokens": 12698888.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1024, "step_time": 6.82246611622395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9984740586951375, "epoch": 0.4727272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3545079545454547e-05, "loss": 0.0, "num_tokens": 12898736.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1040, "step_time": 6.777579111163504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0234864549711347, "epoch": 0.48, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3536534090909091e-05, "loss": 0.0, "num_tokens": 13099704.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1056, "step_time": 6.884962650714442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.022543553262949, "epoch": 0.48727272727272725, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3527988636363638e-05, "loss": 0.0, "num_tokens": 13298200.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1072, "step_time": 6.744426833873149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.102074005641043, "epoch": 0.49454545454545457, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3519443181818183e-05, "loss": 0.0, "num_tokens": 13497656.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1088, "step_time": 6.766534966300242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.393769825808704, "epoch": 0.5018181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3510897727272729e-05, "loss": 0.0, "num_tokens": 13695544.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1104, "step_time": 6.776737172156572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.045584147796035, "epoch": 0.509090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3502352272727274e-05, "loss": 0.0, "num_tokens": 13893656.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1120, "step_time": 6.779956117738038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.084079368971288, "epoch": 0.5163636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3493806818181818e-05, "loss": 0.0, "num_tokens": 14095056.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1136, "step_time": 6.782520697277505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1256261626258492, "epoch": 0.5236363636363637, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3485261363636365e-05, "loss": 0.0, "num_tokens": 14296296.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1152, "step_time": 6.842673333128914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3446692936122417, "epoch": 0.5309090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.347671590909091e-05, "loss": 0.0, "num_tokens": 14495584.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1168, "step_time": 6.777760679484345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.10722962487489, "epoch": 0.5381818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3468170454545456e-05, "loss": 0.0, "num_tokens": 14694192.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1184, "step_time": 6.828546286211349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4106021150946617, "epoch": 0.5454545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3459625e-05, "loss": 0.0, "num_tokens": 14889632.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1200, "step_time": 6.803303236432839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4056353475898504, "epoch": 0.5527272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3451079545454545e-05, "loss": 0.0, "num_tokens": 15086216.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1216, "step_time": 6.809469573141541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2028971537947655, "epoch": 0.56, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3442534090909092e-05, "loss": 0.0, "num_tokens": 15282776.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1232, "step_time": 6.815300940710586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2706773821264505, "epoch": 0.5672727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3433988636363637e-05, "loss": 0.0, "num_tokens": 15481280.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1248, "step_time": 6.789468246744946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.417286262847483, "epoch": 0.5745454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3425443181818181e-05, "loss": 0.0, "num_tokens": 15678256.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1264, "step_time": 6.803466434008442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9892088724300265, "epoch": 0.5818181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3416897727272728e-05, "loss": 0.0, "num_tokens": 15878344.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1280, "step_time": 6.839117998140864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.317000398412347, "epoch": 0.5890909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3408352272727273e-05, "loss": 0.0, "num_tokens": 16075536.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1296, "step_time": 6.830957441532519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3442000709474087, "epoch": 0.5963636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3399806818181819e-05, "loss": 0.0, "num_tokens": 16277384.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1312, "step_time": 6.842805185704492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.494213981553912, "epoch": 0.6036363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3391261363636364e-05, "loss": 0.0, "num_tokens": 16477464.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1328, "step_time": 6.88639573531691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.187366573140025, "epoch": 0.610909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3382715909090908e-05, "loss": 0.0, "num_tokens": 16678504.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1344, "step_time": 6.771680047851987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2736073713749647, "epoch": 0.6181818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3374170454545455e-05, "loss": 0.0, "num_tokens": 16876456.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1360, "step_time": 6.741794998641126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.5669486774131656, "epoch": 0.6254545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3365625e-05, "loss": 0.0, "num_tokens": 17074288.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1376, "step_time": 6.771289608557709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.129427008330822, "epoch": 0.6327272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3357079545454546e-05, "loss": 0.0, "num_tokens": 17270736.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1392, "step_time": 6.766435696568806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.147328623570502, "epoch": 0.64, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.334853409090909e-05, "loss": 0.0, "num_tokens": 17470040.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1408, "step_time": 6.835226783179678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0660659885033965, "epoch": 0.6472727272727272, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3339988636363636e-05, "loss": 0.0, "num_tokens": 17670056.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1424, "step_time": 6.817595137399621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3427893919870257, "epoch": 0.6545454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3331443181818182e-05, "loss": 0.0, "num_tokens": 17864512.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1440, "step_time": 6.7745873693493195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.988153620623052, "epoch": 0.6618181818181819, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3322897727272728e-05, "loss": 0.0, "num_tokens": 18064008.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1456, "step_time": 6.810007059946656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.5129095343872905, "epoch": 0.6690909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3314352272727273e-05, "loss": 0.0, "num_tokens": 18265008.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1472, "step_time": 6.775363146036398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.341884721070528, "epoch": 0.6763636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.330580681818182e-05, "loss": 0.0, "num_tokens": 18463848.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1488, "step_time": 6.8186130007961765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.579329112544656, "epoch": 0.6836363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3297261363636364e-05, "loss": 0.0, "num_tokens": 18662248.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1504, "step_time": 6.8083467375836335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1765119256451726, "epoch": 0.6909090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.328871590909091e-05, "loss": 0.0, "num_tokens": 18861328.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1520, "step_time": 6.778358690673485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1588730523362756, "epoch": 0.6981818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3280170454545455e-05, "loss": 0.0, "num_tokens": 19062008.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1536, "step_time": 6.802705559995957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.057212447747588, "epoch": 0.7054545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3271625e-05, "loss": 0.0, "num_tokens": 19259584.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1552, "step_time": 6.820947894244455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3579221479594707, "epoch": 0.7127272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3263079545454547e-05, "loss": 0.0, "num_tokens": 19456288.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1568, "step_time": 6.783202945603989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1419159155339003, "epoch": 0.72, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3254534090909091e-05, "loss": 0.0, "num_tokens": 19654392.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1584, "step_time": 6.792444017424714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0922014825046062, "epoch": 0.7272727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3245988636363638e-05, "loss": 0.0, "num_tokens": 19854784.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1600, "step_time": 6.790935325610917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.302881100215018, "epoch": 0.7345454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3237443181818183e-05, "loss": 0.0, "num_tokens": 20054800.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1616, "step_time": 6.799347155960277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2373234955593944, "epoch": 0.7418181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3228897727272727e-05, "loss": 0.0, "num_tokens": 20252920.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1632, "step_time": 6.742899239063263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.122205875813961, "epoch": 0.7490909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3220352272727274e-05, "loss": 0.0, "num_tokens": 20452480.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1648, "step_time": 6.765379874326754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.217903313226998, "epoch": 0.7563636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3211806818181818e-05, "loss": 0.0, "num_tokens": 20654800.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1664, "step_time": 6.768923559284303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3427827628329396, "epoch": 0.7636363636363637, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3203261363636365e-05, "loss": 0.0, "num_tokens": 20853216.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1680, "step_time": 6.749019315990154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.964004747569561, "epoch": 0.7709090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.319471590909091e-05, "loss": 0.0, "num_tokens": 21051368.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1696, "step_time": 6.794358595914673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1231435453519225, "epoch": 0.7781818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3186170454545454e-05, "loss": 0.0, "num_tokens": 21247120.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1712, "step_time": 6.795791415905114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.5061326697468758, "epoch": 0.7854545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3177625e-05, "loss": 0.0, "num_tokens": 21443488.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1728, "step_time": 6.810171545366757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3949177227914333, "epoch": 0.7927272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3169079545454545e-05, "loss": 0.0, "num_tokens": 21638376.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1744, "step_time": 6.751137403829489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.196354507468641, "epoch": 0.8, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3160534090909092e-05, "loss": 0.0, "num_tokens": 21838752.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1760, "step_time": 6.813927979324944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.8691934188827872, "epoch": 0.8072727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3151988636363637e-05, "loss": 0.0, "num_tokens": 22038504.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1776, "step_time": 6.784064815903548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0765619752928615, "epoch": 0.8145454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3143443181818181e-05, "loss": 0.0, "num_tokens": 22239024.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1792, "step_time": 6.878322484553792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.309577951207757, "epoch": 0.8218181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3134897727272728e-05, "loss": 0.0, "num_tokens": 22438728.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1808, "step_time": 6.863884965714533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.058369752019644, "epoch": 0.8290909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3126352272727273e-05, "loss": 0.0, "num_tokens": 22638544.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1824, "step_time": 6.771415026450995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1903171529993415, "epoch": 0.8363636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3117806818181817e-05, "loss": 0.0, "num_tokens": 22837168.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1840, "step_time": 6.7205873560742475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2971535082906485, "epoch": 0.8436363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3109261363636364e-05, "loss": 0.0, "num_tokens": 23035856.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1856, "step_time": 6.724811541091185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1190958246588707, "epoch": 0.850909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3100715909090908e-05, "loss": 0.0, "num_tokens": 23232776.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1872, "step_time": 6.714156559668481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.027809141203761, "epoch": 0.8581818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3092170454545455e-05, "loss": 0.0, "num_tokens": 23432512.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1888, "step_time": 6.788581044296734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.099637934938073, "epoch": 0.8654545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3083625e-05, "loss": 0.0, "num_tokens": 23630912.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1904, "step_time": 6.804333564650733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0758075192570686, "epoch": 0.8727272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3075079545454546e-05, "loss": 0.0, "num_tokens": 23830264.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1920, "step_time": 6.776671283470932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.175785332918167, "epoch": 0.88, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3066534090909092e-05, "loss": 0.0, "num_tokens": 24029912.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1936, "step_time": 6.677869445469696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.12380725517869, "epoch": 0.8872727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3057988636363637e-05, "loss": 0.0, "num_tokens": 24226168.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1952, "step_time": 6.8098004084895365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2622394105419517, "epoch": 0.8945454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3049443181818184e-05, "loss": 0.0, "num_tokens": 24426208.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1968, "step_time": 6.808981532289181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3609932847321033, "epoch": 0.9018181818181819, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3040897727272728e-05, "loss": 0.0, "num_tokens": 24623336.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 1984, "step_time": 6.7059100805199705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2017374001443386, "epoch": 0.9090909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3032352272727273e-05, "loss": 0.0, "num_tokens": 24820400.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2000, "step_time": 6.68805788771715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.292481265962124, "epoch": 0.9163636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.302380681818182e-05, "loss": 0.0, "num_tokens": 25022056.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2016, "step_time": 6.821227380132768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.187743639573455, "epoch": 0.9236363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3015261363636364e-05, "loss": 0.0, "num_tokens": 25221240.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2032, "step_time": 6.751498891739175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2612225087359548, "epoch": 0.9309090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.300671590909091e-05, "loss": 0.0, "num_tokens": 25420000.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2048, "step_time": 6.7375550081487745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0908776372671127, "epoch": 0.9381818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2998170454545455e-05, "loss": 0.0, "num_tokens": 25617824.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2064, "step_time": 6.745009416132234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.436667935922742, "epoch": 0.9454545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2989625e-05, "loss": 0.0, "num_tokens": 25816440.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2080, "step_time": 6.726034635154065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0809996156021953, "epoch": 0.9527272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2981079545454547e-05, "loss": 0.0, "num_tokens": 26015512.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2096, "step_time": 6.756280101719312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.113866430707276, "epoch": 0.96, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2972534090909091e-05, "loss": 0.0, "num_tokens": 26216440.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2112, "step_time": 6.731012829870451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3540880754590034, "epoch": 0.9672727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2963988636363636e-05, "loss": 0.0, "num_tokens": 26417064.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2128, "step_time": 6.778560371370986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4368344880640507, "epoch": 0.9745454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2955443181818182e-05, "loss": 0.0, "num_tokens": 26612592.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2144, "step_time": 6.781460403406527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.924749942496419, "epoch": 0.9818181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2946897727272727e-05, "loss": 0.0, "num_tokens": 26811568.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2160, "step_time": 6.786069019581191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.308485238812864, "epoch": 0.9890909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2938352272727274e-05, "loss": 0.0, "num_tokens": 27007160.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2176, "step_time": 6.75381765246857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1435415241867304, "epoch": 0.9963636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2929806818181818e-05, "loss": 0.0, "num_tokens": 27206072.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2192, "step_time": 6.723647503124084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1017238460481167, "epoch": 1.0036363636363637, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2921261363636363e-05, "loss": 0.0, "num_tokens": 27400520.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2208, "step_time": 6.695596158213448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1460805255919695, "epoch": 1.010909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.291271590909091e-05, "loss": 0.0, "num_tokens": 27601832.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2224, "step_time": 6.741232635220513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.198020614683628, "epoch": 1.018181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2904170454545454e-05, "loss": 0.0, "num_tokens": 27800600.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2240, "step_time": 6.77682097296929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2291477154940367, "epoch": 1.0254545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2895625e-05, "loss": 0.0, "num_tokens": 28000384.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2256, "step_time": 6.74526576831704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.355206954292953, "epoch": 1.0327272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2887079545454545e-05, "loss": 0.0, "num_tokens": 28196720.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2272, "step_time": 6.7606360777281225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3051024340093136, "epoch": 1.04, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.287853409090909e-05, "loss": 0.0, "num_tokens": 28394992.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2288, "step_time": 6.772849566536024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2615869557484984, "epoch": 1.0472727272727274, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2869988636363637e-05, "loss": 0.0, "num_tokens": 28592184.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2304, "step_time": 6.763053335249424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.264405391179025, "epoch": 1.0545454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2861443181818181e-05, "loss": 0.0, "num_tokens": 28791208.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2320, "step_time": 6.750271147699095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.35021267272532, "epoch": 1.0618181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2852897727272728e-05, "loss": 0.0, "num_tokens": 28992008.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2336, "step_time": 6.7981160210911185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1354622216895223, "epoch": 1.069090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2844352272727273e-05, "loss": 0.0, "num_tokens": 29188224.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2352, "step_time": 6.698252131580375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0579872159287333, "epoch": 1.0763636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2835806818181819e-05, "loss": 0.0, "num_tokens": 29388544.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2368, "step_time": 6.779826779093128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.028238764964044, "epoch": 1.0836363636363637, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2827261363636365e-05, "loss": 0.0, "num_tokens": 29589800.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2384, "step_time": 6.813384254521225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.218187157995999, "epoch": 1.0909090909090908, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.281871590909091e-05, "loss": 0.0, "num_tokens": 29785960.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2400, "step_time": 6.7202456255909055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.303743524476886, "epoch": 1.0981818181818181, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2810170454545455e-05, "loss": 0.0, "num_tokens": 29983272.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2416, "step_time": 6.780246646958403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.396721890196204, "epoch": 1.1054545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2801625000000001e-05, "loss": 0.0, "num_tokens": 30182432.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2432, "step_time": 6.793788745824713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.242832077667117, "epoch": 1.1127272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2793079545454546e-05, "loss": 0.0, "num_tokens": 30382672.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2448, "step_time": 6.747386139992159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.286748672835529, "epoch": 1.12, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2784534090909092e-05, "loss": 0.0, "num_tokens": 30582552.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2464, "step_time": 6.742027651518583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.192711003124714, "epoch": 1.1272727272727272, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2775988636363637e-05, "loss": 0.0, "num_tokens": 30781016.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2480, "step_time": 6.797363699704874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.312186806462705, "epoch": 1.1345454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2767443181818182e-05, "loss": 0.0, "num_tokens": 30981640.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2496, "step_time": 6.711918727087323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.18869723752141, "epoch": 1.1418181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2758897727272728e-05, "loss": 0.0, "num_tokens": 31178992.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2512, "step_time": 6.795956993591972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2482578828930855, "epoch": 1.1490909090909092, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2750352272727273e-05, "loss": 0.0, "num_tokens": 31377960.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2528, "step_time": 6.804913574247621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.359376387670636, "epoch": 1.1563636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.274180681818182e-05, "loss": 0.0, "num_tokens": 31579352.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2544, "step_time": 6.872893456253223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.277367865666747, "epoch": 1.1636363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2733261363636364e-05, "loss": 0.0, "num_tokens": 31776984.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2560, "step_time": 6.8818644337006845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.388336017727852, "epoch": 1.170909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2724715909090909e-05, "loss": 0.0, "num_tokens": 31978032.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2576, "step_time": 6.782117213879246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0712667126208544, "epoch": 1.1781818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2716170454545455e-05, "loss": 0.0, "num_tokens": 32178904.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2592, "step_time": 6.735153479909059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3562627052888274, "epoch": 1.1854545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2707625e-05, "loss": 0.0, "num_tokens": 32377648.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2608, "step_time": 6.842811773065478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0933955032378435, "epoch": 1.1927272727272726, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2699079545454547e-05, "loss": 0.0, "num_tokens": 32577280.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2624, "step_time": 6.833708286401816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2871429016813636, "epoch": 1.2, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2690534090909091e-05, "loss": 0.0, "num_tokens": 32775856.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2640, "step_time": 6.858709867228754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1881882632151246, "epoch": 1.2072727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2681988636363636e-05, "loss": 0.0, "num_tokens": 32972456.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2656, "step_time": 6.836625085736159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.047997717745602, "epoch": 1.2145454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2673443181818182e-05, "loss": 0.0, "num_tokens": 33171640.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2672, "step_time": 6.823630373168271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9498577415943146, "epoch": 1.221818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2664897727272727e-05, "loss": 0.0, "num_tokens": 33372552.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2688, "step_time": 6.791127723874524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1252716667950153, "epoch": 1.229090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2656352272727274e-05, "loss": 0.0, "num_tokens": 33570448.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2704, "step_time": 6.756760664633475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2014796687290072, "epoch": 1.2363636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2647806818181818e-05, "loss": 0.0, "num_tokens": 33766520.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2720, "step_time": 6.686530307750218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2421076837927103, "epoch": 1.2436363636363637, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2639261363636363e-05, "loss": 0.0, "num_tokens": 33964584.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2736, "step_time": 6.7870109137147665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3316868767142296, "epoch": 1.250909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.263071590909091e-05, "loss": 0.0, "num_tokens": 34163576.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2752, "step_time": 6.718043155909982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.503485517576337, "epoch": 1.2581818181818183, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2622170454545454e-05, "loss": 0.0, "num_tokens": 34360144.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2768, "step_time": 6.755784626526292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.360766554251313, "epoch": 1.2654545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2613624999999999e-05, "loss": 0.0, "num_tokens": 34557016.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2784, "step_time": 6.720057619037107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.143316438421607, "epoch": 1.2727272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2605079545454545e-05, "loss": 0.0, "num_tokens": 34755640.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2800, "step_time": 6.744722189905588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1157698202878237, "epoch": 1.28, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.259653409090909e-05, "loss": 0.0, "num_tokens": 34951632.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2816, "step_time": 6.816667142789811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.456563299521804, "epoch": 1.2872727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2587988636363638e-05, "loss": 0.0, "num_tokens": 35150376.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2832, "step_time": 6.802197508630343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9530596751719713, "epoch": 1.2945454545454544, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2579443181818183e-05, "loss": 0.0, "num_tokens": 35349424.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2848, "step_time": 6.7407911563059315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2607680400833488, "epoch": 1.3018181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2570897727272728e-05, "loss": 0.0, "num_tokens": 35546248.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2864, "step_time": 6.751011346175801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2476487196981907, "epoch": 1.309090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2562352272727274e-05, "loss": 0.0, "num_tokens": 35746672.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2880, "step_time": 6.7571043413481675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.974738628603518, "epoch": 1.3163636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2553806818181819e-05, "loss": 0.0, "num_tokens": 35947936.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2896, "step_time": 6.732615724671632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9833324188366532, "epoch": 1.3236363636363637, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2545261363636365e-05, "loss": 0.0, "num_tokens": 36146976.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2912, "step_time": 6.747754844080191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1581534668803215, "epoch": 1.330909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.253671590909091e-05, "loss": 0.0, "num_tokens": 36347240.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2928, "step_time": 6.806651291146409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9356587659567595, "epoch": 1.3381818181818181, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2528170454545455e-05, "loss": 0.0, "num_tokens": 36546312.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2944, "step_time": 6.798078317428008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.31802302505821, "epoch": 1.3454545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2519625000000001e-05, "loss": 0.0, "num_tokens": 36745320.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2960, "step_time": 6.80775830487255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.344906162470579, "epoch": 1.3527272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2511079545454546e-05, "loss": 0.0, "num_tokens": 36943456.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2976, "step_time": 6.7794261837843806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.300058946944773, "epoch": 1.3599999999999999, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.250253409090909e-05, "loss": 0.0, "num_tokens": 37140768.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 2992, "step_time": 6.751858755480498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3071441184729338, "epoch": 1.3672727272727272, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2493988636363637e-05, "loss": 0.0, "num_tokens": 37338376.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3008, "step_time": 6.764147925772704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.082784323953092, "epoch": 1.3745454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2485443181818182e-05, "loss": 0.0, "num_tokens": 37536672.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3024, "step_time": 6.7155560600804165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.446023194119334, "epoch": 1.3818181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2476897727272728e-05, "loss": 0.0, "num_tokens": 37735560.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3040, "step_time": 6.788893666642252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.8666899278759956, "epoch": 1.3890909090909092, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2468352272727273e-05, "loss": 0.0, "num_tokens": 37936488.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3056, "step_time": 6.777422826969996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3226305842399597, "epoch": 1.3963636363636365, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2459806818181818e-05, "loss": 0.0, "num_tokens": 38138056.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3072, "step_time": 6.787566299317405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1517041893675923, "epoch": 1.4036363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2451261363636364e-05, "loss": 0.0, "num_tokens": 38337816.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3088, "step_time": 6.76234096015105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.091954587958753, "epoch": 1.410909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2442715909090909e-05, "loss": 0.0, "num_tokens": 38535064.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3104, "step_time": 6.831137398316059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3856596797704697, "epoch": 1.4181818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2434170454545455e-05, "loss": 0.0, "num_tokens": 38729336.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3120, "step_time": 6.82296618015971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2895570769906044, "epoch": 1.4254545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2425625e-05, "loss": 0.0, "num_tokens": 38924968.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3136, "step_time": 6.74725763668539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.043810401111841, "epoch": 1.4327272727272726, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2417079545454545e-05, "loss": 0.0, "num_tokens": 39122568.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3152, "step_time": 6.819663318456151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.5287796342745423, "epoch": 1.44, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2408534090909091e-05, "loss": 0.0, "num_tokens": 39323144.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3168, "step_time": 6.781180361402221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0407142341136932, "epoch": 1.4472727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2399988636363636e-05, "loss": 0.0, "num_tokens": 39522680.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3184, "step_time": 6.799189413781278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.955046221613884, "epoch": 1.4545454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2391443181818182e-05, "loss": 0.0, "num_tokens": 39722640.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3200, "step_time": 6.7851121361600235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.305285726673901, "epoch": 1.461818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2382897727272727e-05, "loss": 0.0, "num_tokens": 39920992.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3216, "step_time": 6.807475323788822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0928380796685815, "epoch": 1.4690909090909092, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2374352272727272e-05, "loss": 0.0, "num_tokens": 40120288.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3232, "step_time": 6.831833872594871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.916893396526575, "epoch": 1.4763636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2365806818181818e-05, "loss": 0.0, "num_tokens": 40317616.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3248, "step_time": 6.692840414005332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.162033887580037, "epoch": 1.4836363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2357261363636363e-05, "loss": 0.0, "num_tokens": 40518912.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3264, "step_time": 6.813380097912159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.265519233420491, "epoch": 1.490909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.234871590909091e-05, "loss": 0.0, "num_tokens": 40712320.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3280, "step_time": 6.8094925917102955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.264208293519914, "epoch": 1.498181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2340170454545456e-05, "loss": 0.0, "num_tokens": 40910752.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3296, "step_time": 6.92184988129884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0021583158522844, "epoch": 1.5054545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2331625e-05, "loss": 0.0, "num_tokens": 41109232.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3312, "step_time": 6.807405228668358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1294081257656217, "epoch": 1.5127272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2323079545454547e-05, "loss": 0.0, "num_tokens": 41307944.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3328, "step_time": 6.7090692204074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1915039075538516, "epoch": 1.52, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2314534090909092e-05, "loss": 0.0, "num_tokens": 41507408.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3344, "step_time": 6.812871761154383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3671645503491163, "epoch": 1.5272727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2305988636363637e-05, "loss": 0.0, "num_tokens": 41701304.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3360, "step_time": 6.819892742554657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2086366089060903, "epoch": 1.5345454545454547, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2297443181818183e-05, "loss": 0.0, "num_tokens": 41897808.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3376, "step_time": 6.792966264649294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4239872731268406, "epoch": 1.541818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2288897727272728e-05, "loss": 0.0, "num_tokens": 42097744.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3392, "step_time": 6.7452264034654945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4442757526412606, "epoch": 1.549090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2280352272727274e-05, "loss": 0.0, "num_tokens": 42295304.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3408, "step_time": 6.8505471590324305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0827516186982393, "epoch": 1.5563636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2271806818181819e-05, "loss": 0.0, "num_tokens": 42494648.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3424, "step_time": 6.797972589090932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3115072082728148, "epoch": 1.5636363636363635, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2263261363636364e-05, "loss": 0.0, "num_tokens": 42691832.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3440, "step_time": 6.812032384448685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2236884916201234, "epoch": 1.5709090909090908, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.225471590909091e-05, "loss": 0.0, "num_tokens": 42888088.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3456, "step_time": 6.784853787452448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.468571356497705, "epoch": 1.5781818181818181, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2246170454545455e-05, "loss": 0.0, "num_tokens": 43085488.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3472, "step_time": 6.80501512484625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3984502805396914, "epoch": 1.5854545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2237625000000001e-05, "loss": 0.0, "num_tokens": 43284248.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3488, "step_time": 6.7867183712660335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4759268388152122, "epoch": 1.5927272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2229079545454546e-05, "loss": 0.0, "num_tokens": 43484528.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3504, "step_time": 6.7809304527472705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.420325178653002, "epoch": 1.6, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.222053409090909e-05, "loss": 0.0, "num_tokens": 43682552.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3520, "step_time": 6.777703637490049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.481171676889062, "epoch": 1.6072727272727274, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2211988636363637e-05, "loss": 0.0, "num_tokens": 43882104.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3536, "step_time": 6.742597420467064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.358258410356939, "epoch": 1.6145454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2203443181818182e-05, "loss": 0.0, "num_tokens": 44079960.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3552, "step_time": 6.825212087715045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.696707614697516, "epoch": 1.6218181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2194897727272728e-05, "loss": 0.0, "num_tokens": 44276632.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3568, "step_time": 6.804728279355913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2286562798544765, "epoch": 1.6290909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2186352272727273e-05, "loss": 0.0, "num_tokens": 44474816.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3584, "step_time": 6.721510254021268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.202762014232576, "epoch": 1.6363636363636362, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2177806818181818e-05, "loss": 0.0, "num_tokens": 44676336.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3600, "step_time": 6.728681799781043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3508113231509924, "epoch": 1.6436363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2169261363636364e-05, "loss": 0.0, "num_tokens": 44876448.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3616, "step_time": 6.817280929361004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1071648206561804, "epoch": 1.6509090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2160715909090909e-05, "loss": 0.0, "num_tokens": 45075696.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3632, "step_time": 6.778283861873206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1784300385043025, "epoch": 1.6581818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2152170454545454e-05, "loss": 0.0, "num_tokens": 45272976.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3648, "step_time": 6.743116174242459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1938441330567002, "epoch": 1.6654545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2143625e-05, "loss": 0.0, "num_tokens": 45468792.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3664, "step_time": 6.801589194859844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1752672847360373, "epoch": 1.6727272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2135079545454545e-05, "loss": 0.0, "num_tokens": 45665600.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3680, "step_time": 6.774731121433433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.326844639144838, "epoch": 1.6800000000000002, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2126534090909091e-05, "loss": 0.0, "num_tokens": 45861696.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3696, "step_time": 6.764392688404769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9930380610749125, "epoch": 1.6872727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2117988636363636e-05, "loss": 0.0, "num_tokens": 46061648.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3712, "step_time": 6.803180871240329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.130782919935882, "epoch": 1.6945454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2109443181818182e-05, "loss": 0.0, "num_tokens": 46260736.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3728, "step_time": 6.832387202652171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3689714800566435, "epoch": 1.7018181818181817, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2100897727272729e-05, "loss": 0.0, "num_tokens": 46457608.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3744, "step_time": 6.8068507291027345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1559818061068654, "epoch": 1.709090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2092352272727274e-05, "loss": 0.0, "num_tokens": 46657072.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3760, "step_time": 6.819567769533023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.149191261269152, "epoch": 1.7163636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.208380681818182e-05, "loss": 0.0, "num_tokens": 46857960.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3776, "step_time": 6.7862320126150735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3385393377393484, "epoch": 1.7236363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2075261363636365e-05, "loss": 0.0, "num_tokens": 47056800.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3792, "step_time": 6.83025036327308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3446352081373334, "epoch": 1.730909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.206671590909091e-05, "loss": 0.0, "num_tokens": 47254072.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3808, "step_time": 6.7963205120759085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1582796899601817, "epoch": 1.7381818181818183, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2058170454545456e-05, "loss": 0.0, "num_tokens": 47453424.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3824, "step_time": 6.754275181854609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.350019756704569, "epoch": 1.7454545454545456, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2049625e-05, "loss": 0.0, "num_tokens": 47655640.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3840, "step_time": 6.815282926079817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2120337886735797, "epoch": 1.7527272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2041079545454545e-05, "loss": 0.0, "num_tokens": 47855408.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3856, "step_time": 6.773871595331002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.111018780618906, "epoch": 1.76, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2032534090909092e-05, "loss": 0.0, "num_tokens": 48052248.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3872, "step_time": 6.7454344616271555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.026843812316656, "epoch": 1.767272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2023988636363637e-05, "loss": 0.0, "num_tokens": 48251128.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3888, "step_time": 6.81127433484653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.142205369658768, "epoch": 1.7745454545454544, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2015443181818183e-05, "loss": 0.0, "num_tokens": 48451048.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3904, "step_time": 6.768906163924839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.833907858468592, "epoch": 1.7818181818181817, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2006897727272728e-05, "loss": 0.0, "num_tokens": 48650960.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3920, "step_time": 6.76123388716951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.965332482010126, "epoch": 1.789090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1998352272727272e-05, "loss": 0.0, "num_tokens": 48852520.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3936, "step_time": 6.785187493253034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.069445427507162, "epoch": 1.7963636363636364, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1989806818181819e-05, "loss": 0.0, "num_tokens": 49053104.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3952, "step_time": 6.755604680161923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2699612844735384, "epoch": 1.8036363636363637, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1981261363636364e-05, "loss": 0.0, "num_tokens": 49249768.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3968, "step_time": 6.758650440606289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1967758797109127, "epoch": 1.810909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.197271590909091e-05, "loss": 0.0, "num_tokens": 49453984.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 3984, "step_time": 6.8494268947979435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.364362958818674, "epoch": 1.8181818181818183, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1964170454545455e-05, "loss": 0.0, "num_tokens": 49652000.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4000, "step_time": 6.740954163658898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.118711346760392, "epoch": 1.8254545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1955625e-05, "loss": 0.0, "num_tokens": 49849992.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4016, "step_time": 6.755418484797701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.07691203802824, "epoch": 1.8327272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1947079545454546e-05, "loss": 0.0, "num_tokens": 50049880.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4032, "step_time": 6.882297096424736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9445608789101243, "epoch": 1.8399999999999999, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.193853409090909e-05, "loss": 0.0, "num_tokens": 50247568.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4048, "step_time": 6.855586153862532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3924843007698655, "epoch": 1.8472727272727272, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1929988636363637e-05, "loss": 0.0, "num_tokens": 50444408.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4064, "step_time": 6.770890337065794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0896419156342745, "epoch": 1.8545454545454545, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1921443181818182e-05, "loss": 0.0, "num_tokens": 50645840.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4080, "step_time": 6.759655719448347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.368128092959523, "epoch": 1.8618181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1912897727272727e-05, "loss": 0.0, "num_tokens": 50845024.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4096, "step_time": 6.766734938137233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.189123467542231, "epoch": 1.8690909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1904352272727273e-05, "loss": 0.0, "num_tokens": 51041856.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4112, "step_time": 6.764893968065735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.213881215080619, "epoch": 1.8763636363636365, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1895806818181818e-05, "loss": 0.0, "num_tokens": 51241264.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4128, "step_time": 6.777542426658329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.6151960818096995, "epoch": 1.8836363636363638, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1887261363636364e-05, "loss": 0.0, "num_tokens": 51436520.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4144, "step_time": 6.759419926791452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2313709212467074, "epoch": 1.8909090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1878715909090909e-05, "loss": 0.0, "num_tokens": 51632536.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4160, "step_time": 6.775483648234513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2920576203614473, "epoch": 1.8981818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1870170454545454e-05, "loss": 0.0, "num_tokens": 51829736.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4176, "step_time": 6.728544996934943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0724613601341844, "epoch": 1.9054545454545453, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1861625000000002e-05, "loss": 0.0, "num_tokens": 52030088.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4192, "step_time": 6.739417067728937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3921290542930365, "epoch": 1.9127272727272726, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1853079545454547e-05, "loss": 0.0, "num_tokens": 52229240.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4208, "step_time": 6.781370086420793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.101226268336177, "epoch": 1.92, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1844534090909091e-05, "loss": 0.0, "num_tokens": 52427576.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4224, "step_time": 6.747128820337821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2702527521178126, "epoch": 1.9272727272727272, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1835988636363638e-05, "loss": 0.0, "num_tokens": 52623864.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4240, "step_time": 6.811194027657621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1875290479511023, "epoch": 1.9345454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1827443181818182e-05, "loss": 0.0, "num_tokens": 52823336.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4256, "step_time": 6.749341471120715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.232122967019677, "epoch": 1.9418181818181819, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1818897727272729e-05, "loss": 0.0, "num_tokens": 53021440.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4272, "step_time": 6.738833487150259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2272182758897543, "epoch": 1.9490909090909092, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1810352272727274e-05, "loss": 0.0, "num_tokens": 53221544.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4288, "step_time": 6.832757337251678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.256685646250844, "epoch": 1.9563636363636365, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1801806818181818e-05, "loss": 0.0, "num_tokens": 53419944.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4304, "step_time": 6.768037219590042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2525876704603434, "epoch": 1.9636363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1793261363636365e-05, "loss": 0.0, "num_tokens": 53618080.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4320, "step_time": 6.794253888714593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.8666307339444757, "epoch": 1.970909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.178471590909091e-05, "loss": 0.0, "num_tokens": 53815992.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4336, "step_time": 6.776390675047878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1485872622579336, "epoch": 1.978181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1776170454545456e-05, "loss": 0.0, "num_tokens": 54016672.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4352, "step_time": 6.770807302149478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.231991925276816, "epoch": 1.9854545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1767625e-05, "loss": 0.0, "num_tokens": 54212968.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4368, "step_time": 6.753521733451635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.170779351145029, "epoch": 1.9927272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1759079545454545e-05, "loss": 0.0, "num_tokens": 54413608.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4384, "step_time": 6.785529180720914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.5853626411408186, "epoch": 2.0, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1750534090909092e-05, "loss": 0.0, "num_tokens": 54610264.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4400, "step_time": 6.7369108707644045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2464861115440726, "epoch": 2.0072727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1741988636363637e-05, "loss": 0.0, "num_tokens": 54810368.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4416, "step_time": 6.768912441621069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.171581517904997, "epoch": 2.0145454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1733443181818183e-05, "loss": 0.0, "num_tokens": 55007896.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4432, "step_time": 6.792140641016886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.452936114743352, "epoch": 2.021818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1724897727272728e-05, "loss": 0.0, "num_tokens": 55210808.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4448, "step_time": 6.832421531260479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1356757525354624, "epoch": 2.0290909090909093, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1716352272727272e-05, "loss": 0.0, "num_tokens": 55414744.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4464, "step_time": 6.85547347436659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.0414814334362745, "epoch": 2.036363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1707806818181819e-05, "loss": 0.0, "num_tokens": 55612824.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4480, "step_time": 6.871003287553322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2338881632313132, "epoch": 2.0436363636363635, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1699261363636364e-05, "loss": 0.0, "num_tokens": 55810056.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4496, "step_time": 6.782766689662822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.887524533085525, "epoch": 2.050909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1690715909090908e-05, "loss": 0.0, "num_tokens": 56008880.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4512, "step_time": 6.833852186566219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1544918725267053, "epoch": 2.058181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1682170454545455e-05, "loss": 0.0, "num_tokens": 56207120.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4528, "step_time": 6.802051118807867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3047911264002323, "epoch": 2.0654545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1673625e-05, "loss": 0.0, "num_tokens": 56402352.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4544, "step_time": 6.732677310064901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.9341879040002823, "epoch": 2.0727272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1665079545454546e-05, "loss": 0.0, "num_tokens": 56600552.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4560, "step_time": 6.7354966415441595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.8921627597883344, "epoch": 2.08, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.165653409090909e-05, "loss": 0.0, "num_tokens": 56803840.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4576, "step_time": 6.7446831813431345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2780918246135116, "epoch": 2.0872727272727274, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1647988636363635e-05, "loss": 0.0, "num_tokens": 57001096.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4592, "step_time": 6.755358822236303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.271533099003136, "epoch": 2.0945454545454547, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1639443181818182e-05, "loss": 0.0, "num_tokens": 57199664.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4608, "step_time": 6.7298191034351476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.135909851640463, "epoch": 2.101818181818182, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1630897727272727e-05, "loss": 0.0, "num_tokens": 57399104.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4624, "step_time": 6.7364318473264575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.366418309509754, "epoch": 2.109090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1622352272727275e-05, "loss": 0.0, "num_tokens": 57598808.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4640, "step_time": 6.73096263356274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2284381790086627, "epoch": 2.1163636363636362, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.161380681818182e-05, "loss": 0.0, "num_tokens": 57797832.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4656, "step_time": 6.690315750252921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4452151097357273, "epoch": 2.1236363636363635, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1605261363636364e-05, "loss": 0.0, "num_tokens": 57995488.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4672, "step_time": 6.778060043696314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 2.8929754393175244, "epoch": 2.130909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.159671590909091e-05, "loss": 0.0, "num_tokens": 58194192.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4688, "step_time": 6.732746751047671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2306772526353598, "epoch": 2.138181818181818, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1588170454545455e-05, "loss": 0.0, "num_tokens": 58393848.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4704, "step_time": 6.7044033099664375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2497889045625925, "epoch": 2.1454545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1579625e-05, "loss": 0.0, "num_tokens": 58593528.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4720, "step_time": 6.717062452749815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.186699027195573, "epoch": 2.152727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1571079545454547e-05, "loss": 0.0, "num_tokens": 58797144.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4736, "step_time": 6.734358058718499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1356083126738667, "epoch": 2.16, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1562534090909091e-05, "loss": 0.0, "num_tokens": 58998840.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4752, "step_time": 6.74215031391941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3374369610100985, "epoch": 2.1672727272727275, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1553988636363638e-05, "loss": 0.0, "num_tokens": 59197904.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4768, "step_time": 6.771698047872633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.377966145053506, "epoch": 2.174545454545455, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1545443181818182e-05, "loss": 0.0, "num_tokens": 59397792.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4784, "step_time": 6.911487909092102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.2283763298764825, "epoch": 2.1818181818181817, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1536897727272727e-05, "loss": 0.0, "num_tokens": 59594240.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4800, "step_time": 6.762488114240114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.3086146786808968, "epoch": 2.189090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1528352272727274e-05, "loss": 0.0, "num_tokens": 59792360.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4816, "step_time": 6.73554704192793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1821892354637384, "epoch": 2.1963636363636363, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1519806818181818e-05, "loss": 0.0, "num_tokens": 59992696.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4832, "step_time": 6.773994111048523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.238061768002808, "epoch": 2.2036363636363636, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1511261363636365e-05, "loss": 0.0, "num_tokens": 60189472.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4848, "step_time": 6.82461291278014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4730343082919717, "epoch": 2.210909090909091, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.150271590909091e-05, "loss": 0.0, "num_tokens": 60389672.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4864, "step_time": 6.831689663988072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.190039082430303, "epoch": 2.2181818181818183, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1494170454545454e-05, "loss": 0.0, "num_tokens": 60591880.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4880, "step_time": 6.769283943169285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.265036822296679, "epoch": 2.2254545454545456, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1485625e-05, "loss": 0.0, "num_tokens": 60790112.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4896, "step_time": 6.8057959082652815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.648914782330394, "epoch": 2.232727272727273, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1477079545454545e-05, "loss": 0.0, "num_tokens": 60986040.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4912, "step_time": 6.796487211890053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.1904073525220156, "epoch": 2.24, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1468534090909092e-05, "loss": 0.0, "num_tokens": 61185592.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4928, "step_time": 6.8102810694254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.176701887510717, "epoch": 2.247272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1459988636363637e-05, "loss": 0.0, "num_tokens": 61382736.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4944, "step_time": 6.808710300480016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.182904541492462, "epoch": 2.2545454545454544, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1451443181818181e-05, "loss": 0.0, "num_tokens": 61580104.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4960, "step_time": 6.837853111966979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.30023224465549, "epoch": 2.2618181818181817, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1442897727272728e-05, "loss": 0.0, "num_tokens": 61776736.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4976, "step_time": 6.791178819956258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 1.0, "completions/max_length": 256.0, "completions/max_terminated_length": 0.0, "completions/mean_length": 256.0, "completions/mean_terminated_length": 0.0, "completions/min_length": 256.0, "completions/min_terminated_length": 0.0, "entropy": 3.4328582361340523, "epoch": 2.269090909090909, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.1434352272727272e-05, "loss": 0.0, "num_tokens": 61973360.0, "reward": 0.0, "reward_std": 0.0, "rewards/phased_json_reward/mean": 0.0, "rewards/phased_json_reward/std": 0.0, "step": 4992, "step_time": 6.845349179930054 } ], "logging_steps": 16, "max_steps": 26400, "num_input_tokens_seen": 62074560, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }