{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.2727272727272725,
  "eval_steps": 500,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 0.42830961220897734,
      "epoch": 0.007272727272727273,
      "frac_reward_zero_std": 0.46875,
      "grad_norm": 0.646968424320221,
      "learning_rate": 1.4091988636363636e-05,
      "loss": -0.0,
      "num_tokens": 197304.0,
      "reward": 0.63671875,
      "reward_std": 0.23533933330327272,
      "rewards/phased_json_reward/mean": 0.63671875,
      "rewards/phased_json_reward/std": 0.3294768799096346,
      "step": 16,
      "step_time": 6.092447507195175
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 0.5094600756419823,
      "epoch": 0.014545454545454545,
      "frac_reward_zero_std": 0.890625,
      "grad_norm": 1.981223702430725,
      "learning_rate": 1.4083443181818183e-05,
      "loss": 0.0,
      "num_tokens": 394552.0,
      "reward": 0.931640625,
      "reward_std": 0.04123233538120985,
      "rewards/phased_json_reward/mean": 0.931640625,
      "rewards/phased_json_reward/std": 0.07807412650436163,
      "step": 32,
      "step_time": 6.241545998607762
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 4.857936299405992,
      "epoch": 0.02181818181818182,
      "frac_reward_zero_std": 0.296875,
      "grad_norm": 4.860863208770752,
      "learning_rate": 1.4074897727272727e-05,
      "loss": -0.0,
      "num_tokens": 593872.0,
      "reward": 0.330078125,
      "reward_std": 0.3210840136744082,
      "rewards/phased_json_reward/mean": 0.330078125,
      "rewards/phased_json_reward/std": 0.3847697665914893,
      "step": 48,
      "step_time": 6.55877190240426
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.998046875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 9.875,
      "completions/mean_length": 255.80859375,
      "completions/mean_terminated_length": 9.875,
      "completions/min_length": 249.875,
      "completions/min_terminated_length": 9.875,
      "entropy": 7.90592198446393,
      "epoch": 0.02909090909090909,
      "frac_reward_zero_std": 0.953125,
      "grad_norm": 0.0,
      "learning_rate": 1.4066352272727272e-05,
      "loss": -0.0,
      "num_tokens": 791974.0,
      "reward": 0.0078125,
      "reward_std": 0.01828151335939765,
      "rewards/phased_json_reward/mean": 0.0078125,
      "rewards/phased_json_reward/std": 0.029557579196989536,
      "step": 64,
      "step_time": 6.82581608381588
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.98046875,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 47.75,
      "completions/mean_length": 253.859375,
      "completions/mean_terminated_length": 39.197916984558105,
      "completions/min_length": 220.125,
      "completions/min_terminated_length": 28.125,
      "entropy": 8.064402721822262,
      "epoch": 0.03636363636363636,
      "frac_reward_zero_std": 0.953125,
      "grad_norm": 0.0,
      "learning_rate": 1.4057806818181818e-05,
      "loss": -0.0,
      "num_tokens": 989270.0,
      "reward": 0.005859375,
      "reward_std": 0.016572814900428057,
      "rewards/phased_json_reward/mean": 0.005859375,
      "rewards/phased_json_reward/std": 0.026419460773468018,
      "step": 80,
      "step_time": 6.864816441084258
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.943359375,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 181.0625,
      "completions/mean_length": 251.91015625,
      "completions/mean_terminated_length": 154.4166669845581,
      "completions/min_length": 160.5,
      "completions/min_terminated_length": 128.5,
      "entropy": 11.036133036017418,
      "epoch": 0.04363636363636364,
      "frac_reward_zero_std": 0.796875,
      "grad_norm": 0.0,
      "learning_rate": 1.4049261363636365e-05,
      "loss": 0.0024,
      "num_tokens": 1186272.0,
      "reward": 0.02734375,
      "reward_std": 0.07352422922849655,
      "rewards/phased_json_reward/mean": 0.02734375,
      "rewards/phased_json_reward/std": 0.11195407994091511,
      "step": 96,
      "step_time": 6.840786207292695
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 6.178761022165418,
      "epoch": 0.05090909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.404071590909091e-05,
      "loss": 0.0,
      "num_tokens": 1384584.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 112,
      "step_time": 6.850926381011959
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3679739739745855,
      "epoch": 0.05818181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.4032170454545456e-05,
      "loss": 0.0,
      "num_tokens": 1582080.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 128,
      "step_time": 6.848654073488433
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.056913835927844,
      "epoch": 0.06545454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.4023625e-05,
      "loss": 0.0,
      "num_tokens": 1778904.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 144,
      "step_time": 6.8916237821104005
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.074340298771858,
      "epoch": 0.07272727272727272,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.4015079545454547e-05,
      "loss": 0.0,
      "num_tokens": 1977336.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 160,
      "step_time": 6.8557646567351185
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.200064916163683,
      "epoch": 0.08,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.4006534090909092e-05,
      "loss": 0.0,
      "num_tokens": 2176672.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 176,
      "step_time": 6.845825185824651
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3241069186478853,
      "epoch": 0.08727272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3997988636363637e-05,
      "loss": 0.0,
      "num_tokens": 2372864.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 192,
      "step_time": 6.798604555544443
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.972260655835271,
      "epoch": 0.09454545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3989443181818183e-05,
      "loss": 0.0,
      "num_tokens": 2571040.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 208,
      "step_time": 6.8837419137125835
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.5393687821924686,
      "epoch": 0.10181818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3980897727272728e-05,
      "loss": 0.0,
      "num_tokens": 2766736.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 224,
      "step_time": 6.887793117028195
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.8803269220516086,
      "epoch": 0.10909090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3972352272727274e-05,
      "loss": 0.0,
      "num_tokens": 2963744.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 240,
      "step_time": 6.842434576828964
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1804261477664113,
      "epoch": 0.11636363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3963806818181819e-05,
      "loss": 0.0,
      "num_tokens": 3163200.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 256,
      "step_time": 6.885555928805843
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1913011521101,
      "epoch": 0.12363636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3955261363636364e-05,
      "loss": 0.0,
      "num_tokens": 3362632.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 272,
      "step_time": 6.890110642649233
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4572390522807837,
      "epoch": 0.13090909090909092,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.394671590909091e-05,
      "loss": 0.0,
      "num_tokens": 3557872.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 288,
      "step_time": 6.849147691507824
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1283934861421585,
      "epoch": 0.13818181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3938170454545455e-05,
      "loss": 0.0,
      "num_tokens": 3758016.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 304,
      "step_time": 6.893458035716321
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.025675481185317,
      "epoch": 0.14545454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3929625000000001e-05,
      "loss": 0.0,
      "num_tokens": 3957072.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 320,
      "step_time": 6.88795695907902
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.401237423531711,
      "epoch": 0.15272727272727274,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3921079545454546e-05,
      "loss": 0.0,
      "num_tokens": 4157008.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 336,
      "step_time": 6.975196688377764
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2594400718808174,
      "epoch": 0.16,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.391253409090909e-05,
      "loss": 0.0,
      "num_tokens": 4353360.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 352,
      "step_time": 6.774752923694905
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0049337027594447,
      "epoch": 0.16727272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3903988636363637e-05,
      "loss": 0.0,
      "num_tokens": 4550704.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 368,
      "step_time": 6.776181065128185
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.057218801230192,
      "epoch": 0.17454545454545456,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3895443181818182e-05,
      "loss": 0.0,
      "num_tokens": 4751440.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 384,
      "step_time": 6.834552641317714
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.084166483953595,
      "epoch": 0.18181818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3886897727272727e-05,
      "loss": 0.0,
      "num_tokens": 4954720.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 400,
      "step_time": 6.851137411780655
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2319228379055858,
      "epoch": 0.1890909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3878352272727273e-05,
      "loss": 0.0,
      "num_tokens": 5152656.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 416,
      "step_time": 6.775262774550356
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.087355736643076,
      "epoch": 0.19636363636363635,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3869806818181818e-05,
      "loss": 0.0,
      "num_tokens": 5352128.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 432,
      "step_time": 6.806187288369983
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1158332098275423,
      "epoch": 0.20363636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3861261363636364e-05,
      "loss": 0.0,
      "num_tokens": 5551104.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 448,
      "step_time": 6.791217401507311
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.370364161208272,
      "epoch": 0.2109090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3852715909090909e-05,
      "loss": 0.0,
      "num_tokens": 5745648.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 464,
      "step_time": 6.827869251719676
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2214455530047417,
      "epoch": 0.21818181818181817,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3844170454545454e-05,
      "loss": 0.0,
      "num_tokens": 5942184.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 480,
      "step_time": 6.780751351150684
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4838474420830607,
      "epoch": 0.22545454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3835625e-05,
      "loss": 0.0,
      "num_tokens": 6139936.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 496,
      "step_time": 6.812705361051485
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2406448647379875,
      "epoch": 0.23272727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3827079545454545e-05,
      "loss": 0.0,
      "num_tokens": 6338696.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 512,
      "step_time": 6.824429966392927
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.337201427668333,
      "epoch": 0.24,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3818534090909091e-05,
      "loss": 0.0,
      "num_tokens": 6538192.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 528,
      "step_time": 6.816003460087813
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2296589333564043,
      "epoch": 0.24727272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3809988636363638e-05,
      "loss": 0.0,
      "num_tokens": 6738168.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 544,
      "step_time": 6.777038000465836
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3775191511958838,
      "epoch": 0.2545454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3801443181818183e-05,
      "loss": 0.0,
      "num_tokens": 6935200.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 560,
      "step_time": 6.80725452414481
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2904440509155393,
      "epoch": 0.26181818181818184,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3792897727272729e-05,
      "loss": 0.0,
      "num_tokens": 7134664.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 576,
      "step_time": 6.837113229208626
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3340134397149086,
      "epoch": 0.2690909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3784352272727274e-05,
      "loss": 0.0,
      "num_tokens": 7332544.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 592,
      "step_time": 6.751015797315631
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.114750794135034,
      "epoch": 0.27636363636363637,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3775806818181818e-05,
      "loss": 0.0,
      "num_tokens": 7530032.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 608,
      "step_time": 6.808422329369932
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9973993664607406,
      "epoch": 0.28363636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3767261363636365e-05,
      "loss": 0.0,
      "num_tokens": 7729976.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 624,
      "step_time": 6.8062825029483065
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.138758448883891,
      "epoch": 0.2909090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.375871590909091e-05,
      "loss": 0.0,
      "num_tokens": 7931648.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 640,
      "step_time": 6.747273121669423
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4777450608089566,
      "epoch": 0.29818181818181816,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3750170454545456e-05,
      "loss": 0.0,
      "num_tokens": 8129520.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 656,
      "step_time": 6.801122222619597
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3086934061720967,
      "epoch": 0.3054545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3741625e-05,
      "loss": 0.0,
      "num_tokens": 8325576.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 672,
      "step_time": 6.7866363351931795
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0702755441889167,
      "epoch": 0.31272727272727274,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3733079545454545e-05,
      "loss": 0.0,
      "num_tokens": 8525608.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 688,
      "step_time": 6.792434763687197
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1070512793958187,
      "epoch": 0.32,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3724534090909092e-05,
      "loss": 0.0,
      "num_tokens": 8727528.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 704,
      "step_time": 6.834328093973454
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.5615911297500134,
      "epoch": 0.32727272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3715988636363637e-05,
      "loss": 0.0,
      "num_tokens": 8925104.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 720,
      "step_time": 6.751501765102148
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1564355148002505,
      "epoch": 0.33454545454545453,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3707443181818183e-05,
      "loss": 0.0,
      "num_tokens": 9123336.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 736,
      "step_time": 6.769851226534229
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4383005276322365,
      "epoch": 0.3418181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3698897727272728e-05,
      "loss": 0.0,
      "num_tokens": 9318184.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 752,
      "step_time": 6.735914218821563
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3242766577750444,
      "epoch": 0.3490909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3690352272727273e-05,
      "loss": 0.0,
      "num_tokens": 9516768.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 768,
      "step_time": 6.764547313621733
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.198952840641141,
      "epoch": 0.3563636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3681806818181819e-05,
      "loss": 0.0,
      "num_tokens": 9716792.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 784,
      "step_time": 6.795565195614472
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.269397678785026,
      "epoch": 0.36363636363636365,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3673261363636364e-05,
      "loss": 0.0,
      "num_tokens": 9910896.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 800,
      "step_time": 6.826461906952318
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.652379145845771,
      "epoch": 0.3709090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.366471590909091e-05,
      "loss": 0.0,
      "num_tokens": 10104888.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 816,
      "step_time": 6.726004549302161
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1831045616418123,
      "epoch": 0.3781818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3656170454545455e-05,
      "loss": 0.0,
      "num_tokens": 10306424.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 832,
      "step_time": 6.786695882154163
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.074679055251181,
      "epoch": 0.38545454545454544,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3647625e-05,
      "loss": 0.0,
      "num_tokens": 10505688.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 848,
      "step_time": 6.809287193464115
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1155389808118343,
      "epoch": 0.3927272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3639079545454546e-05,
      "loss": 0.0,
      "num_tokens": 10705760.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 864,
      "step_time": 6.785546476719901
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.333749057725072,
      "epoch": 0.4,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.363053409090909e-05,
      "loss": 0.0,
      "num_tokens": 10904320.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 880,
      "step_time": 6.80823243613122
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4373437957838178,
      "epoch": 0.4072727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3621988636363637e-05,
      "loss": 0.0,
      "num_tokens": 11102216.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 896,
      "step_time": 6.764123514120001
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1984064765274525,
      "epoch": 0.41454545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3613443181818182e-05,
      "loss": 0.0,
      "num_tokens": 11301192.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 912,
      "step_time": 6.791246214008424
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2342392737045884,
      "epoch": 0.4218181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3604897727272727e-05,
      "loss": 0.0,
      "num_tokens": 11499160.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 928,
      "step_time": 6.8632384678930975
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3517434494569898,
      "epoch": 0.4290909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3596352272727273e-05,
      "loss": 0.0,
      "num_tokens": 11699704.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 944,
      "step_time": 6.8105885534896515
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.096210742369294,
      "epoch": 0.43636363636363634,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3587806818181818e-05,
      "loss": 0.0,
      "num_tokens": 11903248.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 960,
      "step_time": 6.8464043863350525
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2437717225402594,
      "epoch": 0.44363636363636366,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3579261363636363e-05,
      "loss": 0.0,
      "num_tokens": 12101112.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 976,
      "step_time": 6.825504575506784
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0635962896049023,
      "epoch": 0.4509090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3570715909090909e-05,
      "loss": 0.0,
      "num_tokens": 12299056.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 992,
      "step_time": 6.814487671887036
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3715159576386213,
      "epoch": 0.4581818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3562170454545455e-05,
      "loss": 0.0,
      "num_tokens": 12498240.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1008,
      "step_time": 6.855824781639967
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.249473374336958,
      "epoch": 0.46545454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3553625000000002e-05,
      "loss": 0.0,
      "num_tokens": 12698888.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1024,
      "step_time": 6.82246611622395
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9984740586951375,
      "epoch": 0.4727272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3545079545454547e-05,
      "loss": 0.0,
      "num_tokens": 12898736.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1040,
      "step_time": 6.777579111163504
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0234864549711347,
      "epoch": 0.48,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3536534090909091e-05,
      "loss": 0.0,
      "num_tokens": 13099704.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1056,
      "step_time": 6.884962650714442
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.022543553262949,
      "epoch": 0.48727272727272725,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3527988636363638e-05,
      "loss": 0.0,
      "num_tokens": 13298200.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1072,
      "step_time": 6.744426833873149
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.102074005641043,
      "epoch": 0.49454545454545457,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3519443181818183e-05,
      "loss": 0.0,
      "num_tokens": 13497656.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1088,
      "step_time": 6.766534966300242
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.393769825808704,
      "epoch": 0.5018181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3510897727272729e-05,
      "loss": 0.0,
      "num_tokens": 13695544.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1104,
      "step_time": 6.776737172156572
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.045584147796035,
      "epoch": 0.509090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3502352272727274e-05,
      "loss": 0.0,
      "num_tokens": 13893656.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1120,
      "step_time": 6.779956117738038
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.084079368971288,
      "epoch": 0.5163636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3493806818181818e-05,
      "loss": 0.0,
      "num_tokens": 14095056.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1136,
      "step_time": 6.782520697277505
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1256261626258492,
      "epoch": 0.5236363636363637,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3485261363636365e-05,
      "loss": 0.0,
      "num_tokens": 14296296.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1152,
      "step_time": 6.842673333128914
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3446692936122417,
      "epoch": 0.5309090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.347671590909091e-05,
      "loss": 0.0,
      "num_tokens": 14495584.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1168,
      "step_time": 6.777760679484345
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.10722962487489,
      "epoch": 0.5381818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3468170454545456e-05,
      "loss": 0.0,
      "num_tokens": 14694192.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1184,
      "step_time": 6.828546286211349
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4106021150946617,
      "epoch": 0.5454545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3459625e-05,
      "loss": 0.0,
      "num_tokens": 14889632.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1200,
      "step_time": 6.803303236432839
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4056353475898504,
      "epoch": 0.5527272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3451079545454545e-05,
      "loss": 0.0,
      "num_tokens": 15086216.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1216,
      "step_time": 6.809469573141541
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2028971537947655,
      "epoch": 0.56,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3442534090909092e-05,
      "loss": 0.0,
      "num_tokens": 15282776.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1232,
      "step_time": 6.815300940710586
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2706773821264505,
      "epoch": 0.5672727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3433988636363637e-05,
      "loss": 0.0,
      "num_tokens": 15481280.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1248,
      "step_time": 6.789468246744946
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.417286262847483,
      "epoch": 0.5745454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3425443181818181e-05,
      "loss": 0.0,
      "num_tokens": 15678256.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1264,
      "step_time": 6.803466434008442
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9892088724300265,
      "epoch": 0.5818181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3416897727272728e-05,
      "loss": 0.0,
      "num_tokens": 15878344.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1280,
      "step_time": 6.839117998140864
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.317000398412347,
      "epoch": 0.5890909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3408352272727273e-05,
      "loss": 0.0,
      "num_tokens": 16075536.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1296,
      "step_time": 6.830957441532519
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3442000709474087,
      "epoch": 0.5963636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3399806818181819e-05,
      "loss": 0.0,
      "num_tokens": 16277384.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1312,
      "step_time": 6.842805185704492
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.494213981553912,
      "epoch": 0.6036363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3391261363636364e-05,
      "loss": 0.0,
      "num_tokens": 16477464.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1328,
      "step_time": 6.88639573531691
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.187366573140025,
      "epoch": 0.610909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3382715909090908e-05,
      "loss": 0.0,
      "num_tokens": 16678504.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1344,
      "step_time": 6.771680047851987
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2736073713749647,
      "epoch": 0.6181818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3374170454545455e-05,
      "loss": 0.0,
      "num_tokens": 16876456.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1360,
      "step_time": 6.741794998641126
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.5669486774131656,
      "epoch": 0.6254545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3365625e-05,
      "loss": 0.0,
      "num_tokens": 17074288.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1376,
      "step_time": 6.771289608557709
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.129427008330822,
      "epoch": 0.6327272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3357079545454546e-05,
      "loss": 0.0,
      "num_tokens": 17270736.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1392,
      "step_time": 6.766435696568806
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.147328623570502,
      "epoch": 0.64,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.334853409090909e-05,
      "loss": 0.0,
      "num_tokens": 17470040.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1408,
      "step_time": 6.835226783179678
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0660659885033965,
      "epoch": 0.6472727272727272,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3339988636363636e-05,
      "loss": 0.0,
      "num_tokens": 17670056.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1424,
      "step_time": 6.817595137399621
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3427893919870257,
      "epoch": 0.6545454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3331443181818182e-05,
      "loss": 0.0,
      "num_tokens": 17864512.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1440,
      "step_time": 6.7745873693493195
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.988153620623052,
      "epoch": 0.6618181818181819,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3322897727272728e-05,
      "loss": 0.0,
      "num_tokens": 18064008.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1456,
      "step_time": 6.810007059946656
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.5129095343872905,
      "epoch": 0.6690909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3314352272727273e-05,
      "loss": 0.0,
      "num_tokens": 18265008.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1472,
      "step_time": 6.775363146036398
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.341884721070528,
      "epoch": 0.6763636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.330580681818182e-05,
      "loss": 0.0,
      "num_tokens": 18463848.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1488,
      "step_time": 6.8186130007961765
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.579329112544656,
      "epoch": 0.6836363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3297261363636364e-05,
      "loss": 0.0,
      "num_tokens": 18662248.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1504,
      "step_time": 6.8083467375836335
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1765119256451726,
      "epoch": 0.6909090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.328871590909091e-05,
      "loss": 0.0,
      "num_tokens": 18861328.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1520,
      "step_time": 6.778358690673485
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1588730523362756,
      "epoch": 0.6981818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3280170454545455e-05,
      "loss": 0.0,
      "num_tokens": 19062008.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1536,
      "step_time": 6.802705559995957
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.057212447747588,
      "epoch": 0.7054545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3271625e-05,
      "loss": 0.0,
      "num_tokens": 19259584.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1552,
      "step_time": 6.820947894244455
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3579221479594707,
      "epoch": 0.7127272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3263079545454547e-05,
      "loss": 0.0,
      "num_tokens": 19456288.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1568,
      "step_time": 6.783202945603989
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1419159155339003,
      "epoch": 0.72,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3254534090909091e-05,
      "loss": 0.0,
      "num_tokens": 19654392.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1584,
      "step_time": 6.792444017424714
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0922014825046062,
      "epoch": 0.7272727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3245988636363638e-05,
      "loss": 0.0,
      "num_tokens": 19854784.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1600,
      "step_time": 6.790935325610917
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.302881100215018,
      "epoch": 0.7345454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3237443181818183e-05,
      "loss": 0.0,
      "num_tokens": 20054800.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1616,
      "step_time": 6.799347155960277
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2373234955593944,
      "epoch": 0.7418181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3228897727272727e-05,
      "loss": 0.0,
      "num_tokens": 20252920.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1632,
      "step_time": 6.742899239063263
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.122205875813961,
      "epoch": 0.7490909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3220352272727274e-05,
      "loss": 0.0,
      "num_tokens": 20452480.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1648,
      "step_time": 6.765379874326754
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.217903313226998,
      "epoch": 0.7563636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3211806818181818e-05,
      "loss": 0.0,
      "num_tokens": 20654800.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1664,
      "step_time": 6.768923559284303
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3427827628329396,
      "epoch": 0.7636363636363637,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3203261363636365e-05,
      "loss": 0.0,
      "num_tokens": 20853216.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1680,
      "step_time": 6.749019315990154
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.964004747569561,
      "epoch": 0.7709090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.319471590909091e-05,
      "loss": 0.0,
      "num_tokens": 21051368.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1696,
      "step_time": 6.794358595914673
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1231435453519225,
      "epoch": 0.7781818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3186170454545454e-05,
      "loss": 0.0,
      "num_tokens": 21247120.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1712,
      "step_time": 6.795791415905114
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.5061326697468758,
      "epoch": 0.7854545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3177625e-05,
      "loss": 0.0,
      "num_tokens": 21443488.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1728,
      "step_time": 6.810171545366757
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3949177227914333,
      "epoch": 0.7927272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3169079545454545e-05,
      "loss": 0.0,
      "num_tokens": 21638376.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1744,
      "step_time": 6.751137403829489
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.196354507468641,
      "epoch": 0.8,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3160534090909092e-05,
      "loss": 0.0,
      "num_tokens": 21838752.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1760,
      "step_time": 6.813927979324944
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.8691934188827872,
      "epoch": 0.8072727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3151988636363637e-05,
      "loss": 0.0,
      "num_tokens": 22038504.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1776,
      "step_time": 6.784064815903548
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0765619752928615,
      "epoch": 0.8145454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3143443181818181e-05,
      "loss": 0.0,
      "num_tokens": 22239024.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1792,
      "step_time": 6.878322484553792
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.309577951207757,
      "epoch": 0.8218181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3134897727272728e-05,
      "loss": 0.0,
      "num_tokens": 22438728.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1808,
      "step_time": 6.863884965714533
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.058369752019644,
      "epoch": 0.8290909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3126352272727273e-05,
      "loss": 0.0,
      "num_tokens": 22638544.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1824,
      "step_time": 6.771415026450995
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1903171529993415,
      "epoch": 0.8363636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3117806818181817e-05,
      "loss": 0.0,
      "num_tokens": 22837168.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1840,
      "step_time": 6.7205873560742475
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2971535082906485,
      "epoch": 0.8436363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3109261363636364e-05,
      "loss": 0.0,
      "num_tokens": 23035856.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1856,
      "step_time": 6.724811541091185
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1190958246588707,
      "epoch": 0.850909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3100715909090908e-05,
      "loss": 0.0,
      "num_tokens": 23232776.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1872,
      "step_time": 6.714156559668481
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.027809141203761,
      "epoch": 0.8581818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3092170454545455e-05,
      "loss": 0.0,
      "num_tokens": 23432512.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1888,
      "step_time": 6.788581044296734
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.099637934938073,
      "epoch": 0.8654545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3083625e-05,
      "loss": 0.0,
      "num_tokens": 23630912.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1904,
      "step_time": 6.804333564650733
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0758075192570686,
      "epoch": 0.8727272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3075079545454546e-05,
      "loss": 0.0,
      "num_tokens": 23830264.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1920,
      "step_time": 6.776671283470932
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.175785332918167,
      "epoch": 0.88,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3066534090909092e-05,
      "loss": 0.0,
      "num_tokens": 24029912.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1936,
      "step_time": 6.677869445469696
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.12380725517869,
      "epoch": 0.8872727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3057988636363637e-05,
      "loss": 0.0,
      "num_tokens": 24226168.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1952,
      "step_time": 6.8098004084895365
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2622394105419517,
      "epoch": 0.8945454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3049443181818184e-05,
      "loss": 0.0,
      "num_tokens": 24426208.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1968,
      "step_time": 6.808981532289181
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3609932847321033,
      "epoch": 0.9018181818181819,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3040897727272728e-05,
      "loss": 0.0,
      "num_tokens": 24623336.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 1984,
      "step_time": 6.7059100805199705
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2017374001443386,
      "epoch": 0.9090909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3032352272727273e-05,
      "loss": 0.0,
      "num_tokens": 24820400.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2000,
      "step_time": 6.68805788771715
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.292481265962124,
      "epoch": 0.9163636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.302380681818182e-05,
      "loss": 0.0,
      "num_tokens": 25022056.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2016,
      "step_time": 6.821227380132768
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.187743639573455,
      "epoch": 0.9236363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3015261363636364e-05,
      "loss": 0.0,
      "num_tokens": 25221240.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2032,
      "step_time": 6.751498891739175
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2612225087359548,
      "epoch": 0.9309090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.300671590909091e-05,
      "loss": 0.0,
      "num_tokens": 25420000.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2048,
      "step_time": 6.7375550081487745
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0908776372671127,
      "epoch": 0.9381818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2998170454545455e-05,
      "loss": 0.0,
      "num_tokens": 25617824.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2064,
      "step_time": 6.745009416132234
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.436667935922742,
      "epoch": 0.9454545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2989625e-05,
      "loss": 0.0,
      "num_tokens": 25816440.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2080,
      "step_time": 6.726034635154065
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0809996156021953,
      "epoch": 0.9527272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2981079545454547e-05,
      "loss": 0.0,
      "num_tokens": 26015512.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2096,
      "step_time": 6.756280101719312
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.113866430707276,
      "epoch": 0.96,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2972534090909091e-05,
      "loss": 0.0,
      "num_tokens": 26216440.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2112,
      "step_time": 6.731012829870451
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3540880754590034,
      "epoch": 0.9672727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2963988636363636e-05,
      "loss": 0.0,
      "num_tokens": 26417064.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2128,
      "step_time": 6.778560371370986
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4368344880640507,
      "epoch": 0.9745454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2955443181818182e-05,
      "loss": 0.0,
      "num_tokens": 26612592.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2144,
      "step_time": 6.781460403406527
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.924749942496419,
      "epoch": 0.9818181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2946897727272727e-05,
      "loss": 0.0,
      "num_tokens": 26811568.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2160,
      "step_time": 6.786069019581191
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.308485238812864,
      "epoch": 0.9890909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2938352272727274e-05,
      "loss": 0.0,
      "num_tokens": 27007160.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2176,
      "step_time": 6.75381765246857
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1435415241867304,
      "epoch": 0.9963636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2929806818181818e-05,
      "loss": 0.0,
      "num_tokens": 27206072.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2192,
      "step_time": 6.723647503124084
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1017238460481167,
      "epoch": 1.0036363636363637,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2921261363636363e-05,
      "loss": 0.0,
      "num_tokens": 27400520.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2208,
      "step_time": 6.695596158213448
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1460805255919695,
      "epoch": 1.010909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.291271590909091e-05,
      "loss": 0.0,
      "num_tokens": 27601832.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2224,
      "step_time": 6.741232635220513
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.198020614683628,
      "epoch": 1.018181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2904170454545454e-05,
      "loss": 0.0,
      "num_tokens": 27800600.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2240,
      "step_time": 6.77682097296929
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2291477154940367,
      "epoch": 1.0254545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2895625e-05,
      "loss": 0.0,
      "num_tokens": 28000384.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2256,
      "step_time": 6.74526576831704
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.355206954292953,
      "epoch": 1.0327272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2887079545454545e-05,
      "loss": 0.0,
      "num_tokens": 28196720.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2272,
      "step_time": 6.7606360777281225
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3051024340093136,
      "epoch": 1.04,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.287853409090909e-05,
      "loss": 0.0,
      "num_tokens": 28394992.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2288,
      "step_time": 6.772849566536024
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2615869557484984,
      "epoch": 1.0472727272727274,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2869988636363637e-05,
      "loss": 0.0,
      "num_tokens": 28592184.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2304,
      "step_time": 6.763053335249424
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.264405391179025,
      "epoch": 1.0545454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2861443181818181e-05,
      "loss": 0.0,
      "num_tokens": 28791208.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2320,
      "step_time": 6.750271147699095
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.35021267272532,
      "epoch": 1.0618181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2852897727272728e-05,
      "loss": 0.0,
      "num_tokens": 28992008.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2336,
      "step_time": 6.7981160210911185
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1354622216895223,
      "epoch": 1.069090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2844352272727273e-05,
      "loss": 0.0,
      "num_tokens": 29188224.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2352,
      "step_time": 6.698252131580375
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0579872159287333,
      "epoch": 1.0763636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2835806818181819e-05,
      "loss": 0.0,
      "num_tokens": 29388544.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2368,
      "step_time": 6.779826779093128
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.028238764964044,
      "epoch": 1.0836363636363637,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2827261363636365e-05,
      "loss": 0.0,
      "num_tokens": 29589800.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2384,
      "step_time": 6.813384254521225
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.218187157995999,
      "epoch": 1.0909090909090908,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.281871590909091e-05,
      "loss": 0.0,
      "num_tokens": 29785960.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2400,
      "step_time": 6.7202456255909055
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.303743524476886,
      "epoch": 1.0981818181818181,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2810170454545455e-05,
      "loss": 0.0,
      "num_tokens": 29983272.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2416,
      "step_time": 6.780246646958403
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.396721890196204,
      "epoch": 1.1054545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2801625000000001e-05,
      "loss": 0.0,
      "num_tokens": 30182432.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2432,
      "step_time": 6.793788745824713
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.242832077667117,
      "epoch": 1.1127272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2793079545454546e-05,
      "loss": 0.0,
      "num_tokens": 30382672.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2448,
      "step_time": 6.747386139992159
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.286748672835529,
      "epoch": 1.12,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2784534090909092e-05,
      "loss": 0.0,
      "num_tokens": 30582552.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2464,
      "step_time": 6.742027651518583
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.192711003124714,
      "epoch": 1.1272727272727272,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2775988636363637e-05,
      "loss": 0.0,
      "num_tokens": 30781016.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2480,
      "step_time": 6.797363699704874
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.312186806462705,
      "epoch": 1.1345454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2767443181818182e-05,
      "loss": 0.0,
      "num_tokens": 30981640.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2496,
      "step_time": 6.711918727087323
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.18869723752141,
      "epoch": 1.1418181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2758897727272728e-05,
      "loss": 0.0,
      "num_tokens": 31178992.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2512,
      "step_time": 6.795956993591972
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2482578828930855,
      "epoch": 1.1490909090909092,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2750352272727273e-05,
      "loss": 0.0,
      "num_tokens": 31377960.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2528,
      "step_time": 6.804913574247621
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.359376387670636,
      "epoch": 1.1563636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.274180681818182e-05,
      "loss": 0.0,
      "num_tokens": 31579352.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2544,
      "step_time": 6.872893456253223
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.277367865666747,
      "epoch": 1.1636363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2733261363636364e-05,
      "loss": 0.0,
      "num_tokens": 31776984.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2560,
      "step_time": 6.8818644337006845
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.388336017727852,
      "epoch": 1.170909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2724715909090909e-05,
      "loss": 0.0,
      "num_tokens": 31978032.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2576,
      "step_time": 6.782117213879246
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0712667126208544,
      "epoch": 1.1781818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2716170454545455e-05,
      "loss": 0.0,
      "num_tokens": 32178904.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2592,
      "step_time": 6.735153479909059
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3562627052888274,
      "epoch": 1.1854545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2707625e-05,
      "loss": 0.0,
      "num_tokens": 32377648.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2608,
      "step_time": 6.842811773065478
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0933955032378435,
      "epoch": 1.1927272727272726,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2699079545454547e-05,
      "loss": 0.0,
      "num_tokens": 32577280.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2624,
      "step_time": 6.833708286401816
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2871429016813636,
      "epoch": 1.2,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2690534090909091e-05,
      "loss": 0.0,
      "num_tokens": 32775856.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2640,
      "step_time": 6.858709867228754
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1881882632151246,
      "epoch": 1.2072727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2681988636363636e-05,
      "loss": 0.0,
      "num_tokens": 32972456.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2656,
      "step_time": 6.836625085736159
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.047997717745602,
      "epoch": 1.2145454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2673443181818182e-05,
      "loss": 0.0,
      "num_tokens": 33171640.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2672,
      "step_time": 6.823630373168271
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9498577415943146,
      "epoch": 1.221818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2664897727272727e-05,
      "loss": 0.0,
      "num_tokens": 33372552.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2688,
      "step_time": 6.791127723874524
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1252716667950153,
      "epoch": 1.229090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2656352272727274e-05,
      "loss": 0.0,
      "num_tokens": 33570448.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2704,
      "step_time": 6.756760664633475
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2014796687290072,
      "epoch": 1.2363636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2647806818181818e-05,
      "loss": 0.0,
      "num_tokens": 33766520.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2720,
      "step_time": 6.686530307750218
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2421076837927103,
      "epoch": 1.2436363636363637,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2639261363636363e-05,
      "loss": 0.0,
      "num_tokens": 33964584.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2736,
      "step_time": 6.7870109137147665
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3316868767142296,
      "epoch": 1.250909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.263071590909091e-05,
      "loss": 0.0,
      "num_tokens": 34163576.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2752,
      "step_time": 6.718043155909982
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.503485517576337,
      "epoch": 1.2581818181818183,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2622170454545454e-05,
      "loss": 0.0,
      "num_tokens": 34360144.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2768,
      "step_time": 6.755784626526292
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.360766554251313,
      "epoch": 1.2654545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2613624999999999e-05,
      "loss": 0.0,
      "num_tokens": 34557016.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2784,
      "step_time": 6.720057619037107
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.143316438421607,
      "epoch": 1.2727272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2605079545454545e-05,
      "loss": 0.0,
      "num_tokens": 34755640.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2800,
      "step_time": 6.744722189905588
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1157698202878237,
      "epoch": 1.28,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.259653409090909e-05,
      "loss": 0.0,
      "num_tokens": 34951632.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2816,
      "step_time": 6.816667142789811
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.456563299521804,
      "epoch": 1.2872727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2587988636363638e-05,
      "loss": 0.0,
      "num_tokens": 35150376.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2832,
      "step_time": 6.802197508630343
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9530596751719713,
      "epoch": 1.2945454545454544,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2579443181818183e-05,
      "loss": 0.0,
      "num_tokens": 35349424.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2848,
      "step_time": 6.7407911563059315
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2607680400833488,
      "epoch": 1.3018181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2570897727272728e-05,
      "loss": 0.0,
      "num_tokens": 35546248.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2864,
      "step_time": 6.751011346175801
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2476487196981907,
      "epoch": 1.309090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2562352272727274e-05,
      "loss": 0.0,
      "num_tokens": 35746672.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2880,
      "step_time": 6.7571043413481675
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.974738628603518,
      "epoch": 1.3163636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2553806818181819e-05,
      "loss": 0.0,
      "num_tokens": 35947936.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2896,
      "step_time": 6.732615724671632
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9833324188366532,
      "epoch": 1.3236363636363637,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2545261363636365e-05,
      "loss": 0.0,
      "num_tokens": 36146976.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2912,
      "step_time": 6.747754844080191
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1581534668803215,
      "epoch": 1.330909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.253671590909091e-05,
      "loss": 0.0,
      "num_tokens": 36347240.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2928,
      "step_time": 6.806651291146409
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9356587659567595,
      "epoch": 1.3381818181818181,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2528170454545455e-05,
      "loss": 0.0,
      "num_tokens": 36546312.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2944,
      "step_time": 6.798078317428008
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.31802302505821,
      "epoch": 1.3454545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2519625000000001e-05,
      "loss": 0.0,
      "num_tokens": 36745320.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2960,
      "step_time": 6.80775830487255
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.344906162470579,
      "epoch": 1.3527272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2511079545454546e-05,
      "loss": 0.0,
      "num_tokens": 36943456.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2976,
      "step_time": 6.7794261837843806
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.300058946944773,
      "epoch": 1.3599999999999999,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.250253409090909e-05,
      "loss": 0.0,
      "num_tokens": 37140768.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 2992,
      "step_time": 6.751858755480498
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3071441184729338,
      "epoch": 1.3672727272727272,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2493988636363637e-05,
      "loss": 0.0,
      "num_tokens": 37338376.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3008,
      "step_time": 6.764147925772704
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.082784323953092,
      "epoch": 1.3745454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2485443181818182e-05,
      "loss": 0.0,
      "num_tokens": 37536672.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3024,
      "step_time": 6.7155560600804165
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.446023194119334,
      "epoch": 1.3818181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2476897727272728e-05,
      "loss": 0.0,
      "num_tokens": 37735560.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3040,
      "step_time": 6.788893666642252
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.8666899278759956,
      "epoch": 1.3890909090909092,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2468352272727273e-05,
      "loss": 0.0,
      "num_tokens": 37936488.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3056,
      "step_time": 6.777422826969996
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3226305842399597,
      "epoch": 1.3963636363636365,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2459806818181818e-05,
      "loss": 0.0,
      "num_tokens": 38138056.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3072,
      "step_time": 6.787566299317405
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1517041893675923,
      "epoch": 1.4036363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2451261363636364e-05,
      "loss": 0.0,
      "num_tokens": 38337816.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3088,
      "step_time": 6.76234096015105
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.091954587958753,
      "epoch": 1.410909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2442715909090909e-05,
      "loss": 0.0,
      "num_tokens": 38535064.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3104,
      "step_time": 6.831137398316059
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3856596797704697,
      "epoch": 1.4181818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2434170454545455e-05,
      "loss": 0.0,
      "num_tokens": 38729336.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3120,
      "step_time": 6.82296618015971
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2895570769906044,
      "epoch": 1.4254545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2425625e-05,
      "loss": 0.0,
      "num_tokens": 38924968.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3136,
      "step_time": 6.74725763668539
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.043810401111841,
      "epoch": 1.4327272727272726,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2417079545454545e-05,
      "loss": 0.0,
      "num_tokens": 39122568.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3152,
      "step_time": 6.819663318456151
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.5287796342745423,
      "epoch": 1.44,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2408534090909091e-05,
      "loss": 0.0,
      "num_tokens": 39323144.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3168,
      "step_time": 6.781180361402221
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0407142341136932,
      "epoch": 1.4472727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2399988636363636e-05,
      "loss": 0.0,
      "num_tokens": 39522680.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3184,
      "step_time": 6.799189413781278
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.955046221613884,
      "epoch": 1.4545454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2391443181818182e-05,
      "loss": 0.0,
      "num_tokens": 39722640.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3200,
      "step_time": 6.7851121361600235
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.305285726673901,
      "epoch": 1.461818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2382897727272727e-05,
      "loss": 0.0,
      "num_tokens": 39920992.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3216,
      "step_time": 6.807475323788822
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0928380796685815,
      "epoch": 1.4690909090909092,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2374352272727272e-05,
      "loss": 0.0,
      "num_tokens": 40120288.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3232,
      "step_time": 6.831833872594871
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.916893396526575,
      "epoch": 1.4763636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2365806818181818e-05,
      "loss": 0.0,
      "num_tokens": 40317616.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3248,
      "step_time": 6.692840414005332
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.162033887580037,
      "epoch": 1.4836363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2357261363636363e-05,
      "loss": 0.0,
      "num_tokens": 40518912.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3264,
      "step_time": 6.813380097912159
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.265519233420491,
      "epoch": 1.490909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.234871590909091e-05,
      "loss": 0.0,
      "num_tokens": 40712320.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3280,
      "step_time": 6.8094925917102955
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.264208293519914,
      "epoch": 1.498181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2340170454545456e-05,
      "loss": 0.0,
      "num_tokens": 40910752.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3296,
      "step_time": 6.92184988129884
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0021583158522844,
      "epoch": 1.5054545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2331625e-05,
      "loss": 0.0,
      "num_tokens": 41109232.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3312,
      "step_time": 6.807405228668358
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1294081257656217,
      "epoch": 1.5127272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2323079545454547e-05,
      "loss": 0.0,
      "num_tokens": 41307944.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3328,
      "step_time": 6.7090692204074
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1915039075538516,
      "epoch": 1.52,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2314534090909092e-05,
      "loss": 0.0,
      "num_tokens": 41507408.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3344,
      "step_time": 6.812871761154383
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3671645503491163,
      "epoch": 1.5272727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2305988636363637e-05,
      "loss": 0.0,
      "num_tokens": 41701304.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3360,
      "step_time": 6.819892742554657
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2086366089060903,
      "epoch": 1.5345454545454547,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2297443181818183e-05,
      "loss": 0.0,
      "num_tokens": 41897808.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3376,
      "step_time": 6.792966264649294
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4239872731268406,
      "epoch": 1.541818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2288897727272728e-05,
      "loss": 0.0,
      "num_tokens": 42097744.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3392,
      "step_time": 6.7452264034654945
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4442757526412606,
      "epoch": 1.549090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2280352272727274e-05,
      "loss": 0.0,
      "num_tokens": 42295304.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3408,
      "step_time": 6.8505471590324305
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0827516186982393,
      "epoch": 1.5563636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2271806818181819e-05,
      "loss": 0.0,
      "num_tokens": 42494648.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3424,
      "step_time": 6.797972589090932
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3115072082728148,
      "epoch": 1.5636363636363635,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2263261363636364e-05,
      "loss": 0.0,
      "num_tokens": 42691832.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3440,
      "step_time": 6.812032384448685
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2236884916201234,
      "epoch": 1.5709090909090908,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.225471590909091e-05,
      "loss": 0.0,
      "num_tokens": 42888088.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3456,
      "step_time": 6.784853787452448
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.468571356497705,
      "epoch": 1.5781818181818181,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2246170454545455e-05,
      "loss": 0.0,
      "num_tokens": 43085488.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3472,
      "step_time": 6.80501512484625
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3984502805396914,
      "epoch": 1.5854545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2237625000000001e-05,
      "loss": 0.0,
      "num_tokens": 43284248.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3488,
      "step_time": 6.7867183712660335
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4759268388152122,
      "epoch": 1.5927272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2229079545454546e-05,
      "loss": 0.0,
      "num_tokens": 43484528.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3504,
      "step_time": 6.7809304527472705
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.420325178653002,
      "epoch": 1.6,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.222053409090909e-05,
      "loss": 0.0,
      "num_tokens": 43682552.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3520,
      "step_time": 6.777703637490049
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.481171676889062,
      "epoch": 1.6072727272727274,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2211988636363637e-05,
      "loss": 0.0,
      "num_tokens": 43882104.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3536,
      "step_time": 6.742597420467064
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.358258410356939,
      "epoch": 1.6145454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2203443181818182e-05,
      "loss": 0.0,
      "num_tokens": 44079960.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3552,
      "step_time": 6.825212087715045
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.696707614697516,
      "epoch": 1.6218181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2194897727272728e-05,
      "loss": 0.0,
      "num_tokens": 44276632.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3568,
      "step_time": 6.804728279355913
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2286562798544765,
      "epoch": 1.6290909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2186352272727273e-05,
      "loss": 0.0,
      "num_tokens": 44474816.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3584,
      "step_time": 6.721510254021268
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.202762014232576,
      "epoch": 1.6363636363636362,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2177806818181818e-05,
      "loss": 0.0,
      "num_tokens": 44676336.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3600,
      "step_time": 6.728681799781043
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3508113231509924,
      "epoch": 1.6436363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2169261363636364e-05,
      "loss": 0.0,
      "num_tokens": 44876448.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3616,
      "step_time": 6.817280929361004
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1071648206561804,
      "epoch": 1.6509090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2160715909090909e-05,
      "loss": 0.0,
      "num_tokens": 45075696.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3632,
      "step_time": 6.778283861873206
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1784300385043025,
      "epoch": 1.6581818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2152170454545454e-05,
      "loss": 0.0,
      "num_tokens": 45272976.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3648,
      "step_time": 6.743116174242459
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1938441330567002,
      "epoch": 1.6654545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2143625e-05,
      "loss": 0.0,
      "num_tokens": 45468792.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3664,
      "step_time": 6.801589194859844
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1752672847360373,
      "epoch": 1.6727272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2135079545454545e-05,
      "loss": 0.0,
      "num_tokens": 45665600.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3680,
      "step_time": 6.774731121433433
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.326844639144838,
      "epoch": 1.6800000000000002,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2126534090909091e-05,
      "loss": 0.0,
      "num_tokens": 45861696.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3696,
      "step_time": 6.764392688404769
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9930380610749125,
      "epoch": 1.6872727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2117988636363636e-05,
      "loss": 0.0,
      "num_tokens": 46061648.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3712,
      "step_time": 6.803180871240329
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.130782919935882,
      "epoch": 1.6945454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2109443181818182e-05,
      "loss": 0.0,
      "num_tokens": 46260736.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3728,
      "step_time": 6.832387202652171
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3689714800566435,
      "epoch": 1.7018181818181817,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2100897727272729e-05,
      "loss": 0.0,
      "num_tokens": 46457608.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3744,
      "step_time": 6.8068507291027345
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1559818061068654,
      "epoch": 1.709090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2092352272727274e-05,
      "loss": 0.0,
      "num_tokens": 46657072.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3760,
      "step_time": 6.819567769533023
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.149191261269152,
      "epoch": 1.7163636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.208380681818182e-05,
      "loss": 0.0,
      "num_tokens": 46857960.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3776,
      "step_time": 6.7862320126150735
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3385393377393484,
      "epoch": 1.7236363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2075261363636365e-05,
      "loss": 0.0,
      "num_tokens": 47056800.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3792,
      "step_time": 6.83025036327308
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3446352081373334,
      "epoch": 1.730909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.206671590909091e-05,
      "loss": 0.0,
      "num_tokens": 47254072.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3808,
      "step_time": 6.7963205120759085
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1582796899601817,
      "epoch": 1.7381818181818183,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2058170454545456e-05,
      "loss": 0.0,
      "num_tokens": 47453424.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3824,
      "step_time": 6.754275181854609
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.350019756704569,
      "epoch": 1.7454545454545456,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2049625e-05,
      "loss": 0.0,
      "num_tokens": 47655640.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3840,
      "step_time": 6.815282926079817
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2120337886735797,
      "epoch": 1.7527272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2041079545454545e-05,
      "loss": 0.0,
      "num_tokens": 47855408.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3856,
      "step_time": 6.773871595331002
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.111018780618906,
      "epoch": 1.76,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2032534090909092e-05,
      "loss": 0.0,
      "num_tokens": 48052248.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3872,
      "step_time": 6.7454344616271555
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.026843812316656,
      "epoch": 1.767272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2023988636363637e-05,
      "loss": 0.0,
      "num_tokens": 48251128.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3888,
      "step_time": 6.81127433484653
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.142205369658768,
      "epoch": 1.7745454545454544,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2015443181818183e-05,
      "loss": 0.0,
      "num_tokens": 48451048.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3904,
      "step_time": 6.768906163924839
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.833907858468592,
      "epoch": 1.7818181818181817,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2006897727272728e-05,
      "loss": 0.0,
      "num_tokens": 48650960.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3920,
      "step_time": 6.76123388716951
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.965332482010126,
      "epoch": 1.789090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1998352272727272e-05,
      "loss": 0.0,
      "num_tokens": 48852520.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3936,
      "step_time": 6.785187493253034
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.069445427507162,
      "epoch": 1.7963636363636364,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1989806818181819e-05,
      "loss": 0.0,
      "num_tokens": 49053104.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3952,
      "step_time": 6.755604680161923
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2699612844735384,
      "epoch": 1.8036363636363637,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1981261363636364e-05,
      "loss": 0.0,
      "num_tokens": 49249768.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3968,
      "step_time": 6.758650440606289
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1967758797109127,
      "epoch": 1.810909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.197271590909091e-05,
      "loss": 0.0,
      "num_tokens": 49453984.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 3984,
      "step_time": 6.8494268947979435
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.364362958818674,
      "epoch": 1.8181818181818183,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1964170454545455e-05,
      "loss": 0.0,
      "num_tokens": 49652000.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4000,
      "step_time": 6.740954163658898
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.118711346760392,
      "epoch": 1.8254545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1955625e-05,
      "loss": 0.0,
      "num_tokens": 49849992.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4016,
      "step_time": 6.755418484797701
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.07691203802824,
      "epoch": 1.8327272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1947079545454546e-05,
      "loss": 0.0,
      "num_tokens": 50049880.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4032,
      "step_time": 6.882297096424736
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9445608789101243,
      "epoch": 1.8399999999999999,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.193853409090909e-05,
      "loss": 0.0,
      "num_tokens": 50247568.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4048,
      "step_time": 6.855586153862532
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3924843007698655,
      "epoch": 1.8472727272727272,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1929988636363637e-05,
      "loss": 0.0,
      "num_tokens": 50444408.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4064,
      "step_time": 6.770890337065794
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0896419156342745,
      "epoch": 1.8545454545454545,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1921443181818182e-05,
      "loss": 0.0,
      "num_tokens": 50645840.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4080,
      "step_time": 6.759655719448347
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.368128092959523,
      "epoch": 1.8618181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1912897727272727e-05,
      "loss": 0.0,
      "num_tokens": 50845024.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4096,
      "step_time": 6.766734938137233
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.189123467542231,
      "epoch": 1.8690909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1904352272727273e-05,
      "loss": 0.0,
      "num_tokens": 51041856.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4112,
      "step_time": 6.764893968065735
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.213881215080619,
      "epoch": 1.8763636363636365,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1895806818181818e-05,
      "loss": 0.0,
      "num_tokens": 51241264.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4128,
      "step_time": 6.777542426658329
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.6151960818096995,
      "epoch": 1.8836363636363638,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1887261363636364e-05,
      "loss": 0.0,
      "num_tokens": 51436520.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4144,
      "step_time": 6.759419926791452
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2313709212467074,
      "epoch": 1.8909090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1878715909090909e-05,
      "loss": 0.0,
      "num_tokens": 51632536.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4160,
      "step_time": 6.775483648234513
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2920576203614473,
      "epoch": 1.8981818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1870170454545454e-05,
      "loss": 0.0,
      "num_tokens": 51829736.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4176,
      "step_time": 6.728544996934943
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0724613601341844,
      "epoch": 1.9054545454545453,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1861625000000002e-05,
      "loss": 0.0,
      "num_tokens": 52030088.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4192,
      "step_time": 6.739417067728937
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3921290542930365,
      "epoch": 1.9127272727272726,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1853079545454547e-05,
      "loss": 0.0,
      "num_tokens": 52229240.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4208,
      "step_time": 6.781370086420793
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.101226268336177,
      "epoch": 1.92,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1844534090909091e-05,
      "loss": 0.0,
      "num_tokens": 52427576.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4224,
      "step_time": 6.747128820337821
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2702527521178126,
      "epoch": 1.9272727272727272,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1835988636363638e-05,
      "loss": 0.0,
      "num_tokens": 52623864.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4240,
      "step_time": 6.811194027657621
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1875290479511023,
      "epoch": 1.9345454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1827443181818182e-05,
      "loss": 0.0,
      "num_tokens": 52823336.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4256,
      "step_time": 6.749341471120715
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.232122967019677,
      "epoch": 1.9418181818181819,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1818897727272729e-05,
      "loss": 0.0,
      "num_tokens": 53021440.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4272,
      "step_time": 6.738833487150259
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2272182758897543,
      "epoch": 1.9490909090909092,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1810352272727274e-05,
      "loss": 0.0,
      "num_tokens": 53221544.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4288,
      "step_time": 6.832757337251678
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.256685646250844,
      "epoch": 1.9563636363636365,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1801806818181818e-05,
      "loss": 0.0,
      "num_tokens": 53419944.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4304,
      "step_time": 6.768037219590042
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2525876704603434,
      "epoch": 1.9636363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1793261363636365e-05,
      "loss": 0.0,
      "num_tokens": 53618080.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4320,
      "step_time": 6.794253888714593
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.8666307339444757,
      "epoch": 1.970909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.178471590909091e-05,
      "loss": 0.0,
      "num_tokens": 53815992.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4336,
      "step_time": 6.776390675047878
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1485872622579336,
      "epoch": 1.978181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1776170454545456e-05,
      "loss": 0.0,
      "num_tokens": 54016672.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4352,
      "step_time": 6.770807302149478
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.231991925276816,
      "epoch": 1.9854545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1767625e-05,
      "loss": 0.0,
      "num_tokens": 54212968.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4368,
      "step_time": 6.753521733451635
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.170779351145029,
      "epoch": 1.9927272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1759079545454545e-05,
      "loss": 0.0,
      "num_tokens": 54413608.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4384,
      "step_time": 6.785529180720914
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.5853626411408186,
      "epoch": 2.0,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1750534090909092e-05,
      "loss": 0.0,
      "num_tokens": 54610264.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4400,
      "step_time": 6.7369108707644045
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2464861115440726,
      "epoch": 2.0072727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1741988636363637e-05,
      "loss": 0.0,
      "num_tokens": 54810368.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4416,
      "step_time": 6.768912441621069
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.171581517904997,
      "epoch": 2.0145454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1733443181818183e-05,
      "loss": 0.0,
      "num_tokens": 55007896.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4432,
      "step_time": 6.792140641016886
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.452936114743352,
      "epoch": 2.021818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1724897727272728e-05,
      "loss": 0.0,
      "num_tokens": 55210808.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4448,
      "step_time": 6.832421531260479
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1356757525354624,
      "epoch": 2.0290909090909093,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1716352272727272e-05,
      "loss": 0.0,
      "num_tokens": 55414744.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4464,
      "step_time": 6.85547347436659
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.0414814334362745,
      "epoch": 2.036363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1707806818181819e-05,
      "loss": 0.0,
      "num_tokens": 55612824.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4480,
      "step_time": 6.871003287553322
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2338881632313132,
      "epoch": 2.0436363636363635,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1699261363636364e-05,
      "loss": 0.0,
      "num_tokens": 55810056.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4496,
      "step_time": 6.782766689662822
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.887524533085525,
      "epoch": 2.050909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1690715909090908e-05,
      "loss": 0.0,
      "num_tokens": 56008880.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4512,
      "step_time": 6.833852186566219
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1544918725267053,
      "epoch": 2.058181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1682170454545455e-05,
      "loss": 0.0,
      "num_tokens": 56207120.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4528,
      "step_time": 6.802051118807867
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3047911264002323,
      "epoch": 2.0654545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1673625e-05,
      "loss": 0.0,
      "num_tokens": 56402352.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4544,
      "step_time": 6.732677310064901
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.9341879040002823,
      "epoch": 2.0727272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1665079545454546e-05,
      "loss": 0.0,
      "num_tokens": 56600552.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4560,
      "step_time": 6.7354966415441595
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.8921627597883344,
      "epoch": 2.08,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.165653409090909e-05,
      "loss": 0.0,
      "num_tokens": 56803840.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4576,
      "step_time": 6.7446831813431345
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2780918246135116,
      "epoch": 2.0872727272727274,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1647988636363635e-05,
      "loss": 0.0,
      "num_tokens": 57001096.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4592,
      "step_time": 6.755358822236303
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.271533099003136,
      "epoch": 2.0945454545454547,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1639443181818182e-05,
      "loss": 0.0,
      "num_tokens": 57199664.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4608,
      "step_time": 6.7298191034351476
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.135909851640463,
      "epoch": 2.101818181818182,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1630897727272727e-05,
      "loss": 0.0,
      "num_tokens": 57399104.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4624,
      "step_time": 6.7364318473264575
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.366418309509754,
      "epoch": 2.109090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1622352272727275e-05,
      "loss": 0.0,
      "num_tokens": 57598808.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4640,
      "step_time": 6.73096263356274
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2284381790086627,
      "epoch": 2.1163636363636362,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.161380681818182e-05,
      "loss": 0.0,
      "num_tokens": 57797832.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4656,
      "step_time": 6.690315750252921
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4452151097357273,
      "epoch": 2.1236363636363635,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1605261363636364e-05,
      "loss": 0.0,
      "num_tokens": 57995488.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4672,
      "step_time": 6.778060043696314
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 2.8929754393175244,
      "epoch": 2.130909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.159671590909091e-05,
      "loss": 0.0,
      "num_tokens": 58194192.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4688,
      "step_time": 6.732746751047671
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2306772526353598,
      "epoch": 2.138181818181818,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1588170454545455e-05,
      "loss": 0.0,
      "num_tokens": 58393848.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4704,
      "step_time": 6.7044033099664375
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2497889045625925,
      "epoch": 2.1454545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1579625e-05,
      "loss": 0.0,
      "num_tokens": 58593528.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4720,
      "step_time": 6.717062452749815
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.186699027195573,
      "epoch": 2.152727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1571079545454547e-05,
      "loss": 0.0,
      "num_tokens": 58797144.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4736,
      "step_time": 6.734358058718499
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1356083126738667,
      "epoch": 2.16,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1562534090909091e-05,
      "loss": 0.0,
      "num_tokens": 58998840.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4752,
      "step_time": 6.74215031391941
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3374369610100985,
      "epoch": 2.1672727272727275,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1553988636363638e-05,
      "loss": 0.0,
      "num_tokens": 59197904.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4768,
      "step_time": 6.771698047872633
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.377966145053506,
      "epoch": 2.174545454545455,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1545443181818182e-05,
      "loss": 0.0,
      "num_tokens": 59397792.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4784,
      "step_time": 6.911487909092102
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.2283763298764825,
      "epoch": 2.1818181818181817,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1536897727272727e-05,
      "loss": 0.0,
      "num_tokens": 59594240.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4800,
      "step_time": 6.762488114240114
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.3086146786808968,
      "epoch": 2.189090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1528352272727274e-05,
      "loss": 0.0,
      "num_tokens": 59792360.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4816,
      "step_time": 6.73554704192793
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1821892354637384,
      "epoch": 2.1963636363636363,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1519806818181818e-05,
      "loss": 0.0,
      "num_tokens": 59992696.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4832,
      "step_time": 6.773994111048523
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.238061768002808,
      "epoch": 2.2036363636363636,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1511261363636365e-05,
      "loss": 0.0,
      "num_tokens": 60189472.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4848,
      "step_time": 6.82461291278014
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4730343082919717,
      "epoch": 2.210909090909091,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.150271590909091e-05,
      "loss": 0.0,
      "num_tokens": 60389672.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4864,
      "step_time": 6.831689663988072
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.190039082430303,
      "epoch": 2.2181818181818183,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1494170454545454e-05,
      "loss": 0.0,
      "num_tokens": 60591880.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4880,
      "step_time": 6.769283943169285
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.265036822296679,
      "epoch": 2.2254545454545456,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1485625e-05,
      "loss": 0.0,
      "num_tokens": 60790112.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4896,
      "step_time": 6.8057959082652815
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.648914782330394,
      "epoch": 2.232727272727273,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1477079545454545e-05,
      "loss": 0.0,
      "num_tokens": 60986040.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4912,
      "step_time": 6.796487211890053
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.1904073525220156,
      "epoch": 2.24,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1468534090909092e-05,
      "loss": 0.0,
      "num_tokens": 61185592.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4928,
      "step_time": 6.8102810694254
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.176701887510717,
      "epoch": 2.247272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1459988636363637e-05,
      "loss": 0.0,
      "num_tokens": 61382736.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4944,
      "step_time": 6.808710300480016
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.182904541492462,
      "epoch": 2.2545454545454544,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1451443181818181e-05,
      "loss": 0.0,
      "num_tokens": 61580104.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4960,
      "step_time": 6.837853111966979
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.30023224465549,
      "epoch": 2.2618181818181817,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1442897727272728e-05,
      "loss": 0.0,
      "num_tokens": 61776736.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4976,
      "step_time": 6.791178819956258
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 1.0,
      "completions/max_length": 256.0,
      "completions/max_terminated_length": 0.0,
      "completions/mean_length": 256.0,
      "completions/mean_terminated_length": 0.0,
      "completions/min_length": 256.0,
      "completions/min_terminated_length": 0.0,
      "entropy": 3.4328582361340523,
      "epoch": 2.269090909090909,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.1434352272727272e-05,
      "loss": 0.0,
      "num_tokens": 61973360.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/phased_json_reward/mean": 0.0,
      "rewards/phased_json_reward/std": 0.0,
      "step": 4992,
      "step_time": 6.845349179930054
    }
  ],
  "logging_steps": 16,
  "max_steps": 26400,
  "num_input_tokens_seen": 62074560,
  "num_train_epochs": 12,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}