| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.006, |
| "eval_steps": 500, |
| "global_step": 75, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.84375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 173.0, |
| "completions/mean_length": 230.65625, |
| "completions/mean_terminated_length": 93.80000305175781, |
| "completions/min_length": 32.0, |
| "completions/min_terminated_length": 32.0, |
| "entropy": 2.7413549423217773, |
| "epoch": 8e-05, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 0.0, |
| "loss": 0.0, |
| "num_tokens": 7605.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 1, |
| "step_time": 20.038708471984137 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 207.0, |
| "completions/mean_length": 220.71875, |
| "completions/mean_terminated_length": 114.875, |
| "completions/min_length": 18.0, |
| "completions/min_terminated_length": 18.0, |
| "entropy": 2.8548775017261505, |
| "epoch": 0.00016, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.2438299999999998e-06, |
| "loss": 0.0, |
| "num_tokens": 14888.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 2, |
| "step_time": 22.31387728100526 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 248.0, |
| "completions/mean_length": 216.3125, |
| "completions/mean_terminated_length": 129.0, |
| "completions/min_length": 26.0, |
| "completions/min_terminated_length": 26.0, |
| "entropy": 2.8777474462985992, |
| "epoch": 0.00024, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.4876599999999997e-06, |
| "loss": 0.0, |
| "num_tokens": 22034.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 3, |
| "step_time": 19.508486614991853 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 203.0, |
| "completions/mean_length": 225.40625, |
| "completions/mean_terminated_length": 92.83333587646484, |
| "completions/min_length": 12.0, |
| "completions/min_terminated_length": 12.0, |
| "entropy": 2.708147943019867, |
| "epoch": 0.00032, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.73149e-06, |
| "loss": 0.0, |
| "num_tokens": 29471.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 4, |
| "step_time": 20.417726718005724 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.71875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 246.0, |
| "completions/mean_length": 210.0, |
| "completions/mean_terminated_length": 92.44444274902344, |
| "completions/min_length": 7.0, |
| "completions/min_terminated_length": 7.0, |
| "entropy": 2.7702046930789948, |
| "epoch": 0.0004, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.975319999999999e-06, |
| "loss": 0.0, |
| "num_tokens": 36415.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 5, |
| "step_time": 19.458035143004963 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.59375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 144.0, |
| "completions/mean_length": 180.46875, |
| "completions/mean_terminated_length": 70.0769271850586, |
| "completions/min_length": 14.0, |
| "completions/min_terminated_length": 14.0, |
| "entropy": 2.7136347889900208, |
| "epoch": 0.00048, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 6.2191499999999996e-06, |
| "loss": 0.0, |
| "num_tokens": 42410.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 6, |
| "step_time": 22.11808781498985 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.71875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 237.0, |
| "completions/mean_length": 215.84375, |
| "completions/mean_terminated_length": 113.22222137451172, |
| "completions/min_length": 34.0, |
| "completions/min_terminated_length": 34.0, |
| "entropy": 2.875216066837311, |
| "epoch": 0.00056, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 7.46298e-06, |
| "loss": 0.0, |
| "num_tokens": 49537.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 7, |
| "step_time": 21.699966289990698 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 245.0, |
| "completions/mean_length": 219.625, |
| "completions/mean_terminated_length": 110.5, |
| "completions/min_length": 10.0, |
| "completions/min_terminated_length": 10.0, |
| "entropy": 2.737216532230377, |
| "epoch": 0.00064, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 8.70681e-06, |
| "loss": 0.0, |
| "num_tokens": 56789.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 8, |
| "step_time": 19.435475739024696 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.71875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 239.0, |
| "completions/mean_length": 210.9375, |
| "completions/mean_terminated_length": 95.77777862548828, |
| "completions/min_length": 44.0, |
| "completions/min_terminated_length": 44.0, |
| "entropy": 2.9164214432239532, |
| "epoch": 0.00072, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 9.950639999999999e-06, |
| "loss": 0.0, |
| "num_tokens": 63759.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 9, |
| "step_time": 23.149850735993823 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.9375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 182.0, |
| "completions/mean_length": 245.96875, |
| "completions/mean_terminated_length": 95.5, |
| "completions/min_length": 9.0, |
| "completions/min_terminated_length": 9.0, |
| "entropy": 2.7655889093875885, |
| "epoch": 0.0008, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.1194469999999999e-05, |
| "loss": 0.0, |
| "num_tokens": 71850.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 10, |
| "step_time": 21.312995460008096 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 239.0, |
| "completions/mean_length": 221.75, |
| "completions/mean_terminated_length": 119.0, |
| "completions/min_length": 7.0, |
| "completions/min_terminated_length": 7.0, |
| "entropy": 2.4570625126361847, |
| "epoch": 0.00088, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.2438299999999999e-05, |
| "loss": 0.0, |
| "num_tokens": 79170.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 11, |
| "step_time": 18.84161558598862 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.78125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 255.0, |
| "completions/mean_length": 221.59375, |
| "completions/mean_terminated_length": 98.71428680419922, |
| "completions/min_length": 16.0, |
| "completions/min_terminated_length": 16.0, |
| "entropy": 2.7960894107818604, |
| "epoch": 0.00096, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.368213e-05, |
| "loss": 0.0, |
| "num_tokens": 86481.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 12, |
| "step_time": 21.481500715010043 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.5625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 194.0, |
| "completions/mean_length": 175.84375, |
| "completions/mean_terminated_length": 72.78572082519531, |
| "completions/min_length": 14.0, |
| "completions/min_terminated_length": 14.0, |
| "entropy": 2.777267038822174, |
| "epoch": 0.00104, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.492596e-05, |
| "loss": 0.0, |
| "num_tokens": 92316.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 13, |
| "step_time": 22.648648835995118 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 247.0, |
| "completions/mean_length": 222.03125, |
| "completions/mean_terminated_length": 120.125, |
| "completions/min_length": 47.0, |
| "completions/min_terminated_length": 47.0, |
| "entropy": 2.7303763031959534, |
| "epoch": 0.00112, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.6169789999999998e-05, |
| "loss": 0.0, |
| "num_tokens": 99645.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 14, |
| "step_time": 19.483697141011362 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 248.0, |
| "completions/mean_length": 210.625, |
| "completions/mean_terminated_length": 110.80000305175781, |
| "completions/min_length": 32.0, |
| "completions/min_terminated_length": 32.0, |
| "entropy": 2.6232912838459015, |
| "epoch": 0.0012, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.741362e-05, |
| "loss": 0.0, |
| "num_tokens": 106597.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 15, |
| "step_time": 22.048389301991847 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 96.0, |
| "completions/mean_length": 232.78125, |
| "completions/mean_terminated_length": 70.25, |
| "completions/min_length": 21.0, |
| "completions/min_terminated_length": 21.0, |
| "entropy": 2.666172578930855, |
| "epoch": 0.00128, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.865745e-05, |
| "loss": 0.0, |
| "num_tokens": 114266.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 16, |
| "step_time": 21.76690764699015 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.71875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 229.0, |
| "completions/mean_length": 215.90625, |
| "completions/mean_terminated_length": 113.44444274902344, |
| "completions/min_length": 6.0, |
| "completions/min_terminated_length": 6.0, |
| "entropy": 2.6928308606147766, |
| "epoch": 0.00136, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 1.9901279999999997e-05, |
| "loss": 0.0, |
| "num_tokens": 121399.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 17, |
| "step_time": 19.46455569099635 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.59375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 167.0, |
| "completions/mean_length": 183.09375, |
| "completions/mean_terminated_length": 76.53846740722656, |
| "completions/min_length": 7.0, |
| "completions/min_terminated_length": 7.0, |
| "entropy": 2.5316834151744843, |
| "epoch": 0.00144, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.114511e-05, |
| "loss": 0.0, |
| "num_tokens": 127478.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 18, |
| "step_time": 22.669331256991427 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.9375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 234.0, |
| "completions/mean_length": 251.125, |
| "completions/mean_terminated_length": 178.0, |
| "completions/min_length": 122.0, |
| "completions/min_terminated_length": 122.0, |
| "entropy": 2.7606712579727173, |
| "epoch": 0.00152, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.2388939999999998e-05, |
| "loss": 0.0, |
| "num_tokens": 135734.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 19, |
| "step_time": 22.320524194008613 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.78125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 235.0, |
| "completions/mean_length": 221.90625, |
| "completions/mean_terminated_length": 100.14286041259766, |
| "completions/min_length": 8.0, |
| "completions/min_terminated_length": 8.0, |
| "entropy": 2.6717658638954163, |
| "epoch": 0.0016, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.3632769999999996e-05, |
| "loss": 0.0, |
| "num_tokens": 143047.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 20, |
| "step_time": 22.302162043000862 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 256.0, |
| "completions/mean_length": 217.96875, |
| "completions/mean_terminated_length": 103.875, |
| "completions/min_length": 8.0, |
| "completions/min_terminated_length": 8.0, |
| "entropy": 2.536282777786255, |
| "epoch": 0.00168, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.4876599999999998e-05, |
| "loss": 0.0, |
| "num_tokens": 150246.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 21, |
| "step_time": 19.36471923001227 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 225.0, |
| "completions/mean_length": 206.90625, |
| "completions/mean_terminated_length": 98.9000015258789, |
| "completions/min_length": 13.0, |
| "completions/min_terminated_length": 13.0, |
| "entropy": 2.541882336139679, |
| "epoch": 0.00176, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.6120429999999997e-05, |
| "loss": 0.0, |
| "num_tokens": 157083.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 22, |
| "step_time": 22.757351590000326 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 172.0, |
| "completions/mean_length": 223.78125, |
| "completions/mean_terminated_length": 84.16667175292969, |
| "completions/min_length": 27.0, |
| "completions/min_terminated_length": 27.0, |
| "entropy": 2.657119005918503, |
| "epoch": 0.00184, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.736426e-05, |
| "loss": 0.0, |
| "num_tokens": 164464.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 23, |
| "step_time": 22.50556095898355 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 196.0, |
| "completions/mean_length": 219.0625, |
| "completions/mean_terminated_length": 108.25, |
| "completions/min_length": 15.0, |
| "completions/min_terminated_length": 15.0, |
| "entropy": 2.8032337725162506, |
| "epoch": 0.00192, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.8608089999999997e-05, |
| "loss": 0.0, |
| "num_tokens": 171698.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 24, |
| "step_time": 19.709908160984924 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 170.0, |
| "completions/mean_length": 195.96875, |
| "completions/mean_terminated_length": 63.900001525878906, |
| "completions/min_length": 7.0, |
| "completions/min_terminated_length": 7.0, |
| "entropy": 2.8214994370937347, |
| "epoch": 0.002, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 2.985192e-05, |
| "loss": 0.0, |
| "num_tokens": 178181.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 25, |
| "step_time": 22.27659140600008 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.78125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 217.0, |
| "completions/mean_length": 228.3125, |
| "completions/mean_terminated_length": 129.42857360839844, |
| "completions/min_length": 31.0, |
| "completions/min_terminated_length": 31.0, |
| "entropy": 2.782959371805191, |
| "epoch": 0.00208, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.109575e-05, |
| "loss": 0.0, |
| "num_tokens": 185703.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 26, |
| "step_time": 21.666986704993178 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 253.0, |
| "completions/mean_length": 229.625, |
| "completions/mean_terminated_length": 150.5, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 2.837796986103058, |
| "epoch": 0.00216, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.2339579999999996e-05, |
| "loss": 0.0, |
| "num_tokens": 193275.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 27, |
| "step_time": 19.754789013990376 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.84375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 132.0, |
| "completions/mean_length": 228.0, |
| "completions/mean_terminated_length": 76.80000305175781, |
| "completions/min_length": 12.0, |
| "completions/min_terminated_length": 12.0, |
| "entropy": 2.697370797395706, |
| "epoch": 0.00224, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.358341e-05, |
| "loss": 0.0, |
| "num_tokens": 200791.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 28, |
| "step_time": 22.38940100500622 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 180.0, |
| "completions/mean_length": 201.375, |
| "completions/mean_terminated_length": 81.20000457763672, |
| "completions/min_length": 18.0, |
| "completions/min_terminated_length": 18.0, |
| "entropy": 2.6589381992816925, |
| "epoch": 0.00232, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.482724e-05, |
| "loss": 0.0, |
| "num_tokens": 207459.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 29, |
| "step_time": 19.54482721599925 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 246.0, |
| "completions/mean_length": 195.15625, |
| "completions/mean_terminated_length": 93.75, |
| "completions/min_length": 13.0, |
| "completions/min_terminated_length": 13.0, |
| "entropy": 2.9770112335681915, |
| "epoch": 0.0024, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.607107e-05, |
| "loss": 0.0, |
| "num_tokens": 213928.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 30, |
| "step_time": 19.39816167599929 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 247.0, |
| "completions/mean_length": 217.6875, |
| "completions/mean_terminated_length": 153.83334350585938, |
| "completions/min_length": 36.0, |
| "completions/min_terminated_length": 36.0, |
| "entropy": 2.709945023059845, |
| "epoch": 0.00248, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.73149e-05, |
| "loss": 0.0, |
| "num_tokens": 221118.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 31, |
| "step_time": 19.455606768009602 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 221.0, |
| "completions/mean_length": 213.4375, |
| "completions/mean_terminated_length": 119.80000305175781, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 2.7361134737730026, |
| "epoch": 0.00256, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.8558729999999996e-05, |
| "loss": 0.0, |
| "num_tokens": 228168.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 32, |
| "step_time": 22.964359290992434 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 252.0, |
| "completions/mean_length": 216.0, |
| "completions/mean_terminated_length": 96.0, |
| "completions/min_length": 6.0, |
| "completions/min_terminated_length": 6.0, |
| "entropy": 2.909568816423416, |
| "epoch": 0.00264, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 3.9802559999999995e-05, |
| "loss": 0.0, |
| "num_tokens": 235304.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 33, |
| "step_time": 19.29167694800708 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.71875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 180.0, |
| "completions/mean_length": 209.1875, |
| "completions/mean_terminated_length": 89.55555725097656, |
| "completions/min_length": 11.0, |
| "completions/min_terminated_length": 11.0, |
| "entropy": 2.879765272140503, |
| "epoch": 0.00272, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.104638999999999e-05, |
| "loss": 0.0, |
| "num_tokens": 242218.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 34, |
| "step_time": 21.556990506993316 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.90625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 185.0, |
| "completions/mean_length": 243.1875, |
| "completions/mean_terminated_length": 119.33333587646484, |
| "completions/min_length": 75.0, |
| "completions/min_terminated_length": 75.0, |
| "entropy": 2.5094977021217346, |
| "epoch": 0.0028, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.229022e-05, |
| "loss": 0.0, |
| "num_tokens": 250216.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 35, |
| "step_time": 21.540133035996405 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.71875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 195.0, |
| "completions/mean_length": 211.125, |
| "completions/mean_terminated_length": 96.44444274902344, |
| "completions/min_length": 19.0, |
| "completions/min_terminated_length": 19.0, |
| "entropy": 2.7391299456357956, |
| "epoch": 0.00288, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353405e-05, |
| "loss": 0.0, |
| "num_tokens": 257196.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 36, |
| "step_time": 20.61158860699652 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.59375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 184.0, |
| "completions/mean_length": 188.9375, |
| "completions/mean_terminated_length": 90.92308044433594, |
| "completions/min_length": 5.0, |
| "completions/min_terminated_length": 5.0, |
| "entropy": 2.5645452737808228, |
| "epoch": 0.00296, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534049870739164e-05, |
| "loss": 0.0, |
| "num_tokens": 263466.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 37, |
| "step_time": 19.463059345995134 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.90625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 228.0, |
| "completions/mean_length": 243.15625, |
| "completions/mean_terminated_length": 119.0, |
| "completions/min_length": 16.0, |
| "completions/min_terminated_length": 16.0, |
| "entropy": 2.6515900790691376, |
| "epoch": 0.00304, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534049482956666e-05, |
| "loss": 0.0, |
| "num_tokens": 271471.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 38, |
| "step_time": 19.32687450600497 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.78125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 246.0, |
| "completions/mean_length": 233.4375, |
| "completions/mean_terminated_length": 152.85714721679688, |
| "completions/min_length": 71.0, |
| "completions/min_terminated_length": 71.0, |
| "entropy": 2.815293073654175, |
| "epoch": 0.00312, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353404883665252e-05, |
| "loss": 0.0, |
| "num_tokens": 279165.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 39, |
| "step_time": 19.368901928013656 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 187.0, |
| "completions/mean_length": 227.8125, |
| "completions/mean_terminated_length": 105.66667175292969, |
| "completions/min_length": 21.0, |
| "completions/min_terminated_length": 21.0, |
| "entropy": 2.8266728222370148, |
| "epoch": 0.0032, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353404793182672e-05, |
| "loss": 0.0, |
| "num_tokens": 286679.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 40, |
| "step_time": 19.347783612996864 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 193.0, |
| "completions/mean_length": 211.5, |
| "completions/mean_terminated_length": 78.0, |
| "completions/min_length": 9.0, |
| "completions/min_terminated_length": 9.0, |
| "entropy": 2.8580541610717773, |
| "epoch": 0.00328, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534046768479294e-05, |
| "loss": 0.0, |
| "num_tokens": 293667.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 41, |
| "step_time": 22.78021706399886 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 155.0, |
| "completions/mean_length": 219.9375, |
| "completions/mean_terminated_length": 63.66666793823242, |
| "completions/min_length": 9.0, |
| "completions/min_terminated_length": 9.0, |
| "entropy": 2.999121993780136, |
| "epoch": 0.00336, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353404534661025e-05, |
| "loss": 0.0, |
| "num_tokens": 300929.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 42, |
| "step_time": 19.530366815997695 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 222.0, |
| "completions/mean_length": 222.59375, |
| "completions/mean_terminated_length": 122.375, |
| "completions/min_length": 27.0, |
| "completions/min_terminated_length": 27.0, |
| "entropy": 2.805495321750641, |
| "epoch": 0.00344, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353404366621962e-05, |
| "loss": 0.0, |
| "num_tokens": 308276.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 43, |
| "step_time": 19.35092342599819 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 248.0, |
| "completions/mean_length": 233.1875, |
| "completions/mean_terminated_length": 134.33334350585938, |
| "completions/min_length": 26.0, |
| "completions/min_terminated_length": 26.0, |
| "entropy": 2.8153499960899353, |
| "epoch": 0.00352, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534041727307414e-05, |
| "loss": 0.0, |
| "num_tokens": 315962.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 44, |
| "step_time": 19.47554490200855 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.59375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 249.0, |
| "completions/mean_length": 182.71875, |
| "completions/mean_terminated_length": 75.61538696289062, |
| "completions/min_length": 3.0, |
| "completions/min_terminated_length": 3.0, |
| "entropy": 2.6707731187343597, |
| "epoch": 0.0036, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534039529873685e-05, |
| "loss": 0.0, |
| "num_tokens": 322029.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 45, |
| "step_time": 22.798208642001555 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.84375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 211.0, |
| "completions/mean_length": 236.9375, |
| "completions/mean_terminated_length": 134.0, |
| "completions/min_length": 55.0, |
| "completions/min_terminated_length": 55.0, |
| "entropy": 2.5431984215974808, |
| "epoch": 0.00368, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534037073918466e-05, |
| "loss": 0.0, |
| "num_tokens": 329835.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 46, |
| "step_time": 19.88411584899586 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 247.0, |
| "completions/mean_length": 222.0, |
| "completions/mean_terminated_length": 147.1999969482422, |
| "completions/min_length": 26.0, |
| "completions/min_terminated_length": 26.0, |
| "entropy": 2.9005468487739563, |
| "epoch": 0.00376, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353403435944177e-05, |
| "loss": 0.0, |
| "num_tokens": 337163.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 47, |
| "step_time": 19.48703931599448 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 239.0, |
| "completions/mean_length": 198.15625, |
| "completions/mean_terminated_length": 101.75, |
| "completions/min_length": 26.0, |
| "completions/min_terminated_length": 26.0, |
| "entropy": 2.7508918046951294, |
| "epoch": 0.00384, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353403138644366e-05, |
| "loss": 0.0, |
| "num_tokens": 343724.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 48, |
| "step_time": 22.090118679989246 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 218.0, |
| "completions/mean_length": 226.65625, |
| "completions/mean_terminated_length": 99.5, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 2.669710338115692, |
| "epoch": 0.00392, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534028154924186e-05, |
| "loss": 0.0, |
| "num_tokens": 351201.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 49, |
| "step_time": 19.470153064998158 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 172.0, |
| "completions/mean_length": 219.40625, |
| "completions/mean_terminated_length": 60.833335876464844, |
| "completions/min_length": 9.0, |
| "completions/min_terminated_length": 9.0, |
| "entropy": 2.4048453122377396, |
| "epoch": 0.004, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.35340246648834e-05, |
| "loss": 0.0, |
| "num_tokens": 358438.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 50, |
| "step_time": 22.60661829500168 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 250.0, |
| "completions/mean_length": 222.65625, |
| "completions/mean_terminated_length": 122.625, |
| "completions/min_length": 14.0, |
| "completions/min_terminated_length": 14.0, |
| "entropy": 2.9614007472991943, |
| "epoch": 0.00408, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534020916321335e-05, |
| "loss": 0.0, |
| "num_tokens": 365783.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 51, |
| "step_time": 22.327632517990423 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 243.0, |
| "completions/mean_length": 231.625, |
| "completions/mean_terminated_length": 126.0, |
| "completions/min_length": 25.0, |
| "completions/min_terminated_length": 25.0, |
| "entropy": 2.654112696647644, |
| "epoch": 0.00416, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3534016909238075e-05, |
| "loss": 0.0, |
| "num_tokens": 373419.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 52, |
| "step_time": 19.20923549200961 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 211.0, |
| "completions/mean_length": 215.09375, |
| "completions/mean_terminated_length": 92.375, |
| "completions/min_length": 17.0, |
| "completions/min_terminated_length": 17.0, |
| "entropy": 2.7122311294078827, |
| "epoch": 0.00424, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353401264363367e-05, |
| "loss": 0.0, |
| "num_tokens": 380526.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 53, |
| "step_time": 19.398160734999692 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.90625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 223.0, |
| "completions/mean_length": 246.46875, |
| "completions/mean_terminated_length": 154.33334350585938, |
| "completions/min_length": 116.0, |
| "completions/min_terminated_length": 116.0, |
| "entropy": 2.6178116649389267, |
| "epoch": 0.00432, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.35340081195082e-05, |
| "loss": 0.0, |
| "num_tokens": 388637.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 54, |
| "step_time": 19.338012945008813 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 81.0, |
| "completions/mean_length": 230.5625, |
| "completions/mean_terminated_length": 52.5, |
| "completions/min_length": 23.0, |
| "completions/min_terminated_length": 23.0, |
| "entropy": 2.716535747051239, |
| "epoch": 0.0044, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353400333686172e-05, |
| "loss": 0.0, |
| "num_tokens": 396239.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 55, |
| "step_time": 20.151991571001417 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 218.0, |
| "completions/mean_length": 218.3125, |
| "completions/mean_terminated_length": 135.40000915527344, |
| "completions/min_length": 39.0, |
| "completions/min_terminated_length": 39.0, |
| "entropy": 2.8078980445861816, |
| "epoch": 0.00448, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353399829569432e-05, |
| "loss": 0.0, |
| "num_tokens": 403433.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 56, |
| "step_time": 22.507306526997127 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.71875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 252.0, |
| "completions/mean_length": 217.375, |
| "completions/mean_terminated_length": 118.66666412353516, |
| "completions/min_length": 45.0, |
| "completions/min_terminated_length": 45.0, |
| "entropy": 2.833368271589279, |
| "epoch": 0.00456, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353399299600607e-05, |
| "loss": 0.0, |
| "num_tokens": 410613.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 57, |
| "step_time": 19.39536341799976 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 137.0, |
| "completions/mean_length": 214.375, |
| "completions/mean_terminated_length": 89.5, |
| "completions/min_length": 20.0, |
| "completions/min_terminated_length": 20.0, |
| "entropy": 2.6436397433280945, |
| "epoch": 0.00464, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353398743779707e-05, |
| "loss": 0.0, |
| "num_tokens": 417697.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 58, |
| "step_time": 19.44620426499023 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 232.0, |
| "completions/mean_length": 216.5625, |
| "completions/mean_terminated_length": 98.25, |
| "completions/min_length": 35.0, |
| "completions/min_terminated_length": 35.0, |
| "entropy": 2.812237471342087, |
| "epoch": 0.00472, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353398162106738e-05, |
| "loss": 0.0, |
| "num_tokens": 424847.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 59, |
| "step_time": 22.402232911990723 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 163.0, |
| "completions/mean_length": 198.5625, |
| "completions/mean_terminated_length": 72.20000457763672, |
| "completions/min_length": 15.0, |
| "completions/min_terminated_length": 15.0, |
| "entropy": 2.834031730890274, |
| "epoch": 0.0048, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353397554581712e-05, |
| "loss": 0.0, |
| "num_tokens": 431425.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 60, |
| "step_time": 19.969688828998187 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 245.0, |
| "completions/mean_length": 202.3125, |
| "completions/mean_terminated_length": 112.83333587646484, |
| "completions/min_length": 14.0, |
| "completions/min_terminated_length": 14.0, |
| "entropy": 2.8446905314922333, |
| "epoch": 0.00488, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3533969212046366e-05, |
| "loss": 0.0, |
| "num_tokens": 438123.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 61, |
| "step_time": 19.51650980200793 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.78125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 212.0, |
| "completions/mean_length": 218.71875, |
| "completions/mean_terminated_length": 85.5714340209961, |
| "completions/min_length": 17.0, |
| "completions/min_terminated_length": 17.0, |
| "entropy": 2.6947758495807648, |
| "epoch": 0.00496, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3533962619755234e-05, |
| "loss": 0.0, |
| "num_tokens": 445342.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 62, |
| "step_time": 21.935334763016726 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 212.0, |
| "completions/mean_length": 187.90625, |
| "completions/mean_terminated_length": 74.41667175292969, |
| "completions/min_length": 22.0, |
| "completions/min_terminated_length": 22.0, |
| "entropy": 2.6052669137716293, |
| "epoch": 0.00504, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353395576894381e-05, |
| "loss": 0.0, |
| "num_tokens": 451571.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 63, |
| "step_time": 22.139962298009777 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 141.0, |
| "completions/mean_length": 219.90625, |
| "completions/mean_terminated_length": 63.5, |
| "completions/min_length": 15.0, |
| "completions/min_terminated_length": 15.0, |
| "entropy": 2.774052321910858, |
| "epoch": 0.00512, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353394865961223e-05, |
| "loss": 0.0, |
| "num_tokens": 458828.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 64, |
| "step_time": 21.979154282984382 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 165.0, |
| "completions/mean_length": 206.4375, |
| "completions/mean_terminated_length": 97.4000015258789, |
| "completions/min_length": 14.0, |
| "completions/min_terminated_length": 14.0, |
| "entropy": 2.6954946517944336, |
| "epoch": 0.0052, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353394129176058e-05, |
| "loss": 0.0, |
| "num_tokens": 465654.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 65, |
| "step_time": 22.214402237004833 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 239.0, |
| "completions/mean_length": 232.625, |
| "completions/mean_terminated_length": 131.33334350585938, |
| "completions/min_length": 23.0, |
| "completions/min_terminated_length": 23.0, |
| "entropy": 2.571521297097206, |
| "epoch": 0.00528, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353393366538898e-05, |
| "loss": 0.0, |
| "num_tokens": 473322.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 66, |
| "step_time": 19.304359686997486 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.5625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 250.0, |
| "completions/mean_length": 197.125, |
| "completions/mean_terminated_length": 121.42857360839844, |
| "completions/min_length": 20.0, |
| "completions/min_terminated_length": 20.0, |
| "entropy": 2.909770429134369, |
| "epoch": 0.00536, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353392578049757e-05, |
| "loss": 0.0, |
| "num_tokens": 479854.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 67, |
| "step_time": 19.36013725100929 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.75, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 239.0, |
| "completions/mean_length": 227.125, |
| "completions/mean_terminated_length": 140.5, |
| "completions/min_length": 49.0, |
| "completions/min_terminated_length": 49.0, |
| "entropy": 2.744248181581497, |
| "epoch": 0.00544, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353391763708646e-05, |
| "loss": 0.0, |
| "num_tokens": 487338.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 68, |
| "step_time": 21.965071903985518 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 188.0, |
| "completions/mean_length": 223.9375, |
| "completions/mean_terminated_length": 85.0, |
| "completions/min_length": 8.0, |
| "completions/min_terminated_length": 8.0, |
| "entropy": 2.6102449893951416, |
| "epoch": 0.00552, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353390923515578e-05, |
| "loss": 0.0, |
| "num_tokens": 494716.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 69, |
| "step_time": 23.3430329200055 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.6875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 254.0, |
| "completions/mean_length": 210.8125, |
| "completions/mean_terminated_length": 111.4000015258789, |
| "completions/min_length": 28.0, |
| "completions/min_terminated_length": 28.0, |
| "entropy": 2.576720654964447, |
| "epoch": 0.0056, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353390057470567e-05, |
| "loss": 0.0, |
| "num_tokens": 501686.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 70, |
| "step_time": 19.697308761002205 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.84375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 224.0, |
| "completions/mean_length": 233.6875, |
| "completions/mean_terminated_length": 113.20000457763672, |
| "completions/min_length": 55.0, |
| "completions/min_terminated_length": 55.0, |
| "entropy": 2.9254136979579926, |
| "epoch": 0.00568, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353389165573626e-05, |
| "loss": 0.0, |
| "num_tokens": 509388.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 71, |
| "step_time": 19.443207848002203 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 221.0, |
| "completions/mean_length": 227.15625, |
| "completions/mean_terminated_length": 102.16667175292969, |
| "completions/min_length": 6.0, |
| "completions/min_terminated_length": 6.0, |
| "entropy": 2.8628475964069366, |
| "epoch": 0.00576, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353388247824768e-05, |
| "loss": 0.0, |
| "num_tokens": 516881.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 72, |
| "step_time": 20.01639660699584 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 227.0, |
| "completions/mean_length": 231.375, |
| "completions/mean_terminated_length": 124.66667175292969, |
| "completions/min_length": 18.0, |
| "completions/min_terminated_length": 18.0, |
| "entropy": 2.296522408723831, |
| "epoch": 0.00584, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.3533873042240096e-05, |
| "loss": 0.0, |
| "num_tokens": 524501.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 73, |
| "step_time": 23.112452601002587 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.71875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 246.0, |
| "completions/mean_length": 236.3125, |
| "completions/mean_terminated_length": 186.0, |
| "completions/min_length": 91.0, |
| "completions/min_terminated_length": 91.0, |
| "entropy": 2.8620297014713287, |
| "epoch": 0.00592, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353386334771366e-05, |
| "loss": 0.0, |
| "num_tokens": 532283.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 74, |
| "step_time": 23.19768616399233 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.84375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 250.0, |
| "completions/mean_length": 236.0, |
| "completions/mean_terminated_length": 128.0, |
| "completions/min_length": 5.0, |
| "completions/min_terminated_length": 5.0, |
| "entropy": 2.5197259187698364, |
| "epoch": 0.006, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "kl": 0.0, |
| "learning_rate": 4.353385339466851e-05, |
| "loss": 0.0, |
| "num_tokens": 540059.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/rollout_reward_func/mean": 0.0, |
| "rewards/rollout_reward_func/std": 0.0, |
| "step": 75, |
| "step_time": 19.854602511004487 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 25000, |
| "num_input_tokens_seen": 540059, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|