environment-ttt / trainer_state.json
haihp02's picture
Upload task output 1
9dce2c8 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.006,
"eval_steps": 500,
"global_step": 75,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.84375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 173.0,
"completions/mean_length": 230.65625,
"completions/mean_terminated_length": 93.80000305175781,
"completions/min_length": 32.0,
"completions/min_terminated_length": 32.0,
"entropy": 2.7413549423217773,
"epoch": 8e-05,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 0.0,
"loss": 0.0,
"num_tokens": 7605.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 1,
"step_time": 20.038708471984137
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 207.0,
"completions/mean_length": 220.71875,
"completions/mean_terminated_length": 114.875,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 2.8548775017261505,
"epoch": 0.00016,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.2438299999999998e-06,
"loss": 0.0,
"num_tokens": 14888.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 2,
"step_time": 22.31387728100526
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 248.0,
"completions/mean_length": 216.3125,
"completions/mean_terminated_length": 129.0,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 2.8777474462985992,
"epoch": 0.00024,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.4876599999999997e-06,
"loss": 0.0,
"num_tokens": 22034.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 3,
"step_time": 19.508486614991853
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 203.0,
"completions/mean_length": 225.40625,
"completions/mean_terminated_length": 92.83333587646484,
"completions/min_length": 12.0,
"completions/min_terminated_length": 12.0,
"entropy": 2.708147943019867,
"epoch": 0.00032,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.73149e-06,
"loss": 0.0,
"num_tokens": 29471.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 4,
"step_time": 20.417726718005724
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.71875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 246.0,
"completions/mean_length": 210.0,
"completions/mean_terminated_length": 92.44444274902344,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"entropy": 2.7702046930789948,
"epoch": 0.0004,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.975319999999999e-06,
"loss": 0.0,
"num_tokens": 36415.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 5,
"step_time": 19.458035143004963
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.59375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 144.0,
"completions/mean_length": 180.46875,
"completions/mean_terminated_length": 70.0769271850586,
"completions/min_length": 14.0,
"completions/min_terminated_length": 14.0,
"entropy": 2.7136347889900208,
"epoch": 0.00048,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 6.2191499999999996e-06,
"loss": 0.0,
"num_tokens": 42410.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 6,
"step_time": 22.11808781498985
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.71875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 237.0,
"completions/mean_length": 215.84375,
"completions/mean_terminated_length": 113.22222137451172,
"completions/min_length": 34.0,
"completions/min_terminated_length": 34.0,
"entropy": 2.875216066837311,
"epoch": 0.00056,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 7.46298e-06,
"loss": 0.0,
"num_tokens": 49537.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 7,
"step_time": 21.699966289990698
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 245.0,
"completions/mean_length": 219.625,
"completions/mean_terminated_length": 110.5,
"completions/min_length": 10.0,
"completions/min_terminated_length": 10.0,
"entropy": 2.737216532230377,
"epoch": 0.00064,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 8.70681e-06,
"loss": 0.0,
"num_tokens": 56789.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 8,
"step_time": 19.435475739024696
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.71875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 239.0,
"completions/mean_length": 210.9375,
"completions/mean_terminated_length": 95.77777862548828,
"completions/min_length": 44.0,
"completions/min_terminated_length": 44.0,
"entropy": 2.9164214432239532,
"epoch": 0.00072,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 9.950639999999999e-06,
"loss": 0.0,
"num_tokens": 63759.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 9,
"step_time": 23.149850735993823
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 182.0,
"completions/mean_length": 245.96875,
"completions/mean_terminated_length": 95.5,
"completions/min_length": 9.0,
"completions/min_terminated_length": 9.0,
"entropy": 2.7655889093875885,
"epoch": 0.0008,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.1194469999999999e-05,
"loss": 0.0,
"num_tokens": 71850.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 10,
"step_time": 21.312995460008096
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 239.0,
"completions/mean_length": 221.75,
"completions/mean_terminated_length": 119.0,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"entropy": 2.4570625126361847,
"epoch": 0.00088,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.2438299999999999e-05,
"loss": 0.0,
"num_tokens": 79170.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 11,
"step_time": 18.84161558598862
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.78125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 255.0,
"completions/mean_length": 221.59375,
"completions/mean_terminated_length": 98.71428680419922,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"entropy": 2.7960894107818604,
"epoch": 0.00096,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.368213e-05,
"loss": 0.0,
"num_tokens": 86481.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 12,
"step_time": 21.481500715010043
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 194.0,
"completions/mean_length": 175.84375,
"completions/mean_terminated_length": 72.78572082519531,
"completions/min_length": 14.0,
"completions/min_terminated_length": 14.0,
"entropy": 2.777267038822174,
"epoch": 0.00104,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.492596e-05,
"loss": 0.0,
"num_tokens": 92316.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 13,
"step_time": 22.648648835995118
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 247.0,
"completions/mean_length": 222.03125,
"completions/mean_terminated_length": 120.125,
"completions/min_length": 47.0,
"completions/min_terminated_length": 47.0,
"entropy": 2.7303763031959534,
"epoch": 0.00112,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.6169789999999998e-05,
"loss": 0.0,
"num_tokens": 99645.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 14,
"step_time": 19.483697141011362
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 248.0,
"completions/mean_length": 210.625,
"completions/mean_terminated_length": 110.80000305175781,
"completions/min_length": 32.0,
"completions/min_terminated_length": 32.0,
"entropy": 2.6232912838459015,
"epoch": 0.0012,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.741362e-05,
"loss": 0.0,
"num_tokens": 106597.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 15,
"step_time": 22.048389301991847
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 96.0,
"completions/mean_length": 232.78125,
"completions/mean_terminated_length": 70.25,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 2.666172578930855,
"epoch": 0.00128,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.865745e-05,
"loss": 0.0,
"num_tokens": 114266.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 16,
"step_time": 21.76690764699015
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.71875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 229.0,
"completions/mean_length": 215.90625,
"completions/mean_terminated_length": 113.44444274902344,
"completions/min_length": 6.0,
"completions/min_terminated_length": 6.0,
"entropy": 2.6928308606147766,
"epoch": 0.00136,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 1.9901279999999997e-05,
"loss": 0.0,
"num_tokens": 121399.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 17,
"step_time": 19.46455569099635
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.59375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 167.0,
"completions/mean_length": 183.09375,
"completions/mean_terminated_length": 76.53846740722656,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"entropy": 2.5316834151744843,
"epoch": 0.00144,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.114511e-05,
"loss": 0.0,
"num_tokens": 127478.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 18,
"step_time": 22.669331256991427
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 234.0,
"completions/mean_length": 251.125,
"completions/mean_terminated_length": 178.0,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"entropy": 2.7606712579727173,
"epoch": 0.00152,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.2388939999999998e-05,
"loss": 0.0,
"num_tokens": 135734.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 19,
"step_time": 22.320524194008613
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.78125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 235.0,
"completions/mean_length": 221.90625,
"completions/mean_terminated_length": 100.14286041259766,
"completions/min_length": 8.0,
"completions/min_terminated_length": 8.0,
"entropy": 2.6717658638954163,
"epoch": 0.0016,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.3632769999999996e-05,
"loss": 0.0,
"num_tokens": 143047.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 20,
"step_time": 22.302162043000862
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 256.0,
"completions/mean_length": 217.96875,
"completions/mean_terminated_length": 103.875,
"completions/min_length": 8.0,
"completions/min_terminated_length": 8.0,
"entropy": 2.536282777786255,
"epoch": 0.00168,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.4876599999999998e-05,
"loss": 0.0,
"num_tokens": 150246.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 21,
"step_time": 19.36471923001227
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 225.0,
"completions/mean_length": 206.90625,
"completions/mean_terminated_length": 98.9000015258789,
"completions/min_length": 13.0,
"completions/min_terminated_length": 13.0,
"entropy": 2.541882336139679,
"epoch": 0.00176,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.6120429999999997e-05,
"loss": 0.0,
"num_tokens": 157083.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 22,
"step_time": 22.757351590000326
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 172.0,
"completions/mean_length": 223.78125,
"completions/mean_terminated_length": 84.16667175292969,
"completions/min_length": 27.0,
"completions/min_terminated_length": 27.0,
"entropy": 2.657119005918503,
"epoch": 0.00184,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.736426e-05,
"loss": 0.0,
"num_tokens": 164464.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 23,
"step_time": 22.50556095898355
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 196.0,
"completions/mean_length": 219.0625,
"completions/mean_terminated_length": 108.25,
"completions/min_length": 15.0,
"completions/min_terminated_length": 15.0,
"entropy": 2.8032337725162506,
"epoch": 0.00192,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.8608089999999997e-05,
"loss": 0.0,
"num_tokens": 171698.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 24,
"step_time": 19.709908160984924
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 170.0,
"completions/mean_length": 195.96875,
"completions/mean_terminated_length": 63.900001525878906,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"entropy": 2.8214994370937347,
"epoch": 0.002,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 2.985192e-05,
"loss": 0.0,
"num_tokens": 178181.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 25,
"step_time": 22.27659140600008
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.78125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 217.0,
"completions/mean_length": 228.3125,
"completions/mean_terminated_length": 129.42857360839844,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"entropy": 2.782959371805191,
"epoch": 0.00208,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.109575e-05,
"loss": 0.0,
"num_tokens": 185703.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 26,
"step_time": 21.666986704993178
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 253.0,
"completions/mean_length": 229.625,
"completions/mean_terminated_length": 150.5,
"completions/min_length": 28.0,
"completions/min_terminated_length": 28.0,
"entropy": 2.837796986103058,
"epoch": 0.00216,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.2339579999999996e-05,
"loss": 0.0,
"num_tokens": 193275.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 27,
"step_time": 19.754789013990376
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.84375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 132.0,
"completions/mean_length": 228.0,
"completions/mean_terminated_length": 76.80000305175781,
"completions/min_length": 12.0,
"completions/min_terminated_length": 12.0,
"entropy": 2.697370797395706,
"epoch": 0.00224,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.358341e-05,
"loss": 0.0,
"num_tokens": 200791.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 28,
"step_time": 22.38940100500622
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 180.0,
"completions/mean_length": 201.375,
"completions/mean_terminated_length": 81.20000457763672,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 2.6589381992816925,
"epoch": 0.00232,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.482724e-05,
"loss": 0.0,
"num_tokens": 207459.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 29,
"step_time": 19.54482721599925
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 246.0,
"completions/mean_length": 195.15625,
"completions/mean_terminated_length": 93.75,
"completions/min_length": 13.0,
"completions/min_terminated_length": 13.0,
"entropy": 2.9770112335681915,
"epoch": 0.0024,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.607107e-05,
"loss": 0.0,
"num_tokens": 213928.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 30,
"step_time": 19.39816167599929
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 247.0,
"completions/mean_length": 217.6875,
"completions/mean_terminated_length": 153.83334350585938,
"completions/min_length": 36.0,
"completions/min_terminated_length": 36.0,
"entropy": 2.709945023059845,
"epoch": 0.00248,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.73149e-05,
"loss": 0.0,
"num_tokens": 221118.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 31,
"step_time": 19.455606768009602
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 221.0,
"completions/mean_length": 213.4375,
"completions/mean_terminated_length": 119.80000305175781,
"completions/min_length": 28.0,
"completions/min_terminated_length": 28.0,
"entropy": 2.7361134737730026,
"epoch": 0.00256,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.8558729999999996e-05,
"loss": 0.0,
"num_tokens": 228168.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 32,
"step_time": 22.964359290992434
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 252.0,
"completions/mean_length": 216.0,
"completions/mean_terminated_length": 96.0,
"completions/min_length": 6.0,
"completions/min_terminated_length": 6.0,
"entropy": 2.909568816423416,
"epoch": 0.00264,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 3.9802559999999995e-05,
"loss": 0.0,
"num_tokens": 235304.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 33,
"step_time": 19.29167694800708
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.71875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 180.0,
"completions/mean_length": 209.1875,
"completions/mean_terminated_length": 89.55555725097656,
"completions/min_length": 11.0,
"completions/min_terminated_length": 11.0,
"entropy": 2.879765272140503,
"epoch": 0.00272,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.104638999999999e-05,
"loss": 0.0,
"num_tokens": 242218.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 34,
"step_time": 21.556990506993316
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.90625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 185.0,
"completions/mean_length": 243.1875,
"completions/mean_terminated_length": 119.33333587646484,
"completions/min_length": 75.0,
"completions/min_terminated_length": 75.0,
"entropy": 2.5094977021217346,
"epoch": 0.0028,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.229022e-05,
"loss": 0.0,
"num_tokens": 250216.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 35,
"step_time": 21.540133035996405
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.71875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 195.0,
"completions/mean_length": 211.125,
"completions/mean_terminated_length": 96.44444274902344,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 2.7391299456357956,
"epoch": 0.00288,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353405e-05,
"loss": 0.0,
"num_tokens": 257196.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 36,
"step_time": 20.61158860699652
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.59375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 184.0,
"completions/mean_length": 188.9375,
"completions/mean_terminated_length": 90.92308044433594,
"completions/min_length": 5.0,
"completions/min_terminated_length": 5.0,
"entropy": 2.5645452737808228,
"epoch": 0.00296,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534049870739164e-05,
"loss": 0.0,
"num_tokens": 263466.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 37,
"step_time": 19.463059345995134
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.90625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 228.0,
"completions/mean_length": 243.15625,
"completions/mean_terminated_length": 119.0,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"entropy": 2.6515900790691376,
"epoch": 0.00304,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534049482956666e-05,
"loss": 0.0,
"num_tokens": 271471.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 38,
"step_time": 19.32687450600497
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.78125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 246.0,
"completions/mean_length": 233.4375,
"completions/mean_terminated_length": 152.85714721679688,
"completions/min_length": 71.0,
"completions/min_terminated_length": 71.0,
"entropy": 2.815293073654175,
"epoch": 0.00312,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353404883665252e-05,
"loss": 0.0,
"num_tokens": 279165.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 39,
"step_time": 19.368901928013656
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 187.0,
"completions/mean_length": 227.8125,
"completions/mean_terminated_length": 105.66667175292969,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 2.8266728222370148,
"epoch": 0.0032,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353404793182672e-05,
"loss": 0.0,
"num_tokens": 286679.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 40,
"step_time": 19.347783612996864
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 193.0,
"completions/mean_length": 211.5,
"completions/mean_terminated_length": 78.0,
"completions/min_length": 9.0,
"completions/min_terminated_length": 9.0,
"entropy": 2.8580541610717773,
"epoch": 0.00328,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534046768479294e-05,
"loss": 0.0,
"num_tokens": 293667.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 41,
"step_time": 22.78021706399886
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 155.0,
"completions/mean_length": 219.9375,
"completions/mean_terminated_length": 63.66666793823242,
"completions/min_length": 9.0,
"completions/min_terminated_length": 9.0,
"entropy": 2.999121993780136,
"epoch": 0.00336,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353404534661025e-05,
"loss": 0.0,
"num_tokens": 300929.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 42,
"step_time": 19.530366815997695
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 222.0,
"completions/mean_length": 222.59375,
"completions/mean_terminated_length": 122.375,
"completions/min_length": 27.0,
"completions/min_terminated_length": 27.0,
"entropy": 2.805495321750641,
"epoch": 0.00344,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353404366621962e-05,
"loss": 0.0,
"num_tokens": 308276.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 43,
"step_time": 19.35092342599819
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 248.0,
"completions/mean_length": 233.1875,
"completions/mean_terminated_length": 134.33334350585938,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 2.8153499960899353,
"epoch": 0.00352,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534041727307414e-05,
"loss": 0.0,
"num_tokens": 315962.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 44,
"step_time": 19.47554490200855
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.59375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 249.0,
"completions/mean_length": 182.71875,
"completions/mean_terminated_length": 75.61538696289062,
"completions/min_length": 3.0,
"completions/min_terminated_length": 3.0,
"entropy": 2.6707731187343597,
"epoch": 0.0036,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534039529873685e-05,
"loss": 0.0,
"num_tokens": 322029.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 45,
"step_time": 22.798208642001555
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.84375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 211.0,
"completions/mean_length": 236.9375,
"completions/mean_terminated_length": 134.0,
"completions/min_length": 55.0,
"completions/min_terminated_length": 55.0,
"entropy": 2.5431984215974808,
"epoch": 0.00368,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534037073918466e-05,
"loss": 0.0,
"num_tokens": 329835.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 46,
"step_time": 19.88411584899586
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 247.0,
"completions/mean_length": 222.0,
"completions/mean_terminated_length": 147.1999969482422,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 2.9005468487739563,
"epoch": 0.00376,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353403435944177e-05,
"loss": 0.0,
"num_tokens": 337163.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 47,
"step_time": 19.48703931599448
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 239.0,
"completions/mean_length": 198.15625,
"completions/mean_terminated_length": 101.75,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 2.7508918046951294,
"epoch": 0.00384,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353403138644366e-05,
"loss": 0.0,
"num_tokens": 343724.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 48,
"step_time": 22.090118679989246
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 218.0,
"completions/mean_length": 226.65625,
"completions/mean_terminated_length": 99.5,
"completions/min_length": 35.0,
"completions/min_terminated_length": 35.0,
"entropy": 2.669710338115692,
"epoch": 0.00392,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534028154924186e-05,
"loss": 0.0,
"num_tokens": 351201.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 49,
"step_time": 19.470153064998158
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 172.0,
"completions/mean_length": 219.40625,
"completions/mean_terminated_length": 60.833335876464844,
"completions/min_length": 9.0,
"completions/min_terminated_length": 9.0,
"entropy": 2.4048453122377396,
"epoch": 0.004,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.35340246648834e-05,
"loss": 0.0,
"num_tokens": 358438.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 50,
"step_time": 22.60661829500168
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 250.0,
"completions/mean_length": 222.65625,
"completions/mean_terminated_length": 122.625,
"completions/min_length": 14.0,
"completions/min_terminated_length": 14.0,
"entropy": 2.9614007472991943,
"epoch": 0.00408,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534020916321335e-05,
"loss": 0.0,
"num_tokens": 365783.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 51,
"step_time": 22.327632517990423
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 243.0,
"completions/mean_length": 231.625,
"completions/mean_terminated_length": 126.0,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 2.654112696647644,
"epoch": 0.00416,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3534016909238075e-05,
"loss": 0.0,
"num_tokens": 373419.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 52,
"step_time": 19.20923549200961
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 211.0,
"completions/mean_length": 215.09375,
"completions/mean_terminated_length": 92.375,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 2.7122311294078827,
"epoch": 0.00424,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353401264363367e-05,
"loss": 0.0,
"num_tokens": 380526.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 53,
"step_time": 19.398160734999692
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.90625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 223.0,
"completions/mean_length": 246.46875,
"completions/mean_terminated_length": 154.33334350585938,
"completions/min_length": 116.0,
"completions/min_terminated_length": 116.0,
"entropy": 2.6178116649389267,
"epoch": 0.00432,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.35340081195082e-05,
"loss": 0.0,
"num_tokens": 388637.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 54,
"step_time": 19.338012945008813
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 230.5625,
"completions/mean_terminated_length": 52.5,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 2.716535747051239,
"epoch": 0.0044,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353400333686172e-05,
"loss": 0.0,
"num_tokens": 396239.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 55,
"step_time": 20.151991571001417
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 218.0,
"completions/mean_length": 218.3125,
"completions/mean_terminated_length": 135.40000915527344,
"completions/min_length": 39.0,
"completions/min_terminated_length": 39.0,
"entropy": 2.8078980445861816,
"epoch": 0.00448,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353399829569432e-05,
"loss": 0.0,
"num_tokens": 403433.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 56,
"step_time": 22.507306526997127
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.71875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 252.0,
"completions/mean_length": 217.375,
"completions/mean_terminated_length": 118.66666412353516,
"completions/min_length": 45.0,
"completions/min_terminated_length": 45.0,
"entropy": 2.833368271589279,
"epoch": 0.00456,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353399299600607e-05,
"loss": 0.0,
"num_tokens": 410613.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 57,
"step_time": 19.39536341799976
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 137.0,
"completions/mean_length": 214.375,
"completions/mean_terminated_length": 89.5,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 2.6436397433280945,
"epoch": 0.00464,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353398743779707e-05,
"loss": 0.0,
"num_tokens": 417697.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 58,
"step_time": 19.44620426499023
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 232.0,
"completions/mean_length": 216.5625,
"completions/mean_terminated_length": 98.25,
"completions/min_length": 35.0,
"completions/min_terminated_length": 35.0,
"entropy": 2.812237471342087,
"epoch": 0.00472,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353398162106738e-05,
"loss": 0.0,
"num_tokens": 424847.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 59,
"step_time": 22.402232911990723
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 163.0,
"completions/mean_length": 198.5625,
"completions/mean_terminated_length": 72.20000457763672,
"completions/min_length": 15.0,
"completions/min_terminated_length": 15.0,
"entropy": 2.834031730890274,
"epoch": 0.0048,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353397554581712e-05,
"loss": 0.0,
"num_tokens": 431425.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 60,
"step_time": 19.969688828998187
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 245.0,
"completions/mean_length": 202.3125,
"completions/mean_terminated_length": 112.83333587646484,
"completions/min_length": 14.0,
"completions/min_terminated_length": 14.0,
"entropy": 2.8446905314922333,
"epoch": 0.00488,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3533969212046366e-05,
"loss": 0.0,
"num_tokens": 438123.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 61,
"step_time": 19.51650980200793
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.78125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 212.0,
"completions/mean_length": 218.71875,
"completions/mean_terminated_length": 85.5714340209961,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 2.6947758495807648,
"epoch": 0.00496,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3533962619755234e-05,
"loss": 0.0,
"num_tokens": 445342.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 62,
"step_time": 21.935334763016726
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 212.0,
"completions/mean_length": 187.90625,
"completions/mean_terminated_length": 74.41667175292969,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 2.6052669137716293,
"epoch": 0.00504,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353395576894381e-05,
"loss": 0.0,
"num_tokens": 451571.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 63,
"step_time": 22.139962298009777
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 141.0,
"completions/mean_length": 219.90625,
"completions/mean_terminated_length": 63.5,
"completions/min_length": 15.0,
"completions/min_terminated_length": 15.0,
"entropy": 2.774052321910858,
"epoch": 0.00512,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353394865961223e-05,
"loss": 0.0,
"num_tokens": 458828.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 64,
"step_time": 21.979154282984382
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 165.0,
"completions/mean_length": 206.4375,
"completions/mean_terminated_length": 97.4000015258789,
"completions/min_length": 14.0,
"completions/min_terminated_length": 14.0,
"entropy": 2.6954946517944336,
"epoch": 0.0052,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353394129176058e-05,
"loss": 0.0,
"num_tokens": 465654.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 65,
"step_time": 22.214402237004833
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 239.0,
"completions/mean_length": 232.625,
"completions/mean_terminated_length": 131.33334350585938,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 2.571521297097206,
"epoch": 0.00528,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353393366538898e-05,
"loss": 0.0,
"num_tokens": 473322.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 66,
"step_time": 19.304359686997486
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.5625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 250.0,
"completions/mean_length": 197.125,
"completions/mean_terminated_length": 121.42857360839844,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 2.909770429134369,
"epoch": 0.00536,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353392578049757e-05,
"loss": 0.0,
"num_tokens": 479854.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 67,
"step_time": 19.36013725100929
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.75,
"completions/max_length": 256.0,
"completions/max_terminated_length": 239.0,
"completions/mean_length": 227.125,
"completions/mean_terminated_length": 140.5,
"completions/min_length": 49.0,
"completions/min_terminated_length": 49.0,
"entropy": 2.744248181581497,
"epoch": 0.00544,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353391763708646e-05,
"loss": 0.0,
"num_tokens": 487338.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 68,
"step_time": 21.965071903985518
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 188.0,
"completions/mean_length": 223.9375,
"completions/mean_terminated_length": 85.0,
"completions/min_length": 8.0,
"completions/min_terminated_length": 8.0,
"entropy": 2.6102449893951416,
"epoch": 0.00552,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353390923515578e-05,
"loss": 0.0,
"num_tokens": 494716.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 69,
"step_time": 23.3430329200055
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.6875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 254.0,
"completions/mean_length": 210.8125,
"completions/mean_terminated_length": 111.4000015258789,
"completions/min_length": 28.0,
"completions/min_terminated_length": 28.0,
"entropy": 2.576720654964447,
"epoch": 0.0056,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353390057470567e-05,
"loss": 0.0,
"num_tokens": 501686.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 70,
"step_time": 19.697308761002205
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.84375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 224.0,
"completions/mean_length": 233.6875,
"completions/mean_terminated_length": 113.20000457763672,
"completions/min_length": 55.0,
"completions/min_terminated_length": 55.0,
"entropy": 2.9254136979579926,
"epoch": 0.00568,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353389165573626e-05,
"loss": 0.0,
"num_tokens": 509388.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 71,
"step_time": 19.443207848002203
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 221.0,
"completions/mean_length": 227.15625,
"completions/mean_terminated_length": 102.16667175292969,
"completions/min_length": 6.0,
"completions/min_terminated_length": 6.0,
"entropy": 2.8628475964069366,
"epoch": 0.00576,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353388247824768e-05,
"loss": 0.0,
"num_tokens": 516881.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 72,
"step_time": 20.01639660699584
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.8125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 227.0,
"completions/mean_length": 231.375,
"completions/mean_terminated_length": 124.66667175292969,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 2.296522408723831,
"epoch": 0.00584,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.3533873042240096e-05,
"loss": 0.0,
"num_tokens": 524501.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 73,
"step_time": 23.112452601002587
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.71875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 246.0,
"completions/mean_length": 236.3125,
"completions/mean_terminated_length": 186.0,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"entropy": 2.8620297014713287,
"epoch": 0.00592,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353386334771366e-05,
"loss": 0.0,
"num_tokens": 532283.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 74,
"step_time": 23.19768616399233
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.84375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 250.0,
"completions/mean_length": 236.0,
"completions/mean_terminated_length": 128.0,
"completions/min_length": 5.0,
"completions/min_terminated_length": 5.0,
"entropy": 2.5197259187698364,
"epoch": 0.006,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"kl": 0.0,
"learning_rate": 4.353385339466851e-05,
"loss": 0.0,
"num_tokens": 540059.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/rollout_reward_func/mean": 0.0,
"rewards/rollout_reward_func/std": 0.0,
"step": 75,
"step_time": 19.854602511004487
}
],
"logging_steps": 1.0,
"max_steps": 25000,
"num_input_tokens_seen": 540059,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}