| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 434.0, | |
| "completions/max_terminated_length": 434.0, | |
| "completions/mean_length": 293.75, | |
| "completions/mean_terminated_length": 335.7142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 184.0, | |
| "epoch": 0.004, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5197089910507202, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0278, | |
| "num_tokens": 9800.0, | |
| "reward": 0.3660714328289032, | |
| "reward_std": 0.36236491799354553, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 278.0, | |
| "completions/max_terminated_length": 278.0, | |
| "completions/mean_length": 134.875, | |
| "completions/mean_terminated_length": 154.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 51.0, | |
| "epoch": 0.008, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.8656461238861084, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1584, | |
| "num_tokens": 19920.0, | |
| "reward": 0.34375, | |
| "reward_std": 0.48065245151519775, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 261.0, | |
| "completions/max_terminated_length": 261.0, | |
| "completions/mean_length": 176.625, | |
| "completions/mean_terminated_length": 201.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 138.0, | |
| "epoch": 0.012, | |
| "format_failures": 0.0, | |
| "grad_norm": 7.7805867195129395, | |
| "kl": 1.0173164680600166, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0063, | |
| "num_tokens": 28896.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 332.0, | |
| "completions/max_terminated_length": 332.0, | |
| "completions/mean_length": 216.625, | |
| "completions/mean_terminated_length": 247.57142857142858, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 190.0, | |
| "epoch": 0.016, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.34460729360580444, | |
| "kl": 0.005293647991493344, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0149, | |
| "num_tokens": 35688.0, | |
| "reward": 0.316850483417511, | |
| "reward_std": 0.19629573822021484, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 141.0, | |
| "completions/max_terminated_length": 141.0, | |
| "completions/mean_length": 107.75, | |
| "completions/mean_terminated_length": 123.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 109.0, | |
| "epoch": 0.02, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.950016975402832, | |
| "kl": 0.19140876829624176, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0265, | |
| "num_tokens": 44320.0, | |
| "reward": 0.25, | |
| "reward_std": 0.4629100561141968, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 480.0, | |
| "completions/max_terminated_length": 480.0, | |
| "completions/mean_length": 347.375, | |
| "completions/mean_terminated_length": 397.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 316.0, | |
| "epoch": 0.024, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.27606070041656494, | |
| "kl": 0.004609360825270414, | |
| "learning_rate": 1e-06, | |
| "loss": 0.019, | |
| "num_tokens": 55480.0, | |
| "reward": 0.20555555820465088, | |
| "reward_std": 0.22662308812141418, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 98.0, | |
| "completions/max_terminated_length": 98.0, | |
| "completions/mean_length": 54.75, | |
| "completions/mean_terminated_length": 62.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 36.0, | |
| "epoch": 0.028, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.512669563293457, | |
| "kl": 0.0004560185334412381, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1926, | |
| "num_tokens": 76568.0, | |
| "reward": 0.0416666679084301, | |
| "reward_std": 0.1178511381149292, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 380.0, | |
| "completions/max_terminated_length": 380.0, | |
| "completions/mean_length": 189.75, | |
| "completions/mean_terminated_length": 216.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 94.0, | |
| "epoch": 0.032, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.6258090734481812, | |
| "kl": 0.133640818297863, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0094, | |
| "num_tokens": 88120.0, | |
| "reward": 0.05000000074505806, | |
| "reward_std": 0.1414213478565216, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1412.0, | |
| "completions/max_terminated_length": 1412.0, | |
| "completions/mean_length": 426.125, | |
| "completions/mean_terminated_length": 487.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 218.0, | |
| "epoch": 0.036, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.3745494782924652, | |
| "kl": 0.0010488361003808677, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1003, | |
| "num_tokens": 110584.0, | |
| "reward": 0.05859375, | |
| "reward_std": 0.1657281517982483, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 62.0, | |
| "completions/max_terminated_length": 62.0, | |
| "completions/mean_length": 41.25, | |
| "completions/mean_terminated_length": 47.142857142857146, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 35.0, | |
| "epoch": 0.04, | |
| "format_failures": 0.0, | |
| "grad_norm": 6.635150909423828, | |
| "kl": 1.000607669353485, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0558, | |
| "num_tokens": 115888.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 126.0, | |
| "completions/max_terminated_length": 126.0, | |
| "completions/mean_length": 60.25, | |
| "completions/mean_terminated_length": 96.4, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 0.044, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.5436906814575195, | |
| "kl": 0.534478023648262, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1301, | |
| "num_tokens": 123984.0, | |
| "reward": 0.375, | |
| "reward_std": 0.5175491571426392, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 2047.0, | |
| "completions/max_terminated_length": 2047.0, | |
| "completions/mean_length": 702.625, | |
| "completions/mean_terminated_length": 936.8333333333334, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 341.0, | |
| "epoch": 0.048, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.34704723954200745, | |
| "kl": 0.0009783765999600291, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0431, | |
| "num_tokens": 146192.0, | |
| "reward": 0.38749998807907104, | |
| "reward_std": 0.4181165099143982, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 122.0, | |
| "completions/max_terminated_length": 122.0, | |
| "completions/mean_length": 40.375, | |
| "completions/mean_terminated_length": 46.142857142857146, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 0.052, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.004240340553224087, | |
| "kl": 0.004628603579476476, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 166896.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 973.0, | |
| "completions/max_terminated_length": 973.0, | |
| "completions/mean_length": 452.5, | |
| "completions/mean_terminated_length": 517.1428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 275.0, | |
| "epoch": 0.056, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.18779706954956055, | |
| "kl": 0.0052806169260293245, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0313, | |
| "num_tokens": 185392.0, | |
| "reward": 0.11513157933950424, | |
| "reward_std": 0.16955535113811493, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 304.0, | |
| "completions/max_terminated_length": 304.0, | |
| "completions/mean_length": 202.0, | |
| "completions/mean_terminated_length": 230.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 113.0, | |
| "epoch": 0.06, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.6387383341789246, | |
| "kl": 0.02643415331840515, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0717, | |
| "num_tokens": 193056.0, | |
| "reward": 0.53125, | |
| "reward_std": 0.31045761704444885, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 203.0, | |
| "completions/max_terminated_length": 203.0, | |
| "completions/mean_length": 151.25, | |
| "completions/mean_terminated_length": 172.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 160.0, | |
| "epoch": 0.064, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.2569343149662018, | |
| "kl": 0.09986447170376778, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 201256.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 295.0, | |
| "completions/max_terminated_length": 295.0, | |
| "completions/mean_length": 192.0, | |
| "completions/mean_terminated_length": 219.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 22.0, | |
| "epoch": 0.068, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.04395958036184311, | |
| "kl": 0.027548893354833126, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 209920.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 44.0, | |
| "completions/max_terminated_length": 44.0, | |
| "completions/mean_length": 20.125, | |
| "completions/mean_terminated_length": 40.25, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 39.0, | |
| "epoch": 0.072, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.16681237518787384, | |
| "kl": 0.03394318092614412, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 214144.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 527.0, | |
| "completions/max_terminated_length": 527.0, | |
| "completions/mean_length": 215.75, | |
| "completions/mean_terminated_length": 246.57142857142858, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 18.0, | |
| "epoch": 0.076, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5867045521736145, | |
| "kl": 0.00954199954867363, | |
| "learning_rate": 1e-06, | |
| "loss": -0.2047, | |
| "num_tokens": 234096.0, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.35634833574295044, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 169.0, | |
| "completions/max_terminated_length": 169.0, | |
| "completions/mean_length": 91.75, | |
| "completions/mean_terminated_length": 104.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 66.0, | |
| "epoch": 0.08, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.331188917160034, | |
| "kl": 0.05314544588327408, | |
| "learning_rate": 1e-06, | |
| "loss": 0.048, | |
| "num_tokens": 243464.0, | |
| "reward": 0.21875, | |
| "reward_std": 0.36443448066711426, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 130.0, | |
| "completions/max_terminated_length": 130.0, | |
| "completions/mean_length": 81.25, | |
| "completions/mean_terminated_length": 92.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 57.0, | |
| "epoch": 0.084, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.2006300687789917, | |
| "kl": 0.07363329455256462, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0094, | |
| "num_tokens": 250720.0, | |
| "reward": 0.21875, | |
| "reward_std": 0.33905068039894104, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 197.0, | |
| "completions/max_terminated_length": 197.0, | |
| "completions/mean_length": 82.0, | |
| "completions/mean_terminated_length": 93.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 27.0, | |
| "epoch": 0.088, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.3736180067062378, | |
| "kl": 0.04446508176624775, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0541, | |
| "num_tokens": 257944.0, | |
| "reward": 0.0535714291036129, | |
| "reward_std": 0.15152288973331451, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 544.0, | |
| "completions/max_terminated_length": 544.0, | |
| "completions/mean_length": 242.75, | |
| "completions/mean_terminated_length": 277.42857142857144, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 38.0, | |
| "epoch": 0.092, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.9332400560379028, | |
| "kl": 0.026759919710457325, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0979, | |
| "num_tokens": 270512.0, | |
| "reward": 0.17383432388305664, | |
| "reward_std": 0.5423066020011902, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 334.0, | |
| "completions/max_terminated_length": 334.0, | |
| "completions/mean_length": 193.875, | |
| "completions/mean_terminated_length": 221.57142857142858, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 114.0, | |
| "epoch": 0.096, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5741273164749146, | |
| "kl": 0.061491173692047596, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0724, | |
| "num_tokens": 279544.0, | |
| "reward": 0.3214285969734192, | |
| "reward_std": 0.3162277638912201, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 191.0, | |
| "completions/max_terminated_length": 191.0, | |
| "completions/mean_length": 131.625, | |
| "completions/mean_terminated_length": 150.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 109.0, | |
| "epoch": 0.1, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.8438379168510437, | |
| "kl": 0.10757053177803755, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0168, | |
| "num_tokens": 285872.0, | |
| "reward": 0.3083333373069763, | |
| "reward_std": 0.3443548381328583, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 345.0, | |
| "completions/max_terminated_length": 345.0, | |
| "completions/mean_length": 224.0, | |
| "completions/mean_terminated_length": 256.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 151.0, | |
| "epoch": 0.104, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.6450461149215698, | |
| "kl": 0.04460714943706989, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0276, | |
| "num_tokens": 293816.0, | |
| "reward": 0.3494505286216736, | |
| "reward_std": 0.3268265724182129, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 210.0, | |
| "completions/max_terminated_length": 210.0, | |
| "completions/mean_length": 110.375, | |
| "completions/mean_terminated_length": 126.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 68.0, | |
| "epoch": 0.108, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.17123964428901672, | |
| "kl": 0.09914526715874672, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 300160.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 128.0, | |
| "completions/max_terminated_length": 128.0, | |
| "completions/mean_length": 82.5, | |
| "completions/mean_terminated_length": 94.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 86.0, | |
| "epoch": 0.112, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.9953401684761047, | |
| "kl": 0.18897472321987152, | |
| "learning_rate": 1e-06, | |
| "loss": 0.002, | |
| "num_tokens": 307720.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 335.0, | |
| "completions/max_terminated_length": 335.0, | |
| "completions/mean_length": 229.375, | |
| "completions/mean_terminated_length": 262.14285714285717, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 187.0, | |
| "epoch": 0.116, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.1179044246673584, | |
| "kl": 0.013377793598920107, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3156, | |
| "num_tokens": 328920.0, | |
| "reward": 0.3519230782985687, | |
| "reward_std": 0.3794543743133545, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 184.0, | |
| "completions/max_terminated_length": 184.0, | |
| "completions/mean_length": 131.375, | |
| "completions/mean_terminated_length": 150.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 14.0, | |
| "epoch": 0.12, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.2885483503341675, | |
| "kl": 0.009146903175860643, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0387, | |
| "num_tokens": 335880.0, | |
| "reward": 0.25, | |
| "reward_std": 0.4629100561141968, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1936.0, | |
| "completions/max_terminated_length": 1936.0, | |
| "completions/mean_length": 410.0, | |
| "completions/mean_terminated_length": 468.57142857142856, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 15.0, | |
| "epoch": 0.124, | |
| "format_failures": 1.0, | |
| "grad_norm": 1.5897152423858643, | |
| "kl": 0.06828754395246506, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0215, | |
| "num_tokens": 358104.0, | |
| "reward": 0.45494991540908813, | |
| "reward_std": 0.48848965764045715, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 366.0, | |
| "completions/max_terminated_length": 366.0, | |
| "completions/mean_length": 202.375, | |
| "completions/mean_terminated_length": 231.28571428571428, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 122.0, | |
| "epoch": 0.128, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.8364682793617249, | |
| "kl": 0.12048156931996346, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0898, | |
| "num_tokens": 365656.0, | |
| "reward": 0.4521104097366333, | |
| "reward_std": 0.2924821972846985, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 68.0, | |
| "completions/max_terminated_length": 68.0, | |
| "completions/mean_length": 48.875, | |
| "completions/mean_terminated_length": 55.857142857142854, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 46.0, | |
| "epoch": 0.132, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.7178492546081543, | |
| "kl": 0.13572826609015465, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0249, | |
| "num_tokens": 371392.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 435.0, | |
| "completions/max_terminated_length": 435.0, | |
| "completions/mean_length": 293.5, | |
| "completions/mean_terminated_length": 335.42857142857144, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 210.0, | |
| "epoch": 0.136, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.9806227087974548, | |
| "kl": 0.012222900055348873, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3233, | |
| "num_tokens": 392240.0, | |
| "reward": 0.47658732533454895, | |
| "reward_std": 0.4081757962703705, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 97.0, | |
| "completions/max_terminated_length": 97.0, | |
| "completions/mean_length": 64.875, | |
| "completions/mean_terminated_length": 74.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 43.0, | |
| "epoch": 0.14, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.8304542303085327, | |
| "kl": 0.031799230724573135, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0113, | |
| "num_tokens": 396792.0, | |
| "reward": 0.6166666746139526, | |
| "reward_std": 0.31773003935813904, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 265.0, | |
| "completions/max_terminated_length": 265.0, | |
| "completions/mean_length": 114.25, | |
| "completions/mean_terminated_length": 130.57142857142858, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 39.0, | |
| "epoch": 0.144, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.793579339981079, | |
| "kl": 0.6158746182918549, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0043, | |
| "num_tokens": 404472.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 233.0, | |
| "completions/max_terminated_length": 233.0, | |
| "completions/mean_length": 169.75, | |
| "completions/mean_terminated_length": 194.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 122.0, | |
| "epoch": 0.148, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.3936280906200409, | |
| "kl": 0.04245052766054869, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0153, | |
| "num_tokens": 411600.0, | |
| "reward": 0.5294643044471741, | |
| "reward_std": 0.21430060267448425, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 152.0, | |
| "completions/max_terminated_length": 152.0, | |
| "completions/mean_length": 74.625, | |
| "completions/mean_terminated_length": 85.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 59.0, | |
| "epoch": 0.152, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.592628002166748, | |
| "kl": 0.14406441897153854, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0363, | |
| "num_tokens": 417456.0, | |
| "reward": 0.0555555559694767, | |
| "reward_std": 0.11878278106451035, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 726.0, | |
| "completions/max_terminated_length": 726.0, | |
| "completions/mean_length": 330.25, | |
| "completions/mean_terminated_length": 377.42857142857144, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 196.0, | |
| "epoch": 0.156, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.7340777516365051, | |
| "kl": 0.02144559659063816, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0557, | |
| "num_tokens": 439208.0, | |
| "reward": 0.10000000149011612, | |
| "reward_std": 0.2828426957130432, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 625.0, | |
| "completions/max_terminated_length": 625.0, | |
| "completions/mean_length": 336.0, | |
| "completions/mean_terminated_length": 384.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 175.0, | |
| "epoch": 0.16, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.32950443029403687, | |
| "kl": 0.018678720109164715, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1579, | |
| "num_tokens": 464616.0, | |
| "reward": 0.68376624584198, | |
| "reward_std": 0.16028425097465515, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 91.0, | |
| "completions/max_terminated_length": 91.0, | |
| "completions/mean_length": 53.75, | |
| "completions/mean_terminated_length": 61.42857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 39.0, | |
| "epoch": 0.164, | |
| "format_failures": 0.0, | |
| "grad_norm": 15.617924690246582, | |
| "kl": 2.1802964210510254, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1623, | |
| "num_tokens": 473272.0, | |
| "reward": 0.4464285671710968, | |
| "reward_std": 0.49744242429733276, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 74.0, | |
| "completions/max_terminated_length": 74.0, | |
| "completions/mean_length": 62.625, | |
| "completions/mean_terminated_length": 71.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 69.0, | |
| "epoch": 0.168, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5167672634124756, | |
| "kl": 0.192179337143898, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 477896.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 187.0, | |
| "completions/max_terminated_length": 187.0, | |
| "completions/mean_length": 124.625, | |
| "completions/mean_terminated_length": 142.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 57.0, | |
| "epoch": 0.172, | |
| "format_failures": 1.0, | |
| "grad_norm": 1.7434178590774536, | |
| "kl": 0.43839313089847565, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0081, | |
| "num_tokens": 485584.0, | |
| "reward": 0.1041666716337204, | |
| "reward_std": 0.19795581698417664, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 53.0, | |
| "completions/max_terminated_length": 53.0, | |
| "completions/mean_length": 21.5, | |
| "completions/mean_terminated_length": 43.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 33.0, | |
| "epoch": 0.176, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.19118274748325348, | |
| "kl": 0.021482082083821297, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 491072.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 184.0, | |
| "completions/max_terminated_length": 184.0, | |
| "completions/mean_length": 101.375, | |
| "completions/mean_terminated_length": 115.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 64.0, | |
| "epoch": 0.18, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5414936542510986, | |
| "kl": 0.23846322298049927, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0026, | |
| "num_tokens": 501048.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 184.0, | |
| "completions/max_terminated_length": 184.0, | |
| "completions/mean_length": 105.25, | |
| "completions/mean_terminated_length": 120.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 60.0, | |
| "epoch": 0.184, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.3124736547470093, | |
| "kl": 0.02640421688556671, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0418, | |
| "num_tokens": 509688.0, | |
| "reward": 0.3333333432674408, | |
| "reward_std": 0.35634833574295044, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 308.0, | |
| "completions/max_terminated_length": 308.0, | |
| "completions/mean_length": 222.625, | |
| "completions/mean_terminated_length": 254.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 140.0, | |
| "epoch": 0.188, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.6642023324966431, | |
| "kl": 0.038137754425406456, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0281, | |
| "num_tokens": 516136.0, | |
| "reward": 0.5722222328186035, | |
| "reward_std": 0.3752013146877289, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 220.0, | |
| "completions/max_terminated_length": 220.0, | |
| "completions/mean_length": 139.0, | |
| "completions/mean_terminated_length": 158.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 52.0, | |
| "epoch": 0.192, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.5801048278808594, | |
| "kl": 0.31588232330977917, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0356, | |
| "num_tokens": 525216.0, | |
| "reward": 0.16785714030265808, | |
| "reward_std": 0.3453776240348816, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 142.0, | |
| "completions/max_terminated_length": 142.0, | |
| "completions/mean_length": 103.0, | |
| "completions/mean_terminated_length": 117.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 72.0, | |
| "epoch": 0.196, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.5228773355484009, | |
| "kl": 0.3656068593263626, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0299, | |
| "num_tokens": 532920.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.15430335700511932, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 167.0, | |
| "completions/max_terminated_length": 167.0, | |
| "completions/mean_length": 58.625, | |
| "completions/mean_terminated_length": 67.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 23.0, | |
| "epoch": 0.2, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.357253074645996, | |
| "kl": 0.021084215957671404, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1241, | |
| "num_tokens": 539800.0, | |
| "reward": 0.24715909361839294, | |
| "reward_std": 0.3969031274318695, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 79.0, | |
| "completions/max_terminated_length": 79.0, | |
| "completions/mean_length": 47.5, | |
| "completions/mean_terminated_length": 76.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 74.0, | |
| "epoch": 0.204, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.9780025482177734, | |
| "kl": 0.04299665614962578, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0066, | |
| "num_tokens": 547080.0, | |
| "reward": 0.75, | |
| "reward_std": 0.38832157850265503, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 377.0, | |
| "completions/max_terminated_length": 377.0, | |
| "completions/mean_length": 245.0, | |
| "completions/mean_terminated_length": 280.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 236.0, | |
| "epoch": 0.208, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.824322521686554, | |
| "kl": 0.04343542829155922, | |
| "learning_rate": 1e-06, | |
| "loss": -0.394, | |
| "num_tokens": 565368.0, | |
| "reward": 0.3678571581840515, | |
| "reward_std": 0.38505232334136963, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 313.0, | |
| "completions/max_terminated_length": 313.0, | |
| "completions/mean_length": 223.5, | |
| "completions/mean_terminated_length": 255.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 229.0, | |
| "epoch": 0.212, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.8966130018234253, | |
| "kl": 0.022847690619528294, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0523, | |
| "num_tokens": 584552.0, | |
| "reward": 0.09375, | |
| "reward_std": 0.2651650309562683, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 463.0, | |
| "completions/max_terminated_length": 463.0, | |
| "completions/mean_length": 301.75, | |
| "completions/mean_terminated_length": 344.85714285714283, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 222.0, | |
| "epoch": 0.216, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5948707461357117, | |
| "kl": 0.0344517957419157, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0372, | |
| "num_tokens": 605144.0, | |
| "reward": 0.3611606955528259, | |
| "reward_std": 0.24707795679569244, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 183.0, | |
| "completions/max_terminated_length": 183.0, | |
| "completions/mean_length": 99.75, | |
| "completions/mean_terminated_length": 114.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 69.0, | |
| "epoch": 0.22, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.431544065475464, | |
| "kl": 0.39844033867120743, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0435, | |
| "num_tokens": 612304.0, | |
| "reward": 0.3895833492279053, | |
| "reward_std": 0.4363391399383545, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 189.0, | |
| "completions/max_terminated_length": 189.0, | |
| "completions/mean_length": 158.875, | |
| "completions/mean_terminated_length": 181.57142857142858, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 170.0, | |
| "epoch": 0.224, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.419069528579712, | |
| "kl": 0.18863588571548462, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0102, | |
| "num_tokens": 619832.0, | |
| "reward": 0.3333333432674408, | |
| "reward_std": 0.4714045226573944, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 560.0, | |
| "completions/max_terminated_length": 560.0, | |
| "completions/mean_length": 250.5, | |
| "completions/mean_terminated_length": 286.2857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 112.0, | |
| "epoch": 0.228, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0427495501935482, | |
| "kl": 0.06415125727653503, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 632688.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 95.0, | |
| "completions/max_terminated_length": 95.0, | |
| "completions/mean_length": 62.0, | |
| "completions/mean_terminated_length": 70.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 55.0, | |
| "epoch": 0.232, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.9774202108383179, | |
| "kl": 0.05197676923125982, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0204, | |
| "num_tokens": 637680.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1235.0, | |
| "completions/max_terminated_length": 1235.0, | |
| "completions/mean_length": 317.5, | |
| "completions/mean_terminated_length": 362.85714285714283, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 29.0, | |
| "epoch": 0.236, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.3588317036628723, | |
| "kl": 0.008119639242067933, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0679, | |
| "num_tokens": 662240.0, | |
| "reward": 0.0625, | |
| "reward_std": 0.1767766922712326, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 482.0, | |
| "completions/max_terminated_length": 482.0, | |
| "completions/mean_length": 302.625, | |
| "completions/mean_terminated_length": 345.85714285714283, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 224.0, | |
| "epoch": 0.24, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.43694156408309937, | |
| "kl": 0.13442928344011307, | |
| "learning_rate": 1e-06, | |
| "loss": 0.035, | |
| "num_tokens": 671136.0, | |
| "reward": 0.4389880895614624, | |
| "reward_std": 0.314676970243454, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 155.0, | |
| "completions/max_terminated_length": 155.0, | |
| "completions/mean_length": 76.625, | |
| "completions/mean_terminated_length": 87.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 0.244, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.0356831550598145, | |
| "kl": 0.10412658751010895, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0941, | |
| "num_tokens": 678296.0, | |
| "reward": 0.2856481671333313, | |
| "reward_std": 0.44585946202278137, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 526.0, | |
| "completions/max_terminated_length": 526.0, | |
| "completions/mean_length": 302.125, | |
| "completions/mean_terminated_length": 345.2857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 173.0, | |
| "epoch": 0.248, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.2828364074230194, | |
| "kl": 0.06026838719844818, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0307, | |
| "num_tokens": 688328.0, | |
| "reward": 0.37730082869529724, | |
| "reward_std": 0.22057875990867615, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 1564.0, | |
| "completions/max_terminated_length": 1564.0, | |
| "completions/mean_length": 436.5, | |
| "completions/mean_terminated_length": 498.85714285714283, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 278.0, | |
| "epoch": 0.252, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.460735559463501, | |
| "kl": 0.03187366481870413, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3464, | |
| "num_tokens": 710552.0, | |
| "reward": 0.7753968238830566, | |
| "reward_std": 0.3274153470993042, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 183.0, | |
| "completions/max_terminated_length": 183.0, | |
| "completions/mean_length": 112.0, | |
| "completions/mean_terminated_length": 128.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 77.0, | |
| "epoch": 0.256, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.9710547924041748, | |
| "kl": 0.056045059114694595, | |
| "learning_rate": 1e-06, | |
| "loss": 0.397, | |
| "num_tokens": 730936.0, | |
| "reward": 0.4721861779689789, | |
| "reward_std": 0.31307727098464966, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 282.0, | |
| "completions/max_terminated_length": 282.0, | |
| "completions/mean_length": 181.25, | |
| "completions/mean_terminated_length": 207.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 115.0, | |
| "epoch": 0.26, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5494914054870605, | |
| "kl": 0.17688407003879547, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0636, | |
| "num_tokens": 737640.0, | |
| "reward": 0.4345238208770752, | |
| "reward_std": 0.24914170801639557, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 782.0, | |
| "completions/max_terminated_length": 782.0, | |
| "completions/mean_length": 442.625, | |
| "completions/mean_terminated_length": 505.85714285714283, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 371.0, | |
| "epoch": 0.264, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.2535926103591919, | |
| "kl": 0.027257385663688183, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0455, | |
| "num_tokens": 749424.0, | |
| "reward": 0.4035714268684387, | |
| "reward_std": 0.21609759330749512, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 536.0, | |
| "completions/max_terminated_length": 536.0, | |
| "completions/mean_length": 360.375, | |
| "completions/mean_terminated_length": 411.85714285714283, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 286.0, | |
| "epoch": 0.268, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.2211979627609253, | |
| "kl": 0.03450755029916763, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0173, | |
| "num_tokens": 758368.0, | |
| "reward": 0.26453372836112976, | |
| "reward_std": 0.18241503834724426, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 324.0, | |
| "completions/max_terminated_length": 324.0, | |
| "completions/mean_length": 171.0, | |
| "completions/mean_terminated_length": 195.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 64.0, | |
| "epoch": 0.272, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.1518077850341797, | |
| "kl": 0.7764540687203407, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0543, | |
| "num_tokens": 769808.0, | |
| "reward": 0.20863094925880432, | |
| "reward_std": 0.1800907701253891, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 255.0, | |
| "completions/max_terminated_length": 255.0, | |
| "completions/mean_length": 146.875, | |
| "completions/mean_terminated_length": 167.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 90.0, | |
| "epoch": 0.276, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.4199182987213135, | |
| "kl": 0.03853025659918785, | |
| "learning_rate": 1e-06, | |
| "loss": -0.3424, | |
| "num_tokens": 787960.0, | |
| "reward": 0.29305553436279297, | |
| "reward_std": 0.3426187038421631, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 497.0, | |
| "completions/max_terminated_length": 497.0, | |
| "completions/mean_length": 260.25, | |
| "completions/mean_terminated_length": 297.42857142857144, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 193.0, | |
| "epoch": 0.28, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.95790034532547, | |
| "kl": 0.04087948985397816, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0072, | |
| "num_tokens": 808840.0, | |
| "reward": 0.30420100688934326, | |
| "reward_std": 0.21492989361286163, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 457.0, | |
| "completions/max_terminated_length": 457.0, | |
| "completions/mean_length": 277.125, | |
| "completions/mean_terminated_length": 316.7142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 222.0, | |
| "epoch": 0.284, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.6122504472732544, | |
| "kl": 0.043809447437524796, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0844, | |
| "num_tokens": 820184.0, | |
| "reward": 0.4826388657093048, | |
| "reward_std": 0.40854451060295105, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.625, | |
| "completions/max_length": 130.0, | |
| "completions/max_terminated_length": 130.0, | |
| "completions/mean_length": 31.875, | |
| "completions/mean_terminated_length": 85.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 0.288, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.6429221630096436, | |
| "kl": 0.14530150592327118, | |
| "learning_rate": 1e-06, | |
| "loss": -0.3358, | |
| "num_tokens": 828280.0, | |
| "reward": 0.625, | |
| "reward_std": 0.41547447443008423, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 2028.0, | |
| "completions/max_terminated_length": 2028.0, | |
| "completions/mean_length": 568.375, | |
| "completions/mean_terminated_length": 649.5714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 233.0, | |
| "epoch": 0.292, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.340351402759552, | |
| "kl": 0.04210643842816353, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1705, | |
| "num_tokens": 850536.0, | |
| "reward": 0.255952388048172, | |
| "reward_std": 0.28989601135253906, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 298.0, | |
| "completions/max_terminated_length": 298.0, | |
| "completions/mean_length": 243.5, | |
| "completions/mean_terminated_length": 278.2857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 271.0, | |
| "epoch": 0.296, | |
| "format_failures": 0.0, | |
| "grad_norm": 16.964588165283203, | |
| "kl": 2.3798545002937317, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0303, | |
| "num_tokens": 861552.0, | |
| "reward": 0.5833333730697632, | |
| "reward_std": 0.4629100263118744, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 545.0, | |
| "completions/max_terminated_length": 545.0, | |
| "completions/mean_length": 225.375, | |
| "completions/mean_terminated_length": 257.57142857142856, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 163.0, | |
| "epoch": 0.3, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.23826824128627777, | |
| "kl": 0.033232852816581726, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0132, | |
| "num_tokens": 872312.0, | |
| "reward": 0.20226716995239258, | |
| "reward_std": 0.15315401554107666, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 222.0, | |
| "completions/max_terminated_length": 222.0, | |
| "completions/mean_length": 145.75, | |
| "completions/mean_terminated_length": 166.57142857142858, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 0.304, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.913487434387207, | |
| "kl": 1.3894951939582825, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0165, | |
| "num_tokens": 879880.0, | |
| "reward": 0.17698413133621216, | |
| "reward_std": 0.1964721530675888, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 264.0, | |
| "completions/max_terminated_length": 264.0, | |
| "completions/mean_length": 155.0, | |
| "completions/mean_terminated_length": 177.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 92.0, | |
| "epoch": 0.308, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.5412757396698, | |
| "kl": 1.028398334980011, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0962, | |
| "num_tokens": 887960.0, | |
| "reward": 0.45376986265182495, | |
| "reward_std": 0.3097318112850189, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 536.0, | |
| "completions/max_terminated_length": 536.0, | |
| "completions/mean_length": 286.375, | |
| "completions/mean_terminated_length": 327.2857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 176.0, | |
| "epoch": 0.312, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.6730135679244995, | |
| "kl": 0.0538824163377285, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1157, | |
| "num_tokens": 898928.0, | |
| "reward": 0.20416666567325592, | |
| "reward_std": 0.3781481683254242, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 188.0, | |
| "completions/max_terminated_length": 188.0, | |
| "completions/mean_length": 99.25, | |
| "completions/mean_terminated_length": 158.8, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 48.0, | |
| "epoch": 0.316, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.8478459119796753, | |
| "kl": 0.015719112940132618, | |
| "learning_rate": 1e-06, | |
| "loss": -0.134, | |
| "num_tokens": 908336.0, | |
| "reward": 0.75, | |
| "reward_std": 0.4629100561141968, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 290.0, | |
| "completions/max_terminated_length": 290.0, | |
| "completions/mean_length": 218.0, | |
| "completions/mean_terminated_length": 249.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 111.0, | |
| "epoch": 0.32, | |
| "format_failures": 0.0, | |
| "grad_norm": 4.647150039672852, | |
| "kl": 1.3871727883815765, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0114, | |
| "num_tokens": 919144.0, | |
| "reward": 0.515625, | |
| "reward_std": 0.5194326043128967, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 61.0, | |
| "completions/max_terminated_length": 61.0, | |
| "completions/mean_length": 44.75, | |
| "completions/mean_terminated_length": 51.142857142857146, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 42.0, | |
| "epoch": 0.324, | |
| "format_failures": 0.0, | |
| "grad_norm": 4.4413957595825195, | |
| "kl": 1.4963605403900146, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0199, | |
| "num_tokens": 924120.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 158.0, | |
| "completions/max_terminated_length": 158.0, | |
| "completions/mean_length": 114.0, | |
| "completions/mean_terminated_length": 130.28571428571428, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 70.0, | |
| "epoch": 0.328, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.7050689458847046, | |
| "kl": 0.046199409291148186, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0456, | |
| "num_tokens": 930960.0, | |
| "reward": 0.5011904835700989, | |
| "reward_std": 0.24937564134597778, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 518.0, | |
| "completions/max_terminated_length": 518.0, | |
| "completions/mean_length": 449.875, | |
| "completions/mean_terminated_length": 514.1428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 509.0, | |
| "epoch": 0.332, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.26836591958999634, | |
| "kl": 0.006152217974886298, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0312, | |
| "num_tokens": 948424.0, | |
| "reward": 0.7916666865348816, | |
| "reward_std": 0.39591163396835327, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 246.0, | |
| "completions/max_terminated_length": 246.0, | |
| "completions/mean_length": 138.625, | |
| "completions/mean_terminated_length": 158.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 130.0, | |
| "epoch": 0.336, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.0764328241348267, | |
| "kl": 0.07650505751371384, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0964, | |
| "num_tokens": 956768.0, | |
| "reward": 0.3864583373069763, | |
| "reward_std": 0.3207734227180481, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 531.0, | |
| "completions/max_terminated_length": 531.0, | |
| "completions/mean_length": 292.0, | |
| "completions/mean_terminated_length": 333.7142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 178.0, | |
| "epoch": 0.34, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5540055632591248, | |
| "kl": 0.054012734442949295, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1183, | |
| "num_tokens": 966600.0, | |
| "reward": 0.34756946563720703, | |
| "reward_std": 0.300673246383667, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 153.0, | |
| "completions/max_terminated_length": 153.0, | |
| "completions/mean_length": 126.0, | |
| "completions/mean_terminated_length": 144.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 104.0, | |
| "epoch": 0.344, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.176490306854248, | |
| "kl": 0.14486993476748466, | |
| "learning_rate": 1e-06, | |
| "loss": 0.044, | |
| "num_tokens": 974040.0, | |
| "reward": 0.6666666269302368, | |
| "reward_std": 0.4714045226573944, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 163.0, | |
| "completions/max_terminated_length": 163.0, | |
| "completions/mean_length": 139.875, | |
| "completions/mean_terminated_length": 159.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 154.0, | |
| "epoch": 0.348, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.048673391342163, | |
| "kl": 0.05823306553065777, | |
| "learning_rate": 1e-06, | |
| "loss": 1.0611, | |
| "num_tokens": 995888.0, | |
| "reward": 0.625, | |
| "reward_std": 0.5175491571426392, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 281.0, | |
| "completions/max_terminated_length": 281.0, | |
| "completions/mean_length": 101.125, | |
| "completions/mean_terminated_length": 134.83333333333334, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 75.0, | |
| "epoch": 0.352, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.9394124746322632, | |
| "kl": 0.09709636494517326, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3171, | |
| "num_tokens": 1016272.0, | |
| "reward": 0.47559523582458496, | |
| "reward_std": 0.2696917653083801, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 160.0, | |
| "completions/max_terminated_length": 160.0, | |
| "completions/mean_length": 92.375, | |
| "completions/mean_terminated_length": 105.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 64.0, | |
| "epoch": 0.356, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.0850152969360352, | |
| "kl": 0.11065866611897945, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0191, | |
| "num_tokens": 1022584.0, | |
| "reward": 0.027205882593989372, | |
| "reward_std": 0.050595808774232864, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 238.0, | |
| "completions/max_terminated_length": 238.0, | |
| "completions/mean_length": 152.125, | |
| "completions/mean_terminated_length": 173.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 129.0, | |
| "epoch": 0.36, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.7975893020629883, | |
| "kl": 0.4505193531513214, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0489, | |
| "num_tokens": 1028024.0, | |
| "reward": 0.4837797284126282, | |
| "reward_std": 0.3459106385707855, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 198.0, | |
| "completions/max_terminated_length": 198.0, | |
| "completions/mean_length": 122.875, | |
| "completions/mean_terminated_length": 196.6, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 195.0, | |
| "epoch": 0.364, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.371446430683136, | |
| "kl": 0.017493599094450474, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0009, | |
| "num_tokens": 1039176.0, | |
| "reward": 0.7916666865348816, | |
| "reward_std": 0.39591163396835327, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 368.0, | |
| "completions/max_terminated_length": 368.0, | |
| "completions/mean_length": 228.5, | |
| "completions/mean_terminated_length": 261.14285714285717, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 132.0, | |
| "epoch": 0.368, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.6181436777114868, | |
| "kl": 1.322296380996704, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0419, | |
| "num_tokens": 1047784.0, | |
| "reward": 0.2874999940395355, | |
| "reward_std": 0.39957815408706665, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 445.0, | |
| "completions/max_terminated_length": 445.0, | |
| "completions/mean_length": 250.125, | |
| "completions/mean_terminated_length": 285.85714285714283, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 206.0, | |
| "epoch": 0.372, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.4590940773487091, | |
| "kl": 0.03011018969118595, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0477, | |
| "num_tokens": 1058760.0, | |
| "reward": 0.38749998807907104, | |
| "reward_std": 0.3058944642543793, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 64.0, | |
| "completions/max_terminated_length": 64.0, | |
| "completions/mean_length": 55.75, | |
| "completions/mean_terminated_length": 63.714285714285715, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 0.376, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.706254720687866, | |
| "kl": 0.022694013081490993, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4609, | |
| "num_tokens": 1069792.0, | |
| "reward": 0.5052083730697632, | |
| "reward_std": 0.25630685687065125, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 297.0, | |
| "completions/max_terminated_length": 297.0, | |
| "completions/mean_length": 155.75, | |
| "completions/mean_terminated_length": 178.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 101.0, | |
| "epoch": 0.38, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.6162223815917969, | |
| "kl": 0.43194980919361115, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0132, | |
| "num_tokens": 1079864.0, | |
| "reward": 0.21741071343421936, | |
| "reward_std": 0.28225868940353394, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 141.0, | |
| "completions/max_terminated_length": 141.0, | |
| "completions/mean_length": 120.125, | |
| "completions/mean_terminated_length": 137.28571428571428, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 134.0, | |
| "epoch": 0.384, | |
| "format_failures": 0.0, | |
| "grad_norm": 18.852705001831055, | |
| "kl": 4.019676446914673, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0359, | |
| "num_tokens": 1088416.0, | |
| "reward": 0.90625, | |
| "reward_std": 0.1293872892856598, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 81.0, | |
| "completions/max_terminated_length": 81.0, | |
| "completions/mean_length": 65.125, | |
| "completions/mean_terminated_length": 74.42857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 72.0, | |
| "epoch": 0.388, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.17805831134319305, | |
| "kl": 0.0494217723608017, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0198, | |
| "num_tokens": 1095056.0, | |
| "reward": 0.984375, | |
| "reward_std": 0.04419417306780815, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 63.0, | |
| "completions/max_terminated_length": 63.0, | |
| "completions/mean_length": 34.75, | |
| "completions/mean_terminated_length": 39.714285714285715, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 0.392, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.5279428958892822, | |
| "kl": 0.29206034541130066, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0386, | |
| "num_tokens": 1100752.0, | |
| "reward": 0.0416666679084301, | |
| "reward_std": 0.1178511381149292, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 351.0, | |
| "completions/max_terminated_length": 351.0, | |
| "completions/mean_length": 249.375, | |
| "completions/mean_terminated_length": 285.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 212.0, | |
| "epoch": 0.396, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.56284499168396, | |
| "kl": 0.11262823268771172, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0758, | |
| "num_tokens": 1112056.0, | |
| "reward": 0.5658119916915894, | |
| "reward_std": 0.2206362932920456, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 194.0, | |
| "completions/max_terminated_length": 194.0, | |
| "completions/mean_length": 149.5, | |
| "completions/mean_terminated_length": 170.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 52.0, | |
| "epoch": 0.4, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.1969668865203857, | |
| "kl": 0.0690736249089241, | |
| "learning_rate": 1e-06, | |
| "loss": -0.001, | |
| "num_tokens": 1121104.0, | |
| "reward": 0.75, | |
| "reward_std": 0.4629100561141968, | |
| "step": 100 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 1121104, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |