| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.384, | |
| "eval_steps": 500, | |
| "global_step": 102, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 177.0, | |
| "completions/max_terminated_length": 177.0, | |
| "completions/mean_length": 105.5, | |
| "completions/mean_terminated_length": 120.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 16.0, | |
| "epoch": 0.064, | |
| "format_failures": 3.0, | |
| "grad_norm": 2.247725486755371, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0347, | |
| "num_tokens": 6048.0, | |
| "reward": 0.25, | |
| "reward_std": 0.4629100561141968, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 131.0, | |
| "completions/max_terminated_length": 131.0, | |
| "completions/mean_length": 75.625, | |
| "completions/mean_terminated_length": 86.42857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 0.128, | |
| "format_failures": 3.0, | |
| "grad_norm": 1.4242777824401855, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1028, | |
| "num_tokens": 13280.0, | |
| "reward": 0.1875, | |
| "reward_std": 0.3720118999481201, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 213.0, | |
| "completions/max_terminated_length": 213.0, | |
| "completions/mean_length": 124.25, | |
| "completions/mean_terminated_length": 142.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 41.0, | |
| "epoch": 0.192, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.020250532776117325, | |
| "kl": 0.0035181287967134267, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 21904.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 249.0, | |
| "completions/max_terminated_length": 249.0, | |
| "completions/mean_length": 73.25, | |
| "completions/mean_terminated_length": 83.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 18.0, | |
| "epoch": 0.256, | |
| "format_failures": 1.0, | |
| "grad_norm": 8.061470031738281, | |
| "kl": 0.034313585492782295, | |
| "learning_rate": 1e-06, | |
| "loss": -0.2682, | |
| "num_tokens": 27552.0, | |
| "reward": 0.27916666865348816, | |
| "reward_std": 0.8364584445953369, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 385.0, | |
| "completions/max_terminated_length": 385.0, | |
| "completions/mean_length": 145.625, | |
| "completions/mean_terminated_length": 166.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.32, | |
| "format_failures": 1.0, | |
| "grad_norm": 1.223435401916504, | |
| "kl": 0.03014595981221646, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1171, | |
| "num_tokens": 43192.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 277.0, | |
| "completions/max_terminated_length": 277.0, | |
| "completions/mean_length": 109.625, | |
| "completions/mean_terminated_length": 125.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 56.0, | |
| "epoch": 0.384, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.1720724254846573, | |
| "kl": 0.03908220527227968, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 55448.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 126.0, | |
| "completions/max_terminated_length": 126.0, | |
| "completions/mean_length": 74.625, | |
| "completions/mean_terminated_length": 85.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 51.0, | |
| "epoch": 0.448, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.5268336534500122, | |
| "kl": 0.021530768717639148, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0075, | |
| "num_tokens": 62672.0, | |
| "reward": 0.03125, | |
| "reward_std": 0.0883883461356163, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 118.0, | |
| "completions/max_terminated_length": 118.0, | |
| "completions/mean_length": 62.125, | |
| "completions/mean_terminated_length": 71.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.512, | |
| "format_failures": 2.0, | |
| "grad_norm": 2.541877031326294, | |
| "kl": 0.3408850164851174, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1278, | |
| "num_tokens": 70896.0, | |
| "reward": 0.25, | |
| "reward_std": 0.4629100561141968, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 117.0, | |
| "completions/max_terminated_length": 117.0, | |
| "completions/mean_length": 82.125, | |
| "completions/mean_terminated_length": 93.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 65.0, | |
| "epoch": 0.576, | |
| "format_failures": 2.0, | |
| "grad_norm": 1.876581072807312, | |
| "kl": 0.0260943416506052, | |
| "learning_rate": 1e-06, | |
| "loss": -0.053, | |
| "num_tokens": 78128.0, | |
| "reward": 0.4166666865348816, | |
| "reward_std": 0.49601587653160095, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 106.0, | |
| "completions/max_terminated_length": 106.0, | |
| "completions/mean_length": 59.125, | |
| "completions/mean_terminated_length": 67.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 54.0, | |
| "epoch": 0.64, | |
| "format_failures": 1.0, | |
| "grad_norm": 1.4804662466049194, | |
| "kl": 0.17110479215625674, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0096, | |
| "num_tokens": 83696.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 141.0, | |
| "completions/max_terminated_length": 141.0, | |
| "completions/mean_length": 90.625, | |
| "completions/mean_terminated_length": 103.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.704, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.5350069999694824, | |
| "kl": 0.48000563448294997, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0756, | |
| "num_tokens": 92216.0, | |
| "reward": 0.375, | |
| "reward_std": 0.4154745042324066, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 130.0, | |
| "completions/max_terminated_length": 130.0, | |
| "completions/mean_length": 66.125, | |
| "completions/mean_terminated_length": 75.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 55.0, | |
| "epoch": 0.768, | |
| "format_failures": 0.0, | |
| "grad_norm": 7.105235576629639, | |
| "kl": 0.25097161275334656, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1211, | |
| "num_tokens": 101288.0, | |
| "reward": 0.25, | |
| "reward_std": 0.38832157850265503, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 76.0, | |
| "completions/max_terminated_length": 76.0, | |
| "completions/mean_length": 41.375, | |
| "completions/mean_terminated_length": 47.285714285714285, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 38.0, | |
| "epoch": 0.832, | |
| "format_failures": 1.0, | |
| "grad_norm": 8.552057266235352, | |
| "kl": 0.887442918960005, | |
| "learning_rate": 1e-06, | |
| "loss": 0.6279, | |
| "num_tokens": 108296.0, | |
| "reward": 0.625, | |
| "reward_std": 0.4520675837993622, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 508.0, | |
| "completions/max_terminated_length": 508.0, | |
| "completions/mean_length": 169.125, | |
| "completions/mean_terminated_length": 193.28571428571428, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 0.896, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.0173394680023193, | |
| "kl": 0.7231281753629446, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0056, | |
| "num_tokens": 123336.0, | |
| "reward": 0.0535714291036129, | |
| "reward_std": 0.15152288973331451, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 161.0, | |
| "completions/max_terminated_length": 161.0, | |
| "completions/mean_length": 89.375, | |
| "completions/mean_terminated_length": 102.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 47.0, | |
| "epoch": 0.96, | |
| "format_failures": 0.0, | |
| "grad_norm": 4.813839912414551, | |
| "kl": 1.1184300668537617, | |
| "learning_rate": 1e-06, | |
| "loss": -0.231, | |
| "num_tokens": 136000.0, | |
| "reward": -0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 206.0, | |
| "completions/max_terminated_length": 206.0, | |
| "completions/mean_length": 131.875, | |
| "completions/mean_terminated_length": 150.71428571428572, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 1.0, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.23264528810977936, | |
| "kl": 0.09705191291868687, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 146704.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 702.0, | |
| "completions/max_terminated_length": 702.0, | |
| "completions/mean_length": 300.875, | |
| "completions/mean_terminated_length": 343.85714285714283, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 163.0, | |
| "epoch": 1.064, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.7797789573669434, | |
| "kl": 0.031833621207624674, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0936, | |
| "num_tokens": 161184.0, | |
| "reward": 0.32083332538604736, | |
| "reward_std": 0.4521333873271942, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.000908265239559114, | |
| "clip_ratio/high_mean": 0.000908265239559114, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.000908265239559114, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 73.0, | |
| "completions/max_terminated_length": 73.0, | |
| "completions/mean_length": 51.75, | |
| "completions/mean_terminated_length": 59.142857142857146, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 1.1280000000000001, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.9153517484664917, | |
| "kl": 0.045906367246061563, | |
| "learning_rate": 1e-06, | |
| "loss": 0.2096, | |
| "num_tokens": 165496.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 354.0, | |
| "completions/max_terminated_length": 354.0, | |
| "completions/mean_length": 242.375, | |
| "completions/mean_terminated_length": 277.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 136.0, | |
| "epoch": 1.192, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.8472970724105835, | |
| "kl": 0.020359830697998405, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0893, | |
| "num_tokens": 174512.0, | |
| "reward": 0.5625, | |
| "reward_std": 0.4955156147480011, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00041345093632116914, | |
| "clip_ratio/high_mean": 0.00041345093632116914, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.00041345093632116914, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 115.0, | |
| "completions/max_terminated_length": 115.0, | |
| "completions/mean_length": 80.0, | |
| "completions/mean_terminated_length": 91.42857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 34.0, | |
| "epoch": 1.256, | |
| "format_failures": 0.0, | |
| "grad_norm": 4.18609619140625, | |
| "kl": 0.037674687220714986, | |
| "learning_rate": 1e-06, | |
| "loss": 0.5542, | |
| "num_tokens": 182664.0, | |
| "reward": 0.25, | |
| "reward_std": 0.4629100561141968, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0023937864461913705, | |
| "clip_ratio/low_min": 0.0023937864461913705, | |
| "clip_ratio/region_mean": 0.0023937864461913705, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 189.0, | |
| "completions/max_terminated_length": 189.0, | |
| "completions/mean_length": 90.375, | |
| "completions/mean_terminated_length": 103.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 22.0, | |
| "epoch": 1.32, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.047491550445557, | |
| "kl": 0.262689758092165, | |
| "learning_rate": 1e-06, | |
| "loss": -0.5506, | |
| "num_tokens": 189752.0, | |
| "reward": 0.03125, | |
| "reward_std": 0.0883883461356163, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.00015489467477891594, | |
| "clip_ratio/low_min": 0.00015489467477891594, | |
| "clip_ratio/region_mean": 0.00015489467477891594, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 220.0, | |
| "completions/max_terminated_length": 220.0, | |
| "completions/mean_length": 110.25, | |
| "completions/mean_terminated_length": 126.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 90.0, | |
| "epoch": 1.384, | |
| "format_failures": 1.0, | |
| "grad_norm": 1.870309591293335, | |
| "kl": 0.15177738456986845, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0972, | |
| "num_tokens": 197664.0, | |
| "reward": 0.36250001192092896, | |
| "reward_std": 0.4405759274959564, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0007937598857097328, | |
| "clip_ratio/high_mean": 0.0007937598857097328, | |
| "clip_ratio/low_mean": 0.00033377838553860784, | |
| "clip_ratio/low_min": 0.00033377838553860784, | |
| "clip_ratio/region_mean": 0.0011275382712483406, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 552.0, | |
| "completions/max_terminated_length": 552.0, | |
| "completions/mean_length": 174.75, | |
| "completions/mean_terminated_length": 199.71428571428572, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 82.0, | |
| "epoch": 1.448, | |
| "format_failures": 0.0, | |
| "grad_norm": 7.092167377471924, | |
| "kl": 0.34660289715975523, | |
| "learning_rate": 1e-06, | |
| "loss": 0.8114, | |
| "num_tokens": 210072.0, | |
| "reward": 0.25275737047195435, | |
| "reward_std": 0.3869698941707611, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0025806452613323927, | |
| "clip_ratio/low_min": 0.0025806452613323927, | |
| "clip_ratio/region_mean": 0.0025806452613323927, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 953.0, | |
| "completions/max_terminated_length": 953.0, | |
| "completions/mean_length": 233.125, | |
| "completions/mean_terminated_length": 266.42857142857144, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 61.0, | |
| "epoch": 1.512, | |
| "format_failures": 0.0, | |
| "grad_norm": 8.143402099609375, | |
| "kl": 0.3320934564108029, | |
| "learning_rate": 1e-06, | |
| "loss": -0.9434, | |
| "num_tokens": 224336.0, | |
| "reward": 0.5197916626930237, | |
| "reward_std": 0.43734264373779297, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0009831460192799568, | |
| "clip_ratio/high_mean": 0.0009831460192799568, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0009831460192799568, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 169.0, | |
| "completions/max_terminated_length": 169.0, | |
| "completions/mean_length": 75.0, | |
| "completions/mean_terminated_length": 100.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 43.0, | |
| "epoch": 1.576, | |
| "format_failures": 0.0, | |
| "grad_norm": 12.648838996887207, | |
| "kl": 0.08752637438010424, | |
| "learning_rate": 1e-06, | |
| "loss": -1.3126, | |
| "num_tokens": 237344.0, | |
| "reward": 0.3630952537059784, | |
| "reward_std": 0.3474069833755493, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 85.0, | |
| "completions/max_terminated_length": 85.0, | |
| "completions/mean_length": 60.25, | |
| "completions/mean_terminated_length": 80.33333333333333, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 76.0, | |
| "epoch": 1.6400000000000001, | |
| "format_failures": 0.0, | |
| "grad_norm": 22.935155868530273, | |
| "kl": 0.040498227812349796, | |
| "learning_rate": 1e-06, | |
| "loss": 2.0449, | |
| "num_tokens": 243264.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.00016914748994167894, | |
| "clip_ratio/low_min": 0.00016914748994167894, | |
| "clip_ratio/region_mean": 0.00016914748994167894, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 114.0, | |
| "completions/max_terminated_length": 114.0, | |
| "completions/mean_length": 68.5, | |
| "completions/mean_terminated_length": 78.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 1.704, | |
| "format_failures": 0.0, | |
| "grad_norm": 6.5060811042785645, | |
| "kl": 0.05175229045562446, | |
| "learning_rate": 1e-06, | |
| "loss": -0.231, | |
| "num_tokens": 248064.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 120.0, | |
| "completions/max_terminated_length": 120.0, | |
| "completions/mean_length": 74.375, | |
| "completions/mean_terminated_length": 85.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 14.0, | |
| "epoch": 1.768, | |
| "format_failures": 2.0, | |
| "grad_norm": 5.602163791656494, | |
| "kl": 0.16080649592913687, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4472, | |
| "num_tokens": 255520.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.003766579437069595, | |
| "clip_ratio/low_min": 0.003766579437069595, | |
| "clip_ratio/region_mean": 0.003766579437069595, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 854.0, | |
| "completions/max_terminated_length": 854.0, | |
| "completions/mean_length": 186.5, | |
| "completions/mean_terminated_length": 213.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 26.0, | |
| "epoch": 1.8319999999999999, | |
| "format_failures": 1.0, | |
| "grad_norm": 8.381872177124023, | |
| "kl": 0.047735671047121286, | |
| "learning_rate": 1e-06, | |
| "loss": -1.0193, | |
| "num_tokens": 268512.0, | |
| "reward": 0.109375, | |
| "reward_std": 0.30935922265052795, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 183.0, | |
| "completions/max_terminated_length": 183.0, | |
| "completions/mean_length": 116.75, | |
| "completions/mean_terminated_length": 133.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 15.0, | |
| "epoch": 1.896, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.668828010559082, | |
| "kl": 0.038008465664461255, | |
| "learning_rate": 1e-06, | |
| "loss": -0.7992, | |
| "num_tokens": 282112.0, | |
| "reward": 0.3333333432674408, | |
| "reward_std": 0.4714045226573944, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 7.35726862330921e-05, | |
| "clip_ratio/low_min": 7.35726862330921e-05, | |
| "clip_ratio/region_mean": 7.35726862330921e-05, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 126.0, | |
| "completions/max_terminated_length": 126.0, | |
| "completions/mean_length": 78.125, | |
| "completions/mean_terminated_length": 89.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 68.0, | |
| "epoch": 1.96, | |
| "format_failures": 1.0, | |
| "grad_norm": 11.598993301391602, | |
| "kl": 0.08647240558639169, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9553, | |
| "num_tokens": 289264.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.002619047649204731, | |
| "clip_ratio/low_min": 0.002619047649204731, | |
| "clip_ratio/region_mean": 0.002619047649204731, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 257.0, | |
| "completions/max_terminated_length": 257.0, | |
| "completions/mean_length": 108.25, | |
| "completions/mean_terminated_length": 123.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 61.0, | |
| "epoch": 2.0, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.694812297821045, | |
| "kl": 0.039789453893899915, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1238, | |
| "num_tokens": 302080.0, | |
| "reward": 0.574999988079071, | |
| "reward_std": 0.41661903262138367, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.004414635943248868, | |
| "clip_ratio/low_min": 0.004414635943248868, | |
| "clip_ratio/region_mean": 0.004414635943248868, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 130.0, | |
| "completions/max_terminated_length": 130.0, | |
| "completions/mean_length": 65.75, | |
| "completions/mean_terminated_length": 75.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 2.064, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.186154842376709, | |
| "kl": 0.050242609810084105, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3273, | |
| "num_tokens": 308472.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0015756364446133375, | |
| "clip_ratio/high_mean": 0.0015756364446133375, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0015756364446133375, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 355.0, | |
| "completions/max_terminated_length": 355.0, | |
| "completions/mean_length": 152.375, | |
| "completions/mean_terminated_length": 203.16666666666666, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 57.0, | |
| "epoch": 2.128, | |
| "format_failures": 0.0, | |
| "grad_norm": 6.178646564483643, | |
| "kl": 0.04819304798729718, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0732, | |
| "num_tokens": 321144.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 162.0, | |
| "completions/max_terminated_length": 162.0, | |
| "completions/mean_length": 70.25, | |
| "completions/mean_terminated_length": 80.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 43.0, | |
| "epoch": 2.192, | |
| "format_failures": 0.0, | |
| "grad_norm": 9.679683685302734, | |
| "kl": 0.04483591788448393, | |
| "learning_rate": 1e-06, | |
| "loss": -0.7427, | |
| "num_tokens": 332568.0, | |
| "reward": 0.5416666865348816, | |
| "reward_std": 0.5019802451133728, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0005470459582284093, | |
| "clip_ratio/high_mean": 0.0005470459582284093, | |
| "clip_ratio/low_mean": 0.0024912295630201697, | |
| "clip_ratio/low_min": 0.0024912295630201697, | |
| "clip_ratio/region_mean": 0.003038275521248579, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 123.0, | |
| "completions/max_terminated_length": 123.0, | |
| "completions/mean_length": 61.875, | |
| "completions/mean_terminated_length": 70.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 2.2560000000000002, | |
| "format_failures": 0.0, | |
| "grad_norm": 20.632793426513672, | |
| "kl": 0.06712129758670926, | |
| "learning_rate": 1e-06, | |
| "loss": 2.5061, | |
| "num_tokens": 338352.0, | |
| "reward": 0.2708333432674408, | |
| "reward_std": 0.39778655767440796, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0031341375433839858, | |
| "clip_ratio/low_min": 0.0031341375433839858, | |
| "clip_ratio/region_mean": 0.0031341375433839858, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 201.0, | |
| "completions/max_terminated_length": 201.0, | |
| "completions/mean_length": 97.25, | |
| "completions/mean_terminated_length": 129.66666666666666, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 93.0, | |
| "epoch": 2.32, | |
| "format_failures": 0.0, | |
| "grad_norm": 8.325549125671387, | |
| "kl": 0.07476615975610912, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0014, | |
| "num_tokens": 351528.0, | |
| "reward": 0.4439394176006317, | |
| "reward_std": 0.215702623128891, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0006686007836833596, | |
| "clip_ratio/high_mean": 0.0006686007836833596, | |
| "clip_ratio/low_mean": 0.004799673450179398, | |
| "clip_ratio/low_min": 0.004799673450179398, | |
| "clip_ratio/region_mean": 0.005468274233862758, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 162.0, | |
| "completions/max_terminated_length": 162.0, | |
| "completions/mean_length": 83.25, | |
| "completions/mean_terminated_length": 95.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 2.384, | |
| "format_failures": 0.0, | |
| "grad_norm": 8.579444885253906, | |
| "kl": 0.44708020030520856, | |
| "learning_rate": 1e-06, | |
| "loss": 0.7024, | |
| "num_tokens": 357048.0, | |
| "reward": 0.44583332538604736, | |
| "reward_std": 0.4876042604446411, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0014224655460566282, | |
| "clip_ratio/low_min": 0.0014224655460566282, | |
| "clip_ratio/region_mean": 0.0014224655460566282, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 168.0, | |
| "completions/max_terminated_length": 168.0, | |
| "completions/mean_length": 105.5, | |
| "completions/mean_terminated_length": 120.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 81.0, | |
| "epoch": 2.448, | |
| "format_failures": 1.0, | |
| "grad_norm": 8.134405136108398, | |
| "kl": 0.4579888880252838, | |
| "learning_rate": 1e-06, | |
| "loss": -0.7953, | |
| "num_tokens": 370592.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 152.0, | |
| "completions/max_terminated_length": 152.0, | |
| "completions/mean_length": 101.0, | |
| "completions/mean_terminated_length": 115.42857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 58.0, | |
| "epoch": 2.512, | |
| "format_failures": 0.0, | |
| "grad_norm": 6.998343467712402, | |
| "kl": 0.059122598730027676, | |
| "learning_rate": 1e-06, | |
| "loss": -0.4243, | |
| "num_tokens": 387824.0, | |
| "reward": 0.25, | |
| "reward_std": 0.4629100561141968, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.002086994703859091, | |
| "clip_ratio/high_mean": 0.002086994703859091, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.002086994703859091, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 249.0, | |
| "completions/max_terminated_length": 249.0, | |
| "completions/mean_length": 98.5, | |
| "completions/mean_terminated_length": 112.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 65.0, | |
| "epoch": 2.576, | |
| "format_failures": 0.0, | |
| "grad_norm": 72.67411804199219, | |
| "kl": 0.05187072162516415, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3373, | |
| "num_tokens": 403648.0, | |
| "reward": 0.5208333730697632, | |
| "reward_std": 0.39276695251464844, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0003625637182267383, | |
| "clip_ratio/high_mean": 0.0003625637182267383, | |
| "clip_ratio/low_mean": 0.0002896586374845356, | |
| "clip_ratio/low_min": 0.0002896586374845356, | |
| "clip_ratio/region_mean": 0.0006522223557112738, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 105.0, | |
| "completions/max_terminated_length": 105.0, | |
| "completions/mean_length": 73.25, | |
| "completions/mean_terminated_length": 83.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 61.0, | |
| "epoch": 2.64, | |
| "format_failures": 0.0, | |
| "grad_norm": 14.554330825805664, | |
| "kl": 0.15414534136652946, | |
| "learning_rate": 1e-06, | |
| "loss": 2.2367, | |
| "num_tokens": 409704.0, | |
| "reward": 0.4583333432674408, | |
| "reward_std": 0.501980185508728, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0007432290876749903, | |
| "clip_ratio/low_min": 0.0007432290876749903, | |
| "clip_ratio/region_mean": 0.0007432290876749903, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 174.0, | |
| "completions/max_terminated_length": 174.0, | |
| "completions/mean_length": 109.375, | |
| "completions/mean_terminated_length": 125.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 29.0, | |
| "epoch": 2.7039999999999997, | |
| "format_failures": 0.0, | |
| "grad_norm": 53.890411376953125, | |
| "kl": 1.3919735243543983, | |
| "learning_rate": 1e-06, | |
| "loss": -0.6229, | |
| "num_tokens": 417312.0, | |
| "reward": 0.3125, | |
| "reward_std": 0.2912411689758301, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00039795115299057215, | |
| "clip_ratio/high_mean": 0.00039795115299057215, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.00039795115299057215, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 50.0, | |
| "completions/max_terminated_length": 50.0, | |
| "completions/mean_length": 36.375, | |
| "completions/mean_terminated_length": 48.5, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 45.0, | |
| "epoch": 2.768, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.3910932540893555, | |
| "kl": 0.1744281006976962, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1856, | |
| "num_tokens": 422800.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 68.0, | |
| "completions/max_terminated_length": 68.0, | |
| "completions/mean_length": 49.0, | |
| "completions/mean_terminated_length": 56.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 42.0, | |
| "epoch": 2.832, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.309182733297348, | |
| "kl": 0.09828702360391617, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0019, | |
| "num_tokens": 431080.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 95.0, | |
| "completions/max_terminated_length": 95.0, | |
| "completions/mean_length": 49.625, | |
| "completions/mean_terminated_length": 56.714285714285715, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 34.0, | |
| "epoch": 2.896, | |
| "format_failures": 1.0, | |
| "grad_norm": 16.60660743713379, | |
| "kl": 0.11247169971466064, | |
| "learning_rate": 1e-06, | |
| "loss": -1.7005, | |
| "num_tokens": 439296.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 134.0, | |
| "completions/max_terminated_length": 134.0, | |
| "completions/mean_length": 68.75, | |
| "completions/mean_terminated_length": 78.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 46.0, | |
| "epoch": 2.96, | |
| "format_failures": 0.0, | |
| "grad_norm": 31.673078536987305, | |
| "kl": 2.0126035660505295, | |
| "learning_rate": 1e-06, | |
| "loss": 0.8165, | |
| "num_tokens": 449224.0, | |
| "reward": 0.6875, | |
| "reward_std": 0.45806270837783813, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0006863656220957637, | |
| "clip_ratio/low_min": 0.0006863656220957637, | |
| "clip_ratio/region_mean": 0.0006863656220957637, | |
| "epoch": 3.0, | |
| "grad_norm": 6.059280872344971, | |
| "kl": 0.1403810739517212, | |
| "learning_rate": 1e-06, | |
| "loss": 0.5743, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.00021477663540281355, | |
| "clip_ratio/low_min": 0.00021477663540281355, | |
| "clip_ratio/region_mean": 0.00021477663540281355, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 154.0, | |
| "completions/max_terminated_length": 154.0, | |
| "completions/mean_length": 80.25, | |
| "completions/mean_terminated_length": 91.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 22.0, | |
| "epoch": 3.064, | |
| "format_failures": 0.0, | |
| "grad_norm": 24.96416664123535, | |
| "kl": 4.326897906605154, | |
| "learning_rate": 1e-06, | |
| "loss": 0.25, | |
| "num_tokens": 458280.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 158.0, | |
| "completions/max_terminated_length": 158.0, | |
| "completions/mean_length": 73.625, | |
| "completions/mean_terminated_length": 84.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 3.128, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.976156711578369, | |
| "kl": 0.1405428946018219, | |
| "learning_rate": 1e-06, | |
| "loss": -0.6803, | |
| "num_tokens": 465592.0, | |
| "reward": 0.3125, | |
| "reward_std": 0.38253021240234375, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 52.0, | |
| "completions/max_terminated_length": 52.0, | |
| "completions/mean_length": 40.375, | |
| "completions/mean_terminated_length": 46.142857142857146, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 21.0, | |
| "epoch": 3.192, | |
| "format_failures": 0.0, | |
| "grad_norm": 767.0111694335938, | |
| "kl": 30.081211734563112, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7347, | |
| "num_tokens": 470880.0, | |
| "reward": 0.5833333730697632, | |
| "reward_std": 0.49601587653160095, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00039308174746111035, | |
| "clip_ratio/high_mean": 0.00039308174746111035, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.00039308174746111035, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 108.0, | |
| "completions/max_terminated_length": 108.0, | |
| "completions/mean_length": 56.25, | |
| "completions/mean_terminated_length": 64.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 25.0, | |
| "epoch": 3.2560000000000002, | |
| "format_failures": 0.0, | |
| "grad_norm": 10.541399955749512, | |
| "kl": 0.2744437651708722, | |
| "learning_rate": 1e-06, | |
| "loss": -1.0422, | |
| "num_tokens": 479136.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 85.0, | |
| "completions/max_terminated_length": 85.0, | |
| "completions/mean_length": 61.75, | |
| "completions/mean_terminated_length": 70.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 60.0, | |
| "epoch": 3.32, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.463606595993042, | |
| "kl": 0.10342029482126236, | |
| "learning_rate": 1e-06, | |
| "loss": 0.4301, | |
| "num_tokens": 486000.0, | |
| "reward": 0.875, | |
| "reward_std": 0.3535533845424652, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00014585764438379556, | |
| "clip_ratio/high_mean": 0.00014585764438379556, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.00014585764438379556, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 287.0, | |
| "completions/max_terminated_length": 287.0, | |
| "completions/mean_length": 177.625, | |
| "completions/mean_terminated_length": 203.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 71.0, | |
| "epoch": 3.384, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.68437123298645, | |
| "kl": 0.10048098210245371, | |
| "learning_rate": 1e-06, | |
| "loss": -0.7393, | |
| "num_tokens": 497136.0, | |
| "reward": 0.5583333373069763, | |
| "reward_std": 0.4766783118247986, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 101.0, | |
| "completions/max_terminated_length": 101.0, | |
| "completions/mean_length": 72.25, | |
| "completions/mean_terminated_length": 82.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 66.0, | |
| "epoch": 3.448, | |
| "format_failures": 0.0, | |
| "grad_norm": 7.700087070465088, | |
| "kl": 0.17961894627660513, | |
| "learning_rate": 1e-06, | |
| "loss": 1.025, | |
| "num_tokens": 502688.0, | |
| "reward": 0.44999998807907104, | |
| "reward_std": 0.4985693693161011, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00020938023226335645, | |
| "clip_ratio/high_mean": 0.00020938023226335645, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.00020938023226335645, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 181.0, | |
| "completions/max_terminated_length": 181.0, | |
| "completions/mean_length": 114.125, | |
| "completions/mean_terminated_length": 130.42857142857142, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 69.0, | |
| "epoch": 3.512, | |
| "format_failures": 1.0, | |
| "grad_norm": 6.477407455444336, | |
| "kl": 0.18405211344361305, | |
| "learning_rate": 1e-06, | |
| "loss": -0.815, | |
| "num_tokens": 509816.0, | |
| "reward": 0.28125, | |
| "reward_std": 0.33905068039894104, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 217.0, | |
| "completions/max_terminated_length": 217.0, | |
| "completions/mean_length": 144.375, | |
| "completions/mean_terminated_length": 165.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 3.576, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.1489181518554688, | |
| "kl": 0.18948577530682087, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1775, | |
| "num_tokens": 518072.0, | |
| "reward": 0.6666666865348816, | |
| "reward_std": 0.4364357590675354, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 246.0, | |
| "completions/max_terminated_length": 246.0, | |
| "completions/mean_length": 125.125, | |
| "completions/mean_terminated_length": 143.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 89.0, | |
| "epoch": 3.64, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.2657573223114014, | |
| "kl": 0.1387784667313099, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0604, | |
| "num_tokens": 531728.0, | |
| "reward": 0.25, | |
| "reward_std": 0.4629100561141968, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.00014551804633811116, | |
| "clip_ratio/low_min": 0.00014551804633811116, | |
| "clip_ratio/region_mean": 0.00014551804633811116, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 83.0, | |
| "completions/max_terminated_length": 83.0, | |
| "completions/mean_length": 56.5, | |
| "completions/mean_terminated_length": 64.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 52.0, | |
| "epoch": 3.7039999999999997, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.2649238109588623, | |
| "kl": 0.28891171142458916, | |
| "learning_rate": 1e-06, | |
| "loss": 0.2216, | |
| "num_tokens": 536768.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 227.0, | |
| "completions/max_terminated_length": 227.0, | |
| "completions/mean_length": 91.5, | |
| "completions/mean_terminated_length": 104.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 33.0, | |
| "epoch": 3.768, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.3132457733154297, | |
| "kl": 0.096153249964118, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3965, | |
| "num_tokens": 546784.0, | |
| "reward": 0.1875, | |
| "reward_std": 0.2587745785713196, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 143.0, | |
| "completions/max_terminated_length": 143.0, | |
| "completions/mean_length": 90.75, | |
| "completions/mean_terminated_length": 103.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 71.0, | |
| "epoch": 3.832, | |
| "format_failures": 0.0, | |
| "grad_norm": 4.948695182800293, | |
| "kl": 0.1259058197028935, | |
| "learning_rate": 1e-06, | |
| "loss": -0.4309, | |
| "num_tokens": 559872.0, | |
| "reward": 0.375, | |
| "reward_std": 0.5175491571426392, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0003612716682255268, | |
| "clip_ratio/low_min": 0.0003612716682255268, | |
| "clip_ratio/region_mean": 0.0003612716682255268, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 131.0, | |
| "completions/max_terminated_length": 131.0, | |
| "completions/mean_length": 62.125, | |
| "completions/mean_terminated_length": 71.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 3.896, | |
| "format_failures": 0.0, | |
| "grad_norm": 8.347101211547852, | |
| "kl": 0.8767695324495435, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0695, | |
| "num_tokens": 565032.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 138.0, | |
| "completions/max_terminated_length": 138.0, | |
| "completions/mean_length": 84.625, | |
| "completions/mean_terminated_length": 96.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 47.0, | |
| "epoch": 3.96, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.449214935302734, | |
| "kl": 0.26848094910383224, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0353, | |
| "num_tokens": 572664.0, | |
| "reward": 0.4464285671710968, | |
| "reward_std": 0.49744242429733276, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 643.0, | |
| "completions/max_terminated_length": 643.0, | |
| "completions/mean_length": 136.75, | |
| "completions/mean_terminated_length": 156.28571428571428, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 54.0, | |
| "epoch": 4.0, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.11106014251709, | |
| "kl": 0.17550407350063324, | |
| "learning_rate": 1e-06, | |
| "loss": 0.7516, | |
| "num_tokens": 586696.0, | |
| "reward": 0.6588234901428223, | |
| "reward_std": 0.44546374678611755, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 80.0, | |
| "completions/max_terminated_length": 80.0, | |
| "completions/mean_length": 40.125, | |
| "completions/mean_terminated_length": 45.857142857142854, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 26.0, | |
| "epoch": 4.064, | |
| "format_failures": 1.0, | |
| "grad_norm": 9.72535514831543, | |
| "kl": 0.20796778332442045, | |
| "learning_rate": 1e-06, | |
| "loss": 0.8956, | |
| "num_tokens": 591600.0, | |
| "reward": 0.30000001192092896, | |
| "reward_std": 0.4535573422908783, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0013570611481554806, | |
| "clip_ratio/high_mean": 0.0013570611481554806, | |
| "clip_ratio/low_mean": 0.012927594594657421, | |
| "clip_ratio/low_min": 0.012927594594657421, | |
| "clip_ratio/region_mean": 0.014284655742812902, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 87.0, | |
| "completions/max_terminated_length": 87.0, | |
| "completions/mean_length": 55.25, | |
| "completions/mean_terminated_length": 63.142857142857146, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 48.0, | |
| "epoch": 4.128, | |
| "format_failures": 0.0, | |
| "grad_norm": 15.252068519592285, | |
| "kl": 0.22740534879267216, | |
| "learning_rate": 1e-06, | |
| "loss": 0.8349, | |
| "num_tokens": 596880.0, | |
| "reward": 0.8125, | |
| "reward_std": 0.3720118999481201, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 110.0, | |
| "completions/max_terminated_length": 110.0, | |
| "completions/mean_length": 55.0, | |
| "completions/mean_terminated_length": 88.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 58.0, | |
| "epoch": 4.192, | |
| "format_failures": 1.0, | |
| "grad_norm": 16.80088233947754, | |
| "kl": 0.31182049214839935, | |
| "learning_rate": 1e-06, | |
| "loss": 0.693, | |
| "num_tokens": 603344.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.004188144113868475, | |
| "clip_ratio/low_min": 0.004188144113868475, | |
| "clip_ratio/region_mean": 0.004188144113868475, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 161.0, | |
| "completions/max_terminated_length": 161.0, | |
| "completions/mean_length": 100.25, | |
| "completions/mean_terminated_length": 114.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 84.0, | |
| "epoch": 4.256, | |
| "format_failures": 0.0, | |
| "grad_norm": 13.643096923828125, | |
| "kl": 0.11746187414973974, | |
| "learning_rate": 1e-06, | |
| "loss": -1.335, | |
| "num_tokens": 610184.0, | |
| "reward": 0.5208333730697632, | |
| "reward_std": 0.46664538979530334, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00018115942657459527, | |
| "clip_ratio/high_mean": 0.00018115942657459527, | |
| "clip_ratio/low_mean": 0.00037650601007044315, | |
| "clip_ratio/low_min": 0.00037650601007044315, | |
| "clip_ratio/region_mean": 0.0005576654366450384, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 199.0, | |
| "completions/max_terminated_length": 199.0, | |
| "completions/mean_length": 72.875, | |
| "completions/mean_terminated_length": 83.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 46.0, | |
| "epoch": 4.32, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.553096771240234, | |
| "kl": 0.19322836678475142, | |
| "learning_rate": 1e-06, | |
| "loss": -0.6568, | |
| "num_tokens": 619440.0, | |
| "reward": 0.2916666865348816, | |
| "reward_std": 0.4520675837993622, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 87.0, | |
| "completions/max_terminated_length": 87.0, | |
| "completions/mean_length": 61.375, | |
| "completions/mean_terminated_length": 70.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 17.0, | |
| "epoch": 4.384, | |
| "format_failures": 0.0, | |
| "grad_norm": 11.096977233886719, | |
| "kl": 0.205445297062397, | |
| "learning_rate": 1e-06, | |
| "loss": -0.9614, | |
| "num_tokens": 626024.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0001707650226308033, | |
| "clip_ratio/high_mean": 0.0001707650226308033, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0001707650226308033, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 113.0, | |
| "completions/max_terminated_length": 113.0, | |
| "completions/mean_length": 59.25, | |
| "completions/mean_terminated_length": 67.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 30.0, | |
| "epoch": 4.448, | |
| "format_failures": 0.0, | |
| "grad_norm": 13.960062026977539, | |
| "kl": 0.2419998161494732, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6815, | |
| "num_tokens": 631704.0, | |
| "reward": 0.30000001192092896, | |
| "reward_std": 0.4535573720932007, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 236.0, | |
| "completions/max_terminated_length": 236.0, | |
| "completions/mean_length": 119.625, | |
| "completions/mean_terminated_length": 136.71428571428572, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 66.0, | |
| "epoch": 4.5120000000000005, | |
| "format_failures": 0.0, | |
| "grad_norm": 4.790798187255859, | |
| "kl": 0.14366307947784662, | |
| "learning_rate": 1e-06, | |
| "loss": -0.622, | |
| "num_tokens": 640736.0, | |
| "reward": 0.6041666269302368, | |
| "reward_std": 0.5034602880477905, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.001402775407768786, | |
| "clip_ratio/high_mean": 0.001402775407768786, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.001402775407768786, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 163.0, | |
| "completions/max_terminated_length": 163.0, | |
| "completions/mean_length": 89.875, | |
| "completions/mean_terminated_length": 102.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 4.576, | |
| "format_failures": 0.0, | |
| "grad_norm": 24.572803497314453, | |
| "kl": 2.3969106171280146, | |
| "learning_rate": 1e-06, | |
| "loss": 0.7312, | |
| "num_tokens": 649920.0, | |
| "reward": 0.0416666679084301, | |
| "reward_std": 0.1178511381149292, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0001328374055447057, | |
| "clip_ratio/low_min": 0.0001328374055447057, | |
| "clip_ratio/region_mean": 0.0001328374055447057, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 231.0, | |
| "completions/max_terminated_length": 231.0, | |
| "completions/mean_length": 118.0, | |
| "completions/mean_terminated_length": 134.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 101.0, | |
| "epoch": 4.64, | |
| "format_failures": 0.0, | |
| "grad_norm": 11.096585273742676, | |
| "kl": 0.14362134877592325, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6126, | |
| "num_tokens": 660512.0, | |
| "reward": 0.59375, | |
| "reward_std": 0.4988826811313629, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 277.0, | |
| "completions/max_terminated_length": 277.0, | |
| "completions/mean_length": 171.625, | |
| "completions/mean_terminated_length": 196.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 42.0, | |
| "epoch": 4.704, | |
| "format_failures": 0.0, | |
| "grad_norm": 10.942404747009277, | |
| "kl": 0.09571220818907022, | |
| "learning_rate": 1e-06, | |
| "loss": -1.9221, | |
| "num_tokens": 671728.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 83.0, | |
| "completions/max_terminated_length": 83.0, | |
| "completions/mean_length": 51.25, | |
| "completions/mean_terminated_length": 58.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 33.0, | |
| "epoch": 4.768, | |
| "format_failures": 0.0, | |
| "grad_norm": 17.237686157226562, | |
| "kl": 0.1505587575957179, | |
| "learning_rate": 1e-06, | |
| "loss": -0.4979, | |
| "num_tokens": 678808.0, | |
| "reward": 0.375, | |
| "reward_std": 0.5175491571426392, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0069027612917125225, | |
| "clip_ratio/high_mean": 0.0069027612917125225, | |
| "clip_ratio/low_mean": 0.00044653778604697436, | |
| "clip_ratio/low_min": 0.00044653778604697436, | |
| "clip_ratio/region_mean": 0.007349299077759497, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 95.0, | |
| "completions/max_terminated_length": 95.0, | |
| "completions/mean_length": 63.25, | |
| "completions/mean_terminated_length": 72.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 42.0, | |
| "epoch": 4.832, | |
| "format_failures": 0.0, | |
| "grad_norm": 48.175540924072266, | |
| "kl": 0.12417041137814522, | |
| "learning_rate": 1e-06, | |
| "loss": -0.3992, | |
| "num_tokens": 685376.0, | |
| "reward": 0.75, | |
| "reward_std": 0.4629100561141968, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 105.0, | |
| "completions/max_terminated_length": 105.0, | |
| "completions/mean_length": 66.625, | |
| "completions/mean_terminated_length": 76.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 40.0, | |
| "epoch": 4.896, | |
| "format_failures": 0.0, | |
| "grad_norm": 21.25414276123047, | |
| "kl": 0.3155105458572507, | |
| "learning_rate": 1e-06, | |
| "loss": 2.8688, | |
| "num_tokens": 691504.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 428.0, | |
| "completions/max_terminated_length": 428.0, | |
| "completions/mean_length": 160.875, | |
| "completions/mean_terminated_length": 183.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 55.0, | |
| "epoch": 4.96, | |
| "format_failures": 0.0, | |
| "grad_norm": 11.223791122436523, | |
| "kl": 0.1855175606906414, | |
| "learning_rate": 1e-06, | |
| "loss": -1.5493, | |
| "num_tokens": 700192.0, | |
| "reward": 0.3015109896659851, | |
| "reward_std": 0.42723536491394043, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 192.0, | |
| "completions/max_terminated_length": 192.0, | |
| "completions/mean_length": 87.875, | |
| "completions/mean_terminated_length": 100.42857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 5.0, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.051183700561523, | |
| "kl": 0.6679443523287774, | |
| "learning_rate": 1e-06, | |
| "loss": 0.6727, | |
| "num_tokens": 715360.0, | |
| "reward": 0.3965517282485962, | |
| "reward_std": 0.503090500831604, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0012536601134343073, | |
| "clip_ratio/low_min": 0.0012536601134343073, | |
| "clip_ratio/region_mean": 0.0012536601134343073, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 295.0, | |
| "completions/max_terminated_length": 295.0, | |
| "completions/mean_length": 192.25, | |
| "completions/mean_terminated_length": 219.71428571428572, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 51.0, | |
| "epoch": 5.064, | |
| "format_failures": 0.0, | |
| "grad_norm": 6.032639503479004, | |
| "kl": 0.1344920275732875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.8074, | |
| "num_tokens": 725176.0, | |
| "reward": 0.5416666865348816, | |
| "reward_std": 0.46929529309272766, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 57.0, | |
| "completions/max_terminated_length": 57.0, | |
| "completions/mean_length": 44.625, | |
| "completions/mean_terminated_length": 51.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 34.0, | |
| "epoch": 5.128, | |
| "format_failures": 0.0, | |
| "grad_norm": 9.636035919189453, | |
| "kl": 0.15830809529870749, | |
| "learning_rate": 1e-06, | |
| "loss": 0.9562, | |
| "num_tokens": 729984.0, | |
| "reward": 0.46875, | |
| "reward_std": 0.5077524185180664, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.000637518212897703, | |
| "clip_ratio/low_min": 0.000637518212897703, | |
| "clip_ratio/region_mean": 0.000637518212897703, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 90.0, | |
| "completions/max_terminated_length": 90.0, | |
| "completions/mean_length": 63.625, | |
| "completions/mean_terminated_length": 72.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 38.0, | |
| "epoch": 5.192, | |
| "format_failures": 0.0, | |
| "grad_norm": 7.9917144775390625, | |
| "kl": 0.1429830752313137, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1994, | |
| "num_tokens": 736312.0, | |
| "reward": 0.375, | |
| "reward_std": 0.5175491571426392, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.001965374161954969, | |
| "clip_ratio/low_min": 0.001965374161954969, | |
| "clip_ratio/region_mean": 0.001965374161954969, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 394.0, | |
| "completions/max_terminated_length": 394.0, | |
| "completions/mean_length": 136.0, | |
| "completions/mean_terminated_length": 181.33333333333334, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 5.256, | |
| "format_failures": 0.0, | |
| "grad_norm": 8.276095390319824, | |
| "kl": 0.28229224402457476, | |
| "learning_rate": 1e-06, | |
| "loss": -1.1183, | |
| "num_tokens": 749648.0, | |
| "reward": 0.637499988079071, | |
| "reward_std": 0.4405759274959564, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 76.0, | |
| "completions/max_terminated_length": 76.0, | |
| "completions/mean_length": 56.125, | |
| "completions/mean_terminated_length": 64.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 52.0, | |
| "epoch": 5.32, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.3836967945098877, | |
| "kl": 0.11320708272978663, | |
| "learning_rate": 1e-06, | |
| "loss": -0.262, | |
| "num_tokens": 755344.0, | |
| "reward": 0.1875, | |
| "reward_std": 0.3720118999481201, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00038880249485373497, | |
| "clip_ratio/high_mean": 0.00038880249485373497, | |
| "clip_ratio/low_mean": 0.0007896393508417532, | |
| "clip_ratio/low_min": 0.0007896393508417532, | |
| "clip_ratio/region_mean": 0.0011784418456954882, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 192.0, | |
| "completions/max_terminated_length": 192.0, | |
| "completions/mean_length": 85.25, | |
| "completions/mean_terminated_length": 97.42857142857143, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 56.0, | |
| "epoch": 5.384, | |
| "format_failures": 0.0, | |
| "grad_norm": 6.297000885009766, | |
| "kl": 0.7561929021030664, | |
| "learning_rate": 1e-06, | |
| "loss": 0.5695, | |
| "num_tokens": 762432.0, | |
| "reward": 0.09375, | |
| "reward_std": 0.2651650309562683, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 153.0, | |
| "completions/max_terminated_length": 153.0, | |
| "completions/mean_length": 102.0, | |
| "completions/mean_terminated_length": 116.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 5.448, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.523719310760498, | |
| "kl": 0.19376599509269, | |
| "learning_rate": 1e-06, | |
| "loss": -0.6165, | |
| "num_tokens": 769192.0, | |
| "reward": 0.5833333730697632, | |
| "reward_std": 0.49601587653160095, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.00017908310110215098, | |
| "clip_ratio/low_min": 0.00017908310110215098, | |
| "clip_ratio/region_mean": 0.00017908310110215098, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 109.0, | |
| "completions/max_terminated_length": 109.0, | |
| "completions/mean_length": 81.75, | |
| "completions/mean_terminated_length": 109.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 109.0, | |
| "epoch": 5.5120000000000005, | |
| "format_failures": 0.0, | |
| "grad_norm": 4.026613235473633, | |
| "kl": 0.32431851979345083, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3918, | |
| "num_tokens": 775912.0, | |
| "reward": 0.125, | |
| "reward_std": 0.3535533845424652, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0009421340801054612, | |
| "clip_ratio/low_min": 0.0009421340801054612, | |
| "clip_ratio/region_mean": 0.0009421340801054612, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 87.0, | |
| "completions/max_terminated_length": 87.0, | |
| "completions/mean_length": 41.625, | |
| "completions/mean_terminated_length": 47.57142857142857, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 5.576, | |
| "format_failures": 0.0, | |
| "grad_norm": 9.119647979736328, | |
| "kl": 0.20464181900024414, | |
| "learning_rate": 1e-06, | |
| "loss": -0.8791, | |
| "num_tokens": 781112.0, | |
| "reward": 0.5890151262283325, | |
| "reward_std": 0.4705297350883484, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00911893486045301, | |
| "clip_ratio/high_mean": 0.00911893486045301, | |
| "clip_ratio/low_mean": 0.000877421407494694, | |
| "clip_ratio/low_min": 0.000877421407494694, | |
| "clip_ratio/region_mean": 0.009996356267947704, | |
| "completions/clipped_ratio": 0.375, | |
| "completions/max_length": 586.0, | |
| "completions/max_terminated_length": 586.0, | |
| "completions/mean_length": 143.375, | |
| "completions/mean_terminated_length": 229.4, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 36.0, | |
| "epoch": 5.64, | |
| "format_failures": 0.0, | |
| "grad_norm": 32.418331146240234, | |
| "kl": 0.3150494508445263, | |
| "learning_rate": 1e-06, | |
| "loss": 0.7753, | |
| "num_tokens": 793976.0, | |
| "reward": 0.02777777798473835, | |
| "reward_std": 0.07856742292642593, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 181.0, | |
| "completions/max_terminated_length": 181.0, | |
| "completions/mean_length": 85.5, | |
| "completions/mean_terminated_length": 97.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 14.0, | |
| "epoch": 5.704, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.2142812013626099, | |
| "kl": 0.1678312411531806, | |
| "learning_rate": 1e-06, | |
| "loss": -0.2285, | |
| "num_tokens": 800528.0, | |
| "reward": 0.5806547999382019, | |
| "reward_std": 0.40297815203666687, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.008678364916704595, | |
| "clip_ratio/high_mean": 0.008678364916704595, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.008678364916704595, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 195.0, | |
| "completions/max_terminated_length": 195.0, | |
| "completions/mean_length": 119.875, | |
| "completions/mean_terminated_length": 137.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 5.768, | |
| "format_failures": 0.0, | |
| "grad_norm": 8.557031631469727, | |
| "kl": 0.26262282859534025, | |
| "learning_rate": 1e-06, | |
| "loss": -0.786, | |
| "num_tokens": 811680.0, | |
| "reward": 0.375, | |
| "reward_std": 0.5175491571426392, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0003788308094954118, | |
| "clip_ratio/high_mean": 0.0003788308094954118, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0003788308094954118, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 387.0, | |
| "completions/max_terminated_length": 387.0, | |
| "completions/mean_length": 156.0, | |
| "completions/mean_terminated_length": 178.28571428571428, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 82.0, | |
| "epoch": 5.832, | |
| "format_failures": 0.0, | |
| "grad_norm": 8.220466613769531, | |
| "kl": 0.14401236828416586, | |
| "learning_rate": 1e-06, | |
| "loss": 1.0757, | |
| "num_tokens": 824400.0, | |
| "reward": 0.3035714328289032, | |
| "reward_std": 0.45456862449645996, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00015356265066657215, | |
| "clip_ratio/high_mean": 0.00015356265066657215, | |
| "clip_ratio/low_mean": 0.0011615749244811013, | |
| "clip_ratio/low_min": 0.0011615749244811013, | |
| "clip_ratio/region_mean": 0.0013151375751476735, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 177.0, | |
| "completions/max_terminated_length": 177.0, | |
| "completions/mean_length": 77.625, | |
| "completions/mean_terminated_length": 88.71428571428571, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 58.0, | |
| "epoch": 5.896, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.23447322845459, | |
| "kl": 0.21856553480029106, | |
| "learning_rate": 1e-06, | |
| "loss": -0.4024, | |
| "num_tokens": 838072.0, | |
| "reward": 0.359375, | |
| "reward_std": 0.469790518283844, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.002261076238937676, | |
| "clip_ratio/high_mean": 0.002261076238937676, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.002261076238937676, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 87.0, | |
| "completions/max_terminated_length": 87.0, | |
| "completions/mean_length": 49.625, | |
| "completions/mean_terminated_length": 56.714285714285715, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 5.96, | |
| "format_failures": 0.0, | |
| "grad_norm": 16.173349380493164, | |
| "kl": 0.41087135300040245, | |
| "learning_rate": 1e-06, | |
| "loss": 0.8071, | |
| "num_tokens": 846120.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "epoch": 6.0, | |
| "grad_norm": 2.644404888153076, | |
| "kl": 0.6906098246574401, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0081, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 131.0, | |
| "completions/max_terminated_length": 131.0, | |
| "completions/mean_length": 100.875, | |
| "completions/mean_terminated_length": 115.28571428571429, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 108.0, | |
| "epoch": 6.064, | |
| "format_failures": 0.0, | |
| "grad_norm": 10.684069633483887, | |
| "kl": 1.1826152130961418, | |
| "learning_rate": 1e-06, | |
| "loss": 0.7069, | |
| "num_tokens": 854688.0, | |
| "reward": 0.75, | |
| "reward_std": 0.4629100561141968, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 141.0, | |
| "completions/max_terminated_length": 141.0, | |
| "completions/mean_length": 89.375, | |
| "completions/mean_terminated_length": 102.14285714285714, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 6.128, | |
| "format_failures": 0.0, | |
| "grad_norm": 6.4194746017456055, | |
| "kl": 0.12883292511105537, | |
| "learning_rate": 1e-06, | |
| "loss": -1.1247, | |
| "num_tokens": 863240.0, | |
| "reward": 0.5625, | |
| "reward_std": 0.3720118999481201, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 72.0, | |
| "completions/max_terminated_length": 72.0, | |
| "completions/mean_length": 36.0, | |
| "completions/mean_terminated_length": 72.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 72.0, | |
| "epoch": 6.192, | |
| "format_failures": 0.0, | |
| "grad_norm": 7.2926411628723145, | |
| "kl": 0.1561364121735096, | |
| "learning_rate": 1e-06, | |
| "loss": 1.0999, | |
| "num_tokens": 869160.0, | |
| "reward": 0.875, | |
| "reward_std": 0.3535533845424652, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 48.0, | |
| "completions/max_terminated_length": 48.0, | |
| "completions/mean_length": 11.25, | |
| "completions/mean_terminated_length": 45.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 42.0, | |
| "epoch": 6.256, | |
| "format_failures": 0.0, | |
| "grad_norm": 68.3973159790039, | |
| "kl": 0.22240112535655499, | |
| "learning_rate": 1e-06, | |
| "loss": 6.8633, | |
| "num_tokens": 874632.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5345224738121033, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 90.0, | |
| "completions/max_terminated_length": 90.0, | |
| "completions/mean_length": 59.625, | |
| "completions/mean_terminated_length": 79.5, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 75.0, | |
| "epoch": 6.32, | |
| "format_failures": 0.0, | |
| "grad_norm": 61.82405090332031, | |
| "kl": 0.04734344594180584, | |
| "learning_rate": 1e-06, | |
| "loss": -7.4582, | |
| "num_tokens": 881504.0, | |
| "reward": 1.0, | |
| "reward_std": 0.0, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 154.0, | |
| "completions/max_terminated_length": 154.0, | |
| "completions/mean_length": 127.625, | |
| "completions/mean_terminated_length": 145.85714285714286, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 97.0, | |
| "epoch": 6.384, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.07336875051259995, | |
| "kl": 0.056114144157618284, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 889832.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 102 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 889832, | |
| "num_train_epochs": 63, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |