| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.11048059056897504, | |
| "eval_steps": 500, | |
| "global_step": 1100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 459.2, | |
| "completions/max_terminated_length": 272.7, | |
| "completions/mean_length": 76.24375, | |
| "completions/mean_terminated_length": 64.11458358764648, | |
| "completions/min_length": 16.8, | |
| "completions/min_terminated_length": 16.8, | |
| "epoch": 0.0010043690051725004, | |
| "frac_reward_zero_std": 0.875, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1999999999999998e-08, | |
| "loss": 0.0208, | |
| "num_tokens": 108131.0, | |
| "reward": 1.2312812566757203, | |
| "reward_std": 0.05931956073036417, | |
| "rewards/combined_reward/mean": 1.2312812566757203, | |
| "rewards/combined_reward/std": 0.4361365109682083, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01875, | |
| "completions/max_length": 330.9, | |
| "completions/max_terminated_length": 147.6, | |
| "completions/mean_length": 75.425, | |
| "completions/mean_terminated_length": 61.425418090820315, | |
| "completions/min_length": 13.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.002008738010345001, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.5333333333333335e-08, | |
| "loss": 0.0279, | |
| "num_tokens": 233579.0, | |
| "reward": 1.3428645849227905, | |
| "reward_std": 0.029872814007103444, | |
| "rewards/combined_reward/mean": 1.3428645849227905, | |
| "rewards/combined_reward/std": 0.3860916443169117, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 110.6, | |
| "completions/max_terminated_length": 110.6, | |
| "completions/mean_length": 51.04375, | |
| "completions/mean_terminated_length": 51.04375, | |
| "completions/min_length": 16.7, | |
| "completions/min_terminated_length": 16.7, | |
| "epoch": 0.003013107015517501, | |
| "frac_reward_zero_std": 0.875, | |
| "grad_norm": 3.3646392822265625, | |
| "learning_rate": 3.866666666666666e-08, | |
| "loss": -0.0132, | |
| "num_tokens": 352258.0, | |
| "reward": 1.323312509059906, | |
| "reward_std": 0.05337500050663948, | |
| "rewards/combined_reward/mean": 1.323312509059906, | |
| "rewards/combined_reward/std": 0.39539981335401536, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01875, | |
| "completions/max_length": 307.1, | |
| "completions/max_terminated_length": 211.8, | |
| "completions/mean_length": 95.83125, | |
| "completions/mean_terminated_length": 60.24375, | |
| "completions/min_length": 11.5, | |
| "completions/min_terminated_length": 11.5, | |
| "epoch": 0.004017476020690002, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.2e-08, | |
| "loss": 0.0143, | |
| "num_tokens": 485155.0, | |
| "reward": 1.2628658890724183, | |
| "reward_std": 0.03280075653456151, | |
| "rewards/combined_reward/mean": 1.2628658890724183, | |
| "rewards/combined_reward/std": 0.4110621690750122, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 140.5, | |
| "completions/max_terminated_length": 140.5, | |
| "completions/mean_length": 61.7875, | |
| "completions/mean_terminated_length": 61.7875, | |
| "completions/min_length": 23.7, | |
| "completions/min_terminated_length": 23.7, | |
| "epoch": 0.005021845025862502, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 1.8525996208190918, | |
| "learning_rate": 6.533333333333332e-08, | |
| "loss": 0.0147, | |
| "num_tokens": 607629.0, | |
| "reward": 1.3795833349227906, | |
| "reward_std": 0.00583496168255806, | |
| "rewards/combined_reward/mean": 1.3795833349227906, | |
| "rewards/combined_reward/std": 0.30837071537971494, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 338.5, | |
| "completions/max_terminated_length": 238.4, | |
| "completions/mean_length": 102.60625, | |
| "completions/mean_terminated_length": 91.32791748046876, | |
| "completions/min_length": 21.6, | |
| "completions/min_terminated_length": 21.6, | |
| "epoch": 0.006026214031035002, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 2.992983818054199, | |
| "learning_rate": 7.866666666666666e-08, | |
| "loss": 0.0045, | |
| "num_tokens": 728802.0, | |
| "reward": 1.3164896011352538, | |
| "reward_std": 0.02619450243655592, | |
| "rewards/combined_reward/mean": 1.3164896011352538, | |
| "rewards/combined_reward/std": 0.3474510669708252, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 128.0, | |
| "completions/max_terminated_length": 128.0, | |
| "completions/mean_length": 62.81875, | |
| "completions/mean_terminated_length": 61.769583511352536, | |
| "completions/min_length": 20.2, | |
| "completions/min_terminated_length": 20.2, | |
| "epoch": 0.007030583036207502, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.2e-08, | |
| "loss": 0.0098, | |
| "num_tokens": 836341.0, | |
| "reward": 1.355798614025116, | |
| "reward_std": 0.004375000763684511, | |
| "rewards/combined_reward/mean": 1.355798614025116, | |
| "rewards/combined_reward/std": 0.29267608374357224, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 110.2, | |
| "completions/max_terminated_length": 110.2, | |
| "completions/mean_length": 54.3375, | |
| "completions/mean_terminated_length": 54.3375, | |
| "completions/min_length": 24.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 0.008034952041380003, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 13.820528984069824, | |
| "learning_rate": 1.0533333333333332e-07, | |
| "loss": 0.0119, | |
| "num_tokens": 945703.0, | |
| "reward": 1.4564843893051147, | |
| "reward_std": 0.003906251955777406, | |
| "rewards/combined_reward/mean": 1.4564843893051147, | |
| "rewards/combined_reward/std": 0.1776508768554777, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 201.9, | |
| "completions/max_terminated_length": 201.9, | |
| "completions/mean_length": 70.6125, | |
| "completions/mean_terminated_length": 70.6125, | |
| "completions/min_length": 21.5, | |
| "completions/min_terminated_length": 21.5, | |
| "epoch": 0.009039321046552503, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.1866666666666667e-07, | |
| "loss": 0.0195, | |
| "num_tokens": 1062961.0, | |
| "reward": 1.3238854348659514, | |
| "reward_std": 0.005562501423992216, | |
| "rewards/combined_reward/mean": 1.3238854348659514, | |
| "rewards/combined_reward/std": 0.22054901346564293, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 113.7, | |
| "completions/max_terminated_length": 113.7, | |
| "completions/mean_length": 60.275, | |
| "completions/mean_terminated_length": 60.275, | |
| "completions/min_length": 24.1, | |
| "completions/min_terminated_length": 24.1, | |
| "epoch": 0.010043690051725004, | |
| "frac_reward_zero_std": 0.875, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.32e-07, | |
| "loss": 0.0058, | |
| "num_tokens": 1175365.0, | |
| "reward": 1.4070937514305115, | |
| "reward_std": 0.034517763555049895, | |
| "rewards/combined_reward/mean": 1.4070937514305115, | |
| "rewards/combined_reward/std": 0.26661672741174697, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 158.5, | |
| "completions/max_terminated_length": 158.5, | |
| "completions/mean_length": 65.46875, | |
| "completions/mean_terminated_length": 64.41750030517578, | |
| "completions/min_length": 19.1, | |
| "completions/min_terminated_length": 19.1, | |
| "epoch": 0.011048059056897505, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4533333333333334e-07, | |
| "loss": 0.0019, | |
| "num_tokens": 1288772.0, | |
| "reward": 1.2793750286102294, | |
| "reward_std": 0.0024999996647238733, | |
| "rewards/combined_reward/mean": 1.2793750286102294, | |
| "rewards/combined_reward/std": 0.31086390763521193, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 322.2, | |
| "completions/max_terminated_length": 134.1, | |
| "completions/mean_length": 77.01875, | |
| "completions/mean_terminated_length": 64.77458343505859, | |
| "completions/min_length": 20.7, | |
| "completions/min_terminated_length": 20.7, | |
| "epoch": 0.012052428062070004, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 8.276171684265137, | |
| "learning_rate": 1.5866666666666666e-07, | |
| "loss": 0.0134, | |
| "num_tokens": 1403035.0, | |
| "reward": 1.3504362106323242, | |
| "reward_std": 0.030459362699184568, | |
| "rewards/combined_reward/mean": 1.3504362106323242, | |
| "rewards/combined_reward/std": 0.309928272664547, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 102.1, | |
| "completions/max_terminated_length": 102.1, | |
| "completions/mean_length": 61.0625, | |
| "completions/mean_terminated_length": 61.0625, | |
| "completions/min_length": 31.5, | |
| "completions/min_terminated_length": 31.5, | |
| "epoch": 0.013056797067242505, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7199999999999998e-07, | |
| "loss": -0.0027, | |
| "num_tokens": 1524697.0, | |
| "reward": 1.361527794599533, | |
| "reward_std": 0.008749999664723873, | |
| "rewards/combined_reward/mean": 1.361527794599533, | |
| "rewards/combined_reward/std": 0.2736371263861656, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 123.4, | |
| "completions/max_terminated_length": 123.4, | |
| "completions/mean_length": 58.05, | |
| "completions/mean_terminated_length": 58.05, | |
| "completions/min_length": 16.8, | |
| "completions/min_terminated_length": 16.8, | |
| "epoch": 0.014061166072415004, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 3.9411776065826416, | |
| "learning_rate": 1.8533333333333333e-07, | |
| "loss": 0.0062, | |
| "num_tokens": 1622389.0, | |
| "reward": 1.3123229265213012, | |
| "reward_std": 0.03212499991059303, | |
| "rewards/combined_reward/mean": 1.3123229265213012, | |
| "rewards/combined_reward/std": 0.35334871551021935, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025, | |
| "completions/max_length": 333.1, | |
| "completions/max_terminated_length": 135.6, | |
| "completions/mean_length": 111.125, | |
| "completions/mean_terminated_length": 61.191666793823245, | |
| "completions/min_length": 21.8, | |
| "completions/min_terminated_length": 21.8, | |
| "epoch": 0.015065535077587506, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9866666666666665e-07, | |
| "loss": 0.0039, | |
| "num_tokens": 1734901.0, | |
| "reward": 1.2678720355033875, | |
| "reward_std": 0.0006250014062970877, | |
| "rewards/combined_reward/mean": 1.2678720355033875, | |
| "rewards/combined_reward/std": 0.2531693406403065, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 85.7, | |
| "completions/max_terminated_length": 85.7, | |
| "completions/mean_length": 48.81875, | |
| "completions/mean_terminated_length": 48.81875, | |
| "completions/min_length": 17.9, | |
| "completions/min_terminated_length": 17.9, | |
| "epoch": 0.016069904082760007, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9999507890797406e-07, | |
| "loss": 0.0046, | |
| "num_tokens": 1847536.0, | |
| "reward": 1.345395851135254, | |
| "reward_std": 0.0016666671261191368, | |
| "rewards/combined_reward/mean": 1.345395851135254, | |
| "rewards/combined_reward/std": 0.29257251909002663, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025, | |
| "completions/max_length": 467.7, | |
| "completions/max_terminated_length": 277.7, | |
| "completions/mean_length": 144.23125, | |
| "completions/mean_terminated_length": 95.81041717529297, | |
| "completions/min_length": 28.9, | |
| "completions/min_terminated_length": 28.9, | |
| "epoch": 0.017074273087932506, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9997806834748455e-07, | |
| "loss": -0.0018, | |
| "num_tokens": 1970837.0, | |
| "reward": 1.3027083039283753, | |
| "reward_std": 0.004424501396715641, | |
| "rewards/combined_reward/mean": 1.3027083039283753, | |
| "rewards/combined_reward/std": 0.4294335596263409, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 99.5, | |
| "completions/max_terminated_length": 99.5, | |
| "completions/mean_length": 50.44375, | |
| "completions/mean_terminated_length": 50.44375, | |
| "completions/min_length": 14.6, | |
| "completions/min_terminated_length": 14.6, | |
| "epoch": 0.018078642093105005, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9994890963073946e-07, | |
| "loss": 0.0059, | |
| "num_tokens": 2088820.0, | |
| "reward": 1.2765364408493043, | |
| "reward_std": 0.00015624959487468005, | |
| "rewards/combined_reward/mean": 1.2765364408493043, | |
| "rewards/combined_reward/std": 0.3481216669082642, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 170.0, | |
| "completions/max_terminated_length": 170.0, | |
| "completions/mean_length": 67.0, | |
| "completions/mean_terminated_length": 67.0, | |
| "completions/min_length": 15.8, | |
| "completions/min_terminated_length": 15.8, | |
| "epoch": 0.019083011098277508, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9990760630076236e-07, | |
| "loss": -0.0197, | |
| "num_tokens": 2217116.0, | |
| "reward": 1.3771250247955322, | |
| "reward_std": 0.001916667865589261, | |
| "rewards/combined_reward/mean": 1.3771250247955322, | |
| "rewards/combined_reward/std": 0.29997652024030685, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 99.2, | |
| "completions/max_terminated_length": 99.2, | |
| "completions/mean_length": 41.91875, | |
| "completions/mean_terminated_length": 41.91875, | |
| "completions/min_length": 12.8, | |
| "completions/min_terminated_length": 12.8, | |
| "epoch": 0.020087380103450007, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 3.989150047302246, | |
| "learning_rate": 1.99854163376247e-07, | |
| "loss": 0.0011, | |
| "num_tokens": 2329863.0, | |
| "reward": 1.1117187559604644, | |
| "reward_std": 0.02916821506805718, | |
| "rewards/combined_reward/mean": 1.1117187559604644, | |
| "rewards/combined_reward/std": 0.37413454949855807, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025, | |
| "completions/max_length": 408.0, | |
| "completions/max_terminated_length": 220.7, | |
| "completions/mean_length": 133.575, | |
| "completions/mean_terminated_length": 84.2875, | |
| "completions/min_length": 25.0, | |
| "completions/min_terminated_length": 25.0, | |
| "epoch": 0.021091749108622507, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9978858735094754e-07, | |
| "loss": 0.0285, | |
| "num_tokens": 2457743.0, | |
| "reward": 1.3693958520889282, | |
| "reward_std": 0.004563984216656536, | |
| "rewards/combined_reward/mean": 1.3693958520889282, | |
| "rewards/combined_reward/std": 0.33579447590745987, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 115.4, | |
| "completions/max_terminated_length": 115.4, | |
| "completions/mean_length": 60.24375, | |
| "completions/mean_terminated_length": 60.24375, | |
| "completions/min_length": 20.8, | |
| "completions/min_terminated_length": 20.8, | |
| "epoch": 0.02209611811379501, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9971088619288948e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2581282.0, | |
| "reward": 1.284375011920929, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.284375011920929, | |
| "rewards/combined_reward/std": 0.3291483834385872, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 110.9, | |
| "completions/max_terminated_length": 110.9, | |
| "completions/mean_length": 52.08125, | |
| "completions/mean_terminated_length": 51.73625030517578, | |
| "completions/min_length": 15.5, | |
| "completions/min_terminated_length": 15.5, | |
| "epoch": 0.02310048711896751, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.996210693434016e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2716695.0, | |
| "reward": 1.3078229188919068, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.3078229188919068, | |
| "rewards/combined_reward/std": 0.3146174341440201, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01875, | |
| "completions/max_length": 316.8, | |
| "completions/max_terminated_length": 296.7, | |
| "completions/mean_length": 106.325, | |
| "completions/mean_terminated_length": 71.55961608886719, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 0.024104856124140008, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9951914771596858e-07, | |
| "loss": 0.0, | |
| "num_tokens": 2820347.0, | |
| "reward": 1.2994583308696748, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.2994583308696748, | |
| "rewards/combined_reward/std": 0.35011555850505827, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0125, | |
| "completions/max_length": 105.0, | |
| "completions/max_terminated_length": 105.0, | |
| "completions/mean_length": 58.80625, | |
| "completions/mean_terminated_length": 57.67589340209961, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "epoch": 0.02510922512931251, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9940513369490513e-07, | |
| "loss": 0.0119, | |
| "num_tokens": 2937640.0, | |
| "reward": 1.2942708253860473, | |
| "reward_std": 0.0020473659737035633, | |
| "rewards/combined_reward/mean": 1.2942708253860473, | |
| "rewards/combined_reward/std": 0.34473495446145536, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 136.2, | |
| "completions/max_terminated_length": 136.2, | |
| "completions/mean_length": 68.56875, | |
| "completions/mean_terminated_length": 68.56875, | |
| "completions/min_length": 27.3, | |
| "completions/min_terminated_length": 27.3, | |
| "epoch": 0.02611359413448501, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9927904113385096e-07, | |
| "loss": 0.0134, | |
| "num_tokens": 3051799.0, | |
| "reward": 1.3380468726158141, | |
| "reward_std": 0.00270459558814764, | |
| "rewards/combined_reward/mean": 1.3380468726158141, | |
| "rewards/combined_reward/std": 0.28382683396339414, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 172.2, | |
| "completions/max_terminated_length": 172.2, | |
| "completions/mean_length": 72.875, | |
| "completions/mean_terminated_length": 72.875, | |
| "completions/min_length": 26.4, | |
| "completions/min_terminated_length": 26.4, | |
| "epoch": 0.02711796313965751, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9914088535408765e-07, | |
| "loss": -0.0019, | |
| "num_tokens": 3164803.0, | |
| "reward": 1.4464478969573975, | |
| "reward_std": 0.0021736113354563712, | |
| "rewards/combined_reward/mean": 1.4464478969573975, | |
| "rewards/combined_reward/std": 0.19929498732089995, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 140.5, | |
| "completions/max_terminated_length": 140.5, | |
| "completions/mean_length": 59.38125, | |
| "completions/mean_terminated_length": 59.38125, | |
| "completions/min_length": 15.9, | |
| "completions/min_terminated_length": 15.9, | |
| "epoch": 0.02812233214483001, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9899068314267685e-07, | |
| "loss": 0.001, | |
| "num_tokens": 3280220.0, | |
| "reward": 1.3454687356948853, | |
| "reward_std": 0.004999999329447747, | |
| "rewards/combined_reward/mean": 1.3454687356948853, | |
| "rewards/combined_reward/std": 0.31286893486976625, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 94.7, | |
| "completions/max_terminated_length": 94.7, | |
| "completions/mean_length": 55.0, | |
| "completions/mean_terminated_length": 55.0, | |
| "completions/min_length": 22.0, | |
| "completions/min_terminated_length": 22.0, | |
| "epoch": 0.029126701150002512, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9882845275042067e-07, | |
| "loss": 0.0065, | |
| "num_tokens": 3385228.0, | |
| "reward": 1.4142057299613953, | |
| "reward_std": 0.00044270951766520736, | |
| "rewards/combined_reward/mean": 1.4142057299613953, | |
| "rewards/combined_reward/std": 0.20944447480142117, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 173.3, | |
| "completions/max_terminated_length": 173.3, | |
| "completions/mean_length": 76.13125, | |
| "completions/mean_terminated_length": 76.13125, | |
| "completions/min_length": 23.4, | |
| "completions/min_terminated_length": 23.4, | |
| "epoch": 0.03013107015517501, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9865421388964382e-07, | |
| "loss": -0.0017, | |
| "num_tokens": 3496189.0, | |
| "reward": 1.3910624980926514, | |
| "reward_std": 0.0021650632843375206, | |
| "rewards/combined_reward/mean": 1.3910624980926514, | |
| "rewards/combined_reward/std": 0.28597628474235537, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 315.4, | |
| "completions/max_terminated_length": 315.4, | |
| "completions/mean_length": 99.93125, | |
| "completions/mean_terminated_length": 99.93125, | |
| "completions/min_length": 18.4, | |
| "completions/min_terminated_length": 18.4, | |
| "epoch": 0.03113543916034751, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 3.8047702312469482, | |
| "learning_rate": 1.9846798773179865e-07, | |
| "loss": 0.0118, | |
| "num_tokens": 3602282.0, | |
| "reward": 1.2963680744171142, | |
| "reward_std": 0.01609460562467575, | |
| "rewards/combined_reward/mean": 1.2963680744171142, | |
| "rewards/combined_reward/std": 0.3926819786429405, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 103.8, | |
| "completions/max_terminated_length": 103.8, | |
| "completions/mean_length": 52.2875, | |
| "completions/mean_terminated_length": 52.2875, | |
| "completions/min_length": 20.6, | |
| "completions/min_terminated_length": 20.6, | |
| "epoch": 0.03213980816552001, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9826979690489249e-07, | |
| "loss": 0.0014, | |
| "num_tokens": 3717904.0, | |
| "reward": 1.403697907924652, | |
| "reward_std": 0.0003125001909211278, | |
| "rewards/combined_reward/mean": 1.403697907924652, | |
| "rewards/combined_reward/std": 0.24410614371299744, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 79.8, | |
| "completions/max_terminated_length": 79.8, | |
| "completions/mean_length": 44.49375, | |
| "completions/mean_terminated_length": 44.49375, | |
| "completions/min_length": 16.1, | |
| "completions/min_terminated_length": 16.1, | |
| "epoch": 0.03314417717069251, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9805966549073822e-07, | |
| "loss": 0.0057, | |
| "num_tokens": 3825867.0, | |
| "reward": 1.3135937452316284, | |
| "reward_std": 0.007812501117587089, | |
| "rewards/combined_reward/mean": 1.3135937452316284, | |
| "rewards/combined_reward/std": 0.3756252348423004, | |
| "step": 330 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 117.9, | |
| "completions/max_terminated_length": 117.9, | |
| "completions/mean_length": 54.15, | |
| "completions/mean_terminated_length": 54.15, | |
| "completions/min_length": 15.8, | |
| "completions/min_terminated_length": 15.8, | |
| "epoch": 0.03414854617586501, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9783761902202812e-07, | |
| "loss": 0.0067, | |
| "num_tokens": 3942087.0, | |
| "reward": 1.290208351612091, | |
| "reward_std": 0.0010206203907728196, | |
| "rewards/combined_reward/mean": 1.290208351612091, | |
| "rewards/combined_reward/std": 0.27491325289011004, | |
| "step": 340 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 89.2, | |
| "completions/max_terminated_length": 89.2, | |
| "completions/mean_length": 45.46875, | |
| "completions/mean_terminated_length": 45.46875, | |
| "completions/min_length": 12.9, | |
| "completions/min_terminated_length": 12.9, | |
| "epoch": 0.03515291518103751, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9760368447923143e-07, | |
| "loss": 0.0, | |
| "num_tokens": 4077218.0, | |
| "reward": 1.271875011920929, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.271875011920929, | |
| "rewards/combined_reward/std": 0.3903637401759624, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 86.1, | |
| "completions/max_terminated_length": 86.1, | |
| "completions/mean_length": 47.9125, | |
| "completions/mean_terminated_length": 47.9125, | |
| "completions/min_length": 19.1, | |
| "completions/min_terminated_length": 19.1, | |
| "epoch": 0.03615728418621001, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 5.847682952880859, | |
| "learning_rate": 1.9735789028731602e-07, | |
| "loss": -0.0023, | |
| "num_tokens": 4189144.0, | |
| "reward": 1.3238541960716248, | |
| "reward_std": 0.03020833432674408, | |
| "rewards/combined_reward/mean": 1.3238541960716248, | |
| "rewards/combined_reward/std": 0.32445888966321945, | |
| "step": 360 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 103.3, | |
| "completions/max_terminated_length": 103.3, | |
| "completions/mean_length": 55.5, | |
| "completions/mean_terminated_length": 55.5, | |
| "completions/min_length": 20.6, | |
| "completions/min_terminated_length": 20.6, | |
| "epoch": 0.03716165319138252, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9710026631229448e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 4294100.0, | |
| "reward": 1.3909027934074403, | |
| "reward_std": 0.00034722290001809597, | |
| "rewards/combined_reward/mean": 1.3909027934074403, | |
| "rewards/combined_reward/std": 0.2816110193729401, | |
| "step": 370 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 119.4, | |
| "completions/max_terminated_length": 119.4, | |
| "completions/mean_length": 57.65625, | |
| "completions/mean_terminated_length": 57.65625, | |
| "completions/min_length": 19.0, | |
| "completions/min_terminated_length": 19.0, | |
| "epoch": 0.038166022196555016, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9683084385759522e-07, | |
| "loss": -0.0002, | |
| "num_tokens": 4400477.0, | |
| "reward": 1.333958351612091, | |
| "reward_std": 0.0012500007636845113, | |
| "rewards/combined_reward/mean": 1.333958351612091, | |
| "rewards/combined_reward/std": 0.2801030218601227, | |
| "step": 380 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 112.6, | |
| "completions/max_terminated_length": 112.6, | |
| "completions/mean_length": 55.225, | |
| "completions/mean_terminated_length": 55.225, | |
| "completions/min_length": 18.2, | |
| "completions/min_terminated_length": 18.2, | |
| "epoch": 0.039170391201727515, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 4.480510234832764, | |
| "learning_rate": 1.9654965566025878e-07, | |
| "loss": 0.006, | |
| "num_tokens": 4516865.0, | |
| "reward": 1.370369803905487, | |
| "reward_std": 0.002187502384185791, | |
| "rewards/combined_reward/mean": 1.370369803905487, | |
| "rewards/combined_reward/std": 0.27093904092907906, | |
| "step": 390 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 139.2, | |
| "completions/max_terminated_length": 139.2, | |
| "completions/mean_length": 55.54375, | |
| "completions/mean_terminated_length": 55.54375, | |
| "completions/min_length": 12.2, | |
| "completions/min_terminated_length": 12.2, | |
| "epoch": 0.040174760206900015, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9625673588696007e-07, | |
| "loss": 0.0, | |
| "num_tokens": 4634776.0, | |
| "reward": 1.2619999647140503, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.2619999647140503, | |
| "rewards/combined_reward/std": 0.3673270642757416, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 106.5, | |
| "completions/max_terminated_length": 106.5, | |
| "completions/mean_length": 52.2875, | |
| "completions/mean_terminated_length": 52.2875, | |
| "completions/min_length": 13.1, | |
| "completions/min_terminated_length": 13.1, | |
| "epoch": 0.041179129212072514, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 5.624104022979736, | |
| "learning_rate": 1.959521201298568e-07, | |
| "loss": 0.0061, | |
| "num_tokens": 4766894.0, | |
| "reward": 1.3308506846427917, | |
| "reward_std": 0.003342500701546669, | |
| "rewards/combined_reward/mean": 1.3308506846427917, | |
| "rewards/combined_reward/std": 0.37019643262028695, | |
| "step": 410 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 144.9, | |
| "completions/max_terminated_length": 144.9, | |
| "completions/mean_length": 63.63125, | |
| "completions/mean_terminated_length": 63.63125, | |
| "completions/min_length": 18.3, | |
| "completions/min_terminated_length": 18.3, | |
| "epoch": 0.042183498217245013, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.956358454022648e-07, | |
| "loss": -0.0011, | |
| "num_tokens": 4887883.0, | |
| "reward": 1.3249478936195374, | |
| "reward_std": 0.016550703253597022, | |
| "rewards/combined_reward/mean": 1.3249478936195374, | |
| "rewards/combined_reward/std": 0.31248683035373687, | |
| "step": 420 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 70.8, | |
| "completions/max_terminated_length": 70.8, | |
| "completions/mean_length": 40.03125, | |
| "completions/mean_terminated_length": 40.03125, | |
| "completions/min_length": 21.5, | |
| "completions/min_terminated_length": 21.5, | |
| "epoch": 0.04318786722241751, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9530795013416043e-07, | |
| "loss": -0.0062, | |
| "num_tokens": 5017432.0, | |
| "reward": 1.2040624856948852, | |
| "reward_std": 0.003125, | |
| "rewards/combined_reward/mean": 1.2040624856948852, | |
| "rewards/combined_reward/std": 0.28724531903862954, | |
| "step": 430 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0125, | |
| "completions/max_length": 95.9, | |
| "completions/max_terminated_length": 95.9, | |
| "completions/mean_length": 47.64375, | |
| "completions/mean_terminated_length": 46.64416732788086, | |
| "completions/min_length": 14.4, | |
| "completions/min_terminated_length": 14.4, | |
| "epoch": 0.04419223622759002, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9496847416751122e-07, | |
| "loss": -0.0055, | |
| "num_tokens": 5127539.0, | |
| "reward": 1.3247395992279052, | |
| "reward_std": 0.005520834401249885, | |
| "rewards/combined_reward/mean": 1.3247395992279052, | |
| "rewards/combined_reward/std": 0.353334778547287, | |
| "step": 440 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 101.6, | |
| "completions/max_terminated_length": 101.6, | |
| "completions/mean_length": 53.95625, | |
| "completions/mean_terminated_length": 53.95625, | |
| "completions/min_length": 21.6, | |
| "completions/min_terminated_length": 21.6, | |
| "epoch": 0.04519660523276252, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9461745875143477e-07, | |
| "loss": -0.0013, | |
| "num_tokens": 5239592.0, | |
| "reward": 1.2362499833106995, | |
| "reward_std": 0.0016666660085320473, | |
| "rewards/combined_reward/mean": 1.2362499833106995, | |
| "rewards/combined_reward/std": 0.33721971064805983, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 156.9, | |
| "completions/max_terminated_length": 156.9, | |
| "completions/mean_length": 73.56875, | |
| "completions/mean_terminated_length": 73.56875, | |
| "completions/min_length": 16.6, | |
| "completions/min_terminated_length": 16.6, | |
| "epoch": 0.04620097423793502, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.942549465371863e-07, | |
| "loss": -0.0051, | |
| "num_tokens": 5360759.0, | |
| "reward": 1.364300584793091, | |
| "reward_std": 0.0033333331346511843, | |
| "rewards/combined_reward/mean": 1.364300584793091, | |
| "rewards/combined_reward/std": 0.29198225438594816, | |
| "step": 460 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 89.4, | |
| "completions/max_terminated_length": 89.4, | |
| "completions/mean_length": 49.9, | |
| "completions/mean_terminated_length": 49.9, | |
| "completions/min_length": 14.5, | |
| "completions/min_terminated_length": 14.5, | |
| "epoch": 0.04720534324310752, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.938809815729766e-07, | |
| "loss": 0.0, | |
| "num_tokens": 5489735.0, | |
| "reward": 1.2914583563804627, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.2914583563804627, | |
| "rewards/combined_reward/std": 0.32128691375255586, | |
| "step": 470 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 116.8, | |
| "completions/max_terminated_length": 116.8, | |
| "completions/mean_length": 54.26875, | |
| "completions/mean_terminated_length": 54.26875, | |
| "completions/min_length": 16.8, | |
| "completions/min_terminated_length": 16.8, | |
| "epoch": 0.048209712248280016, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9349560929861957e-07, | |
| "loss": 0.0036, | |
| "num_tokens": 5618126.0, | |
| "reward": 1.2964062452316285, | |
| "reward_std": 0.0034375011920928953, | |
| "rewards/combined_reward/mean": 1.2964062452316285, | |
| "rewards/combined_reward/std": 0.3410232897847891, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 138.5, | |
| "completions/max_terminated_length": 138.5, | |
| "completions/mean_length": 63.425, | |
| "completions/mean_terminated_length": 63.425, | |
| "completions/min_length": 17.2, | |
| "completions/min_terminated_length": 17.2, | |
| "epoch": 0.049214081253452516, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 5.859716892242432, | |
| "learning_rate": 1.9309887654001093e-07, | |
| "loss": -0.0122, | |
| "num_tokens": 5732858.0, | |
| "reward": 1.3710416555404663, | |
| "reward_std": 0.005000000074505806, | |
| "rewards/combined_reward/mean": 1.3710416555404663, | |
| "rewards/combined_reward/std": 0.2569635409861803, | |
| "step": 490 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 71.1, | |
| "completions/max_terminated_length": 71.1, | |
| "completions/mean_length": 37.5125, | |
| "completions/mean_terminated_length": 37.5125, | |
| "completions/min_length": 15.6, | |
| "completions/min_terminated_length": 15.6, | |
| "epoch": 0.05021845025862502, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9269083150343857e-07, | |
| "loss": 0.0, | |
| "num_tokens": 5827508.0, | |
| "reward": 1.2737499952316285, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.2737499952316285, | |
| "rewards/combined_reward/std": 0.36351585388183594, | |
| "step": 500 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 92.3, | |
| "completions/max_terminated_length": 92.3, | |
| "completions/mean_length": 49.31875, | |
| "completions/mean_terminated_length": 49.31875, | |
| "completions/min_length": 16.5, | |
| "completions/min_terminated_length": 16.5, | |
| "epoch": 0.05122281926379752, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9227152376972505e-07, | |
| "loss": 0.0, | |
| "num_tokens": 5940043.0, | |
| "reward": 1.3223958492279053, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.3223958492279053, | |
| "rewards/combined_reward/std": 0.32680114805698396, | |
| "step": 510 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 112.1, | |
| "completions/max_terminated_length": 112.1, | |
| "completions/mean_length": 60.84375, | |
| "completions/mean_terminated_length": 60.84375, | |
| "completions/min_length": 22.7, | |
| "completions/min_terminated_length": 22.7, | |
| "epoch": 0.05222718826897002, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.91841004288203e-07, | |
| "loss": 0.0, | |
| "num_tokens": 6061038.0, | |
| "reward": 1.3749479293823241, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.3749479293823241, | |
| "rewards/combined_reward/std": 0.2760587348602712, | |
| "step": 520 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 383.1, | |
| "completions/max_terminated_length": 211.9, | |
| "completions/mean_length": 101.45, | |
| "completions/mean_terminated_length": 89.37000045776367, | |
| "completions/min_length": 29.4, | |
| "completions/min_terminated_length": 29.4, | |
| "epoch": 0.05323155727414252, | |
| "frac_reward_zero_std": 0.875, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.913993253705246e-07, | |
| "loss": 0.0182, | |
| "num_tokens": 6172502.0, | |
| "reward": 1.3482013940811157, | |
| "reward_std": 0.004686582600697875, | |
| "rewards/combined_reward/mean": 1.3482013940811157, | |
| "rewards/combined_reward/std": 0.26615125834941866, | |
| "step": 530 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 116.0, | |
| "completions/max_terminated_length": 116.0, | |
| "completions/mean_length": 61.33125, | |
| "completions/mean_terminated_length": 61.33125, | |
| "completions/min_length": 26.7, | |
| "completions/min_terminated_length": 26.7, | |
| "epoch": 0.05423592627931502, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 6.519238471984863, | |
| "learning_rate": 1.9094654068430515e-07, | |
| "loss": -0.014, | |
| "num_tokens": 6279539.0, | |
| "reward": 1.456402564048767, | |
| "reward_std": 0.0006212619598954916, | |
| "rewards/combined_reward/mean": 1.456402564048767, | |
| "rewards/combined_reward/std": 0.17502975650131702, | |
| "step": 540 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 319.8, | |
| "completions/max_terminated_length": 302.8, | |
| "completions/mean_length": 102.7, | |
| "completions/mean_terminated_length": 92.22833557128907, | |
| "completions/min_length": 14.5, | |
| "completions/min_terminated_length": 14.5, | |
| "epoch": 0.05524029528448752, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 6.630038738250732, | |
| "learning_rate": 1.9048270524660196e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 6401355.0, | |
| "reward": 1.2464791774749755, | |
| "reward_std": 0.016750000603497028, | |
| "rewards/combined_reward/mean": 1.2464791774749755, | |
| "rewards/combined_reward/std": 0.43877428472042085, | |
| "step": 550 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 108.4, | |
| "completions/max_terminated_length": 108.4, | |
| "completions/mean_length": 57.21875, | |
| "completions/mean_terminated_length": 57.21875, | |
| "completions/min_length": 22.1, | |
| "completions/min_terminated_length": 22.1, | |
| "epoch": 0.05624466428966002, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 4.464468955993652, | |
| "learning_rate": 1.9000787541722936e-07, | |
| "loss": -0.0008, | |
| "num_tokens": 6512806.0, | |
| "reward": 1.3637500047683715, | |
| "reward_std": 0.0056250004563480616, | |
| "rewards/combined_reward/mean": 1.3637500047683715, | |
| "rewards/combined_reward/std": 0.25516389338299633, | |
| "step": 560 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00625, | |
| "completions/max_length": 296.2, | |
| "completions/max_terminated_length": 201.7, | |
| "completions/mean_length": 86.75625, | |
| "completions/mean_terminated_length": 75.22125091552735, | |
| "completions/min_length": 24.9, | |
| "completions/min_terminated_length": 24.9, | |
| "epoch": 0.057249033294832524, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8952210889191065e-07, | |
| "loss": -0.0016, | |
| "num_tokens": 6619515.0, | |
| "reward": 1.3538541674613953, | |
| "reward_std": 0.009270833618938924, | |
| "rewards/combined_reward/mean": 1.3538541674613953, | |
| "rewards/combined_reward/std": 0.35525577939115466, | |
| "step": 570 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 104.1, | |
| "completions/max_terminated_length": 104.1, | |
| "completions/mean_length": 48.9, | |
| "completions/mean_terminated_length": 48.9, | |
| "completions/min_length": 16.2, | |
| "completions/min_terminated_length": 16.2, | |
| "epoch": 0.058253402300005024, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.890254646952674e-07, | |
| "loss": 0.0, | |
| "num_tokens": 6728163.0, | |
| "reward": 1.2268749833106996, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.2268749833106996, | |
| "rewards/combined_reward/std": 0.33372554890811446, | |
| "step": 580 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 115.7, | |
| "completions/max_terminated_length": 115.7, | |
| "completions/mean_length": 61.34375, | |
| "completions/mean_terminated_length": 61.34375, | |
| "completions/min_length": 20.3, | |
| "completions/min_terminated_length": 20.3, | |
| "epoch": 0.05925777130517752, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 3.076678991317749, | |
| "learning_rate": 1.885180031736477e-07, | |
| "loss": -0.0013, | |
| "num_tokens": 6845358.0, | |
| "reward": 1.3715885639190675, | |
| "reward_std": 0.0037068985402584076, | |
| "rewards/combined_reward/mean": 1.3715885639190675, | |
| "rewards/combined_reward/std": 0.3188589945435524, | |
| "step": 590 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 122.6, | |
| "completions/max_terminated_length": 122.6, | |
| "completions/mean_length": 55.81875, | |
| "completions/mean_terminated_length": 55.81875, | |
| "completions/min_length": 14.8, | |
| "completions/min_terminated_length": 14.8, | |
| "epoch": 0.06026214031035002, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.879997859877932e-07, | |
| "loss": 0.0032, | |
| "num_tokens": 6971649.0, | |
| "reward": 1.280833327770233, | |
| "reward_std": 0.0006132050417363644, | |
| "rewards/combined_reward/mean": 1.280833327770233, | |
| "rewards/combined_reward/std": 0.338599956035614, | |
| "step": 600 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025, | |
| "completions/max_length": 387.6, | |
| "completions/max_terminated_length": 192.1, | |
| "completions/mean_length": 122.46875, | |
| "completions/mean_terminated_length": 72.42708358764648, | |
| "completions/min_length": 23.3, | |
| "completions/min_terminated_length": 23.3, | |
| "epoch": 0.06126650931552252, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8747087610534734e-07, | |
| "loss": 0.019, | |
| "num_tokens": 7087600.0, | |
| "reward": 1.338072907924652, | |
| "reward_std": 0.013132144883275031, | |
| "rewards/combined_reward/mean": 1.338072907924652, | |
| "rewards/combined_reward/std": 0.30777021273970606, | |
| "step": 610 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 112.7, | |
| "completions/max_terminated_length": 112.7, | |
| "completions/mean_length": 58.44375, | |
| "completions/mean_terminated_length": 58.44375, | |
| "completions/min_length": 23.9, | |
| "completions/min_terminated_length": 23.9, | |
| "epoch": 0.06227087832069502, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8693133779320382e-07, | |
| "loss": -0.0031, | |
| "num_tokens": 7191467.0, | |
| "reward": 1.3348880290985108, | |
| "reward_std": 0.007124999910593033, | |
| "rewards/combined_reward/mean": 1.3348880290985108, | |
| "rewards/combined_reward/std": 0.2751554258167744, | |
| "step": 620 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 194.8, | |
| "completions/max_terminated_length": 194.8, | |
| "completions/mean_length": 84.76875, | |
| "completions/mean_terminated_length": 84.76875, | |
| "completions/min_length": 21.4, | |
| "completions/min_terminated_length": 21.4, | |
| "epoch": 0.06327524732586752, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8638123660969793e-07, | |
| "loss": -0.0084, | |
| "num_tokens": 7304146.0, | |
| "reward": 1.3757467865943909, | |
| "reward_std": 0.0030034731142222883, | |
| "rewards/combined_reward/mean": 1.3757467865943909, | |
| "rewards/combined_reward/std": 0.28882216811180117, | |
| "step": 630 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 101.9, | |
| "completions/max_terminated_length": 101.9, | |
| "completions/mean_length": 56.925, | |
| "completions/mean_terminated_length": 56.925, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "epoch": 0.06427961633104003, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.858206393966405e-07, | |
| "loss": 0.0, | |
| "num_tokens": 7415006.0, | |
| "reward": 1.3215104341506958, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.3215104341506958, | |
| "rewards/combined_reward/std": 0.33309968262910844, | |
| "step": 640 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 106.5, | |
| "completions/max_terminated_length": 106.5, | |
| "completions/mean_length": 58.26875, | |
| "completions/mean_terminated_length": 58.26875, | |
| "completions/min_length": 14.2, | |
| "completions/min_terminated_length": 14.2, | |
| "epoch": 0.06528398533621252, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8524961427119615e-07, | |
| "loss": -0.009, | |
| "num_tokens": 7546381.0, | |
| "reward": 1.3129427313804627, | |
| "reward_std": 0.002951054647564888, | |
| "rewards/combined_reward/mean": 1.3129427313804627, | |
| "rewards/combined_reward/std": 0.3575292468070984, | |
| "step": 650 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 84.0, | |
| "completions/max_terminated_length": 84.0, | |
| "completions/mean_length": 46.75625, | |
| "completions/mean_terminated_length": 46.75625, | |
| "completions/min_length": 15.4, | |
| "completions/min_terminated_length": 15.4, | |
| "epoch": 0.06628835434138503, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.846682306176065e-07, | |
| "loss": 0.0, | |
| "num_tokens": 7668158.0, | |
| "reward": 1.3184374928474427, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.3184374928474427, | |
| "rewards/combined_reward/std": 0.35122168958187105, | |
| "step": 660 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 101.7, | |
| "completions/max_terminated_length": 101.7, | |
| "completions/mean_length": 56.3375, | |
| "completions/mean_terminated_length": 56.3375, | |
| "completions/min_length": 17.9, | |
| "completions/min_terminated_length": 17.9, | |
| "epoch": 0.06729272334655753, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8407655907875938e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 7794644.0, | |
| "reward": 1.331454861164093, | |
| "reward_std": 0.007124999910593033, | |
| "rewards/combined_reward/mean": 1.331454861164093, | |
| "rewards/combined_reward/std": 0.3434182394295931, | |
| "step": 670 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 135.7, | |
| "completions/max_terminated_length": 135.7, | |
| "completions/mean_length": 68.90625, | |
| "completions/mean_terminated_length": 68.90625, | |
| "completions/min_length": 16.1, | |
| "completions/min_terminated_length": 16.1, | |
| "epoch": 0.06829709235173002, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8347467154760515e-07, | |
| "loss": 0.0079, | |
| "num_tokens": 7913933.0, | |
| "reward": 1.3356944441795349, | |
| "reward_std": 0.0053335148841142654, | |
| "rewards/combined_reward/mean": 1.3356944441795349, | |
| "rewards/combined_reward/std": 0.3590264985337853, | |
| "step": 680 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 265.9, | |
| "completions/max_terminated_length": 265.9, | |
| "completions/mean_length": 91.5, | |
| "completions/mean_terminated_length": 91.5, | |
| "completions/min_length": 24.8, | |
| "completions/min_terminated_length": 24.8, | |
| "epoch": 0.06930146135690253, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8286264115842114e-07, | |
| "loss": 0.0017, | |
| "num_tokens": 8033153.0, | |
| "reward": 1.3431249916553498, | |
| "reward_std": 0.0044791650027036665, | |
| "rewards/combined_reward/mean": 1.3431249916553498, | |
| "rewards/combined_reward/std": 0.3242304854094982, | |
| "step": 690 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 73.2, | |
| "completions/max_terminated_length": 73.2, | |
| "completions/mean_length": 39.55625, | |
| "completions/mean_terminated_length": 39.55625, | |
| "completions/min_length": 17.7, | |
| "completions/min_terminated_length": 17.7, | |
| "epoch": 0.07030583036207502, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8224054227792522e-07, | |
| "loss": -0.003, | |
| "num_tokens": 8147198.0, | |
| "reward": 1.3440885424613953, | |
| "reward_std": 0.0002604176523163915, | |
| "rewards/combined_reward/mean": 1.3440885424613953, | |
| "rewards/combined_reward/std": 0.3006736177019775, | |
| "step": 700 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 123.6, | |
| "completions/max_terminated_length": 123.6, | |
| "completions/mean_length": 67.76875, | |
| "completions/mean_terminated_length": 67.76875, | |
| "completions/min_length": 25.8, | |
| "completions/min_terminated_length": 25.8, | |
| "epoch": 0.07131019936724753, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 1.436936616897583, | |
| "learning_rate": 1.816084504962396e-07, | |
| "loss": 0.0009, | |
| "num_tokens": 8248985.0, | |
| "reward": 1.459496557712555, | |
| "reward_std": 0.002500000596046448, | |
| "rewards/combined_reward/mean": 1.459496557712555, | |
| "rewards/combined_reward/std": 0.15663873171433806, | |
| "step": 710 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 172.1, | |
| "completions/max_terminated_length": 172.1, | |
| "completions/mean_length": 76.96875, | |
| "completions/mean_terminated_length": 76.96875, | |
| "completions/min_length": 24.5, | |
| "completions/min_terminated_length": 24.5, | |
| "epoch": 0.07231456837242002, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8096644261770608e-07, | |
| "loss": 0.0179, | |
| "num_tokens": 8373128.0, | |
| "reward": 1.3943750143051148, | |
| "reward_std": 0.005624998733401299, | |
| "rewards/combined_reward/mean": 1.3943750143051148, | |
| "rewards/combined_reward/std": 0.24296645894646646, | |
| "step": 720 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 120.0, | |
| "completions/max_terminated_length": 120.0, | |
| "completions/mean_length": 60.15625, | |
| "completions/mean_terminated_length": 60.15625, | |
| "completions/min_length": 18.8, | |
| "completions/min_terminated_length": 18.8, | |
| "epoch": 0.07331893737759253, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.8031459665155363e-07, | |
| "loss": -0.001, | |
| "num_tokens": 8487649.0, | |
| "reward": 1.4223046898841858, | |
| "reward_std": 0.0001302093267440796, | |
| "rewards/combined_reward/mean": 1.4223046898841858, | |
| "rewards/combined_reward/std": 0.2848698660731316, | |
| "step": 730 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 77.8, | |
| "completions/max_terminated_length": 77.8, | |
| "completions/mean_length": 45.84375, | |
| "completions/mean_terminated_length": 45.84375, | |
| "completions/min_length": 18.4, | |
| "completions/min_terminated_length": 18.4, | |
| "epoch": 0.07432330638276503, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.796529918024196e-07, | |
| "loss": 0.0, | |
| "num_tokens": 8603284.0, | |
| "reward": 1.37947918176651, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.37947918176651, | |
| "rewards/combined_reward/std": 0.27231944501399996, | |
| "step": 740 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 204.9, | |
| "completions/max_terminated_length": 204.9, | |
| "completions/mean_length": 76.9375, | |
| "completions/mean_terminated_length": 76.9375, | |
| "completions/min_length": 18.3, | |
| "completions/min_terminated_length": 18.3, | |
| "epoch": 0.07532767538793753, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7898170846072592e-07, | |
| "loss": 0.0009, | |
| "num_tokens": 8718758.0, | |
| "reward": 1.32010418176651, | |
| "reward_std": 0.002500000596046448, | |
| "rewards/combined_reward/mean": 1.32010418176651, | |
| "rewards/combined_reward/std": 0.34439257588237526, | |
| "step": 750 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 148.0, | |
| "completions/max_terminated_length": 148.0, | |
| "completions/mean_length": 64.11875, | |
| "completions/mean_terminated_length": 64.11875, | |
| "completions/min_length": 16.4, | |
| "completions/min_terminated_length": 16.4, | |
| "epoch": 0.07633204439311003, | |
| "frac_reward_zero_std": 0.9, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.783008281929106e-07, | |
| "loss": -0.0051, | |
| "num_tokens": 8833993.0, | |
| "reward": 1.3178860425949097, | |
| "reward_std": 0.016688717156648637, | |
| "rewards/combined_reward/mean": 1.3178860425949097, | |
| "rewards/combined_reward/std": 0.3388564258813858, | |
| "step": 760 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 122.4, | |
| "completions/max_terminated_length": 122.4, | |
| "completions/mean_length": 62.99375, | |
| "completions/mean_terminated_length": 62.99375, | |
| "completions/min_length": 21.2, | |
| "completions/min_terminated_length": 21.2, | |
| "epoch": 0.07733641339828252, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 1.1234172582626343, | |
| "learning_rate": 1.7761043373151713e-07, | |
| "loss": -0.0046, | |
| "num_tokens": 8950896.0, | |
| "reward": 1.3376388788223266, | |
| "reward_std": 0.00034722290001809597, | |
| "rewards/combined_reward/mean": 1.3376388788223266, | |
| "rewards/combined_reward/std": 0.34661323949694633, | |
| "step": 770 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 111.0, | |
| "completions/max_terminated_length": 111.0, | |
| "completions/mean_length": 56.3, | |
| "completions/mean_terminated_length": 56.3, | |
| "completions/min_length": 20.3, | |
| "completions/min_terminated_length": 20.3, | |
| "epoch": 0.07834078240345503, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7691060896514168e-07, | |
| "loss": -0.0003, | |
| "num_tokens": 9071600.0, | |
| "reward": 1.3996267199516297, | |
| "reward_std": 0.002080751396715641, | |
| "rewards/combined_reward/mean": 1.3996267199516297, | |
| "rewards/combined_reward/std": 0.26108508543111386, | |
| "step": 780 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 79.4, | |
| "completions/max_terminated_length": 79.4, | |
| "completions/mean_length": 45.76875, | |
| "completions/mean_terminated_length": 45.76875, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 0.07934515140862752, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7620143892823975e-07, | |
| "loss": -0.0062, | |
| "num_tokens": 9174599.0, | |
| "reward": 1.378697919845581, | |
| "reward_std": 0.0003125001909211278, | |
| "rewards/combined_reward/mean": 1.378697919845581, | |
| "rewards/combined_reward/std": 0.2739857309497893, | |
| "step": 790 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 98.3, | |
| "completions/max_terminated_length": 98.3, | |
| "completions/mean_length": 50.98125, | |
| "completions/mean_terminated_length": 50.98125, | |
| "completions/min_length": 19.2, | |
| "completions/min_terminated_length": 19.2, | |
| "epoch": 0.08034952041380003, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7548300979079413e-07, | |
| "loss": -0.0008, | |
| "num_tokens": 9284796.0, | |
| "reward": 1.368190097808838, | |
| "reward_std": 0.004609373956918716, | |
| "rewards/combined_reward/mean": 1.368190097808838, | |
| "rewards/combined_reward/std": 0.25843119765631856, | |
| "step": 800 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 80.0, | |
| "completions/max_terminated_length": 80.0, | |
| "completions/mean_length": 41.175, | |
| "completions/mean_terminated_length": 41.175, | |
| "completions/min_length": 12.8, | |
| "completions/min_terminated_length": 12.8, | |
| "epoch": 0.08135388941897254, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7475540884784422e-07, | |
| "loss": 0.0, | |
| "num_tokens": 9398356.0, | |
| "reward": 1.2378819465637207, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.2378819465637207, | |
| "rewards/combined_reward/std": 0.3914600659161806, | |
| "step": 810 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 96.3, | |
| "completions/max_terminated_length": 96.3, | |
| "completions/mean_length": 54.50625, | |
| "completions/mean_terminated_length": 54.50625, | |
| "completions/min_length": 19.6, | |
| "completions/min_terminated_length": 19.6, | |
| "epoch": 0.08235825842414503, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7401872450887915e-07, | |
| "loss": -0.0007, | |
| "num_tokens": 9497821.0, | |
| "reward": 1.3947187542915345, | |
| "reward_std": 0.0015624999767169356, | |
| "rewards/combined_reward/mean": 1.3947187542915345, | |
| "rewards/combined_reward/std": 0.2990885377395898, | |
| "step": 820 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 96.7, | |
| "completions/max_terminated_length": 96.7, | |
| "completions/mean_length": 49.1875, | |
| "completions/mean_terminated_length": 49.1875, | |
| "completions/min_length": 17.9, | |
| "completions/min_terminated_length": 17.9, | |
| "epoch": 0.08336262742931753, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7327304628709528e-07, | |
| "loss": 0.0, | |
| "num_tokens": 9641355.0, | |
| "reward": 1.3011458396911622, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.3011458396911622, | |
| "rewards/combined_reward/std": 0.2698082665912807, | |
| "step": 830 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 99.3, | |
| "completions/max_terminated_length": 99.3, | |
| "completions/mean_length": 54.9125, | |
| "completions/mean_terminated_length": 54.9125, | |
| "completions/min_length": 21.7, | |
| "completions/min_terminated_length": 21.7, | |
| "epoch": 0.08436699643449003, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7251846478851951e-07, | |
| "loss": 0.0083, | |
| "num_tokens": 9759969.0, | |
| "reward": 1.2925694584846497, | |
| "reward_std": 0.0019245008006691933, | |
| "rewards/combined_reward/mean": 1.2925694584846497, | |
| "rewards/combined_reward/std": 0.26882885694503783, | |
| "step": 840 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 168.1, | |
| "completions/max_terminated_length": 168.1, | |
| "completions/mean_length": 66.68125, | |
| "completions/mean_terminated_length": 66.68125, | |
| "completions/min_length": 19.3, | |
| "completions/min_terminated_length": 19.3, | |
| "epoch": 0.08537136543966253, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 6.147635459899902, | |
| "learning_rate": 1.7175507170100008e-07, | |
| "loss": -0.0077, | |
| "num_tokens": 9881310.0, | |
| "reward": 1.2720364809036255, | |
| "reward_std": 0.011238560592755676, | |
| "rewards/combined_reward/mean": 1.2720364809036255, | |
| "rewards/combined_reward/std": 0.31835093796253205, | |
| "step": 850 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 91.0, | |
| "completions/max_terminated_length": 91.0, | |
| "completions/mean_length": 47.25, | |
| "completions/mean_terminated_length": 47.25, | |
| "completions/min_length": 23.2, | |
| "completions/min_terminated_length": 23.2, | |
| "epoch": 0.08637573444483503, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 1.287226676940918, | |
| "learning_rate": 1.7098295978306552e-07, | |
| "loss": -0.012, | |
| "num_tokens": 9981046.0, | |
| "reward": 1.322606337070465, | |
| "reward_std": 0.0022470591589808463, | |
| "rewards/combined_reward/mean": 1.322606337070465, | |
| "rewards/combined_reward/std": 0.3106359137222171, | |
| "step": 860 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 91.9, | |
| "completions/max_terminated_length": 91.9, | |
| "completions/mean_length": 46.50625, | |
| "completions/mean_terminated_length": 46.50625, | |
| "completions/min_length": 13.9, | |
| "completions/min_terminated_length": 13.9, | |
| "epoch": 0.08738010345000753, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.7020222285265395e-07, | |
| "loss": 0.0, | |
| "num_tokens": 10089371.0, | |
| "reward": 1.2643750071525575, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.2643750071525575, | |
| "rewards/combined_reward/std": 0.4044176399707794, | |
| "step": 870 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 217.1, | |
| "completions/max_terminated_length": 217.1, | |
| "completions/mean_length": 70.81875, | |
| "completions/mean_terminated_length": 70.81875, | |
| "completions/min_length": 18.9, | |
| "completions/min_terminated_length": 18.9, | |
| "epoch": 0.08838447245518004, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6941295577571328e-07, | |
| "loss": 0.0079, | |
| "num_tokens": 10197254.0, | |
| "reward": 1.309374988079071, | |
| "reward_std": 0.002500000596046448, | |
| "rewards/combined_reward/mean": 1.309374988079071, | |
| "rewards/combined_reward/std": 0.325995758920908, | |
| "step": 880 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 94.7, | |
| "completions/max_terminated_length": 94.7, | |
| "completions/mean_length": 53.04375, | |
| "completions/mean_terminated_length": 53.04375, | |
| "completions/min_length": 22.5, | |
| "completions/min_terminated_length": 22.5, | |
| "epoch": 0.08938884146035253, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.686152544546743e-07, | |
| "loss": 0.0008, | |
| "num_tokens": 10316525.0, | |
| "reward": 1.3464062690734864, | |
| "reward_std": 0.00416666641831398, | |
| "rewards/combined_reward/mean": 1.3464062690734864, | |
| "rewards/combined_reward/std": 0.2880703628063202, | |
| "step": 890 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 97.0, | |
| "completions/max_terminated_length": 97.0, | |
| "completions/mean_length": 50.95625, | |
| "completions/mean_terminated_length": 50.95625, | |
| "completions/min_length": 16.8, | |
| "completions/min_terminated_length": 16.8, | |
| "epoch": 0.09039321046552504, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6780921581679763e-07, | |
| "loss": 0.0021, | |
| "num_tokens": 10435242.0, | |
| "reward": 1.2726041793823242, | |
| "reward_std": 0.009523502597585321, | |
| "rewards/combined_reward/mean": 1.2726041793823242, | |
| "rewards/combined_reward/std": 0.33535852897912266, | |
| "step": 900 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 104.1, | |
| "completions/max_terminated_length": 104.1, | |
| "completions/mean_length": 57.20625, | |
| "completions/mean_terminated_length": 57.20625, | |
| "completions/min_length": 19.4, | |
| "completions/min_terminated_length": 19.4, | |
| "epoch": 0.09139757947069753, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6699493780239649e-07, | |
| "loss": 0.0, | |
| "num_tokens": 10548043.0, | |
| "reward": 1.3535937666893005, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.3535937666893005, | |
| "rewards/combined_reward/std": 0.33704030215740205, | |
| "step": 910 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 107.5, | |
| "completions/max_terminated_length": 107.5, | |
| "completions/mean_length": 52.25, | |
| "completions/mean_terminated_length": 52.25, | |
| "completions/min_length": 16.2, | |
| "completions/min_terminated_length": 16.2, | |
| "epoch": 0.09240194847587004, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6617251935293588e-07, | |
| "loss": -0.0028, | |
| "num_tokens": 10675027.0, | |
| "reward": 1.3419270992279053, | |
| "reward_std": 0.0015625, | |
| "rewards/combined_reward/mean": 1.3419270992279053, | |
| "rewards/combined_reward/std": 0.32070667631924155, | |
| "step": 920 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 104.1, | |
| "completions/max_terminated_length": 104.1, | |
| "completions/mean_length": 58.05625, | |
| "completions/mean_terminated_length": 58.05625, | |
| "completions/min_length": 25.7, | |
| "completions/min_terminated_length": 25.7, | |
| "epoch": 0.09340631748104254, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6534206039901054e-07, | |
| "loss": 0.0, | |
| "num_tokens": 10805048.0, | |
| "reward": 1.4538020730018615, | |
| "reward_std": 0.0005208343267440796, | |
| "rewards/combined_reward/mean": 1.4538020730018615, | |
| "rewards/combined_reward/std": 0.17151957787573338, | |
| "step": 930 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 79.8, | |
| "completions/max_terminated_length": 79.8, | |
| "completions/mean_length": 39.75, | |
| "completions/mean_terminated_length": 39.75, | |
| "completions/min_length": 12.6, | |
| "completions/min_terminated_length": 12.6, | |
| "epoch": 0.09441068648621503, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6450366184820256e-07, | |
| "loss": 0.0, | |
| "num_tokens": 10906272.0, | |
| "reward": 1.258458322286606, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.258458322286606, | |
| "rewards/combined_reward/std": 0.3260463088750839, | |
| "step": 940 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 118.2, | |
| "completions/max_terminated_length": 118.2, | |
| "completions/mean_length": 61.65625, | |
| "completions/mean_terminated_length": 61.65625, | |
| "completions/min_length": 22.5, | |
| "completions/min_terminated_length": 22.5, | |
| "epoch": 0.09541505549138754, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6365742557282017e-07, | |
| "loss": 0.0091, | |
| "num_tokens": 11023301.0, | |
| "reward": 1.3930208325386046, | |
| "reward_std": 0.0050495008006691934, | |
| "rewards/combined_reward/mean": 1.3930208325386046, | |
| "rewards/combined_reward/std": 0.30010328590869906, | |
| "step": 950 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 105.4, | |
| "completions/max_terminated_length": 105.4, | |
| "completions/mean_length": 55.79375, | |
| "completions/mean_terminated_length": 55.79375, | |
| "completions/min_length": 23.8, | |
| "completions/min_terminated_length": 23.8, | |
| "epoch": 0.09641942449656003, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6280345439751956e-07, | |
| "loss": 0.0044, | |
| "num_tokens": 11148588.0, | |
| "reward": 1.3295885443687439, | |
| "reward_std": 0.024523502215743065, | |
| "rewards/combined_reward/mean": 1.3295885443687439, | |
| "rewards/combined_reward/std": 0.2928910902235657, | |
| "step": 960 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 121.7, | |
| "completions/max_terminated_length": 121.7, | |
| "completions/mean_length": 57.56875, | |
| "completions/mean_terminated_length": 57.56875, | |
| "completions/min_length": 14.2, | |
| "completions/min_terminated_length": 14.2, | |
| "epoch": 0.09742379350173254, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6194185208681082e-07, | |
| "loss": -0.0043, | |
| "num_tokens": 11268271.0, | |
| "reward": 1.2413020730018616, | |
| "reward_std": 0.005312500335276127, | |
| "rewards/combined_reward/mean": 1.2413020730018616, | |
| "rewards/combined_reward/std": 0.3525692358613014, | |
| "step": 970 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 117.1, | |
| "completions/max_terminated_length": 117.1, | |
| "completions/mean_length": 57.45625, | |
| "completions/mean_terminated_length": 57.45625, | |
| "completions/min_length": 19.1, | |
| "completions/min_terminated_length": 19.1, | |
| "epoch": 0.09842816250690503, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.610727233324495e-07, | |
| "loss": 0.0, | |
| "num_tokens": 11388376.0, | |
| "reward": 1.2743749976158143, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.2743749976158143, | |
| "rewards/combined_reward/std": 0.2959941983222961, | |
| "step": 980 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 99.0, | |
| "completions/max_terminated_length": 99.0, | |
| "completions/mean_length": 51.1875, | |
| "completions/mean_terminated_length": 51.1875, | |
| "completions/min_length": 15.9, | |
| "completions/min_terminated_length": 15.9, | |
| "epoch": 0.09943253151207754, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.6019617374071597e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 11503346.0, | |
| "reward": 1.3223437547683716, | |
| "reward_std": 0.0028867511078715324, | |
| "rewards/combined_reward/mean": 1.3223437547683716, | |
| "rewards/combined_reward/std": 0.37292833551764487, | |
| "step": 990 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 146.8, | |
| "completions/max_terminated_length": 146.8, | |
| "completions/mean_length": 64.61875, | |
| "completions/mean_terminated_length": 64.61875, | |
| "completions/min_length": 23.9, | |
| "completions/min_terminated_length": 23.9, | |
| "epoch": 0.10043690051725004, | |
| "frac_reward_zero_std": 1.0, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5931230981958326e-07, | |
| "loss": 0.0, | |
| "num_tokens": 11600585.0, | |
| "reward": 1.3246874928474426, | |
| "reward_std": 0.0, | |
| "rewards/combined_reward/mean": 1.3246874928474426, | |
| "rewards/combined_reward/std": 0.23927139891311527, | |
| "step": 1000 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 118.5, | |
| "completions/max_terminated_length": 118.5, | |
| "completions/mean_length": 65.5, | |
| "completions/mean_terminated_length": 65.5, | |
| "completions/min_length": 19.9, | |
| "completions/min_terminated_length": 19.9, | |
| "epoch": 0.10144126952242254, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5842123896577543e-07, | |
| "loss": -0.0036, | |
| "num_tokens": 11737513.0, | |
| "reward": 1.4228541851043701, | |
| "reward_std": 0.001154701132327318, | |
| "rewards/combined_reward/mean": 1.4228541851043701, | |
| "rewards/combined_reward/std": 0.25313766626641154, | |
| "step": 1010 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01875, | |
| "completions/max_length": 311.5, | |
| "completions/max_terminated_length": 224.3, | |
| "completions/mean_length": 90.28125, | |
| "completions/mean_terminated_length": 54.49903869628906, | |
| "completions/min_length": 12.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.10244563852759504, | |
| "frac_reward_zero_std": 0.925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5752306945171818e-07, | |
| "loss": -0.0115, | |
| "num_tokens": 11875626.0, | |
| "reward": 1.2103593707084657, | |
| "reward_std": 0.004468750953674316, | |
| "rewards/combined_reward/mean": 1.2103593707084657, | |
| "rewards/combined_reward/std": 0.40379793345928194, | |
| "step": 1020 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 128.9, | |
| "completions/max_terminated_length": 128.9, | |
| "completions/mean_length": 59.56875, | |
| "completions/mean_terminated_length": 59.56875, | |
| "completions/min_length": 15.4, | |
| "completions/min_terminated_length": 15.4, | |
| "epoch": 0.10345000753276754, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5661791041238254e-07, | |
| "loss": 0.0054, | |
| "num_tokens": 11995581.0, | |
| "reward": 1.3099791407585144, | |
| "reward_std": 0.00020833313465118408, | |
| "rewards/combined_reward/mean": 1.3099791407585144, | |
| "rewards/combined_reward/std": 0.33452749061398207, | |
| "step": 1030 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01875, | |
| "completions/max_length": 343.2, | |
| "completions/max_terminated_length": 228.1, | |
| "completions/mean_length": 114.825, | |
| "completions/mean_terminated_length": 78.1860580444336, | |
| "completions/min_length": 25.7, | |
| "completions/min_terminated_length": 25.7, | |
| "epoch": 0.10445437653794004, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5570587183202433e-07, | |
| "loss": -0.0099, | |
| "num_tokens": 12114797.0, | |
| "reward": 1.2818815290927887, | |
| "reward_std": 0.0018619796261191367, | |
| "rewards/combined_reward/mean": 1.2818815290927887, | |
| "rewards/combined_reward/std": 0.31765228807926177, | |
| "step": 1040 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 113.8, | |
| "completions/max_terminated_length": 113.8, | |
| "completions/mean_length": 55.68125, | |
| "completions/mean_terminated_length": 55.68125, | |
| "completions/min_length": 16.7, | |
| "completions/min_terminated_length": 16.7, | |
| "epoch": 0.10545874554311253, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5478706453082016e-07, | |
| "loss": -0.0016, | |
| "num_tokens": 12246978.0, | |
| "reward": 1.3307923913002013, | |
| "reward_std": 0.0002604176523163915, | |
| "rewards/combined_reward/mean": 1.3307923913002013, | |
| "rewards/combined_reward/std": 0.3518651008605957, | |
| "step": 1050 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 144.1, | |
| "completions/max_terminated_length": 144.1, | |
| "completions/mean_length": 69.0, | |
| "completions/mean_terminated_length": 69.0, | |
| "completions/min_length": 17.6, | |
| "completions/min_terminated_length": 17.6, | |
| "epoch": 0.10646311454828504, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5386160015140167e-07, | |
| "loss": 0.0061, | |
| "num_tokens": 12363690.0, | |
| "reward": 1.3816666841506957, | |
| "reward_std": 0.00692450013011694, | |
| "rewards/combined_reward/mean": 1.3816666841506957, | |
| "rewards/combined_reward/std": 0.2784981057047844, | |
| "step": 1060 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 94.3, | |
| "completions/max_terminated_length": 94.3, | |
| "completions/mean_length": 49.63125, | |
| "completions/mean_terminated_length": 49.63125, | |
| "completions/min_length": 13.9, | |
| "completions/min_terminated_length": 13.9, | |
| "epoch": 0.10746748355345755, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5292959114529024e-07, | |
| "loss": 0.0011, | |
| "num_tokens": 12481815.0, | |
| "reward": 1.3338541746139527, | |
| "reward_std": 0.002886752039194107, | |
| "rewards/combined_reward/mean": 1.3338541746139527, | |
| "rewards/combined_reward/std": 0.3240374196320772, | |
| "step": 1070 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 97.0, | |
| "completions/max_terminated_length": 97.0, | |
| "completions/mean_length": 49.3375, | |
| "completions/mean_terminated_length": 49.3375, | |
| "completions/min_length": 19.6, | |
| "completions/min_terminated_length": 19.6, | |
| "epoch": 0.10847185255863004, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5199115075923323e-07, | |
| "loss": -0.0008, | |
| "num_tokens": 12604637.0, | |
| "reward": 1.2796875, | |
| "reward_std": 0.0003608435858041048, | |
| "rewards/combined_reward/mean": 1.2796875, | |
| "rewards/combined_reward/std": 0.3038814663887024, | |
| "step": 1080 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025, | |
| "completions/max_length": 313.5, | |
| "completions/max_terminated_length": 115.4, | |
| "completions/mean_length": 112.05, | |
| "completions/mean_terminated_length": 61.88333358764648, | |
| "completions/min_length": 20.3, | |
| "completions/min_terminated_length": 20.3, | |
| "epoch": 0.10947622156380254, | |
| "frac_reward_zero_std": 0.975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5104639302144326e-07, | |
| "loss": 0.0052, | |
| "num_tokens": 12735697.0, | |
| "reward": 1.342291682958603, | |
| "reward_std": 0.0007216888945549727, | |
| "rewards/combined_reward/mean": 1.342291682958603, | |
| "rewards/combined_reward/std": 0.31657470017671585, | |
| "step": 1090 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 127.9, | |
| "completions/max_terminated_length": 127.9, | |
| "completions/mean_length": 61.70625, | |
| "completions/mean_terminated_length": 61.70625, | |
| "completions/min_length": 17.8, | |
| "completions/min_terminated_length": 17.8, | |
| "epoch": 0.11048059056897504, | |
| "frac_reward_zero_std": 0.95, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.5009543272774323e-07, | |
| "loss": 0.0029, | |
| "num_tokens": 12842590.0, | |
| "reward": 1.3991406440734864, | |
| "reward_std": 0.000572918844409287, | |
| "rewards/combined_reward/mean": 1.3991406440734864, | |
| "rewards/combined_reward/std": 0.27981497598811983, | |
| "step": 1100 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3000, | |
| "num_input_tokens_seen": 12842590, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |