| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 391.0, | |
| "completions/max_terminated_length": 391.0, | |
| "completions/mean_length": 202.91666666666666, | |
| "completions/mean_terminated_length": 221.36363636363637, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.004, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "num_tokens": 18672.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 161.0, | |
| "completions/max_terminated_length": 161.0, | |
| "completions/mean_length": 92.83333333333333, | |
| "completions/mean_terminated_length": 101.27272727272727, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.008, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 29988.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 151.0, | |
| "completions/max_terminated_length": 151.0, | |
| "completions/mean_length": 52.333333333333336, | |
| "completions/mean_terminated_length": 57.09090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.012, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 39576.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 326.0, | |
| "completions/max_terminated_length": 326.0, | |
| "completions/mean_length": 161.75, | |
| "completions/mean_terminated_length": 176.45454545454547, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 18.0, | |
| "epoch": 0.016, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 53340.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 120.0, | |
| "completions/max_terminated_length": 120.0, | |
| "completions/mean_length": 75.08333333333333, | |
| "completions/mean_terminated_length": 81.9090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.02, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 60420.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 374.0, | |
| "completions/max_terminated_length": 374.0, | |
| "completions/mean_length": 178.0, | |
| "completions/mean_terminated_length": 194.1818181818182, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.024, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 75348.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 146.0, | |
| "completions/max_terminated_length": 146.0, | |
| "completions/mean_length": 99.08333333333333, | |
| "completions/mean_terminated_length": 108.0909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 94.0, | |
| "epoch": 0.028, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.6362388134002686, | |
| "kl": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0806, | |
| "num_tokens": 83868.0, | |
| "reward": 0.5833333730697632, | |
| "reward_std": 0.5149286389350891, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 358.0, | |
| "completions/max_terminated_length": 358.0, | |
| "completions/mean_length": 190.0, | |
| "completions/mean_terminated_length": 207.27272727272728, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 124.0, | |
| "epoch": 0.032, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0022762538865208626, | |
| "kl": 0.0005378490750445053, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 97464.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 245.0, | |
| "completions/max_terminated_length": 245.0, | |
| "completions/mean_length": 141.16666666666666, | |
| "completions/mean_terminated_length": 154.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.036, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.007215190213173628, | |
| "kl": 0.0019240143010392785, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 108636.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 266.0, | |
| "completions/max_terminated_length": 266.0, | |
| "completions/mean_length": 178.41666666666666, | |
| "completions/mean_terminated_length": 194.63636363636363, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 142.0, | |
| "epoch": 0.04, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.7695807218551636, | |
| "kl": 0.014113324228674173, | |
| "learning_rate": 1e-06, | |
| "loss": -0.002, | |
| "num_tokens": 116256.0, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.38924944400787354, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 135.0, | |
| "completions/max_terminated_length": 135.0, | |
| "completions/mean_length": 84.25, | |
| "completions/mean_terminated_length": 91.9090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.044, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.026847243309021, | |
| "kl": 0.013075211551040411, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0413, | |
| "num_tokens": 124440.0, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.38924944400787354, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 199.0, | |
| "completions/max_terminated_length": 199.0, | |
| "completions/mean_length": 99.25, | |
| "completions/mean_terminated_length": 108.27272727272727, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.048, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.011818243190646172, | |
| "kl": 0.003624255710747093, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 132732.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 577.0, | |
| "completions/max_terminated_length": 577.0, | |
| "completions/mean_length": 246.08333333333334, | |
| "completions/mean_terminated_length": 268.45454545454544, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.052, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.022241737693548203, | |
| "kl": 0.00960063119418919, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 152424.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 344.0, | |
| "completions/max_terminated_length": 344.0, | |
| "completions/mean_length": 173.0, | |
| "completions/mean_terminated_length": 188.72727272727272, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 117.0, | |
| "epoch": 0.056, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.036054644733667374, | |
| "kl": 0.01761903613805771, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 162636.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 402.0, | |
| "completions/max_terminated_length": 402.0, | |
| "completions/mean_length": 268.1666666666667, | |
| "completions/mean_terminated_length": 292.54545454545456, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 102.0, | |
| "epoch": 0.06, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.00860360637307167, | |
| "kl": 0.005409976467490196, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 176904.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 127.0, | |
| "completions/max_terminated_length": 127.0, | |
| "completions/mean_length": 72.16666666666667, | |
| "completions/mean_terminated_length": 78.72727272727273, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.064, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.8470466136932373, | |
| "kl": 0.09470756724476814, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0173, | |
| "num_tokens": 186564.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 245.0, | |
| "completions/max_terminated_length": 245.0, | |
| "completions/mean_length": 118.5, | |
| "completions/mean_terminated_length": 129.27272727272728, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 34.0, | |
| "epoch": 0.068, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.10086339712142944, | |
| "kl": 0.04859759844839573, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 197484.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 88.0, | |
| "completions/max_terminated_length": 88.0, | |
| "completions/mean_length": 57.833333333333336, | |
| "completions/mean_terminated_length": 63.09090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.072, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.4592796564102173, | |
| "kl": 0.010172125417739153, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0087, | |
| "num_tokens": 207252.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 57.0, | |
| "completions/max_terminated_length": 57.0, | |
| "completions/mean_length": 32.583333333333336, | |
| "completions/mean_terminated_length": 35.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.076, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.4069900512695312, | |
| "kl": 0.025834742933511734, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0695, | |
| "num_tokens": 214320.0, | |
| "reward": 0.6666666865348816, | |
| "reward_std": 0.4923659861087799, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 386.0, | |
| "completions/max_terminated_length": 386.0, | |
| "completions/mean_length": 192.41666666666666, | |
| "completions/mean_terminated_length": 209.9090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 32.0, | |
| "epoch": 0.08, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.10245665162801743, | |
| "kl": 0.043199990526773036, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 228996.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.16666666666666663, | |
| "completions/max_length": 88.0, | |
| "completions/max_terminated_length": 88.0, | |
| "completions/mean_length": 67.91666666666667, | |
| "completions/mean_terminated_length": 81.5, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 74.0, | |
| "epoch": 0.084, | |
| "format_failures": 1.0, | |
| "grad_norm": 1.388899326324463, | |
| "kl": 0.07192051783204079, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0112, | |
| "num_tokens": 238104.0, | |
| "reward": 0.75, | |
| "reward_std": 0.45226702094078064, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 73.0, | |
| "completions/max_terminated_length": 73.0, | |
| "completions/mean_length": 43.583333333333336, | |
| "completions/mean_terminated_length": 47.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.088, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.2448337078094482, | |
| "kl": 0.0771165993064642, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0077, | |
| "num_tokens": 245280.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5222329497337341, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 335.0, | |
| "completions/max_terminated_length": 335.0, | |
| "completions/mean_length": 167.83333333333334, | |
| "completions/mean_terminated_length": 183.0909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.092, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.0195705890655518, | |
| "kl": 0.211347796022892, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 257148.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 350.0, | |
| "completions/max_terminated_length": 350.0, | |
| "completions/mean_length": 201.41666666666666, | |
| "completions/mean_terminated_length": 219.72727272727272, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 101.0, | |
| "epoch": 0.096, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.20492610335350037, | |
| "kl": 0.08658944815397263, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 266304.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 312.0, | |
| "completions/max_terminated_length": 312.0, | |
| "completions/mean_length": 170.0, | |
| "completions/mean_terminated_length": 185.45454545454547, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.1, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0755978599190712, | |
| "kl": 0.040397679433226585, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 278760.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 411.0, | |
| "completions/max_terminated_length": 411.0, | |
| "completions/mean_length": 173.83333333333334, | |
| "completions/mean_terminated_length": 189.63636363636363, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.104, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.04659981280565262, | |
| "kl": 0.023209942039102316, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 293628.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 92.0, | |
| "completions/max_terminated_length": 92.0, | |
| "completions/mean_length": 55.25, | |
| "completions/mean_terminated_length": 60.27272727272727, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 43.0, | |
| "epoch": 0.108, | |
| "format_failures": 0.0, | |
| "grad_norm": 21.968534469604492, | |
| "kl": 5.299874305725098, | |
| "learning_rate": 1e-06, | |
| "loss": 0.1192, | |
| "num_tokens": 301488.0, | |
| "reward": 0.5833333730697632, | |
| "reward_std": 0.5149286389350891, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 358.0, | |
| "completions/max_terminated_length": 358.0, | |
| "completions/mean_length": 165.41666666666666, | |
| "completions/mean_terminated_length": 180.45454545454547, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.112, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.014507513493299484, | |
| "kl": 0.01523882569745183, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 314748.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 289.0, | |
| "completions/max_terminated_length": 289.0, | |
| "completions/mean_length": 193.33333333333334, | |
| "completions/mean_terminated_length": 210.9090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 119.0, | |
| "epoch": 0.116, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.010872351005673409, | |
| "kl": 0.010655859019607306, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 328692.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 271.0, | |
| "completions/max_terminated_length": 271.0, | |
| "completions/mean_length": 167.16666666666666, | |
| "completions/mean_terminated_length": 182.36363636363637, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 0.12, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.0025266408920288, | |
| "kl": 0.025600655004382133, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0472, | |
| "num_tokens": 340752.0, | |
| "reward": 0.3333333432674408, | |
| "reward_std": 0.4923659861087799, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 162.0, | |
| "completions/max_terminated_length": 162.0, | |
| "completions/mean_length": 79.33333333333333, | |
| "completions/mean_terminated_length": 86.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 17.0, | |
| "epoch": 0.124, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.01500674244016409, | |
| "kl": 0.006932976422831416, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 366936.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 153.0, | |
| "completions/max_terminated_length": 153.0, | |
| "completions/mean_length": 100.25, | |
| "completions/mean_terminated_length": 109.36363636363636, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.128, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.572136640548706, | |
| "kl": 0.016836593858897686, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0253, | |
| "num_tokens": 375948.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 91.0, | |
| "completions/max_terminated_length": 91.0, | |
| "completions/mean_length": 52.75, | |
| "completions/mean_terminated_length": 57.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.132, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.6049137115478516, | |
| "kl": 0.08474422618746758, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0534, | |
| "num_tokens": 382608.0, | |
| "reward": 0.3333333432674408, | |
| "reward_std": 0.4923659861087799, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 98.0, | |
| "completions/max_terminated_length": 98.0, | |
| "completions/mean_length": 61.416666666666664, | |
| "completions/mean_terminated_length": 67.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.136, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.9431159496307373, | |
| "kl": 0.04839755780994892, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1095, | |
| "num_tokens": 389208.0, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.38924944400787354, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 151.0, | |
| "completions/max_terminated_length": 151.0, | |
| "completions/mean_length": 118.91666666666667, | |
| "completions/mean_terminated_length": 129.72727272727272, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 105.0, | |
| "epoch": 0.14, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.03593799099326134, | |
| "kl": 0.03462314326316118, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 396696.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 371.0, | |
| "completions/max_terminated_length": 371.0, | |
| "completions/mean_length": 245.58333333333334, | |
| "completions/mean_terminated_length": 267.90909090909093, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 183.0, | |
| "epoch": 0.144, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.025885488837957382, | |
| "kl": 0.02637413516640663, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 411372.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 296.0, | |
| "completions/max_terminated_length": 296.0, | |
| "completions/mean_length": 179.41666666666666, | |
| "completions/mean_terminated_length": 195.72727272727272, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 129.0, | |
| "epoch": 0.148, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.11734314262866974, | |
| "kl": 0.0526489345356822, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 424404.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 630.0, | |
| "completions/max_terminated_length": 630.0, | |
| "completions/mean_length": 332.0833333333333, | |
| "completions/mean_terminated_length": 362.27272727272725, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.152, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.5079672932624817, | |
| "kl": 0.052276700269430876, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0148, | |
| "num_tokens": 444576.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 148.0, | |
| "completions/max_terminated_length": 148.0, | |
| "completions/mean_length": 67.25, | |
| "completions/mean_terminated_length": 73.36363636363636, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 0.156, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.6541359424591064, | |
| "kl": 0.5338308056816459, | |
| "learning_rate": 1e-06, | |
| "loss": -0.1217, | |
| "num_tokens": 453192.0, | |
| "reward": 0.25, | |
| "reward_std": 0.45226702094078064, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 245.0, | |
| "completions/max_terminated_length": 245.0, | |
| "completions/mean_length": 139.91666666666666, | |
| "completions/mean_terminated_length": 152.63636363636363, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.16, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.3757868707180023, | |
| "kl": 0.13857688568532467, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 467928.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 84.0, | |
| "completions/max_terminated_length": 84.0, | |
| "completions/mean_length": 48.0, | |
| "completions/mean_terminated_length": 52.36363636363637, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.164, | |
| "format_failures": 1.0, | |
| "grad_norm": 4.323275566101074, | |
| "kl": 0.21433213353157043, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0993, | |
| "num_tokens": 473472.0, | |
| "reward": 0.25, | |
| "reward_std": 0.45226702094078064, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 311.0, | |
| "completions/max_terminated_length": 311.0, | |
| "completions/mean_length": 109.58333333333333, | |
| "completions/mean_terminated_length": 119.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.168, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.22781899571418762, | |
| "kl": 0.07318945415318012, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 488148.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 49.0, | |
| "completions/max_terminated_length": 49.0, | |
| "completions/mean_length": 31.166666666666668, | |
| "completions/mean_terminated_length": 34.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.172, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.492840051651001, | |
| "kl": 0.224076546728611, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0283, | |
| "num_tokens": 492624.0, | |
| "reward": 0.25, | |
| "reward_std": 0.45226702094078064, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 66.0, | |
| "completions/max_terminated_length": 66.0, | |
| "completions/mean_length": 52.25, | |
| "completions/mean_terminated_length": 57.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 32.0, | |
| "epoch": 0.176, | |
| "format_failures": 0.0, | |
| "grad_norm": 3.037781000137329, | |
| "kl": 0.2150058075785637, | |
| "learning_rate": 1e-06, | |
| "loss": -0.013, | |
| "num_tokens": 499752.0, | |
| "reward": 0.6666666865348816, | |
| "reward_std": 0.4923659861087799, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 144.0, | |
| "completions/max_terminated_length": 144.0, | |
| "completions/mean_length": 83.0, | |
| "completions/mean_terminated_length": 90.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.18, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.3224222660064697, | |
| "kl": 0.36255764216184616, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0749, | |
| "num_tokens": 508428.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 160.0, | |
| "completions/max_terminated_length": 160.0, | |
| "completions/mean_length": 119.0, | |
| "completions/mean_terminated_length": 129.8181818181818, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 106.0, | |
| "epoch": 0.184, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.5437301397323608, | |
| "kl": 0.06716796010732651, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0526, | |
| "num_tokens": 517416.0, | |
| "reward": 0.75, | |
| "reward_std": 0.45226702094078064, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 162.0, | |
| "completions/max_terminated_length": 162.0, | |
| "completions/mean_length": 86.66666666666667, | |
| "completions/mean_terminated_length": 94.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.188, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.92403244972229, | |
| "kl": 0.04993921332061291, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0667, | |
| "num_tokens": 525384.0, | |
| "reward": 0.25, | |
| "reward_std": 0.45226702094078064, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 607.0, | |
| "completions/max_terminated_length": 607.0, | |
| "completions/mean_length": 263.0833333333333, | |
| "completions/mean_terminated_length": 287.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.192, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.002583070658147335, | |
| "kl": 0.0069114591460675, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 560328.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 337.0, | |
| "completions/max_terminated_length": 337.0, | |
| "completions/mean_length": 178.16666666666666, | |
| "completions/mean_terminated_length": 194.36363636363637, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 9.0, | |
| "epoch": 0.196, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.055018555372953415, | |
| "kl": 0.04814303293824196, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 573552.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 205.0, | |
| "completions/max_terminated_length": 205.0, | |
| "completions/mean_length": 104.25, | |
| "completions/mean_terminated_length": 113.72727272727273, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.2, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.10304596275091171, | |
| "kl": 0.0782565288245678, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 583980.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 529.0, | |
| "completions/max_terminated_length": 529.0, | |
| "completions/mean_length": 171.25, | |
| "completions/mean_terminated_length": 186.8181818181818, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 0.204, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.10456845909357071, | |
| "kl": 0.05266672745347023, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 606264.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 142.0, | |
| "completions/max_terminated_length": 142.0, | |
| "completions/mean_length": 86.0, | |
| "completions/mean_terminated_length": 93.81818181818181, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.208, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.117820978164673, | |
| "kl": 0.12709446623921394, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0977, | |
| "num_tokens": 616176.0, | |
| "reward": 0.25, | |
| "reward_std": 0.45226702094078064, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 169.0, | |
| "completions/max_terminated_length": 169.0, | |
| "completions/mean_length": 67.66666666666667, | |
| "completions/mean_terminated_length": 73.81818181818181, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 26.0, | |
| "epoch": 0.212, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.36178988218307495, | |
| "kl": 0.06635316368192434, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 625992.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 292.0, | |
| "completions/max_terminated_length": 292.0, | |
| "completions/mean_length": 180.5, | |
| "completions/mean_terminated_length": 196.9090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 88.0, | |
| "epoch": 0.216, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.520895957946777, | |
| "kl": 0.6420021317899227, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0043, | |
| "num_tokens": 640824.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 81.0, | |
| "completions/max_terminated_length": 81.0, | |
| "completions/mean_length": 59.5, | |
| "completions/mean_terminated_length": 64.9090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.22, | |
| "format_failures": 0.0, | |
| "grad_norm": 9.40858268737793, | |
| "kl": 1.514443002641201, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0487, | |
| "num_tokens": 649008.0, | |
| "reward": 0.5833333730697632, | |
| "reward_std": 0.5149286389350891, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 647.0, | |
| "completions/max_terminated_length": 647.0, | |
| "completions/mean_length": 265.25, | |
| "completions/mean_terminated_length": 289.3636363636364, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.224, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.12246920168399811, | |
| "kl": 0.04888852685689926, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 665112.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.16666666666666663, | |
| "completions/max_length": 90.0, | |
| "completions/max_terminated_length": 90.0, | |
| "completions/mean_length": 54.416666666666664, | |
| "completions/mean_terminated_length": 65.3, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.228, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.4643149971961975, | |
| "kl": 0.2062125913798809, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 671268.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 285.0, | |
| "completions/max_terminated_length": 285.0, | |
| "completions/mean_length": 138.83333333333334, | |
| "completions/mean_terminated_length": 151.45454545454547, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.232, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.028489232063293457, | |
| "kl": 0.028692953288555145, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 681648.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 189.0, | |
| "completions/max_terminated_length": 189.0, | |
| "completions/mean_length": 119.33333333333333, | |
| "completions/mean_terminated_length": 130.1818181818182, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.236, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.2943709194660187, | |
| "kl": 0.021217118948698044, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 692148.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 123.0, | |
| "completions/max_terminated_length": 123.0, | |
| "completions/mean_length": 65.25, | |
| "completions/mean_terminated_length": 71.18181818181819, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.24, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.4704815149307251, | |
| "kl": 0.1355944722890854, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0016, | |
| "num_tokens": 705504.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 209.0, | |
| "completions/max_terminated_length": 209.0, | |
| "completions/mean_length": 106.75, | |
| "completions/mean_terminated_length": 116.45454545454545, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 61.0, | |
| "epoch": 0.244, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.8021370768547058, | |
| "kl": 0.06047418341040611, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0229, | |
| "num_tokens": 712920.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 356.0, | |
| "completions/max_terminated_length": 356.0, | |
| "completions/mean_length": 201.83333333333334, | |
| "completions/mean_terminated_length": 220.1818181818182, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.248, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0354565754532814, | |
| "kl": 0.051246967166662216, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 725280.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 260.0, | |
| "completions/max_terminated_length": 260.0, | |
| "completions/mean_length": 129.0, | |
| "completions/mean_terminated_length": 140.72727272727272, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.252, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.838399350643158, | |
| "kl": 0.03389432094991207, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0256, | |
| "num_tokens": 733980.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 227.0, | |
| "completions/max_terminated_length": 227.0, | |
| "completions/mean_length": 121.08333333333333, | |
| "completions/mean_terminated_length": 132.0909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.256, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.008542679250240326, | |
| "kl": 0.02384038269519806, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 763224.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 302.0, | |
| "completions/max_terminated_length": 302.0, | |
| "completions/mean_length": 180.33333333333334, | |
| "completions/mean_terminated_length": 196.72727272727272, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 36.0, | |
| "epoch": 0.26, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.01127433218061924, | |
| "kl": 0.013883833773434162, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 777984.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 141.0, | |
| "completions/max_terminated_length": 141.0, | |
| "completions/mean_length": 75.83333333333333, | |
| "completions/mean_terminated_length": 82.72727272727273, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 21.0, | |
| "epoch": 0.264, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.09972423315048218, | |
| "kl": 0.06396586634218693, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 785844.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 222.0, | |
| "completions/max_terminated_length": 222.0, | |
| "completions/mean_length": 140.41666666666666, | |
| "completions/mean_terminated_length": 153.1818181818182, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.268, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.03430556878447533, | |
| "kl": 0.03857766184955835, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 796632.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 209.0, | |
| "completions/max_terminated_length": 209.0, | |
| "completions/mean_length": 115.08333333333333, | |
| "completions/mean_terminated_length": 125.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 14.0, | |
| "epoch": 0.272, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.6054855585098267, | |
| "kl": 0.020691730547696352, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0511, | |
| "num_tokens": 807576.0, | |
| "reward": 0.4166666865348816, | |
| "reward_std": 0.5149286389350891, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 106.0, | |
| "completions/max_terminated_length": 106.0, | |
| "completions/mean_length": 80.5, | |
| "completions/mean_terminated_length": 87.81818181818181, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 64.0, | |
| "epoch": 0.276, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.1459321975708008, | |
| "kl": 0.017325148917734623, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0067, | |
| "num_tokens": 814644.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 304.0, | |
| "completions/max_terminated_length": 304.0, | |
| "completions/mean_length": 175.58333333333334, | |
| "completions/mean_terminated_length": 191.54545454545453, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 17.0, | |
| "epoch": 0.28, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.008818876929581165, | |
| "kl": 0.012372600380331278, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 826932.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 292.0, | |
| "completions/max_terminated_length": 292.0, | |
| "completions/mean_length": 195.0, | |
| "completions/mean_terminated_length": 212.72727272727272, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 115.0, | |
| "epoch": 0.284, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.014721119776368141, | |
| "kl": 0.012880454771220684, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 842268.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 145.0, | |
| "completions/max_terminated_length": 145.0, | |
| "completions/mean_length": 94.83333333333333, | |
| "completions/mean_terminated_length": 103.45454545454545, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 75.0, | |
| "epoch": 0.288, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.9220354557037354, | |
| "kl": 0.046924193389713764, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0118, | |
| "num_tokens": 849612.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 262.0, | |
| "completions/max_terminated_length": 262.0, | |
| "completions/mean_length": 149.5, | |
| "completions/mean_terminated_length": 163.0909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.292, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.0295345988124609, | |
| "kl": 0.03905524965375662, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 859632.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 220.0, | |
| "completions/max_terminated_length": 220.0, | |
| "completions/mean_length": 158.41666666666666, | |
| "completions/mean_terminated_length": 172.8181818181818, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 24.0, | |
| "epoch": 0.296, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.11439846456050873, | |
| "kl": 0.07962214201688766, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 870756.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 223.0, | |
| "completions/max_terminated_length": 223.0, | |
| "completions/mean_length": 110.08333333333333, | |
| "completions/mean_terminated_length": 120.0909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.3, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.04706709831953049, | |
| "kl": 0.03136777225881815, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 887700.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 544.0, | |
| "completions/max_terminated_length": 544.0, | |
| "completions/mean_length": 219.58333333333334, | |
| "completions/mean_terminated_length": 239.54545454545453, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 8.0, | |
| "epoch": 0.304, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.106910839676857, | |
| "kl": 0.16153255105018616, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 899544.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 175.0, | |
| "completions/max_terminated_length": 175.0, | |
| "completions/mean_length": 118.66666666666667, | |
| "completions/mean_terminated_length": 129.45454545454547, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 48.0, | |
| "epoch": 0.308, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.9582226276397705, | |
| "kl": 0.1435188725590706, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0297, | |
| "num_tokens": 909816.0, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.38924944400787354, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 200.0, | |
| "completions/max_terminated_length": 200.0, | |
| "completions/mean_length": 151.83333333333334, | |
| "completions/mean_terminated_length": 165.63636363636363, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 85.0, | |
| "epoch": 0.312, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.6430385112762451, | |
| "kl": 0.021885435096919537, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0413, | |
| "num_tokens": 919620.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 127.0, | |
| "completions/max_terminated_length": 127.0, | |
| "completions/mean_length": 87.66666666666667, | |
| "completions/mean_terminated_length": 95.63636363636364, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 72.0, | |
| "epoch": 0.316, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.1316368579864502, | |
| "kl": 0.052431097254157066, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 930468.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 357.0, | |
| "completions/max_terminated_length": 357.0, | |
| "completions/mean_length": 188.66666666666666, | |
| "completions/mean_terminated_length": 205.8181818181818, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.32, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.24080750346183777, | |
| "kl": 0.25305451452732086, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0008, | |
| "num_tokens": 947112.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 171.0, | |
| "completions/max_terminated_length": 171.0, | |
| "completions/mean_length": 130.75, | |
| "completions/mean_terminated_length": 142.63636363636363, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 77.0, | |
| "epoch": 0.324, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.9585680961608887, | |
| "kl": 0.02085646940395236, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0725, | |
| "num_tokens": 956448.0, | |
| "reward": 0.5, | |
| "reward_std": 0.5222329497337341, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 220.0, | |
| "completions/max_terminated_length": 220.0, | |
| "completions/mean_length": 119.25, | |
| "completions/mean_terminated_length": 130.0909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.328, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.02760450914502144, | |
| "kl": 0.020923216827213764, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 966324.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 476.0, | |
| "completions/max_terminated_length": 476.0, | |
| "completions/mean_length": 252.33333333333334, | |
| "completions/mean_terminated_length": 275.27272727272725, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.332, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.011845018714666367, | |
| "kl": 0.017354148440063, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 985296.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 234.0, | |
| "completions/max_terminated_length": 234.0, | |
| "completions/mean_length": 101.16666666666667, | |
| "completions/mean_terminated_length": 110.36363636363636, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.336, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.02075113356113434, | |
| "kl": 0.013977942056953907, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 998856.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 217.0, | |
| "completions/max_terminated_length": 217.0, | |
| "completions/mean_length": 156.0, | |
| "completions/mean_terminated_length": 170.1818181818182, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 44.0, | |
| "epoch": 0.34, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.018603280186653137, | |
| "kl": 0.020112676545977592, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 1008864.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 364.0, | |
| "completions/max_terminated_length": 364.0, | |
| "completions/mean_length": 223.66666666666666, | |
| "completions/mean_terminated_length": 244.0, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 0.344, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.011895284056663513, | |
| "kl": 0.021254747174680233, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 1022556.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 159.0, | |
| "completions/max_terminated_length": 159.0, | |
| "completions/mean_length": 117.66666666666667, | |
| "completions/mean_terminated_length": 128.36363636363637, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 112.0, | |
| "epoch": 0.348, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.1451243162155151, | |
| "kl": 0.026615198701620102, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0587, | |
| "num_tokens": 1032684.0, | |
| "reward": 0.4166666865348816, | |
| "reward_std": 0.5149286389350891, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 304.0, | |
| "completions/max_terminated_length": 304.0, | |
| "completions/mean_length": 142.25, | |
| "completions/mean_terminated_length": 155.1818181818182, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 12.0, | |
| "epoch": 0.352, | |
| "format_failures": 2.0, | |
| "grad_norm": 0.8502682447433472, | |
| "kl": 0.012907921802252531, | |
| "learning_rate": 1e-06, | |
| "loss": 0.079, | |
| "num_tokens": 1067328.0, | |
| "reward": 0.1666666716337204, | |
| "reward_std": 0.3892494738101959, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 177.0, | |
| "completions/max_terminated_length": 177.0, | |
| "completions/mean_length": 114.5, | |
| "completions/mean_terminated_length": 124.9090909090909, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 78.0, | |
| "epoch": 0.356, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.7634170651435852, | |
| "kl": 0.08245750516653061, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0177, | |
| "num_tokens": 1074756.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 141.0, | |
| "completions/max_terminated_length": 141.0, | |
| "completions/mean_length": 73.0, | |
| "completions/mean_terminated_length": 79.63636363636364, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 16.0, | |
| "epoch": 0.36, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.750490665435791, | |
| "kl": 0.032081443816423416, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0503, | |
| "num_tokens": 1083096.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 138.0, | |
| "completions/max_terminated_length": 138.0, | |
| "completions/mean_length": 53.666666666666664, | |
| "completions/mean_terminated_length": 58.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.364, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.12016791850328445, | |
| "kl": 0.04432140104472637, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 1090380.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 800.0, | |
| "completions/max_terminated_length": 800.0, | |
| "completions/mean_length": 397.5, | |
| "completions/mean_terminated_length": 433.6363636363636, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 14.0, | |
| "epoch": 0.368, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.012203319929540157, | |
| "kl": 0.009247956797480583, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 1113504.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 261.0, | |
| "completions/max_terminated_length": 261.0, | |
| "completions/mean_length": 138.75, | |
| "completions/mean_terminated_length": 151.36363636363637, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 23.0, | |
| "epoch": 0.372, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.03371990844607353, | |
| "kl": 0.029644143767654896, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 1125492.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 89.0, | |
| "completions/max_terminated_length": 89.0, | |
| "completions/mean_length": 51.833333333333336, | |
| "completions/mean_terminated_length": 56.54545454545455, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.376, | |
| "format_failures": 0.0, | |
| "grad_norm": 2.027597427368164, | |
| "kl": 0.19823284726589918, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0712, | |
| "num_tokens": 1130748.0, | |
| "reward": 0.75, | |
| "reward_std": 0.45226702094078064, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 118.0, | |
| "completions/max_terminated_length": 118.0, | |
| "completions/mean_length": 74.33333333333333, | |
| "completions/mean_terminated_length": 81.0909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 62.0, | |
| "epoch": 0.38, | |
| "format_failures": 0.0, | |
| "grad_norm": 1.4458988904953003, | |
| "kl": 0.07499337941408157, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0187, | |
| "num_tokens": 1138584.0, | |
| "reward": 0.0833333358168602, | |
| "reward_std": 0.28867512941360474, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 285.0, | |
| "completions/max_terminated_length": 285.0, | |
| "completions/mean_length": 137.66666666666666, | |
| "completions/mean_terminated_length": 150.1818181818182, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 15.0, | |
| "epoch": 0.384, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.03813532739877701, | |
| "kl": 0.023914064280688763, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 1154124.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 206.0, | |
| "completions/max_terminated_length": 206.0, | |
| "completions/mean_length": 125.66666666666667, | |
| "completions/mean_terminated_length": 137.0909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.388, | |
| "format_failures": 1.0, | |
| "grad_norm": 0.016639724373817444, | |
| "kl": 0.019042176194489002, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 1165560.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 209.0, | |
| "completions/max_terminated_length": 209.0, | |
| "completions/mean_length": 136.0, | |
| "completions/mean_terminated_length": 148.36363636363637, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 11.0, | |
| "epoch": 0.392, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.041289571672677994, | |
| "kl": 0.025019565597176552, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 1176936.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 539.0, | |
| "completions/max_terminated_length": 539.0, | |
| "completions/mean_length": 236.75, | |
| "completions/mean_terminated_length": 258.27272727272725, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 10.0, | |
| "epoch": 0.396, | |
| "format_failures": 0.0, | |
| "grad_norm": 0.029155507683753967, | |
| "kl": 0.03094907756894827, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 1194108.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08333333333333337, | |
| "completions/max_length": 31.0, | |
| "completions/max_terminated_length": 31.0, | |
| "completions/mean_length": 21.0, | |
| "completions/mean_terminated_length": 22.90909090909091, | |
| "completions/min_length": 0.0, | |
| "completions/min_terminated_length": 13.0, | |
| "epoch": 0.4, | |
| "format_failures": 0.0, | |
| "grad_norm": 5.876866340637207, | |
| "kl": 0.1100139394402504, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0323, | |
| "num_tokens": 1202412.0, | |
| "reward": 0.75, | |
| "reward_std": 0.45226702094078064, | |
| "step": 100 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 1202412, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |