| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9984, | |
| "eval_steps": 50, | |
| "global_step": 312, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.028125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1430.2, | |
| "completions/mean_length": 166.39462890625, | |
| "completions/mean_terminated_length": 126.74219970703125, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "epoch": 0.016, | |
| "grad_norm": 0.0278764758259058, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "loss": 0.0308, | |
| "num_tokens": 13404233.0, | |
| "reward": 0.435546875, | |
| "reward_std": 0.3221697866916656, | |
| "rewards/accuracy_reward": 0.2009765625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.6701171875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02431640625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1472.4, | |
| "completions/mean_length": 162.2275390625, | |
| "completions/mean_terminated_length": 128.01040344238282, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "epoch": 0.032, | |
| "grad_norm": 0.022988498210906982, | |
| "learning_rate": 6.249999999999999e-07, | |
| "loss": 0.038, | |
| "num_tokens": 27022115.0, | |
| "reward": 0.462451171875, | |
| "reward_std": 0.3004496514797211, | |
| "rewards/accuracy_reward": 0.19091796875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.733984375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00947265625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1380.6, | |
| "completions/mean_length": 117.775, | |
| "completions/mean_terminated_length": 104.22791290283203, | |
| "completions/min_length": 3.6, | |
| "completions/min_terminated_length": 3.6, | |
| "epoch": 0.048, | |
| "grad_norm": 0.025120964273810387, | |
| "learning_rate": 9.374999999999999e-07, | |
| "loss": 0.0318, | |
| "num_tokens": 40133187.0, | |
| "reward": 0.583056640625, | |
| "reward_std": 0.2110624998807907, | |
| "rewards/accuracy_reward": 0.24814453125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.91796875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0048828125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 1017.2, | |
| "completions/mean_length": 87.47548828125, | |
| "completions/mean_terminated_length": 80.37409057617188, | |
| "completions/min_length": 8.8, | |
| "completions/min_terminated_length": 8.8, | |
| "epoch": 0.064, | |
| "grad_norm": 0.00622530234977603, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0215, | |
| "num_tokens": 52803656.0, | |
| "reward": 0.6568359375, | |
| "reward_std": 0.15337491929531097, | |
| "rewards/accuracy_reward": 0.3328125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.980859375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0005859375, | |
| "completions/max_length": 1171.8, | |
| "completions/max_terminated_length": 593.8, | |
| "completions/mean_length": 73.31455078125, | |
| "completions/mean_terminated_length": 72.45734252929688, | |
| "completions/min_length": 13.6, | |
| "completions/min_terminated_length": 13.6, | |
| "epoch": 0.08, | |
| "grad_norm": 0.01711602509021759, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0012, | |
| "num_tokens": 65343869.0, | |
| "reward": 0.697216796875, | |
| "reward_std": 0.1176684021949768, | |
| "rewards/accuracy_reward": 0.39697265625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9974609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00029296875, | |
| "completions/max_length": 1257.4, | |
| "completions/max_terminated_length": 598.0, | |
| "completions/mean_length": 72.3849609375, | |
| "completions/mean_terminated_length": 71.9559326171875, | |
| "completions/min_length": 18.0, | |
| "completions/min_terminated_length": 18.0, | |
| "epoch": 0.096, | |
| "grad_norm": 0.0022409269586205482, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 77986019.0, | |
| "reward": 0.699267578125, | |
| "reward_std": 0.10926563590765, | |
| "rewards/accuracy_reward": 0.4005859375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99794921875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.000390625, | |
| "completions/max_length": 1141.6, | |
| "completions/max_terminated_length": 687.8, | |
| "completions/mean_length": 75.8130859375, | |
| "completions/mean_terminated_length": 75.24237976074218, | |
| "completions/min_length": 19.8, | |
| "completions/min_terminated_length": 19.8, | |
| "epoch": 0.112, | |
| "grad_norm": 0.002861637622117996, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0027, | |
| "num_tokens": 90728137.0, | |
| "reward": 0.71552734375, | |
| "reward_std": 0.1081365168094635, | |
| "rewards/accuracy_reward": 0.43212890625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99892578125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00048828125, | |
| "completions/max_length": 1178.2, | |
| "completions/max_terminated_length": 563.4, | |
| "completions/mean_length": 79.030859375, | |
| "completions/mean_terminated_length": 78.31913757324219, | |
| "completions/min_length": 20.8, | |
| "completions/min_terminated_length": 20.8, | |
| "epoch": 0.128, | |
| "grad_norm": 0.0015531065873801708, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0023, | |
| "num_tokens": 103310405.0, | |
| "reward": 0.713525390625, | |
| "reward_std": 0.09464964717626571, | |
| "rewards/accuracy_reward": 0.4279296875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99912109375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00048828125, | |
| "completions/max_length": 1106.2, | |
| "completions/max_terminated_length": 417.8, | |
| "completions/mean_length": 77.76083984375, | |
| "completions/mean_terminated_length": 77.0487274169922, | |
| "completions/min_length": 17.6, | |
| "completions/min_terminated_length": 17.6, | |
| "epoch": 0.144, | |
| "grad_norm": 0.0022233380004763603, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0017, | |
| "num_tokens": 115913428.0, | |
| "reward": 0.76220703125, | |
| "reward_std": 0.09807199090719224, | |
| "rewards/accuracy_reward": 0.52548828125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99892578125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0001953125, | |
| "completions/max_length": 853.4, | |
| "completions/max_terminated_length": 390.8, | |
| "completions/mean_length": 79.07109375, | |
| "completions/mean_terminated_length": 78.78670043945313, | |
| "completions/min_length": 23.6, | |
| "completions/min_terminated_length": 23.6, | |
| "epoch": 0.16, | |
| "grad_norm": 0.0017749707913026214, | |
| "learning_rate": 1e-06, | |
| "loss": 0.001, | |
| "num_tokens": 128600364.0, | |
| "reward": 0.7380859375, | |
| "reward_std": 0.09451625794172287, | |
| "rewards/accuracy_reward": 0.4765625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 232.5, | |
| "eval_completions/max_terminated_length": 232.5, | |
| "eval_completions/mean_length": 82.95326042175293, | |
| "eval_completions/mean_terminated_length": 82.95326042175293, | |
| "eval_completions/min_length": 28.25, | |
| "eval_completions/min_terminated_length": 28.25, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 128600364.0, | |
| "eval_reward": 0.69140625, | |
| "eval_reward_std": 0.24272222816944122, | |
| "eval_rewards/accuracy_reward": 0.3828125, | |
| "eval_rewards/brier_reward": 0.0, | |
| "eval_rewards/confidence_one_or_zero": 0.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 13.311, | |
| "eval_samples_per_second": 37.563, | |
| "eval_steps_per_second": 0.301, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00029296875, | |
| "completions/max_length": 834.6, | |
| "completions/max_terminated_length": 366.6, | |
| "completions/mean_length": 83.1912109375, | |
| "completions/mean_terminated_length": 82.76549530029297, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "epoch": 0.176, | |
| "grad_norm": 0.0019512384897097945, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0016, | |
| "num_tokens": 141545682.0, | |
| "reward": 0.733056640625, | |
| "reward_std": 0.095227712392807, | |
| "rewards/accuracy_reward": 0.46650390625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00048828125, | |
| "completions/max_length": 1311.4, | |
| "completions/max_terminated_length": 485.4, | |
| "completions/mean_length": 86.99189453125, | |
| "completions/mean_terminated_length": 86.28425750732421, | |
| "completions/min_length": 22.6, | |
| "completions/min_terminated_length": 22.6, | |
| "epoch": 0.192, | |
| "grad_norm": 0.0016324262833222747, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0021, | |
| "num_tokens": 154107615.0, | |
| "reward": 0.74697265625, | |
| "reward_std": 0.08921304196119309, | |
| "rewards/accuracy_reward": 0.4947265625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99921875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.000390625, | |
| "completions/max_length": 1086.2, | |
| "completions/max_terminated_length": 592.6, | |
| "completions/mean_length": 90.79443359375, | |
| "completions/mean_terminated_length": 90.22920989990234, | |
| "completions/min_length": 28.2, | |
| "completions/min_terminated_length": 28.2, | |
| "epoch": 0.208, | |
| "grad_norm": 0.0016499038320034742, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0023, | |
| "num_tokens": 166925894.0, | |
| "reward": 0.77353515625, | |
| "reward_std": 0.08650225400924683, | |
| "rewards/accuracy_reward": 0.54765625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9994140625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0009765625, | |
| "completions/max_length": 1338.6, | |
| "completions/max_terminated_length": 731.0, | |
| "completions/mean_length": 96.20087890625, | |
| "completions/mean_terminated_length": 94.7965301513672, | |
| "completions/min_length": 28.6, | |
| "completions/min_terminated_length": 28.6, | |
| "epoch": 0.224, | |
| "grad_norm": 0.001517058233730495, | |
| "learning_rate": 1e-06, | |
| "loss": 0.002, | |
| "num_tokens": 179920495.0, | |
| "reward": 0.74716796875, | |
| "reward_std": 0.08538677096366883, | |
| "rewards/accuracy_reward": 0.49560546875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99873046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00107421875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 574.4, | |
| "completions/mean_length": 97.76240234375, | |
| "completions/mean_terminated_length": 96.21627960205078, | |
| "completions/min_length": 25.4, | |
| "completions/min_terminated_length": 25.4, | |
| "epoch": 0.24, | |
| "grad_norm": 0.0017738911556079984, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0028, | |
| "num_tokens": 193029582.0, | |
| "reward": 0.77578125, | |
| "reward_std": 0.09508010596036912, | |
| "rewards/accuracy_reward": 0.55283203125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99873046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0009765625, | |
| "completions/max_length": 1319.4, | |
| "completions/max_terminated_length": 417.2, | |
| "completions/mean_length": 96.47294921875, | |
| "completions/mean_terminated_length": 95.06647644042968, | |
| "completions/min_length": 30.6, | |
| "completions/min_terminated_length": 30.6, | |
| "epoch": 0.256, | |
| "grad_norm": 0.0015991576947271824, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0029, | |
| "num_tokens": 205928601.0, | |
| "reward": 0.75869140625, | |
| "reward_std": 0.08824991285800934, | |
| "rewards/accuracy_reward": 0.518359375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00087890625, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 459.0, | |
| "completions/mean_length": 99.259765625, | |
| "completions/mean_terminated_length": 97.99633331298828, | |
| "completions/min_length": 31.0, | |
| "completions/min_terminated_length": 31.0, | |
| "epoch": 0.272, | |
| "grad_norm": 0.0014954438665881753, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0026, | |
| "num_tokens": 218767037.0, | |
| "reward": 0.7568359375, | |
| "reward_std": 0.08405127227306367, | |
| "rewards/accuracy_reward": 0.51484375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.998828125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00068359375, | |
| "completions/max_length": 1149.6, | |
| "completions/max_terminated_length": 682.4, | |
| "completions/mean_length": 94.7064453125, | |
| "completions/mean_terminated_length": 93.72140197753906, | |
| "completions/min_length": 32.2, | |
| "completions/min_terminated_length": 32.2, | |
| "epoch": 0.288, | |
| "grad_norm": 0.0017134748632088304, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0026, | |
| "num_tokens": 231551327.0, | |
| "reward": 0.7654296875, | |
| "reward_std": 0.08652912825345993, | |
| "rewards/accuracy_reward": 0.53154296875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99931640625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00078125, | |
| "completions/max_length": 1091.0, | |
| "completions/max_terminated_length": 413.2, | |
| "completions/mean_length": 94.5794921875, | |
| "completions/mean_terminated_length": 93.45206604003906, | |
| "completions/min_length": 34.4, | |
| "completions/min_terminated_length": 34.4, | |
| "epoch": 0.304, | |
| "grad_norm": 0.002114097587764263, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0031, | |
| "num_tokens": 244306093.0, | |
| "reward": 0.762939453125, | |
| "reward_std": 0.08613481372594833, | |
| "rewards/accuracy_reward": 0.52705078125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.998828125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0005859375, | |
| "completions/max_length": 1399.2, | |
| "completions/max_terminated_length": 458.8, | |
| "completions/mean_length": 91.70283203125, | |
| "completions/mean_terminated_length": 90.85653686523438, | |
| "completions/min_length": 33.8, | |
| "completions/min_terminated_length": 33.8, | |
| "epoch": 0.32, | |
| "grad_norm": 0.00154271034989506, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0023, | |
| "num_tokens": 257190154.0, | |
| "reward": 0.77060546875, | |
| "reward_std": 0.06635084152221679, | |
| "rewards/accuracy_reward": 0.5419921875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99921875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 232.5, | |
| "eval_completions/max_terminated_length": 232.5, | |
| "eval_completions/mean_length": 93.95743560791016, | |
| "eval_completions/mean_terminated_length": 93.95743560791016, | |
| "eval_completions/min_length": 41.75, | |
| "eval_completions/min_terminated_length": 41.75, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 257190154.0, | |
| "eval_reward": 0.712890625, | |
| "eval_reward_std": 0.2481144778430462, | |
| "eval_rewards/accuracy_reward": 0.42578125, | |
| "eval_rewards/brier_reward": 0.0, | |
| "eval_rewards/confidence_one_or_zero": 0.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 14.235, | |
| "eval_samples_per_second": 35.125, | |
| "eval_steps_per_second": 0.281, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00146484375, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 624.2, | |
| "completions/mean_length": 94.64306640625, | |
| "completions/mean_terminated_length": 92.5290313720703, | |
| "completions/min_length": 30.4, | |
| "completions/min_terminated_length": 30.4, | |
| "epoch": 0.336, | |
| "grad_norm": 0.001522217644378543, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0043, | |
| "num_tokens": 269738051.0, | |
| "reward": 0.77568359375, | |
| "reward_std": 0.07683707624673844, | |
| "rewards/accuracy_reward": 0.5529296875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9984375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.001171875, | |
| "completions/max_length": 1307.4, | |
| "completions/max_terminated_length": 435.8, | |
| "completions/mean_length": 94.1294921875, | |
| "completions/mean_terminated_length": 92.43776245117188, | |
| "completions/min_length": 21.8, | |
| "completions/min_terminated_length": 21.8, | |
| "epoch": 0.352, | |
| "grad_norm": 0.001526491018012166, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0039, | |
| "num_tokens": 282818673.0, | |
| "reward": 0.745751953125, | |
| "reward_std": 0.08200332224369049, | |
| "rewards/accuracy_reward": 0.49306640625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9984375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0013671875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 588.4, | |
| "completions/mean_length": 92.6953125, | |
| "completions/mean_terminated_length": 90.7186294555664, | |
| "completions/min_length": 30.6, | |
| "completions/min_terminated_length": 30.6, | |
| "epoch": 0.368, | |
| "grad_norm": 0.0013903952203691006, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0039, | |
| "num_tokens": 295689665.0, | |
| "reward": 0.754443359375, | |
| "reward_std": 0.07026491463184356, | |
| "rewards/accuracy_reward": 0.5103515625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99853515625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0009765625, | |
| "completions/max_length": 1296.6, | |
| "completions/max_terminated_length": 561.2, | |
| "completions/mean_length": 91.94736328125, | |
| "completions/mean_terminated_length": 90.53576354980468, | |
| "completions/min_length": 32.0, | |
| "completions/min_terminated_length": 32.0, | |
| "epoch": 0.384, | |
| "grad_norm": 0.0018187090754508972, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0033, | |
| "num_tokens": 308344038.0, | |
| "reward": 0.7708984375, | |
| "reward_std": 0.07117158472537995, | |
| "rewards/accuracy_reward": 0.54296875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.998828125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.001171875, | |
| "completions/max_length": 1305.0, | |
| "completions/max_terminated_length": 520.2, | |
| "completions/mean_length": 89.5787109375, | |
| "completions/mean_terminated_length": 87.88221435546875, | |
| "completions/min_length": 29.4, | |
| "completions/min_terminated_length": 29.4, | |
| "epoch": 0.4, | |
| "grad_norm": 0.0016340231522917747, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0038, | |
| "num_tokens": 321154092.0, | |
| "reward": 0.76103515625, | |
| "reward_std": 0.07767283618450165, | |
| "rewards/accuracy_reward": 0.52333984375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99873046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00068359375, | |
| "completions/max_length": 1244.4, | |
| "completions/max_terminated_length": 590.0, | |
| "completions/mean_length": 89.6955078125, | |
| "completions/mean_terminated_length": 88.70513153076172, | |
| "completions/min_length": 31.8, | |
| "completions/min_terminated_length": 31.8, | |
| "epoch": 0.416, | |
| "grad_norm": 0.0015821981942281127, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0019, | |
| "num_tokens": 333810078.0, | |
| "reward": 0.764208984375, | |
| "reward_std": 0.06756853386759758, | |
| "rewards/accuracy_reward": 0.5291015625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99931640625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00048828125, | |
| "completions/max_length": 1174.4, | |
| "completions/max_terminated_length": 537.0, | |
| "completions/mean_length": 90.36181640625, | |
| "completions/mean_terminated_length": 89.65568542480469, | |
| "completions/min_length": 35.0, | |
| "completions/min_terminated_length": 35.0, | |
| "epoch": 0.432, | |
| "grad_norm": 0.0014525202568620443, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0025, | |
| "num_tokens": 346606039.0, | |
| "reward": 0.7806640625, | |
| "reward_std": 0.06507465690374374, | |
| "rewards/accuracy_reward": 0.56181640625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99951171875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00068359375, | |
| "completions/max_length": 1301.0, | |
| "completions/max_terminated_length": 503.6, | |
| "completions/mean_length": 94.90302734375, | |
| "completions/mean_terminated_length": 93.91733856201172, | |
| "completions/min_length": 42.2, | |
| "completions/min_terminated_length": 42.2, | |
| "epoch": 0.448, | |
| "grad_norm": 0.0015080425655469298, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0023, | |
| "num_tokens": 359386966.0, | |
| "reward": 0.76650390625, | |
| "reward_std": 0.069513601064682, | |
| "rewards/accuracy_reward": 0.53369140625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99931640625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00185546875, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 541.6, | |
| "completions/mean_length": 99.85595703125, | |
| "completions/mean_terminated_length": 97.19031372070313, | |
| "completions/min_length": 40.8, | |
| "completions/min_terminated_length": 40.8, | |
| "epoch": 0.464, | |
| "grad_norm": 0.0010223939316347241, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0047, | |
| "num_tokens": 372436627.0, | |
| "reward": 0.7345703125, | |
| "reward_std": 0.05768234580755234, | |
| "rewards/accuracy_reward": 0.47109375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.998046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00068359375, | |
| "completions/max_length": 1303.2, | |
| "completions/max_terminated_length": 405.6, | |
| "completions/mean_length": 98.14833984375, | |
| "completions/mean_terminated_length": 97.16446533203126, | |
| "completions/min_length": 43.8, | |
| "completions/min_terminated_length": 43.8, | |
| "epoch": 0.48, | |
| "grad_norm": 0.0016905076336115599, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0023, | |
| "num_tokens": 385346018.0, | |
| "reward": 0.769873046875, | |
| "reward_std": 0.07736360728740692, | |
| "rewards/accuracy_reward": 0.5408203125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99892578125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_completions/clipped_ratio": 0.001953125, | |
| "eval_completions/max_length": 582.5, | |
| "eval_completions/max_terminated_length": 264.0, | |
| "eval_completions/mean_length": 101.61126136779785, | |
| "eval_completions/mean_terminated_length": 98.80445098876953, | |
| "eval_completions/min_length": 51.25, | |
| "eval_completions/min_terminated_length": 51.25, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 385346018.0, | |
| "eval_reward": 0.7265625, | |
| "eval_reward_std": 0.2514254078269005, | |
| "eval_rewards/accuracy_reward": 0.455078125, | |
| "eval_rewards/brier_reward": 0.0, | |
| "eval_rewards/confidence_one_or_zero": 0.0, | |
| "eval_rewards/format_reward": 0.998046875, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 24.3286, | |
| "eval_samples_per_second": 20.552, | |
| "eval_steps_per_second": 0.164, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00078125, | |
| "completions/max_length": 1536.0, | |
| "completions/max_terminated_length": 386.0, | |
| "completions/mean_length": 97.898046875, | |
| "completions/mean_terminated_length": 96.7735092163086, | |
| "completions/min_length": 40.2, | |
| "completions/min_terminated_length": 40.2, | |
| "epoch": 0.496, | |
| "grad_norm": 0.001528796274214983, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0029, | |
| "num_tokens": 398512654.0, | |
| "reward": 0.7744140625, | |
| "reward_std": 0.0720748171210289, | |
| "rewards/accuracy_reward": 0.549609375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99921875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00048828125, | |
| "completions/max_length": 1317.0, | |
| "completions/max_terminated_length": 399.8, | |
| "completions/mean_length": 95.37158203125, | |
| "completions/mean_terminated_length": 94.66787719726562, | |
| "completions/min_length": 40.8, | |
| "completions/min_terminated_length": 40.8, | |
| "epoch": 0.512, | |
| "grad_norm": 0.00132983538787812, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0025, | |
| "num_tokens": 411491243.0, | |
| "reward": 0.779931640625, | |
| "reward_std": 0.06670133695006371, | |
| "rewards/accuracy_reward": 0.56044921875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9994140625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0009765625, | |
| "completions/max_length": 1341.4, | |
| "completions/max_terminated_length": 662.8, | |
| "completions/mean_length": 96.72470703125, | |
| "completions/mean_terminated_length": 95.31717834472656, | |
| "completions/min_length": 42.0, | |
| "completions/min_terminated_length": 42.0, | |
| "epoch": 0.528, | |
| "grad_norm": 0.001326797646470368, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0037, | |
| "num_tokens": 424367560.0, | |
| "reward": 0.77578125, | |
| "reward_std": 0.06582950651645661, | |
| "rewards/accuracy_reward": 0.5525390625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0001953125, | |
| "completions/max_length": 699.4, | |
| "completions/max_terminated_length": 456.6, | |
| "completions/mean_length": 93.36513671875, | |
| "completions/mean_terminated_length": 93.0830810546875, | |
| "completions/min_length": 34.2, | |
| "completions/min_terminated_length": 34.2, | |
| "epoch": 0.544, | |
| "grad_norm": 0.0017570438794791698, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 437343523.0, | |
| "reward": 0.796533203125, | |
| "reward_std": 0.07356481105089188, | |
| "rewards/accuracy_reward": 0.593359375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99970703125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 444.8, | |
| "completions/max_terminated_length": 444.8, | |
| "completions/mean_length": 91.45390625, | |
| "completions/mean_terminated_length": 91.45390625, | |
| "completions/min_length": 41.8, | |
| "completions/min_terminated_length": 41.8, | |
| "epoch": 0.56, | |
| "grad_norm": 0.0014896654756739736, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 449957739.0, | |
| "reward": 0.76611328125, | |
| "reward_std": 0.060523012280464174, | |
| "rewards/accuracy_reward": 0.53232421875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.000390625, | |
| "completions/max_length": 1289.4, | |
| "completions/max_terminated_length": 382.2, | |
| "completions/mean_length": 90.7419921875, | |
| "completions/mean_terminated_length": 90.17736206054687, | |
| "completions/min_length": 43.0, | |
| "completions/min_terminated_length": 43.0, | |
| "epoch": 0.576, | |
| "grad_norm": 0.0016016842564567924, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0008, | |
| "num_tokens": 462929881.0, | |
| "reward": 0.764111328125, | |
| "reward_std": 0.05608753189444542, | |
| "rewards/accuracy_reward": 0.52861328125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 9.765625e-05, | |
| "completions/max_length": 603.8, | |
| "completions/max_terminated_length": 356.0, | |
| "completions/mean_length": 91.03701171875, | |
| "completions/mean_terminated_length": 90.8958251953125, | |
| "completions/min_length": 42.6, | |
| "completions/min_terminated_length": 42.6, | |
| "epoch": 0.592, | |
| "grad_norm": 0.0013300231657922268, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0014, | |
| "num_tokens": 475886132.0, | |
| "reward": 0.763671875, | |
| "reward_std": 0.06121245920658112, | |
| "rewards/accuracy_reward": 0.52744140625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00029296875, | |
| "completions/max_length": 831.0, | |
| "completions/max_terminated_length": 363.8, | |
| "completions/mean_length": 94.77607421875, | |
| "completions/mean_terminated_length": 94.35356140136719, | |
| "completions/min_length": 42.6, | |
| "completions/min_terminated_length": 42.6, | |
| "epoch": 0.608, | |
| "grad_norm": 0.001449022558517754, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0015, | |
| "num_tokens": 488712447.0, | |
| "reward": 0.776220703125, | |
| "reward_std": 0.055175574868917464, | |
| "rewards/accuracy_reward": 0.55283203125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 9.765625e-05, | |
| "completions/max_length": 719.8, | |
| "completions/max_terminated_length": 511.4, | |
| "completions/mean_length": 95.49326171875, | |
| "completions/mean_terminated_length": 95.3530044555664, | |
| "completions/min_length": 42.2, | |
| "completions/min_terminated_length": 42.2, | |
| "epoch": 0.624, | |
| "grad_norm": 0.0016013348940759897, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 501890522.0, | |
| "reward": 0.770703125, | |
| "reward_std": 0.06375713348388672, | |
| "rewards/accuracy_reward": 0.54150390625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.001171875, | |
| "completions/max_length": 1310.4, | |
| "completions/max_terminated_length": 450.2, | |
| "completions/mean_length": 97.82939453125, | |
| "completions/mean_terminated_length": 96.14148406982422, | |
| "completions/min_length": 43.8, | |
| "completions/min_terminated_length": 43.8, | |
| "epoch": 0.64, | |
| "grad_norm": 0.00144854630343616, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0031, | |
| "num_tokens": 515091303.0, | |
| "reward": 0.79462890625, | |
| "reward_std": 0.05500866025686264, | |
| "rewards/accuracy_reward": 0.5904296875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.998828125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 226.5, | |
| "eval_completions/max_terminated_length": 226.5, | |
| "eval_completions/mean_length": 97.45433807373047, | |
| "eval_completions/mean_terminated_length": 97.45433807373047, | |
| "eval_completions/min_length": 48.5, | |
| "eval_completions/min_terminated_length": 48.5, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 515091303.0, | |
| "eval_reward": 0.7216796875, | |
| "eval_reward_std": 0.24690637737512589, | |
| "eval_rewards/accuracy_reward": 0.443359375, | |
| "eval_rewards/brier_reward": 0.0, | |
| "eval_rewards/confidence_one_or_zero": 0.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 14.5533, | |
| "eval_samples_per_second": 34.357, | |
| "eval_steps_per_second": 0.275, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0001953125, | |
| "completions/max_length": 809.0, | |
| "completions/max_terminated_length": 348.2, | |
| "completions/mean_length": 95.0333984375, | |
| "completions/mean_terminated_length": 94.75228271484374, | |
| "completions/min_length": 42.4, | |
| "completions/min_terminated_length": 42.4, | |
| "epoch": 0.656, | |
| "grad_norm": 0.0017163316952064633, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0013, | |
| "num_tokens": 527777309.0, | |
| "reward": 0.75673828125, | |
| "reward_std": 0.060856021195650103, | |
| "rewards/accuracy_reward": 0.513671875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9998046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.000390625, | |
| "completions/max_length": 828.2, | |
| "completions/max_terminated_length": 388.2, | |
| "completions/mean_length": 95.4216796875, | |
| "completions/mean_terminated_length": 94.85912628173828, | |
| "completions/min_length": 41.4, | |
| "completions/min_terminated_length": 41.4, | |
| "epoch": 0.672, | |
| "grad_norm": 0.0013491360004991293, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0013, | |
| "num_tokens": 540524187.0, | |
| "reward": 0.768115234375, | |
| "reward_std": 0.05722193792462349, | |
| "rewards/accuracy_reward": 0.53662109375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.000390625, | |
| "completions/max_length": 1050.6, | |
| "completions/max_terminated_length": 373.2, | |
| "completions/mean_length": 95.3826171875, | |
| "completions/mean_terminated_length": 94.82014465332031, | |
| "completions/min_length": 43.8, | |
| "completions/min_terminated_length": 43.8, | |
| "epoch": 0.688, | |
| "grad_norm": 0.0017077566590160131, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0011, | |
| "num_tokens": 553311145.0, | |
| "reward": 0.779443359375, | |
| "reward_std": 0.06311368122696877, | |
| "rewards/accuracy_reward": 0.55927734375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.000390625, | |
| "completions/max_length": 810.0, | |
| "completions/max_terminated_length": 377.8, | |
| "completions/mean_length": 90.67568359375, | |
| "completions/mean_terminated_length": 90.11159210205078, | |
| "completions/min_length": 42.6, | |
| "completions/min_terminated_length": 42.6, | |
| "epoch": 0.704, | |
| "grad_norm": 0.0018953669350594282, | |
| "learning_rate": 1e-06, | |
| "loss": 0.001, | |
| "num_tokens": 565962128.0, | |
| "reward": 0.78388671875, | |
| "reward_std": 0.05257489308714867, | |
| "rewards/accuracy_reward": 0.5681640625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0005859375, | |
| "completions/max_length": 1065.6, | |
| "completions/max_terminated_length": 403.6, | |
| "completions/mean_length": 91.39638671875, | |
| "completions/mean_terminated_length": 90.54814147949219, | |
| "completions/min_length": 43.6, | |
| "completions/min_terminated_length": 43.6, | |
| "epoch": 0.72, | |
| "grad_norm": 0.0019243984716013074, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0025, | |
| "num_tokens": 578764203.0, | |
| "reward": 0.790576171875, | |
| "reward_std": 0.05948638021945953, | |
| "rewards/accuracy_reward": 0.58173828125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9994140625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00078125, | |
| "completions/max_length": 1299.8, | |
| "completions/max_terminated_length": 342.0, | |
| "completions/mean_length": 91.9408203125, | |
| "completions/mean_terminated_length": 90.81210021972656, | |
| "completions/min_length": 45.0, | |
| "completions/min_terminated_length": 45.0, | |
| "epoch": 0.736, | |
| "grad_norm": 0.0013582052197307348, | |
| "learning_rate": 1e-06, | |
| "loss": 0.002, | |
| "num_tokens": 591501581.0, | |
| "reward": 0.79150390625, | |
| "reward_std": 0.05623424053192139, | |
| "rewards/accuracy_reward": 0.5837890625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99921875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0001953125, | |
| "completions/max_length": 565.0, | |
| "completions/max_terminated_length": 327.2, | |
| "completions/mean_length": 91.51416015625, | |
| "completions/mean_terminated_length": 91.23197326660156, | |
| "completions/min_length": 43.8, | |
| "completions/min_terminated_length": 43.8, | |
| "epoch": 0.752, | |
| "grad_norm": 0.0015649450942873955, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 604522206.0, | |
| "reward": 0.7857421875, | |
| "reward_std": 0.05413587838411331, | |
| "rewards/accuracy_reward": 0.5716796875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9998046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.000390625, | |
| "completions/max_length": 646.2, | |
| "completions/max_terminated_length": 607.4, | |
| "completions/mean_length": 94.9529296875, | |
| "completions/mean_terminated_length": 94.39071350097656, | |
| "completions/min_length": 42.4, | |
| "completions/min_terminated_length": 42.4, | |
| "epoch": 0.768, | |
| "grad_norm": 0.0017648260109126568, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0017, | |
| "num_tokens": 617283548.0, | |
| "reward": 0.761279296875, | |
| "reward_std": 0.057571640610694884, | |
| "rewards/accuracy_reward": 0.52294921875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 413.6, | |
| "completions/max_terminated_length": 413.6, | |
| "completions/mean_length": 93.616015625, | |
| "completions/mean_terminated_length": 93.616015625, | |
| "completions/min_length": 43.8, | |
| "completions/min_terminated_length": 43.8, | |
| "epoch": 0.784, | |
| "grad_norm": 0.0011762650683522224, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 630272864.0, | |
| "reward": 0.7892578125, | |
| "reward_std": 0.05626345500349998, | |
| "rewards/accuracy_reward": 0.578515625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 326.8, | |
| "completions/max_terminated_length": 326.8, | |
| "completions/mean_length": 92.14990234375, | |
| "completions/mean_terminated_length": 92.14990234375, | |
| "completions/min_length": 44.6, | |
| "completions/min_terminated_length": 44.6, | |
| "epoch": 0.8, | |
| "grad_norm": 0.0013073732843622565, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 643083359.0, | |
| "reward": 0.804052734375, | |
| "reward_std": 0.05071377567946911, | |
| "rewards/accuracy_reward": 0.60810546875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 236.75, | |
| "eval_completions/max_terminated_length": 236.75, | |
| "eval_completions/mean_length": 91.57341003417969, | |
| "eval_completions/mean_terminated_length": 91.57341003417969, | |
| "eval_completions/min_length": 49.5, | |
| "eval_completions/min_terminated_length": 49.5, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 643083359.0, | |
| "eval_reward": 0.7265625, | |
| "eval_reward_std": 0.24742106348276138, | |
| "eval_rewards/accuracy_reward": 0.453125, | |
| "eval_rewards/brier_reward": 0.0, | |
| "eval_rewards/confidence_one_or_zero": 0.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 14.2768, | |
| "eval_samples_per_second": 35.022, | |
| "eval_steps_per_second": 0.28, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0001953125, | |
| "completions/max_length": 588.8, | |
| "completions/max_terminated_length": 456.2, | |
| "completions/mean_length": 89.20595703125, | |
| "completions/mean_terminated_length": 88.92289428710937, | |
| "completions/min_length": 43.2, | |
| "completions/min_terminated_length": 43.2, | |
| "epoch": 0.816, | |
| "grad_norm": 0.0019169868901371956, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0008, | |
| "num_tokens": 655952316.0, | |
| "reward": 0.805029296875, | |
| "reward_std": 0.054716046899557114, | |
| "rewards/accuracy_reward": 0.61025390625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9998046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 462.6, | |
| "completions/max_terminated_length": 462.6, | |
| "completions/mean_length": 91.78896484375, | |
| "completions/mean_terminated_length": 91.78896484375, | |
| "completions/min_length": 42.0, | |
| "completions/min_terminated_length": 42.0, | |
| "epoch": 0.832, | |
| "grad_norm": 0.0014281836338341236, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 668756907.0, | |
| "reward": 0.790087890625, | |
| "reward_std": 0.0539084292948246, | |
| "rewards/accuracy_reward": 0.58017578125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 9.765625e-05, | |
| "completions/max_length": 663.6, | |
| "completions/max_terminated_length": 417.8, | |
| "completions/mean_length": 91.91787109375, | |
| "completions/mean_terminated_length": 91.77665557861329, | |
| "completions/min_length": 45.4, | |
| "completions/min_terminated_length": 45.4, | |
| "epoch": 0.848, | |
| "grad_norm": 0.001417965511791408, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 681568834.0, | |
| "reward": 0.77236328125, | |
| "reward_std": 0.0536438025534153, | |
| "rewards/accuracy_reward": 0.54482421875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.000390625, | |
| "completions/max_length": 1059.4, | |
| "completions/max_terminated_length": 416.2, | |
| "completions/mean_length": 94.8748046875, | |
| "completions/mean_terminated_length": 94.31214294433593, | |
| "completions/min_length": 42.8, | |
| "completions/min_terminated_length": 42.8, | |
| "epoch": 0.864, | |
| "grad_norm": 0.0025267351884394884, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0014, | |
| "num_tokens": 694383488.0, | |
| "reward": 0.809716796875, | |
| "reward_std": 0.05303701683878899, | |
| "rewards/accuracy_reward": 0.61982421875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999609375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 9.765625e-05, | |
| "completions/max_length": 592.0, | |
| "completions/max_terminated_length": 383.0, | |
| "completions/mean_length": 94.92939453125, | |
| "completions/mean_terminated_length": 94.78859100341796, | |
| "completions/min_length": 45.4, | |
| "completions/min_terminated_length": 45.4, | |
| "epoch": 0.88, | |
| "grad_norm": 0.001522132777608931, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0011, | |
| "num_tokens": 707358957.0, | |
| "reward": 0.760546875, | |
| "reward_std": 0.055512601137161256, | |
| "rewards/accuracy_reward": 0.52119140625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 9.765625e-05, | |
| "completions/max_length": 572.6, | |
| "completions/max_terminated_length": 337.0, | |
| "completions/mean_length": 97.50546875, | |
| "completions/mean_terminated_length": 97.36535034179687, | |
| "completions/min_length": 44.0, | |
| "completions/min_terminated_length": 44.0, | |
| "epoch": 0.896, | |
| "grad_norm": 0.0011945873266085982, | |
| "learning_rate": 1e-06, | |
| "loss": 0.001, | |
| "num_tokens": 720324581.0, | |
| "reward": 0.784912109375, | |
| "reward_std": 0.04334753602743149, | |
| "rewards/accuracy_reward": 0.569921875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0001953125, | |
| "completions/max_length": 960.6, | |
| "completions/max_terminated_length": 469.8, | |
| "completions/mean_length": 97.699609375, | |
| "completions/mean_terminated_length": 97.41859893798828, | |
| "completions/min_length": 42.0, | |
| "completions/min_terminated_length": 42.0, | |
| "epoch": 0.912, | |
| "grad_norm": 0.0012196388561278582, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0008, | |
| "num_tokens": 733232641.0, | |
| "reward": 0.781201171875, | |
| "reward_std": 0.0503702849149704, | |
| "rewards/accuracy_reward": 0.56259765625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9998046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 9.765625e-05, | |
| "completions/max_length": 527.2, | |
| "completions/max_terminated_length": 509.0, | |
| "completions/mean_length": 94.194921875, | |
| "completions/mean_terminated_length": 94.05436248779297, | |
| "completions/min_length": 39.6, | |
| "completions/min_terminated_length": 39.6, | |
| "epoch": 0.928, | |
| "grad_norm": 0.0010627944720909, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 746080333.0, | |
| "reward": 0.77783203125, | |
| "reward_std": 0.0477489285171032, | |
| "rewards/accuracy_reward": 0.55576171875, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0001953125, | |
| "completions/max_length": 830.8, | |
| "completions/max_terminated_length": 384.8, | |
| "completions/mean_length": 96.19130859375, | |
| "completions/mean_terminated_length": 95.91045227050782, | |
| "completions/min_length": 41.0, | |
| "completions/min_terminated_length": 41.0, | |
| "epoch": 0.944, | |
| "grad_norm": 0.0012808856554329395, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0014, | |
| "num_tokens": 758897076.0, | |
| "reward": 0.774853515625, | |
| "reward_std": 0.06085398942232132, | |
| "rewards/accuracy_reward": 0.54990234375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9998046875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 9.765625e-05, | |
| "completions/max_length": 618.6, | |
| "completions/max_terminated_length": 385.6, | |
| "completions/mean_length": 95.019921875, | |
| "completions/mean_terminated_length": 94.87908020019532, | |
| "completions/min_length": 41.6, | |
| "completions/min_terminated_length": 41.6, | |
| "epoch": 0.96, | |
| "grad_norm": 0.001602579141035676, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 771666720.0, | |
| "reward": 0.77060546875, | |
| "reward_std": 0.047741709649562834, | |
| "rewards/accuracy_reward": 0.54130859375, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 212.5, | |
| "eval_completions/max_terminated_length": 212.5, | |
| "eval_completions/mean_length": 94.41190719604492, | |
| "eval_completions/mean_terminated_length": 94.41190719604492, | |
| "eval_completions/min_length": 51.0, | |
| "eval_completions/min_terminated_length": 51.0, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 771666720.0, | |
| "eval_reward": 0.7158203125, | |
| "eval_reward_std": 0.24653732031583786, | |
| "eval_rewards/accuracy_reward": 0.431640625, | |
| "eval_rewards/brier_reward": 0.0, | |
| "eval_rewards/confidence_one_or_zero": 0.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 13.6738, | |
| "eval_samples_per_second": 36.566, | |
| "eval_steps_per_second": 0.293, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0005859375, | |
| "completions/max_length": 1112.6, | |
| "completions/max_terminated_length": 416.8, | |
| "completions/mean_length": 95.6703125, | |
| "completions/mean_terminated_length": 94.82695617675782, | |
| "completions/min_length": 39.8, | |
| "completions/min_terminated_length": 39.8, | |
| "epoch": 0.976, | |
| "grad_norm": 0.0012756388168781996, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0017, | |
| "num_tokens": 784363824.0, | |
| "reward": 0.789599609375, | |
| "reward_std": 0.05751314386725426, | |
| "rewards/accuracy_reward": 0.57978515625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.9994140625, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00029296875, | |
| "completions/max_length": 813.6, | |
| "completions/max_terminated_length": 335.2, | |
| "completions/mean_length": 93.523046875, | |
| "completions/mean_terminated_length": 93.10098266601562, | |
| "completions/min_length": 41.8, | |
| "completions/min_terminated_length": 41.8, | |
| "epoch": 0.992, | |
| "grad_norm": 0.0012513543479144573, | |
| "learning_rate": 1e-06, | |
| "loss": 0.001, | |
| "num_tokens": 797306300.0, | |
| "reward": 0.778857421875, | |
| "reward_std": 0.04584160037338734, | |
| "rewards/accuracy_reward": 0.5580078125, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.99970703125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 341.0, | |
| "completions/max_terminated_length": 341.0, | |
| "completions/mean_length": 92.21929550170898, | |
| "completions/mean_terminated_length": 92.21929550170898, | |
| "completions/min_length": 40.5, | |
| "completions/min_terminated_length": 40.5, | |
| "epoch": 0.9984, | |
| "num_tokens": 802441496.0, | |
| "reward": 0.784912109375, | |
| "reward_std": 0.055518221110105515, | |
| "rewards/accuracy_reward": 0.570556640625, | |
| "rewards/brier_reward": 0.0, | |
| "rewards/confidence_one_or_zero": 0.0, | |
| "rewards/format_reward": 0.999267578125, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 312, | |
| "total_flos": 0.0, | |
| "train_loss": 0.0036987085283889873, | |
| "train_runtime": 71191.4821, | |
| "train_samples_per_second": 0.281, | |
| "train_steps_per_second": 0.004 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 312, | |
| "num_input_tokens_seen": 802441496, | |
| "num_train_epochs": 1, | |
| "save_steps": 60, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |