| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9996190476190476, | |
| "eval_steps": 50, | |
| "global_step": 164, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.77080078125, | |
| "completions/max_length": 128.0, | |
| "completions/max_terminated_length": 128.0, | |
| "completions/mean_length": 116.9482421875, | |
| "completions/mean_terminated_length": 79.82200317382812, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "epoch": 0.030476190476190476, | |
| "grad_norm": 0.2547481060028076, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 0.1961, | |
| "num_tokens": 3786478.0, | |
| "reward": 0.8699400663375855, | |
| "reward_std": 1.7978902339935303, | |
| "rewards/accuracy_reward": 0.0283203125, | |
| "rewards/brier_reward": 0.12994426488876343, | |
| "rewards/confidence_one_or_zero": 0.1083984375, | |
| "rewards/format_reward": 0.16884765625, | |
| "rewards/log_2_reward": 1.5427121639251709, | |
| "rewards/mean_confidence_reward": 0.10250047892332077, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.44853515625, | |
| "completions/max_length": 128.0, | |
| "completions/max_terminated_length": 128.0, | |
| "completions/mean_length": 98.93623046875, | |
| "completions/mean_terminated_length": 76.31569213867188, | |
| "completions/min_length": 6.4, | |
| "completions/min_terminated_length": 6.4, | |
| "epoch": 0.06095238095238095, | |
| "grad_norm": 0.9249232411384583, | |
| "learning_rate": 1e-06, | |
| "loss": 0.3834, | |
| "num_tokens": 7388449.0, | |
| "reward": 2.7091397285461425, | |
| "reward_std": 2.2789249181747437, | |
| "rewards/accuracy_reward": 0.107421875, | |
| "rewards/brier_reward": 0.3979889452457428, | |
| "rewards/confidence_one_or_zero": 0.21123046875, | |
| "rewards/format_reward": 0.52158203125, | |
| "rewards/log_2_reward": 4.789275646209717, | |
| "rewards/mean_confidence_reward": 0.254657456278801, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04111328125, | |
| "completions/max_length": 128.0, | |
| "completions/max_terminated_length": 126.8, | |
| "completions/mean_length": 64.40888671875, | |
| "completions/mean_terminated_length": 61.81023941040039, | |
| "completions/min_length": 16.6, | |
| "completions/min_terminated_length": 16.6, | |
| "epoch": 0.09142857142857143, | |
| "grad_norm": 0.09482545405626297, | |
| "learning_rate": 1e-06, | |
| "loss": 0.115, | |
| "num_tokens": 10635996.0, | |
| "reward": 4.9152580261230465, | |
| "reward_std": 1.1983887672424316, | |
| "rewards/accuracy_reward": 0.1896484375, | |
| "rewards/brier_reward": 0.7029260516166687, | |
| "rewards/confidence_one_or_zero": 0.308984375, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/log_2_reward": 8.69164867401123, | |
| "rewards/mean_confidence_reward": 0.43842514157295226, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0001953125, | |
| "completions/max_length": 113.0, | |
| "completions/max_terminated_length": 100.2, | |
| "completions/mean_length": 43.7119140625, | |
| "completions/mean_terminated_length": 43.69547882080078, | |
| "completions/min_length": 18.8, | |
| "completions/min_terminated_length": 18.8, | |
| "epoch": 0.1219047619047619, | |
| "grad_norm": 0.00744120217859745, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0043, | |
| "num_tokens": 13670230.0, | |
| "reward": 5.309306526184082, | |
| "reward_std": 0.32826481461524964, | |
| "rewards/accuracy_reward": 0.18349609375, | |
| "rewards/brier_reward": 0.8068550109863282, | |
| "rewards/confidence_one_or_zero": 0.39267578125, | |
| "rewards/format_reward": 0.99794921875, | |
| "rewards/log_2_reward": 9.437167739868164, | |
| "rewards/mean_confidence_reward": 0.35327613949775694, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 9.765625e-05, | |
| "completions/max_length": 106.8, | |
| "completions/max_terminated_length": 98.0, | |
| "completions/mean_length": 42.294140625, | |
| "completions/mean_terminated_length": 42.28580551147461, | |
| "completions/min_length": 21.8, | |
| "completions/min_terminated_length": 21.8, | |
| "epoch": 0.1523809523809524, | |
| "grad_norm": 0.0060423314571380615, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 16692602.0, | |
| "reward": 5.426451015472412, | |
| "reward_std": 0.18737927973270416, | |
| "rewards/accuracy_reward": 0.16845703125, | |
| "rewards/brier_reward": 0.8912161231040955, | |
| "rewards/confidence_one_or_zero": 0.5732421875, | |
| "rewards/format_reward": 0.998828125, | |
| "rewards/log_2_reward": 9.68561668395996, | |
| "rewards/mean_confidence_reward": 0.21070439517498016, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 76.4, | |
| "completions/max_terminated_length": 76.4, | |
| "completions/mean_length": 41.1986328125, | |
| "completions/mean_terminated_length": 41.1986328125, | |
| "completions/min_length": 23.2, | |
| "completions/min_terminated_length": 23.2, | |
| "epoch": 0.18285714285714286, | |
| "grad_norm": 0.004254295490682125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 19701580.0, | |
| "reward": 5.47640905380249, | |
| "reward_std": 0.0742596685886383, | |
| "rewards/accuracy_reward": 0.05986328125, | |
| "rewards/brier_reward": 0.9633856773376465, | |
| "rewards/confidence_one_or_zero": 0.86806640625, | |
| "rewards/format_reward": 0.99921875, | |
| "rewards/log_2_reward": 9.893736457824707, | |
| "rewards/mean_confidence_reward": 0.05211978480219841, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 71.0, | |
| "completions/max_terminated_length": 71.0, | |
| "completions/mean_length": 39.89755859375, | |
| "completions/mean_terminated_length": 39.89755859375, | |
| "completions/min_length": 23.8, | |
| "completions/min_terminated_length": 23.8, | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 5.5466916819568723e-05, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 22695315.0, | |
| "reward": 5.49622688293457, | |
| "reward_std": 0.015401666914112867, | |
| "rewards/accuracy_reward": 0.0021484375, | |
| "rewards/brier_reward": 0.9974494576454163, | |
| "rewards/confidence_one_or_zero": 0.98984375, | |
| "rewards/format_reward": 0.99970703125, | |
| "rewards/log_2_reward": 9.990598487854005, | |
| "rewards/mean_confidence_reward": 0.0037255858958815226, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 62.0, | |
| "completions/max_terminated_length": 62.0, | |
| "completions/mean_length": 39.62392578125, | |
| "completions/mean_terminated_length": 39.62392578125, | |
| "completions/min_length": 24.8, | |
| "completions/min_terminated_length": 24.8, | |
| "epoch": 0.2438095238095238, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 25691976.0, | |
| "reward": 5.499967384338379, | |
| "reward_std": 0.00018490625079721213, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 0.9999882698059082, | |
| "rewards/confidence_one_or_zero": 0.99970703125, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 9.999934768676757, | |
| "rewards/mean_confidence_reward": 5.859375087311491e-05, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.0, | |
| "completions/max_terminated_length": 57.0, | |
| "completions/mean_length": 39.17685546875, | |
| "completions/mean_terminated_length": 39.17685546875, | |
| "completions/min_length": 25.2, | |
| "completions/min_terminated_length": 25.2, | |
| "epoch": 0.2742857142857143, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 28683067.0, | |
| "reward": 5.499900531768799, | |
| "reward_std": 0.0005625242134556174, | |
| "rewards/accuracy_reward": 9.765625e-05, | |
| "rewards/brier_reward": 0.9999013662338256, | |
| "rewards/confidence_one_or_zero": 0.99970703125, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 9.999703407287598, | |
| "rewards/mean_confidence_reward": 6.83593774738256e-05, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 55.4, | |
| "completions/max_terminated_length": 55.4, | |
| "completions/mean_length": 38.91845703125, | |
| "completions/mean_terminated_length": 38.91845703125, | |
| "completions/min_length": 25.4, | |
| "completions/min_terminated_length": 25.4, | |
| "epoch": 0.3047619047619048, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 31665112.0, | |
| "reward": 5.499433135986328, | |
| "reward_std": 0.0030375825241208076, | |
| "rewards/accuracy_reward": 9.765625e-05, | |
| "rewards/brier_reward": 0.9998398423194885, | |
| "rewards/confidence_one_or_zero": 0.99990234375, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/log_2_reward": 9.998866271972656, | |
| "rewards/mean_confidence_reward": 1.9531250291038305e-05, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3047619047619048, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 46.5, | |
| "eval_completions/max_terminated_length": 46.5, | |
| "eval_completions/mean_length": 38.41298484802246, | |
| "eval_completions/mean_terminated_length": 38.41298484802246, | |
| "eval_completions/min_length": 26.5, | |
| "eval_completions/min_terminated_length": 26.5, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 31665112.0, | |
| "eval_reward": 5.5, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward": 0.0, | |
| "eval_rewards/brier_reward": 1.0, | |
| "eval_rewards/confidence_one_or_zero": 1.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/log_2_reward": 10.0, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 8.5224, | |
| "eval_samples_per_second": 58.669, | |
| "eval_steps_per_second": 0.469, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.8, | |
| "completions/max_terminated_length": 57.8, | |
| "completions/mean_length": 38.93466796875, | |
| "completions/mean_terminated_length": 38.93466796875, | |
| "completions/min_length": 25.4, | |
| "completions/min_terminated_length": 25.4, | |
| "epoch": 0.3352380952380952, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 34651099.0, | |
| "reward": 5.4989158630371096, | |
| "reward_std": 0.006133038754342124, | |
| "rewards/accuracy_reward": 9.765625e-05, | |
| "rewards/brier_reward": 0.9997568368911743, | |
| "rewards/confidence_one_or_zero": 0.9998046875, | |
| "rewards/format_reward": 0.9998046875, | |
| "rewards/log_2_reward": 9.99792938232422, | |
| "rewards/mean_confidence_reward": 2.9296876164153218e-05, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 58.4, | |
| "completions/max_terminated_length": 58.4, | |
| "completions/mean_length": 39.1458984375, | |
| "completions/mean_terminated_length": 39.1458984375, | |
| "completions/min_length": 25.6, | |
| "completions/min_terminated_length": 25.6, | |
| "epoch": 0.3657142857142857, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 37641969.0, | |
| "reward": 5.499462890625, | |
| "reward_std": 0.003038349375128746, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 0.99990234375, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/log_2_reward": 9.9990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 56.6, | |
| "completions/max_terminated_length": 56.6, | |
| "completions/mean_length": 39.14443359375, | |
| "completions/mean_terminated_length": 39.14443359375, | |
| "completions/min_length": 25.8, | |
| "completions/min_terminated_length": 25.8, | |
| "epoch": 0.3961904761904762, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0, | |
| "num_tokens": 40631320.0, | |
| "reward": 5.499462890625, | |
| "reward_std": 0.003038349375128746, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 0.99990234375, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/log_2_reward": 9.9990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 58.0, | |
| "completions/max_terminated_length": 58.0, | |
| "completions/mean_length": 39.17412109375, | |
| "completions/mean_terminated_length": 39.17412109375, | |
| "completions/min_length": 24.6, | |
| "completions/min_terminated_length": 24.6, | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 43622127.0, | |
| "reward": 5.499462890625, | |
| "reward_std": 0.003038349375128746, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 0.99990234375, | |
| "rewards/confidence_one_or_zero": 0.99990234375, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/log_2_reward": 9.9990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 60.4, | |
| "completions/max_terminated_length": 60.4, | |
| "completions/mean_length": 38.94501953125, | |
| "completions/mean_terminated_length": 38.94501953125, | |
| "completions/min_length": 25.2, | |
| "completions/min_terminated_length": 25.2, | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 46606236.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.2, | |
| "completions/max_terminated_length": 57.2, | |
| "completions/mean_length": 39.04951171875, | |
| "completions/mean_terminated_length": 39.04951171875, | |
| "completions/min_length": 25.4, | |
| "completions/min_terminated_length": 25.4, | |
| "epoch": 0.4876190476190476, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 49590423.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 60.2, | |
| "completions/max_terminated_length": 60.2, | |
| "completions/mean_length": 39.11123046875, | |
| "completions/mean_terminated_length": 39.11123046875, | |
| "completions/min_length": 25.8, | |
| "completions/min_terminated_length": 25.8, | |
| "epoch": 0.518095238095238, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 52576458.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 58.0, | |
| "completions/max_terminated_length": 58.0, | |
| "completions/mean_length": 39.10869140625, | |
| "completions/mean_terminated_length": 39.10869140625, | |
| "completions/min_length": 25.6, | |
| "completions/min_terminated_length": 25.6, | |
| "epoch": 0.5485714285714286, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 55563779.0, | |
| "reward": 5.499970245361328, | |
| "reward_std": 0.00016833491390570997, | |
| "rewards/accuracy_reward": 9.765625e-05, | |
| "rewards/brier_reward": 0.9999374985694885, | |
| "rewards/confidence_one_or_zero": 0.99990234375, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 9.999842834472656, | |
| "rewards/mean_confidence_reward": 1.9531250291038305e-05, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 55.2, | |
| "completions/max_terminated_length": 55.2, | |
| "completions/mean_length": 39.05234375, | |
| "completions/mean_terminated_length": 39.05234375, | |
| "completions/min_length": 26.2, | |
| "completions/min_terminated_length": 26.2, | |
| "epoch": 0.579047619047619, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 58549307.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.6, | |
| "completions/max_terminated_length": 57.6, | |
| "completions/mean_length": 39.0751953125, | |
| "completions/mean_terminated_length": 39.0751953125, | |
| "completions/min_length": 25.8, | |
| "completions/min_terminated_length": 25.8, | |
| "epoch": 0.6095238095238096, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 61537405.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6095238095238096, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 53.25, | |
| "eval_completions/max_terminated_length": 53.25, | |
| "eval_completions/mean_length": 39.020877838134766, | |
| "eval_completions/mean_terminated_length": 39.020877838134766, | |
| "eval_completions/min_length": 29.0, | |
| "eval_completions/min_terminated_length": 29.0, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 61537405.0, | |
| "eval_reward": 5.5, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward": 0.0, | |
| "eval_rewards/brier_reward": 1.0, | |
| "eval_rewards/confidence_one_or_zero": 1.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/log_2_reward": 10.0, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 8.25, | |
| "eval_samples_per_second": 60.606, | |
| "eval_steps_per_second": 0.485, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 58.0, | |
| "completions/max_terminated_length": 58.0, | |
| "completions/mean_length": 39.10244140625, | |
| "completions/mean_terminated_length": 39.10244140625, | |
| "completions/min_length": 25.6, | |
| "completions/min_terminated_length": 25.6, | |
| "epoch": 0.64, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 64529366.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 55.8, | |
| "completions/max_terminated_length": 55.8, | |
| "completions/mean_length": 39.0609375, | |
| "completions/mean_terminated_length": 39.0609375, | |
| "completions/min_length": 25.8, | |
| "completions/min_terminated_length": 25.8, | |
| "epoch": 0.6704761904761904, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 67517446.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 56.4, | |
| "completions/max_terminated_length": 56.4, | |
| "completions/mean_length": 38.946875, | |
| "completions/mean_terminated_length": 38.946875, | |
| "completions/min_length": 25.4, | |
| "completions/min_terminated_length": 25.4, | |
| "epoch": 0.700952380952381, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 70503046.0, | |
| "reward": 5.499989128112793, | |
| "reward_std": 6.163541693240404e-05, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 0.9999960899353028, | |
| "rewards/confidence_one_or_zero": 0.99990234375, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 9.999978256225585, | |
| "rewards/mean_confidence_reward": 1.9531250291038305e-05, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 58.4, | |
| "completions/max_terminated_length": 58.4, | |
| "completions/mean_length": 39.1416015625, | |
| "completions/mean_terminated_length": 39.1416015625, | |
| "completions/min_length": 26.2, | |
| "completions/min_terminated_length": 26.2, | |
| "epoch": 0.7314285714285714, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 73492144.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.8, | |
| "completions/max_terminated_length": 57.8, | |
| "completions/mean_length": 39.1482421875, | |
| "completions/mean_terminated_length": 39.1482421875, | |
| "completions/min_length": 26.2, | |
| "completions/min_terminated_length": 26.2, | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 76482046.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.0, | |
| "completions/max_terminated_length": 57.0, | |
| "completions/mean_length": 39.20634765625, | |
| "completions/mean_terminated_length": 39.20634765625, | |
| "completions/min_length": 25.6, | |
| "completions/min_terminated_length": 25.6, | |
| "epoch": 0.7923809523809524, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 79472607.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.8, | |
| "completions/max_terminated_length": 57.8, | |
| "completions/mean_length": 38.862890625, | |
| "completions/mean_terminated_length": 38.862890625, | |
| "completions/min_length": 25.8, | |
| "completions/min_terminated_length": 25.8, | |
| "epoch": 0.8228571428571428, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 82453699.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 56.6, | |
| "completions/max_terminated_length": 56.6, | |
| "completions/mean_length": 39.03369140625, | |
| "completions/mean_terminated_length": 39.03369140625, | |
| "completions/min_length": 24.8, | |
| "completions/min_terminated_length": 24.8, | |
| "epoch": 0.8533333333333334, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 85441020.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.4, | |
| "completions/max_terminated_length": 57.4, | |
| "completions/mean_length": 39.27880859375, | |
| "completions/mean_terminated_length": 39.27880859375, | |
| "completions/min_length": 25.8, | |
| "completions/min_terminated_length": 25.8, | |
| "epoch": 0.8838095238095238, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0, | |
| "num_tokens": 88437123.0, | |
| "reward": 5.499462890625, | |
| "reward_std": 0.003038349375128746, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 0.99990234375, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 0.99990234375, | |
| "rewards/log_2_reward": 9.9990234375, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 56.4, | |
| "completions/max_terminated_length": 56.4, | |
| "completions/mean_length": 38.880078125, | |
| "completions/mean_terminated_length": 38.880078125, | |
| "completions/min_length": 25.4, | |
| "completions/min_terminated_length": 25.4, | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 91421015.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 52.5, | |
| "eval_completions/max_terminated_length": 52.5, | |
| "eval_completions/mean_length": 38.77289867401123, | |
| "eval_completions/mean_terminated_length": 38.77289867401123, | |
| "eval_completions/min_length": 28.5, | |
| "eval_completions/min_terminated_length": 28.5, | |
| "eval_loss": 0.0, | |
| "eval_num_tokens": 91421015.0, | |
| "eval_reward": 5.5, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/accuracy_reward": 0.0, | |
| "eval_rewards/brier_reward": 1.0, | |
| "eval_rewards/confidence_one_or_zero": 1.0, | |
| "eval_rewards/format_reward": 1.0, | |
| "eval_rewards/log_2_reward": 10.0, | |
| "eval_rewards/mean_confidence_reward": 0.0, | |
| "eval_runtime": 9.1652, | |
| "eval_samples_per_second": 54.554, | |
| "eval_steps_per_second": 0.436, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 57.8, | |
| "completions/max_terminated_length": 57.8, | |
| "completions/mean_length": 38.95888671875, | |
| "completions/mean_terminated_length": 38.95888671875, | |
| "completions/min_length": 25.6, | |
| "completions/min_terminated_length": 25.6, | |
| "epoch": 0.9447619047619048, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 94407698.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 56.8, | |
| "completions/max_terminated_length": 56.8, | |
| "completions/mean_length": 39.01162109375, | |
| "completions/mean_terminated_length": 39.01162109375, | |
| "completions/min_length": 25.8, | |
| "completions/min_terminated_length": 25.8, | |
| "epoch": 0.9752380952380952, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0, | |
| "num_tokens": 97395881.0, | |
| "reward": 5.5, | |
| "reward_std": 0.0, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 1.0, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 1.0, | |
| "rewards/log_2_reward": 10.0, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 56.75, | |
| "completions/max_terminated_length": 56.75, | |
| "completions/mean_length": 38.78663635253906, | |
| "completions/mean_terminated_length": 38.78663635253906, | |
| "completions/min_length": 25.75, | |
| "completions/min_terminated_length": 25.75, | |
| "epoch": 0.9996190476190476, | |
| "num_tokens": 99785845.0, | |
| "reward": 5.49932861328125, | |
| "reward_std": 0.0037979367189109325, | |
| "rewards/accuracy_reward": 0.0, | |
| "rewards/brier_reward": 0.9998779296875, | |
| "rewards/confidence_one_or_zero": 1.0, | |
| "rewards/format_reward": 0.9998779296875, | |
| "rewards/log_2_reward": 9.998779296875, | |
| "rewards/mean_confidence_reward": 0.0, | |
| "step": 164, | |
| "total_flos": 0.0, | |
| "train_loss": 0.021034386144844753, | |
| "train_runtime": 15577.6776, | |
| "train_samples_per_second": 0.674, | |
| "train_steps_per_second": 0.011 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 164, | |
| "num_input_tokens_seen": 99785845, | |
| "num_train_epochs": 1, | |
| "save_steps": 60, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |