| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.029769959404600813, | |
| "eval_steps": 500, | |
| "global_step": 110, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3875, | |
| "completions/max_length": 698.2, | |
| "completions/max_terminated_length": 387.0, | |
| "completions/mean_length": 668.8875, | |
| "completions/mean_terminated_length": 358.1875, | |
| "completions/min_length": 596.2, | |
| "completions/min_terminated_length": 289.0, | |
| "entropy": 0.462119147926569, | |
| "epoch": 0.0027063599458728013, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.975642760487146e-06, | |
| "loss": -0.0149, | |
| "num_tokens": 83271.0, | |
| "reward": 0.210569304227829, | |
| "reward_std": 0.04435553438961506, | |
| "rewards/accuracy_reward/mean": 0.210569304227829, | |
| "rewards/accuracy_reward/std": 0.04435553532093763, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 511.3, | |
| "completions/max_terminated_length": 364.6, | |
| "completions/mean_length": 470.2375, | |
| "completions/mean_terminated_length": 316.6333374023437, | |
| "completions/min_length": 373.4, | |
| "completions/min_terminated_length": 271.0, | |
| "entropy": 0.47093094028532506, | |
| "epoch": 0.005412719891745603, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.029935751110315323, | |
| "learning_rate": 9.948579161028418e-06, | |
| "loss": 0.0069, | |
| "num_tokens": 145050.0, | |
| "reward": 0.19647710705175997, | |
| "reward_std": 0.06507570259273052, | |
| "rewards/accuracy_reward/mean": 0.19647710705175997, | |
| "rewards/accuracy_reward/std": 0.06507570343092084, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1125, | |
| "completions/max_length": 471.1, | |
| "completions/max_terminated_length": 320.4, | |
| "completions/mean_length": 344.5125, | |
| "completions/mean_terminated_length": 233.0625, | |
| "completions/min_length": 280.5, | |
| "completions/min_terminated_length": 178.1, | |
| "entropy": 0.802415794506669, | |
| "epoch": 0.008119079837618403, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.045139819383621216, | |
| "learning_rate": 9.92151556156969e-06, | |
| "loss": 0.0352, | |
| "num_tokens": 202275.0, | |
| "reward": 0.2054170697927475, | |
| "reward_std": 0.10532984081655741, | |
| "rewards/accuracy_reward/mean": 0.2054170697927475, | |
| "rewards/accuracy_reward/std": 0.1053298432379961, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.45, | |
| "completions/max_length": 606.3, | |
| "completions/max_terminated_length": 270.8, | |
| "completions/mean_length": 575.7625, | |
| "completions/mean_terminated_length": 241.1041687011719, | |
| "completions/min_length": 509.7, | |
| "completions/min_terminated_length": 202.5, | |
| "entropy": 0.6945646680891514, | |
| "epoch": 0.010825439783491205, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.03177578002214432, | |
| "learning_rate": 9.894451962110961e-06, | |
| "loss": -0.0239, | |
| "num_tokens": 282232.0, | |
| "reward": 0.18761721327900888, | |
| "reward_std": 0.08178944233804941, | |
| "rewards/accuracy_reward/mean": 0.18761721327900888, | |
| "rewards/accuracy_reward/std": 0.08178944438695908, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.325, | |
| "completions/max_length": 569.8, | |
| "completions/max_terminated_length": 409.2, | |
| "completions/mean_length": 541.25, | |
| "completions/mean_terminated_length": 387.575, | |
| "completions/min_length": 469.1, | |
| "completions/min_terminated_length": 366.7, | |
| "entropy": 0.5521751202642917, | |
| "epoch": 0.013531799729364006, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.0339401476085186, | |
| "learning_rate": 9.867388362652234e-06, | |
| "loss": 0.0189, | |
| "num_tokens": 368316.0, | |
| "reward": 0.20753128491342068, | |
| "reward_std": 0.06242967322468758, | |
| "rewards/accuracy_reward/mean": 0.20753128491342068, | |
| "rewards/accuracy_reward/std": 0.06242967564612627, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 630.9, | |
| "completions/max_terminated_length": 322.8, | |
| "completions/mean_length": 589.95, | |
| "completions/mean_terminated_length": 297.5357177734375, | |
| "completions/min_length": 479.7, | |
| "completions/min_terminated_length": 274.9, | |
| "entropy": 0.42048906795680524, | |
| "epoch": 0.016238159675236806, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 0.019655214622616768, | |
| "learning_rate": 9.840324763193504e-06, | |
| "loss": -0.0069, | |
| "num_tokens": 447960.0, | |
| "reward": 0.26279650181531905, | |
| "reward_std": 0.05111534409224987, | |
| "rewards/accuracy_reward/mean": 0.26279650181531905, | |
| "rewards/accuracy_reward/std": 0.0511153444647789, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.55, | |
| "completions/max_length": 728.8, | |
| "completions/max_terminated_length": 283.5, | |
| "completions/mean_length": 672.65, | |
| "completions/mean_terminated_length": 207.1375, | |
| "completions/min_length": 561.7, | |
| "completions/min_terminated_length": 152.1, | |
| "entropy": 0.7023736372590065, | |
| "epoch": 0.018944519621109608, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.030278857797384262, | |
| "learning_rate": 9.813261163734777e-06, | |
| "loss": 0.0385, | |
| "num_tokens": 527468.0, | |
| "reward": 0.14871588042005895, | |
| "reward_std": 0.07659928184002637, | |
| "rewards/accuracy_reward/mean": 0.14871588042005895, | |
| "rewards/accuracy_reward/std": 0.07659928491339088, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1, | |
| "completions/max_length": 515.9, | |
| "completions/max_terminated_length": 413.5, | |
| "completions/mean_length": 458.55, | |
| "completions/mean_terminated_length": 356.15, | |
| "completions/min_length": 427.9, | |
| "completions/min_terminated_length": 325.5, | |
| "entropy": 0.3309335965663195, | |
| "epoch": 0.02165087956698241, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.07239779829978943, | |
| "learning_rate": 9.78619756427605e-06, | |
| "loss": 0.0416, | |
| "num_tokens": 600264.0, | |
| "reward": 0.4057682067155838, | |
| "reward_std": 0.09317798167467117, | |
| "rewards/accuracy_reward/mean": 0.4057682067155838, | |
| "rewards/accuracy_reward/std": 0.09317798614501953, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1, | |
| "completions/max_length": 334.3, | |
| "completions/max_terminated_length": 231.9, | |
| "completions/mean_length": 313.7875, | |
| "completions/mean_terminated_length": 211.3875, | |
| "completions/min_length": 289.0, | |
| "completions/min_terminated_length": 186.6, | |
| "entropy": 0.32498618103563787, | |
| "epoch": 0.02435723951285521, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.018347127363085747, | |
| "learning_rate": 9.759133964817322e-06, | |
| "loss": 0.0259, | |
| "num_tokens": 651295.0, | |
| "reward": 0.3123705621808767, | |
| "reward_std": 0.07841869294643403, | |
| "rewards/accuracy_reward/mean": 0.3123705621808767, | |
| "rewards/accuracy_reward/std": 0.07841869369149208, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4375, | |
| "completions/max_length": 738.6, | |
| "completions/max_terminated_length": 518.8, | |
| "completions/mean_length": 664.9375, | |
| "completions/mean_terminated_length": 428.275, | |
| "completions/min_length": 535.7, | |
| "completions/min_terminated_length": 330.9, | |
| "entropy": 0.6629625763744116, | |
| "epoch": 0.02706359945872801, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.02026297152042389, | |
| "learning_rate": 9.732070365358594e-06, | |
| "loss": -0.0092, | |
| "num_tokens": 726946.0, | |
| "reward": 0.22115838173776864, | |
| "reward_std": 0.058896982856094834, | |
| "rewards/accuracy_reward/mean": 0.22115838173776864, | |
| "rewards/accuracy_reward/std": 0.0588969849050045, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025, | |
| "completions/max_length": 491.4, | |
| "completions/max_terminated_length": 449.0, | |
| "completions/mean_length": 415.975, | |
| "completions/mean_terminated_length": 408.90000610351564, | |
| "completions/min_length": 357.1, | |
| "completions/min_terminated_length": 357.1, | |
| "entropy": 0.39121876694262026, | |
| "epoch": 0.029769959404600813, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.017219241708517075, | |
| "learning_rate": 9.705006765899865e-06, | |
| "loss": 0.0361, | |
| "num_tokens": 791352.0, | |
| "reward": 0.3889605298638344, | |
| "reward_std": 0.0880117567256093, | |
| "rewards/accuracy_reward/mean": 0.3889605298638344, | |
| "rewards/accuracy_reward/std": 0.0880117580294609, | |
| "step": 110 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3695, | |
| "num_input_tokens_seen": 791352, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |