| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9929577464788732, |
| "eval_steps": 500, |
| "global_step": 141, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.1208984375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 255.9, |
| "completions/mean_length": 120.4628173828125, |
| "completions/mean_terminated_length": 106.38647994995117, |
| "completions/min_length": 19.2, |
| "completions/min_terminated_length": 19.2, |
| "epoch": 0.07042253521126761, |
| "grad_norm": 0.2456492780734702, |
| "kl": 0.246875, |
| "learning_rate": 9.366197183098593e-06, |
| "loss": 0.0828, |
| "num_tokens": 11811341.0, |
| "reward": 0.933349609375, |
| "reward_std": 0.22970878034830094, |
| "rewards/accuracy_reward/mean": 0.0947021484375, |
| "rewards/accuracy_reward/std": 0.28115386664867403, |
| "rewards/format_reward/mean": 0.8386474609375, |
| "rewards/format_reward/std": 0.2507546439766884, |
| "step": 10 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0341796875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 256.0, |
| "completions/mean_length": 136.043115234375, |
| "completions/mean_terminated_length": 131.86617279052734, |
| "completions/min_length": 22.5, |
| "completions/min_terminated_length": 22.5, |
| "epoch": 0.14084507042253522, |
| "grad_norm": 0.19689937531771842, |
| "kl": 0.20615234375, |
| "learning_rate": 8.661971830985915e-06, |
| "loss": 0.0354, |
| "num_tokens": 24245531.0, |
| "reward": 1.25078125, |
| "reward_std": 0.29540089666843417, |
| "rewards/accuracy_reward/mean": 0.2865966796875, |
| "rewards/accuracy_reward/std": 0.4441226840019226, |
| "rewards/format_reward/mean": 0.9641845703125, |
| "rewards/format_reward/std": 0.17938514649868012, |
| "step": 20 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.020068359375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 255.8, |
| "completions/mean_length": 140.8588623046875, |
| "completions/mean_terminated_length": 138.51354064941407, |
| "completions/min_length": 20.5, |
| "completions/min_terminated_length": 20.5, |
| "epoch": 0.2112676056338028, |
| "grad_norm": 0.21172988571553053, |
| "kl": 0.21923828125, |
| "learning_rate": 7.95774647887324e-06, |
| "loss": 0.0305, |
| "num_tokens": 36814350.0, |
| "reward": 1.3020751953125, |
| "reward_std": 0.295776504278183, |
| "rewards/accuracy_reward/mean": 0.3288818359375, |
| "rewards/accuracy_reward/std": 0.46963170170783997, |
| "rewards/format_reward/mean": 0.973193359375, |
| "rewards/format_reward/std": 0.16002206206321717, |
| "step": 30 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.1049072265625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 255.9, |
| "completions/mean_length": 148.0318603515625, |
| "completions/mean_terminated_length": 136.30471115112306, |
| "completions/min_length": 15.4, |
| "completions/min_terminated_length": 15.4, |
| "epoch": 0.28169014084507044, |
| "grad_norm": 0.22532591156498452, |
| "kl": 0.25146484375, |
| "learning_rate": 7.253521126760564e-06, |
| "loss": 0.0696, |
| "num_tokens": 49745295.0, |
| "reward": 1.104150390625, |
| "reward_std": 0.3860862344503403, |
| "rewards/accuracy_reward/mean": 0.3103271484375, |
| "rewards/accuracy_reward/std": 0.4604327976703644, |
| "rewards/format_reward/mean": 0.7938232421875, |
| "rewards/format_reward/std": 0.28022926300764084, |
| "step": 40 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.030517578125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 255.9, |
| "completions/mean_length": 143.491650390625, |
| "completions/mean_terminated_length": 139.98492889404298, |
| "completions/min_length": 18.1, |
| "completions/min_terminated_length": 18.1, |
| "epoch": 0.352112676056338, |
| "grad_norm": 0.1885789608304361, |
| "kl": 0.22822265625, |
| "learning_rate": 6.549295774647888e-06, |
| "loss": 0.0455, |
| "num_tokens": 62485177.0, |
| "reward": 1.3081787109375, |
| "reward_std": 0.3404128760099411, |
| "rewards/accuracy_reward/mean": 0.3631103515625, |
| "rewards/accuracy_reward/std": 0.4808199375867844, |
| "rewards/format_reward/mean": 0.945068359375, |
| "rewards/format_reward/std": 0.2246607005596161, |
| "step": 50 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0277099609375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 255.9, |
| "completions/mean_length": 146.1615234375, |
| "completions/mean_terminated_length": 143.0530227661133, |
| "completions/min_length": 25.8, |
| "completions/min_terminated_length": 25.8, |
| "epoch": 0.4225352112676056, |
| "grad_norm": 0.17282346465533863, |
| "kl": 0.20244140625, |
| "learning_rate": 5.845070422535212e-06, |
| "loss": 0.0395, |
| "num_tokens": 75340361.0, |
| "reward": 1.33486328125, |
| "reward_std": 0.31391614973545073, |
| "rewards/accuracy_reward/mean": 0.3770263671875, |
| "rewards/accuracy_reward/std": 0.48451300263404845, |
| "rewards/format_reward/mean": 0.9578369140625, |
| "rewards/format_reward/std": 0.19974014312028884, |
| "step": 60 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0384521484375, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 256.0, |
| "completions/mean_length": 148.16982421875, |
| "completions/mean_terminated_length": 143.90497741699218, |
| "completions/min_length": 26.1, |
| "completions/min_terminated_length": 26.1, |
| "epoch": 0.49295774647887325, |
| "grad_norm": 0.20083770112758828, |
| "kl": 0.2142578125, |
| "learning_rate": 5.140845070422536e-06, |
| "loss": 0.0525, |
| "num_tokens": 88271653.0, |
| "reward": 1.3040771484375, |
| "reward_std": 0.3394395083189011, |
| "rewards/accuracy_reward/mean": 0.3633056640625, |
| "rewards/accuracy_reward/std": 0.4807720482349396, |
| "rewards/format_reward/mean": 0.940771484375, |
| "rewards/format_reward/std": 0.23174136728048325, |
| "step": 70 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0213134765625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 255.9, |
| "completions/mean_length": 145.615234375, |
| "completions/mean_terminated_length": 143.20397033691407, |
| "completions/min_length": 23.1, |
| "completions/min_terminated_length": 23.1, |
| "epoch": 0.5633802816901409, |
| "grad_norm": 0.18419537410639902, |
| "kl": 0.22841796875, |
| "learning_rate": 4.43661971830986e-06, |
| "loss": 0.0372, |
| "num_tokens": 101124269.0, |
| "reward": 1.3465087890625, |
| "reward_std": 0.3062387377023697, |
| "rewards/accuracy_reward/mean": 0.382861328125, |
| "rewards/accuracy_reward/std": 0.48586891293525697, |
| "rewards/format_reward/mean": 0.9636474609375, |
| "rewards/format_reward/std": 0.18586143404245375, |
| "step": 80 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.028466796875, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 256.0, |
| "completions/mean_length": 152.4924072265625, |
| "completions/mean_terminated_length": 149.45853271484376, |
| "completions/min_length": 23.3, |
| "completions/min_terminated_length": 23.3, |
| "epoch": 0.6338028169014085, |
| "grad_norm": 0.18212247166689255, |
| "kl": 0.197265625, |
| "learning_rate": 3.7323943661971835e-06, |
| "loss": 0.0378, |
| "num_tokens": 114254494.0, |
| "reward": 1.310498046875, |
| "reward_std": 0.31236537992954255, |
| "rewards/accuracy_reward/mean": 0.3539794921875, |
| "rewards/accuracy_reward/std": 0.4776217073202133, |
| "rewards/format_reward/mean": 0.9565185546875, |
| "rewards/format_reward/std": 0.2028029069304466, |
| "step": 90 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0301025390625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 255.7, |
| "completions/mean_length": 151.31767578125, |
| "completions/mean_terminated_length": 148.0802963256836, |
| "completions/min_length": 25.2, |
| "completions/min_terminated_length": 25.2, |
| "epoch": 0.704225352112676, |
| "grad_norm": 0.21910042885938372, |
| "kl": 0.18720703125, |
| "learning_rate": 3.0281690140845073e-06, |
| "loss": 0.0392, |
| "num_tokens": 127312026.0, |
| "reward": 1.3383544921875, |
| "reward_std": 0.32255696356296537, |
| "rewards/accuracy_reward/mean": 0.382861328125, |
| "rewards/accuracy_reward/std": 0.48587043583393097, |
| "rewards/format_reward/mean": 0.9554931640625, |
| "rewards/format_reward/std": 0.20596200376749038, |
| "step": 100 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.030126953125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 256.0, |
| "completions/mean_length": 153.7516357421875, |
| "completions/mean_terminated_length": 150.5800354003906, |
| "completions/min_length": 26.2, |
| "completions/min_terminated_length": 26.2, |
| "epoch": 0.7746478873239436, |
| "grad_norm": 0.1726874715898045, |
| "kl": 0.18974609375, |
| "learning_rate": 2.323943661971831e-06, |
| "loss": 0.0395, |
| "num_tokens": 140468685.0, |
| "reward": 1.3381103515625, |
| "reward_std": 0.31997495591640474, |
| "rewards/accuracy_reward/mean": 0.383740234375, |
| "rewards/accuracy_reward/std": 0.486175873875618, |
| "rewards/format_reward/mean": 0.9543701171875, |
| "rewards/format_reward/std": 0.20838392823934554, |
| "step": 110 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0293212890625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 256.0, |
| "completions/mean_length": 153.7035400390625, |
| "completions/mean_terminated_length": 150.61290435791017, |
| "completions/min_length": 28.3, |
| "completions/min_terminated_length": 28.3, |
| "epoch": 0.8450704225352113, |
| "grad_norm": 0.1653288093843954, |
| "kl": 0.183984375, |
| "learning_rate": 1.6197183098591552e-06, |
| "loss": 0.0372, |
| "num_tokens": 153599310.0, |
| "reward": 1.35302734375, |
| "reward_std": 0.3156912326812744, |
| "rewards/accuracy_reward/mean": 0.3957275390625, |
| "rewards/accuracy_reward/std": 0.48883563578128814, |
| "rewards/format_reward/mean": 0.9572998046875, |
| "rewards/format_reward/std": 0.2019893079996109, |
| "step": 120 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0310791015625, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 256.0, |
| "completions/mean_length": 152.9286376953125, |
| "completions/mean_terminated_length": 149.62524871826173, |
| "completions/min_length": 25.6, |
| "completions/min_terminated_length": 25.6, |
| "epoch": 0.9154929577464789, |
| "grad_norm": 0.17111412613161744, |
| "kl": 0.18369140625, |
| "learning_rate": 9.154929577464789e-07, |
| "loss": 0.0413, |
| "num_tokens": 166750947.0, |
| "reward": 1.3502197265625, |
| "reward_std": 0.3155178099870682, |
| "rewards/accuracy_reward/mean": 0.3943359375, |
| "rewards/accuracy_reward/std": 0.48866645693778993, |
| "rewards/format_reward/mean": 0.9558837890625, |
| "rewards/format_reward/std": 0.20521053820848464, |
| "step": 130 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0335205078125, |
| "completions/max_length": 256.0, |
| "completions/max_terminated_length": 256.0, |
| "completions/mean_length": 152.0265625, |
| "completions/mean_terminated_length": 148.4231918334961, |
| "completions/min_length": 25.9, |
| "completions/min_terminated_length": 25.9, |
| "epoch": 0.9859154929577465, |
| "grad_norm": 0.18623854088340255, |
| "kl": 0.20126953125, |
| "learning_rate": 2.1126760563380284e-07, |
| "loss": 0.0456, |
| "num_tokens": 179822731.0, |
| "reward": 1.348291015625, |
| "reward_std": 0.3256455361843109, |
| "rewards/accuracy_reward/mean": 0.3951904296875, |
| "rewards/accuracy_reward/std": 0.4886902630329132, |
| "rewards/format_reward/mean": 0.9531005859375, |
| "rewards/format_reward/std": 0.21116815358400345, |
| "step": 140 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 142, |
| "num_input_tokens_seen": 181126457, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|