| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.032, |
| "eval_steps": 500, |
| "global_step": 16, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 1174.0, |
| "completions/max_terminated_length": 1174.0, |
| "completions/mean_length": 909.0, |
| "completions/mean_terminated_length": 909.0, |
| "completions/min_length": 783.0, |
| "completions/min_terminated_length": 783.0, |
| "entropy": 0.05627120193094015, |
| "epoch": 0.002, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.5968819260597229, |
| "learning_rate": 5e-06, |
| "loss": -0.0469, |
| "num_tokens": 3712.0, |
| "reward": 0.8966667056083679, |
| "reward_std": 0.050852831453084946, |
| "rewards/reward_func_with_saving/mean": 0.8966667056083679, |
| "rewards/reward_func_with_saving/std": 0.05085281282663345, |
| "step": 1 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 613.0, |
| "completions/max_terminated_length": 613.0, |
| "completions/mean_length": 611.5, |
| "completions/mean_terminated_length": 611.5, |
| "completions/min_length": 611.0, |
| "completions/min_terminated_length": 611.0, |
| "entropy": 0.02739148633554578, |
| "epoch": 0.004, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.4688481092453003, |
| "learning_rate": 5e-06, |
| "loss": -0.0004, |
| "num_tokens": 6254.0, |
| "reward": 0.7022222280502319, |
| "reward_std": 0.02222222089767456, |
| "rewards/reward_func_with_saving/mean": 0.7022222280502319, |
| "rewards/reward_func_with_saving/std": 0.02222222276031971, |
| "step": 2 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 780.0, |
| "completions/max_terminated_length": 780.0, |
| "completions/mean_length": 703.75, |
| "completions/mean_terminated_length": 703.75, |
| "completions/min_length": 666.0, |
| "completions/min_terminated_length": 666.0, |
| "entropy": 0.10391132719814777, |
| "epoch": 0.006, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.0785013437271118, |
| "learning_rate": 5e-06, |
| "loss": -0.016, |
| "num_tokens": 9153.0, |
| "reward": 0.8922222256660461, |
| "reward_std": 0.011111120693385601, |
| "rewards/reward_func_with_saving/mean": 0.8922222256660461, |
| "rewards/reward_func_with_saving/std": 0.011111111380159855, |
| "step": 3 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 798.0, |
| "completions/max_terminated_length": 798.0, |
| "completions/mean_length": 732.0, |
| "completions/mean_terminated_length": 732.0, |
| "completions/min_length": 639.0, |
| "completions/min_terminated_length": 639.0, |
| "entropy": 0.07874358911067247, |
| "epoch": 0.008, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.7055785655975342, |
| "learning_rate": 5e-06, |
| "loss": 0.0102, |
| "num_tokens": 12157.0, |
| "reward": 0.8300000429153442, |
| "reward_std": 0.08785511553287506, |
| "rewards/reward_func_with_saving/mean": 0.8300000429153442, |
| "rewards/reward_func_with_saving/std": 0.08785512298345566, |
| "step": 4 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 731.0, |
| "completions/max_terminated_length": 731.0, |
| "completions/mean_length": 704.5, |
| "completions/mean_terminated_length": 704.5, |
| "completions/min_length": 654.0, |
| "completions/min_terminated_length": 654.0, |
| "entropy": 0.12625528872013092, |
| "epoch": 0.01, |
| "frac_reward_zero_std": 1.0, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 0.0, |
| "num_tokens": 15051.0, |
| "reward": 0.8311111330986023, |
| "reward_std": 0.0, |
| "rewards/reward_func_with_saving/mean": 0.8311111330986023, |
| "rewards/reward_func_with_saving/std": 0.0, |
| "step": 5 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 1007.0, |
| "completions/max_terminated_length": 1007.0, |
| "completions/mean_length": 816.0, |
| "completions/mean_terminated_length": 816.0, |
| "completions/min_length": 624.0, |
| "completions/min_terminated_length": 624.0, |
| "entropy": 0.1077885851264, |
| "epoch": 0.012, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.0341051816940308, |
| "learning_rate": 5e-06, |
| "loss": -0.119, |
| "num_tokens": 18407.0, |
| "reward": 0.8322222232818604, |
| "reward_std": 0.06416287273168564, |
| "rewards/reward_func_with_saving/mean": 0.8322222232818604, |
| "rewards/reward_func_with_saving/std": 0.06416288018226624, |
| "step": 6 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 783.0, |
| "completions/max_terminated_length": 783.0, |
| "completions/mean_length": 724.5, |
| "completions/mean_terminated_length": 724.5, |
| "completions/min_length": 690.0, |
| "completions/min_terminated_length": 690.0, |
| "entropy": 0.1391423474997282, |
| "epoch": 0.014, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.149609088897705, |
| "learning_rate": 5e-06, |
| "loss": -0.004, |
| "num_tokens": 21385.0, |
| "reward": 0.7538889050483704, |
| "reward_std": 0.1363290250301361, |
| "rewards/reward_func_with_saving/mean": 0.7538889050483704, |
| "rewards/reward_func_with_saving/std": 0.1363290250301361, |
| "step": 7 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 611.0, |
| "completions/max_terminated_length": 611.0, |
| "completions/mean_length": 608.25, |
| "completions/mean_terminated_length": 608.25, |
| "completions/min_length": 601.0, |
| "completions/min_terminated_length": 601.0, |
| "entropy": 0.04191916948184371, |
| "epoch": 0.016, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.440533995628357, |
| "learning_rate": 5e-06, |
| "loss": 0.006, |
| "num_tokens": 23894.0, |
| "reward": 0.4983333349227905, |
| "reward_std": 0.09888887405395508, |
| "rewards/reward_func_with_saving/mean": 0.4983333349227905, |
| "rewards/reward_func_with_saving/std": 0.09888887405395508, |
| "step": 8 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 643.0, |
| "completions/max_terminated_length": 643.0, |
| "completions/mean_length": 609.25, |
| "completions/mean_terminated_length": 609.25, |
| "completions/min_length": 576.0, |
| "completions/min_terminated_length": 576.0, |
| "entropy": 0.04268141835927963, |
| "epoch": 0.018, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.0735034942626953, |
| "learning_rate": 5e-06, |
| "loss": -0.0273, |
| "num_tokens": 26407.0, |
| "reward": 0.5416666865348816, |
| "reward_std": 0.15209239721298218, |
| "rewards/reward_func_with_saving/mean": 0.5416666865348816, |
| "rewards/reward_func_with_saving/std": 0.15209239721298218, |
| "step": 9 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 750.0, |
| "completions/max_terminated_length": 750.0, |
| "completions/mean_length": 678.25, |
| "completions/mean_terminated_length": 678.25, |
| "completions/min_length": 612.0, |
| "completions/min_terminated_length": 612.0, |
| "entropy": 0.10207068175077438, |
| "epoch": 0.02, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.8149948120117188, |
| "learning_rate": 5e-06, |
| "loss": 0.0611, |
| "num_tokens": 29192.0, |
| "reward": 0.7227777242660522, |
| "reward_std": 0.12421109527349472, |
| "rewards/reward_func_with_saving/mean": 0.7227777242660522, |
| "rewards/reward_func_with_saving/std": 0.12421111017465591, |
| "step": 10 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 762.0, |
| "completions/max_terminated_length": 762.0, |
| "completions/mean_length": 659.75, |
| "completions/mean_terminated_length": 659.75, |
| "completions/min_length": 607.0, |
| "completions/min_terminated_length": 607.0, |
| "entropy": 0.11515359580516815, |
| "epoch": 0.022, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.5091134309768677, |
| "learning_rate": 5e-06, |
| "loss": 0.0615, |
| "num_tokens": 31907.0, |
| "reward": 0.7888888716697693, |
| "reward_std": 0.10210946202278137, |
| "rewards/reward_func_with_saving/mean": 0.7888888716697693, |
| "rewards/reward_func_with_saving/std": 0.10210946202278137, |
| "step": 11 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 856.0, |
| "completions/max_terminated_length": 856.0, |
| "completions/mean_length": 705.75, |
| "completions/mean_terminated_length": 705.75, |
| "completions/min_length": 530.0, |
| "completions/min_terminated_length": 530.0, |
| "entropy": 0.15026956051588058, |
| "epoch": 0.024, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.3372756242752075, |
| "learning_rate": 5e-06, |
| "loss": 0.1075, |
| "num_tokens": 34818.0, |
| "reward": 0.7594444751739502, |
| "reward_std": 0.16865848004817963, |
| "rewards/reward_func_with_saving/mean": 0.7594444751739502, |
| "rewards/reward_func_with_saving/std": 0.16865849494934082, |
| "step": 12 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 919.0, |
| "completions/max_terminated_length": 919.0, |
| "completions/mean_length": 863.0, |
| "completions/mean_terminated_length": 863.0, |
| "completions/min_length": 783.0, |
| "completions/min_terminated_length": 783.0, |
| "entropy": 0.07314991764724255, |
| "epoch": 0.026, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.5220526456832886, |
| "learning_rate": 5e-06, |
| "loss": 0.0265, |
| "num_tokens": 38338.0, |
| "reward": 0.8427777886390686, |
| "reward_std": 0.021111130714416504, |
| "rewards/reward_func_with_saving/mean": 0.8427777886390686, |
| "rewards/reward_func_with_saving/std": 0.021111130714416504, |
| "step": 13 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 909.0, |
| "completions/max_terminated_length": 909.0, |
| "completions/mean_length": 733.5, |
| "completions/mean_terminated_length": 733.5, |
| "completions/min_length": 602.0, |
| "completions/min_terminated_length": 602.0, |
| "entropy": 0.11434740386903286, |
| "epoch": 0.028, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.422579050064087, |
| "learning_rate": 5e-06, |
| "loss": 0.0105, |
| "num_tokens": 41344.0, |
| "reward": 0.8372222185134888, |
| "reward_std": 0.03222225233912468, |
| "rewards/reward_func_with_saving/mean": 0.8372222185134888, |
| "rewards/reward_func_with_saving/std": 0.032222241163253784, |
| "step": 14 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 655.0, |
| "completions/max_terminated_length": 655.0, |
| "completions/mean_length": 630.0, |
| "completions/mean_terminated_length": 630.0, |
| "completions/min_length": 611.0, |
| "completions/min_terminated_length": 611.0, |
| "entropy": 0.048715847078710794, |
| "epoch": 0.03, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 0.6969804763793945, |
| "learning_rate": 5e-06, |
| "loss": 0.0151, |
| "num_tokens": 43936.0, |
| "reward": 0.528333306312561, |
| "reward_std": 0.15888887643814087, |
| "rewards/reward_func_with_saving/mean": 0.528333306312561, |
| "rewards/reward_func_with_saving/std": 0.15888887643814087, |
| "step": 15 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 879.0, |
| "completions/max_terminated_length": 879.0, |
| "completions/mean_length": 747.25, |
| "completions/mean_terminated_length": 747.25, |
| "completions/min_length": 643.0, |
| "completions/min_terminated_length": 643.0, |
| "entropy": 0.07380866352468729, |
| "epoch": 0.032, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.3967198133468628, |
| "learning_rate": 5e-06, |
| "loss": 0.0736, |
| "num_tokens": 46997.0, |
| "reward": 0.7199999690055847, |
| "reward_std": 0.14237260818481445, |
| "rewards/reward_func_with_saving/mean": 0.7199999690055847, |
| "rewards/reward_func_with_saving/std": 0.14237260818481445, |
| "step": 16 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 16, |
| "num_input_tokens_seen": 46997, |
| "num_train_epochs": 1, |
| "save_steps": 4, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|