| { | |
| "best_global_step": 75, | |
| "best_metric": -0.0067981877364218235, | |
| "best_model_checkpoint": "outputs/microsoft/Phi-3.5-mini-instruct/countdown/checkpoint-75", | |
| "epoch": 3.0, | |
| "eval_steps": 5, | |
| "global_step": 75, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.5520595297857653e-06, | |
| "learning_rate": 9.466666666666666e-07, | |
| "loss": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 250.575, | |
| "eval_kl": 3.934502601623535e-05, | |
| "eval_loss": 0.015041607432067394, | |
| "eval_num_tokens": 31688.0, | |
| "eval_reward": 0.05, | |
| "eval_reward_std": 0.1, | |
| "eval_rewards/equation_reward_func": 0.05, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 283.7484, | |
| "eval_samples_per_second": 0.07, | |
| "eval_steps_per_second": 0.018, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.023073066025972366, | |
| "learning_rate": 8.799999999999999e-07, | |
| "loss": 0.0033, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 252.7875, | |
| "eval_kl": 3.967881202697754e-05, | |
| "eval_loss": -0.00017719810421112925, | |
| "eval_num_tokens": 62987.0, | |
| "eval_reward": 0.0375, | |
| "eval_reward_std": 0.075, | |
| "eval_rewards/equation_reward_func": 0.0375, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 282.9691, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.7865870631794678e-06, | |
| "learning_rate": 8.133333333333333e-07, | |
| "loss": 0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 248.875, | |
| "eval_kl": 4.363656044006348e-05, | |
| "eval_loss": 1.7436344705856754e-06, | |
| "eval_num_tokens": 94591.0, | |
| "eval_reward": 0.025, | |
| "eval_reward_std": 0.05, | |
| "eval_rewards/equation_reward_func": 0.025, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 282.3086, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.466666666666667e-07, | |
| "loss": 0.0138, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 253.1875, | |
| "eval_kl": 3.9270520210266115e-05, | |
| "eval_loss": -0.0007528069545514882, | |
| "eval_num_tokens": 125922.0, | |
| "eval_reward": 0.0125, | |
| "eval_reward_std": 0.025, | |
| "eval_rewards/equation_reward_func": 0.0125, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 282.3999, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.02649177424609661, | |
| "learning_rate": 6.800000000000001e-07, | |
| "loss": 0.0059, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 252.9875, | |
| "eval_kl": 3.8030743598937986e-05, | |
| "eval_loss": 0.006908929906785488, | |
| "eval_num_tokens": 157014.0, | |
| "eval_reward": 0.075, | |
| "eval_reward_std": 0.12886751294136048, | |
| "eval_rewards/equation_reward_func": 0.075, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 282.4923, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.017524780705571175, | |
| "learning_rate": 6.133333333333332e-07, | |
| "loss": 0.0098, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 251.6625, | |
| "eval_kl": 3.592073917388916e-05, | |
| "eval_loss": 0.0063271126709878445, | |
| "eval_num_tokens": 188533.0, | |
| "eval_reward": 0.0375, | |
| "eval_reward_std": 0.075, | |
| "eval_rewards/equation_reward_func": 0.025, | |
| "eval_rewards/format_reward_func": 0.0125, | |
| "eval_runtime": 283.0566, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.993278601730708e-06, | |
| "learning_rate": 5.466666666666666e-07, | |
| "loss": 0.0024, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 253.1, | |
| "eval_kl": 4.151761531829834e-05, | |
| "eval_loss": 0.009441868402063847, | |
| "eval_num_tokens": 219791.0, | |
| "eval_reward": 0.05, | |
| "eval_reward_std": 0.07886751294136048, | |
| "eval_rewards/equation_reward_func": 0.0375, | |
| "eval_rewards/format_reward_func": 0.0125, | |
| "eval_runtime": 282.8829, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 5.774197120445024e-07, | |
| "learning_rate": 4.8e-07, | |
| "loss": 0.0106, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 250.6375, | |
| "eval_kl": 4.119873046875e-05, | |
| "eval_loss": 1.6474120911880163e-06, | |
| "eval_num_tokens": 251168.0, | |
| "eval_reward": 0.0, | |
| "eval_reward_std": 0.0, | |
| "eval_rewards/equation_reward_func": 0.0, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 282.7407, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 4.944530473949271e-07, | |
| "learning_rate": 4.1333333333333333e-07, | |
| "loss": 0.0019, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 250.8, | |
| "eval_kl": 3.807544708251953e-05, | |
| "eval_loss": 0.019552746787667274, | |
| "eval_num_tokens": 282579.0, | |
| "eval_reward": 0.05, | |
| "eval_reward_std": 0.07886751294136048, | |
| "eval_rewards/equation_reward_func": 0.0375, | |
| "eval_rewards/format_reward_func": 0.0125, | |
| "eval_runtime": 282.3595, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.0208375733345747, | |
| "learning_rate": 3.4666666666666665e-07, | |
| "loss": 0.0081, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 254.1, | |
| "eval_kl": 3.8304924964904784e-05, | |
| "eval_loss": 0.004257645923644304, | |
| "eval_num_tokens": 313952.0, | |
| "eval_reward": 0.0125, | |
| "eval_reward_std": 0.025, | |
| "eval_rewards/equation_reward_func": 0.0125, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 282.4466, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.027176212519407272, | |
| "learning_rate": 2.8e-07, | |
| "loss": 0.0077, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 251.9625, | |
| "eval_kl": 4.739761352539063e-05, | |
| "eval_loss": 0.00685427850112319, | |
| "eval_num_tokens": 345452.0, | |
| "eval_reward": 0.025, | |
| "eval_reward_std": 0.05, | |
| "eval_rewards/equation_reward_func": 0.0125, | |
| "eval_rewards/format_reward_func": 0.0125, | |
| "eval_runtime": 283.0534, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.1333333333333334e-07, | |
| "loss": 0.0039, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 251.025, | |
| "eval_kl": 3.7276744842529296e-05, | |
| "eval_loss": 0.0007022842764854431, | |
| "eval_num_tokens": 376777.0, | |
| "eval_reward": 0.0125, | |
| "eval_reward_std": 0.025, | |
| "eval_rewards/equation_reward_func": 0.0125, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 283.0247, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.01980462484061718, | |
| "learning_rate": 1.4666666666666666e-07, | |
| "loss": 0.016, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 251.0625, | |
| "eval_kl": 4.115998744964599e-05, | |
| "eval_loss": 0.010943427681922913, | |
| "eval_num_tokens": 407811.0, | |
| "eval_reward": 0.0625, | |
| "eval_reward_std": 0.125, | |
| "eval_rewards/equation_reward_func": 0.05, | |
| "eval_rewards/format_reward_func": 0.0125, | |
| "eval_runtime": 283.1384, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 8.191254892153665e-07, | |
| "learning_rate": 8e-08, | |
| "loss": 0.0001, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 251.125, | |
| "eval_kl": 4.7346949577331544e-05, | |
| "eval_loss": 0.0024404774885624647, | |
| "eval_num_tokens": 439212.0, | |
| "eval_reward": 0.0375, | |
| "eval_reward_std": 0.075, | |
| "eval_rewards/equation_reward_func": 0.0375, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 283.0414, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.02634088508784771, | |
| "learning_rate": 1.3333333333333334e-08, | |
| "loss": 0.0145, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 252.075, | |
| "eval_kl": 4.2390823364257815e-05, | |
| "eval_loss": -0.0067981877364218235, | |
| "eval_num_tokens": 470623.0, | |
| "eval_reward": 0.0125, | |
| "eval_reward_std": 0.025, | |
| "eval_rewards/equation_reward_func": 0.0125, | |
| "eval_rewards/format_reward_func": 0.0, | |
| "eval_runtime": 283.5777, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 75 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 75, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 5, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |