| { | |
| "best_global_step": 30, | |
| "best_metric": 0.009350189939141273, | |
| "best_model_checkpoint": "outputs/microsoft/Phi-3.5-mini-instruct/countdown_n100_mcl_256_pretrained/checkpoint-30", | |
| "epoch": 3.0, | |
| "eval_steps": 5, | |
| "global_step": 75, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.04710305854678154, | |
| "learning_rate": 9.466666666666666e-07, | |
| "loss": 0.0581, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 249.7375, | |
| "eval_kl": 2.7620792388916014e-05, | |
| "eval_loss": 0.011466369964182377, | |
| "eval_num_tokens": 30743.0, | |
| "eval_reward": 0.0875, | |
| "eval_reward_std": 0.14787135720252992, | |
| "eval_rewards/equation_reward_func": 0.0375, | |
| "eval_rewards/format_reward_func": 0.05, | |
| "eval_runtime": 283.5242, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.0248898696154356, | |
| "learning_rate": 8.799999999999999e-07, | |
| "loss": 0.0305, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 248.3125, | |
| "eval_kl": 3.051459789276123e-05, | |
| "eval_loss": 0.010714234784245491, | |
| "eval_num_tokens": 61654.0, | |
| "eval_reward": 0.1, | |
| "eval_reward_std": 0.11969234347343445, | |
| "eval_rewards/equation_reward_func": 0.075, | |
| "eval_rewards/format_reward_func": 0.025, | |
| "eval_runtime": 279.7789, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.03560088202357292, | |
| "learning_rate": 8.133333333333333e-07, | |
| "loss": 0.0333, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 248.8125, | |
| "eval_kl": 3.6323070526123046e-05, | |
| "eval_loss": 0.016332309693098068, | |
| "eval_num_tokens": 92513.0, | |
| "eval_reward": 0.1, | |
| "eval_reward_std": 0.15773502588272095, | |
| "eval_rewards/equation_reward_func": 0.0375, | |
| "eval_rewards/format_reward_func": 0.0625, | |
| "eval_runtime": 282.7334, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.02903689257800579, | |
| "learning_rate": 7.466666666666667e-07, | |
| "loss": 0.0363, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 241.4375, | |
| "eval_kl": 3.466010093688965e-05, | |
| "eval_loss": 0.03578554838895798, | |
| "eval_num_tokens": 123451.0, | |
| "eval_reward": 0.1125, | |
| "eval_reward_std": 0.1978713572025299, | |
| "eval_rewards/equation_reward_func": 0.0625, | |
| "eval_rewards/format_reward_func": 0.05, | |
| "eval_runtime": 282.549, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 5.04811282553419e-07, | |
| "learning_rate": 6.800000000000001e-07, | |
| "loss": 0.0011, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 247.55, | |
| "eval_kl": 3.0243396759033202e-05, | |
| "eval_loss": 0.023990554735064507, | |
| "eval_num_tokens": 154568.0, | |
| "eval_reward": 0.1, | |
| "eval_reward_std": 0.17886751294136047, | |
| "eval_rewards/equation_reward_func": 0.05, | |
| "eval_rewards/format_reward_func": 0.05, | |
| "eval_runtime": 282.4574, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.133333333333332e-07, | |
| "loss": 0.0337, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 249.3875, | |
| "eval_kl": 2.872645854949951e-05, | |
| "eval_loss": 0.009350189939141273, | |
| "eval_num_tokens": 185750.0, | |
| "eval_reward": 0.1, | |
| "eval_reward_std": 0.15, | |
| "eval_rewards/equation_reward_func": 0.0625, | |
| "eval_rewards/format_reward_func": 0.0375, | |
| "eval_runtime": 283.2443, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.025708282366394997, | |
| "learning_rate": 5.466666666666666e-07, | |
| "loss": 0.0273, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 242.05, | |
| "eval_kl": 3.2845139503479e-05, | |
| "eval_loss": 0.038312580436468124, | |
| "eval_num_tokens": 217087.0, | |
| "eval_reward": 0.1375, | |
| "eval_reward_std": 0.1978713572025299, | |
| "eval_rewards/equation_reward_func": 0.0625, | |
| "eval_rewards/format_reward_func": 0.075, | |
| "eval_runtime": 283.9676, | |
| "eval_samples_per_second": 0.07, | |
| "eval_steps_per_second": 0.018, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.04088641330599785, | |
| "learning_rate": 4.8e-07, | |
| "loss": 0.0318, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 247.9375, | |
| "eval_kl": 3.358125686645508e-05, | |
| "eval_loss": 0.028362590819597244, | |
| "eval_num_tokens": 247949.0, | |
| "eval_reward": 0.1375, | |
| "eval_reward_std": 0.1886961877346039, | |
| "eval_rewards/equation_reward_func": 0.0875, | |
| "eval_rewards/format_reward_func": 0.05, | |
| "eval_runtime": 285.356, | |
| "eval_samples_per_second": 0.07, | |
| "eval_steps_per_second": 0.018, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.024730732664465904, | |
| "learning_rate": 4.1333333333333333e-07, | |
| "loss": 0.0578, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 246.9625, | |
| "eval_kl": 2.7140974998474122e-05, | |
| "eval_loss": 0.043169133365154266, | |
| "eval_num_tokens": 278710.0, | |
| "eval_reward": 0.125, | |
| "eval_reward_std": 0.2, | |
| "eval_rewards/equation_reward_func": 0.075, | |
| "eval_rewards/format_reward_func": 0.05, | |
| "eval_runtime": 285.2657, | |
| "eval_samples_per_second": 0.07, | |
| "eval_steps_per_second": 0.018, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.03765915334224701, | |
| "learning_rate": 3.4666666666666665e-07, | |
| "loss": 0.0234, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 247.05, | |
| "eval_kl": 3.2660365104675296e-05, | |
| "eval_loss": 0.026807209476828575, | |
| "eval_num_tokens": 309954.0, | |
| "eval_reward": 0.125, | |
| "eval_reward_std": 0.20773502588272094, | |
| "eval_rewards/equation_reward_func": 0.0875, | |
| "eval_rewards/format_reward_func": 0.0375, | |
| "eval_runtime": 284.404, | |
| "eval_samples_per_second": 0.07, | |
| "eval_steps_per_second": 0.018, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.01991177722811699, | |
| "learning_rate": 2.8e-07, | |
| "loss": 0.0691, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 242.6375, | |
| "eval_kl": 3.68952751159668e-05, | |
| "eval_loss": 0.04483898729085922, | |
| "eval_num_tokens": 340605.0, | |
| "eval_reward": 0.1625, | |
| "eval_reward_std": 0.25560638308525085, | |
| "eval_rewards/equation_reward_func": 0.1, | |
| "eval_rewards/format_reward_func": 0.0625, | |
| "eval_runtime": 283.6459, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.0200728178024292, | |
| "learning_rate": 2.1333333333333334e-07, | |
| "loss": 0.0518, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 250.45, | |
| "eval_kl": 3.1629204750061034e-05, | |
| "eval_loss": 0.009741068817675114, | |
| "eval_num_tokens": 371457.0, | |
| "eval_reward": 0.075, | |
| "eval_reward_std": 0.12886751294136048, | |
| "eval_rewards/equation_reward_func": 0.05, | |
| "eval_rewards/format_reward_func": 0.025, | |
| "eval_runtime": 283.7186, | |
| "eval_samples_per_second": 0.07, | |
| "eval_steps_per_second": 0.018, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.021919438615441322, | |
| "learning_rate": 1.4666666666666666e-07, | |
| "loss": 0.0167, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 250.35, | |
| "eval_kl": 2.8392672538757326e-05, | |
| "eval_loss": 0.016574550420045853, | |
| "eval_num_tokens": 402684.0, | |
| "eval_reward": 0.1125, | |
| "eval_reward_std": 0.14469234347343446, | |
| "eval_rewards/equation_reward_func": 0.075, | |
| "eval_rewards/format_reward_func": 0.0375, | |
| "eval_runtime": 283.4564, | |
| "eval_samples_per_second": 0.071, | |
| "eval_steps_per_second": 0.018, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.03317731246352196, | |
| "learning_rate": 8e-08, | |
| "loss": 0.0358, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 246.825, | |
| "eval_kl": 3.1587481498718264e-05, | |
| "eval_loss": 0.04395188018679619, | |
| "eval_num_tokens": 433755.0, | |
| "eval_reward": 0.2, | |
| "eval_reward_std": 0.3154700517654419, | |
| "eval_rewards/equation_reward_func": 0.0625, | |
| "eval_rewards/format_reward_func": 0.1375, | |
| "eval_runtime": 284.1288, | |
| "eval_samples_per_second": 0.07, | |
| "eval_steps_per_second": 0.018, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.04194802790880203, | |
| "learning_rate": 1.3333333333333334e-08, | |
| "loss": 0.0372, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_clip_ratio": 0.0, | |
| "eval_completion_length": 247.825, | |
| "eval_kl": 3.3229589462280273e-05, | |
| "eval_loss": 0.034269753843545914, | |
| "eval_num_tokens": 464648.0, | |
| "eval_reward": 0.1, | |
| "eval_reward_std": 0.15, | |
| "eval_rewards/equation_reward_func": 0.0625, | |
| "eval_rewards/format_reward_func": 0.0375, | |
| "eval_runtime": 283.7321, | |
| "eval_samples_per_second": 0.07, | |
| "eval_steps_per_second": 0.018, | |
| "step": 75 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 75, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 5, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |