| { |
| "best_global_step": 30, |
| "best_metric": 0.009350189939141273, |
| "best_model_checkpoint": "outputs/microsoft/Phi-3.5-mini-instruct/countdown_n100_mcl_256_pretrained/checkpoint-30", |
| "epoch": 3.0, |
| "eval_steps": 5, |
| "global_step": 75, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.04710305854678154, |
| "learning_rate": 9.466666666666666e-07, |
| "loss": 0.0581, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 249.7375, |
| "eval_kl": 2.7620792388916014e-05, |
| "eval_loss": 0.011466369964182377, |
| "eval_num_tokens": 30743.0, |
| "eval_reward": 0.0875, |
| "eval_reward_std": 0.14787135720252992, |
| "eval_rewards/equation_reward_func": 0.0375, |
| "eval_rewards/format_reward_func": 0.05, |
| "eval_runtime": 283.5242, |
| "eval_samples_per_second": 0.071, |
| "eval_steps_per_second": 0.018, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.0248898696154356, |
| "learning_rate": 8.799999999999999e-07, |
| "loss": 0.0305, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 248.3125, |
| "eval_kl": 3.051459789276123e-05, |
| "eval_loss": 0.010714234784245491, |
| "eval_num_tokens": 61654.0, |
| "eval_reward": 0.1, |
| "eval_reward_std": 0.11969234347343445, |
| "eval_rewards/equation_reward_func": 0.075, |
| "eval_rewards/format_reward_func": 0.025, |
| "eval_runtime": 279.7789, |
| "eval_samples_per_second": 0.071, |
| "eval_steps_per_second": 0.018, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.03560088202357292, |
| "learning_rate": 8.133333333333333e-07, |
| "loss": 0.0333, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 248.8125, |
| "eval_kl": 3.6323070526123046e-05, |
| "eval_loss": 0.016332309693098068, |
| "eval_num_tokens": 92513.0, |
| "eval_reward": 0.1, |
| "eval_reward_std": 0.15773502588272095, |
| "eval_rewards/equation_reward_func": 0.0375, |
| "eval_rewards/format_reward_func": 0.0625, |
| "eval_runtime": 282.7334, |
| "eval_samples_per_second": 0.071, |
| "eval_steps_per_second": 0.018, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.02903689257800579, |
| "learning_rate": 7.466666666666667e-07, |
| "loss": 0.0363, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 241.4375, |
| "eval_kl": 3.466010093688965e-05, |
| "eval_loss": 0.03578554838895798, |
| "eval_num_tokens": 123451.0, |
| "eval_reward": 0.1125, |
| "eval_reward_std": 0.1978713572025299, |
| "eval_rewards/equation_reward_func": 0.0625, |
| "eval_rewards/format_reward_func": 0.05, |
| "eval_runtime": 282.549, |
| "eval_samples_per_second": 0.071, |
| "eval_steps_per_second": 0.018, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 5.04811282553419e-07, |
| "learning_rate": 6.800000000000001e-07, |
| "loss": 0.0011, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 247.55, |
| "eval_kl": 3.0243396759033202e-05, |
| "eval_loss": 0.023990554735064507, |
| "eval_num_tokens": 154568.0, |
| "eval_reward": 0.1, |
| "eval_reward_std": 0.17886751294136047, |
| "eval_rewards/equation_reward_func": 0.05, |
| "eval_rewards/format_reward_func": 0.05, |
| "eval_runtime": 282.4574, |
| "eval_samples_per_second": 0.071, |
| "eval_steps_per_second": 0.018, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.0, |
| "learning_rate": 6.133333333333332e-07, |
| "loss": 0.0337, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.2, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 249.3875, |
| "eval_kl": 2.872645854949951e-05, |
| "eval_loss": 0.009350189939141273, |
| "eval_num_tokens": 185750.0, |
| "eval_reward": 0.1, |
| "eval_reward_std": 0.15, |
| "eval_rewards/equation_reward_func": 0.0625, |
| "eval_rewards/format_reward_func": 0.0375, |
| "eval_runtime": 283.2443, |
| "eval_samples_per_second": 0.071, |
| "eval_steps_per_second": 0.018, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.025708282366394997, |
| "learning_rate": 5.466666666666666e-07, |
| "loss": 0.0273, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 242.05, |
| "eval_kl": 3.2845139503479e-05, |
| "eval_loss": 0.038312580436468124, |
| "eval_num_tokens": 217087.0, |
| "eval_reward": 0.1375, |
| "eval_reward_std": 0.1978713572025299, |
| "eval_rewards/equation_reward_func": 0.0625, |
| "eval_rewards/format_reward_func": 0.075, |
| "eval_runtime": 283.9676, |
| "eval_samples_per_second": 0.07, |
| "eval_steps_per_second": 0.018, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.04088641330599785, |
| "learning_rate": 4.8e-07, |
| "loss": 0.0318, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 247.9375, |
| "eval_kl": 3.358125686645508e-05, |
| "eval_loss": 0.028362590819597244, |
| "eval_num_tokens": 247949.0, |
| "eval_reward": 0.1375, |
| "eval_reward_std": 0.1886961877346039, |
| "eval_rewards/equation_reward_func": 0.0875, |
| "eval_rewards/format_reward_func": 0.05, |
| "eval_runtime": 285.356, |
| "eval_samples_per_second": 0.07, |
| "eval_steps_per_second": 0.018, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.024730732664465904, |
| "learning_rate": 4.1333333333333333e-07, |
| "loss": 0.0578, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 246.9625, |
| "eval_kl": 2.7140974998474122e-05, |
| "eval_loss": 0.043169133365154266, |
| "eval_num_tokens": 278710.0, |
| "eval_reward": 0.125, |
| "eval_reward_std": 0.2, |
| "eval_rewards/equation_reward_func": 0.075, |
| "eval_rewards/format_reward_func": 0.05, |
| "eval_runtime": 285.2657, |
| "eval_samples_per_second": 0.07, |
| "eval_steps_per_second": 0.018, |
| "step": 45 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.03765915334224701, |
| "learning_rate": 3.4666666666666665e-07, |
| "loss": 0.0234, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 247.05, |
| "eval_kl": 3.2660365104675296e-05, |
| "eval_loss": 0.026807209476828575, |
| "eval_num_tokens": 309954.0, |
| "eval_reward": 0.125, |
| "eval_reward_std": 0.20773502588272094, |
| "eval_rewards/equation_reward_func": 0.0875, |
| "eval_rewards/format_reward_func": 0.0375, |
| "eval_runtime": 284.404, |
| "eval_samples_per_second": 0.07, |
| "eval_steps_per_second": 0.018, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.01991177722811699, |
| "learning_rate": 2.8e-07, |
| "loss": 0.0691, |
| "step": 55 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 242.6375, |
| "eval_kl": 3.68952751159668e-05, |
| "eval_loss": 0.04483898729085922, |
| "eval_num_tokens": 340605.0, |
| "eval_reward": 0.1625, |
| "eval_reward_std": 0.25560638308525085, |
| "eval_rewards/equation_reward_func": 0.1, |
| "eval_rewards/format_reward_func": 0.0625, |
| "eval_runtime": 283.6459, |
| "eval_samples_per_second": 0.071, |
| "eval_steps_per_second": 0.018, |
| "step": 55 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.0200728178024292, |
| "learning_rate": 2.1333333333333334e-07, |
| "loss": 0.0518, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 250.45, |
| "eval_kl": 3.1629204750061034e-05, |
| "eval_loss": 0.009741068817675114, |
| "eval_num_tokens": 371457.0, |
| "eval_reward": 0.075, |
| "eval_reward_std": 0.12886751294136048, |
| "eval_rewards/equation_reward_func": 0.05, |
| "eval_rewards/format_reward_func": 0.025, |
| "eval_runtime": 283.7186, |
| "eval_samples_per_second": 0.07, |
| "eval_steps_per_second": 0.018, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.021919438615441322, |
| "learning_rate": 1.4666666666666666e-07, |
| "loss": 0.0167, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.6, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 250.35, |
| "eval_kl": 2.8392672538757326e-05, |
| "eval_loss": 0.016574550420045853, |
| "eval_num_tokens": 402684.0, |
| "eval_reward": 0.1125, |
| "eval_reward_std": 0.14469234347343446, |
| "eval_rewards/equation_reward_func": 0.075, |
| "eval_rewards/format_reward_func": 0.0375, |
| "eval_runtime": 283.4564, |
| "eval_samples_per_second": 0.071, |
| "eval_steps_per_second": 0.018, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.03317731246352196, |
| "learning_rate": 8e-08, |
| "loss": 0.0358, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 246.825, |
| "eval_kl": 3.1587481498718264e-05, |
| "eval_loss": 0.04395188018679619, |
| "eval_num_tokens": 433755.0, |
| "eval_reward": 0.2, |
| "eval_reward_std": 0.3154700517654419, |
| "eval_rewards/equation_reward_func": 0.0625, |
| "eval_rewards/format_reward_func": 0.1375, |
| "eval_runtime": 284.1288, |
| "eval_samples_per_second": 0.07, |
| "eval_steps_per_second": 0.018, |
| "step": 70 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.04194802790880203, |
| "learning_rate": 1.3333333333333334e-08, |
| "loss": 0.0372, |
| "step": 75 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_clip_ratio": 0.0, |
| "eval_completion_length": 247.825, |
| "eval_kl": 3.3229589462280273e-05, |
| "eval_loss": 0.034269753843545914, |
| "eval_num_tokens": 464648.0, |
| "eval_reward": 0.1, |
| "eval_reward_std": 0.15, |
| "eval_rewards/equation_reward_func": 0.0625, |
| "eval_rewards/format_reward_func": 0.0375, |
| "eval_runtime": 283.7321, |
| "eval_samples_per_second": 0.07, |
| "eval_steps_per_second": 0.018, |
| "step": 75 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 75, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 5, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|