phi_countdown_n100_mcl_1024 / trainer_state.json
Infernaught's picture
Upload folder using huggingface_hub
d22c8fb verified
{
"best_global_step": 55,
"best_metric": 0.020872214809060097,
"best_model_checkpoint": "outputs/microsoft/Phi-3.5-mini-instruct/countdown_mcl_1024/checkpoint-15",
"epoch": 3.0,
"eval_steps": 5,
"global_step": 75,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"grad_norm": 0.019189711660146713,
"learning_rate": 9.466666666666666e-07,
"loss": 0.0336,
"step": 5
},
{
"epoch": 0.2,
"eval_clip_ratio": 0.0,
"eval_completion_length": 802.8125,
"eval_kl": 3.0040740966796875e-05,
"eval_loss": 0.07697294652462006,
"eval_num_tokens": 76355.0,
"eval_reward": 0.0875,
"eval_reward_std": 0.13273502588272096,
"eval_rewards/equation_reward_func": 0.075,
"eval_rewards/format_reward_func": 0.0125,
"eval_runtime": 1338.4011,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 5
},
{
"epoch": 0.4,
"grad_norm": 0.01623854786157608,
"learning_rate": 8.799999999999999e-07,
"loss": 0.0454,
"step": 10
},
{
"epoch": 0.4,
"eval_clip_ratio": 0.0,
"eval_completion_length": 855.5625,
"eval_kl": 3.3321976661682126e-05,
"eval_loss": 0.04702009633183479,
"eval_num_tokens": 152886.0,
"eval_reward": 0.0625,
"eval_reward_std": 0.125,
"eval_rewards/equation_reward_func": 0.0375,
"eval_rewards/format_reward_func": 0.025,
"eval_runtime": 1294.6762,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 10
},
{
"epoch": 0.6,
"grad_norm": 0.0,
"learning_rate": 8.133333333333333e-07,
"loss": 0.0494,
"step": 15
},
{
"epoch": 0.6,
"eval_clip_ratio": 0.0,
"eval_completion_length": 796.0875,
"eval_kl": 3.191232681274414e-05,
"eval_loss": 0.03644490987062454,
"eval_num_tokens": 230732.0,
"eval_reward": 0.1125,
"eval_reward_std": 0.18273502588272095,
"eval_rewards/equation_reward_func": 0.0875,
"eval_rewards/format_reward_func": 0.025,
"eval_runtime": 1335.5012,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 15
},
{
"epoch": 0.8,
"grad_norm": 0.009749338962137699,
"learning_rate": 7.466666666666667e-07,
"loss": 0.0723,
"step": 20
},
{
"epoch": 0.8,
"eval_clip_ratio": 0.0,
"eval_completion_length": 810.8,
"eval_kl": 2.9605627059936523e-05,
"eval_loss": 0.04239075630903244,
"eval_num_tokens": 308908.0,
"eval_reward": 0.0875,
"eval_reward_std": 0.15386751294136047,
"eval_rewards/equation_reward_func": 0.05,
"eval_rewards/format_reward_func": 0.0375,
"eval_runtime": 1337.0845,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 20
},
{
"epoch": 1.0,
"grad_norm": 0.0265500470995903,
"learning_rate": 6.800000000000001e-07,
"loss": 0.0296,
"step": 25
},
{
"epoch": 1.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 769.025,
"eval_kl": 2.89231538772583e-05,
"eval_loss": 0.07508356869220734,
"eval_num_tokens": 385628.0,
"eval_reward": 0.1125,
"eval_reward_std": 0.18273502588272095,
"eval_rewards/equation_reward_func": 0.1,
"eval_rewards/format_reward_func": 0.0125,
"eval_runtime": 1334.3196,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 25
},
{
"epoch": 1.2,
"grad_norm": 0.0,
"learning_rate": 6.133333333333332e-07,
"loss": 0.0476,
"step": 30
},
{
"epoch": 1.2,
"eval_clip_ratio": 0.0,
"eval_completion_length": 743.5375,
"eval_kl": 3.173947334289551e-05,
"eval_loss": 0.09708776324987411,
"eval_num_tokens": 463386.0,
"eval_reward": 0.15,
"eval_reward_std": 0.25773502588272096,
"eval_rewards/equation_reward_func": 0.1375,
"eval_rewards/format_reward_func": 0.0125,
"eval_runtime": 1333.5969,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 30
},
{
"epoch": 1.4,
"grad_norm": 0.0,
"learning_rate": 5.466666666666666e-07,
"loss": 0.0398,
"step": 35
},
{
"epoch": 1.4,
"eval_clip_ratio": 0.0,
"eval_completion_length": 791.5125,
"eval_kl": 3.103315830230713e-05,
"eval_loss": 0.04481809586286545,
"eval_num_tokens": 536555.0,
"eval_reward": 0.1125,
"eval_reward_std": 0.20386751294136046,
"eval_rewards/equation_reward_func": 0.0875,
"eval_rewards/format_reward_func": 0.025,
"eval_runtime": 1304.5413,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 35
},
{
"epoch": 1.6,
"grad_norm": 0.01814551278948784,
"learning_rate": 4.8e-07,
"loss": 0.0884,
"step": 40
},
{
"epoch": 1.6,
"eval_clip_ratio": 0.0,
"eval_completion_length": 798.8,
"eval_kl": 3.2714009284973146e-05,
"eval_loss": 0.06108971685171127,
"eval_num_tokens": 615157.0,
"eval_reward": 0.125,
"eval_reward_std": 0.17886751294136047,
"eval_rewards/equation_reward_func": 0.1125,
"eval_rewards/format_reward_func": 0.0125,
"eval_runtime": 1301.9697,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 40
},
{
"epoch": 1.8,
"grad_norm": 0.0,
"learning_rate": 4.1333333333333333e-07,
"loss": 0.1015,
"step": 45
},
{
"epoch": 1.8,
"eval_clip_ratio": 0.0,
"eval_completion_length": 792.85,
"eval_kl": 2.993345260620117e-05,
"eval_loss": 0.06866836547851562,
"eval_num_tokens": 687524.0,
"eval_reward": 0.1375,
"eval_reward_std": 0.22673887014389038,
"eval_rewards/equation_reward_func": 0.075,
"eval_rewards/format_reward_func": 0.0625,
"eval_runtime": 1314.7184,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 45
},
{
"epoch": 2.0,
"grad_norm": 0.01888681948184967,
"learning_rate": 3.4666666666666665e-07,
"loss": 0.0441,
"step": 50
},
{
"epoch": 2.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 828.3625,
"eval_kl": 3.242790699005127e-05,
"eval_loss": 0.03552088886499405,
"eval_num_tokens": 764139.0,
"eval_reward": 0.0375,
"eval_reward_std": 0.075,
"eval_rewards/equation_reward_func": 0.0125,
"eval_rewards/format_reward_func": 0.025,
"eval_runtime": 1335.2965,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 50
},
{
"epoch": 2.2,
"grad_norm": 0.012753931805491447,
"learning_rate": 2.8e-07,
"loss": 0.0577,
"step": 55
},
{
"epoch": 2.2,
"eval_clip_ratio": 0.0,
"eval_completion_length": 838.8,
"eval_kl": 3.0362606048583983e-05,
"eval_loss": 0.020872214809060097,
"eval_num_tokens": 845274.0,
"eval_reward": 0.05,
"eval_reward_std": 0.1,
"eval_rewards/equation_reward_func": 0.0375,
"eval_rewards/format_reward_func": 0.0125,
"eval_runtime": 1294.8372,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 55
},
{
"epoch": 2.4,
"grad_norm": 0.0,
"learning_rate": 2.1333333333333334e-07,
"loss": 0.1044,
"step": 60
},
{
"epoch": 2.4,
"eval_clip_ratio": 0.0,
"eval_completion_length": 786.7,
"eval_kl": 3.013014793395996e-05,
"eval_loss": 0.07739870995283127,
"eval_num_tokens": 919161.0,
"eval_reward": 0.1,
"eval_reward_std": 0.17886751294136047,
"eval_rewards/equation_reward_func": 0.0875,
"eval_rewards/format_reward_func": 0.0125,
"eval_runtime": 1281.0863,
"eval_samples_per_second": 0.016,
"eval_steps_per_second": 0.004,
"step": 60
},
{
"epoch": 2.6,
"grad_norm": 0.0,
"learning_rate": 1.4666666666666666e-07,
"loss": 0.0556,
"step": 65
},
{
"epoch": 2.6,
"eval_clip_ratio": 0.0,
"eval_completion_length": 787.15,
"eval_kl": 3.191828727722168e-05,
"eval_loss": 0.12674936652183533,
"eval_num_tokens": 991008.0,
"eval_reward": 0.125,
"eval_reward_std": 0.22886751294136048,
"eval_rewards/equation_reward_func": 0.1,
"eval_rewards/format_reward_func": 0.025,
"eval_runtime": 1337.2162,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 65
},
{
"epoch": 2.8,
"grad_norm": 0.011743688955903053,
"learning_rate": 8e-08,
"loss": 0.0575,
"step": 70
},
{
"epoch": 2.8,
"eval_clip_ratio": 0.0,
"eval_completion_length": 865.05,
"eval_kl": 3.235936164855957e-05,
"eval_loss": 0.022181231528520584,
"eval_num_tokens": 1072417.0,
"eval_reward": 0.0375,
"eval_reward_std": 0.075,
"eval_rewards/equation_reward_func": 0.025,
"eval_rewards/format_reward_func": 0.0125,
"eval_runtime": 1336.7662,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 70
},
{
"epoch": 3.0,
"grad_norm": 0.012797041796147823,
"learning_rate": 1.3333333333333334e-08,
"loss": 0.0873,
"step": 75
},
{
"epoch": 3.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 838.275,
"eval_kl": 3.303289413452149e-05,
"eval_loss": 0.026845881715416908,
"eval_num_tokens": 1151449.0,
"eval_reward": 0.1125,
"eval_reward_std": 0.15560638308525085,
"eval_rewards/equation_reward_func": 0.1,
"eval_rewards/format_reward_func": 0.0125,
"eval_runtime": 1328.2121,
"eval_samples_per_second": 0.015,
"eval_steps_per_second": 0.004,
"step": 75
}
],
"logging_steps": 5,
"max_steps": 75,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 15,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}