Infernaught's picture
Upload folder using huggingface_hub
e112099 verified
{
"best_global_step": 30,
"best_metric": 0.009350189939141273,
"best_model_checkpoint": "outputs/microsoft/Phi-3.5-mini-instruct/countdown_n100_mcl_256_pretrained/checkpoint-30",
"epoch": 3.0,
"eval_steps": 5,
"global_step": 75,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"grad_norm": 0.04710305854678154,
"learning_rate": 9.466666666666666e-07,
"loss": 0.0581,
"step": 5
},
{
"epoch": 0.2,
"eval_clip_ratio": 0.0,
"eval_completion_length": 249.7375,
"eval_kl": 2.7620792388916014e-05,
"eval_loss": 0.011466369964182377,
"eval_num_tokens": 30743.0,
"eval_reward": 0.0875,
"eval_reward_std": 0.14787135720252992,
"eval_rewards/equation_reward_func": 0.0375,
"eval_rewards/format_reward_func": 0.05,
"eval_runtime": 283.5242,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.018,
"step": 5
},
{
"epoch": 0.4,
"grad_norm": 0.0248898696154356,
"learning_rate": 8.799999999999999e-07,
"loss": 0.0305,
"step": 10
},
{
"epoch": 0.4,
"eval_clip_ratio": 0.0,
"eval_completion_length": 248.3125,
"eval_kl": 3.051459789276123e-05,
"eval_loss": 0.010714234784245491,
"eval_num_tokens": 61654.0,
"eval_reward": 0.1,
"eval_reward_std": 0.11969234347343445,
"eval_rewards/equation_reward_func": 0.075,
"eval_rewards/format_reward_func": 0.025,
"eval_runtime": 279.7789,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.018,
"step": 10
},
{
"epoch": 0.6,
"grad_norm": 0.03560088202357292,
"learning_rate": 8.133333333333333e-07,
"loss": 0.0333,
"step": 15
},
{
"epoch": 0.6,
"eval_clip_ratio": 0.0,
"eval_completion_length": 248.8125,
"eval_kl": 3.6323070526123046e-05,
"eval_loss": 0.016332309693098068,
"eval_num_tokens": 92513.0,
"eval_reward": 0.1,
"eval_reward_std": 0.15773502588272095,
"eval_rewards/equation_reward_func": 0.0375,
"eval_rewards/format_reward_func": 0.0625,
"eval_runtime": 282.7334,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.018,
"step": 15
},
{
"epoch": 0.8,
"grad_norm": 0.02903689257800579,
"learning_rate": 7.466666666666667e-07,
"loss": 0.0363,
"step": 20
},
{
"epoch": 0.8,
"eval_clip_ratio": 0.0,
"eval_completion_length": 241.4375,
"eval_kl": 3.466010093688965e-05,
"eval_loss": 0.03578554838895798,
"eval_num_tokens": 123451.0,
"eval_reward": 0.1125,
"eval_reward_std": 0.1978713572025299,
"eval_rewards/equation_reward_func": 0.0625,
"eval_rewards/format_reward_func": 0.05,
"eval_runtime": 282.549,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.018,
"step": 20
},
{
"epoch": 1.0,
"grad_norm": 5.04811282553419e-07,
"learning_rate": 6.800000000000001e-07,
"loss": 0.0011,
"step": 25
},
{
"epoch": 1.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 247.55,
"eval_kl": 3.0243396759033202e-05,
"eval_loss": 0.023990554735064507,
"eval_num_tokens": 154568.0,
"eval_reward": 0.1,
"eval_reward_std": 0.17886751294136047,
"eval_rewards/equation_reward_func": 0.05,
"eval_rewards/format_reward_func": 0.05,
"eval_runtime": 282.4574,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.018,
"step": 25
},
{
"epoch": 1.2,
"grad_norm": 0.0,
"learning_rate": 6.133333333333332e-07,
"loss": 0.0337,
"step": 30
},
{
"epoch": 1.2,
"eval_clip_ratio": 0.0,
"eval_completion_length": 249.3875,
"eval_kl": 2.872645854949951e-05,
"eval_loss": 0.009350189939141273,
"eval_num_tokens": 185750.0,
"eval_reward": 0.1,
"eval_reward_std": 0.15,
"eval_rewards/equation_reward_func": 0.0625,
"eval_rewards/format_reward_func": 0.0375,
"eval_runtime": 283.2443,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.018,
"step": 30
},
{
"epoch": 1.4,
"grad_norm": 0.025708282366394997,
"learning_rate": 5.466666666666666e-07,
"loss": 0.0273,
"step": 35
},
{
"epoch": 1.4,
"eval_clip_ratio": 0.0,
"eval_completion_length": 242.05,
"eval_kl": 3.2845139503479e-05,
"eval_loss": 0.038312580436468124,
"eval_num_tokens": 217087.0,
"eval_reward": 0.1375,
"eval_reward_std": 0.1978713572025299,
"eval_rewards/equation_reward_func": 0.0625,
"eval_rewards/format_reward_func": 0.075,
"eval_runtime": 283.9676,
"eval_samples_per_second": 0.07,
"eval_steps_per_second": 0.018,
"step": 35
},
{
"epoch": 1.6,
"grad_norm": 0.04088641330599785,
"learning_rate": 4.8e-07,
"loss": 0.0318,
"step": 40
},
{
"epoch": 1.6,
"eval_clip_ratio": 0.0,
"eval_completion_length": 247.9375,
"eval_kl": 3.358125686645508e-05,
"eval_loss": 0.028362590819597244,
"eval_num_tokens": 247949.0,
"eval_reward": 0.1375,
"eval_reward_std": 0.1886961877346039,
"eval_rewards/equation_reward_func": 0.0875,
"eval_rewards/format_reward_func": 0.05,
"eval_runtime": 285.356,
"eval_samples_per_second": 0.07,
"eval_steps_per_second": 0.018,
"step": 40
},
{
"epoch": 1.8,
"grad_norm": 0.024730732664465904,
"learning_rate": 4.1333333333333333e-07,
"loss": 0.0578,
"step": 45
},
{
"epoch": 1.8,
"eval_clip_ratio": 0.0,
"eval_completion_length": 246.9625,
"eval_kl": 2.7140974998474122e-05,
"eval_loss": 0.043169133365154266,
"eval_num_tokens": 278710.0,
"eval_reward": 0.125,
"eval_reward_std": 0.2,
"eval_rewards/equation_reward_func": 0.075,
"eval_rewards/format_reward_func": 0.05,
"eval_runtime": 285.2657,
"eval_samples_per_second": 0.07,
"eval_steps_per_second": 0.018,
"step": 45
},
{
"epoch": 2.0,
"grad_norm": 0.03765915334224701,
"learning_rate": 3.4666666666666665e-07,
"loss": 0.0234,
"step": 50
},
{
"epoch": 2.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 247.05,
"eval_kl": 3.2660365104675296e-05,
"eval_loss": 0.026807209476828575,
"eval_num_tokens": 309954.0,
"eval_reward": 0.125,
"eval_reward_std": 0.20773502588272094,
"eval_rewards/equation_reward_func": 0.0875,
"eval_rewards/format_reward_func": 0.0375,
"eval_runtime": 284.404,
"eval_samples_per_second": 0.07,
"eval_steps_per_second": 0.018,
"step": 50
},
{
"epoch": 2.2,
"grad_norm": 0.01991177722811699,
"learning_rate": 2.8e-07,
"loss": 0.0691,
"step": 55
},
{
"epoch": 2.2,
"eval_clip_ratio": 0.0,
"eval_completion_length": 242.6375,
"eval_kl": 3.68952751159668e-05,
"eval_loss": 0.04483898729085922,
"eval_num_tokens": 340605.0,
"eval_reward": 0.1625,
"eval_reward_std": 0.25560638308525085,
"eval_rewards/equation_reward_func": 0.1,
"eval_rewards/format_reward_func": 0.0625,
"eval_runtime": 283.6459,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.018,
"step": 55
},
{
"epoch": 2.4,
"grad_norm": 0.0200728178024292,
"learning_rate": 2.1333333333333334e-07,
"loss": 0.0518,
"step": 60
},
{
"epoch": 2.4,
"eval_clip_ratio": 0.0,
"eval_completion_length": 250.45,
"eval_kl": 3.1629204750061034e-05,
"eval_loss": 0.009741068817675114,
"eval_num_tokens": 371457.0,
"eval_reward": 0.075,
"eval_reward_std": 0.12886751294136048,
"eval_rewards/equation_reward_func": 0.05,
"eval_rewards/format_reward_func": 0.025,
"eval_runtime": 283.7186,
"eval_samples_per_second": 0.07,
"eval_steps_per_second": 0.018,
"step": 60
},
{
"epoch": 2.6,
"grad_norm": 0.021919438615441322,
"learning_rate": 1.4666666666666666e-07,
"loss": 0.0167,
"step": 65
},
{
"epoch": 2.6,
"eval_clip_ratio": 0.0,
"eval_completion_length": 250.35,
"eval_kl": 2.8392672538757326e-05,
"eval_loss": 0.016574550420045853,
"eval_num_tokens": 402684.0,
"eval_reward": 0.1125,
"eval_reward_std": 0.14469234347343446,
"eval_rewards/equation_reward_func": 0.075,
"eval_rewards/format_reward_func": 0.0375,
"eval_runtime": 283.4564,
"eval_samples_per_second": 0.071,
"eval_steps_per_second": 0.018,
"step": 65
},
{
"epoch": 2.8,
"grad_norm": 0.03317731246352196,
"learning_rate": 8e-08,
"loss": 0.0358,
"step": 70
},
{
"epoch": 2.8,
"eval_clip_ratio": 0.0,
"eval_completion_length": 246.825,
"eval_kl": 3.1587481498718264e-05,
"eval_loss": 0.04395188018679619,
"eval_num_tokens": 433755.0,
"eval_reward": 0.2,
"eval_reward_std": 0.3154700517654419,
"eval_rewards/equation_reward_func": 0.0625,
"eval_rewards/format_reward_func": 0.1375,
"eval_runtime": 284.1288,
"eval_samples_per_second": 0.07,
"eval_steps_per_second": 0.018,
"step": 70
},
{
"epoch": 3.0,
"grad_norm": 0.04194802790880203,
"learning_rate": 1.3333333333333334e-08,
"loss": 0.0372,
"step": 75
},
{
"epoch": 3.0,
"eval_clip_ratio": 0.0,
"eval_completion_length": 247.825,
"eval_kl": 3.3229589462280273e-05,
"eval_loss": 0.034269753843545914,
"eval_num_tokens": 464648.0,
"eval_reward": 0.1,
"eval_reward_std": 0.15,
"eval_rewards/equation_reward_func": 0.0625,
"eval_rewards/format_reward_func": 0.0375,
"eval_runtime": 283.7321,
"eval_samples_per_second": 0.07,
"eval_steps_per_second": 0.018,
"step": 75
}
],
"logging_steps": 5,
"max_steps": 75,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}