pbelevich's picture
Upload folder using huggingface_hub
fe272b0 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9929577464788732,
"eval_steps": 500,
"global_step": 141,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1208984375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 255.9,
"completions/mean_length": 120.4628173828125,
"completions/mean_terminated_length": 106.38647994995117,
"completions/min_length": 19.2,
"completions/min_terminated_length": 19.2,
"epoch": 0.07042253521126761,
"grad_norm": 0.2456492780734702,
"kl": 0.246875,
"learning_rate": 9.366197183098593e-06,
"loss": 0.0828,
"num_tokens": 11811341.0,
"reward": 0.933349609375,
"reward_std": 0.22970878034830094,
"rewards/accuracy_reward/mean": 0.0947021484375,
"rewards/accuracy_reward/std": 0.28115386664867403,
"rewards/format_reward/mean": 0.8386474609375,
"rewards/format_reward/std": 0.2507546439766884,
"step": 10
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0341796875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 256.0,
"completions/mean_length": 136.043115234375,
"completions/mean_terminated_length": 131.86617279052734,
"completions/min_length": 22.5,
"completions/min_terminated_length": 22.5,
"epoch": 0.14084507042253522,
"grad_norm": 0.19689937531771842,
"kl": 0.20615234375,
"learning_rate": 8.661971830985915e-06,
"loss": 0.0354,
"num_tokens": 24245531.0,
"reward": 1.25078125,
"reward_std": 0.29540089666843417,
"rewards/accuracy_reward/mean": 0.2865966796875,
"rewards/accuracy_reward/std": 0.4441226840019226,
"rewards/format_reward/mean": 0.9641845703125,
"rewards/format_reward/std": 0.17938514649868012,
"step": 20
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.020068359375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 255.8,
"completions/mean_length": 140.8588623046875,
"completions/mean_terminated_length": 138.51354064941407,
"completions/min_length": 20.5,
"completions/min_terminated_length": 20.5,
"epoch": 0.2112676056338028,
"grad_norm": 0.21172988571553053,
"kl": 0.21923828125,
"learning_rate": 7.95774647887324e-06,
"loss": 0.0305,
"num_tokens": 36814350.0,
"reward": 1.3020751953125,
"reward_std": 0.295776504278183,
"rewards/accuracy_reward/mean": 0.3288818359375,
"rewards/accuracy_reward/std": 0.46963170170783997,
"rewards/format_reward/mean": 0.973193359375,
"rewards/format_reward/std": 0.16002206206321717,
"step": 30
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1049072265625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 255.9,
"completions/mean_length": 148.0318603515625,
"completions/mean_terminated_length": 136.30471115112306,
"completions/min_length": 15.4,
"completions/min_terminated_length": 15.4,
"epoch": 0.28169014084507044,
"grad_norm": 0.22532591156498452,
"kl": 0.25146484375,
"learning_rate": 7.253521126760564e-06,
"loss": 0.0696,
"num_tokens": 49745295.0,
"reward": 1.104150390625,
"reward_std": 0.3860862344503403,
"rewards/accuracy_reward/mean": 0.3103271484375,
"rewards/accuracy_reward/std": 0.4604327976703644,
"rewards/format_reward/mean": 0.7938232421875,
"rewards/format_reward/std": 0.28022926300764084,
"step": 40
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.030517578125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 255.9,
"completions/mean_length": 143.491650390625,
"completions/mean_terminated_length": 139.98492889404298,
"completions/min_length": 18.1,
"completions/min_terminated_length": 18.1,
"epoch": 0.352112676056338,
"grad_norm": 0.1885789608304361,
"kl": 0.22822265625,
"learning_rate": 6.549295774647888e-06,
"loss": 0.0455,
"num_tokens": 62485177.0,
"reward": 1.3081787109375,
"reward_std": 0.3404128760099411,
"rewards/accuracy_reward/mean": 0.3631103515625,
"rewards/accuracy_reward/std": 0.4808199375867844,
"rewards/format_reward/mean": 0.945068359375,
"rewards/format_reward/std": 0.2246607005596161,
"step": 50
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0277099609375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 255.9,
"completions/mean_length": 146.1615234375,
"completions/mean_terminated_length": 143.0530227661133,
"completions/min_length": 25.8,
"completions/min_terminated_length": 25.8,
"epoch": 0.4225352112676056,
"grad_norm": 0.17282346465533863,
"kl": 0.20244140625,
"learning_rate": 5.845070422535212e-06,
"loss": 0.0395,
"num_tokens": 75340361.0,
"reward": 1.33486328125,
"reward_std": 0.31391614973545073,
"rewards/accuracy_reward/mean": 0.3770263671875,
"rewards/accuracy_reward/std": 0.48451300263404845,
"rewards/format_reward/mean": 0.9578369140625,
"rewards/format_reward/std": 0.19974014312028884,
"step": 60
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0384521484375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 256.0,
"completions/mean_length": 148.16982421875,
"completions/mean_terminated_length": 143.90497741699218,
"completions/min_length": 26.1,
"completions/min_terminated_length": 26.1,
"epoch": 0.49295774647887325,
"grad_norm": 0.20083770112758828,
"kl": 0.2142578125,
"learning_rate": 5.140845070422536e-06,
"loss": 0.0525,
"num_tokens": 88271653.0,
"reward": 1.3040771484375,
"reward_std": 0.3394395083189011,
"rewards/accuracy_reward/mean": 0.3633056640625,
"rewards/accuracy_reward/std": 0.4807720482349396,
"rewards/format_reward/mean": 0.940771484375,
"rewards/format_reward/std": 0.23174136728048325,
"step": 70
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0213134765625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 255.9,
"completions/mean_length": 145.615234375,
"completions/mean_terminated_length": 143.20397033691407,
"completions/min_length": 23.1,
"completions/min_terminated_length": 23.1,
"epoch": 0.5633802816901409,
"grad_norm": 0.18419537410639902,
"kl": 0.22841796875,
"learning_rate": 4.43661971830986e-06,
"loss": 0.0372,
"num_tokens": 101124269.0,
"reward": 1.3465087890625,
"reward_std": 0.3062387377023697,
"rewards/accuracy_reward/mean": 0.382861328125,
"rewards/accuracy_reward/std": 0.48586891293525697,
"rewards/format_reward/mean": 0.9636474609375,
"rewards/format_reward/std": 0.18586143404245375,
"step": 80
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.028466796875,
"completions/max_length": 256.0,
"completions/max_terminated_length": 256.0,
"completions/mean_length": 152.4924072265625,
"completions/mean_terminated_length": 149.45853271484376,
"completions/min_length": 23.3,
"completions/min_terminated_length": 23.3,
"epoch": 0.6338028169014085,
"grad_norm": 0.18212247166689255,
"kl": 0.197265625,
"learning_rate": 3.7323943661971835e-06,
"loss": 0.0378,
"num_tokens": 114254494.0,
"reward": 1.310498046875,
"reward_std": 0.31236537992954255,
"rewards/accuracy_reward/mean": 0.3539794921875,
"rewards/accuracy_reward/std": 0.4776217073202133,
"rewards/format_reward/mean": 0.9565185546875,
"rewards/format_reward/std": 0.2028029069304466,
"step": 90
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0301025390625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 255.7,
"completions/mean_length": 151.31767578125,
"completions/mean_terminated_length": 148.0802963256836,
"completions/min_length": 25.2,
"completions/min_terminated_length": 25.2,
"epoch": 0.704225352112676,
"grad_norm": 0.21910042885938372,
"kl": 0.18720703125,
"learning_rate": 3.0281690140845073e-06,
"loss": 0.0392,
"num_tokens": 127312026.0,
"reward": 1.3383544921875,
"reward_std": 0.32255696356296537,
"rewards/accuracy_reward/mean": 0.382861328125,
"rewards/accuracy_reward/std": 0.48587043583393097,
"rewards/format_reward/mean": 0.9554931640625,
"rewards/format_reward/std": 0.20596200376749038,
"step": 100
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.030126953125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 256.0,
"completions/mean_length": 153.7516357421875,
"completions/mean_terminated_length": 150.5800354003906,
"completions/min_length": 26.2,
"completions/min_terminated_length": 26.2,
"epoch": 0.7746478873239436,
"grad_norm": 0.1726874715898045,
"kl": 0.18974609375,
"learning_rate": 2.323943661971831e-06,
"loss": 0.0395,
"num_tokens": 140468685.0,
"reward": 1.3381103515625,
"reward_std": 0.31997495591640474,
"rewards/accuracy_reward/mean": 0.383740234375,
"rewards/accuracy_reward/std": 0.486175873875618,
"rewards/format_reward/mean": 0.9543701171875,
"rewards/format_reward/std": 0.20838392823934554,
"step": 110
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0293212890625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 256.0,
"completions/mean_length": 153.7035400390625,
"completions/mean_terminated_length": 150.61290435791017,
"completions/min_length": 28.3,
"completions/min_terminated_length": 28.3,
"epoch": 0.8450704225352113,
"grad_norm": 0.1653288093843954,
"kl": 0.183984375,
"learning_rate": 1.6197183098591552e-06,
"loss": 0.0372,
"num_tokens": 153599310.0,
"reward": 1.35302734375,
"reward_std": 0.3156912326812744,
"rewards/accuracy_reward/mean": 0.3957275390625,
"rewards/accuracy_reward/std": 0.48883563578128814,
"rewards/format_reward/mean": 0.9572998046875,
"rewards/format_reward/std": 0.2019893079996109,
"step": 120
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0310791015625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 256.0,
"completions/mean_length": 152.9286376953125,
"completions/mean_terminated_length": 149.62524871826173,
"completions/min_length": 25.6,
"completions/min_terminated_length": 25.6,
"epoch": 0.9154929577464789,
"grad_norm": 0.17111412613161744,
"kl": 0.18369140625,
"learning_rate": 9.154929577464789e-07,
"loss": 0.0413,
"num_tokens": 166750947.0,
"reward": 1.3502197265625,
"reward_std": 0.3155178099870682,
"rewards/accuracy_reward/mean": 0.3943359375,
"rewards/accuracy_reward/std": 0.48866645693778993,
"rewards/format_reward/mean": 0.9558837890625,
"rewards/format_reward/std": 0.20521053820848464,
"step": 130
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0335205078125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 256.0,
"completions/mean_length": 152.0265625,
"completions/mean_terminated_length": 148.4231918334961,
"completions/min_length": 25.9,
"completions/min_terminated_length": 25.9,
"epoch": 0.9859154929577465,
"grad_norm": 0.18623854088340255,
"kl": 0.20126953125,
"learning_rate": 2.1126760563380284e-07,
"loss": 0.0456,
"num_tokens": 179822731.0,
"reward": 1.348291015625,
"reward_std": 0.3256455361843109,
"rewards/accuracy_reward/mean": 0.3951904296875,
"rewards/accuracy_reward/std": 0.4886902630329132,
"rewards/format_reward/mean": 0.9531005859375,
"rewards/format_reward/std": 0.21116815358400345,
"step": 140
}
],
"logging_steps": 10,
"max_steps": 142,
"num_input_tokens_seen": 181126457,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}