ee_gol_f1_500 / trainer_state.json
usr256864's picture
Initial model upload
2d2561b verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.30175015087507545,
"eval_steps": 250,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.12125,
"completions/max_length": 255.34,
"completions/max_terminated_length": 252.14,
"completions/mean_length": 221.534375,
"completions/mean_terminated_length": 216.93697082519532,
"completions/min_length": 173.54,
"completions/min_terminated_length": 173.54,
"entropy": 0.10048629969358444,
"epoch": 0.030175015087507542,
"frac_reward_zero_std": 0.3225,
"grad_norm": 0.46380576491355896,
"learning_rate": 5e-05,
"loss": 0.004,
"num_tokens": 8142396.0,
"reward": 7.30375,
"reward_std": 1.5006456315517425,
"rewards/event_reward_fn/mean": 7.30375,
"rewards/event_reward_fn/std": 6.278198585510254,
"step": 50,
"step_time": 40.824848868116966
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.068125,
"completions/max_length": 251.74,
"completions/max_terminated_length": 248.06,
"completions/mean_length": 215.08625,
"completions/mean_terminated_length": 212.25316284179686,
"completions/min_length": 171.76,
"completions/min_terminated_length": 171.76,
"entropy": 0.10318506792187691,
"epoch": 0.060350030175015085,
"frac_reward_zero_std": 0.325,
"grad_norm": 0.21978232264518738,
"learning_rate": 5e-05,
"loss": -0.0025,
"num_tokens": 16421719.0,
"reward": 7.36875,
"reward_std": 1.3263894939422607,
"rewards/event_reward_fn/mean": 7.36875,
"rewards/event_reward_fn/std": 6.119045643806458,
"step": 100,
"step_time": 38.99798643006128
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.4825,
"completions/max_length": 256.0,
"completions/max_terminated_length": 251.32,
"completions/mean_length": 238.104375,
"completions/mean_terminated_length": 221.8957485961914,
"completions/min_length": 191.34,
"completions/min_terminated_length": 191.34,
"entropy": 0.10444845259189606,
"epoch": 0.09052504526252263,
"frac_reward_zero_std": 0.2925,
"grad_norm": 0.5579063892364502,
"learning_rate": 5e-05,
"loss": -0.0006,
"num_tokens": 24885844.0,
"reward": 7.74625,
"reward_std": 1.5345598912239076,
"rewards/event_reward_fn/mean": 7.74625,
"rewards/event_reward_fn/std": 6.464660973548889,
"step": 150,
"step_time": 41.26081488572061
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.7925,
"completions/max_length": 256.0,
"completions/max_terminated_length": 202.92,
"completions/mean_length": 245.916875,
"completions/mean_terminated_length": 184.6587713623047,
"completions/min_length": 199.94,
"completions/min_terminated_length": 169.22,
"entropy": 0.10581055819988251,
"epoch": 0.12070006035003017,
"frac_reward_zero_std": 0.33,
"grad_norm": 0.31808722019195557,
"learning_rate": 5e-05,
"loss": 0.0003,
"num_tokens": 33226966.0,
"reward": 7.19125,
"reward_std": 1.4298825466632843,
"rewards/event_reward_fn/mean": 7.19125,
"rewards/event_reward_fn/std": 5.8599746036529545,
"step": 200,
"step_time": 41.91275953448203
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.825,
"completions/max_length": 256.0,
"completions/max_terminated_length": 181.12,
"completions/mean_length": 245.851875,
"completions/mean_terminated_length": 163.72261688232422,
"completions/min_length": 198.46,
"completions/min_terminated_length": 152.38,
"entropy": 0.10499135926365852,
"epoch": 0.15087507543753773,
"frac_reward_zero_std": 0.2875,
"grad_norm": 0.2646925449371338,
"learning_rate": 5e-05,
"loss": 0.0005,
"num_tokens": 41523308.0,
"reward": 7.9475,
"reward_std": 1.5300491595268249,
"rewards/event_reward_fn/mean": 7.9475,
"rewards/event_reward_fn/std": 6.3965685844421385,
"step": 250,
"step_time": 41.663273623897695
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.898125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 149.62,
"completions/mean_length": 250.625625,
"completions/mean_terminated_length": 144.78653198242188,
"completions/min_length": 215.68,
"completions/min_terminated_length": 138.88,
"entropy": 0.10884671121835708,
"epoch": 0.18105009052504525,
"frac_reward_zero_std": 0.3325,
"grad_norm": 0.5418329834938049,
"learning_rate": 5e-05,
"loss": -0.0002,
"num_tokens": 49889481.0,
"reward": 7.489375,
"reward_std": 1.5504147619009019,
"rewards/event_reward_fn/mean": 7.489375,
"rewards/event_reward_fn/std": 6.099679977893829,
"step": 300,
"step_time": 40.817094522019616
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.9275,
"completions/max_length": 256.0,
"completions/max_terminated_length": 131.48,
"completions/mean_length": 253.1625,
"completions/mean_terminated_length": 125.53590209960937,
"completions/min_length": 228.04,
"completions/min_terminated_length": 120.52,
"entropy": 0.10796756476163864,
"epoch": 0.2112251056125528,
"frac_reward_zero_std": 0.3175,
"grad_norm": 0.4433981776237488,
"learning_rate": 5e-05,
"loss": 0.0019,
"num_tokens": 58206892.0,
"reward": 7.89625,
"reward_std": 1.573977051973343,
"rewards/event_reward_fn/mean": 7.89625,
"rewards/event_reward_fn/std": 6.586006484031677,
"step": 350,
"step_time": 42.12015992245928
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.945625,
"completions/max_length": 256.0,
"completions/max_terminated_length": 151.42,
"completions/mean_length": 254.76625,
"completions/mean_terminated_length": 146.34000091552736,
"completions/min_length": 238.42,
"completions/min_terminated_length": 141.14,
"entropy": 0.11530103281140328,
"epoch": 0.24140012070006034,
"frac_reward_zero_std": 0.29,
"grad_norm": 0.3932775855064392,
"learning_rate": 5e-05,
"loss": 0.0001,
"num_tokens": 66513664.0,
"reward": 7.304375,
"reward_std": 1.552179645895958,
"rewards/event_reward_fn/mean": 7.304375,
"rewards/event_reward_fn/std": 5.687906408309937,
"step": 400,
"step_time": 40.78123372233997
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.92375,
"completions/max_length": 256.0,
"completions/max_terminated_length": 185.1,
"completions/mean_length": 254.35875,
"completions/mean_terminated_length": 178.61883544921875,
"completions/min_length": 232.56,
"completions/min_terminated_length": 171.12,
"entropy": 0.13443249970674515,
"epoch": 0.27157513578756787,
"frac_reward_zero_std": 0.315,
"grad_norm": 0.2284364551305771,
"learning_rate": 5e-05,
"loss": -0.0013,
"num_tokens": 74493599.0,
"reward": 7.766875,
"reward_std": 1.5890911322832109,
"rewards/event_reward_fn/mean": 7.766875,
"rewards/event_reward_fn/std": 6.074563751220703,
"step": 450,
"step_time": 40.8964025861409
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.983125,
"completions/max_length": 256.0,
"completions/max_terminated_length": 73.22,
"completions/mean_length": 255.728125,
"completions/mean_terminated_length": 72.48666687011719,
"completions/min_length": 250.14,
"completions/min_terminated_length": 70.94,
"entropy": 0.1348781806230545,
"epoch": 0.30175015087507545,
"frac_reward_zero_std": 0.32,
"grad_norm": 0.44683775305747986,
"learning_rate": 5e-05,
"loss": 0.0006,
"num_tokens": 82766712.0,
"reward": 7.835625,
"reward_std": 1.6530324041843414,
"rewards/event_reward_fn/mean": 7.835625,
"rewards/event_reward_fn/std": 6.139980282783508,
"step": 500,
"step_time": 41.13054014526191
}
],
"logging_steps": 50,
"max_steps": 16570,
"num_input_tokens_seen": 82766712,
"num_train_epochs": 10,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}