LCR1_1.5B / trainer_state.json
zx10086's picture
Add files using upload-large-folder tool
721193c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.40336134453781514,
"eval_steps": 500,
"global_step": 42,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.18055555555555558,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 8149.333333333333,
"completions/mean_length": 3217.178955078125,
"completions/mean_terminated_length": 2105.4993489583335,
"completions/min_length": 559.3333333333334,
"completions/min_terminated_length": 559.3333333333334,
"epoch": 0.028811524609843937,
"grad_norm": 0.09286145865917206,
"kl": 0.0001745025316874186,
"learning_rate": 2.942307692307692e-06,
"loss": 0.1215,
"num_tokens": 1897631.0,
"reward": 1.810763915379842,
"reward_std": 0.18809527655442557,
"rewards/accuracy_reward/mean": 0.8125,
"rewards/accuracy_reward/std": 0.3738736609617869,
"rewards/format_reward/mean": 0.9982638955116272,
"rewards/format_reward/std": 0.02405626078446706,
"step": 3
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.19791666666666666,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 7958.0,
"completions/mean_length": 3270.2118326822915,
"completions/mean_terminated_length": 2069.518107096354,
"completions/min_length": 492.0,
"completions/min_terminated_length": 492.0,
"epoch": 0.057623049219687875,
"grad_norm": 0.08364126831293106,
"kl": 0.0004324515660603841,
"learning_rate": 2.8557692307692307e-06,
"loss": 0.0934,
"num_tokens": 3822113.0,
"reward": 1.7673611640930176,
"reward_std": 0.14717517793178558,
"rewards/accuracy_reward/mean": 0.7673611044883728,
"rewards/accuracy_reward/std": 0.4205925464630127,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.234375,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 7983.666666666667,
"completions/mean_length": 3627.2797037760415,
"completions/mean_terminated_length": 2248.2770182291665,
"completions/min_length": 657.3333333333334,
"completions/min_terminated_length": 657.3333333333334,
"epoch": 0.08643457382953182,
"grad_norm": 0.09271353483200073,
"kl": 0.0014851093292236328,
"learning_rate": 2.7692307692307693e-06,
"loss": 0.1043,
"num_tokens": 5951186.0,
"reward": 1.73437503973643,
"reward_std": 0.1764681041240692,
"rewards/accuracy_reward/mean": 0.7361111044883728,
"rewards/accuracy_reward/std": 0.4373584985733032,
"rewards/format_reward/mean": 0.9982638955116272,
"rewards/format_reward/std": 0.02405626078446706,
"step": 9
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11111111111111112,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 8012.666666666667,
"completions/mean_length": 2749.7119140625,
"completions/mean_terminated_length": 2069.81005859375,
"completions/min_length": 558.0,
"completions/min_terminated_length": 558.0,
"epoch": 0.11524609843937575,
"grad_norm": 0.06276765465736389,
"kl": 0.002723217010498047,
"learning_rate": 2.682692307692308e-06,
"loss": 0.0766,
"num_tokens": 7577284.0,
"reward": 1.8333333333333333,
"reward_std": 0.1446588784456253,
"rewards/accuracy_reward/mean": 0.8350694378217062,
"rewards/accuracy_reward/std": 0.36645371715227765,
"rewards/format_reward/mean": 0.9982638955116272,
"rewards/format_reward/std": 0.02405626078446706,
"step": 12
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11805555555555558,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 8078.333333333333,
"completions/mean_length": 2837.998291015625,
"completions/mean_terminated_length": 2121.116943359375,
"completions/min_length": 586.0,
"completions/min_terminated_length": 586.0,
"epoch": 0.14405762304921968,
"grad_norm": 0.06373849511146545,
"kl": 0.0064856211344401045,
"learning_rate": 2.5961538461538465e-06,
"loss": 0.0795,
"num_tokens": 9254931.0,
"reward": 1.8125000794728596,
"reward_std": 0.13619043429692587,
"rewards/accuracy_reward/mean": 0.812500019868215,
"rewards/accuracy_reward/std": 0.3885917862256368,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 15
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11284722222222225,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 8120.0,
"completions/mean_length": 2874.5538736979165,
"completions/mean_terminated_length": 2199.148152669271,
"completions/min_length": 438.6666666666667,
"completions/min_terminated_length": 438.6666666666667,
"epoch": 0.17286914765906364,
"grad_norm": 0.06650689989328384,
"kl": 0.01007843017578125,
"learning_rate": 2.5096153846153847e-06,
"loss": 0.0729,
"num_tokens": 10960010.0,
"reward": 1.7986111640930176,
"reward_std": 0.1752894123395284,
"rewards/accuracy_reward/mean": 0.7986111044883728,
"rewards/accuracy_reward/std": 0.3986728588740031,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 18
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.12673611111111108,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 8150.666666666667,
"completions/mean_length": 3212.4671223958335,
"completions/mean_terminated_length": 2483.940673828125,
"completions/min_length": 749.6666666666666,
"completions/min_terminated_length": 749.6666666666666,
"epoch": 0.20168067226890757,
"grad_norm": 0.061652038246393204,
"kl": 0.011129379272460938,
"learning_rate": 2.4230769230769233e-06,
"loss": 0.0656,
"num_tokens": 12855127.0,
"reward": 1.7256944974263508,
"reward_std": 0.19639561573664346,
"rewards/accuracy_reward/mean": 0.725694457689921,
"rewards/accuracy_reward/std": 0.4470636049906413,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 21
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08159722222222221,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 8009.0,
"completions/mean_length": 2894.3212076822915,
"completions/mean_terminated_length": 2424.7539876302085,
"completions/min_length": 455.6666666666667,
"completions/min_terminated_length": 455.6666666666667,
"epoch": 0.2304921968787515,
"grad_norm": 0.06359543651342392,
"kl": 0.013215382893880209,
"learning_rate": 2.3365384615384615e-06,
"loss": 0.0777,
"num_tokens": 14566568.0,
"reward": 1.7986111243565877,
"reward_std": 0.20375757416089377,
"rewards/accuracy_reward/mean": 0.7986111044883728,
"rewards/accuracy_reward/std": 0.3931320408980052,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 24
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09201388888888891,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 7929.666666666667,
"completions/mean_length": 2709.375,
"completions/mean_terminated_length": 2146.6839599609375,
"completions/min_length": 373.6666666666667,
"completions/min_terminated_length": 373.6666666666667,
"epoch": 0.25930372148859543,
"grad_norm": 0.05772211030125618,
"kl": 0.017008463541666668,
"learning_rate": 2.25e-06,
"loss": 0.0681,
"num_tokens": 16187976.0,
"reward": 1.76562503973643,
"reward_std": 0.1697404384613037,
"rewards/accuracy_reward/mean": 0.765625,
"rewards/accuracy_reward/std": 0.4218848447004954,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 27
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07638888888888888,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 8090.0,
"completions/mean_length": 2675.694580078125,
"completions/mean_terminated_length": 2217.5712076822915,
"completions/min_length": 401.6666666666667,
"completions/min_terminated_length": 401.6666666666667,
"epoch": 0.28811524609843936,
"grad_norm": 0.06288590282201767,
"kl": 0.020589192708333332,
"learning_rate": 2.1634615384615387e-06,
"loss": 0.0697,
"num_tokens": 17790312.0,
"reward": 1.7760417461395264,
"reward_std": 0.20130781332651773,
"rewards/accuracy_reward/mean": 0.7777777910232544,
"rewards/accuracy_reward/std": 0.4139314691225688,
"rewards/format_reward/mean": 0.9982638955116272,
"rewards/format_reward/std": 0.02405626078446706,
"step": 30
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08680555555555554,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 8054.0,
"completions/mean_length": 2780.01220703125,
"completions/mean_terminated_length": 2265.2118326822915,
"completions/min_length": 392.6666666666667,
"completions/min_terminated_length": 392.6666666666667,
"epoch": 0.3169267707082833,
"grad_norm": 0.6698484420776367,
"kl": 0.03530248006184896,
"learning_rate": 2.076923076923077e-06,
"loss": 0.0537,
"num_tokens": 19449207.0,
"reward": 1.7899305820465088,
"reward_std": 0.18347221612930298,
"rewards/accuracy_reward/mean": 0.789930542310079,
"rewards/accuracy_reward/std": 0.40485529104868573,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 33
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.060763888888888916,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 7845.0,
"completions/mean_length": 2849.2032877604165,
"completions/mean_terminated_length": 2505.287841796875,
"completions/min_length": 393.3333333333333,
"completions/min_terminated_length": 393.3333333333333,
"epoch": 0.3457382953181273,
"grad_norm": 0.09857647120952606,
"kl": 0.032840728759765625,
"learning_rate": 1.9903846153846155e-06,
"loss": 0.0509,
"num_tokens": 21157860.0,
"reward": 1.717013955116272,
"reward_std": 0.22714433073997498,
"rewards/accuracy_reward/mean": 0.7170138955116272,
"rewards/accuracy_reward/std": 0.45049455761909485,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 36
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.060763888888888874,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 7866.0,
"completions/mean_length": 2628.4080403645835,
"completions/mean_terminated_length": 2266.409220377604,
"completions/min_length": 435.0,
"completions/min_terminated_length": 435.0,
"epoch": 0.3745498199279712,
"grad_norm": 2.40429949760437,
"kl": 0.058779398600260414,
"learning_rate": 1.9038461538461538e-06,
"loss": 0.0515,
"num_tokens": 22722487.0,
"reward": 1.7274306217829387,
"reward_std": 0.24683435757954916,
"rewards/accuracy_reward/mean": 0.7274305621782938,
"rewards/accuracy_reward/std": 0.4455043375492096,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 39
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.048611111111111126,
"completions/max_length": 8192.0,
"completions/max_terminated_length": 7949.0,
"completions/mean_length": 2644.7657063802085,
"completions/mean_terminated_length": 2360.8841959635415,
"completions/min_length": 295.3333333333333,
"completions/min_terminated_length": 295.3333333333333,
"epoch": 0.40336134453781514,
"grad_norm": 0.7801563739776611,
"kl": 0.0440673828125,
"learning_rate": 1.8173076923076922e-06,
"loss": 0.0578,
"num_tokens": 24298560.0,
"reward": 1.7239583730697632,
"reward_std": 0.24683218201001486,
"rewards/accuracy_reward/mean": 0.7239583333333334,
"rewards/accuracy_reward/std": 0.44757089018821716,
"rewards/format_reward/mean": 1.0,
"rewards/format_reward/std": 0.0,
"step": 42
}
],
"logging_steps": 3,
"max_steps": 104,
"num_input_tokens_seen": 24298560,
"num_train_epochs": 1,
"save_steps": 21,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}