Xunzi-Yayun-R1 / trainer_state.json
ricardozhy's picture
Upload 27 files
dbdda7f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7407407407407407,
"eval_steps": 500,
"global_step": 180,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio": 0.0,
"completion_length": 487.7625,
"epoch": 0.0411522633744856,
"grad_norm": 1.8671875,
"kl": 0.0007157690197345801,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0269,
"num_tokens": 55831.0,
"reward": 0.22805000096559525,
"reward_std": 0.19275731525849552,
"rewards/combined_reward_func": 0.14375,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.14375,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.059450000524520874,
"step": 10
},
{
"clip_ratio": 0.0,
"completion_length": 475.9875,
"epoch": 0.0823045267489712,
"grad_norm": 1.9375,
"kl": 0.0010341577755752951,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0419,
"num_tokens": 110728.0,
"reward": 0.3608124990016222,
"reward_std": 0.2826482572359964,
"rewards/combined_reward_func": 0.21875,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.21875,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.07668750174343586,
"step": 20
},
{
"clip_ratio": 0.0,
"completion_length": 473.9375,
"epoch": 0.12345679012345678,
"grad_norm": 0.044677734375,
"kl": 0.004194418436964042,
"learning_rate": 4.99351292572632e-06,
"loss": 0.0335,
"num_tokens": 165479.0,
"reward": 0.5708000004291535,
"reward_std": 0.35507366359233855,
"rewards/combined_reward_func": 0.23125,
"rewards/poem_score_reward_func": 0.0125,
"rewards/soft_format_reward_func": 0.21875,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.10830000005662441,
"step": 30
},
{
"clip_ratio": 0.0,
"completion_length": 476.05,
"epoch": 0.1646090534979424,
"grad_norm": 1.8515625,
"kl": 0.005394756112946197,
"learning_rate": 4.941818151059956e-06,
"loss": 0.0215,
"num_tokens": 220427.0,
"reward": 0.559375,
"reward_std": 0.30493979509919883,
"rewards/combined_reward_func": 0.21875,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.21875,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.121875,
"step": 40
},
{
"clip_ratio": 0.0,
"completion_length": 456.7625,
"epoch": 0.205761316872428,
"grad_norm": 1.2265625,
"kl": 0.007781951373908668,
"learning_rate": 4.8395003225513855e-06,
"loss": 0.0329,
"num_tokens": 273764.0,
"reward": 0.5843874990940094,
"reward_std": 0.30495747216045854,
"rewards/combined_reward_func": 0.23125,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.23125,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.12188749983906746,
"step": 50
},
{
"clip_ratio": 0.0,
"completion_length": 466.275,
"epoch": 0.24691358024691357,
"grad_norm": 1.5859375,
"kl": 0.008946178632322698,
"learning_rate": 4.688680663248837e-06,
"loss": 0.0475,
"num_tokens": 327888.0,
"reward": 0.5625,
"reward_std": 0.4419417306780815,
"rewards/combined_reward_func": 0.21875,
"rewards/poem_score_reward_func": 0.00625,
"rewards/soft_format_reward_func": 0.2125,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 60
},
{
"clip_ratio": 0.0,
"completion_length": 471.45,
"epoch": 0.2880658436213992,
"grad_norm": 1.5859375,
"kl": 0.009686458273790777,
"learning_rate": 4.492485921853894e-06,
"loss": 0.0234,
"num_tokens": 382466.0,
"reward": 0.575,
"reward_std": 0.24748736917972564,
"rewards/combined_reward_func": 0.225,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.225,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 70
},
{
"clip_ratio": 0.0,
"completion_length": 456.2375,
"epoch": 0.3292181069958848,
"grad_norm": 1.8828125,
"kl": 0.01117126657627523,
"learning_rate": 4.254983549889467e-06,
"loss": 0.028,
"num_tokens": 435809.0,
"reward": 0.6625,
"reward_std": 0.30052037686109545,
"rewards/combined_reward_func": 0.26875,
"rewards/poem_score_reward_func": 0.00625,
"rewards/soft_format_reward_func": 0.2625,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 80
},
{
"clip_ratio": 0.0,
"completion_length": 455.775,
"epoch": 0.37037037037037035,
"grad_norm": 1.1484375,
"kl": 0.009843840857502072,
"learning_rate": 3.9810973764942595e-06,
"loss": 0.0378,
"num_tokens": 489115.0,
"reward": 0.7125,
"reward_std": 0.33587571531534194,
"rewards/combined_reward_func": 0.29375,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.29375,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 90
},
{
"clip_ratio": 0.0,
"completion_length": 447.6375,
"epoch": 0.411522633744856,
"grad_norm": 1.703125,
"kl": 0.011367120209615677,
"learning_rate": 3.676505529049052e-06,
"loss": 0.0411,
"num_tokens": 541756.0,
"reward": 0.7375,
"reward_std": 0.40658639222383497,
"rewards/combined_reward_func": 0.30625,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.30625,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 100
},
{
"clip_ratio": 0.0,
"completion_length": 433.5,
"epoch": 0.45267489711934156,
"grad_norm": 1.828125,
"kl": 0.012690221704542638,
"learning_rate": 3.347522715914262e-06,
"loss": 0.0251,
"num_tokens": 593296.0,
"reward": 0.8375,
"reward_std": 0.30052037686109545,
"rewards/combined_reward_func": 0.35625,
"rewards/poem_score_reward_func": 0.0125,
"rewards/soft_format_reward_func": 0.34375,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 110
},
{
"clip_ratio": 0.0,
"completion_length": 450.2125,
"epoch": 0.49382716049382713,
"grad_norm": 0.07568359375,
"kl": 0.0119381194235757,
"learning_rate": 3.0009693117583527e-06,
"loss": 0.0447,
"num_tokens": 646161.0,
"reward": 0.775,
"reward_std": 0.38890872299671175,
"rewards/combined_reward_func": 0.325,
"rewards/poem_score_reward_func": 0.00625,
"rewards/soft_format_reward_func": 0.31875,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 120
},
{
"clip_ratio": 0.0,
"completion_length": 449.3875,
"epoch": 0.5349794238683128,
"grad_norm": 1.140625,
"kl": 0.011628977861255408,
"learning_rate": 2.644029959561461e-06,
"loss": 0.0191,
"num_tokens": 698964.0,
"reward": 0.8375,
"reward_std": 0.30052037686109545,
"rewards/combined_reward_func": 0.35625,
"rewards/poem_score_reward_func": 0.0125,
"rewards/soft_format_reward_func": 0.34375,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 130
},
{
"clip_ratio": 0.0,
"completion_length": 443.4375,
"epoch": 0.5761316872427984,
"grad_norm": 1.2109375,
"kl": 0.011442524951417,
"learning_rate": 2.2841046207158073e-06,
"loss": 0.0263,
"num_tokens": 751285.0,
"reward": 0.8875,
"reward_std": 0.33587571531534194,
"rewards/combined_reward_func": 0.38125,
"rewards/poem_score_reward_func": 0.0125,
"rewards/soft_format_reward_func": 0.36875,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 140
},
{
"clip_ratio": 0.0,
"completion_length": 455.0125,
"epoch": 0.6172839506172839,
"grad_norm": 1.890625,
"kl": 0.011128004069905727,
"learning_rate": 1.9286551612082775e-06,
"loss": 0.0297,
"num_tokens": 804550.0,
"reward": 0.7390625,
"reward_std": 0.3027300855144858,
"rewards/combined_reward_func": 0.30625,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.30625,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.1265625,
"step": 150
},
{
"clip_ratio": 0.0,
"completion_length": 463.275,
"epoch": 0.6584362139917695,
"grad_norm": 1.5703125,
"kl": 0.009889759169891477,
"learning_rate": 1.5850506544152105e-06,
"loss": 0.0286,
"num_tokens": 858446.0,
"reward": 0.6,
"reward_std": 0.31819804608821867,
"rewards/combined_reward_func": 0.2375,
"rewards/poem_score_reward_func": 0.00625,
"rewards/soft_format_reward_func": 0.23125,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 160
},
{
"clip_ratio": 0.0,
"completion_length": 452.6625,
"epoch": 0.6995884773662552,
"grad_norm": 1.5859375,
"kl": 0.009591756144072861,
"learning_rate": 1.260414607646213e-06,
"loss": 0.0441,
"num_tokens": 911509.0,
"reward": 0.7125,
"reward_std": 0.33587571531534194,
"rewards/combined_reward_func": 0.29375,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.29375,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 170
},
{
"clip_ratio": 0.0,
"completion_length": 466.2625,
"epoch": 0.7407407407407407,
"grad_norm": 1.6171875,
"kl": 0.010076053719967604,
"learning_rate": 9.614772796912683e-07,
"loss": 0.0282,
"num_tokens": 965658.0,
"reward": 0.6,
"reward_std": 0.31819804608821867,
"rewards/combined_reward_func": 0.2375,
"rewards/poem_score_reward_func": 0.0,
"rewards/soft_format_reward_func": 0.2375,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.125,
"step": 180
}
],
"logging_steps": 10,
"max_steps": 243,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}