acc_rm_1e5_550 / trainer_state.json
Jennny's picture
upload checkpoint-550 to repo root
9ed9667 verified
{
"best_metric": 0.6732283464566929,
"best_model_checkpoint": "./llama_acc_reward_model_1e5-bz32/checkpoint-550",
"epoch": 3.8194444444444446,
"eval_steps": 50,
"global_step": 550,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06944444444444445,
"grad_norm": 237.0,
"learning_rate": 1.3888888888888892e-06,
"loss": 1.5531,
"step": 10
},
{
"epoch": 0.1388888888888889,
"grad_norm": 284.0,
"learning_rate": 2.7777777777777783e-06,
"loss": 1.2538,
"step": 20
},
{
"epoch": 0.20833333333333334,
"grad_norm": 83.0,
"learning_rate": 4.166666666666667e-06,
"loss": 0.8203,
"step": 30
},
{
"epoch": 0.2777777777777778,
"grad_norm": 171.0,
"learning_rate": 5.555555555555557e-06,
"loss": 0.7287,
"step": 40
},
{
"epoch": 0.3472222222222222,
"grad_norm": 88.0,
"learning_rate": 6.944444444444445e-06,
"loss": 0.8232,
"step": 50
},
{
"epoch": 0.3472222222222222,
"eval_accuracy": 0.6141732283464567,
"eval_auc": 0.6284406565656565,
"eval_f1": 0.7083333333333334,
"eval_loss": 0.8070767521858215,
"eval_precision": 0.6197916666666666,
"eval_recall": 0.8263888888888888,
"eval_runtime": 17.2507,
"eval_samples_per_second": 14.724,
"eval_steps_per_second": 0.232,
"step": 50
},
{
"epoch": 0.4166666666666667,
"grad_norm": 184.0,
"learning_rate": 8.333333333333334e-06,
"loss": 0.922,
"step": 60
},
{
"epoch": 0.4861111111111111,
"grad_norm": 45.5,
"learning_rate": 9.722222222222223e-06,
"loss": 0.838,
"step": 70
},
{
"epoch": 0.5555555555555556,
"grad_norm": 91.5,
"learning_rate": 9.996239762521152e-06,
"loss": 0.7773,
"step": 80
},
{
"epoch": 0.625,
"grad_norm": 50.5,
"learning_rate": 9.980973490458728e-06,
"loss": 0.744,
"step": 90
},
{
"epoch": 0.6944444444444444,
"grad_norm": 22.625,
"learning_rate": 9.954002016824226e-06,
"loss": 0.9683,
"step": 100
},
{
"epoch": 0.6944444444444444,
"eval_accuracy": 0.5787401574803149,
"eval_auc": 0.6018623737373737,
"eval_f1": 0.7002801120448179,
"eval_loss": 0.7812102437019348,
"eval_precision": 0.5868544600938967,
"eval_recall": 0.8680555555555556,
"eval_runtime": 17.3832,
"eval_samples_per_second": 14.612,
"eval_steps_per_second": 0.23,
"step": 100
},
{
"epoch": 0.7638888888888888,
"grad_norm": 126.5,
"learning_rate": 9.915388724114301e-06,
"loss": 0.8441,
"step": 110
},
{
"epoch": 0.8333333333333334,
"grad_norm": 44.0,
"learning_rate": 9.86522435289912e-06,
"loss": 0.7468,
"step": 120
},
{
"epoch": 0.9027777777777778,
"grad_norm": 145.0,
"learning_rate": 9.803626788583603e-06,
"loss": 0.7598,
"step": 130
},
{
"epoch": 0.9722222222222222,
"grad_norm": 100.0,
"learning_rate": 9.730740784378755e-06,
"loss": 0.7506,
"step": 140
},
{
"epoch": 1.0416666666666667,
"grad_norm": 18.375,
"learning_rate": 9.646737621134112e-06,
"loss": 0.669,
"step": 150
},
{
"epoch": 1.0416666666666667,
"eval_accuracy": 0.6023622047244095,
"eval_auc": 0.6256628787878789,
"eval_f1": 0.6710097719869706,
"eval_loss": 0.7021759748458862,
"eval_precision": 0.6319018404907976,
"eval_recall": 0.7152777777777778,
"eval_runtime": 17.2473,
"eval_samples_per_second": 14.727,
"eval_steps_per_second": 0.232,
"step": 150
},
{
"epoch": 1.1111111111111112,
"grad_norm": 74.0,
"learning_rate": 9.551814704830734e-06,
"loss": 0.6399,
"step": 160
},
{
"epoch": 1.1805555555555556,
"grad_norm": 91.5,
"learning_rate": 9.446195102680531e-06,
"loss": 0.6868,
"step": 170
},
{
"epoch": 1.25,
"grad_norm": 23.875,
"learning_rate": 9.330127018922195e-06,
"loss": 0.6817,
"step": 180
},
{
"epoch": 1.3194444444444444,
"grad_norm": 80.5,
"learning_rate": 9.203883211545517e-06,
"loss": 0.7281,
"step": 190
},
{
"epoch": 1.3888888888888888,
"grad_norm": 50.25,
"learning_rate": 9.067760351314838e-06,
"loss": 0.606,
"step": 200
},
{
"epoch": 1.3888888888888888,
"eval_accuracy": 0.5905511811023622,
"eval_auc": 0.6167929292929293,
"eval_f1": 0.695906432748538,
"eval_loss": 0.7337387204170227,
"eval_precision": 0.601010101010101,
"eval_recall": 0.8263888888888888,
"eval_runtime": 17.381,
"eval_samples_per_second": 14.614,
"eval_steps_per_second": 0.23,
"step": 200
},
{
"epoch": 1.4583333333333333,
"grad_norm": 44.5,
"learning_rate": 8.92207832459788e-06,
"loss": 0.5274,
"step": 210
},
{
"epoch": 1.5277777777777777,
"grad_norm": 61.75,
"learning_rate": 8.767179481638303e-06,
"loss": 0.6415,
"step": 220
},
{
"epoch": 1.5972222222222223,
"grad_norm": 60.75,
"learning_rate": 8.603427832038574e-06,
"loss": 0.7424,
"step": 230
},
{
"epoch": 1.6666666666666665,
"grad_norm": 67.5,
"learning_rate": 8.43120818934367e-06,
"loss": 0.5821,
"step": 240
},
{
"epoch": 1.7361111111111112,
"grad_norm": 118.0,
"learning_rate": 8.25092526673592e-06,
"loss": 0.6577,
"step": 250
},
{
"epoch": 1.7361111111111112,
"eval_accuracy": 0.6338582677165354,
"eval_auc": 0.6801136363636364,
"eval_f1": 0.7319884726224783,
"eval_loss": 0.6989323496818542,
"eval_precision": 0.625615763546798,
"eval_recall": 0.8819444444444444,
"eval_runtime": 17.2572,
"eval_samples_per_second": 14.718,
"eval_steps_per_second": 0.232,
"step": 250
},
{
"epoch": 1.8055555555555556,
"grad_norm": 35.25,
"learning_rate": 8.063002725966014e-06,
"loss": 0.5856,
"step": 260
},
{
"epoch": 1.875,
"grad_norm": 19.125,
"learning_rate": 7.86788218175523e-06,
"loss": 0.5973,
"step": 270
},
{
"epoch": 1.9444444444444444,
"grad_norm": 14.375,
"learning_rate": 7.666022164008458e-06,
"loss": 0.5621,
"step": 280
},
{
"epoch": 2.013888888888889,
"grad_norm": 59.75,
"learning_rate": 7.457897040276853e-06,
"loss": 0.5428,
"step": 290
},
{
"epoch": 2.0833333333333335,
"grad_norm": 88.0,
"learning_rate": 7.243995901002312e-06,
"loss": 0.5459,
"step": 300
},
{
"epoch": 2.0833333333333335,
"eval_accuracy": 0.6417322834645669,
"eval_auc": 0.7095012626262626,
"eval_f1": 0.7436619718309859,
"eval_loss": 0.704635500907898,
"eval_precision": 0.6255924170616114,
"eval_recall": 0.9166666666666666,
"eval_runtime": 17.2333,
"eval_samples_per_second": 14.739,
"eval_steps_per_second": 0.232,
"step": 300
},
{
"epoch": 2.1527777777777777,
"grad_norm": 32.0,
"learning_rate": 7.0248214101633685e-06,
"loss": 0.5229,
"step": 310
},
{
"epoch": 2.2222222222222223,
"grad_norm": 22.0,
"learning_rate": 6.800888624023552e-06,
"loss": 0.5532,
"step": 320
},
{
"epoch": 2.2916666666666665,
"grad_norm": 21.375,
"learning_rate": 6.572723780758069e-06,
"loss": 0.5129,
"step": 330
},
{
"epoch": 2.361111111111111,
"grad_norm": 46.0,
"learning_rate": 6.340863063803187e-06,
"loss": 0.4531,
"step": 340
},
{
"epoch": 2.4305555555555554,
"grad_norm": 24.125,
"learning_rate": 6.105851341834439e-06,
"loss": 0.4496,
"step": 350
},
{
"epoch": 2.4305555555555554,
"eval_accuracy": 0.6496062992125984,
"eval_auc": 0.6922032828282829,
"eval_f1": 0.6898954703832753,
"eval_loss": 0.6863601207733154,
"eval_precision": 0.6923076923076923,
"eval_recall": 0.6875,
"eval_runtime": 17.3556,
"eval_samples_per_second": 14.635,
"eval_steps_per_second": 0.23,
"step": 350
},
{
"epoch": 2.5,
"grad_norm": 66.0,
"learning_rate": 5.8682408883346535e-06,
"loss": 0.4314,
"step": 360
},
{
"epoch": 2.5694444444444446,
"grad_norm": 38.5,
"learning_rate": 5.628590083760815e-06,
"loss": 0.4262,
"step": 370
},
{
"epoch": 2.638888888888889,
"grad_norm": 28.75,
"learning_rate": 5.387462103359655e-06,
"loss": 0.461,
"step": 380
},
{
"epoch": 2.7083333333333335,
"grad_norm": 20.625,
"learning_rate": 5.145423593715558e-06,
"loss": 0.4406,
"step": 390
},
{
"epoch": 2.7777777777777777,
"grad_norm": 36.25,
"learning_rate": 4.903043341140879e-06,
"loss": 0.3986,
"step": 400
},
{
"epoch": 2.7777777777777777,
"eval_accuracy": 0.6535433070866141,
"eval_auc": 0.6909722222222223,
"eval_f1": 0.6691729323308271,
"eval_loss": 0.7401882410049438,
"eval_precision": 0.7295081967213115,
"eval_recall": 0.6180555555555556,
"eval_runtime": 17.2396,
"eval_samples_per_second": 14.734,
"eval_steps_per_second": 0.232,
"step": 400
},
{
"epoch": 2.8472222222222223,
"grad_norm": 35.0,
"learning_rate": 4.660890935037954e-06,
"loss": 0.3808,
"step": 410
},
{
"epoch": 2.9166666666666665,
"grad_norm": 42.25,
"learning_rate": 4.4195354293738484e-06,
"loss": 0.4725,
"step": 420
},
{
"epoch": 2.986111111111111,
"grad_norm": 26.125,
"learning_rate": 4.17954400541338e-06,
"loss": 0.4157,
"step": 430
},
{
"epoch": 3.0555555555555554,
"grad_norm": 20.875,
"learning_rate": 3.941480638852948e-06,
"loss": 0.2813,
"step": 440
},
{
"epoch": 3.125,
"grad_norm": 23.75,
"learning_rate": 3.705904774487396e-06,
"loss": 0.2891,
"step": 450
},
{
"epoch": 3.125,
"eval_accuracy": 0.6456692913385826,
"eval_auc": 0.6821338383838383,
"eval_f1": 0.6938775510204082,
"eval_loss": 0.7412765622138977,
"eval_precision": 0.68,
"eval_recall": 0.7083333333333334,
"eval_runtime": 17.2413,
"eval_samples_per_second": 14.732,
"eval_steps_per_second": 0.232,
"step": 450
},
{
"epoch": 3.1944444444444446,
"grad_norm": 18.0,
"learning_rate": 3.473370011524435e-06,
"loss": 0.1611,
"step": 460
},
{
"epoch": 3.263888888888889,
"grad_norm": 51.0,
"learning_rate": 3.244422802636057e-06,
"loss": 0.1808,
"step": 470
},
{
"epoch": 3.3333333333333335,
"grad_norm": 50.5,
"learning_rate": 3.019601169804216e-06,
"loss": 0.1928,
"step": 480
},
{
"epoch": 3.4027777777777777,
"grad_norm": 74.5,
"learning_rate": 2.7994334399784773e-06,
"loss": 0.2171,
"step": 490
},
{
"epoch": 3.4722222222222223,
"grad_norm": 55.25,
"learning_rate": 2.5844370035168077e-06,
"loss": 0.1798,
"step": 500
},
{
"epoch": 3.4722222222222223,
"eval_accuracy": 0.6535433070866141,
"eval_auc": 0.6843118686868688,
"eval_f1": 0.696551724137931,
"eval_loss": 0.8344177603721619,
"eval_precision": 0.6917808219178082,
"eval_recall": 0.7013888888888888,
"eval_runtime": 17.2629,
"eval_samples_per_second": 14.714,
"eval_steps_per_second": 0.232,
"step": 500
},
{
"epoch": 3.5416666666666665,
"grad_norm": 40.0,
"learning_rate": 2.3751170983272e-06,
"loss": 0.2129,
"step": 510
},
{
"epoch": 3.611111111111111,
"grad_norm": 38.5,
"learning_rate": 2.171965622567308e-06,
"loss": 0.1615,
"step": 520
},
{
"epoch": 3.6805555555555554,
"grad_norm": 50.0,
"learning_rate": 1.9754599786922913e-06,
"loss": 0.1841,
"step": 530
},
{
"epoch": 3.75,
"grad_norm": 33.5,
"learning_rate": 1.7860619515673034e-06,
"loss": 0.1303,
"step": 540
},
{
"epoch": 3.8194444444444446,
"grad_norm": 23.875,
"learning_rate": 1.6042166232810346e-06,
"loss": 0.1689,
"step": 550
},
{
"epoch": 3.8194444444444446,
"eval_accuracy": 0.6732283464566929,
"eval_auc": 0.6888888888888889,
"eval_f1": 0.7087719298245614,
"eval_loss": 0.8513283133506775,
"eval_precision": 0.7163120567375887,
"eval_recall": 0.7013888888888888,
"eval_runtime": 17.2698,
"eval_samples_per_second": 14.708,
"eval_steps_per_second": 0.232,
"step": 550
}
],
"logging_steps": 10,
"max_steps": 720,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.1871759346771968e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}