eng_rm_1e5_350 / trainer_state.json
Jennny's picture
upload checkpoint-350 to repo root
8add328 verified
{
"best_metric": 0.7266666666666667,
"best_model_checkpoint": "./llama_reward_model_1e5-bz32/checkpoint-350",
"epoch": 2.482269503546099,
"eval_steps": 50,
"global_step": 350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07092198581560284,
"grad_norm": 388.0,
"learning_rate": 1.4084507042253523e-06,
"loss": 2.1268,
"step": 10
},
{
"epoch": 0.14184397163120568,
"grad_norm": 219.0,
"learning_rate": 2.8169014084507046e-06,
"loss": 0.9385,
"step": 20
},
{
"epoch": 0.2127659574468085,
"grad_norm": 247.0,
"learning_rate": 4.225352112676057e-06,
"loss": 0.9245,
"step": 30
},
{
"epoch": 0.28368794326241137,
"grad_norm": 76.5,
"learning_rate": 5.633802816901409e-06,
"loss": 0.7768,
"step": 40
},
{
"epoch": 0.3546099290780142,
"grad_norm": 152.0,
"learning_rate": 7.042253521126761e-06,
"loss": 0.8706,
"step": 50
},
{
"epoch": 0.3546099290780142,
"eval_accuracy": 0.5166666666666667,
"eval_auc": 0.6492222222222223,
"eval_f1": 0.6635730858468677,
"eval_loss": 1.0949409008026123,
"eval_precision": 0.5088967971530249,
"eval_recall": 0.9533333333333334,
"eval_runtime": 21.6558,
"eval_samples_per_second": 13.853,
"eval_steps_per_second": 0.231,
"step": 50
},
{
"epoch": 0.425531914893617,
"grad_norm": 149.0,
"learning_rate": 8.450704225352114e-06,
"loss": 0.866,
"step": 60
},
{
"epoch": 0.49645390070921985,
"grad_norm": 112.5,
"learning_rate": 9.859154929577466e-06,
"loss": 0.7572,
"step": 70
},
{
"epoch": 0.5673758865248227,
"grad_norm": 32.0,
"learning_rate": 9.995028650728335e-06,
"loss": 0.8987,
"step": 80
},
{
"epoch": 0.6382978723404256,
"grad_norm": 82.0,
"learning_rate": 9.977856431060221e-06,
"loss": 0.7988,
"step": 90
},
{
"epoch": 0.7092198581560284,
"grad_norm": 148.0,
"learning_rate": 9.948464112207811e-06,
"loss": 0.8813,
"step": 100
},
{
"epoch": 0.7092198581560284,
"eval_accuracy": 0.6633333333333333,
"eval_auc": 0.7096444444444445,
"eval_f1": 0.6007905138339921,
"eval_loss": 0.6915330290794373,
"eval_precision": 0.7378640776699029,
"eval_recall": 0.5066666666666667,
"eval_runtime": 21.6662,
"eval_samples_per_second": 13.846,
"eval_steps_per_second": 0.231,
"step": 100
},
{
"epoch": 0.7801418439716312,
"grad_norm": 36.75,
"learning_rate": 9.906923849135118e-06,
"loss": 0.7012,
"step": 110
},
{
"epoch": 0.851063829787234,
"grad_norm": 26.0,
"learning_rate": 9.853337618695413e-06,
"loss": 0.6147,
"step": 120
},
{
"epoch": 0.9219858156028369,
"grad_norm": 22.125,
"learning_rate": 9.78783696928909e-06,
"loss": 0.5603,
"step": 130
},
{
"epoch": 0.9929078014184397,
"grad_norm": 72.5,
"learning_rate": 9.710582697926562e-06,
"loss": 0.6269,
"step": 140
},
{
"epoch": 1.0638297872340425,
"grad_norm": 129.0,
"learning_rate": 9.62176445548899e-06,
"loss": 0.7091,
"step": 150
},
{
"epoch": 1.0638297872340425,
"eval_accuracy": 0.6133333333333333,
"eval_auc": 0.7025777777777777,
"eval_f1": 0.5126050420168067,
"eval_loss": 0.7151511907577515,
"eval_precision": 0.6931818181818182,
"eval_recall": 0.4066666666666667,
"eval_runtime": 21.9923,
"eval_samples_per_second": 13.641,
"eval_steps_per_second": 0.227,
"step": 150
},
{
"epoch": 1.1347517730496455,
"grad_norm": 73.0,
"learning_rate": 9.521600281155894e-06,
"loss": 0.5991,
"step": 160
},
{
"epoch": 1.2056737588652482,
"grad_norm": 69.0,
"learning_rate": 9.410336067142525e-06,
"loss": 0.4835,
"step": 170
},
{
"epoch": 1.2765957446808511,
"grad_norm": 27.25,
"learning_rate": 9.28824495506109e-06,
"loss": 0.4869,
"step": 180
},
{
"epoch": 1.3475177304964538,
"grad_norm": 71.5,
"learning_rate": 9.155626665387625e-06,
"loss": 0.4671,
"step": 190
},
{
"epoch": 1.4184397163120568,
"grad_norm": 25.875,
"learning_rate": 9.012806761680642e-06,
"loss": 0.4129,
"step": 200
},
{
"epoch": 1.4184397163120568,
"eval_accuracy": 0.69,
"eval_auc": 0.7723555555555555,
"eval_f1": 0.7240356083086054,
"eval_loss": 0.6506745219230652,
"eval_precision": 0.6524064171122995,
"eval_recall": 0.8133333333333334,
"eval_runtime": 21.8002,
"eval_samples_per_second": 13.761,
"eval_steps_per_second": 0.229,
"step": 200
},
{
"epoch": 1.4893617021276595,
"grad_norm": 57.25,
"learning_rate": 8.860135851357803e-06,
"loss": 0.517,
"step": 210
},
{
"epoch": 1.5602836879432624,
"grad_norm": 30.5,
"learning_rate": 8.697988724992633e-06,
"loss": 0.4431,
"step": 220
},
{
"epoch": 1.6312056737588652,
"grad_norm": 16.0,
"learning_rate": 8.526763436244184e-06,
"loss": 0.4721,
"step": 230
},
{
"epoch": 1.702127659574468,
"grad_norm": 19.5,
"learning_rate": 8.346880324678359e-06,
"loss": 0.4094,
"step": 240
},
{
"epoch": 1.773049645390071,
"grad_norm": 114.0,
"learning_rate": 8.158780983879737e-06,
"loss": 0.5608,
"step": 250
},
{
"epoch": 1.773049645390071,
"eval_accuracy": 0.71,
"eval_auc": 0.7636,
"eval_f1": 0.7220447284345048,
"eval_loss": 0.6192474365234375,
"eval_precision": 0.6932515337423313,
"eval_recall": 0.7533333333333333,
"eval_runtime": 21.9348,
"eval_samples_per_second": 13.677,
"eval_steps_per_second": 0.228,
"step": 250
},
{
"epoch": 1.8439716312056738,
"grad_norm": 53.25,
"learning_rate": 7.962927177387085e-06,
"loss": 0.4805,
"step": 260
},
{
"epoch": 1.9148936170212765,
"grad_norm": 9.625,
"learning_rate": 7.759799705113797e-06,
"loss": 0.4569,
"step": 270
},
{
"epoch": 1.9858156028368794,
"grad_norm": 23.375,
"learning_rate": 7.54989722303612e-06,
"loss": 0.5058,
"step": 280
},
{
"epoch": 2.0567375886524824,
"grad_norm": 9.75,
"learning_rate": 7.33373501904665e-06,
"loss": 0.3455,
"step": 290
},
{
"epoch": 2.127659574468085,
"grad_norm": 31.875,
"learning_rate": 7.111843747978296e-06,
"loss": 0.3486,
"step": 300
},
{
"epoch": 2.127659574468085,
"eval_accuracy": 0.7066666666666667,
"eval_auc": 0.7925333333333333,
"eval_f1": 0.7426900584795322,
"eval_loss": 0.6051149964332581,
"eval_precision": 0.6614583333333334,
"eval_recall": 0.8466666666666667,
"eval_runtime": 21.6917,
"eval_samples_per_second": 13.83,
"eval_steps_per_second": 0.231,
"step": 300
},
{
"epoch": 2.198581560283688,
"grad_norm": 50.0,
"learning_rate": 6.884768128904038e-06,
"loss": 0.33,
"step": 310
},
{
"epoch": 2.269503546099291,
"grad_norm": 29.125,
"learning_rate": 6.653065607910535e-06,
"loss": 0.356,
"step": 320
},
{
"epoch": 2.3404255319148937,
"grad_norm": 15.25,
"learning_rate": 6.4173049896282525e-06,
"loss": 0.2589,
"step": 330
},
{
"epoch": 2.4113475177304964,
"grad_norm": 15.375,
"learning_rate": 6.1780650408776214e-06,
"loss": 0.2613,
"step": 340
},
{
"epoch": 2.482269503546099,
"grad_norm": 35.75,
"learning_rate": 5.9359330698590835e-06,
"loss": 0.2617,
"step": 350
},
{
"epoch": 2.482269503546099,
"eval_accuracy": 0.7266666666666667,
"eval_auc": 0.7925111111111112,
"eval_f1": 0.7210884353741497,
"eval_loss": 0.6037541627883911,
"eval_precision": 0.7361111111111112,
"eval_recall": 0.7066666666666667,
"eval_runtime": 21.8112,
"eval_samples_per_second": 13.754,
"eval_steps_per_second": 0.229,
"step": 350
}
],
"logging_steps": 10,
"max_steps": 705,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.568021622454272e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}