{ "best_metric": 0.8464566929133859, "best_model_checkpoint": "./llama_reward_model_1e5-bz32/checkpoint-500", "epoch": 4.861111111111111, "eval_steps": 50, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06944444444444445, "grad_norm": 111.5, "learning_rate": 1.3888888888888892e-06, "loss": 1.451, "step": 10 }, { "epoch": 0.1388888888888889, "grad_norm": 241.0, "learning_rate": 2.7777777777777783e-06, "loss": 0.9891, "step": 20 }, { "epoch": 0.20833333333333334, "grad_norm": 55.5, "learning_rate": 4.166666666666667e-06, "loss": 0.6873, "step": 30 }, { "epoch": 0.2777777777777778, "grad_norm": 89.5, "learning_rate": 5.555555555555557e-06, "loss": 0.5822, "step": 40 }, { "epoch": 0.3472222222222222, "grad_norm": 41.25, "learning_rate": 6.944444444444445e-06, "loss": 0.5683, "step": 50 }, { "epoch": 0.3472222222222222, "eval_accuracy": 0.7637795275590551, "eval_auc": 0.8218052109181142, "eval_f1": 0.7619047619047619, "eval_loss": 0.5505948066711426, "eval_precision": 0.75, "eval_recall": 0.7741935483870968, "eval_runtime": 17.2811, "eval_samples_per_second": 14.698, "eval_steps_per_second": 0.231, "step": 50 }, { "epoch": 0.4166666666666667, "grad_norm": 50.0, "learning_rate": 8.333333333333334e-06, "loss": 0.663, "step": 60 }, { "epoch": 0.4861111111111111, "grad_norm": 20.25, "learning_rate": 9.722222222222223e-06, "loss": 0.5973, "step": 70 }, { "epoch": 0.5555555555555556, "grad_norm": 75.0, "learning_rate": 9.996239762521152e-06, "loss": 0.5811, "step": 80 }, { "epoch": 0.625, "grad_norm": 38.25, "learning_rate": 9.980973490458728e-06, "loss": 0.5726, "step": 90 }, { "epoch": 0.6944444444444444, "grad_norm": 148.0, "learning_rate": 9.954002016824226e-06, "loss": 0.5948, "step": 100 }, { "epoch": 0.6944444444444444, "eval_accuracy": 0.6732283464566929, "eval_auc": 0.8793424317617865, "eval_f1": 0.546448087431694, "eval_loss": 0.5765186548233032, "eval_precision": 0.847457627118644, "eval_recall": 0.4032258064516129, "eval_runtime": 17.3424, "eval_samples_per_second": 14.646, "eval_steps_per_second": 0.231, "step": 100 }, { "epoch": 0.7638888888888888, "grad_norm": 41.0, "learning_rate": 9.915388724114301e-06, "loss": 0.5116, "step": 110 }, { "epoch": 0.8333333333333334, "grad_norm": 15.6875, "learning_rate": 9.86522435289912e-06, "loss": 0.5211, "step": 120 }, { "epoch": 0.9027777777777778, "grad_norm": 79.0, "learning_rate": 9.803626788583603e-06, "loss": 0.5657, "step": 130 }, { "epoch": 0.9722222222222222, "grad_norm": 44.5, "learning_rate": 9.730740784378755e-06, "loss": 0.4961, "step": 140 }, { "epoch": 1.0416666666666667, "grad_norm": 33.25, "learning_rate": 9.646737621134112e-06, "loss": 0.4932, "step": 150 }, { "epoch": 1.0416666666666667, "eval_accuracy": 0.8188976377952756, "eval_auc": 0.8933002481389578, "eval_f1": 0.8435374149659864, "eval_loss": 0.4920627772808075, "eval_precision": 0.7294117647058823, "eval_recall": 1.0, "eval_runtime": 17.2166, "eval_samples_per_second": 14.753, "eval_steps_per_second": 0.232, "step": 150 }, { "epoch": 1.1111111111111112, "grad_norm": 37.5, "learning_rate": 9.551814704830734e-06, "loss": 0.4114, "step": 160 }, { "epoch": 1.1805555555555556, "grad_norm": 22.0, "learning_rate": 9.446195102680531e-06, "loss": 0.4434, "step": 170 }, { "epoch": 1.25, "grad_norm": 48.25, "learning_rate": 9.330127018922195e-06, "loss": 0.5029, "step": 180 }, { "epoch": 1.3194444444444444, "grad_norm": 10.1875, "learning_rate": 9.203883211545517e-06, "loss": 0.36, "step": 190 }, { "epoch": 1.3888888888888888, "grad_norm": 22.5, "learning_rate": 9.067760351314838e-06, "loss": 0.3651, "step": 200 }, { "epoch": 1.3888888888888888, "eval_accuracy": 0.8110236220472441, "eval_auc": 0.8889267990074442, "eval_f1": 0.8248175182481752, "eval_loss": 0.4430588185787201, "eval_precision": 0.7533333333333333, "eval_recall": 0.9112903225806451, "eval_runtime": 17.3222, "eval_samples_per_second": 14.663, "eval_steps_per_second": 0.231, "step": 200 }, { "epoch": 1.4583333333333333, "grad_norm": 19.0, "learning_rate": 8.92207832459788e-06, "loss": 0.3805, "step": 210 }, { "epoch": 1.5277777777777777, "grad_norm": 43.0, "learning_rate": 8.767179481638303e-06, "loss": 0.3689, "step": 220 }, { "epoch": 1.5972222222222223, "grad_norm": 26.5, "learning_rate": 8.603427832038574e-06, "loss": 0.3745, "step": 230 }, { "epoch": 1.6666666666666665, "grad_norm": 68.0, "learning_rate": 8.43120818934367e-06, "loss": 0.3921, "step": 240 }, { "epoch": 1.7361111111111112, "grad_norm": 25.75, "learning_rate": 8.25092526673592e-06, "loss": 0.434, "step": 250 }, { "epoch": 1.7361111111111112, "eval_accuracy": 0.84251968503937, "eval_auc": 0.9133374689826302, "eval_f1": 0.8461538461538461, "eval_loss": 0.36681872606277466, "eval_precision": 0.8088235294117647, "eval_recall": 0.8870967741935484, "eval_runtime": 17.2167, "eval_samples_per_second": 14.753, "eval_steps_per_second": 0.232, "step": 250 }, { "epoch": 1.8055555555555556, "grad_norm": 11.9375, "learning_rate": 8.063002725966014e-06, "loss": 0.3524, "step": 260 }, { "epoch": 1.875, "grad_norm": 26.25, "learning_rate": 7.86788218175523e-06, "loss": 0.3992, "step": 270 }, { "epoch": 1.9444444444444444, "grad_norm": 32.75, "learning_rate": 7.666022164008458e-06, "loss": 0.4295, "step": 280 }, { "epoch": 2.013888888888889, "grad_norm": 10.4375, "learning_rate": 7.457897040276853e-06, "loss": 0.3542, "step": 290 }, { "epoch": 2.0833333333333335, "grad_norm": 26.125, "learning_rate": 7.243995901002312e-06, "loss": 0.2434, "step": 300 }, { "epoch": 2.0833333333333335, "eval_accuracy": 0.7992125984251969, "eval_auc": 0.9068238213399503, "eval_f1": 0.8197879858657244, "eval_loss": 0.5410435199737549, "eval_precision": 0.7295597484276729, "eval_recall": 0.9354838709677419, "eval_runtime": 17.3565, "eval_samples_per_second": 14.634, "eval_steps_per_second": 0.23, "step": 300 }, { "epoch": 2.1527777777777777, "grad_norm": 14.75, "learning_rate": 7.0248214101633685e-06, "loss": 0.2442, "step": 310 }, { "epoch": 2.2222222222222223, "grad_norm": 13.375, "learning_rate": 6.800888624023552e-06, "loss": 0.211, "step": 320 }, { "epoch": 2.2916666666666665, "grad_norm": 44.25, "learning_rate": 6.572723780758069e-06, "loss": 0.175, "step": 330 }, { "epoch": 2.361111111111111, "grad_norm": 20.0, "learning_rate": 6.340863063803187e-06, "loss": 0.1647, "step": 340 }, { "epoch": 2.4305555555555554, "grad_norm": 32.5, "learning_rate": 6.105851341834439e-06, "loss": 0.2687, "step": 350 }, { "epoch": 2.4305555555555554, "eval_accuracy": 0.8385826771653543, "eval_auc": 0.8970843672456575, "eval_f1": 0.846441947565543, "eval_loss": 0.45397356152534485, "eval_precision": 0.7902097902097902, "eval_recall": 0.9112903225806451, "eval_runtime": 17.2257, "eval_samples_per_second": 14.745, "eval_steps_per_second": 0.232, "step": 350 }, { "epoch": 2.5, "grad_norm": 34.25, "learning_rate": 5.8682408883346535e-06, "loss": 0.2253, "step": 360 }, { "epoch": 2.5694444444444446, "grad_norm": 42.25, "learning_rate": 5.628590083760815e-06, "loss": 0.2023, "step": 370 }, { "epoch": 2.638888888888889, "grad_norm": 23.5, "learning_rate": 5.387462103359655e-06, "loss": 0.1633, "step": 380 }, { "epoch": 2.7083333333333335, "grad_norm": 23.375, "learning_rate": 5.145423593715558e-06, "loss": 0.2195, "step": 390 }, { "epoch": 2.7777777777777777, "grad_norm": 31.75, "learning_rate": 4.903043341140879e-06, "loss": 0.1774, "step": 400 }, { "epoch": 2.7777777777777777, "eval_accuracy": 0.8149606299212598, "eval_auc": 0.90462158808933, "eval_f1": 0.8226415094339623, "eval_loss": 0.44090235233306885, "eval_precision": 0.7730496453900709, "eval_recall": 0.8790322580645161, "eval_runtime": 17.1892, "eval_samples_per_second": 14.777, "eval_steps_per_second": 0.233, "step": 400 }, { "epoch": 2.8472222222222223, "grad_norm": 40.5, "learning_rate": 4.660890935037954e-06, "loss": 0.1502, "step": 410 }, { "epoch": 2.9166666666666665, "grad_norm": 33.25, "learning_rate": 4.4195354293738484e-06, "loss": 0.1355, "step": 420 }, { "epoch": 2.986111111111111, "grad_norm": 30.125, "learning_rate": 4.17954400541338e-06, "loss": 0.1545, "step": 430 }, { "epoch": 3.0555555555555554, "grad_norm": 5.96875, "learning_rate": 3.941480638852948e-06, "loss": 0.0917, "step": 440 }, { "epoch": 3.125, "grad_norm": 3.90625, "learning_rate": 3.705904774487396e-06, "loss": 0.0732, "step": 450 }, { "epoch": 3.125, "eval_accuracy": 0.8385826771653543, "eval_auc": 0.9099565756823821, "eval_f1": 0.8452830188679246, "eval_loss": 0.47704994678497314, "eval_precision": 0.7943262411347518, "eval_recall": 0.9032258064516129, "eval_runtime": 17.2377, "eval_samples_per_second": 14.735, "eval_steps_per_second": 0.232, "step": 450 }, { "epoch": 3.1944444444444446, "grad_norm": 50.25, "learning_rate": 3.473370011524435e-06, "loss": 0.0313, "step": 460 }, { "epoch": 3.263888888888889, "grad_norm": 21.75, "learning_rate": 3.244422802636057e-06, "loss": 0.0233, "step": 470 }, { "epoch": 3.3333333333333335, "grad_norm": 14.5625, "learning_rate": 3.019601169804216e-06, "loss": 0.0316, "step": 480 }, { "epoch": 3.4027777777777777, "grad_norm": 9.5, "learning_rate": 2.7994334399784773e-06, "loss": 0.0272, "step": 490 }, { "epoch": 3.4722222222222223, "grad_norm": 29.0, "learning_rate": 2.5844370035168077e-06, "loss": 0.0598, "step": 500 }, { "epoch": 3.4722222222222223, "eval_accuracy": 0.8464566929133859, "eval_auc": 0.9099255583126551, "eval_f1": 0.8446215139442231, "eval_loss": 0.5372636318206787, "eval_precision": 0.8346456692913385, "eval_recall": 0.8548387096774194, "eval_runtime": 17.3025, "eval_samples_per_second": 14.68, "eval_steps_per_second": 0.231, "step": 500 }, { "epoch": 3.5416666666666665, "grad_norm": 4.09375, "learning_rate": 2.3751170983272e-06, "loss": 0.0248, "step": 510 }, { "epoch": 3.611111111111111, "grad_norm": 1.1484375, "learning_rate": 2.171965622567308e-06, "loss": 0.0152, "step": 520 }, { "epoch": 3.6805555555555554, "grad_norm": 2.546875, "learning_rate": 1.9754599786922913e-06, "loss": 0.0366, "step": 530 }, { "epoch": 3.75, "grad_norm": 5.0, "learning_rate": 1.7860619515673034e-06, "loss": 0.0311, "step": 540 }, { "epoch": 3.8194444444444446, "grad_norm": 3.46875, "learning_rate": 1.6042166232810346e-06, "loss": 0.0283, "step": 550 }, { "epoch": 3.8194444444444446, "eval_accuracy": 0.8385826771653543, "eval_auc": 0.9107009925558313, "eval_f1": 0.8404669260700389, "eval_loss": 0.6065385341644287, "eval_precision": 0.8120300751879699, "eval_recall": 0.8709677419354839, "eval_runtime": 17.1801, "eval_samples_per_second": 14.785, "eval_steps_per_second": 0.233, "step": 550 }, { "epoch": 3.888888888888889, "grad_norm": 10.0625, "learning_rate": 1.4303513272105057e-06, "loss": 0.0166, "step": 560 }, { "epoch": 3.9583333333333335, "grad_norm": 4.90625, "learning_rate": 1.264874643795021e-06, "loss": 0.0146, "step": 570 }, { "epoch": 4.027777777777778, "grad_norm": 13.3125, "learning_rate": 1.1081754403792e-06, "loss": 0.0125, "step": 580 }, { "epoch": 4.097222222222222, "grad_norm": 0.50390625, "learning_rate": 9.606219573814447e-07, "loss": 0.0019, "step": 590 }, { "epoch": 4.166666666666667, "grad_norm": 1.2734375, "learning_rate": 8.225609429353187e-07, "loss": 0.0161, "step": 600 }, { "epoch": 4.166666666666667, "eval_accuracy": 0.8464566929133859, "eval_auc": 0.9123138957816378, "eval_f1": 0.8482490272373541, "eval_loss": 0.6167948246002197, "eval_precision": 0.8195488721804511, "eval_recall": 0.8790322580645161, "eval_runtime": 17.3503, "eval_samples_per_second": 14.64, "eval_steps_per_second": 0.231, "step": 600 }, { "epoch": 4.236111111111111, "grad_norm": 1.3515625, "learning_rate": 6.94316838037431e-07, "loss": 0.0045, "step": 610 }, { "epoch": 4.305555555555555, "grad_norm": 0.84765625, "learning_rate": 5.76191014116711e-07, "loss": 0.005, "step": 620 }, { "epoch": 4.375, "grad_norm": 4.34375, "learning_rate": 4.6846106481675035e-07, "loss": 0.0101, "step": 630 }, { "epoch": 4.444444444444445, "grad_norm": 5.6875, "learning_rate": 3.7138015365554834e-07, "loss": 0.0091, "step": 640 }, { "epoch": 4.513888888888889, "grad_norm": 8.875, "learning_rate": 2.8517641909562075e-07, "loss": 0.0091, "step": 650 }, { "epoch": 4.513888888888889, "eval_accuracy": 0.8464566929133859, "eval_auc": 0.9123449131513648, "eval_f1": 0.8482490272373541, "eval_loss": 0.6211294531822205, "eval_precision": 0.8195488721804511, "eval_recall": 0.8790322580645161, "eval_runtime": 17.3655, "eval_samples_per_second": 14.627, "eval_steps_per_second": 0.23, "step": 650 }, { "epoch": 4.583333333333333, "grad_norm": 1.484375, "learning_rate": 2.1005243842255552e-07, "loss": 0.0037, "step": 660 }, { "epoch": 4.652777777777778, "grad_norm": 3.265625, "learning_rate": 1.4618475169190017e-07, "loss": 0.0033, "step": 670 }, { "epoch": 4.722222222222222, "grad_norm": 7.1875, "learning_rate": 9.372344686307655e-08, "loss": 0.0079, "step": 680 }, { "epoch": 4.791666666666667, "grad_norm": 0.6953125, "learning_rate": 5.279180709527765e-08, "loss": 0.0044, "step": 690 }, { "epoch": 4.861111111111111, "grad_norm": 0.41796875, "learning_rate": 2.3486021034170857e-08, "loss": 0.007, "step": 700 }, { "epoch": 4.861111111111111, "eval_accuracy": 0.8464566929133859, "eval_auc": 0.9124069478908188, "eval_f1": 0.8482490272373541, "eval_loss": 0.6205126047134399, "eval_precision": 0.8195488721804511, "eval_recall": 0.8790322580645161, "eval_runtime": 17.3822, "eval_samples_per_second": 14.613, "eval_steps_per_second": 0.23, "step": 700 } ], "logging_steps": 10, "max_steps": 720, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.5117205872894935e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }