| { |
| "best_metric": 0.8464566929133859, |
| "best_model_checkpoint": "./llama_reward_model_1e5-bz32/checkpoint-500", |
| "epoch": 4.861111111111111, |
| "eval_steps": 50, |
| "global_step": 700, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06944444444444445, |
| "grad_norm": 111.5, |
| "learning_rate": 1.3888888888888892e-06, |
| "loss": 1.451, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1388888888888889, |
| "grad_norm": 241.0, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 0.9891, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.20833333333333334, |
| "grad_norm": 55.5, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 0.6873, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2777777777777778, |
| "grad_norm": 89.5, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.5822, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3472222222222222, |
| "grad_norm": 41.25, |
| "learning_rate": 6.944444444444445e-06, |
| "loss": 0.5683, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3472222222222222, |
| "eval_accuracy": 0.7637795275590551, |
| "eval_auc": 0.8218052109181142, |
| "eval_f1": 0.7619047619047619, |
| "eval_loss": 0.5505948066711426, |
| "eval_precision": 0.75, |
| "eval_recall": 0.7741935483870968, |
| "eval_runtime": 17.2811, |
| "eval_samples_per_second": 14.698, |
| "eval_steps_per_second": 0.231, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 50.0, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.663, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4861111111111111, |
| "grad_norm": 20.25, |
| "learning_rate": 9.722222222222223e-06, |
| "loss": 0.5973, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 75.0, |
| "learning_rate": 9.996239762521152e-06, |
| "loss": 0.5811, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 38.25, |
| "learning_rate": 9.980973490458728e-06, |
| "loss": 0.5726, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "grad_norm": 148.0, |
| "learning_rate": 9.954002016824226e-06, |
| "loss": 0.5948, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6944444444444444, |
| "eval_accuracy": 0.6732283464566929, |
| "eval_auc": 0.8793424317617865, |
| "eval_f1": 0.546448087431694, |
| "eval_loss": 0.5765186548233032, |
| "eval_precision": 0.847457627118644, |
| "eval_recall": 0.4032258064516129, |
| "eval_runtime": 17.3424, |
| "eval_samples_per_second": 14.646, |
| "eval_steps_per_second": 0.231, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7638888888888888, |
| "grad_norm": 41.0, |
| "learning_rate": 9.915388724114301e-06, |
| "loss": 0.5116, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 15.6875, |
| "learning_rate": 9.86522435289912e-06, |
| "loss": 0.5211, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9027777777777778, |
| "grad_norm": 79.0, |
| "learning_rate": 9.803626788583603e-06, |
| "loss": 0.5657, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9722222222222222, |
| "grad_norm": 44.5, |
| "learning_rate": 9.730740784378755e-06, |
| "loss": 0.4961, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0416666666666667, |
| "grad_norm": 33.25, |
| "learning_rate": 9.646737621134112e-06, |
| "loss": 0.4932, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0416666666666667, |
| "eval_accuracy": 0.8188976377952756, |
| "eval_auc": 0.8933002481389578, |
| "eval_f1": 0.8435374149659864, |
| "eval_loss": 0.4920627772808075, |
| "eval_precision": 0.7294117647058823, |
| "eval_recall": 1.0, |
| "eval_runtime": 17.2166, |
| "eval_samples_per_second": 14.753, |
| "eval_steps_per_second": 0.232, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 37.5, |
| "learning_rate": 9.551814704830734e-06, |
| "loss": 0.4114, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1805555555555556, |
| "grad_norm": 22.0, |
| "learning_rate": 9.446195102680531e-06, |
| "loss": 0.4434, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 48.25, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.5029, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.3194444444444444, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.203883211545517e-06, |
| "loss": 0.36, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "grad_norm": 22.5, |
| "learning_rate": 9.067760351314838e-06, |
| "loss": 0.3651, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.3888888888888888, |
| "eval_accuracy": 0.8110236220472441, |
| "eval_auc": 0.8889267990074442, |
| "eval_f1": 0.8248175182481752, |
| "eval_loss": 0.4430588185787201, |
| "eval_precision": 0.7533333333333333, |
| "eval_recall": 0.9112903225806451, |
| "eval_runtime": 17.3222, |
| "eval_samples_per_second": 14.663, |
| "eval_steps_per_second": 0.231, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4583333333333333, |
| "grad_norm": 19.0, |
| "learning_rate": 8.92207832459788e-06, |
| "loss": 0.3805, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5277777777777777, |
| "grad_norm": 43.0, |
| "learning_rate": 8.767179481638303e-06, |
| "loss": 0.3689, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.5972222222222223, |
| "grad_norm": 26.5, |
| "learning_rate": 8.603427832038574e-06, |
| "loss": 0.3745, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 68.0, |
| "learning_rate": 8.43120818934367e-06, |
| "loss": 0.3921, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.7361111111111112, |
| "grad_norm": 25.75, |
| "learning_rate": 8.25092526673592e-06, |
| "loss": 0.434, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.7361111111111112, |
| "eval_accuracy": 0.84251968503937, |
| "eval_auc": 0.9133374689826302, |
| "eval_f1": 0.8461538461538461, |
| "eval_loss": 0.36681872606277466, |
| "eval_precision": 0.8088235294117647, |
| "eval_recall": 0.8870967741935484, |
| "eval_runtime": 17.2167, |
| "eval_samples_per_second": 14.753, |
| "eval_steps_per_second": 0.232, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8055555555555556, |
| "grad_norm": 11.9375, |
| "learning_rate": 8.063002725966014e-06, |
| "loss": 0.3524, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 26.25, |
| "learning_rate": 7.86788218175523e-06, |
| "loss": 0.3992, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9444444444444444, |
| "grad_norm": 32.75, |
| "learning_rate": 7.666022164008458e-06, |
| "loss": 0.4295, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.013888888888889, |
| "grad_norm": 10.4375, |
| "learning_rate": 7.457897040276853e-06, |
| "loss": 0.3542, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "grad_norm": 26.125, |
| "learning_rate": 7.243995901002312e-06, |
| "loss": 0.2434, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.0833333333333335, |
| "eval_accuracy": 0.7992125984251969, |
| "eval_auc": 0.9068238213399503, |
| "eval_f1": 0.8197879858657244, |
| "eval_loss": 0.5410435199737549, |
| "eval_precision": 0.7295597484276729, |
| "eval_recall": 0.9354838709677419, |
| "eval_runtime": 17.3565, |
| "eval_samples_per_second": 14.634, |
| "eval_steps_per_second": 0.23, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.1527777777777777, |
| "grad_norm": 14.75, |
| "learning_rate": 7.0248214101633685e-06, |
| "loss": 0.2442, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 13.375, |
| "learning_rate": 6.800888624023552e-06, |
| "loss": 0.211, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.2916666666666665, |
| "grad_norm": 44.25, |
| "learning_rate": 6.572723780758069e-06, |
| "loss": 0.175, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.361111111111111, |
| "grad_norm": 20.0, |
| "learning_rate": 6.340863063803187e-06, |
| "loss": 0.1647, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.4305555555555554, |
| "grad_norm": 32.5, |
| "learning_rate": 6.105851341834439e-06, |
| "loss": 0.2687, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.4305555555555554, |
| "eval_accuracy": 0.8385826771653543, |
| "eval_auc": 0.8970843672456575, |
| "eval_f1": 0.846441947565543, |
| "eval_loss": 0.45397356152534485, |
| "eval_precision": 0.7902097902097902, |
| "eval_recall": 0.9112903225806451, |
| "eval_runtime": 17.2257, |
| "eval_samples_per_second": 14.745, |
| "eval_steps_per_second": 0.232, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 34.25, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.2253, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.5694444444444446, |
| "grad_norm": 42.25, |
| "learning_rate": 5.628590083760815e-06, |
| "loss": 0.2023, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.638888888888889, |
| "grad_norm": 23.5, |
| "learning_rate": 5.387462103359655e-06, |
| "loss": 0.1633, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.7083333333333335, |
| "grad_norm": 23.375, |
| "learning_rate": 5.145423593715558e-06, |
| "loss": 0.2195, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "grad_norm": 31.75, |
| "learning_rate": 4.903043341140879e-06, |
| "loss": 0.1774, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.7777777777777777, |
| "eval_accuracy": 0.8149606299212598, |
| "eval_auc": 0.90462158808933, |
| "eval_f1": 0.8226415094339623, |
| "eval_loss": 0.44090235233306885, |
| "eval_precision": 0.7730496453900709, |
| "eval_recall": 0.8790322580645161, |
| "eval_runtime": 17.1892, |
| "eval_samples_per_second": 14.777, |
| "eval_steps_per_second": 0.233, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.8472222222222223, |
| "grad_norm": 40.5, |
| "learning_rate": 4.660890935037954e-06, |
| "loss": 0.1502, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.9166666666666665, |
| "grad_norm": 33.25, |
| "learning_rate": 4.4195354293738484e-06, |
| "loss": 0.1355, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.986111111111111, |
| "grad_norm": 30.125, |
| "learning_rate": 4.17954400541338e-06, |
| "loss": 0.1545, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.0555555555555554, |
| "grad_norm": 5.96875, |
| "learning_rate": 3.941480638852948e-06, |
| "loss": 0.0917, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 3.90625, |
| "learning_rate": 3.705904774487396e-06, |
| "loss": 0.0732, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.125, |
| "eval_accuracy": 0.8385826771653543, |
| "eval_auc": 0.9099565756823821, |
| "eval_f1": 0.8452830188679246, |
| "eval_loss": 0.47704994678497314, |
| "eval_precision": 0.7943262411347518, |
| "eval_recall": 0.9032258064516129, |
| "eval_runtime": 17.2377, |
| "eval_samples_per_second": 14.735, |
| "eval_steps_per_second": 0.232, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.1944444444444446, |
| "grad_norm": 50.25, |
| "learning_rate": 3.473370011524435e-06, |
| "loss": 0.0313, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.263888888888889, |
| "grad_norm": 21.75, |
| "learning_rate": 3.244422802636057e-06, |
| "loss": 0.0233, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 14.5625, |
| "learning_rate": 3.019601169804216e-06, |
| "loss": 0.0316, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.4027777777777777, |
| "grad_norm": 9.5, |
| "learning_rate": 2.7994334399784773e-06, |
| "loss": 0.0272, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.4722222222222223, |
| "grad_norm": 29.0, |
| "learning_rate": 2.5844370035168077e-06, |
| "loss": 0.0598, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.4722222222222223, |
| "eval_accuracy": 0.8464566929133859, |
| "eval_auc": 0.9099255583126551, |
| "eval_f1": 0.8446215139442231, |
| "eval_loss": 0.5372636318206787, |
| "eval_precision": 0.8346456692913385, |
| "eval_recall": 0.8548387096774194, |
| "eval_runtime": 17.3025, |
| "eval_samples_per_second": 14.68, |
| "eval_steps_per_second": 0.231, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.5416666666666665, |
| "grad_norm": 4.09375, |
| "learning_rate": 2.3751170983272e-06, |
| "loss": 0.0248, |
| "step": 510 |
| }, |
| { |
| "epoch": 3.611111111111111, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.171965622567308e-06, |
| "loss": 0.0152, |
| "step": 520 |
| }, |
| { |
| "epoch": 3.6805555555555554, |
| "grad_norm": 2.546875, |
| "learning_rate": 1.9754599786922913e-06, |
| "loss": 0.0366, |
| "step": 530 |
| }, |
| { |
| "epoch": 3.75, |
| "grad_norm": 5.0, |
| "learning_rate": 1.7860619515673034e-06, |
| "loss": 0.0311, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.8194444444444446, |
| "grad_norm": 3.46875, |
| "learning_rate": 1.6042166232810346e-06, |
| "loss": 0.0283, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.8194444444444446, |
| "eval_accuracy": 0.8385826771653543, |
| "eval_auc": 0.9107009925558313, |
| "eval_f1": 0.8404669260700389, |
| "eval_loss": 0.6065385341644287, |
| "eval_precision": 0.8120300751879699, |
| "eval_recall": 0.8709677419354839, |
| "eval_runtime": 17.1801, |
| "eval_samples_per_second": 14.785, |
| "eval_steps_per_second": 0.233, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.888888888888889, |
| "grad_norm": 10.0625, |
| "learning_rate": 1.4303513272105057e-06, |
| "loss": 0.0166, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.9583333333333335, |
| "grad_norm": 4.90625, |
| "learning_rate": 1.264874643795021e-06, |
| "loss": 0.0146, |
| "step": 570 |
| }, |
| { |
| "epoch": 4.027777777777778, |
| "grad_norm": 13.3125, |
| "learning_rate": 1.1081754403792e-06, |
| "loss": 0.0125, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.097222222222222, |
| "grad_norm": 0.50390625, |
| "learning_rate": 9.606219573814447e-07, |
| "loss": 0.0019, |
| "step": 590 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "grad_norm": 1.2734375, |
| "learning_rate": 8.225609429353187e-07, |
| "loss": 0.0161, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.166666666666667, |
| "eval_accuracy": 0.8464566929133859, |
| "eval_auc": 0.9123138957816378, |
| "eval_f1": 0.8482490272373541, |
| "eval_loss": 0.6167948246002197, |
| "eval_precision": 0.8195488721804511, |
| "eval_recall": 0.8790322580645161, |
| "eval_runtime": 17.3503, |
| "eval_samples_per_second": 14.64, |
| "eval_steps_per_second": 0.231, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.236111111111111, |
| "grad_norm": 1.3515625, |
| "learning_rate": 6.94316838037431e-07, |
| "loss": 0.0045, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.305555555555555, |
| "grad_norm": 0.84765625, |
| "learning_rate": 5.76191014116711e-07, |
| "loss": 0.005, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.375, |
| "grad_norm": 4.34375, |
| "learning_rate": 4.6846106481675035e-07, |
| "loss": 0.0101, |
| "step": 630 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 5.6875, |
| "learning_rate": 3.7138015365554834e-07, |
| "loss": 0.0091, |
| "step": 640 |
| }, |
| { |
| "epoch": 4.513888888888889, |
| "grad_norm": 8.875, |
| "learning_rate": 2.8517641909562075e-07, |
| "loss": 0.0091, |
| "step": 650 |
| }, |
| { |
| "epoch": 4.513888888888889, |
| "eval_accuracy": 0.8464566929133859, |
| "eval_auc": 0.9123449131513648, |
| "eval_f1": 0.8482490272373541, |
| "eval_loss": 0.6211294531822205, |
| "eval_precision": 0.8195488721804511, |
| "eval_recall": 0.8790322580645161, |
| "eval_runtime": 17.3655, |
| "eval_samples_per_second": 14.627, |
| "eval_steps_per_second": 0.23, |
| "step": 650 |
| }, |
| { |
| "epoch": 4.583333333333333, |
| "grad_norm": 1.484375, |
| "learning_rate": 2.1005243842255552e-07, |
| "loss": 0.0037, |
| "step": 660 |
| }, |
| { |
| "epoch": 4.652777777777778, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.4618475169190017e-07, |
| "loss": 0.0033, |
| "step": 670 |
| }, |
| { |
| "epoch": 4.722222222222222, |
| "grad_norm": 7.1875, |
| "learning_rate": 9.372344686307655e-08, |
| "loss": 0.0079, |
| "step": 680 |
| }, |
| { |
| "epoch": 4.791666666666667, |
| "grad_norm": 0.6953125, |
| "learning_rate": 5.279180709527765e-08, |
| "loss": 0.0044, |
| "step": 690 |
| }, |
| { |
| "epoch": 4.861111111111111, |
| "grad_norm": 0.41796875, |
| "learning_rate": 2.3486021034170857e-08, |
| "loss": 0.007, |
| "step": 700 |
| }, |
| { |
| "epoch": 4.861111111111111, |
| "eval_accuracy": 0.8464566929133859, |
| "eval_auc": 0.9124069478908188, |
| "eval_f1": 0.8482490272373541, |
| "eval_loss": 0.6205126047134399, |
| "eval_precision": 0.8195488721804511, |
| "eval_recall": 0.8790322580645161, |
| "eval_runtime": 17.3822, |
| "eval_samples_per_second": 14.613, |
| "eval_steps_per_second": 0.23, |
| "step": 700 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 720, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5117205872894935e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|