| { |
| "best_metric": 0.8166666666666667, |
| "best_model_checkpoint": "./llama_reward_model_acc_1e5-bz32/checkpoint-400", |
| "epoch": 2.8368794326241136, |
| "eval_steps": 50, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07092198581560284, |
| "grad_norm": 236.0, |
| "learning_rate": 1.4084507042253523e-06, |
| "loss": 1.0123, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "grad_norm": 380.0, |
| "learning_rate": 2.8169014084507046e-06, |
| "loss": 1.0112, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 95.0, |
| "learning_rate": 4.225352112676057e-06, |
| "loss": 0.7269, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "grad_norm": 199.0, |
| "learning_rate": 5.633802816901409e-06, |
| "loss": 0.7443, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3546099290780142, |
| "grad_norm": 532.0, |
| "learning_rate": 7.042253521126761e-06, |
| "loss": 0.9104, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3546099290780142, |
| "eval_accuracy": 0.54, |
| "eval_auc": 0.6245777777777778, |
| "eval_f1": 0.1686746987951807, |
| "eval_loss": 1.2576130628585815, |
| "eval_precision": 0.875, |
| "eval_recall": 0.09333333333333334, |
| "eval_runtime": 21.7813, |
| "eval_samples_per_second": 13.773, |
| "eval_steps_per_second": 0.23, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 382.0, |
| "learning_rate": 8.450704225352114e-06, |
| "loss": 1.074, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.49645390070921985, |
| "grad_norm": 332.0, |
| "learning_rate": 9.859154929577466e-06, |
| "loss": 1.5077, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "grad_norm": 192.0, |
| "learning_rate": 9.995028650728335e-06, |
| "loss": 1.1512, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 229.0, |
| "learning_rate": 9.977856431060221e-06, |
| "loss": 0.9273, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "grad_norm": 86.0, |
| "learning_rate": 9.948464112207811e-06, |
| "loss": 0.7322, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "eval_accuracy": 0.5466666666666666, |
| "eval_auc": 0.6962222222222223, |
| "eval_f1": 0.6666666666666666, |
| "eval_loss": 0.8091464042663574, |
| "eval_precision": 0.5271317829457365, |
| "eval_recall": 0.9066666666666666, |
| "eval_runtime": 21.8429, |
| "eval_samples_per_second": 13.734, |
| "eval_steps_per_second": 0.229, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7801418439716312, |
| "grad_norm": 77.5, |
| "learning_rate": 9.906923849135118e-06, |
| "loss": 0.5589, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 137.0, |
| "learning_rate": 9.853337618695413e-06, |
| "loss": 0.6659, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9219858156028369, |
| "grad_norm": 89.0, |
| "learning_rate": 9.78783696928909e-06, |
| "loss": 0.7266, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "grad_norm": 44.5, |
| "learning_rate": 9.710582697926562e-06, |
| "loss": 0.5231, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "grad_norm": 30.5, |
| "learning_rate": 9.62176445548899e-06, |
| "loss": 0.5396, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "eval_accuracy": 0.7266666666666667, |
| "eval_auc": 0.8042888888888888, |
| "eval_f1": 0.7657142857142857, |
| "eval_loss": 0.5730696320533752, |
| "eval_precision": 0.67, |
| "eval_recall": 0.8933333333333333, |
| "eval_runtime": 21.915, |
| "eval_samples_per_second": 13.689, |
| "eval_steps_per_second": 0.228, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1347517730496455, |
| "grad_norm": 42.0, |
| "learning_rate": 9.521600281155894e-06, |
| "loss": 0.4779, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.2056737588652482, |
| "grad_norm": 131.0, |
| "learning_rate": 9.410336067142525e-06, |
| "loss": 0.5754, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "grad_norm": 119.5, |
| "learning_rate": 9.28824495506109e-06, |
| "loss": 0.5001, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.3475177304964538, |
| "grad_norm": 34.5, |
| "learning_rate": 9.155626665387625e-06, |
| "loss": 0.6337, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.4184397163120568, |
| "grad_norm": 32.0, |
| "learning_rate": 9.012806761680642e-06, |
| "loss": 0.5741, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4184397163120568, |
| "eval_accuracy": 0.7166666666666667, |
| "eval_auc": 0.8280222222222222, |
| "eval_f1": 0.7578347578347578, |
| "eval_loss": 0.6074584126472473, |
| "eval_precision": 0.6616915422885572, |
| "eval_recall": 0.8866666666666667, |
| "eval_runtime": 21.8803, |
| "eval_samples_per_second": 13.711, |
| "eval_steps_per_second": 0.229, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4893617021276595, |
| "grad_norm": 25.375, |
| "learning_rate": 8.860135851357803e-06, |
| "loss": 0.5611, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5602836879432624, |
| "grad_norm": 77.0, |
| "learning_rate": 8.697988724992633e-06, |
| "loss": 0.5492, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.6312056737588652, |
| "grad_norm": 18.75, |
| "learning_rate": 8.526763436244184e-06, |
| "loss": 0.5065, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.702127659574468, |
| "grad_norm": 53.25, |
| "learning_rate": 8.346880324678359e-06, |
| "loss": 0.3905, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.773049645390071, |
| "grad_norm": 21.625, |
| "learning_rate": 8.158780983879737e-06, |
| "loss": 0.4588, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.773049645390071, |
| "eval_accuracy": 0.76, |
| "eval_auc": 0.8448888888888889, |
| "eval_f1": 0.7293233082706767, |
| "eval_loss": 0.5784951448440552, |
| "eval_precision": 0.8362068965517241, |
| "eval_recall": 0.6466666666666666, |
| "eval_runtime": 21.7738, |
| "eval_samples_per_second": 13.778, |
| "eval_steps_per_second": 0.23, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8439716312056738, |
| "grad_norm": 30.25, |
| "learning_rate": 7.962927177387085e-06, |
| "loss": 0.5218, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.9148936170212765, |
| "grad_norm": 63.75, |
| "learning_rate": 7.759799705113797e-06, |
| "loss": 0.4509, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9858156028368794, |
| "grad_norm": 59.25, |
| "learning_rate": 7.54989722303612e-06, |
| "loss": 0.4318, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.0567375886524824, |
| "grad_norm": 40.0, |
| "learning_rate": 7.33373501904665e-06, |
| "loss": 0.2591, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.127659574468085, |
| "grad_norm": 27.125, |
| "learning_rate": 7.111843747978296e-06, |
| "loss": 0.1902, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.127659574468085, |
| "eval_accuracy": 0.7966666666666666, |
| "eval_auc": 0.8648888888888889, |
| "eval_f1": 0.7986798679867987, |
| "eval_loss": 0.5884929299354553, |
| "eval_precision": 0.7908496732026143, |
| "eval_recall": 0.8066666666666666, |
| "eval_runtime": 21.7884, |
| "eval_samples_per_second": 13.769, |
| "eval_steps_per_second": 0.229, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.198581560283688, |
| "grad_norm": 45.25, |
| "learning_rate": 6.884768128904038e-06, |
| "loss": 0.1312, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.269503546099291, |
| "grad_norm": 26.625, |
| "learning_rate": 6.653065607910535e-06, |
| "loss": 0.1942, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.3404255319148937, |
| "grad_norm": 19.75, |
| "learning_rate": 6.4173049896282525e-06, |
| "loss": 0.2093, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.4113475177304964, |
| "grad_norm": 8.8125, |
| "learning_rate": 6.1780650408776214e-06, |
| "loss": 0.1324, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.482269503546099, |
| "grad_norm": 67.0, |
| "learning_rate": 5.9359330698590835e-06, |
| "loss": 0.2027, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.482269503546099, |
| "eval_accuracy": 0.7766666666666666, |
| "eval_auc": 0.8605555555555555, |
| "eval_f1": 0.7831715210355987, |
| "eval_loss": 0.6597020626068115, |
| "eval_precision": 0.7610062893081762, |
| "eval_recall": 0.8066666666666666, |
| "eval_runtime": 21.778, |
| "eval_samples_per_second": 13.775, |
| "eval_steps_per_second": 0.23, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.5531914893617023, |
| "grad_norm": 26.875, |
| "learning_rate": 5.691503484374974e-06, |
| "loss": 0.1171, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.624113475177305, |
| "grad_norm": 35.75, |
| "learning_rate": 5.445376332622656e-06, |
| "loss": 0.1866, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.6950354609929077, |
| "grad_norm": 23.375, |
| "learning_rate": 5.198155830141091e-06, |
| "loss": 0.1363, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.7659574468085104, |
| "grad_norm": 44.75, |
| "learning_rate": 4.950448876527036e-06, |
| "loss": 0.1784, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.8368794326241136, |
| "grad_norm": 37.0, |
| "learning_rate": 4.702863565562163e-06, |
| "loss": 0.167, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.8368794326241136, |
| "eval_accuracy": 0.8166666666666667, |
| "eval_auc": 0.8730666666666665, |
| "eval_f1": 0.8148148148148148, |
| "eval_loss": 0.5733991265296936, |
| "eval_precision": 0.8231292517006803, |
| "eval_recall": 0.8066666666666666, |
| "eval_runtime": 21.8744, |
| "eval_samples_per_second": 13.715, |
| "eval_steps_per_second": 0.229, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 705, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.429605585197056e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|