{
  "best_metric": 2.390516519546509,
  "best_model_checkpoint": "MIReAD_3e-05/checkpoint-13806",
  "epoch": 6.0,
  "global_step": 27612,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 2.94567579313342e-05,
      "loss": 4.5385,
      "step": 500
    },
    {
      "epoch": 0.22,
      "learning_rate": 2.8913515862668408e-05,
      "loss": 3.745,
      "step": 1000
    },
    {
      "epoch": 0.33,
      "learning_rate": 2.8370273794002608e-05,
      "loss": 3.3652,
      "step": 1500
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.782703172533681e-05,
      "loss": 3.1532,
      "step": 2000
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.728378965667101e-05,
      "loss": 2.9768,
      "step": 2500
    },
    {
      "epoch": 0.65,
      "learning_rate": 2.6740547588005218e-05,
      "loss": 2.8561,
      "step": 3000
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.6197305519339418e-05,
      "loss": 2.7393,
      "step": 3500
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.565406345067362e-05,
      "loss": 2.6236,
      "step": 4000
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.511082138200782e-05,
      "loss": 2.6074,
      "step": 4500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.3415979708306912,
      "eval_f1": 0.28057933815352265,
      "eval_loss": 2.6715340614318848,
      "eval_precision": 0.3280031388570244,
      "eval_recall": 0.30555570676656096,
      "eval_runtime": 128.8869,
      "eval_samples_per_second": 122.355,
      "eval_steps_per_second": 7.65,
      "step": 4602
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.456757931334203e-05,
      "loss": 2.4343,
      "step": 5000
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.402433724467623e-05,
      "loss": 2.2922,
      "step": 5500
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.3481095176010432e-05,
      "loss": 2.2894,
      "step": 6000
    },
    {
      "epoch": 1.41,
      "learning_rate": 2.2937853107344632e-05,
      "loss": 2.2635,
      "step": 6500
    },
    {
      "epoch": 1.52,
      "learning_rate": 2.239461103867884e-05,
      "loss": 2.238,
      "step": 7000
    },
    {
      "epoch": 1.63,
      "learning_rate": 2.185136897001304e-05,
      "loss": 2.2425,
      "step": 7500
    },
    {
      "epoch": 1.74,
      "learning_rate": 2.1308126901347242e-05,
      "loss": 2.1702,
      "step": 8000
    },
    {
      "epoch": 1.85,
      "learning_rate": 2.0764884832681442e-05,
      "loss": 2.1909,
      "step": 8500
    },
    {
      "epoch": 1.96,
      "learning_rate": 2.0221642764015646e-05,
      "loss": 2.1209,
      "step": 9000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.3929613189600507,
      "eval_f1": 0.3569292122063319,
      "eval_loss": 2.4266042709350586,
      "eval_precision": 0.37263788729318065,
      "eval_recall": 0.3854876449553655,
      "eval_runtime": 127.8545,
      "eval_samples_per_second": 123.343,
      "eval_steps_per_second": 7.712,
      "step": 9204
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.967840069534985e-05,
      "loss": 1.9419,
      "step": 9500
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.913515862668405e-05,
      "loss": 1.7981,
      "step": 10000
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.8591916558018253e-05,
      "loss": 1.82,
      "step": 10500
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.8048674489352456e-05,
      "loss": 1.7695,
      "step": 11000
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.750543242068666e-05,
      "loss": 1.7817,
      "step": 11500
    },
    {
      "epoch": 2.61,
      "learning_rate": 1.696219035202086e-05,
      "loss": 1.7977,
      "step": 12000
    },
    {
      "epoch": 2.72,
      "learning_rate": 1.6418948283355063e-05,
      "loss": 1.7337,
      "step": 12500
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.5875706214689266e-05,
      "loss": 1.7563,
      "step": 13000
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.533246414602347e-05,
      "loss": 1.7413,
      "step": 13500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.40786303107165506,
      "eval_f1": 0.3830566278260409,
      "eval_loss": 2.390516519546509,
      "eval_precision": 0.40575163850397916,
      "eval_recall": 0.39817650401283294,
      "eval_runtime": 129.2884,
      "eval_samples_per_second": 121.975,
      "eval_steps_per_second": 7.626,
      "step": 13806
    },
    {
      "epoch": 3.04,
      "learning_rate": 1.478922207735767e-05,
      "loss": 1.6035,
      "step": 14000
    },
    {
      "epoch": 3.15,
      "learning_rate": 1.4245980008691873e-05,
      "loss": 1.4123,
      "step": 14500
    },
    {
      "epoch": 3.26,
      "learning_rate": 1.3702737940026075e-05,
      "loss": 1.3897,
      "step": 15000
    },
    {
      "epoch": 3.37,
      "learning_rate": 1.3159495871360279e-05,
      "loss": 1.3592,
      "step": 15500
    },
    {
      "epoch": 3.48,
      "learning_rate": 1.261625380269448e-05,
      "loss": 1.3673,
      "step": 16000
    },
    {
      "epoch": 3.59,
      "learning_rate": 1.2073011734028684e-05,
      "loss": 1.3792,
      "step": 16500
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.1529769665362886e-05,
      "loss": 1.3385,
      "step": 17000
    },
    {
      "epoch": 3.8,
      "learning_rate": 1.0986527596697089e-05,
      "loss": 1.3258,
      "step": 17500
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.044328552803129e-05,
      "loss": 1.3415,
      "step": 18000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.4099556119213697,
      "eval_f1": 0.3978990080253614,
      "eval_loss": 2.46903133392334,
      "eval_precision": 0.4214347585145809,
      "eval_recall": 0.40462921661844276,
      "eval_runtime": 129.7625,
      "eval_samples_per_second": 121.53,
      "eval_steps_per_second": 7.598,
      "step": 18408
    },
    {
      "epoch": 4.02,
      "learning_rate": 9.900043459365494e-06,
      "loss": 1.2695,
      "step": 18500
    },
    {
      "epoch": 4.13,
      "learning_rate": 9.356801390699696e-06,
      "loss": 1.0401,
      "step": 19000
    },
    {
      "epoch": 4.24,
      "learning_rate": 8.8135593220339e-06,
      "loss": 1.0362,
      "step": 19500
    },
    {
      "epoch": 4.35,
      "learning_rate": 8.270317253368101e-06,
      "loss": 1.0133,
      "step": 20000
    },
    {
      "epoch": 4.45,
      "learning_rate": 7.727075184702305e-06,
      "loss": 1.0128,
      "step": 20500
    },
    {
      "epoch": 4.56,
      "learning_rate": 7.183833116036506e-06,
      "loss": 0.9873,
      "step": 21000
    },
    {
      "epoch": 4.67,
      "learning_rate": 6.640591047370709e-06,
      "loss": 0.9855,
      "step": 21500
    },
    {
      "epoch": 4.78,
      "learning_rate": 6.0973489787049115e-06,
      "loss": 1.0153,
      "step": 22000
    },
    {
      "epoch": 4.89,
      "learning_rate": 5.554106910039114e-06,
      "loss": 0.9783,
      "step": 22500
    },
    {
      "epoch": 5.0,
      "learning_rate": 5.010864841373317e-06,
      "loss": 0.9837,
      "step": 23000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.4050095117311351,
      "eval_f1": 0.3983089847187763,
      "eval_loss": 2.622011184692383,
      "eval_precision": 0.411294379610805,
      "eval_recall": 0.40211336103710194,
      "eval_runtime": 129.5856,
      "eval_samples_per_second": 121.696,
      "eval_steps_per_second": 7.609,
      "step": 23010
    },
    {
      "epoch": 5.11,
      "learning_rate": 4.467622772707519e-06,
      "loss": 0.7989,
      "step": 23500
    },
    {
      "epoch": 5.22,
      "learning_rate": 3.924380704041722e-06,
      "loss": 0.776,
      "step": 24000
    },
    {
      "epoch": 5.32,
      "learning_rate": 3.3811386353759236e-06,
      "loss": 0.7795,
      "step": 24500
    },
    {
      "epoch": 5.43,
      "learning_rate": 2.837896566710126e-06,
      "loss": 0.7417,
      "step": 25000
    },
    {
      "epoch": 5.54,
      "learning_rate": 2.2946544980443283e-06,
      "loss": 0.7566,
      "step": 25500
    },
    {
      "epoch": 5.65,
      "learning_rate": 1.7514124293785311e-06,
      "loss": 0.7571,
      "step": 26000
    },
    {
      "epoch": 5.76,
      "learning_rate": 1.2081703607127337e-06,
      "loss": 0.7666,
      "step": 26500
    },
    {
      "epoch": 5.87,
      "learning_rate": 6.649282920469362e-07,
      "loss": 0.7696,
      "step": 27000
    },
    {
      "epoch": 5.98,
      "learning_rate": 1.2168622338113866e-07,
      "loss": 0.7456,
      "step": 27500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.40107799619530754,
      "eval_f1": 0.39625451475720025,
      "eval_loss": 2.7117061614990234,
      "eval_precision": 0.4066294033650095,
      "eval_recall": 0.39986711618175386,
      "eval_runtime": 130.357,
      "eval_samples_per_second": 120.975,
      "eval_steps_per_second": 7.564,
      "step": 27612
    }
  ],
  "max_steps": 27612,
  "num_train_epochs": 6,
  "total_flos": 1.1642961087465062e+17,
  "trial_name": null,
  "trial_params": null
}