| { | |
| "best_metric": 0.10347278416156769, | |
| "best_model_checkpoint": "./models/results_comb_23/checkpoint-400", | |
| "epoch": 0.887040887040887, | |
| "global_step": 400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.6838, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.6683, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.6398, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.5841623557500994, | |
| "eval_f1": 0.7375031399145943, | |
| "eval_loss": 0.6682732701301575, | |
| "eval_precision": 0.5841623557500994, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 47.6905, | |
| "eval_samples_per_second": 158.082, | |
| "eval_steps_per_second": 19.773, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.6571, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2e-05, | |
| "loss": 0.6052, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.4953, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.8867223769730733, | |
| "eval_f1": 0.9052374611628939, | |
| "eval_loss": 0.3651227056980133, | |
| "eval_precision": 0.8851996527777778, | |
| "eval_recall": 0.9262034514078111, | |
| "eval_runtime": 47.4725, | |
| "eval_samples_per_second": 158.808, | |
| "eval_steps_per_second": 19.864, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.3792, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.2802, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.2595, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.9119246584427643, | |
| "eval_f1": 0.9219924812030076, | |
| "eval_loss": 0.21582257747650146, | |
| "eval_precision": 0.9552093476144109, | |
| "eval_recall": 0.8910081743869209, | |
| "eval_runtime": 47.5352, | |
| "eval_samples_per_second": 158.598, | |
| "eval_steps_per_second": 19.838, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4e-05, | |
| "loss": 0.2839, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.2088, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.2128, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.931157978511739, | |
| "eval_f1": 0.9428980085818022, | |
| "eval_loss": 0.18493123352527618, | |
| "eval_precision": 0.9146211312700107, | |
| "eval_recall": 0.9729791099000908, | |
| "eval_runtime": 47.5063, | |
| "eval_samples_per_second": 158.695, | |
| "eval_steps_per_second": 19.85, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.942857142857143e-05, | |
| "loss": 0.2927, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.828571428571429e-05, | |
| "loss": 0.2732, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.714285714285714e-05, | |
| "loss": 0.2793, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.9335455630720255, | |
| "eval_f1": 0.9418186041110208, | |
| "eval_loss": 0.17004907131195068, | |
| "eval_precision": 0.9638697409080105, | |
| "eval_recall": 0.9207538601271571, | |
| "eval_runtime": 47.6934, | |
| "eval_samples_per_second": 158.072, | |
| "eval_steps_per_second": 19.772, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.2195, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.485714285714286e-05, | |
| "loss": 0.197, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.371428571428572e-05, | |
| "loss": 0.2501, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.9409736039262502, | |
| "eval_f1": 0.9507034452198958, | |
| "eval_loss": 0.1616438329219818, | |
| "eval_precision": 0.9281851611507679, | |
| "eval_recall": 0.9743415077202543, | |
| "eval_runtime": 47.5143, | |
| "eval_samples_per_second": 158.668, | |
| "eval_steps_per_second": 19.847, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.257142857142857e-05, | |
| "loss": 0.1977, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.1428571428571437e-05, | |
| "loss": 0.1873, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.028571428571429e-05, | |
| "loss": 0.1876, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.9350046425255338, | |
| "eval_f1": 0.945116487455197, | |
| "eval_loss": 0.16360537707805634, | |
| "eval_precision": 0.9325817860300619, | |
| "eval_recall": 0.9579927338782924, | |
| "eval_runtime": 47.5692, | |
| "eval_samples_per_second": 158.485, | |
| "eval_steps_per_second": 19.824, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.9142857142857145e-05, | |
| "loss": 0.1954, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.1758, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.685714285714286e-05, | |
| "loss": 0.1847, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 0.1337, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.9501260114073484, | |
| "eval_f1": 0.9577243085225995, | |
| "eval_loss": 0.14026911556720734, | |
| "eval_precision": 0.9485523385300668, | |
| "eval_recall": 0.9670753860127157, | |
| "eval_runtime": 47.6318, | |
| "eval_samples_per_second": 158.277, | |
| "eval_steps_per_second": 19.798, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.4571428571428574e-05, | |
| "loss": 0.1665, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.342857142857143e-05, | |
| "loss": 0.1667, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 3.228571428571428e-05, | |
| "loss": 0.1778, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.8956094972808065, | |
| "eval_f1": 0.9173926734543928, | |
| "eval_loss": 0.2989156246185303, | |
| "eval_precision": 0.8530158110482139, | |
| "eval_recall": 0.9922797456857403, | |
| "eval_runtime": 47.5798, | |
| "eval_samples_per_second": 158.45, | |
| "eval_steps_per_second": 19.819, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 3.114285714285715e-05, | |
| "loss": 0.2196, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 3e-05, | |
| "loss": 0.1774, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.885714285714286e-05, | |
| "loss": 0.1502, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.954105319007826, | |
| "eval_f1": 0.9604842393787117, | |
| "eval_loss": 0.12666857242584229, | |
| "eval_precision": 0.9662224264705882, | |
| "eval_recall": 0.9548138056312443, | |
| "eval_runtime": 47.5514, | |
| "eval_samples_per_second": 158.544, | |
| "eval_steps_per_second": 19.831, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.7714285714285716e-05, | |
| "loss": 0.1264, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.6571428571428576e-05, | |
| "loss": 0.1431, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.542857142857143e-05, | |
| "loss": 0.1386, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.9562276163947473, | |
| "eval_f1": 0.9631696428571428, | |
| "eval_loss": 0.11829638481140137, | |
| "eval_precision": 0.9471027216856892, | |
| "eval_recall": 0.9797910990009082, | |
| "eval_runtime": 47.5189, | |
| "eval_samples_per_second": 158.653, | |
| "eval_steps_per_second": 19.845, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.4285714285714288e-05, | |
| "loss": 0.1334, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 2.3142857142857145e-05, | |
| "loss": 0.1468, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.1233, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.9538400318344608, | |
| "eval_f1": 0.9613933880630131, | |
| "eval_loss": 0.13518454134464264, | |
| "eval_precision": 0.9399132321041215, | |
| "eval_recall": 0.9838782924613987, | |
| "eval_runtime": 47.6619, | |
| "eval_samples_per_second": 158.177, | |
| "eval_steps_per_second": 19.785, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.0857142857142857e-05, | |
| "loss": 0.1453, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.9714285714285714e-05, | |
| "loss": 0.1354, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.8571428571428572e-05, | |
| "loss": 0.1745, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.957288765088208, | |
| "eval_f1": 0.9632420091324201, | |
| "eval_loss": 0.11527514457702637, | |
| "eval_precision": 0.9685491276400368, | |
| "eval_recall": 0.9579927338782924, | |
| "eval_runtime": 47.6858, | |
| "eval_samples_per_second": 158.098, | |
| "eval_steps_per_second": 19.775, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.742857142857143e-05, | |
| "loss": 0.1349, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.6285714285714287e-05, | |
| "loss": 0.1105, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.5142857142857144e-05, | |
| "loss": 0.1101, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.9611354291020029, | |
| "eval_f1": 0.9667536593668444, | |
| "eval_loss": 0.11785919219255447, | |
| "eval_precision": 0.9662054887729644, | |
| "eval_recall": 0.9673024523160763, | |
| "eval_runtime": 47.5837, | |
| "eval_samples_per_second": 158.437, | |
| "eval_steps_per_second": 19.818, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 0.1148, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.2857142857142857e-05, | |
| "loss": 0.0959, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.1714285714285715e-05, | |
| "loss": 0.1335, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.9571561215015254, | |
| "eval_f1": 0.9628778301344673, | |
| "eval_loss": 0.12226755917072296, | |
| "eval_precision": 0.9748661857109612, | |
| "eval_recall": 0.951180744777475, | |
| "eval_runtime": 47.5828, | |
| "eval_samples_per_second": 158.44, | |
| "eval_steps_per_second": 19.818, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.0571428571428572e-05, | |
| "loss": 0.1663, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 9.42857142857143e-06, | |
| "loss": 0.1249, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.285714285714285e-06, | |
| "loss": 0.1338, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 0.0969, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.9628597957288765, | |
| "eval_f1": 0.9683972911963884, | |
| "eval_loss": 0.10347278416156769, | |
| "eval_precision": 0.9627468581687613, | |
| "eval_recall": 0.9741144414168937, | |
| "eval_runtime": 47.716, | |
| "eval_samples_per_second": 157.997, | |
| "eval_steps_per_second": 19.763, | |
| "step": 400 | |
| } | |
| ], | |
| "max_steps": 450, | |
| "num_train_epochs": 1, | |
| "total_flos": 4354487966208000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |