| { | |
| "best_metric": 0.10954569280147552, | |
| "best_model_checkpoint": "./models/results_comb_693/checkpoint-400", | |
| "epoch": 0.887040887040887, | |
| "global_step": 400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.6881, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.5e-06, | |
| "loss": 0.6788, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.1500000000000002e-05, | |
| "loss": 0.6504, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.5841623557500994, | |
| "eval_f1": 0.7375031399145943, | |
| "eval_loss": 0.6672813892364502, | |
| "eval_precision": 0.5841623557500994, | |
| "eval_recall": 1.0, | |
| "eval_runtime": 35.6463, | |
| "eval_samples_per_second": 211.494, | |
| "eval_steps_per_second": 26.454, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.55e-05, | |
| "loss": 0.6723, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9500000000000003e-05, | |
| "loss": 0.598, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.35e-05, | |
| "loss": 0.4717, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.8839368616527391, | |
| "eval_f1": 0.9069841607313702, | |
| "eval_loss": 0.3077848255634308, | |
| "eval_precision": 0.8526883869678193, | |
| "eval_recall": 0.9686648501362398, | |
| "eval_runtime": 34.1268, | |
| "eval_samples_per_second": 220.911, | |
| "eval_steps_per_second": 27.632, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.3128, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 3.15e-05, | |
| "loss": 0.332, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.55e-05, | |
| "loss": 0.2701, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.89322191272052, | |
| "eval_f1": 0.9027425395674761, | |
| "eval_loss": 0.25026363134384155, | |
| "eval_precision": 0.9646269042086238, | |
| "eval_recall": 0.8483197093551317, | |
| "eval_runtime": 33.4359, | |
| "eval_samples_per_second": 225.476, | |
| "eval_steps_per_second": 28.203, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.9500000000000005e-05, | |
| "loss": 0.284, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.35e-05, | |
| "loss": 0.283, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.1908, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.9243931555909272, | |
| "eval_f1": 0.9354327140915271, | |
| "eval_loss": 0.2004704475402832, | |
| "eval_precision": 0.9333182640144665, | |
| "eval_recall": 0.9375567665758402, | |
| "eval_runtime": 33.8959, | |
| "eval_samples_per_second": 222.416, | |
| "eval_steps_per_second": 27.82, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.957142857142857e-05, | |
| "loss": 0.237, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.842857142857143e-05, | |
| "loss": 0.2298, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.728571428571429e-05, | |
| "loss": 0.2824, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.8917628332670116, | |
| "eval_f1": 0.9143397018685703, | |
| "eval_loss": 0.301138699054718, | |
| "eval_precision": 0.850253807106599, | |
| "eval_recall": 0.9888737511353315, | |
| "eval_runtime": 33.5038, | |
| "eval_samples_per_second": 225.019, | |
| "eval_steps_per_second": 28.146, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.6142857142857145e-05, | |
| "loss": 0.2522, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.1874, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.385714285714286e-05, | |
| "loss": 0.1865, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.9358005040456294, | |
| "eval_f1": 0.9457033879291002, | |
| "eval_loss": 0.16087646782398224, | |
| "eval_precision": 0.934589800443459, | |
| "eval_recall": 0.9570844686648501, | |
| "eval_runtime": 33.8925, | |
| "eval_samples_per_second": 222.439, | |
| "eval_steps_per_second": 27.823, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.271428571428572e-05, | |
| "loss": 0.2079, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.1571428571428575e-05, | |
| "loss": 0.2041, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.042857142857143e-05, | |
| "loss": 0.1723, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.9396471680594243, | |
| "eval_f1": 0.9485468732330657, | |
| "eval_loss": 0.16514933109283447, | |
| "eval_precision": 0.9448073890515882, | |
| "eval_recall": 0.952316076294278, | |
| "eval_runtime": 32.9214, | |
| "eval_samples_per_second": 229.0, | |
| "eval_steps_per_second": 28.644, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.928571428571429e-05, | |
| "loss": 0.1804, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.814285714285715e-05, | |
| "loss": 0.1687, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.7e-05, | |
| "loss": 0.2033, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.585714285714286e-05, | |
| "loss": 0.1858, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.9452181987000928, | |
| "eval_f1": 0.9521492295214923, | |
| "eval_loss": 0.15048551559448242, | |
| "eval_precision": 0.9720842204873432, | |
| "eval_recall": 0.9330154405086285, | |
| "eval_runtime": 32.8586, | |
| "eval_samples_per_second": 229.438, | |
| "eval_steps_per_second": 28.699, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.471428571428571e-05, | |
| "loss": 0.1741, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.357142857142857e-05, | |
| "loss": 0.1503, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 3.242857142857143e-05, | |
| "loss": 0.1293, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.9112614405093514, | |
| "eval_f1": 0.9184643510054844, | |
| "eval_loss": 0.25113266706466675, | |
| "eval_precision": 0.9913180741910024, | |
| "eval_recall": 0.8555858310626703, | |
| "eval_runtime": 33.3517, | |
| "eval_samples_per_second": 226.045, | |
| "eval_steps_per_second": 28.274, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 3.1285714285714285e-05, | |
| "loss": 0.2328, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 3.0142857142857146e-05, | |
| "loss": 0.1614, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.9e-05, | |
| "loss": 0.1339, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.9495954370606181, | |
| "eval_f1": 0.9561909153792945, | |
| "eval_loss": 0.12891609966754913, | |
| "eval_precision": 0.9711943793911006, | |
| "eval_recall": 0.9416439600363307, | |
| "eval_runtime": 32.9115, | |
| "eval_samples_per_second": 229.069, | |
| "eval_steps_per_second": 28.653, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.785714285714286e-05, | |
| "loss": 0.1318, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.6714285714285715e-05, | |
| "loss": 0.1038, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.5571428571428572e-05, | |
| "loss": 0.1714, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.9599416368218596, | |
| "eval_f1": 0.9657052009993187, | |
| "eval_loss": 0.12271451950073242, | |
| "eval_precision": 0.9659245797364834, | |
| "eval_recall": 0.9654859218891917, | |
| "eval_runtime": 33.9975, | |
| "eval_samples_per_second": 221.752, | |
| "eval_steps_per_second": 27.737, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.442857142857143e-05, | |
| "loss": 0.1444, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 2.3285714285714287e-05, | |
| "loss": 0.1771, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.214285714285714e-05, | |
| "loss": 0.1438, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.954105319007826, | |
| "eval_f1": 0.9600369600369599, | |
| "eval_loss": 0.12011300027370453, | |
| "eval_precision": 0.9769628584861307, | |
| "eval_recall": 0.9436875567665758, | |
| "eval_runtime": 34.0522, | |
| "eval_samples_per_second": 221.395, | |
| "eval_steps_per_second": 27.693, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.1213, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.9857142857142856e-05, | |
| "loss": 0.1517, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.8714285714285714e-05, | |
| "loss": 0.132, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.9594110624751293, | |
| "eval_f1": 0.9649885583524028, | |
| "eval_loss": 0.11286678165197372, | |
| "eval_precision": 0.9725553505535055, | |
| "eval_recall": 0.9575386012715713, | |
| "eval_runtime": 33.5869, | |
| "eval_samples_per_second": 224.463, | |
| "eval_steps_per_second": 28.076, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.757142857142857e-05, | |
| "loss": 0.1399, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.642857142857143e-05, | |
| "loss": 0.1393, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.5285714285714286e-05, | |
| "loss": 0.123, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.9590131317150816, | |
| "eval_f1": 0.964802369290352, | |
| "eval_loss": 0.11320723593235016, | |
| "eval_precision": 0.968, | |
| "eval_recall": 0.9616257947320618, | |
| "eval_runtime": 37.7864, | |
| "eval_samples_per_second": 199.516, | |
| "eval_steps_per_second": 24.956, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.4142857142857143e-05, | |
| "loss": 0.1067, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 0.1252, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.1857142857142858e-05, | |
| "loss": 0.1513, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.9595437060618119, | |
| "eval_f1": 0.9653684569092766, | |
| "eval_loss": 0.11097515374422073, | |
| "eval_precision": 0.9654780831251419, | |
| "eval_recall": 0.965258855585831, | |
| "eval_runtime": 37.8195, | |
| "eval_samples_per_second": 199.342, | |
| "eval_steps_per_second": 24.934, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.0714285714285714e-05, | |
| "loss": 0.1322, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 9.571428571428572e-06, | |
| "loss": 0.1199, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.428571428571429e-06, | |
| "loss": 0.1171, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.285714285714286e-06, | |
| "loss": 0.1583, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.9600742804085423, | |
| "eval_f1": 0.965643191416505, | |
| "eval_loss": 0.10954569280147552, | |
| "eval_precision": 0.970851503327978, | |
| "eval_recall": 0.9604904632152589, | |
| "eval_runtime": 37.3506, | |
| "eval_samples_per_second": 201.844, | |
| "eval_steps_per_second": 25.247, | |
| "step": 400 | |
| } | |
| ], | |
| "max_steps": 450, | |
| "num_train_epochs": 1, | |
| "total_flos": 4354487966208000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |