{ "best_metric": 0.8116599606198099, "best_model_checkpoint": "final_models/structroberta_sx2_final/finetune/qqp/checkpoint-6800", "epoch": 3.5467980295566504, "global_step": 7200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "eval_accuracy": 0.7086541056632996, "eval_f1": 0.614392596967907, "eval_loss": 0.5525585412979126, "eval_mcc": 0.3913768984140464, "eval_runtime": 56.5108, "eval_samples_per_second": 475.82, "eval_steps_per_second": 59.493, "step": 400 }, { "epoch": 0.25, "learning_rate": 4.876847290640394e-05, "loss": 0.5844, "step": 500 }, { "epoch": 0.39, "eval_accuracy": 0.7449514865875244, "eval_f1": 0.6675714978187106, "eval_loss": 0.5103132724761963, "eval_mcc": 0.46936219476259444, "eval_runtime": 56.7709, "eval_samples_per_second": 473.641, "eval_steps_per_second": 59.22, "step": 800 }, { "epoch": 0.49, "learning_rate": 4.753694581280788e-05, "loss": 0.5275, "step": 1000 }, { "epoch": 0.59, "eval_accuracy": 0.7642530202865601, "eval_f1": 0.7277646553575263, "eval_loss": 0.484948068857193, "eval_mcc": 0.5208867129555503, "eval_runtime": 56.4156, "eval_samples_per_second": 476.624, "eval_steps_per_second": 59.593, "step": 1200 }, { "epoch": 0.74, "learning_rate": 4.630541871921182e-05, "loss": 0.4894, "step": 1500 }, { "epoch": 0.79, "eval_accuracy": 0.7876455187797546, "eval_f1": 0.7582966474771418, "eval_loss": 0.44241347908973694, "eval_mcc": 0.5712551968689948, "eval_runtime": 56.6157, "eval_samples_per_second": 474.939, "eval_steps_per_second": 59.383, "step": 1600 }, { "epoch": 0.99, "learning_rate": 4.507389162561577e-05, "loss": 0.4502, "step": 2000 }, { "epoch": 0.99, "eval_accuracy": 0.7965710759162903, "eval_f1": 0.7511147511147512, "eval_loss": 0.42830032110214233, "eval_mcc": 0.5800105484140912, "eval_runtime": 56.3774, "eval_samples_per_second": 476.947, "eval_steps_per_second": 59.634, "step": 2000 }, { "epoch": 1.18, "eval_accuracy": 0.8038231134414673, "eval_f1": 0.7768329314210771, "eval_loss": 0.4215456247329712, "eval_mcc": 0.6042518893599846, "eval_runtime": 56.4383, "eval_samples_per_second": 476.431, "eval_steps_per_second": 59.569, "step": 2400 }, { "epoch": 1.23, "learning_rate": 4.384236453201971e-05, "loss": 0.3907, "step": 2500 }, { "epoch": 1.38, "eval_accuracy": 0.8115957975387573, "eval_f1": 0.7812230091552944, "eval_loss": 0.4063948690891266, "eval_mcc": 0.6164914435664559, "eval_runtime": 56.8276, "eval_samples_per_second": 473.168, "eval_steps_per_second": 59.161, "step": 2800 }, { "epoch": 1.48, "learning_rate": 4.261083743842365e-05, "loss": 0.377, "step": 3000 }, { "epoch": 1.58, "eval_accuracy": 0.8172858953475952, "eval_f1": 0.7783442364087525, "eval_loss": 0.40149447321891785, "eval_mcc": 0.6233909091797747, "eval_runtime": 56.8714, "eval_samples_per_second": 472.804, "eval_steps_per_second": 59.116, "step": 3200 }, { "epoch": 1.72, "learning_rate": 4.1379310344827587e-05, "loss": 0.3689, "step": 3500 }, { "epoch": 1.77, "eval_accuracy": 0.8182156085968018, "eval_f1": 0.7741219963031424, "eval_loss": 0.3932678699493408, "eval_mcc": 0.6242644613929623, "eval_runtime": 56.7004, "eval_samples_per_second": 474.23, "eval_steps_per_second": 59.294, "step": 3600 }, { "epoch": 1.97, "learning_rate": 4.014778325123153e-05, "loss": 0.3635, "step": 4000 }, { "epoch": 1.97, "eval_accuracy": 0.8267692923545837, "eval_f1": 0.7970900853807283, "eval_loss": 0.3838008642196655, "eval_mcc": 0.6462575029978364, "eval_runtime": 56.8764, "eval_samples_per_second": 472.762, "eval_steps_per_second": 59.111, "step": 4000 }, { "epoch": 2.17, "eval_accuracy": 0.8200007677078247, "eval_f1": 0.7987860646877858, "eval_loss": 0.41626259684562683, "eval_mcc": 0.6408222382018317, "eval_runtime": 56.8313, "eval_samples_per_second": 473.137, "eval_steps_per_second": 59.158, "step": 4400 }, { "epoch": 2.22, "learning_rate": 3.891625615763547e-05, "loss": 0.2955, "step": 4500 }, { "epoch": 2.36, "eval_accuracy": 0.831083357334137, "eval_f1": 0.7944238254729791, "eval_loss": 0.4071265459060669, "eval_mcc": 0.651700327408142, "eval_runtime": 56.5102, "eval_samples_per_second": 475.825, "eval_steps_per_second": 59.494, "step": 4800 }, { "epoch": 2.46, "learning_rate": 3.768472906403941e-05, "loss": 0.2929, "step": 5000 }, { "epoch": 2.56, "eval_accuracy": 0.8340585231781006, "eval_f1": 0.8102407076635196, "eval_loss": 0.39103031158447266, "eval_mcc": 0.6647668434223991, "eval_runtime": 56.9146, "eval_samples_per_second": 472.445, "eval_steps_per_second": 59.071, "step": 5200 }, { "epoch": 2.71, "learning_rate": 3.645320197044335e-05, "loss": 0.2895, "step": 5500 }, { "epoch": 2.76, "eval_accuracy": 0.8280337452888489, "eval_f1": 0.8053544367738678, "eval_loss": 0.39237213134765625, "eval_mcc": 0.6544461740844153, "eval_runtime": 56.5155, "eval_samples_per_second": 475.781, "eval_steps_per_second": 59.488, "step": 5600 }, { "epoch": 2.96, "learning_rate": 3.522167487684729e-05, "loss": 0.2888, "step": 6000 }, { "epoch": 2.96, "eval_accuracy": 0.8361039757728577, "eval_f1": 0.8108827189632236, "eval_loss": 0.3707721531391144, "eval_mcc": 0.6674362482296753, "eval_runtime": 56.3098, "eval_samples_per_second": 477.519, "eval_steps_per_second": 59.705, "step": 6000 }, { "epoch": 3.15, "eval_accuracy": 0.8345048427581787, "eval_f1": 0.8052857267874332, "eval_loss": 0.4219077229499817, "eval_mcc": 0.6615341824981482, "eval_runtime": 56.4491, "eval_samples_per_second": 476.34, "eval_steps_per_second": 59.558, "step": 6400 }, { "epoch": 3.2, "learning_rate": 3.399014778325123e-05, "loss": 0.2296, "step": 6500 }, { "epoch": 3.35, "eval_accuracy": 0.8363643288612366, "eval_f1": 0.8116599606198099, "eval_loss": 0.41765111684799194, "eval_mcc": 0.6683672120496021, "eval_runtime": 56.4613, "eval_samples_per_second": 476.237, "eval_steps_per_second": 59.545, "step": 6800 }, { "epoch": 3.45, "learning_rate": 3.275862068965517e-05, "loss": 0.2251, "step": 7000 }, { "epoch": 3.55, "eval_accuracy": 0.8376287817955017, "eval_f1": 0.8089612321694233, "eval_loss": 0.4220374524593353, "eval_mcc": 0.6679266160668449, "eval_runtime": 56.3914, "eval_samples_per_second": 476.828, "eval_steps_per_second": 59.619, "step": 7200 }, { "epoch": 3.55, "step": 7200, "total_flos": 7.91177802915502e+16, "train_loss": 0.36522457705603706, "train_runtime": 4971.062, "train_samples_per_second": 489.831, "train_steps_per_second": 4.084 } ], "max_steps": 20300, "num_train_epochs": 10, "total_flos": 7.91177802915502e+16, "trial_name": null, "trial_params": null }