| { | |
| "best_metric": 0.8116599606198099, | |
| "best_model_checkpoint": "final_models/structroberta_sx2_final/finetune/qqp/checkpoint-6800", | |
| "epoch": 3.5467980295566504, | |
| "global_step": 7200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.7086541056632996, | |
| "eval_f1": 0.614392596967907, | |
| "eval_loss": 0.5525585412979126, | |
| "eval_mcc": 0.3913768984140464, | |
| "eval_runtime": 56.5108, | |
| "eval_samples_per_second": 475.82, | |
| "eval_steps_per_second": 59.493, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.876847290640394e-05, | |
| "loss": 0.5844, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.7449514865875244, | |
| "eval_f1": 0.6675714978187106, | |
| "eval_loss": 0.5103132724761963, | |
| "eval_mcc": 0.46936219476259444, | |
| "eval_runtime": 56.7709, | |
| "eval_samples_per_second": 473.641, | |
| "eval_steps_per_second": 59.22, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.753694581280788e-05, | |
| "loss": 0.5275, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.7642530202865601, | |
| "eval_f1": 0.7277646553575263, | |
| "eval_loss": 0.484948068857193, | |
| "eval_mcc": 0.5208867129555503, | |
| "eval_runtime": 56.4156, | |
| "eval_samples_per_second": 476.624, | |
| "eval_steps_per_second": 59.593, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.630541871921182e-05, | |
| "loss": 0.4894, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.7876455187797546, | |
| "eval_f1": 0.7582966474771418, | |
| "eval_loss": 0.44241347908973694, | |
| "eval_mcc": 0.5712551968689948, | |
| "eval_runtime": 56.6157, | |
| "eval_samples_per_second": 474.939, | |
| "eval_steps_per_second": 59.383, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.507389162561577e-05, | |
| "loss": 0.4502, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.7965710759162903, | |
| "eval_f1": 0.7511147511147512, | |
| "eval_loss": 0.42830032110214233, | |
| "eval_mcc": 0.5800105484140912, | |
| "eval_runtime": 56.3774, | |
| "eval_samples_per_second": 476.947, | |
| "eval_steps_per_second": 59.634, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_accuracy": 0.8038231134414673, | |
| "eval_f1": 0.7768329314210771, | |
| "eval_loss": 0.4215456247329712, | |
| "eval_mcc": 0.6042518893599846, | |
| "eval_runtime": 56.4383, | |
| "eval_samples_per_second": 476.431, | |
| "eval_steps_per_second": 59.569, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.384236453201971e-05, | |
| "loss": 0.3907, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_accuracy": 0.8115957975387573, | |
| "eval_f1": 0.7812230091552944, | |
| "eval_loss": 0.4063948690891266, | |
| "eval_mcc": 0.6164914435664559, | |
| "eval_runtime": 56.8276, | |
| "eval_samples_per_second": 473.168, | |
| "eval_steps_per_second": 59.161, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 4.261083743842365e-05, | |
| "loss": 0.377, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_accuracy": 0.8172858953475952, | |
| "eval_f1": 0.7783442364087525, | |
| "eval_loss": 0.40149447321891785, | |
| "eval_mcc": 0.6233909091797747, | |
| "eval_runtime": 56.8714, | |
| "eval_samples_per_second": 472.804, | |
| "eval_steps_per_second": 59.116, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 4.1379310344827587e-05, | |
| "loss": 0.3689, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_accuracy": 0.8182156085968018, | |
| "eval_f1": 0.7741219963031424, | |
| "eval_loss": 0.3932678699493408, | |
| "eval_mcc": 0.6242644613929623, | |
| "eval_runtime": 56.7004, | |
| "eval_samples_per_second": 474.23, | |
| "eval_steps_per_second": 59.294, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 4.014778325123153e-05, | |
| "loss": 0.3635, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_accuracy": 0.8267692923545837, | |
| "eval_f1": 0.7970900853807283, | |
| "eval_loss": 0.3838008642196655, | |
| "eval_mcc": 0.6462575029978364, | |
| "eval_runtime": 56.8764, | |
| "eval_samples_per_second": 472.762, | |
| "eval_steps_per_second": 59.111, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_accuracy": 0.8200007677078247, | |
| "eval_f1": 0.7987860646877858, | |
| "eval_loss": 0.41626259684562683, | |
| "eval_mcc": 0.6408222382018317, | |
| "eval_runtime": 56.8313, | |
| "eval_samples_per_second": 473.137, | |
| "eval_steps_per_second": 59.158, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.891625615763547e-05, | |
| "loss": 0.2955, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 0.831083357334137, | |
| "eval_f1": 0.7944238254729791, | |
| "eval_loss": 0.4071265459060669, | |
| "eval_mcc": 0.651700327408142, | |
| "eval_runtime": 56.5102, | |
| "eval_samples_per_second": 475.825, | |
| "eval_steps_per_second": 59.494, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.768472906403941e-05, | |
| "loss": 0.2929, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_accuracy": 0.8340585231781006, | |
| "eval_f1": 0.8102407076635196, | |
| "eval_loss": 0.39103031158447266, | |
| "eval_mcc": 0.6647668434223991, | |
| "eval_runtime": 56.9146, | |
| "eval_samples_per_second": 472.445, | |
| "eval_steps_per_second": 59.071, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.645320197044335e-05, | |
| "loss": 0.2895, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_accuracy": 0.8280337452888489, | |
| "eval_f1": 0.8053544367738678, | |
| "eval_loss": 0.39237213134765625, | |
| "eval_mcc": 0.6544461740844153, | |
| "eval_runtime": 56.5155, | |
| "eval_samples_per_second": 475.781, | |
| "eval_steps_per_second": 59.488, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 3.522167487684729e-05, | |
| "loss": 0.2888, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_accuracy": 0.8361039757728577, | |
| "eval_f1": 0.8108827189632236, | |
| "eval_loss": 0.3707721531391144, | |
| "eval_mcc": 0.6674362482296753, | |
| "eval_runtime": 56.3098, | |
| "eval_samples_per_second": 477.519, | |
| "eval_steps_per_second": 59.705, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "eval_accuracy": 0.8345048427581787, | |
| "eval_f1": 0.8052857267874332, | |
| "eval_loss": 0.4219077229499817, | |
| "eval_mcc": 0.6615341824981482, | |
| "eval_runtime": 56.4491, | |
| "eval_samples_per_second": 476.34, | |
| "eval_steps_per_second": 59.558, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 3.399014778325123e-05, | |
| "loss": 0.2296, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "eval_accuracy": 0.8363643288612366, | |
| "eval_f1": 0.8116599606198099, | |
| "eval_loss": 0.41765111684799194, | |
| "eval_mcc": 0.6683672120496021, | |
| "eval_runtime": 56.4613, | |
| "eval_samples_per_second": 476.237, | |
| "eval_steps_per_second": 59.545, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 3.275862068965517e-05, | |
| "loss": 0.2251, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "eval_accuracy": 0.8376287817955017, | |
| "eval_f1": 0.8089612321694233, | |
| "eval_loss": 0.4220374524593353, | |
| "eval_mcc": 0.6679266160668449, | |
| "eval_runtime": 56.3914, | |
| "eval_samples_per_second": 476.828, | |
| "eval_steps_per_second": 59.619, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "step": 7200, | |
| "total_flos": 7.91177802915502e+16, | |
| "train_loss": 0.36522457705603706, | |
| "train_runtime": 4971.062, | |
| "train_samples_per_second": 489.831, | |
| "train_steps_per_second": 4.084 | |
| } | |
| ], | |
| "max_steps": 20300, | |
| "num_train_epochs": 10, | |
| "total_flos": 7.91177802915502e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |