structroberta_sx2_final / finetune /qqp /trainer_state.json
Omar
update_resylts
35ea748
{
"best_metric": 0.8116599606198099,
"best_model_checkpoint": "final_models/structroberta_sx2_final/finetune/qqp/checkpoint-6800",
"epoch": 3.5467980295566504,
"global_step": 7200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"eval_accuracy": 0.7086541056632996,
"eval_f1": 0.614392596967907,
"eval_loss": 0.5525585412979126,
"eval_mcc": 0.3913768984140464,
"eval_runtime": 56.5108,
"eval_samples_per_second": 475.82,
"eval_steps_per_second": 59.493,
"step": 400
},
{
"epoch": 0.25,
"learning_rate": 4.876847290640394e-05,
"loss": 0.5844,
"step": 500
},
{
"epoch": 0.39,
"eval_accuracy": 0.7449514865875244,
"eval_f1": 0.6675714978187106,
"eval_loss": 0.5103132724761963,
"eval_mcc": 0.46936219476259444,
"eval_runtime": 56.7709,
"eval_samples_per_second": 473.641,
"eval_steps_per_second": 59.22,
"step": 800
},
{
"epoch": 0.49,
"learning_rate": 4.753694581280788e-05,
"loss": 0.5275,
"step": 1000
},
{
"epoch": 0.59,
"eval_accuracy": 0.7642530202865601,
"eval_f1": 0.7277646553575263,
"eval_loss": 0.484948068857193,
"eval_mcc": 0.5208867129555503,
"eval_runtime": 56.4156,
"eval_samples_per_second": 476.624,
"eval_steps_per_second": 59.593,
"step": 1200
},
{
"epoch": 0.74,
"learning_rate": 4.630541871921182e-05,
"loss": 0.4894,
"step": 1500
},
{
"epoch": 0.79,
"eval_accuracy": 0.7876455187797546,
"eval_f1": 0.7582966474771418,
"eval_loss": 0.44241347908973694,
"eval_mcc": 0.5712551968689948,
"eval_runtime": 56.6157,
"eval_samples_per_second": 474.939,
"eval_steps_per_second": 59.383,
"step": 1600
},
{
"epoch": 0.99,
"learning_rate": 4.507389162561577e-05,
"loss": 0.4502,
"step": 2000
},
{
"epoch": 0.99,
"eval_accuracy": 0.7965710759162903,
"eval_f1": 0.7511147511147512,
"eval_loss": 0.42830032110214233,
"eval_mcc": 0.5800105484140912,
"eval_runtime": 56.3774,
"eval_samples_per_second": 476.947,
"eval_steps_per_second": 59.634,
"step": 2000
},
{
"epoch": 1.18,
"eval_accuracy": 0.8038231134414673,
"eval_f1": 0.7768329314210771,
"eval_loss": 0.4215456247329712,
"eval_mcc": 0.6042518893599846,
"eval_runtime": 56.4383,
"eval_samples_per_second": 476.431,
"eval_steps_per_second": 59.569,
"step": 2400
},
{
"epoch": 1.23,
"learning_rate": 4.384236453201971e-05,
"loss": 0.3907,
"step": 2500
},
{
"epoch": 1.38,
"eval_accuracy": 0.8115957975387573,
"eval_f1": 0.7812230091552944,
"eval_loss": 0.4063948690891266,
"eval_mcc": 0.6164914435664559,
"eval_runtime": 56.8276,
"eval_samples_per_second": 473.168,
"eval_steps_per_second": 59.161,
"step": 2800
},
{
"epoch": 1.48,
"learning_rate": 4.261083743842365e-05,
"loss": 0.377,
"step": 3000
},
{
"epoch": 1.58,
"eval_accuracy": 0.8172858953475952,
"eval_f1": 0.7783442364087525,
"eval_loss": 0.40149447321891785,
"eval_mcc": 0.6233909091797747,
"eval_runtime": 56.8714,
"eval_samples_per_second": 472.804,
"eval_steps_per_second": 59.116,
"step": 3200
},
{
"epoch": 1.72,
"learning_rate": 4.1379310344827587e-05,
"loss": 0.3689,
"step": 3500
},
{
"epoch": 1.77,
"eval_accuracy": 0.8182156085968018,
"eval_f1": 0.7741219963031424,
"eval_loss": 0.3932678699493408,
"eval_mcc": 0.6242644613929623,
"eval_runtime": 56.7004,
"eval_samples_per_second": 474.23,
"eval_steps_per_second": 59.294,
"step": 3600
},
{
"epoch": 1.97,
"learning_rate": 4.014778325123153e-05,
"loss": 0.3635,
"step": 4000
},
{
"epoch": 1.97,
"eval_accuracy": 0.8267692923545837,
"eval_f1": 0.7970900853807283,
"eval_loss": 0.3838008642196655,
"eval_mcc": 0.6462575029978364,
"eval_runtime": 56.8764,
"eval_samples_per_second": 472.762,
"eval_steps_per_second": 59.111,
"step": 4000
},
{
"epoch": 2.17,
"eval_accuracy": 0.8200007677078247,
"eval_f1": 0.7987860646877858,
"eval_loss": 0.41626259684562683,
"eval_mcc": 0.6408222382018317,
"eval_runtime": 56.8313,
"eval_samples_per_second": 473.137,
"eval_steps_per_second": 59.158,
"step": 4400
},
{
"epoch": 2.22,
"learning_rate": 3.891625615763547e-05,
"loss": 0.2955,
"step": 4500
},
{
"epoch": 2.36,
"eval_accuracy": 0.831083357334137,
"eval_f1": 0.7944238254729791,
"eval_loss": 0.4071265459060669,
"eval_mcc": 0.651700327408142,
"eval_runtime": 56.5102,
"eval_samples_per_second": 475.825,
"eval_steps_per_second": 59.494,
"step": 4800
},
{
"epoch": 2.46,
"learning_rate": 3.768472906403941e-05,
"loss": 0.2929,
"step": 5000
},
{
"epoch": 2.56,
"eval_accuracy": 0.8340585231781006,
"eval_f1": 0.8102407076635196,
"eval_loss": 0.39103031158447266,
"eval_mcc": 0.6647668434223991,
"eval_runtime": 56.9146,
"eval_samples_per_second": 472.445,
"eval_steps_per_second": 59.071,
"step": 5200
},
{
"epoch": 2.71,
"learning_rate": 3.645320197044335e-05,
"loss": 0.2895,
"step": 5500
},
{
"epoch": 2.76,
"eval_accuracy": 0.8280337452888489,
"eval_f1": 0.8053544367738678,
"eval_loss": 0.39237213134765625,
"eval_mcc": 0.6544461740844153,
"eval_runtime": 56.5155,
"eval_samples_per_second": 475.781,
"eval_steps_per_second": 59.488,
"step": 5600
},
{
"epoch": 2.96,
"learning_rate": 3.522167487684729e-05,
"loss": 0.2888,
"step": 6000
},
{
"epoch": 2.96,
"eval_accuracy": 0.8361039757728577,
"eval_f1": 0.8108827189632236,
"eval_loss": 0.3707721531391144,
"eval_mcc": 0.6674362482296753,
"eval_runtime": 56.3098,
"eval_samples_per_second": 477.519,
"eval_steps_per_second": 59.705,
"step": 6000
},
{
"epoch": 3.15,
"eval_accuracy": 0.8345048427581787,
"eval_f1": 0.8052857267874332,
"eval_loss": 0.4219077229499817,
"eval_mcc": 0.6615341824981482,
"eval_runtime": 56.4491,
"eval_samples_per_second": 476.34,
"eval_steps_per_second": 59.558,
"step": 6400
},
{
"epoch": 3.2,
"learning_rate": 3.399014778325123e-05,
"loss": 0.2296,
"step": 6500
},
{
"epoch": 3.35,
"eval_accuracy": 0.8363643288612366,
"eval_f1": 0.8116599606198099,
"eval_loss": 0.41765111684799194,
"eval_mcc": 0.6683672120496021,
"eval_runtime": 56.4613,
"eval_samples_per_second": 476.237,
"eval_steps_per_second": 59.545,
"step": 6800
},
{
"epoch": 3.45,
"learning_rate": 3.275862068965517e-05,
"loss": 0.2251,
"step": 7000
},
{
"epoch": 3.55,
"eval_accuracy": 0.8376287817955017,
"eval_f1": 0.8089612321694233,
"eval_loss": 0.4220374524593353,
"eval_mcc": 0.6679266160668449,
"eval_runtime": 56.3914,
"eval_samples_per_second": 476.828,
"eval_steps_per_second": 59.619,
"step": 7200
},
{
"epoch": 3.55,
"step": 7200,
"total_flos": 7.91177802915502e+16,
"train_loss": 0.36522457705603706,
"train_runtime": 4971.062,
"train_samples_per_second": 489.831,
"train_steps_per_second": 4.084
}
],
"max_steps": 20300,
"num_train_epochs": 10,
"total_flos": 7.91177802915502e+16,
"trial_name": null,
"trial_params": null
}