structroberta_s1_final / finetune /qqp /trainer_state.json
Omar
update
3859fdb
{
"best_metric": 0.8170573479802784,
"best_model_checkpoint": "final_models/glue_models/structroberta_s1_final//finetune/qqp/checkpoint-7600",
"epoch": 4.334975369458128,
"global_step": 8800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"eval_accuracy": 0.7290713787078857,
"eval_f1": 0.6721569686332748,
"eval_loss": 0.5238546133041382,
"eval_mcc": 0.4416121712965625,
"eval_runtime": 52.7834,
"eval_samples_per_second": 509.422,
"eval_steps_per_second": 63.694,
"step": 400
},
{
"epoch": 0.25,
"learning_rate": 4.876847290640394e-05,
"loss": 0.5638,
"step": 500
},
{
"epoch": 0.39,
"eval_accuracy": 0.7639183402061462,
"eval_f1": 0.7096597145993414,
"eval_loss": 0.47765403985977173,
"eval_mcc": 0.5119658214079615,
"eval_runtime": 52.587,
"eval_samples_per_second": 511.324,
"eval_steps_per_second": 63.932,
"step": 800
},
{
"epoch": 0.49,
"learning_rate": 4.753694581280788e-05,
"loss": 0.5001,
"step": 1000
},
{
"epoch": 0.59,
"eval_accuracy": 0.776934802532196,
"eval_f1": 0.7363052844456167,
"eval_loss": 0.4574498236179352,
"eval_mcc": 0.543065767811832,
"eval_runtime": 52.5687,
"eval_samples_per_second": 511.502,
"eval_steps_per_second": 63.954,
"step": 1200
},
{
"epoch": 0.74,
"learning_rate": 4.630541871921182e-05,
"loss": 0.4661,
"step": 1500
},
{
"epoch": 0.79,
"eval_accuracy": 0.7959388494491577,
"eval_f1": 0.7507834854884862,
"eval_loss": 0.43747639656066895,
"eval_mcc": 0.5788217732475972,
"eval_runtime": 52.5537,
"eval_samples_per_second": 511.648,
"eval_steps_per_second": 63.973,
"step": 1600
},
{
"epoch": 0.99,
"learning_rate": 4.507389162561577e-05,
"loss": 0.4341,
"step": 2000
},
{
"epoch": 0.99,
"eval_accuracy": 0.7994347214698792,
"eval_f1": 0.7487303732003914,
"eval_loss": 0.42687392234802246,
"eval_mcc": 0.5849102854785966,
"eval_runtime": 52.6741,
"eval_samples_per_second": 510.479,
"eval_steps_per_second": 63.826,
"step": 2000
},
{
"epoch": 1.18,
"eval_accuracy": 0.8122280240058899,
"eval_f1": 0.7765237020316026,
"eval_loss": 0.40614333748817444,
"eval_mcc": 0.6146192229476998,
"eval_runtime": 52.5916,
"eval_samples_per_second": 511.279,
"eval_steps_per_second": 63.927,
"step": 2400
},
{
"epoch": 1.23,
"learning_rate": 4.384236453201971e-05,
"loss": 0.3672,
"step": 2500
},
{
"epoch": 1.38,
"eval_accuracy": 0.8153520226478577,
"eval_f1": 0.7860559314000086,
"eval_loss": 0.4019757807254791,
"eval_mcc": 0.6244919264192823,
"eval_runtime": 52.6013,
"eval_samples_per_second": 511.185,
"eval_steps_per_second": 63.915,
"step": 2800
},
{
"epoch": 1.48,
"learning_rate": 4.261083743842365e-05,
"loss": 0.356,
"step": 3000
},
{
"epoch": 1.58,
"eval_accuracy": 0.8156867027282715,
"eval_f1": 0.7614555256064689,
"eval_loss": 0.4167090058326721,
"eval_mcc": 0.6195064450559568,
"eval_runtime": 52.6041,
"eval_samples_per_second": 511.158,
"eval_steps_per_second": 63.911,
"step": 3200
},
{
"epoch": 1.72,
"learning_rate": 4.1379310344827587e-05,
"loss": 0.351,
"step": 3500
},
{
"epoch": 1.77,
"eval_accuracy": 0.8191825747489929,
"eval_f1": 0.7678349727819692,
"eval_loss": 0.39588743448257446,
"eval_mcc": 0.62655953398824,
"eval_runtime": 52.5568,
"eval_samples_per_second": 511.617,
"eval_steps_per_second": 63.969,
"step": 3600
},
{
"epoch": 1.97,
"learning_rate": 4.014778325123153e-05,
"loss": 0.3459,
"step": 4000
},
{
"epoch": 1.97,
"eval_accuracy": 0.8300420045852661,
"eval_f1": 0.7979842631067102,
"eval_loss": 0.37582555413246155,
"eval_mcc": 0.6513038205750896,
"eval_runtime": 52.5391,
"eval_samples_per_second": 511.79,
"eval_steps_per_second": 63.99,
"step": 4000
},
{
"epoch": 2.17,
"eval_accuracy": 0.8305255174636841,
"eval_f1": 0.797601598934044,
"eval_loss": 0.41245537996292114,
"eval_mcc": 0.6518572239989366,
"eval_runtime": 52.5395,
"eval_samples_per_second": 511.786,
"eval_steps_per_second": 63.99,
"step": 4400
},
{
"epoch": 2.22,
"learning_rate": 3.891625615763547e-05,
"loss": 0.2663,
"step": 4500
},
{
"epoch": 2.36,
"eval_accuracy": 0.8285916447639465,
"eval_f1": 0.7859961925987835,
"eval_loss": 0.41441312432289124,
"eval_mcc": 0.6458275621192345,
"eval_runtime": 52.5917,
"eval_samples_per_second": 511.278,
"eval_steps_per_second": 63.926,
"step": 4800
},
{
"epoch": 2.46,
"learning_rate": 3.768472906403941e-05,
"loss": 0.2662,
"step": 5000
},
{
"epoch": 2.56,
"eval_accuracy": 0.8343932628631592,
"eval_f1": 0.801231977860108,
"eval_loss": 0.39984196424484253,
"eval_mcc": 0.6594012405759717,
"eval_runtime": 52.6028,
"eval_samples_per_second": 511.17,
"eval_steps_per_second": 63.913,
"step": 5200
},
{
"epoch": 2.71,
"learning_rate": 3.645320197044335e-05,
"loss": 0.2654,
"step": 5500
},
{
"epoch": 2.76,
"eval_accuracy": 0.83937668800354,
"eval_f1": 0.8067475054812295,
"eval_loss": 0.38627538084983826,
"eval_mcc": 0.6694848416701843,
"eval_runtime": 52.5399,
"eval_samples_per_second": 511.782,
"eval_steps_per_second": 63.989,
"step": 5600
},
{
"epoch": 2.96,
"learning_rate": 3.522167487684729e-05,
"loss": 0.2655,
"step": 6000
},
{
"epoch": 2.96,
"eval_accuracy": 0.8388932347297668,
"eval_f1": 0.8094986807387862,
"eval_loss": 0.3793533146381378,
"eval_mcc": 0.6699724511193045,
"eval_runtime": 52.5795,
"eval_samples_per_second": 511.397,
"eval_steps_per_second": 63.941,
"step": 6000
},
{
"epoch": 3.15,
"eval_accuracy": 0.8402320742607117,
"eval_f1": 0.808504947847018,
"eval_loss": 0.43779271841049194,
"eval_mcc": 0.6715154800036364,
"eval_runtime": 52.5715,
"eval_samples_per_second": 511.474,
"eval_steps_per_second": 63.951,
"step": 6400
},
{
"epoch": 3.2,
"learning_rate": 3.399014778325123e-05,
"loss": 0.1988,
"step": 6500
},
{
"epoch": 3.35,
"eval_accuracy": 0.8417196869850159,
"eval_f1": 0.8166149603584971,
"eval_loss": 0.4331710636615753,
"eval_mcc": 0.6782698139933768,
"eval_runtime": 52.5549,
"eval_samples_per_second": 511.636,
"eval_steps_per_second": 63.971,
"step": 6800
},
{
"epoch": 3.45,
"learning_rate": 3.275862068965517e-05,
"loss": 0.1907,
"step": 7000
},
{
"epoch": 3.55,
"eval_accuracy": 0.8396742343902588,
"eval_f1": 0.8107965766951941,
"eval_loss": 0.4265030324459076,
"eval_mcc": 0.671779695965898,
"eval_runtime": 52.6448,
"eval_samples_per_second": 510.763,
"eval_steps_per_second": 63.862,
"step": 7200
},
{
"epoch": 3.69,
"learning_rate": 3.152709359605912e-05,
"loss": 0.1914,
"step": 7500
},
{
"epoch": 3.74,
"eval_accuracy": 0.8426865935325623,
"eval_f1": 0.8170573479802784,
"eval_loss": 0.41488954424858093,
"eval_mcc": 0.6797207290552535,
"eval_runtime": 52.6266,
"eval_samples_per_second": 510.939,
"eval_steps_per_second": 63.884,
"step": 7600
},
{
"epoch": 3.94,
"learning_rate": 3.0295566502463057e-05,
"loss": 0.1957,
"step": 8000
},
{
"epoch": 3.94,
"eval_accuracy": 0.8425378203392029,
"eval_f1": 0.8142167617376042,
"eval_loss": 0.428946852684021,
"eval_mcc": 0.6776669911036672,
"eval_runtime": 52.5778,
"eval_samples_per_second": 511.414,
"eval_steps_per_second": 63.943,
"step": 8000
},
{
"epoch": 4.14,
"eval_accuracy": 0.8410874605178833,
"eval_f1": 0.8148212351029251,
"eval_loss": 0.5232462286949158,
"eval_mcc": 0.6761827212445845,
"eval_runtime": 52.5692,
"eval_samples_per_second": 511.498,
"eval_steps_per_second": 63.954,
"step": 8400
},
{
"epoch": 4.19,
"learning_rate": 2.9064039408866993e-05,
"loss": 0.1418,
"step": 8500
},
{
"epoch": 4.33,
"eval_accuracy": 0.8425378203392029,
"eval_f1": 0.8093651508329581,
"eval_loss": 0.4935559630393982,
"eval_mcc": 0.6756223314865125,
"eval_runtime": 52.5582,
"eval_samples_per_second": 511.604,
"eval_steps_per_second": 63.967,
"step": 8800
},
{
"epoch": 4.33,
"step": 8800,
"total_flos": 8.808821834464051e+16,
"train_loss": 0.30929578044197775,
"train_runtime": 6074.005,
"train_samples_per_second": 400.885,
"train_steps_per_second": 3.342
}
],
"max_steps": 20300,
"num_train_epochs": 10,
"total_flos": 8.808821834464051e+16,
"trial_name": null,
"trial_params": null
}