structroberta_sx_final / finetune /sst2 /trainer_state.json
Omar
update
88cccb3
{
"best_metric": 0.8626692456479691,
"best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/sst2/checkpoint-3200",
"epoch": 6.582278481012658,
"global_step": 5200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.25,
"eval_accuracy": 0.7539370059967041,
"eval_f1": 0.7803163444639719,
"eval_loss": 0.5231051445007324,
"eval_mcc": 0.5247106973194531,
"eval_runtime": 1.0985,
"eval_samples_per_second": 462.456,
"eval_steps_per_second": 58.262,
"step": 200
},
{
"epoch": 0.51,
"eval_accuracy": 0.8031495809555054,
"eval_f1": 0.8148148148148149,
"eval_loss": 0.40333232283592224,
"eval_mcc": 0.6120025576737335,
"eval_runtime": 1.1024,
"eval_samples_per_second": 460.833,
"eval_steps_per_second": 58.058,
"step": 400
},
{
"epoch": 0.63,
"learning_rate": 4.683544303797468e-05,
"loss": 0.4182,
"step": 500
},
{
"epoch": 0.76,
"eval_accuracy": 0.8444882035255432,
"eval_f1": 0.8435643564356434,
"eval_loss": 0.38440778851509094,
"eval_mcc": 0.6889720757983125,
"eval_runtime": 1.1045,
"eval_samples_per_second": 459.935,
"eval_steps_per_second": 57.945,
"step": 600
},
{
"epoch": 1.01,
"eval_accuracy": 0.834645688533783,
"eval_f1": 0.8372093023255814,
"eval_loss": 0.43441784381866455,
"eval_mcc": 0.6698566624509036,
"eval_runtime": 1.1061,
"eval_samples_per_second": 459.26,
"eval_steps_per_second": 57.86,
"step": 800
},
{
"epoch": 1.27,
"learning_rate": 4.367088607594937e-05,
"loss": 0.2549,
"step": 1000
},
{
"epoch": 1.27,
"eval_accuracy": 0.8385826945304871,
"eval_f1": 0.8404669260700389,
"eval_loss": 0.4155767261981964,
"eval_mcc": 0.6775364473446112,
"eval_runtime": 1.0923,
"eval_samples_per_second": 465.066,
"eval_steps_per_second": 58.591,
"step": 1000
},
{
"epoch": 1.52,
"eval_accuracy": 0.834645688533783,
"eval_f1": 0.8438661710037175,
"eval_loss": 0.5223256349563599,
"eval_mcc": 0.6748218010834892,
"eval_runtime": 1.0975,
"eval_samples_per_second": 462.872,
"eval_steps_per_second": 58.315,
"step": 1200
},
{
"epoch": 1.77,
"eval_accuracy": 0.8503937125205994,
"eval_f1": 0.8515625,
"eval_loss": 0.47026267647743225,
"eval_mcc": 0.7010061668834338,
"eval_runtime": 1.0838,
"eval_samples_per_second": 468.727,
"eval_steps_per_second": 59.052,
"step": 1400
},
{
"epoch": 1.9,
"learning_rate": 4.050632911392405e-05,
"loss": 0.1967,
"step": 1500
},
{
"epoch": 2.03,
"eval_accuracy": 0.8582677245140076,
"eval_f1": 0.8582677165354331,
"eval_loss": 0.4532265067100525,
"eval_mcc": 0.7165620398356971,
"eval_runtime": 1.0715,
"eval_samples_per_second": 474.092,
"eval_steps_per_second": 59.728,
"step": 1600
},
{
"epoch": 2.28,
"eval_accuracy": 0.8307086825370789,
"eval_f1": 0.8333333333333334,
"eval_loss": 0.5167694687843323,
"eval_mcc": 0.6619776380575382,
"eval_runtime": 1.0813,
"eval_samples_per_second": 469.785,
"eval_steps_per_second": 59.186,
"step": 1800
},
{
"epoch": 2.53,
"learning_rate": 3.7341772151898736e-05,
"loss": 0.1348,
"step": 2000
},
{
"epoch": 2.53,
"eval_accuracy": 0.8385826945304871,
"eval_f1": 0.8498168498168498,
"eval_loss": 0.5124529600143433,
"eval_mcc": 0.6859086686135547,
"eval_runtime": 1.0858,
"eval_samples_per_second": 467.876,
"eval_steps_per_second": 58.945,
"step": 2000
},
{
"epoch": 2.78,
"eval_accuracy": 0.8425197005271912,
"eval_f1": 0.8387096774193549,
"eval_loss": 0.45541733503341675,
"eval_mcc": 0.685517324398053,
"eval_runtime": 1.0842,
"eval_samples_per_second": 468.534,
"eval_steps_per_second": 59.028,
"step": 2200
},
{
"epoch": 3.04,
"eval_accuracy": 0.8307086825370789,
"eval_f1": 0.8170212765957447,
"eval_loss": 0.7158520221710205,
"eval_mcc": 0.6679739948137614,
"eval_runtime": 1.0782,
"eval_samples_per_second": 471.16,
"eval_steps_per_second": 59.359,
"step": 2400
},
{
"epoch": 3.16,
"learning_rate": 3.4177215189873416e-05,
"loss": 0.1222,
"step": 2500
},
{
"epoch": 3.29,
"eval_accuracy": 0.8543307185173035,
"eval_f1": 0.8549019607843138,
"eval_loss": 0.5784336924552917,
"eval_mcc": 0.7087628480087672,
"eval_runtime": 1.0734,
"eval_samples_per_second": 473.262,
"eval_steps_per_second": 59.624,
"step": 2600
},
{
"epoch": 3.54,
"eval_accuracy": 0.834645688533783,
"eval_f1": 0.8372093023255814,
"eval_loss": 0.623878538608551,
"eval_mcc": 0.6698566624509036,
"eval_runtime": 1.0761,
"eval_samples_per_second": 472.087,
"eval_steps_per_second": 59.476,
"step": 2800
},
{
"epoch": 3.8,
"learning_rate": 3.10126582278481e-05,
"loss": 0.0846,
"step": 3000
},
{
"epoch": 3.8,
"eval_accuracy": 0.8464567065238953,
"eval_f1": 0.8488372093023256,
"eval_loss": 0.564211905002594,
"eval_mcc": 0.6934937356309998,
"eval_runtime": 1.0832,
"eval_samples_per_second": 468.993,
"eval_steps_per_second": 59.086,
"step": 3000
},
{
"epoch": 4.05,
"eval_accuracy": 0.8602362275123596,
"eval_f1": 0.8626692456479691,
"eval_loss": 0.5327543020248413,
"eval_mcc": 0.7211921679935971,
"eval_runtime": 1.0843,
"eval_samples_per_second": 468.514,
"eval_steps_per_second": 59.025,
"step": 3200
},
{
"epoch": 4.3,
"eval_accuracy": 0.834645688533783,
"eval_f1": 0.8450184501845018,
"eval_loss": 0.8166886568069458,
"eval_mcc": 0.6762903456703597,
"eval_runtime": 1.0816,
"eval_samples_per_second": 469.696,
"eval_steps_per_second": 59.174,
"step": 3400
},
{
"epoch": 4.43,
"learning_rate": 2.7848101265822786e-05,
"loss": 0.065,
"step": 3500
},
{
"epoch": 4.56,
"eval_accuracy": 0.8385826945304871,
"eval_f1": 0.8452830188679245,
"eval_loss": 0.7407500743865967,
"eval_mcc": 0.6803220026110445,
"eval_runtime": 1.0847,
"eval_samples_per_second": 468.341,
"eval_steps_per_second": 59.004,
"step": 3600
},
{
"epoch": 4.81,
"eval_accuracy": 0.834645688533783,
"eval_f1": 0.8384615384615384,
"eval_loss": 0.6380051970481873,
"eval_mcc": 0.6703766309280716,
"eval_runtime": 1.0846,
"eval_samples_per_second": 468.368,
"eval_steps_per_second": 59.007,
"step": 3800
},
{
"epoch": 5.06,
"learning_rate": 2.468354430379747e-05,
"loss": 0.0601,
"step": 4000
},
{
"epoch": 5.06,
"eval_accuracy": 0.8425197005271912,
"eval_f1": 0.8496240601503758,
"eval_loss": 0.7719384431838989,
"eval_mcc": 0.6887765705877247,
"eval_runtime": 1.0798,
"eval_samples_per_second": 470.438,
"eval_steps_per_second": 59.268,
"step": 4000
},
{
"epoch": 5.32,
"eval_accuracy": 0.8543307185173035,
"eval_f1": 0.859848484848485,
"eval_loss": 0.7159872651100159,
"eval_mcc": 0.7114280702270771,
"eval_runtime": 1.0773,
"eval_samples_per_second": 471.539,
"eval_steps_per_second": 59.406,
"step": 4200
},
{
"epoch": 5.57,
"eval_accuracy": 0.8425197005271912,
"eval_f1": 0.8443579766536965,
"eval_loss": 0.6608069539070129,
"eval_mcc": 0.6854135160080981,
"eval_runtime": 1.0746,
"eval_samples_per_second": 472.729,
"eval_steps_per_second": 59.556,
"step": 4400
},
{
"epoch": 5.7,
"learning_rate": 2.1518987341772153e-05,
"loss": 0.041,
"step": 4500
},
{
"epoch": 5.82,
"eval_accuracy": 0.8562992215156555,
"eval_f1": 0.8598848368522072,
"eval_loss": 0.7154455780982971,
"eval_mcc": 0.7139062005896402,
"eval_runtime": 1.0715,
"eval_samples_per_second": 474.092,
"eval_steps_per_second": 59.728,
"step": 4600
},
{
"epoch": 6.08,
"eval_accuracy": 0.8287401795387268,
"eval_f1": 0.8391866913123845,
"eval_loss": 0.9410210251808167,
"eval_mcc": 0.6639879844977076,
"eval_runtime": 1.0783,
"eval_samples_per_second": 471.114,
"eval_steps_per_second": 59.353,
"step": 4800
},
{
"epoch": 6.33,
"learning_rate": 1.8354430379746836e-05,
"loss": 0.0332,
"step": 5000
},
{
"epoch": 6.33,
"eval_accuracy": 0.8484252095222473,
"eval_f1": 0.8481262327416174,
"eval_loss": 0.8442137241363525,
"eval_mcc": 0.6968557943649227,
"eval_runtime": 1.074,
"eval_samples_per_second": 472.992,
"eval_steps_per_second": 59.59,
"step": 5000
},
{
"epoch": 6.58,
"eval_accuracy": 0.8307086825370789,
"eval_f1": 0.8424908424908425,
"eval_loss": 0.9499196410179138,
"eval_mcc": 0.6699715312084875,
"eval_runtime": 1.0843,
"eval_samples_per_second": 468.484,
"eval_steps_per_second": 59.022,
"step": 5200
},
{
"epoch": 6.58,
"step": 5200,
"total_flos": 3.046823046955008e+16,
"train_loss": 0.13669625529876123,
"train_runtime": 1803.9314,
"train_samples_per_second": 280.099,
"train_steps_per_second": 4.379
}
],
"max_steps": 7900,
"num_train_epochs": 10,
"total_flos": 3.046823046955008e+16,
"trial_name": null,
"trial_params": null
}