| { | |
| "best_metric": 0.8626692456479691, | |
| "best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/sst2/checkpoint-3200", | |
| "epoch": 6.582278481012658, | |
| "global_step": 5200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.7539370059967041, | |
| "eval_f1": 0.7803163444639719, | |
| "eval_loss": 0.5231051445007324, | |
| "eval_mcc": 0.5247106973194531, | |
| "eval_runtime": 1.0985, | |
| "eval_samples_per_second": 462.456, | |
| "eval_steps_per_second": 58.262, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.8031495809555054, | |
| "eval_f1": 0.8148148148148149, | |
| "eval_loss": 0.40333232283592224, | |
| "eval_mcc": 0.6120025576737335, | |
| "eval_runtime": 1.1024, | |
| "eval_samples_per_second": 460.833, | |
| "eval_steps_per_second": 58.058, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.683544303797468e-05, | |
| "loss": 0.4182, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.8444882035255432, | |
| "eval_f1": 0.8435643564356434, | |
| "eval_loss": 0.38440778851509094, | |
| "eval_mcc": 0.6889720757983125, | |
| "eval_runtime": 1.1045, | |
| "eval_samples_per_second": 459.935, | |
| "eval_steps_per_second": 57.945, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_accuracy": 0.834645688533783, | |
| "eval_f1": 0.8372093023255814, | |
| "eval_loss": 0.43441784381866455, | |
| "eval_mcc": 0.6698566624509036, | |
| "eval_runtime": 1.1061, | |
| "eval_samples_per_second": 459.26, | |
| "eval_steps_per_second": 57.86, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.367088607594937e-05, | |
| "loss": 0.2549, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_accuracy": 0.8385826945304871, | |
| "eval_f1": 0.8404669260700389, | |
| "eval_loss": 0.4155767261981964, | |
| "eval_mcc": 0.6775364473446112, | |
| "eval_runtime": 1.0923, | |
| "eval_samples_per_second": 465.066, | |
| "eval_steps_per_second": 58.591, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_accuracy": 0.834645688533783, | |
| "eval_f1": 0.8438661710037175, | |
| "eval_loss": 0.5223256349563599, | |
| "eval_mcc": 0.6748218010834892, | |
| "eval_runtime": 1.0975, | |
| "eval_samples_per_second": 462.872, | |
| "eval_steps_per_second": 58.315, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_accuracy": 0.8503937125205994, | |
| "eval_f1": 0.8515625, | |
| "eval_loss": 0.47026267647743225, | |
| "eval_mcc": 0.7010061668834338, | |
| "eval_runtime": 1.0838, | |
| "eval_samples_per_second": 468.727, | |
| "eval_steps_per_second": 59.052, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 4.050632911392405e-05, | |
| "loss": 0.1967, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "eval_accuracy": 0.8582677245140076, | |
| "eval_f1": 0.8582677165354331, | |
| "eval_loss": 0.4532265067100525, | |
| "eval_mcc": 0.7165620398356971, | |
| "eval_runtime": 1.0715, | |
| "eval_samples_per_second": 474.092, | |
| "eval_steps_per_second": 59.728, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_accuracy": 0.8307086825370789, | |
| "eval_f1": 0.8333333333333334, | |
| "eval_loss": 0.5167694687843323, | |
| "eval_mcc": 0.6619776380575382, | |
| "eval_runtime": 1.0813, | |
| "eval_samples_per_second": 469.785, | |
| "eval_steps_per_second": 59.186, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.7341772151898736e-05, | |
| "loss": 0.1348, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_accuracy": 0.8385826945304871, | |
| "eval_f1": 0.8498168498168498, | |
| "eval_loss": 0.5124529600143433, | |
| "eval_mcc": 0.6859086686135547, | |
| "eval_runtime": 1.0858, | |
| "eval_samples_per_second": 467.876, | |
| "eval_steps_per_second": 58.945, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_accuracy": 0.8425197005271912, | |
| "eval_f1": 0.8387096774193549, | |
| "eval_loss": 0.45541733503341675, | |
| "eval_mcc": 0.685517324398053, | |
| "eval_runtime": 1.0842, | |
| "eval_samples_per_second": 468.534, | |
| "eval_steps_per_second": 59.028, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "eval_accuracy": 0.8307086825370789, | |
| "eval_f1": 0.8170212765957447, | |
| "eval_loss": 0.7158520221710205, | |
| "eval_mcc": 0.6679739948137614, | |
| "eval_runtime": 1.0782, | |
| "eval_samples_per_second": 471.16, | |
| "eval_steps_per_second": 59.359, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 3.4177215189873416e-05, | |
| "loss": 0.1222, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "eval_accuracy": 0.8543307185173035, | |
| "eval_f1": 0.8549019607843138, | |
| "eval_loss": 0.5784336924552917, | |
| "eval_mcc": 0.7087628480087672, | |
| "eval_runtime": 1.0734, | |
| "eval_samples_per_second": 473.262, | |
| "eval_steps_per_second": 59.624, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "eval_accuracy": 0.834645688533783, | |
| "eval_f1": 0.8372093023255814, | |
| "eval_loss": 0.623878538608551, | |
| "eval_mcc": 0.6698566624509036, | |
| "eval_runtime": 1.0761, | |
| "eval_samples_per_second": 472.087, | |
| "eval_steps_per_second": 59.476, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 3.10126582278481e-05, | |
| "loss": 0.0846, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "eval_accuracy": 0.8464567065238953, | |
| "eval_f1": 0.8488372093023256, | |
| "eval_loss": 0.564211905002594, | |
| "eval_mcc": 0.6934937356309998, | |
| "eval_runtime": 1.0832, | |
| "eval_samples_per_second": 468.993, | |
| "eval_steps_per_second": 59.086, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "eval_accuracy": 0.8602362275123596, | |
| "eval_f1": 0.8626692456479691, | |
| "eval_loss": 0.5327543020248413, | |
| "eval_mcc": 0.7211921679935971, | |
| "eval_runtime": 1.0843, | |
| "eval_samples_per_second": 468.514, | |
| "eval_steps_per_second": 59.025, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "eval_accuracy": 0.834645688533783, | |
| "eval_f1": 0.8450184501845018, | |
| "eval_loss": 0.8166886568069458, | |
| "eval_mcc": 0.6762903456703597, | |
| "eval_runtime": 1.0816, | |
| "eval_samples_per_second": 469.696, | |
| "eval_steps_per_second": 59.174, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 2.7848101265822786e-05, | |
| "loss": 0.065, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "eval_accuracy": 0.8385826945304871, | |
| "eval_f1": 0.8452830188679245, | |
| "eval_loss": 0.7407500743865967, | |
| "eval_mcc": 0.6803220026110445, | |
| "eval_runtime": 1.0847, | |
| "eval_samples_per_second": 468.341, | |
| "eval_steps_per_second": 59.004, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "eval_accuracy": 0.834645688533783, | |
| "eval_f1": 0.8384615384615384, | |
| "eval_loss": 0.6380051970481873, | |
| "eval_mcc": 0.6703766309280716, | |
| "eval_runtime": 1.0846, | |
| "eval_samples_per_second": 468.368, | |
| "eval_steps_per_second": 59.007, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.468354430379747e-05, | |
| "loss": 0.0601, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "eval_accuracy": 0.8425197005271912, | |
| "eval_f1": 0.8496240601503758, | |
| "eval_loss": 0.7719384431838989, | |
| "eval_mcc": 0.6887765705877247, | |
| "eval_runtime": 1.0798, | |
| "eval_samples_per_second": 470.438, | |
| "eval_steps_per_second": 59.268, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "eval_accuracy": 0.8543307185173035, | |
| "eval_f1": 0.859848484848485, | |
| "eval_loss": 0.7159872651100159, | |
| "eval_mcc": 0.7114280702270771, | |
| "eval_runtime": 1.0773, | |
| "eval_samples_per_second": 471.539, | |
| "eval_steps_per_second": 59.406, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "eval_accuracy": 0.8425197005271912, | |
| "eval_f1": 0.8443579766536965, | |
| "eval_loss": 0.6608069539070129, | |
| "eval_mcc": 0.6854135160080981, | |
| "eval_runtime": 1.0746, | |
| "eval_samples_per_second": 472.729, | |
| "eval_steps_per_second": 59.556, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.1518987341772153e-05, | |
| "loss": 0.041, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "eval_accuracy": 0.8562992215156555, | |
| "eval_f1": 0.8598848368522072, | |
| "eval_loss": 0.7154455780982971, | |
| "eval_mcc": 0.7139062005896402, | |
| "eval_runtime": 1.0715, | |
| "eval_samples_per_second": 474.092, | |
| "eval_steps_per_second": 59.728, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "eval_accuracy": 0.8287401795387268, | |
| "eval_f1": 0.8391866913123845, | |
| "eval_loss": 0.9410210251808167, | |
| "eval_mcc": 0.6639879844977076, | |
| "eval_runtime": 1.0783, | |
| "eval_samples_per_second": 471.114, | |
| "eval_steps_per_second": 59.353, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.8354430379746836e-05, | |
| "loss": 0.0332, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "eval_accuracy": 0.8484252095222473, | |
| "eval_f1": 0.8481262327416174, | |
| "eval_loss": 0.8442137241363525, | |
| "eval_mcc": 0.6968557943649227, | |
| "eval_runtime": 1.074, | |
| "eval_samples_per_second": 472.992, | |
| "eval_steps_per_second": 59.59, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "eval_accuracy": 0.8307086825370789, | |
| "eval_f1": 0.8424908424908425, | |
| "eval_loss": 0.9499196410179138, | |
| "eval_mcc": 0.6699715312084875, | |
| "eval_runtime": 1.0843, | |
| "eval_samples_per_second": 468.484, | |
| "eval_steps_per_second": 59.022, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "step": 5200, | |
| "total_flos": 3.046823046955008e+16, | |
| "train_loss": 0.13669625529876123, | |
| "train_runtime": 1803.9314, | |
| "train_samples_per_second": 280.099, | |
| "train_steps_per_second": 4.379 | |
| } | |
| ], | |
| "max_steps": 7900, | |
| "num_train_epochs": 10, | |
| "total_flos": 3.046823046955008e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |