ZaandaTeika's picture
Convert model to bfloat16 and fix total_parameters metadata
aee3614 verified
{
"best_global_step": null,
"best_metric": 0.9005018183708923,
"best_model_checkpoint": null,
"epoch": 0.9947643979057592,
"eval_steps": 16,
"global_step": 760,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005235602094240838,
"grad_norm": 433.5572204589844,
"learning_rate": 5.217391304347826e-07,
"loss": 5.5489,
"step": 4
},
{
"epoch": 0.005235602094240838,
"eval_F1_err_corr": 0.2899344909681993,
"eval_accuracy": 0.33964423820572315,
"eval_correct_accuracy": 0.21863927522501062,
"eval_error_accuracy": 0.4302247894550517,
"eval_f1": 0.2573237770510055,
"eval_loss": 1.461071491241455,
"eval_pr_auc": 0.16429768646454848,
"eval_precision": 0.15202466598150052,
"eval_recall": 0.8372198324654743,
"eval_runtime": 24.9385,
"eval_samples_per_second": 196.202,
"eval_steps_per_second": 0.802,
"step": 4
},
{
"epoch": 0.010471204188481676,
"grad_norm": 424.1092834472656,
"learning_rate": 1.2173913043478262e-06,
"loss": 5.392,
"step": 8
},
{
"epoch": 0.010471204188481676,
"eval_F1_err_corr": 0.321098900363111,
"eval_accuracy": 0.3666511987625677,
"eval_correct_accuracy": 0.2498037913412515,
"eval_error_accuracy": 0.4493435370426137,
"eval_f1": 0.25948565848012445,
"eval_loss": 1.3678739070892334,
"eval_pr_auc": 0.16391947366452397,
"eval_precision": 0.15441239776151527,
"eval_recall": 0.8120896536110482,
"eval_runtime": 24.8266,
"eval_samples_per_second": 197.087,
"eval_steps_per_second": 0.806,
"step": 8
},
{
"epoch": 0.015706806282722512,
"grad_norm": 328.165771484375,
"learning_rate": 1.9130434782608697e-06,
"loss": 4.6674,
"step": 12
},
{
"epoch": 0.015706806282722512,
"eval_F1_err_corr": 0.42193034247158306,
"eval_accuracy": 0.4537664346481052,
"eval_correct_accuracy": 0.36420961710522887,
"eval_error_accuracy": 0.5013920328557274,
"eval_f1": 0.25876327610091937,
"eval_loss": 1.08595609664917,
"eval_pr_auc": 0.16343108665970787,
"eval_precision": 0.15883323026180168,
"eval_recall": 0.6977586597237945,
"eval_runtime": 24.7964,
"eval_samples_per_second": 197.327,
"eval_steps_per_second": 0.807,
"step": 12
},
{
"epoch": 0.020942408376963352,
"grad_norm": 121.70630645751953,
"learning_rate": 2.6086956521739132e-06,
"loss": 3.0944,
"step": 16
},
{
"epoch": 0.020942408376963352,
"eval_F1_err_corr": 0.721921189250657,
"eval_accuracy": 0.7358391337973704,
"eval_correct_accuracy": 0.7870115524045516,
"eval_error_accuracy": 0.6667750717278245,
"eval_f1": 0.1906928253246138,
"eval_loss": 0.5721015334129333,
"eval_pr_auc": 0.16146374192556026,
"eval_precision": 0.16400391261819366,
"eval_recall": 0.22775639574371745,
"eval_runtime": 24.8528,
"eval_samples_per_second": 196.879,
"eval_steps_per_second": 0.805,
"step": 16
},
{
"epoch": 0.02617801047120419,
"grad_norm": 18.458759307861328,
"learning_rate": 3.3043478260869567e-06,
"loss": 2.002,
"step": 20
},
{
"epoch": 0.02617801047120419,
"eval_F1_err_corr": 0.8402340555989792,
"eval_accuracy": 0.8622119102861562,
"eval_correct_accuracy": 0.9922276019965096,
"eval_error_accuracy": 0.728620881787505,
"eval_f1": 0.0035794183445190158,
"eval_loss": 0.7684443593025208,
"eval_pr_auc": 0.17251592682571912,
"eval_precision": 0.1509433962264151,
"eval_recall": 0.001811184061580258,
"eval_runtime": 24.8601,
"eval_samples_per_second": 196.822,
"eval_steps_per_second": 0.805,
"step": 20
},
{
"epoch": 0.031413612565445025,
"grad_norm": 115.0860595703125,
"learning_rate": 4.000000000000001e-06,
"loss": 3.383,
"step": 24
},
{
"epoch": 0.031413612565445025,
"eval_F1_err_corr": 0.8378682847250856,
"eval_accuracy": 0.8620881670533642,
"eval_correct_accuracy": 0.9842169035446346,
"eval_error_accuracy": 0.7294085238385144,
"eval_f1": 0.008451957295373666,
"eval_loss": 0.8194268941879272,
"eval_pr_auc": 0.18009572056722223,
"eval_precision": 0.24050632911392406,
"eval_recall": 0.004301562146253113,
"eval_runtime": 24.8294,
"eval_samples_per_second": 197.065,
"eval_steps_per_second": 0.805,
"step": 24
},
{
"epoch": 0.03664921465968586,
"grad_norm": 99.96025848388672,
"learning_rate": 4.695652173913044e-06,
"loss": 3.1857,
"step": 28
},
{
"epoch": 0.03664921465968586,
"eval_F1_err_corr": 0.8207683484774771,
"eval_accuracy": 0.8591492652745553,
"eval_correct_accuracy": 0.9328157189465495,
"eval_error_accuracy": 0.7327520943454843,
"eval_f1": 0.060268317853457175,
"eval_loss": 0.6408756971359253,
"eval_pr_auc": 0.19809419361676622,
"eval_precision": 0.3411214953271028,
"eval_recall": 0.03305410912383971,
"eval_runtime": 24.8048,
"eval_samples_per_second": 197.261,
"eval_steps_per_second": 0.806,
"step": 28
},
{
"epoch": 0.041884816753926704,
"grad_norm": 48.16204071044922,
"learning_rate": 5.391304347826088e-06,
"loss": 2.1498,
"step": 32
},
{
"epoch": 0.041884816753926704,
"eval_F1_err_corr": 0.7694538304703895,
"eval_accuracy": 0.837370456303171,
"eval_correct_accuracy": 0.8088768689020601,
"eval_error_accuracy": 0.7336950054749075,
"eval_f1": 0.21148942552872357,
"eval_loss": 0.44809016585350037,
"eval_pr_auc": 0.2318064013915099,
"eval_precision": 0.31333333333333335,
"eval_recall": 0.15961059542676023,
"eval_runtime": 24.8313,
"eval_samples_per_second": 197.05,
"eval_steps_per_second": 0.805,
"step": 32
},
{
"epoch": 0.04712041884816754,
"grad_norm": 169.05667114257812,
"learning_rate": 6.086956521739132e-06,
"loss": 1.9176,
"step": 36
},
{
"epoch": 0.04712041884816754,
"eval_F1_err_corr": 0.6219027098098009,
"eval_accuracy": 0.7121732405259087,
"eval_correct_accuracy": 0.5674362971053744,
"eval_error_accuracy": 0.687935454975843,
"eval_f1": 0.33722752528850264,
"eval_loss": 0.5731640458106995,
"eval_pr_auc": 0.2788670530837179,
"eval_precision": 0.2460243217960711,
"eval_recall": 0.5358840842200588,
"eval_runtime": 24.8463,
"eval_samples_per_second": 196.931,
"eval_steps_per_second": 0.805,
"step": 36
},
{
"epoch": 0.05235602094240838,
"grad_norm": 114.60936737060547,
"learning_rate": 6.782608695652174e-06,
"loss": 2.0171,
"step": 40
},
{
"epoch": 0.05235602094240838,
"eval_F1_err_corr": 0.7430224807525013,
"eval_accuracy": 0.7990719257540603,
"eval_correct_accuracy": 0.7547544657206874,
"eval_error_accuracy": 0.7316496396965659,
"eval_f1": 0.35211970074812965,
"eval_loss": 0.4375106692314148,
"eval_pr_auc": 0.3012571706540082,
"eval_precision": 0.3147289586305278,
"eval_recall": 0.39959248358614446,
"eval_runtime": 24.8039,
"eval_samples_per_second": 197.268,
"eval_steps_per_second": 0.806,
"step": 40
},
{
"epoch": 0.05759162303664921,
"grad_norm": 32.457340240478516,
"learning_rate": 7.478260869565218e-06,
"loss": 1.611,
"step": 44
},
{
"epoch": 0.05759162303664921,
"eval_F1_err_corr": 0.8419241583637731,
"eval_accuracy": 0.862830626450116,
"eval_correct_accuracy": 0.9871360561186291,
"eval_error_accuracy": 0.7339561045659535,
"eval_f1": 0.09914668833807395,
"eval_loss": 0.38381654024124146,
"eval_pr_auc": 0.3152015073109494,
"eval_precision": 0.48316831683168315,
"eval_recall": 0.05524111387819787,
"eval_runtime": 24.832,
"eval_samples_per_second": 197.044,
"eval_steps_per_second": 0.805,
"step": 44
},
{
"epoch": 0.06282722513089005,
"grad_norm": 22.770957946777344,
"learning_rate": 8.173913043478263e-06,
"loss": 1.5516,
"step": 48
},
{
"epoch": 0.06282722513089005,
"eval_F1_err_corr": 0.8403483674599067,
"eval_accuracy": 0.859891724671307,
"eval_correct_accuracy": 0.9726090387657613,
"eval_error_accuracy": 0.7397527603961362,
"eval_f1": 0.19484444444444443,
"eval_loss": 0.355484277009964,
"eval_pr_auc": 0.3399032342610957,
"eval_precision": 0.45364238410596025,
"eval_recall": 0.12406610821824768,
"eval_runtime": 24.7998,
"eval_samples_per_second": 197.3,
"eval_steps_per_second": 0.806,
"step": 48
},
{
"epoch": 0.06806282722513089,
"grad_norm": 69.65153503417969,
"learning_rate": 8.869565217391306e-06,
"loss": 1.4648,
"step": 52
},
{
"epoch": 0.06806282722513089,
"eval_F1_err_corr": 0.8036896045473728,
"eval_accuracy": 0.8478267594740913,
"eval_correct_accuracy": 0.8494371774369468,
"eval_error_accuracy": 0.762617804842102,
"eval_f1": 0.38658186806334954,
"eval_loss": 0.36472800374031067,
"eval_pr_auc": 0.38944473786040484,
"eval_precision": 0.4303164908384231,
"eval_recall": 0.350916911931175,
"eval_runtime": 24.8345,
"eval_samples_per_second": 197.025,
"eval_steps_per_second": 0.805,
"step": 52
},
{
"epoch": 0.07329842931937172,
"grad_norm": 9.83507251739502,
"learning_rate": 9.565217391304349e-06,
"loss": 1.3967,
"step": 56
},
{
"epoch": 0.07329842931937172,
"eval_F1_err_corr": 0.8296970467203022,
"eval_accuracy": 0.8688321732405259,
"eval_correct_accuracy": 0.9181484628791651,
"eval_error_accuracy": 0.7567903433781845,
"eval_f1": 0.29262595929262597,
"eval_loss": 0.33204466104507446,
"eval_pr_auc": 0.41163972835541246,
"eval_precision": 0.5561192136968929,
"eval_recall": 0.19855105275073578,
"eval_runtime": 24.8293,
"eval_samples_per_second": 197.066,
"eval_steps_per_second": 0.805,
"step": 56
},
{
"epoch": 0.07853403141361257,
"grad_norm": 16.36752700805664,
"learning_rate": 1.0260869565217393e-05,
"loss": 1.2944,
"step": 60
},
{
"epoch": 0.07853403141361257,
"eval_F1_err_corr": 0.8421678884358804,
"eval_accuracy": 0.8736581593194123,
"eval_correct_accuracy": 0.9276144097142335,
"eval_error_accuracy": 0.7711353234659966,
"eval_f1": 0.38438347904733194,
"eval_loss": 0.31788310408592224,
"eval_pr_auc": 0.4608372769142751,
"eval_precision": 0.5751014884979703,
"eval_recall": 0.28865745981435365,
"eval_runtime": 24.8595,
"eval_samples_per_second": 196.826,
"eval_steps_per_second": 0.805,
"step": 60
},
{
"epoch": 0.08376963350785341,
"grad_norm": 17.503982543945312,
"learning_rate": 1.0956521739130435e-05,
"loss": 1.284,
"step": 64
},
{
"epoch": 0.08376963350785341,
"eval_F1_err_corr": 0.8579401681935629,
"eval_accuracy": 0.8769682907965971,
"eval_correct_accuracy": 0.9478163203454459,
"eval_error_accuracy": 0.7836326415058088,
"eval_f1": 0.4385147536354652,
"eval_loss": 0.30793023109436035,
"eval_pr_auc": 0.49259983395831536,
"eval_precision": 0.5825206301575394,
"eval_recall": 0.3515961059542676,
"eval_runtime": 24.8423,
"eval_samples_per_second": 196.962,
"eval_steps_per_second": 0.805,
"step": 64
},
{
"epoch": 0.08900523560209424,
"grad_norm": 18.571523666381836,
"learning_rate": 1.1652173913043478e-05,
"loss": 1.191,
"step": 68
},
{
"epoch": 0.08900523560209424,
"eval_F1_err_corr": 0.8618882109239977,
"eval_accuracy": 0.8807424593967518,
"eval_correct_accuracy": 0.9652649658606812,
"eval_error_accuracy": 0.7785120852969489,
"eval_f1": 0.38741458763705705,
"eval_loss": 0.30222997069358826,
"eval_pr_auc": 0.5105597340335465,
"eval_precision": 0.6497867803837953,
"eval_recall": 0.2759791713832918,
"eval_runtime": 24.8386,
"eval_samples_per_second": 196.992,
"eval_steps_per_second": 0.805,
"step": 68
},
{
"epoch": 0.09424083769633508,
"grad_norm": 19.51681900024414,
"learning_rate": 1.2347826086956523e-05,
"loss": 1.1905,
"step": 72
},
{
"epoch": 0.09424083769633508,
"eval_F1_err_corr": 0.8556927040319808,
"eval_accuracy": 0.8829079659706109,
"eval_correct_accuracy": 0.9252440207528985,
"eval_error_accuracy": 0.7958667759923117,
"eval_f1": 0.46682631356529086,
"eval_loss": 0.2958272099494934,
"eval_pr_auc": 0.5272941790977752,
"eval_precision": 0.6178225205070843,
"eval_recall": 0.37514149875481095,
"eval_runtime": 24.8293,
"eval_samples_per_second": 197.065,
"eval_steps_per_second": 0.805,
"step": 72
},
{
"epoch": 0.09947643979057591,
"grad_norm": 21.868053436279297,
"learning_rate": 1.3043478260869566e-05,
"loss": 1.1759,
"step": 76
},
{
"epoch": 0.09947643979057591,
"eval_F1_err_corr": 0.8671239387996902,
"eval_accuracy": 0.8833720030935808,
"eval_correct_accuracy": 0.965744942725071,
"eval_error_accuracy": 0.7867787965661607,
"eval_f1": 0.43932183224271265,
"eval_loss": 0.2900922894477844,
"eval_pr_auc": 0.5398410773230814,
"eval_precision": 0.6402254009536195,
"eval_recall": 0.33438985736925514,
"eval_runtime": 24.826,
"eval_samples_per_second": 197.092,
"eval_steps_per_second": 0.806,
"step": 76
},
{
"epoch": 0.10471204188481675,
"grad_norm": 11.858696937561035,
"learning_rate": 1.373913043478261e-05,
"loss": 1.1804,
"step": 80
},
{
"epoch": 0.10471204188481675,
"eval_F1_err_corr": 0.8643274810534162,
"eval_accuracy": 0.8850425367362722,
"eval_correct_accuracy": 0.951882356363745,
"eval_error_accuracy": 0.7915226184557567,
"eval_f1": 0.4779432424838438,
"eval_loss": 0.2871633768081665,
"eval_pr_auc": 0.5513156434574297,
"eval_precision": 0.6297667530544243,
"eval_recall": 0.38510301109350237,
"eval_runtime": 24.8425,
"eval_samples_per_second": 196.96,
"eval_steps_per_second": 0.805,
"step": 80
},
{
"epoch": 0.1099476439790576,
"grad_norm": 8.166620254516602,
"learning_rate": 1.4434782608695654e-05,
"loss": 1.1212,
"step": 84
},
{
"epoch": 0.11518324607329843,
"grad_norm": 4.340336799621582,
"learning_rate": 1.5130434782608697e-05,
"loss": 1.1325,
"step": 88
},
{
"epoch": 0.12041884816753927,
"grad_norm": 27.051570892333984,
"learning_rate": 1.582608695652174e-05,
"loss": 1.1218,
"step": 92
},
{
"epoch": 0.1256544502617801,
"grad_norm": 22.343820571899414,
"learning_rate": 1.6521739130434785e-05,
"loss": 1.1068,
"step": 96
},
{
"epoch": 0.13089005235602094,
"grad_norm": 47.00363540649414,
"learning_rate": 1.721739130434783e-05,
"loss": 1.1034,
"step": 100
},
{
"epoch": 0.13612565445026178,
"grad_norm": 40.41328048706055,
"learning_rate": 1.791304347826087e-05,
"loss": 1.1235,
"step": 104
},
{
"epoch": 0.14136125654450263,
"grad_norm": 31.55730628967285,
"learning_rate": 1.8608695652173912e-05,
"loss": 1.0747,
"step": 108
},
{
"epoch": 0.14659685863874344,
"grad_norm": 2.652536392211914,
"learning_rate": 1.9304347826086957e-05,
"loss": 0.9891,
"step": 112
},
{
"epoch": 0.1518324607329843,
"grad_norm": 3.2267162799835205,
"learning_rate": 2e-05,
"loss": 0.9607,
"step": 116
},
{
"epoch": 0.15706806282722513,
"grad_norm": 21.89421272277832,
"learning_rate": 1.9999942480792804e-05,
"loss": 1.0643,
"step": 120
},
{
"epoch": 0.15706806282722513,
"eval_F1_err_corr": 0.8734264964691199,
"eval_accuracy": 0.8929311678267595,
"eval_correct_accuracy": 0.9278371473433551,
"eval_error_accuracy": 0.8250438945941904,
"eval_f1": 0.5815499939547818,
"eval_loss": 0.2678382694721222,
"eval_pr_auc": 0.6347920534332054,
"eval_precision": 0.6240269849507005,
"eval_recall": 0.544487208512565,
"eval_runtime": 24.8153,
"eval_samples_per_second": 197.177,
"eval_steps_per_second": 0.806,
"step": 120
},
{
"epoch": 0.16230366492146597,
"grad_norm": 8.081486701965332,
"learning_rate": 1.999976992383291e-05,
"loss": 1.0189,
"step": 124
},
{
"epoch": 0.16753926701570682,
"grad_norm": 17.748775482177734,
"learning_rate": 1.9999482331105377e-05,
"loss": 0.9898,
"step": 128
},
{
"epoch": 0.17277486910994763,
"grad_norm": 41.294334411621094,
"learning_rate": 1.9999079705918636e-05,
"loss": 1.0795,
"step": 132
},
{
"epoch": 0.17801047120418848,
"grad_norm": 4.425788879394531,
"learning_rate": 1.999856205290442e-05,
"loss": 1.0274,
"step": 136
},
{
"epoch": 0.18324607329842932,
"grad_norm": 26.085590362548828,
"learning_rate": 1.9997929378017723e-05,
"loss": 0.9516,
"step": 140
},
{
"epoch": 0.18848167539267016,
"grad_norm": 18.811126708984375,
"learning_rate": 1.9997181688536746e-05,
"loss": 0.966,
"step": 144
},
{
"epoch": 0.193717277486911,
"grad_norm": 22.464527130126953,
"learning_rate": 1.999631899306278e-05,
"loss": 0.8932,
"step": 148
},
{
"epoch": 0.19895287958115182,
"grad_norm": 8.309951782226562,
"learning_rate": 1.999534130152014e-05,
"loss": 0.9756,
"step": 152
},
{
"epoch": 0.20418848167539266,
"grad_norm": 4.516532897949219,
"learning_rate": 1.999424862515604e-05,
"loss": 0.998,
"step": 156
},
{
"epoch": 0.2094240837696335,
"grad_norm": 10.015279769897461,
"learning_rate": 1.999304097654045e-05,
"loss": 0.9015,
"step": 160
},
{
"epoch": 0.2094240837696335,
"eval_F1_err_corr": 0.885087159946509,
"eval_accuracy": 0.9020572312451662,
"eval_correct_accuracy": 0.95333342698488,
"eval_error_accuracy": 0.8259592279571245,
"eval_f1": 0.5984271943176053,
"eval_loss": 0.24851758778095245,
"eval_pr_auc": 0.6675246054619536,
"eval_precision": 0.6804153446783963,
"eval_recall": 0.5340729001584786,
"eval_runtime": 24.8104,
"eval_samples_per_second": 197.216,
"eval_steps_per_second": 0.806,
"step": 160
},
{
"epoch": 0.21465968586387435,
"grad_norm": 14.583905220031738,
"learning_rate": 1.999171836956597e-05,
"loss": 0.9587,
"step": 164
},
{
"epoch": 0.2198952879581152,
"grad_norm": 9.168513298034668,
"learning_rate": 1.9990280819447662e-05,
"loss": 0.9663,
"step": 168
},
{
"epoch": 0.225130890052356,
"grad_norm": 24.278688430786133,
"learning_rate": 1.998872834272287e-05,
"loss": 0.9679,
"step": 172
},
{
"epoch": 0.23036649214659685,
"grad_norm": 23.693418502807617,
"learning_rate": 1.9987060957251047e-05,
"loss": 0.9541,
"step": 176
},
{
"epoch": 0.2356020942408377,
"grad_norm": 34.47703170776367,
"learning_rate": 1.9985278682213525e-05,
"loss": 0.8988,
"step": 180
},
{
"epoch": 0.24083769633507854,
"grad_norm": 17.93362045288086,
"learning_rate": 1.9983381538113317e-05,
"loss": 0.9296,
"step": 184
},
{
"epoch": 0.24607329842931938,
"grad_norm": 23.294275283813477,
"learning_rate": 1.998136954677487e-05,
"loss": 0.9337,
"step": 188
},
{
"epoch": 0.2513089005235602,
"grad_norm": 19.78593635559082,
"learning_rate": 1.9979242731343803e-05,
"loss": 0.8976,
"step": 192
},
{
"epoch": 0.25654450261780104,
"grad_norm": 16.300464630126953,
"learning_rate": 1.9977001116286675e-05,
"loss": 0.8705,
"step": 196
},
{
"epoch": 0.2617801047120419,
"grad_norm": 26.935935974121094,
"learning_rate": 1.9974644727390665e-05,
"loss": 0.8758,
"step": 200
},
{
"epoch": 0.2617801047120419,
"eval_F1_err_corr": 0.8910747356279248,
"eval_accuracy": 0.9052126836813612,
"eval_correct_accuracy": 0.9761037985940583,
"eval_error_accuracy": 0.819672508302841,
"eval_f1": 0.558119411595039,
"eval_loss": 0.24936090409755707,
"eval_pr_auc": 0.6830633725429478,
"eval_precision": 0.768772348033373,
"eval_recall": 0.4380801448947249,
"eval_runtime": 24.8593,
"eval_samples_per_second": 196.827,
"eval_steps_per_second": 0.805,
"step": 200
},
{
"epoch": 0.2670157068062827,
"grad_norm": 26.804174423217773,
"learning_rate": 1.9972173591763297e-05,
"loss": 0.9957,
"step": 204
},
{
"epoch": 0.27225130890052357,
"grad_norm": 12.255861282348633,
"learning_rate": 1.996958773783213e-05,
"loss": 0.8614,
"step": 208
},
{
"epoch": 0.2774869109947644,
"grad_norm": 10.577012062072754,
"learning_rate": 1.9966887195344403e-05,
"loss": 0.8539,
"step": 212
},
{
"epoch": 0.28272251308900526,
"grad_norm": 9.850268363952637,
"learning_rate": 1.9964071995366744e-05,
"loss": 0.8184,
"step": 216
},
{
"epoch": 0.2879581151832461,
"grad_norm": 4.022161960601807,
"learning_rate": 1.9961142170284762e-05,
"loss": 0.783,
"step": 220
},
{
"epoch": 0.2931937172774869,
"grad_norm": 4.174556732177734,
"learning_rate": 1.9958097753802693e-05,
"loss": 0.8355,
"step": 224
},
{
"epoch": 0.29842931937172773,
"grad_norm": 8.559288024902344,
"learning_rate": 1.9954938780943034e-05,
"loss": 0.8081,
"step": 228
},
{
"epoch": 0.3036649214659686,
"grad_norm": 11.881876945495605,
"learning_rate": 1.9951665288046098e-05,
"loss": 0.8846,
"step": 232
},
{
"epoch": 0.3089005235602094,
"grad_norm": 9.480097770690918,
"learning_rate": 1.994827731276963e-05,
"loss": 0.869,
"step": 236
},
{
"epoch": 0.31413612565445026,
"grad_norm": 18.96599006652832,
"learning_rate": 1.9944774894088367e-05,
"loss": 0.9044,
"step": 240
},
{
"epoch": 0.31413612565445026,
"eval_F1_err_corr": 0.8903583524392616,
"eval_accuracy": 0.8976334106728538,
"eval_correct_accuracy": 0.9422891260099501,
"eval_error_accuracy": 0.8438525462118894,
"eval_f1": 0.6341625207296849,
"eval_loss": 0.25486111640930176,
"eval_pr_auc": 0.6936322312463549,
"eval_precision": 0.6197061365600691,
"eval_recall": 0.6493094860765225,
"eval_runtime": 24.7931,
"eval_samples_per_second": 197.353,
"eval_steps_per_second": 0.807,
"step": 240
},
{
"epoch": 0.3193717277486911,
"grad_norm": 7.49755859375,
"learning_rate": 1.994115807229357e-05,
"loss": 0.8702,
"step": 244
},
{
"epoch": 0.32460732984293195,
"grad_norm": 19.93411636352539,
"learning_rate": 1.993742688899259e-05,
"loss": 0.8357,
"step": 248
},
{
"epoch": 0.3298429319371728,
"grad_norm": 18.435436248779297,
"learning_rate": 1.9933581387108358e-05,
"loss": 0.8185,
"step": 252
},
{
"epoch": 0.33507853403141363,
"grad_norm": 23.072092056274414,
"learning_rate": 1.992962161087893e-05,
"loss": 0.8371,
"step": 256
},
{
"epoch": 0.3403141361256545,
"grad_norm": 11.625171661376953,
"learning_rate": 1.9925547605856937e-05,
"loss": 0.8276,
"step": 260
},
{
"epoch": 0.34554973821989526,
"grad_norm": 18.671037673950195,
"learning_rate": 1.992135941890909e-05,
"loss": 0.8253,
"step": 264
},
{
"epoch": 0.3507853403141361,
"grad_norm": 15.393129348754883,
"learning_rate": 1.9917057098215624e-05,
"loss": 0.8245,
"step": 268
},
{
"epoch": 0.35602094240837695,
"grad_norm": 9.267082214355469,
"learning_rate": 1.9912640693269754e-05,
"loss": 0.8451,
"step": 272
},
{
"epoch": 0.3612565445026178,
"grad_norm": 5.4926252365112305,
"learning_rate": 1.9908110254877107e-05,
"loss": 0.813,
"step": 276
},
{
"epoch": 0.36649214659685864,
"grad_norm": 6.064371585845947,
"learning_rate": 1.9903465835155124e-05,
"loss": 0.7553,
"step": 280
},
{
"epoch": 0.36649214659685864,
"eval_F1_err_corr": 0.898106732050316,
"eval_accuracy": 0.9078112915699923,
"eval_correct_accuracy": 0.9649030769491357,
"eval_error_accuracy": 0.8399597119400094,
"eval_f1": 0.624117053481332,
"eval_loss": 0.23855358362197876,
"eval_pr_auc": 0.697922245841014,
"eval_precision": 0.704642551979493,
"eval_recall": 0.5601086710436948,
"eval_runtime": 24.8196,
"eval_samples_per_second": 197.143,
"eval_steps_per_second": 0.806,
"step": 280
},
{
"epoch": 0.3717277486910995,
"grad_norm": 11.443989753723145,
"learning_rate": 1.9898707487532475e-05,
"loss": 0.7992,
"step": 284
},
{
"epoch": 0.3769633507853403,
"grad_norm": 9.889354705810547,
"learning_rate": 1.9893835266748437e-05,
"loss": 0.8425,
"step": 288
},
{
"epoch": 0.38219895287958117,
"grad_norm": 6.687994480133057,
"learning_rate": 1.9888849228852262e-05,
"loss": 0.8465,
"step": 292
},
{
"epoch": 0.387434554973822,
"grad_norm": 3.455092430114746,
"learning_rate": 1.988374943120254e-05,
"loss": 0.8098,
"step": 296
},
{
"epoch": 0.39267015706806285,
"grad_norm": 4.258669376373291,
"learning_rate": 1.987853593246654e-05,
"loss": 0.8263,
"step": 300
},
{
"epoch": 0.39790575916230364,
"grad_norm": 5.940682888031006,
"learning_rate": 1.9873208792619517e-05,
"loss": 0.7651,
"step": 304
},
{
"epoch": 0.4031413612565445,
"grad_norm": 5.644289493560791,
"learning_rate": 1.9867768072944047e-05,
"loss": 0.7919,
"step": 308
},
{
"epoch": 0.4083769633507853,
"grad_norm": 6.426525115966797,
"learning_rate": 1.9862213836029308e-05,
"loss": 0.7661,
"step": 312
},
{
"epoch": 0.41361256544502617,
"grad_norm": 7.790468215942383,
"learning_rate": 1.985654614577036e-05,
"loss": 0.7592,
"step": 316
},
{
"epoch": 0.418848167539267,
"grad_norm": 8.240925788879395,
"learning_rate": 1.985076506736741e-05,
"loss": 0.7935,
"step": 320
},
{
"epoch": 0.418848167539267,
"eval_F1_err_corr": 0.8892707173263128,
"eval_accuracy": 0.900108275328693,
"eval_correct_accuracy": 0.9416031342860438,
"eval_error_accuracy": 0.8424490839609798,
"eval_f1": 0.636169014084507,
"eval_loss": 0.24991166591644287,
"eval_pr_auc": 0.6999774937080984,
"eval_precision": 0.6332436069986541,
"eval_recall": 0.6391215757301336,
"eval_runtime": 24.8123,
"eval_samples_per_second": 197.2,
"eval_steps_per_second": 0.806,
"step": 320
},
{
"epoch": 0.42408376963350786,
"grad_norm": 6.823334217071533,
"learning_rate": 1.9844870667325073e-05,
"loss": 0.7347,
"step": 324
},
{
"epoch": 0.4293193717277487,
"grad_norm": 4.039069175720215,
"learning_rate": 1.9838863013451587e-05,
"loss": 0.7886,
"step": 328
},
{
"epoch": 0.43455497382198954,
"grad_norm": 7.6934380531311035,
"learning_rate": 1.9832742174858052e-05,
"loss": 0.7608,
"step": 332
},
{
"epoch": 0.4397905759162304,
"grad_norm": 9.409914016723633,
"learning_rate": 1.9826508221957624e-05,
"loss": 0.7466,
"step": 336
},
{
"epoch": 0.44502617801047123,
"grad_norm": 7.726130962371826,
"learning_rate": 1.9820161226464708e-05,
"loss": 0.7023,
"step": 340
},
{
"epoch": 0.450261780104712,
"grad_norm": 3.726100206375122,
"learning_rate": 1.9813701261394136e-05,
"loss": 0.7078,
"step": 344
},
{
"epoch": 0.45549738219895286,
"grad_norm": 12.017361640930176,
"learning_rate": 1.980712840106032e-05,
"loss": 0.7383,
"step": 348
},
{
"epoch": 0.4607329842931937,
"grad_norm": 5.709269046783447,
"learning_rate": 1.9800442721076406e-05,
"loss": 0.7215,
"step": 352
},
{
"epoch": 0.46596858638743455,
"grad_norm": 12.649430274963379,
"learning_rate": 1.979364429835339e-05,
"loss": 0.7111,
"step": 356
},
{
"epoch": 0.4712041884816754,
"grad_norm": 16.15489959716797,
"learning_rate": 1.9786733211099257e-05,
"loss": 0.7764,
"step": 360
},
{
"epoch": 0.4712041884816754,
"eval_F1_err_corr": 0.894511960241892,
"eval_accuracy": 0.9100077339520495,
"eval_correct_accuracy": 0.9712793351142024,
"eval_error_accuracy": 0.8289907059644579,
"eval_f1": 0.5971472095277662,
"eval_loss": 0.2414369434118271,
"eval_pr_auc": 0.7108638111158798,
"eval_precision": 0.7689015691868759,
"eval_recall": 0.48811410459587956,
"eval_runtime": 25.0196,
"eval_samples_per_second": 195.567,
"eval_steps_per_second": 0.799,
"step": 360
},
{
"epoch": 0.47643979057591623,
"grad_norm": 12.530599594116211,
"learning_rate": 1.9779709538818052e-05,
"loss": 0.7715,
"step": 364
},
{
"epoch": 0.4816753926701571,
"grad_norm": 6.7939605712890625,
"learning_rate": 1.9772573362308992e-05,
"loss": 0.7522,
"step": 368
},
{
"epoch": 0.4869109947643979,
"grad_norm": 3.4304537773132324,
"learning_rate": 1.9765324763665516e-05,
"loss": 0.7511,
"step": 372
},
{
"epoch": 0.49214659685863876,
"grad_norm": 6.636844158172607,
"learning_rate": 1.9757963826274357e-05,
"loss": 0.7121,
"step": 376
},
{
"epoch": 0.4973821989528796,
"grad_norm": 4.51839017868042,
"learning_rate": 1.975049063481457e-05,
"loss": 0.7231,
"step": 380
},
{
"epoch": 0.5026178010471204,
"grad_norm": 9.865214347839355,
"learning_rate": 1.974290527525657e-05,
"loss": 0.762,
"step": 384
},
{
"epoch": 0.5078534031413613,
"grad_norm": 3.440359592437744,
"learning_rate": 1.9735207834861117e-05,
"loss": 0.7169,
"step": 388
},
{
"epoch": 0.5130890052356021,
"grad_norm": 3.5312769412994385,
"learning_rate": 1.972739840217836e-05,
"loss": 0.73,
"step": 392
},
{
"epoch": 0.518324607329843,
"grad_norm": 4.723533630371094,
"learning_rate": 1.9719477067046768e-05,
"loss": 0.6783,
"step": 396
},
{
"epoch": 0.5235602094240838,
"grad_norm": 3.5356740951538086,
"learning_rate": 1.971144392059212e-05,
"loss": 0.7155,
"step": 400
},
{
"epoch": 0.5235602094240838,
"eval_F1_err_corr": 0.893120798984817,
"eval_accuracy": 0.902954369682908,
"eval_correct_accuracy": 0.9461320280124133,
"eval_error_accuracy": 0.8457347701138861,
"eval_f1": 0.639051892762628,
"eval_loss": 0.24243153631687164,
"eval_pr_auc": 0.7029855391245526,
"eval_precision": 0.6497426298549368,
"eval_recall": 0.6287072673760471,
"eval_runtime": 24.8233,
"eval_samples_per_second": 197.113,
"eval_steps_per_second": 0.806,
"step": 400
},
{
"epoch": 0.5287958115183246,
"grad_norm": 13.087606430053711,
"learning_rate": 1.970329905522647e-05,
"loss": 0.7007,
"step": 404
},
{
"epoch": 0.5340314136125655,
"grad_norm": 14.260698318481445,
"learning_rate": 1.9695042564647045e-05,
"loss": 0.6817,
"step": 408
},
{
"epoch": 0.5392670157068062,
"grad_norm": 9.661425590515137,
"learning_rate": 1.9686674543835208e-05,
"loss": 0.7358,
"step": 412
},
{
"epoch": 0.5445026178010471,
"grad_norm": 5.698840618133545,
"learning_rate": 1.9678195089055347e-05,
"loss": 0.6646,
"step": 416
},
{
"epoch": 0.5497382198952879,
"grad_norm": 5.9759907722473145,
"learning_rate": 1.9669604297853766e-05,
"loss": 0.73,
"step": 420
},
{
"epoch": 0.5549738219895288,
"grad_norm": 4.276744842529297,
"learning_rate": 1.9660902269057558e-05,
"loss": 0.712,
"step": 424
},
{
"epoch": 0.5602094240837696,
"grad_norm": 4.572305679321289,
"learning_rate": 1.9652089102773487e-05,
"loss": 0.7033,
"step": 428
},
{
"epoch": 0.5654450261780105,
"grad_norm": 3.9941539764404297,
"learning_rate": 1.9643164900386824e-05,
"loss": 0.6695,
"step": 432
},
{
"epoch": 0.5706806282722513,
"grad_norm": 4.321977138519287,
"learning_rate": 1.963412976456017e-05,
"loss": 0.709,
"step": 436
},
{
"epoch": 0.5759162303664922,
"grad_norm": 4.374669551849365,
"learning_rate": 1.96249837992323e-05,
"loss": 0.6815,
"step": 440
},
{
"epoch": 0.5759162303664922,
"eval_F1_err_corr": 0.8937597915811933,
"eval_accuracy": 0.9036968290796598,
"eval_correct_accuracy": 0.9500814005540427,
"eval_error_accuracy": 0.8437420660571459,
"eval_f1": 0.6368832380730199,
"eval_loss": 0.24286404252052307,
"eval_pr_auc": 0.7035206327309997,
"eval_precision": 0.6568816169393648,
"eval_recall": 0.618066561014263,
"eval_runtime": 24.8231,
"eval_samples_per_second": 197.115,
"eval_steps_per_second": 0.806,
"step": 440
},
{
"epoch": 0.581151832460733,
"grad_norm": 3.3900415897369385,
"learning_rate": 1.961572710961695e-05,
"loss": 0.6042,
"step": 444
},
{
"epoch": 0.5863874345549738,
"grad_norm": 3.9020636081695557,
"learning_rate": 1.9606359802201608e-05,
"loss": 0.6541,
"step": 448
},
{
"epoch": 0.5916230366492147,
"grad_norm": 3.2324304580688477,
"learning_rate": 1.9596881984746288e-05,
"loss": 0.664,
"step": 452
},
{
"epoch": 0.5968586387434555,
"grad_norm": 3.6972060203552246,
"learning_rate": 1.958729376628231e-05,
"loss": 0.6325,
"step": 456
},
{
"epoch": 0.6020942408376964,
"grad_norm": 4.679067134857178,
"learning_rate": 1.957759525711101e-05,
"loss": 0.6851,
"step": 460
},
{
"epoch": 0.6073298429319371,
"grad_norm": 6.575286865234375,
"learning_rate": 1.9567786568802503e-05,
"loss": 0.6266,
"step": 464
},
{
"epoch": 0.612565445026178,
"grad_norm": 6.148586273193359,
"learning_rate": 1.9557867814194385e-05,
"loss": 0.6887,
"step": 468
},
{
"epoch": 0.6178010471204188,
"grad_norm": 3.9649710655212402,
"learning_rate": 1.9547839107390435e-05,
"loss": 0.6448,
"step": 472
},
{
"epoch": 0.6230366492146597,
"grad_norm": 3.5095326900482178,
"learning_rate": 1.9537700563759303e-05,
"loss": 0.6793,
"step": 476
},
{
"epoch": 0.6282722513089005,
"grad_norm": 5.709955215454102,
"learning_rate": 1.9527452299933192e-05,
"loss": 0.6321,
"step": 480
},
{
"epoch": 0.6282722513089005,
"eval_F1_err_corr": 0.8922176723044,
"eval_accuracy": 0.8975096674400619,
"eval_correct_accuracy": 0.9449689114373253,
"eval_error_accuracy": 0.8450445368681248,
"eval_f1": 0.6403994355801584,
"eval_loss": 0.25328728556632996,
"eval_pr_auc": 0.6997538853349474,
"eval_precision": 0.6150959132610508,
"eval_recall": 0.6678741227077202,
"eval_runtime": 24.8167,
"eval_samples_per_second": 197.166,
"eval_steps_per_second": 0.806,
"step": 480
},
{
"epoch": 0.6335078534031413,
"grad_norm": 3.6896157264709473,
"learning_rate": 1.95170944338065e-05,
"loss": 0.6806,
"step": 484
},
{
"epoch": 0.6387434554973822,
"grad_norm": 4.03073263168335,
"learning_rate": 1.9506627084534486e-05,
"loss": 0.6133,
"step": 488
},
{
"epoch": 0.643979057591623,
"grad_norm": 6.4314751625061035,
"learning_rate": 1.9496050372531864e-05,
"loss": 0.6098,
"step": 492
},
{
"epoch": 0.6492146596858639,
"grad_norm": 3.8455100059509277,
"learning_rate": 1.9485364419471454e-05,
"loss": 0.6306,
"step": 496
},
{
"epoch": 0.6544502617801047,
"grad_norm": 3.8784000873565674,
"learning_rate": 1.9474569348282774e-05,
"loss": 0.6104,
"step": 500
},
{
"epoch": 0.6596858638743456,
"grad_norm": 5.018595218658447,
"learning_rate": 1.9463665283150604e-05,
"loss": 0.6592,
"step": 504
},
{
"epoch": 0.6649214659685864,
"grad_norm": 3.5282726287841797,
"learning_rate": 1.9452652349513587e-05,
"loss": 0.621,
"step": 508
},
{
"epoch": 0.6701570680628273,
"grad_norm": 3.4036905765533447,
"learning_rate": 1.9441530674062754e-05,
"loss": 0.6744,
"step": 512
},
{
"epoch": 0.675392670157068,
"grad_norm": 4.95082950592041,
"learning_rate": 1.9430300384740108e-05,
"loss": 0.5925,
"step": 516
},
{
"epoch": 0.680628272251309,
"grad_norm": 5.078342437744141,
"learning_rate": 1.941896161073711e-05,
"loss": 0.5913,
"step": 520
},
{
"epoch": 0.680628272251309,
"eval_F1_err_corr": 0.885156181305656,
"eval_accuracy": 0.8942304717710751,
"eval_correct_accuracy": 0.9306883336673133,
"eval_error_accuracy": 0.8438713827505521,
"eval_f1": 0.6393079438759363,
"eval_loss": 0.27150195837020874,
"eval_pr_auc": 0.6992222071782436,
"eval_precision": 0.5985776372975109,
"eval_recall": 0.6859859633235228,
"eval_runtime": 24.819,
"eval_samples_per_second": 197.147,
"eval_steps_per_second": 0.806,
"step": 520
},
{
"epoch": 0.6858638743455497,
"grad_norm": 5.81033182144165,
"learning_rate": 1.9407514482493214e-05,
"loss": 0.6133,
"step": 524
},
{
"epoch": 0.6910994764397905,
"grad_norm": 4.901327133178711,
"learning_rate": 1.939595913169438e-05,
"loss": 0.6121,
"step": 528
},
{
"epoch": 0.6963350785340314,
"grad_norm": 3.7869937419891357,
"learning_rate": 1.9384295691271523e-05,
"loss": 0.5822,
"step": 532
},
{
"epoch": 0.7015706806282722,
"grad_norm": 3.8648629188537598,
"learning_rate": 1.9372524295399014e-05,
"loss": 0.6032,
"step": 536
},
{
"epoch": 0.7068062827225131,
"grad_norm": 3.9610342979431152,
"learning_rate": 1.9360645079493126e-05,
"loss": 0.59,
"step": 540
},
{
"epoch": 0.7120418848167539,
"grad_norm": 5.623746395111084,
"learning_rate": 1.9348658180210473e-05,
"loss": 0.5835,
"step": 544
},
{
"epoch": 0.7172774869109948,
"grad_norm": 6.02370548248291,
"learning_rate": 1.933656373544645e-05,
"loss": 0.6003,
"step": 548
},
{
"epoch": 0.7225130890052356,
"grad_norm": 5.652750492095947,
"learning_rate": 1.932436188433362e-05,
"loss": 0.5958,
"step": 552
},
{
"epoch": 0.7277486910994765,
"grad_norm": 7.355208396911621,
"learning_rate": 1.9312052767240153e-05,
"loss": 0.5677,
"step": 556
},
{
"epoch": 0.7329842931937173,
"grad_norm": 4.652146339416504,
"learning_rate": 1.9299636525768176e-05,
"loss": 0.5649,
"step": 560
},
{
"epoch": 0.7329842931937173,
"eval_F1_err_corr": 0.8974946334360716,
"eval_accuracy": 0.9049033255993812,
"eval_correct_accuracy": 0.9592731998252757,
"eval_error_accuracy": 0.843191870706177,
"eval_f1": 0.6410555815039701,
"eval_loss": 0.24959486722946167,
"eval_pr_auc": 0.6979561382710899,
"eval_precision": 0.6619242826139378,
"eval_recall": 0.621462531129726,
"eval_runtime": 24.817,
"eval_samples_per_second": 197.163,
"eval_steps_per_second": 0.806,
"step": 560
},
{
"epoch": 0.7382198952879581,
"grad_norm": 5.073575019836426,
"learning_rate": 1.9287113302752167e-05,
"loss": 0.5491,
"step": 564
},
{
"epoch": 0.743455497382199,
"grad_norm": 4.796985149383545,
"learning_rate": 1.927448324225729e-05,
"loss": 0.5849,
"step": 568
},
{
"epoch": 0.7486910994764397,
"grad_norm": 6.055835247039795,
"learning_rate": 1.9261746489577767e-05,
"loss": 0.5721,
"step": 572
},
{
"epoch": 0.7539267015706806,
"grad_norm": 7.7210893630981445,
"learning_rate": 1.9248903191235177e-05,
"loss": 0.5749,
"step": 576
},
{
"epoch": 0.7591623036649214,
"grad_norm": 3.5172553062438965,
"learning_rate": 1.9235953494976786e-05,
"loss": 0.6009,
"step": 580
},
{
"epoch": 0.7643979057591623,
"grad_norm": 5.326947212219238,
"learning_rate": 1.922289754977385e-05,
"loss": 0.5896,
"step": 584
},
{
"epoch": 0.7696335078534031,
"grad_norm": 3.990248203277588,
"learning_rate": 1.920973550581989e-05,
"loss": 0.578,
"step": 588
},
{
"epoch": 0.774869109947644,
"grad_norm": 3.6598334312438965,
"learning_rate": 1.9196467514528973e-05,
"loss": 0.567,
"step": 592
},
{
"epoch": 0.7801047120418848,
"grad_norm": 5.096114635467529,
"learning_rate": 1.9183093728533966e-05,
"loss": 0.5847,
"step": 596
},
{
"epoch": 0.7853403141361257,
"grad_norm": 5.4809889793396,
"learning_rate": 1.9169614301684786e-05,
"loss": 0.5934,
"step": 600
},
{
"epoch": 0.7853403141361257,
"eval_F1_err_corr": 0.8959803504098618,
"eval_accuracy": 0.9018097447795823,
"eval_correct_accuracy": 0.9504131731842577,
"eval_error_accuracy": 0.8474448138009186,
"eval_f1": 0.6463115667483842,
"eval_loss": 0.2541360855102539,
"eval_pr_auc": 0.7031337927296945,
"eval_precision": 0.6363835856923414,
"eval_recall": 0.6565542223228436,
"eval_runtime": 24.8027,
"eval_samples_per_second": 197.277,
"eval_steps_per_second": 0.806,
"step": 600
},
{
"epoch": 0.7905759162303665,
"grad_norm": 3.492452621459961,
"learning_rate": 1.915602938904662e-05,
"loss": 0.5974,
"step": 604
},
{
"epoch": 0.7958115183246073,
"grad_norm": 4.485317707061768,
"learning_rate": 1.914233914689815e-05,
"loss": 0.5269,
"step": 608
},
{
"epoch": 0.8010471204188482,
"grad_norm": 4.36208438873291,
"learning_rate": 1.912854373272975e-05,
"loss": 0.5794,
"step": 612
},
{
"epoch": 0.806282722513089,
"grad_norm": 4.126212120056152,
"learning_rate": 1.9114643305241678e-05,
"loss": 0.5454,
"step": 616
},
{
"epoch": 0.8115183246073299,
"grad_norm": 3.9140942096710205,
"learning_rate": 1.9100638024342245e-05,
"loss": 0.5615,
"step": 620
},
{
"epoch": 0.8167539267015707,
"grad_norm": 9.218249320983887,
"learning_rate": 1.908652805114598e-05,
"loss": 0.564,
"step": 624
},
{
"epoch": 0.8219895287958116,
"grad_norm": 4.118100166320801,
"learning_rate": 1.907231354797179e-05,
"loss": 0.5406,
"step": 628
},
{
"epoch": 0.8272251308900523,
"grad_norm": 3.917045831680298,
"learning_rate": 1.9057994678341053e-05,
"loss": 0.5581,
"step": 632
},
{
"epoch": 0.8324607329842932,
"grad_norm": 4.272670745849609,
"learning_rate": 1.9043571606975776e-05,
"loss": 0.5761,
"step": 636
},
{
"epoch": 0.837696335078534,
"grad_norm": 4.809320449829102,
"learning_rate": 1.902904449979669e-05,
"loss": 0.5422,
"step": 640
},
{
"epoch": 0.837696335078534,
"eval_F1_err_corr": 0.899383774542208,
"eval_accuracy": 0.905769528228925,
"eval_correct_accuracy": 0.9610494803595725,
"eval_error_accuracy": 0.8451544680769811,
"eval_f1": 0.6363419293218721,
"eval_loss": 0.2484092116355896,
"eval_pr_auc": 0.6976824941932482,
"eval_precision": 0.673149785299318,
"eval_recall": 0.6033506905139234,
"eval_runtime": 24.8065,
"eval_samples_per_second": 197.247,
"eval_steps_per_second": 0.806,
"step": 640
},
{
"epoch": 0.8429319371727748,
"grad_norm": 5.909646511077881,
"learning_rate": 1.901441352392133e-05,
"loss": 0.5825,
"step": 644
},
{
"epoch": 0.8481675392670157,
"grad_norm": 4.255792140960693,
"learning_rate": 1.8999678847662124e-05,
"loss": 0.5576,
"step": 648
},
{
"epoch": 0.8534031413612565,
"grad_norm": 6.5200114250183105,
"learning_rate": 1.8984840640524445e-05,
"loss": 0.5296,
"step": 652
},
{
"epoch": 0.8586387434554974,
"grad_norm": 8.32865047454834,
"learning_rate": 1.8969899073204687e-05,
"loss": 0.5655,
"step": 656
},
{
"epoch": 0.8638743455497382,
"grad_norm": 9.28367805480957,
"learning_rate": 1.8954854317588262e-05,
"loss": 0.5791,
"step": 660
},
{
"epoch": 0.8691099476439791,
"grad_norm": 4.166441917419434,
"learning_rate": 1.8939706546747656e-05,
"loss": 0.5214,
"step": 664
},
{
"epoch": 0.8743455497382199,
"grad_norm": 3.7278671264648438,
"learning_rate": 1.8924455934940424e-05,
"loss": 0.5087,
"step": 668
},
{
"epoch": 0.8795811518324608,
"grad_norm": 6.253541469573975,
"learning_rate": 1.8909102657607182e-05,
"loss": 0.5476,
"step": 672
},
{
"epoch": 0.8848167539267016,
"grad_norm": 9.273209571838379,
"learning_rate": 1.88936468913696e-05,
"loss": 0.4928,
"step": 676
},
{
"epoch": 0.8900523560209425,
"grad_norm": 5.4465532302856445,
"learning_rate": 1.8878088814028365e-05,
"loss": 0.4909,
"step": 680
},
{
"epoch": 0.8900523560209425,
"eval_F1_err_corr": 0.8973571707111299,
"eval_accuracy": 0.9004485692188708,
"eval_correct_accuracy": 0.9515640305646176,
"eval_error_accuracy": 0.8489933585798806,
"eval_f1": 0.6449691085613416,
"eval_loss": 0.25420647859573364,
"eval_pr_auc": 0.7006737583541583,
"eval_precision": 0.6290079621261029,
"eval_recall": 0.6617613764998868,
"eval_runtime": 24.8354,
"eval_samples_per_second": 197.017,
"eval_steps_per_second": 0.805,
"step": 680
},
{
"epoch": 0.8952879581151832,
"grad_norm": 3.929280996322632,
"learning_rate": 1.886242860456113e-05,
"loss": 0.518,
"step": 684
},
{
"epoch": 0.900523560209424,
"grad_norm": 3.3221724033355713,
"learning_rate": 1.884666644312046e-05,
"loss": 0.474,
"step": 688
},
{
"epoch": 0.9057591623036649,
"grad_norm": 4.1775126457214355,
"learning_rate": 1.8830802511031763e-05,
"loss": 0.513,
"step": 692
},
{
"epoch": 0.9109947643979057,
"grad_norm": 4.372125148773193,
"learning_rate": 1.88148369907912e-05,
"loss": 0.4958,
"step": 696
},
{
"epoch": 0.9162303664921466,
"grad_norm": 4.19729471206665,
"learning_rate": 1.8798770066063577e-05,
"loss": 0.5178,
"step": 700
},
{
"epoch": 0.9214659685863874,
"grad_norm": 4.332755088806152,
"learning_rate": 1.8782601921680258e-05,
"loss": 0.525,
"step": 704
},
{
"epoch": 0.9267015706806283,
"grad_norm": 4.065849304199219,
"learning_rate": 1.8766332743637002e-05,
"loss": 0.4692,
"step": 708
},
{
"epoch": 0.9319371727748691,
"grad_norm": 4.974046230316162,
"learning_rate": 1.8749962719091864e-05,
"loss": 0.4973,
"step": 712
},
{
"epoch": 0.93717277486911,
"grad_norm": 4.961699962615967,
"learning_rate": 1.8733492036363007e-05,
"loss": 0.5204,
"step": 716
},
{
"epoch": 0.9424083769633508,
"grad_norm": 4.140364646911621,
"learning_rate": 1.871692088492655e-05,
"loss": 0.4905,
"step": 720
},
{
"epoch": 0.9424083769633508,
"eval_F1_err_corr": 0.8932916712717729,
"eval_accuracy": 0.8947254447022428,
"eval_correct_accuracy": 0.9452793616476387,
"eval_error_accuracy": 0.8467242340670772,
"eval_f1": 0.6396272371068517,
"eval_loss": 0.2594238817691803,
"eval_pr_auc": 0.7027911559368634,
"eval_precision": 0.6008754476721051,
"eval_recall": 0.6837219832465474,
"eval_runtime": 24.8417,
"eval_samples_per_second": 196.967,
"eval_steps_per_second": 0.805,
"step": 720
},
{
"epoch": 0.9476439790575916,
"grad_norm": 8.625274658203125,
"learning_rate": 1.8700249455414394e-05,
"loss": 0.4686,
"step": 724
},
{
"epoch": 0.9528795811518325,
"grad_norm": 6.383296966552734,
"learning_rate": 1.8683477939612024e-05,
"loss": 0.4764,
"step": 728
},
{
"epoch": 0.9581151832460733,
"grad_norm": 7.345070838928223,
"learning_rate": 1.866660653045629e-05,
"loss": 0.4823,
"step": 732
},
{
"epoch": 0.9633507853403142,
"grad_norm": 4.40362548828125,
"learning_rate": 1.8649635422033218e-05,
"loss": 0.49,
"step": 736
},
{
"epoch": 0.9685863874345549,
"grad_norm": 3.8177592754364014,
"learning_rate": 1.863256480957574e-05,
"loss": 0.5004,
"step": 740
},
{
"epoch": 0.9738219895287958,
"grad_norm": 3.5552761554718018,
"learning_rate": 1.861539488946148e-05,
"loss": 0.4967,
"step": 744
},
{
"epoch": 0.9790575916230366,
"grad_norm": 3.948543071746826,
"learning_rate": 1.8598125859210475e-05,
"loss": 0.5106,
"step": 748
},
{
"epoch": 0.9842931937172775,
"grad_norm": 4.415132999420166,
"learning_rate": 1.858075791748291e-05,
"loss": 0.4919,
"step": 752
},
{
"epoch": 0.9895287958115183,
"grad_norm": 4.514105319976807,
"learning_rate": 1.8563291264076834e-05,
"loss": 0.4947,
"step": 756
},
{
"epoch": 0.9947643979057592,
"grad_norm": 6.685056209564209,
"learning_rate": 1.854572609992586e-05,
"loss": 0.4892,
"step": 760
},
{
"epoch": 0.9947643979057592,
"eval_F1_err_corr": 0.9005018183708923,
"eval_accuracy": 0.9076256767208043,
"eval_correct_accuracy": 0.9694615035570632,
"eval_error_accuracy": 0.8407011107412775,
"eval_f1": 0.6246857717445953,
"eval_loss": 0.24942660331726074,
"eval_pr_auc": 0.6972885689682531,
"eval_precision": 0.7021757558632382,
"eval_recall": 0.5625990491283677,
"eval_runtime": 24.7945,
"eval_samples_per_second": 197.342,
"eval_steps_per_second": 0.807,
"step": 760
}
],
"logging_steps": 4,
"max_steps": 3820,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 16,
"stateful_callbacks": {
"MinEpochEarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.37033143972266e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}