{ "best_global_step": null, "best_metric": 0.9005018183708923, "best_model_checkpoint": null, "epoch": 0.9947643979057592, "eval_steps": 16, "global_step": 760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005235602094240838, "grad_norm": 433.5572204589844, "learning_rate": 5.217391304347826e-07, "loss": 5.5489, "step": 4 }, { "epoch": 0.005235602094240838, "eval_F1_err_corr": 0.2899344909681993, "eval_accuracy": 0.33964423820572315, "eval_correct_accuracy": 0.21863927522501062, "eval_error_accuracy": 0.4302247894550517, "eval_f1": 0.2573237770510055, "eval_loss": 1.461071491241455, "eval_pr_auc": 0.16429768646454848, "eval_precision": 0.15202466598150052, "eval_recall": 0.8372198324654743, "eval_runtime": 24.9385, "eval_samples_per_second": 196.202, "eval_steps_per_second": 0.802, "step": 4 }, { "epoch": 0.010471204188481676, "grad_norm": 424.1092834472656, "learning_rate": 1.2173913043478262e-06, "loss": 5.392, "step": 8 }, { "epoch": 0.010471204188481676, "eval_F1_err_corr": 0.321098900363111, "eval_accuracy": 0.3666511987625677, "eval_correct_accuracy": 0.2498037913412515, "eval_error_accuracy": 0.4493435370426137, "eval_f1": 0.25948565848012445, "eval_loss": 1.3678739070892334, "eval_pr_auc": 0.16391947366452397, "eval_precision": 0.15441239776151527, "eval_recall": 0.8120896536110482, "eval_runtime": 24.8266, "eval_samples_per_second": 197.087, "eval_steps_per_second": 0.806, "step": 8 }, { "epoch": 0.015706806282722512, "grad_norm": 328.165771484375, "learning_rate": 1.9130434782608697e-06, "loss": 4.6674, "step": 12 }, { "epoch": 0.015706806282722512, "eval_F1_err_corr": 0.42193034247158306, "eval_accuracy": 0.4537664346481052, "eval_correct_accuracy": 0.36420961710522887, "eval_error_accuracy": 0.5013920328557274, "eval_f1": 0.25876327610091937, "eval_loss": 1.08595609664917, "eval_pr_auc": 0.16343108665970787, "eval_precision": 0.15883323026180168, "eval_recall": 0.6977586597237945, "eval_runtime": 24.7964, "eval_samples_per_second": 197.327, "eval_steps_per_second": 0.807, "step": 12 }, { "epoch": 0.020942408376963352, "grad_norm": 121.70630645751953, "learning_rate": 2.6086956521739132e-06, "loss": 3.0944, "step": 16 }, { "epoch": 0.020942408376963352, "eval_F1_err_corr": 0.721921189250657, "eval_accuracy": 0.7358391337973704, "eval_correct_accuracy": 0.7870115524045516, "eval_error_accuracy": 0.6667750717278245, "eval_f1": 0.1906928253246138, "eval_loss": 0.5721015334129333, "eval_pr_auc": 0.16146374192556026, "eval_precision": 0.16400391261819366, "eval_recall": 0.22775639574371745, "eval_runtime": 24.8528, "eval_samples_per_second": 196.879, "eval_steps_per_second": 0.805, "step": 16 }, { "epoch": 0.02617801047120419, "grad_norm": 18.458759307861328, "learning_rate": 3.3043478260869567e-06, "loss": 2.002, "step": 20 }, { "epoch": 0.02617801047120419, "eval_F1_err_corr": 0.8402340555989792, "eval_accuracy": 0.8622119102861562, "eval_correct_accuracy": 0.9922276019965096, "eval_error_accuracy": 0.728620881787505, "eval_f1": 0.0035794183445190158, "eval_loss": 0.7684443593025208, "eval_pr_auc": 0.17251592682571912, "eval_precision": 0.1509433962264151, "eval_recall": 0.001811184061580258, "eval_runtime": 24.8601, "eval_samples_per_second": 196.822, "eval_steps_per_second": 0.805, "step": 20 }, { "epoch": 0.031413612565445025, "grad_norm": 115.0860595703125, "learning_rate": 4.000000000000001e-06, "loss": 3.383, "step": 24 }, { "epoch": 0.031413612565445025, "eval_F1_err_corr": 0.8378682847250856, "eval_accuracy": 0.8620881670533642, "eval_correct_accuracy": 0.9842169035446346, "eval_error_accuracy": 0.7294085238385144, "eval_f1": 0.008451957295373666, "eval_loss": 0.8194268941879272, "eval_pr_auc": 0.18009572056722223, "eval_precision": 0.24050632911392406, "eval_recall": 0.004301562146253113, "eval_runtime": 24.8294, "eval_samples_per_second": 197.065, "eval_steps_per_second": 0.805, "step": 24 }, { "epoch": 0.03664921465968586, "grad_norm": 99.96025848388672, "learning_rate": 4.695652173913044e-06, "loss": 3.1857, "step": 28 }, { "epoch": 0.03664921465968586, "eval_F1_err_corr": 0.8207683484774771, "eval_accuracy": 0.8591492652745553, "eval_correct_accuracy": 0.9328157189465495, "eval_error_accuracy": 0.7327520943454843, "eval_f1": 0.060268317853457175, "eval_loss": 0.6408756971359253, "eval_pr_auc": 0.19809419361676622, "eval_precision": 0.3411214953271028, "eval_recall": 0.03305410912383971, "eval_runtime": 24.8048, "eval_samples_per_second": 197.261, "eval_steps_per_second": 0.806, "step": 28 }, { "epoch": 0.041884816753926704, "grad_norm": 48.16204071044922, "learning_rate": 5.391304347826088e-06, "loss": 2.1498, "step": 32 }, { "epoch": 0.041884816753926704, "eval_F1_err_corr": 0.7694538304703895, "eval_accuracy": 0.837370456303171, "eval_correct_accuracy": 0.8088768689020601, "eval_error_accuracy": 0.7336950054749075, "eval_f1": 0.21148942552872357, "eval_loss": 0.44809016585350037, "eval_pr_auc": 0.2318064013915099, "eval_precision": 0.31333333333333335, "eval_recall": 0.15961059542676023, "eval_runtime": 24.8313, "eval_samples_per_second": 197.05, "eval_steps_per_second": 0.805, "step": 32 }, { "epoch": 0.04712041884816754, "grad_norm": 169.05667114257812, "learning_rate": 6.086956521739132e-06, "loss": 1.9176, "step": 36 }, { "epoch": 0.04712041884816754, "eval_F1_err_corr": 0.6219027098098009, "eval_accuracy": 0.7121732405259087, "eval_correct_accuracy": 0.5674362971053744, "eval_error_accuracy": 0.687935454975843, "eval_f1": 0.33722752528850264, "eval_loss": 0.5731640458106995, "eval_pr_auc": 0.2788670530837179, "eval_precision": 0.2460243217960711, "eval_recall": 0.5358840842200588, "eval_runtime": 24.8463, "eval_samples_per_second": 196.931, "eval_steps_per_second": 0.805, "step": 36 }, { "epoch": 0.05235602094240838, "grad_norm": 114.60936737060547, "learning_rate": 6.782608695652174e-06, "loss": 2.0171, "step": 40 }, { "epoch": 0.05235602094240838, "eval_F1_err_corr": 0.7430224807525013, "eval_accuracy": 0.7990719257540603, "eval_correct_accuracy": 0.7547544657206874, "eval_error_accuracy": 0.7316496396965659, "eval_f1": 0.35211970074812965, "eval_loss": 0.4375106692314148, "eval_pr_auc": 0.3012571706540082, "eval_precision": 0.3147289586305278, "eval_recall": 0.39959248358614446, "eval_runtime": 24.8039, "eval_samples_per_second": 197.268, "eval_steps_per_second": 0.806, "step": 40 }, { "epoch": 0.05759162303664921, "grad_norm": 32.457340240478516, "learning_rate": 7.478260869565218e-06, "loss": 1.611, "step": 44 }, { "epoch": 0.05759162303664921, "eval_F1_err_corr": 0.8419241583637731, "eval_accuracy": 0.862830626450116, "eval_correct_accuracy": 0.9871360561186291, "eval_error_accuracy": 0.7339561045659535, "eval_f1": 0.09914668833807395, "eval_loss": 0.38381654024124146, "eval_pr_auc": 0.3152015073109494, "eval_precision": 0.48316831683168315, "eval_recall": 0.05524111387819787, "eval_runtime": 24.832, "eval_samples_per_second": 197.044, "eval_steps_per_second": 0.805, "step": 44 }, { "epoch": 0.06282722513089005, "grad_norm": 22.770957946777344, "learning_rate": 8.173913043478263e-06, "loss": 1.5516, "step": 48 }, { "epoch": 0.06282722513089005, "eval_F1_err_corr": 0.8403483674599067, "eval_accuracy": 0.859891724671307, "eval_correct_accuracy": 0.9726090387657613, "eval_error_accuracy": 0.7397527603961362, "eval_f1": 0.19484444444444443, "eval_loss": 0.355484277009964, "eval_pr_auc": 0.3399032342610957, "eval_precision": 0.45364238410596025, "eval_recall": 0.12406610821824768, "eval_runtime": 24.7998, "eval_samples_per_second": 197.3, "eval_steps_per_second": 0.806, "step": 48 }, { "epoch": 0.06806282722513089, "grad_norm": 69.65153503417969, "learning_rate": 8.869565217391306e-06, "loss": 1.4648, "step": 52 }, { "epoch": 0.06806282722513089, "eval_F1_err_corr": 0.8036896045473728, "eval_accuracy": 0.8478267594740913, "eval_correct_accuracy": 0.8494371774369468, "eval_error_accuracy": 0.762617804842102, "eval_f1": 0.38658186806334954, "eval_loss": 0.36472800374031067, "eval_pr_auc": 0.38944473786040484, "eval_precision": 0.4303164908384231, "eval_recall": 0.350916911931175, "eval_runtime": 24.8345, "eval_samples_per_second": 197.025, "eval_steps_per_second": 0.805, "step": 52 }, { "epoch": 0.07329842931937172, "grad_norm": 9.83507251739502, "learning_rate": 9.565217391304349e-06, "loss": 1.3967, "step": 56 }, { "epoch": 0.07329842931937172, "eval_F1_err_corr": 0.8296970467203022, "eval_accuracy": 0.8688321732405259, "eval_correct_accuracy": 0.9181484628791651, "eval_error_accuracy": 0.7567903433781845, "eval_f1": 0.29262595929262597, "eval_loss": 0.33204466104507446, "eval_pr_auc": 0.41163972835541246, "eval_precision": 0.5561192136968929, "eval_recall": 0.19855105275073578, "eval_runtime": 24.8293, "eval_samples_per_second": 197.066, "eval_steps_per_second": 0.805, "step": 56 }, { "epoch": 0.07853403141361257, "grad_norm": 16.36752700805664, "learning_rate": 1.0260869565217393e-05, "loss": 1.2944, "step": 60 }, { "epoch": 0.07853403141361257, "eval_F1_err_corr": 0.8421678884358804, "eval_accuracy": 0.8736581593194123, "eval_correct_accuracy": 0.9276144097142335, "eval_error_accuracy": 0.7711353234659966, "eval_f1": 0.38438347904733194, "eval_loss": 0.31788310408592224, "eval_pr_auc": 0.4608372769142751, "eval_precision": 0.5751014884979703, "eval_recall": 0.28865745981435365, "eval_runtime": 24.8595, "eval_samples_per_second": 196.826, "eval_steps_per_second": 0.805, "step": 60 }, { "epoch": 0.08376963350785341, "grad_norm": 17.503982543945312, "learning_rate": 1.0956521739130435e-05, "loss": 1.284, "step": 64 }, { "epoch": 0.08376963350785341, "eval_F1_err_corr": 0.8579401681935629, "eval_accuracy": 0.8769682907965971, "eval_correct_accuracy": 0.9478163203454459, "eval_error_accuracy": 0.7836326415058088, "eval_f1": 0.4385147536354652, "eval_loss": 0.30793023109436035, "eval_pr_auc": 0.49259983395831536, "eval_precision": 0.5825206301575394, "eval_recall": 0.3515961059542676, "eval_runtime": 24.8423, "eval_samples_per_second": 196.962, "eval_steps_per_second": 0.805, "step": 64 }, { "epoch": 0.08900523560209424, "grad_norm": 18.571523666381836, "learning_rate": 1.1652173913043478e-05, "loss": 1.191, "step": 68 }, { "epoch": 0.08900523560209424, "eval_F1_err_corr": 0.8618882109239977, "eval_accuracy": 0.8807424593967518, "eval_correct_accuracy": 0.9652649658606812, "eval_error_accuracy": 0.7785120852969489, "eval_f1": 0.38741458763705705, "eval_loss": 0.30222997069358826, "eval_pr_auc": 0.5105597340335465, "eval_precision": 0.6497867803837953, "eval_recall": 0.2759791713832918, "eval_runtime": 24.8386, "eval_samples_per_second": 196.992, "eval_steps_per_second": 0.805, "step": 68 }, { "epoch": 0.09424083769633508, "grad_norm": 19.51681900024414, "learning_rate": 1.2347826086956523e-05, "loss": 1.1905, "step": 72 }, { "epoch": 0.09424083769633508, "eval_F1_err_corr": 0.8556927040319808, "eval_accuracy": 0.8829079659706109, "eval_correct_accuracy": 0.9252440207528985, "eval_error_accuracy": 0.7958667759923117, "eval_f1": 0.46682631356529086, "eval_loss": 0.2958272099494934, "eval_pr_auc": 0.5272941790977752, "eval_precision": 0.6178225205070843, "eval_recall": 0.37514149875481095, "eval_runtime": 24.8293, "eval_samples_per_second": 197.065, "eval_steps_per_second": 0.805, "step": 72 }, { "epoch": 0.09947643979057591, "grad_norm": 21.868053436279297, "learning_rate": 1.3043478260869566e-05, "loss": 1.1759, "step": 76 }, { "epoch": 0.09947643979057591, "eval_F1_err_corr": 0.8671239387996902, "eval_accuracy": 0.8833720030935808, "eval_correct_accuracy": 0.965744942725071, "eval_error_accuracy": 0.7867787965661607, "eval_f1": 0.43932183224271265, "eval_loss": 0.2900922894477844, "eval_pr_auc": 0.5398410773230814, "eval_precision": 0.6402254009536195, "eval_recall": 0.33438985736925514, "eval_runtime": 24.826, "eval_samples_per_second": 197.092, "eval_steps_per_second": 0.806, "step": 76 }, { "epoch": 0.10471204188481675, "grad_norm": 11.858696937561035, "learning_rate": 1.373913043478261e-05, "loss": 1.1804, "step": 80 }, { "epoch": 0.10471204188481675, "eval_F1_err_corr": 0.8643274810534162, "eval_accuracy": 0.8850425367362722, "eval_correct_accuracy": 0.951882356363745, "eval_error_accuracy": 0.7915226184557567, "eval_f1": 0.4779432424838438, "eval_loss": 0.2871633768081665, "eval_pr_auc": 0.5513156434574297, "eval_precision": 0.6297667530544243, "eval_recall": 0.38510301109350237, "eval_runtime": 24.8425, "eval_samples_per_second": 196.96, "eval_steps_per_second": 0.805, "step": 80 }, { "epoch": 0.1099476439790576, "grad_norm": 8.166620254516602, "learning_rate": 1.4434782608695654e-05, "loss": 1.1212, "step": 84 }, { "epoch": 0.11518324607329843, "grad_norm": 4.340336799621582, "learning_rate": 1.5130434782608697e-05, "loss": 1.1325, "step": 88 }, { "epoch": 0.12041884816753927, "grad_norm": 27.051570892333984, "learning_rate": 1.582608695652174e-05, "loss": 1.1218, "step": 92 }, { "epoch": 0.1256544502617801, "grad_norm": 22.343820571899414, "learning_rate": 1.6521739130434785e-05, "loss": 1.1068, "step": 96 }, { "epoch": 0.13089005235602094, "grad_norm": 47.00363540649414, "learning_rate": 1.721739130434783e-05, "loss": 1.1034, "step": 100 }, { "epoch": 0.13612565445026178, "grad_norm": 40.41328048706055, "learning_rate": 1.791304347826087e-05, "loss": 1.1235, "step": 104 }, { "epoch": 0.14136125654450263, "grad_norm": 31.55730628967285, "learning_rate": 1.8608695652173912e-05, "loss": 1.0747, "step": 108 }, { "epoch": 0.14659685863874344, "grad_norm": 2.652536392211914, "learning_rate": 1.9304347826086957e-05, "loss": 0.9891, "step": 112 }, { "epoch": 0.1518324607329843, "grad_norm": 3.2267162799835205, "learning_rate": 2e-05, "loss": 0.9607, "step": 116 }, { "epoch": 0.15706806282722513, "grad_norm": 21.89421272277832, "learning_rate": 1.9999942480792804e-05, "loss": 1.0643, "step": 120 }, { "epoch": 0.15706806282722513, "eval_F1_err_corr": 0.8734264964691199, "eval_accuracy": 0.8929311678267595, "eval_correct_accuracy": 0.9278371473433551, "eval_error_accuracy": 0.8250438945941904, "eval_f1": 0.5815499939547818, "eval_loss": 0.2678382694721222, "eval_pr_auc": 0.6347920534332054, "eval_precision": 0.6240269849507005, "eval_recall": 0.544487208512565, "eval_runtime": 24.8153, "eval_samples_per_second": 197.177, "eval_steps_per_second": 0.806, "step": 120 }, { "epoch": 0.16230366492146597, "grad_norm": 8.081486701965332, "learning_rate": 1.999976992383291e-05, "loss": 1.0189, "step": 124 }, { "epoch": 0.16753926701570682, "grad_norm": 17.748775482177734, "learning_rate": 1.9999482331105377e-05, "loss": 0.9898, "step": 128 }, { "epoch": 0.17277486910994763, "grad_norm": 41.294334411621094, "learning_rate": 1.9999079705918636e-05, "loss": 1.0795, "step": 132 }, { "epoch": 0.17801047120418848, "grad_norm": 4.425788879394531, "learning_rate": 1.999856205290442e-05, "loss": 1.0274, "step": 136 }, { "epoch": 0.18324607329842932, "grad_norm": 26.085590362548828, "learning_rate": 1.9997929378017723e-05, "loss": 0.9516, "step": 140 }, { "epoch": 0.18848167539267016, "grad_norm": 18.811126708984375, "learning_rate": 1.9997181688536746e-05, "loss": 0.966, "step": 144 }, { "epoch": 0.193717277486911, "grad_norm": 22.464527130126953, "learning_rate": 1.999631899306278e-05, "loss": 0.8932, "step": 148 }, { "epoch": 0.19895287958115182, "grad_norm": 8.309951782226562, "learning_rate": 1.999534130152014e-05, "loss": 0.9756, "step": 152 }, { "epoch": 0.20418848167539266, "grad_norm": 4.516532897949219, "learning_rate": 1.999424862515604e-05, "loss": 0.998, "step": 156 }, { "epoch": 0.2094240837696335, "grad_norm": 10.015279769897461, "learning_rate": 1.999304097654045e-05, "loss": 0.9015, "step": 160 }, { "epoch": 0.2094240837696335, "eval_F1_err_corr": 0.885087159946509, "eval_accuracy": 0.9020572312451662, "eval_correct_accuracy": 0.95333342698488, "eval_error_accuracy": 0.8259592279571245, "eval_f1": 0.5984271943176053, "eval_loss": 0.24851758778095245, "eval_pr_auc": 0.6675246054619536, "eval_precision": 0.6804153446783963, "eval_recall": 0.5340729001584786, "eval_runtime": 24.8104, "eval_samples_per_second": 197.216, "eval_steps_per_second": 0.806, "step": 160 }, { "epoch": 0.21465968586387435, "grad_norm": 14.583905220031738, "learning_rate": 1.999171836956597e-05, "loss": 0.9587, "step": 164 }, { "epoch": 0.2198952879581152, "grad_norm": 9.168513298034668, "learning_rate": 1.9990280819447662e-05, "loss": 0.9663, "step": 168 }, { "epoch": 0.225130890052356, "grad_norm": 24.278688430786133, "learning_rate": 1.998872834272287e-05, "loss": 0.9679, "step": 172 }, { "epoch": 0.23036649214659685, "grad_norm": 23.693418502807617, "learning_rate": 1.9987060957251047e-05, "loss": 0.9541, "step": 176 }, { "epoch": 0.2356020942408377, "grad_norm": 34.47703170776367, "learning_rate": 1.9985278682213525e-05, "loss": 0.8988, "step": 180 }, { "epoch": 0.24083769633507854, "grad_norm": 17.93362045288086, "learning_rate": 1.9983381538113317e-05, "loss": 0.9296, "step": 184 }, { "epoch": 0.24607329842931938, "grad_norm": 23.294275283813477, "learning_rate": 1.998136954677487e-05, "loss": 0.9337, "step": 188 }, { "epoch": 0.2513089005235602, "grad_norm": 19.78593635559082, "learning_rate": 1.9979242731343803e-05, "loss": 0.8976, "step": 192 }, { "epoch": 0.25654450261780104, "grad_norm": 16.300464630126953, "learning_rate": 1.9977001116286675e-05, "loss": 0.8705, "step": 196 }, { "epoch": 0.2617801047120419, "grad_norm": 26.935935974121094, "learning_rate": 1.9974644727390665e-05, "loss": 0.8758, "step": 200 }, { "epoch": 0.2617801047120419, "eval_F1_err_corr": 0.8910747356279248, "eval_accuracy": 0.9052126836813612, "eval_correct_accuracy": 0.9761037985940583, "eval_error_accuracy": 0.819672508302841, "eval_f1": 0.558119411595039, "eval_loss": 0.24936090409755707, "eval_pr_auc": 0.6830633725429478, "eval_precision": 0.768772348033373, "eval_recall": 0.4380801448947249, "eval_runtime": 24.8593, "eval_samples_per_second": 196.827, "eval_steps_per_second": 0.805, "step": 200 }, { "epoch": 0.2670157068062827, "grad_norm": 26.804174423217773, "learning_rate": 1.9972173591763297e-05, "loss": 0.9957, "step": 204 }, { "epoch": 0.27225130890052357, "grad_norm": 12.255861282348633, "learning_rate": 1.996958773783213e-05, "loss": 0.8614, "step": 208 }, { "epoch": 0.2774869109947644, "grad_norm": 10.577012062072754, "learning_rate": 1.9966887195344403e-05, "loss": 0.8539, "step": 212 }, { "epoch": 0.28272251308900526, "grad_norm": 9.850268363952637, "learning_rate": 1.9964071995366744e-05, "loss": 0.8184, "step": 216 }, { "epoch": 0.2879581151832461, "grad_norm": 4.022161960601807, "learning_rate": 1.9961142170284762e-05, "loss": 0.783, "step": 220 }, { "epoch": 0.2931937172774869, "grad_norm": 4.174556732177734, "learning_rate": 1.9958097753802693e-05, "loss": 0.8355, "step": 224 }, { "epoch": 0.29842931937172773, "grad_norm": 8.559288024902344, "learning_rate": 1.9954938780943034e-05, "loss": 0.8081, "step": 228 }, { "epoch": 0.3036649214659686, "grad_norm": 11.881876945495605, "learning_rate": 1.9951665288046098e-05, "loss": 0.8846, "step": 232 }, { "epoch": 0.3089005235602094, "grad_norm": 9.480097770690918, "learning_rate": 1.994827731276963e-05, "loss": 0.869, "step": 236 }, { "epoch": 0.31413612565445026, "grad_norm": 18.96599006652832, "learning_rate": 1.9944774894088367e-05, "loss": 0.9044, "step": 240 }, { "epoch": 0.31413612565445026, "eval_F1_err_corr": 0.8903583524392616, "eval_accuracy": 0.8976334106728538, "eval_correct_accuracy": 0.9422891260099501, "eval_error_accuracy": 0.8438525462118894, "eval_f1": 0.6341625207296849, "eval_loss": 0.25486111640930176, "eval_pr_auc": 0.6936322312463549, "eval_precision": 0.6197061365600691, "eval_recall": 0.6493094860765225, "eval_runtime": 24.7931, "eval_samples_per_second": 197.353, "eval_steps_per_second": 0.807, "step": 240 }, { "epoch": 0.3193717277486911, "grad_norm": 7.49755859375, "learning_rate": 1.994115807229357e-05, "loss": 0.8702, "step": 244 }, { "epoch": 0.32460732984293195, "grad_norm": 19.93411636352539, "learning_rate": 1.993742688899259e-05, "loss": 0.8357, "step": 248 }, { "epoch": 0.3298429319371728, "grad_norm": 18.435436248779297, "learning_rate": 1.9933581387108358e-05, "loss": 0.8185, "step": 252 }, { "epoch": 0.33507853403141363, "grad_norm": 23.072092056274414, "learning_rate": 1.992962161087893e-05, "loss": 0.8371, "step": 256 }, { "epoch": 0.3403141361256545, "grad_norm": 11.625171661376953, "learning_rate": 1.9925547605856937e-05, "loss": 0.8276, "step": 260 }, { "epoch": 0.34554973821989526, "grad_norm": 18.671037673950195, "learning_rate": 1.992135941890909e-05, "loss": 0.8253, "step": 264 }, { "epoch": 0.3507853403141361, "grad_norm": 15.393129348754883, "learning_rate": 1.9917057098215624e-05, "loss": 0.8245, "step": 268 }, { "epoch": 0.35602094240837695, "grad_norm": 9.267082214355469, "learning_rate": 1.9912640693269754e-05, "loss": 0.8451, "step": 272 }, { "epoch": 0.3612565445026178, "grad_norm": 5.4926252365112305, "learning_rate": 1.9908110254877107e-05, "loss": 0.813, "step": 276 }, { "epoch": 0.36649214659685864, "grad_norm": 6.064371585845947, "learning_rate": 1.9903465835155124e-05, "loss": 0.7553, "step": 280 }, { "epoch": 0.36649214659685864, "eval_F1_err_corr": 0.898106732050316, "eval_accuracy": 0.9078112915699923, "eval_correct_accuracy": 0.9649030769491357, "eval_error_accuracy": 0.8399597119400094, "eval_f1": 0.624117053481332, "eval_loss": 0.23855358362197876, "eval_pr_auc": 0.697922245841014, "eval_precision": 0.704642551979493, "eval_recall": 0.5601086710436948, "eval_runtime": 24.8196, "eval_samples_per_second": 197.143, "eval_steps_per_second": 0.806, "step": 280 }, { "epoch": 0.3717277486910995, "grad_norm": 11.443989753723145, "learning_rate": 1.9898707487532475e-05, "loss": 0.7992, "step": 284 }, { "epoch": 0.3769633507853403, "grad_norm": 9.889354705810547, "learning_rate": 1.9893835266748437e-05, "loss": 0.8425, "step": 288 }, { "epoch": 0.38219895287958117, "grad_norm": 6.687994480133057, "learning_rate": 1.9888849228852262e-05, "loss": 0.8465, "step": 292 }, { "epoch": 0.387434554973822, "grad_norm": 3.455092430114746, "learning_rate": 1.988374943120254e-05, "loss": 0.8098, "step": 296 }, { "epoch": 0.39267015706806285, "grad_norm": 4.258669376373291, "learning_rate": 1.987853593246654e-05, "loss": 0.8263, "step": 300 }, { "epoch": 0.39790575916230364, "grad_norm": 5.940682888031006, "learning_rate": 1.9873208792619517e-05, "loss": 0.7651, "step": 304 }, { "epoch": 0.4031413612565445, "grad_norm": 5.644289493560791, "learning_rate": 1.9867768072944047e-05, "loss": 0.7919, "step": 308 }, { "epoch": 0.4083769633507853, "grad_norm": 6.426525115966797, "learning_rate": 1.9862213836029308e-05, "loss": 0.7661, "step": 312 }, { "epoch": 0.41361256544502617, "grad_norm": 7.790468215942383, "learning_rate": 1.985654614577036e-05, "loss": 0.7592, "step": 316 }, { "epoch": 0.418848167539267, "grad_norm": 8.240925788879395, "learning_rate": 1.985076506736741e-05, "loss": 0.7935, "step": 320 }, { "epoch": 0.418848167539267, "eval_F1_err_corr": 0.8892707173263128, "eval_accuracy": 0.900108275328693, "eval_correct_accuracy": 0.9416031342860438, "eval_error_accuracy": 0.8424490839609798, "eval_f1": 0.636169014084507, "eval_loss": 0.24991166591644287, "eval_pr_auc": 0.6999774937080984, "eval_precision": 0.6332436069986541, "eval_recall": 0.6391215757301336, "eval_runtime": 24.8123, "eval_samples_per_second": 197.2, "eval_steps_per_second": 0.806, "step": 320 }, { "epoch": 0.42408376963350786, "grad_norm": 6.823334217071533, "learning_rate": 1.9844870667325073e-05, "loss": 0.7347, "step": 324 }, { "epoch": 0.4293193717277487, "grad_norm": 4.039069175720215, "learning_rate": 1.9838863013451587e-05, "loss": 0.7886, "step": 328 }, { "epoch": 0.43455497382198954, "grad_norm": 7.6934380531311035, "learning_rate": 1.9832742174858052e-05, "loss": 0.7608, "step": 332 }, { "epoch": 0.4397905759162304, "grad_norm": 9.409914016723633, "learning_rate": 1.9826508221957624e-05, "loss": 0.7466, "step": 336 }, { "epoch": 0.44502617801047123, "grad_norm": 7.726130962371826, "learning_rate": 1.9820161226464708e-05, "loss": 0.7023, "step": 340 }, { "epoch": 0.450261780104712, "grad_norm": 3.726100206375122, "learning_rate": 1.9813701261394136e-05, "loss": 0.7078, "step": 344 }, { "epoch": 0.45549738219895286, "grad_norm": 12.017361640930176, "learning_rate": 1.980712840106032e-05, "loss": 0.7383, "step": 348 }, { "epoch": 0.4607329842931937, "grad_norm": 5.709269046783447, "learning_rate": 1.9800442721076406e-05, "loss": 0.7215, "step": 352 }, { "epoch": 0.46596858638743455, "grad_norm": 12.649430274963379, "learning_rate": 1.979364429835339e-05, "loss": 0.7111, "step": 356 }, { "epoch": 0.4712041884816754, "grad_norm": 16.15489959716797, "learning_rate": 1.9786733211099257e-05, "loss": 0.7764, "step": 360 }, { "epoch": 0.4712041884816754, "eval_F1_err_corr": 0.894511960241892, "eval_accuracy": 0.9100077339520495, "eval_correct_accuracy": 0.9712793351142024, "eval_error_accuracy": 0.8289907059644579, "eval_f1": 0.5971472095277662, "eval_loss": 0.2414369434118271, "eval_pr_auc": 0.7108638111158798, "eval_precision": 0.7689015691868759, "eval_recall": 0.48811410459587956, "eval_runtime": 25.0196, "eval_samples_per_second": 195.567, "eval_steps_per_second": 0.799, "step": 360 }, { "epoch": 0.47643979057591623, "grad_norm": 12.530599594116211, "learning_rate": 1.9779709538818052e-05, "loss": 0.7715, "step": 364 }, { "epoch": 0.4816753926701571, "grad_norm": 6.7939605712890625, "learning_rate": 1.9772573362308992e-05, "loss": 0.7522, "step": 368 }, { "epoch": 0.4869109947643979, "grad_norm": 3.4304537773132324, "learning_rate": 1.9765324763665516e-05, "loss": 0.7511, "step": 372 }, { "epoch": 0.49214659685863876, "grad_norm": 6.636844158172607, "learning_rate": 1.9757963826274357e-05, "loss": 0.7121, "step": 376 }, { "epoch": 0.4973821989528796, "grad_norm": 4.51839017868042, "learning_rate": 1.975049063481457e-05, "loss": 0.7231, "step": 380 }, { "epoch": 0.5026178010471204, "grad_norm": 9.865214347839355, "learning_rate": 1.974290527525657e-05, "loss": 0.762, "step": 384 }, { "epoch": 0.5078534031413613, "grad_norm": 3.440359592437744, "learning_rate": 1.9735207834861117e-05, "loss": 0.7169, "step": 388 }, { "epoch": 0.5130890052356021, "grad_norm": 3.5312769412994385, "learning_rate": 1.972739840217836e-05, "loss": 0.73, "step": 392 }, { "epoch": 0.518324607329843, "grad_norm": 4.723533630371094, "learning_rate": 1.9719477067046768e-05, "loss": 0.6783, "step": 396 }, { "epoch": 0.5235602094240838, "grad_norm": 3.5356740951538086, "learning_rate": 1.971144392059212e-05, "loss": 0.7155, "step": 400 }, { "epoch": 0.5235602094240838, "eval_F1_err_corr": 0.893120798984817, "eval_accuracy": 0.902954369682908, "eval_correct_accuracy": 0.9461320280124133, "eval_error_accuracy": 0.8457347701138861, "eval_f1": 0.639051892762628, "eval_loss": 0.24243153631687164, "eval_pr_auc": 0.7029855391245526, "eval_precision": 0.6497426298549368, "eval_recall": 0.6287072673760471, "eval_runtime": 24.8233, "eval_samples_per_second": 197.113, "eval_steps_per_second": 0.806, "step": 400 }, { "epoch": 0.5287958115183246, "grad_norm": 13.087606430053711, "learning_rate": 1.970329905522647e-05, "loss": 0.7007, "step": 404 }, { "epoch": 0.5340314136125655, "grad_norm": 14.260698318481445, "learning_rate": 1.9695042564647045e-05, "loss": 0.6817, "step": 408 }, { "epoch": 0.5392670157068062, "grad_norm": 9.661425590515137, "learning_rate": 1.9686674543835208e-05, "loss": 0.7358, "step": 412 }, { "epoch": 0.5445026178010471, "grad_norm": 5.698840618133545, "learning_rate": 1.9678195089055347e-05, "loss": 0.6646, "step": 416 }, { "epoch": 0.5497382198952879, "grad_norm": 5.9759907722473145, "learning_rate": 1.9669604297853766e-05, "loss": 0.73, "step": 420 }, { "epoch": 0.5549738219895288, "grad_norm": 4.276744842529297, "learning_rate": 1.9660902269057558e-05, "loss": 0.712, "step": 424 }, { "epoch": 0.5602094240837696, "grad_norm": 4.572305679321289, "learning_rate": 1.9652089102773487e-05, "loss": 0.7033, "step": 428 }, { "epoch": 0.5654450261780105, "grad_norm": 3.9941539764404297, "learning_rate": 1.9643164900386824e-05, "loss": 0.6695, "step": 432 }, { "epoch": 0.5706806282722513, "grad_norm": 4.321977138519287, "learning_rate": 1.963412976456017e-05, "loss": 0.709, "step": 436 }, { "epoch": 0.5759162303664922, "grad_norm": 4.374669551849365, "learning_rate": 1.96249837992323e-05, "loss": 0.6815, "step": 440 }, { "epoch": 0.5759162303664922, "eval_F1_err_corr": 0.8937597915811933, "eval_accuracy": 0.9036968290796598, "eval_correct_accuracy": 0.9500814005540427, "eval_error_accuracy": 0.8437420660571459, "eval_f1": 0.6368832380730199, "eval_loss": 0.24286404252052307, "eval_pr_auc": 0.7035206327309997, "eval_precision": 0.6568816169393648, "eval_recall": 0.618066561014263, "eval_runtime": 24.8231, "eval_samples_per_second": 197.115, "eval_steps_per_second": 0.806, "step": 440 }, { "epoch": 0.581151832460733, "grad_norm": 3.3900415897369385, "learning_rate": 1.961572710961695e-05, "loss": 0.6042, "step": 444 }, { "epoch": 0.5863874345549738, "grad_norm": 3.9020636081695557, "learning_rate": 1.9606359802201608e-05, "loss": 0.6541, "step": 448 }, { "epoch": 0.5916230366492147, "grad_norm": 3.2324304580688477, "learning_rate": 1.9596881984746288e-05, "loss": 0.664, "step": 452 }, { "epoch": 0.5968586387434555, "grad_norm": 3.6972060203552246, "learning_rate": 1.958729376628231e-05, "loss": 0.6325, "step": 456 }, { "epoch": 0.6020942408376964, "grad_norm": 4.679067134857178, "learning_rate": 1.957759525711101e-05, "loss": 0.6851, "step": 460 }, { "epoch": 0.6073298429319371, "grad_norm": 6.575286865234375, "learning_rate": 1.9567786568802503e-05, "loss": 0.6266, "step": 464 }, { "epoch": 0.612565445026178, "grad_norm": 6.148586273193359, "learning_rate": 1.9557867814194385e-05, "loss": 0.6887, "step": 468 }, { "epoch": 0.6178010471204188, "grad_norm": 3.9649710655212402, "learning_rate": 1.9547839107390435e-05, "loss": 0.6448, "step": 472 }, { "epoch": 0.6230366492146597, "grad_norm": 3.5095326900482178, "learning_rate": 1.9537700563759303e-05, "loss": 0.6793, "step": 476 }, { "epoch": 0.6282722513089005, "grad_norm": 5.709955215454102, "learning_rate": 1.9527452299933192e-05, "loss": 0.6321, "step": 480 }, { "epoch": 0.6282722513089005, "eval_F1_err_corr": 0.8922176723044, "eval_accuracy": 0.8975096674400619, "eval_correct_accuracy": 0.9449689114373253, "eval_error_accuracy": 0.8450445368681248, "eval_f1": 0.6403994355801584, "eval_loss": 0.25328728556632996, "eval_pr_auc": 0.6997538853349474, "eval_precision": 0.6150959132610508, "eval_recall": 0.6678741227077202, "eval_runtime": 24.8167, "eval_samples_per_second": 197.166, "eval_steps_per_second": 0.806, "step": 480 }, { "epoch": 0.6335078534031413, "grad_norm": 3.6896157264709473, "learning_rate": 1.95170944338065e-05, "loss": 0.6806, "step": 484 }, { "epoch": 0.6387434554973822, "grad_norm": 4.03073263168335, "learning_rate": 1.9506627084534486e-05, "loss": 0.6133, "step": 488 }, { "epoch": 0.643979057591623, "grad_norm": 6.4314751625061035, "learning_rate": 1.9496050372531864e-05, "loss": 0.6098, "step": 492 }, { "epoch": 0.6492146596858639, "grad_norm": 3.8455100059509277, "learning_rate": 1.9485364419471454e-05, "loss": 0.6306, "step": 496 }, { "epoch": 0.6544502617801047, "grad_norm": 3.8784000873565674, "learning_rate": 1.9474569348282774e-05, "loss": 0.6104, "step": 500 }, { "epoch": 0.6596858638743456, "grad_norm": 5.018595218658447, "learning_rate": 1.9463665283150604e-05, "loss": 0.6592, "step": 504 }, { "epoch": 0.6649214659685864, "grad_norm": 3.5282726287841797, "learning_rate": 1.9452652349513587e-05, "loss": 0.621, "step": 508 }, { "epoch": 0.6701570680628273, "grad_norm": 3.4036905765533447, "learning_rate": 1.9441530674062754e-05, "loss": 0.6744, "step": 512 }, { "epoch": 0.675392670157068, "grad_norm": 4.95082950592041, "learning_rate": 1.9430300384740108e-05, "loss": 0.5925, "step": 516 }, { "epoch": 0.680628272251309, "grad_norm": 5.078342437744141, "learning_rate": 1.941896161073711e-05, "loss": 0.5913, "step": 520 }, { "epoch": 0.680628272251309, "eval_F1_err_corr": 0.885156181305656, "eval_accuracy": 0.8942304717710751, "eval_correct_accuracy": 0.9306883336673133, "eval_error_accuracy": 0.8438713827505521, "eval_f1": 0.6393079438759363, "eval_loss": 0.27150195837020874, "eval_pr_auc": 0.6992222071782436, "eval_precision": 0.5985776372975109, "eval_recall": 0.6859859633235228, "eval_runtime": 24.819, "eval_samples_per_second": 197.147, "eval_steps_per_second": 0.806, "step": 520 }, { "epoch": 0.6858638743455497, "grad_norm": 5.81033182144165, "learning_rate": 1.9407514482493214e-05, "loss": 0.6133, "step": 524 }, { "epoch": 0.6910994764397905, "grad_norm": 4.901327133178711, "learning_rate": 1.939595913169438e-05, "loss": 0.6121, "step": 528 }, { "epoch": 0.6963350785340314, "grad_norm": 3.7869937419891357, "learning_rate": 1.9384295691271523e-05, "loss": 0.5822, "step": 532 }, { "epoch": 0.7015706806282722, "grad_norm": 3.8648629188537598, "learning_rate": 1.9372524295399014e-05, "loss": 0.6032, "step": 536 }, { "epoch": 0.7068062827225131, "grad_norm": 3.9610342979431152, "learning_rate": 1.9360645079493126e-05, "loss": 0.59, "step": 540 }, { "epoch": 0.7120418848167539, "grad_norm": 5.623746395111084, "learning_rate": 1.9348658180210473e-05, "loss": 0.5835, "step": 544 }, { "epoch": 0.7172774869109948, "grad_norm": 6.02370548248291, "learning_rate": 1.933656373544645e-05, "loss": 0.6003, "step": 548 }, { "epoch": 0.7225130890052356, "grad_norm": 5.652750492095947, "learning_rate": 1.932436188433362e-05, "loss": 0.5958, "step": 552 }, { "epoch": 0.7277486910994765, "grad_norm": 7.355208396911621, "learning_rate": 1.9312052767240153e-05, "loss": 0.5677, "step": 556 }, { "epoch": 0.7329842931937173, "grad_norm": 4.652146339416504, "learning_rate": 1.9299636525768176e-05, "loss": 0.5649, "step": 560 }, { "epoch": 0.7329842931937173, "eval_F1_err_corr": 0.8974946334360716, "eval_accuracy": 0.9049033255993812, "eval_correct_accuracy": 0.9592731998252757, "eval_error_accuracy": 0.843191870706177, "eval_f1": 0.6410555815039701, "eval_loss": 0.24959486722946167, "eval_pr_auc": 0.6979561382710899, "eval_precision": 0.6619242826139378, "eval_recall": 0.621462531129726, "eval_runtime": 24.817, "eval_samples_per_second": 197.163, "eval_steps_per_second": 0.806, "step": 560 }, { "epoch": 0.7382198952879581, "grad_norm": 5.073575019836426, "learning_rate": 1.9287113302752167e-05, "loss": 0.5491, "step": 564 }, { "epoch": 0.743455497382199, "grad_norm": 4.796985149383545, "learning_rate": 1.927448324225729e-05, "loss": 0.5849, "step": 568 }, { "epoch": 0.7486910994764397, "grad_norm": 6.055835247039795, "learning_rate": 1.9261746489577767e-05, "loss": 0.5721, "step": 572 }, { "epoch": 0.7539267015706806, "grad_norm": 7.7210893630981445, "learning_rate": 1.9248903191235177e-05, "loss": 0.5749, "step": 576 }, { "epoch": 0.7591623036649214, "grad_norm": 3.5172553062438965, "learning_rate": 1.9235953494976786e-05, "loss": 0.6009, "step": 580 }, { "epoch": 0.7643979057591623, "grad_norm": 5.326947212219238, "learning_rate": 1.922289754977385e-05, "loss": 0.5896, "step": 584 }, { "epoch": 0.7696335078534031, "grad_norm": 3.990248203277588, "learning_rate": 1.920973550581989e-05, "loss": 0.578, "step": 588 }, { "epoch": 0.774869109947644, "grad_norm": 3.6598334312438965, "learning_rate": 1.9196467514528973e-05, "loss": 0.567, "step": 592 }, { "epoch": 0.7801047120418848, "grad_norm": 5.096114635467529, "learning_rate": 1.9183093728533966e-05, "loss": 0.5847, "step": 596 }, { "epoch": 0.7853403141361257, "grad_norm": 5.4809889793396, "learning_rate": 1.9169614301684786e-05, "loss": 0.5934, "step": 600 }, { "epoch": 0.7853403141361257, "eval_F1_err_corr": 0.8959803504098618, "eval_accuracy": 0.9018097447795823, "eval_correct_accuracy": 0.9504131731842577, "eval_error_accuracy": 0.8474448138009186, "eval_f1": 0.6463115667483842, "eval_loss": 0.2541360855102539, "eval_pr_auc": 0.7031337927296945, "eval_precision": 0.6363835856923414, "eval_recall": 0.6565542223228436, "eval_runtime": 24.8027, "eval_samples_per_second": 197.277, "eval_steps_per_second": 0.806, "step": 600 }, { "epoch": 0.7905759162303665, "grad_norm": 3.492452621459961, "learning_rate": 1.915602938904662e-05, "loss": 0.5974, "step": 604 }, { "epoch": 0.7958115183246073, "grad_norm": 4.485317707061768, "learning_rate": 1.914233914689815e-05, "loss": 0.5269, "step": 608 }, { "epoch": 0.8010471204188482, "grad_norm": 4.36208438873291, "learning_rate": 1.912854373272975e-05, "loss": 0.5794, "step": 612 }, { "epoch": 0.806282722513089, "grad_norm": 4.126212120056152, "learning_rate": 1.9114643305241678e-05, "loss": 0.5454, "step": 616 }, { "epoch": 0.8115183246073299, "grad_norm": 3.9140942096710205, "learning_rate": 1.9100638024342245e-05, "loss": 0.5615, "step": 620 }, { "epoch": 0.8167539267015707, "grad_norm": 9.218249320983887, "learning_rate": 1.908652805114598e-05, "loss": 0.564, "step": 624 }, { "epoch": 0.8219895287958116, "grad_norm": 4.118100166320801, "learning_rate": 1.907231354797179e-05, "loss": 0.5406, "step": 628 }, { "epoch": 0.8272251308900523, "grad_norm": 3.917045831680298, "learning_rate": 1.9057994678341053e-05, "loss": 0.5581, "step": 632 }, { "epoch": 0.8324607329842932, "grad_norm": 4.272670745849609, "learning_rate": 1.9043571606975776e-05, "loss": 0.5761, "step": 636 }, { "epoch": 0.837696335078534, "grad_norm": 4.809320449829102, "learning_rate": 1.902904449979669e-05, "loss": 0.5422, "step": 640 }, { "epoch": 0.837696335078534, "eval_F1_err_corr": 0.899383774542208, "eval_accuracy": 0.905769528228925, "eval_correct_accuracy": 0.9610494803595725, "eval_error_accuracy": 0.8451544680769811, "eval_f1": 0.6363419293218721, "eval_loss": 0.2484092116355896, "eval_pr_auc": 0.6976824941932482, "eval_precision": 0.673149785299318, "eval_recall": 0.6033506905139234, "eval_runtime": 24.8065, "eval_samples_per_second": 197.247, "eval_steps_per_second": 0.806, "step": 640 }, { "epoch": 0.8429319371727748, "grad_norm": 5.909646511077881, "learning_rate": 1.901441352392133e-05, "loss": 0.5825, "step": 644 }, { "epoch": 0.8481675392670157, "grad_norm": 4.255792140960693, "learning_rate": 1.8999678847662124e-05, "loss": 0.5576, "step": 648 }, { "epoch": 0.8534031413612565, "grad_norm": 6.5200114250183105, "learning_rate": 1.8984840640524445e-05, "loss": 0.5296, "step": 652 }, { "epoch": 0.8586387434554974, "grad_norm": 8.32865047454834, "learning_rate": 1.8969899073204687e-05, "loss": 0.5655, "step": 656 }, { "epoch": 0.8638743455497382, "grad_norm": 9.28367805480957, "learning_rate": 1.8954854317588262e-05, "loss": 0.5791, "step": 660 }, { "epoch": 0.8691099476439791, "grad_norm": 4.166441917419434, "learning_rate": 1.8939706546747656e-05, "loss": 0.5214, "step": 664 }, { "epoch": 0.8743455497382199, "grad_norm": 3.7278671264648438, "learning_rate": 1.8924455934940424e-05, "loss": 0.5087, "step": 668 }, { "epoch": 0.8795811518324608, "grad_norm": 6.253541469573975, "learning_rate": 1.8909102657607182e-05, "loss": 0.5476, "step": 672 }, { "epoch": 0.8848167539267016, "grad_norm": 9.273209571838379, "learning_rate": 1.88936468913696e-05, "loss": 0.4928, "step": 676 }, { "epoch": 0.8900523560209425, "grad_norm": 5.4465532302856445, "learning_rate": 1.8878088814028365e-05, "loss": 0.4909, "step": 680 }, { "epoch": 0.8900523560209425, "eval_F1_err_corr": 0.8973571707111299, "eval_accuracy": 0.9004485692188708, "eval_correct_accuracy": 0.9515640305646176, "eval_error_accuracy": 0.8489933585798806, "eval_f1": 0.6449691085613416, "eval_loss": 0.25420647859573364, "eval_pr_auc": 0.7006737583541583, "eval_precision": 0.6290079621261029, "eval_recall": 0.6617613764998868, "eval_runtime": 24.8354, "eval_samples_per_second": 197.017, "eval_steps_per_second": 0.805, "step": 680 }, { "epoch": 0.8952879581151832, "grad_norm": 3.929280996322632, "learning_rate": 1.886242860456113e-05, "loss": 0.518, "step": 684 }, { "epoch": 0.900523560209424, "grad_norm": 3.3221724033355713, "learning_rate": 1.884666644312046e-05, "loss": 0.474, "step": 688 }, { "epoch": 0.9057591623036649, "grad_norm": 4.1775126457214355, "learning_rate": 1.8830802511031763e-05, "loss": 0.513, "step": 692 }, { "epoch": 0.9109947643979057, "grad_norm": 4.372125148773193, "learning_rate": 1.88148369907912e-05, "loss": 0.4958, "step": 696 }, { "epoch": 0.9162303664921466, "grad_norm": 4.19729471206665, "learning_rate": 1.8798770066063577e-05, "loss": 0.5178, "step": 700 }, { "epoch": 0.9214659685863874, "grad_norm": 4.332755088806152, "learning_rate": 1.8782601921680258e-05, "loss": 0.525, "step": 704 }, { "epoch": 0.9267015706806283, "grad_norm": 4.065849304199219, "learning_rate": 1.8766332743637002e-05, "loss": 0.4692, "step": 708 }, { "epoch": 0.9319371727748691, "grad_norm": 4.974046230316162, "learning_rate": 1.8749962719091864e-05, "loss": 0.4973, "step": 712 }, { "epoch": 0.93717277486911, "grad_norm": 4.961699962615967, "learning_rate": 1.8733492036363007e-05, "loss": 0.5204, "step": 716 }, { "epoch": 0.9424083769633508, "grad_norm": 4.140364646911621, "learning_rate": 1.871692088492655e-05, "loss": 0.4905, "step": 720 }, { "epoch": 0.9424083769633508, "eval_F1_err_corr": 0.8932916712717729, "eval_accuracy": 0.8947254447022428, "eval_correct_accuracy": 0.9452793616476387, "eval_error_accuracy": 0.8467242340670772, "eval_f1": 0.6396272371068517, "eval_loss": 0.2594238817691803, "eval_pr_auc": 0.7027911559368634, "eval_precision": 0.6008754476721051, "eval_recall": 0.6837219832465474, "eval_runtime": 24.8417, "eval_samples_per_second": 196.967, "eval_steps_per_second": 0.805, "step": 720 }, { "epoch": 0.9476439790575916, "grad_norm": 8.625274658203125, "learning_rate": 1.8700249455414394e-05, "loss": 0.4686, "step": 724 }, { "epoch": 0.9528795811518325, "grad_norm": 6.383296966552734, "learning_rate": 1.8683477939612024e-05, "loss": 0.4764, "step": 728 }, { "epoch": 0.9581151832460733, "grad_norm": 7.345070838928223, "learning_rate": 1.866660653045629e-05, "loss": 0.4823, "step": 732 }, { "epoch": 0.9633507853403142, "grad_norm": 4.40362548828125, "learning_rate": 1.8649635422033218e-05, "loss": 0.49, "step": 736 }, { "epoch": 0.9685863874345549, "grad_norm": 3.8177592754364014, "learning_rate": 1.863256480957574e-05, "loss": 0.5004, "step": 740 }, { "epoch": 0.9738219895287958, "grad_norm": 3.5552761554718018, "learning_rate": 1.861539488946148e-05, "loss": 0.4967, "step": 744 }, { "epoch": 0.9790575916230366, "grad_norm": 3.948543071746826, "learning_rate": 1.8598125859210475e-05, "loss": 0.5106, "step": 748 }, { "epoch": 0.9842931937172775, "grad_norm": 4.415132999420166, "learning_rate": 1.858075791748291e-05, "loss": 0.4919, "step": 752 }, { "epoch": 0.9895287958115183, "grad_norm": 4.514105319976807, "learning_rate": 1.8563291264076834e-05, "loss": 0.4947, "step": 756 }, { "epoch": 0.9947643979057592, "grad_norm": 6.685056209564209, "learning_rate": 1.854572609992586e-05, "loss": 0.4892, "step": 760 }, { "epoch": 0.9947643979057592, "eval_F1_err_corr": 0.9005018183708923, "eval_accuracy": 0.9076256767208043, "eval_correct_accuracy": 0.9694615035570632, "eval_error_accuracy": 0.8407011107412775, "eval_f1": 0.6246857717445953, "eval_loss": 0.24942660331726074, "eval_pr_auc": 0.6972885689682531, "eval_precision": 0.7021757558632382, "eval_recall": 0.5625990491283677, "eval_runtime": 24.7945, "eval_samples_per_second": 197.342, "eval_steps_per_second": 0.807, "step": 760 } ], "logging_steps": 4, "max_steps": 3820, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 16, "stateful_callbacks": { "MinEpochEarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.37033143972266e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }