| { |
| "best_global_step": null, |
| "best_metric": 0.9005018183708923, |
| "best_model_checkpoint": null, |
| "epoch": 0.9947643979057592, |
| "eval_steps": 16, |
| "global_step": 760, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005235602094240838, |
| "grad_norm": 433.5572204589844, |
| "learning_rate": 5.217391304347826e-07, |
| "loss": 5.5489, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005235602094240838, |
| "eval_F1_err_corr": 0.2899344909681993, |
| "eval_accuracy": 0.33964423820572315, |
| "eval_correct_accuracy": 0.21863927522501062, |
| "eval_error_accuracy": 0.4302247894550517, |
| "eval_f1": 0.2573237770510055, |
| "eval_loss": 1.461071491241455, |
| "eval_pr_auc": 0.16429768646454848, |
| "eval_precision": 0.15202466598150052, |
| "eval_recall": 0.8372198324654743, |
| "eval_runtime": 24.9385, |
| "eval_samples_per_second": 196.202, |
| "eval_steps_per_second": 0.802, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010471204188481676, |
| "grad_norm": 424.1092834472656, |
| "learning_rate": 1.2173913043478262e-06, |
| "loss": 5.392, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.010471204188481676, |
| "eval_F1_err_corr": 0.321098900363111, |
| "eval_accuracy": 0.3666511987625677, |
| "eval_correct_accuracy": 0.2498037913412515, |
| "eval_error_accuracy": 0.4493435370426137, |
| "eval_f1": 0.25948565848012445, |
| "eval_loss": 1.3678739070892334, |
| "eval_pr_auc": 0.16391947366452397, |
| "eval_precision": 0.15441239776151527, |
| "eval_recall": 0.8120896536110482, |
| "eval_runtime": 24.8266, |
| "eval_samples_per_second": 197.087, |
| "eval_steps_per_second": 0.806, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.015706806282722512, |
| "grad_norm": 328.165771484375, |
| "learning_rate": 1.9130434782608697e-06, |
| "loss": 4.6674, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.015706806282722512, |
| "eval_F1_err_corr": 0.42193034247158306, |
| "eval_accuracy": 0.4537664346481052, |
| "eval_correct_accuracy": 0.36420961710522887, |
| "eval_error_accuracy": 0.5013920328557274, |
| "eval_f1": 0.25876327610091937, |
| "eval_loss": 1.08595609664917, |
| "eval_pr_auc": 0.16343108665970787, |
| "eval_precision": 0.15883323026180168, |
| "eval_recall": 0.6977586597237945, |
| "eval_runtime": 24.7964, |
| "eval_samples_per_second": 197.327, |
| "eval_steps_per_second": 0.807, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.020942408376963352, |
| "grad_norm": 121.70630645751953, |
| "learning_rate": 2.6086956521739132e-06, |
| "loss": 3.0944, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.020942408376963352, |
| "eval_F1_err_corr": 0.721921189250657, |
| "eval_accuracy": 0.7358391337973704, |
| "eval_correct_accuracy": 0.7870115524045516, |
| "eval_error_accuracy": 0.6667750717278245, |
| "eval_f1": 0.1906928253246138, |
| "eval_loss": 0.5721015334129333, |
| "eval_pr_auc": 0.16146374192556026, |
| "eval_precision": 0.16400391261819366, |
| "eval_recall": 0.22775639574371745, |
| "eval_runtime": 24.8528, |
| "eval_samples_per_second": 196.879, |
| "eval_steps_per_second": 0.805, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.02617801047120419, |
| "grad_norm": 18.458759307861328, |
| "learning_rate": 3.3043478260869567e-06, |
| "loss": 2.002, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02617801047120419, |
| "eval_F1_err_corr": 0.8402340555989792, |
| "eval_accuracy": 0.8622119102861562, |
| "eval_correct_accuracy": 0.9922276019965096, |
| "eval_error_accuracy": 0.728620881787505, |
| "eval_f1": 0.0035794183445190158, |
| "eval_loss": 0.7684443593025208, |
| "eval_pr_auc": 0.17251592682571912, |
| "eval_precision": 0.1509433962264151, |
| "eval_recall": 0.001811184061580258, |
| "eval_runtime": 24.8601, |
| "eval_samples_per_second": 196.822, |
| "eval_steps_per_second": 0.805, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.031413612565445025, |
| "grad_norm": 115.0860595703125, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 3.383, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.031413612565445025, |
| "eval_F1_err_corr": 0.8378682847250856, |
| "eval_accuracy": 0.8620881670533642, |
| "eval_correct_accuracy": 0.9842169035446346, |
| "eval_error_accuracy": 0.7294085238385144, |
| "eval_f1": 0.008451957295373666, |
| "eval_loss": 0.8194268941879272, |
| "eval_pr_auc": 0.18009572056722223, |
| "eval_precision": 0.24050632911392406, |
| "eval_recall": 0.004301562146253113, |
| "eval_runtime": 24.8294, |
| "eval_samples_per_second": 197.065, |
| "eval_steps_per_second": 0.805, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.03664921465968586, |
| "grad_norm": 99.96025848388672, |
| "learning_rate": 4.695652173913044e-06, |
| "loss": 3.1857, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03664921465968586, |
| "eval_F1_err_corr": 0.8207683484774771, |
| "eval_accuracy": 0.8591492652745553, |
| "eval_correct_accuracy": 0.9328157189465495, |
| "eval_error_accuracy": 0.7327520943454843, |
| "eval_f1": 0.060268317853457175, |
| "eval_loss": 0.6408756971359253, |
| "eval_pr_auc": 0.19809419361676622, |
| "eval_precision": 0.3411214953271028, |
| "eval_recall": 0.03305410912383971, |
| "eval_runtime": 24.8048, |
| "eval_samples_per_second": 197.261, |
| "eval_steps_per_second": 0.806, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.041884816753926704, |
| "grad_norm": 48.16204071044922, |
| "learning_rate": 5.391304347826088e-06, |
| "loss": 2.1498, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.041884816753926704, |
| "eval_F1_err_corr": 0.7694538304703895, |
| "eval_accuracy": 0.837370456303171, |
| "eval_correct_accuracy": 0.8088768689020601, |
| "eval_error_accuracy": 0.7336950054749075, |
| "eval_f1": 0.21148942552872357, |
| "eval_loss": 0.44809016585350037, |
| "eval_pr_auc": 0.2318064013915099, |
| "eval_precision": 0.31333333333333335, |
| "eval_recall": 0.15961059542676023, |
| "eval_runtime": 24.8313, |
| "eval_samples_per_second": 197.05, |
| "eval_steps_per_second": 0.805, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04712041884816754, |
| "grad_norm": 169.05667114257812, |
| "learning_rate": 6.086956521739132e-06, |
| "loss": 1.9176, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04712041884816754, |
| "eval_F1_err_corr": 0.6219027098098009, |
| "eval_accuracy": 0.7121732405259087, |
| "eval_correct_accuracy": 0.5674362971053744, |
| "eval_error_accuracy": 0.687935454975843, |
| "eval_f1": 0.33722752528850264, |
| "eval_loss": 0.5731640458106995, |
| "eval_pr_auc": 0.2788670530837179, |
| "eval_precision": 0.2460243217960711, |
| "eval_recall": 0.5358840842200588, |
| "eval_runtime": 24.8463, |
| "eval_samples_per_second": 196.931, |
| "eval_steps_per_second": 0.805, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.05235602094240838, |
| "grad_norm": 114.60936737060547, |
| "learning_rate": 6.782608695652174e-06, |
| "loss": 2.0171, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05235602094240838, |
| "eval_F1_err_corr": 0.7430224807525013, |
| "eval_accuracy": 0.7990719257540603, |
| "eval_correct_accuracy": 0.7547544657206874, |
| "eval_error_accuracy": 0.7316496396965659, |
| "eval_f1": 0.35211970074812965, |
| "eval_loss": 0.4375106692314148, |
| "eval_pr_auc": 0.3012571706540082, |
| "eval_precision": 0.3147289586305278, |
| "eval_recall": 0.39959248358614446, |
| "eval_runtime": 24.8039, |
| "eval_samples_per_second": 197.268, |
| "eval_steps_per_second": 0.806, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05759162303664921, |
| "grad_norm": 32.457340240478516, |
| "learning_rate": 7.478260869565218e-06, |
| "loss": 1.611, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.05759162303664921, |
| "eval_F1_err_corr": 0.8419241583637731, |
| "eval_accuracy": 0.862830626450116, |
| "eval_correct_accuracy": 0.9871360561186291, |
| "eval_error_accuracy": 0.7339561045659535, |
| "eval_f1": 0.09914668833807395, |
| "eval_loss": 0.38381654024124146, |
| "eval_pr_auc": 0.3152015073109494, |
| "eval_precision": 0.48316831683168315, |
| "eval_recall": 0.05524111387819787, |
| "eval_runtime": 24.832, |
| "eval_samples_per_second": 197.044, |
| "eval_steps_per_second": 0.805, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.06282722513089005, |
| "grad_norm": 22.770957946777344, |
| "learning_rate": 8.173913043478263e-06, |
| "loss": 1.5516, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.06282722513089005, |
| "eval_F1_err_corr": 0.8403483674599067, |
| "eval_accuracy": 0.859891724671307, |
| "eval_correct_accuracy": 0.9726090387657613, |
| "eval_error_accuracy": 0.7397527603961362, |
| "eval_f1": 0.19484444444444443, |
| "eval_loss": 0.355484277009964, |
| "eval_pr_auc": 0.3399032342610957, |
| "eval_precision": 0.45364238410596025, |
| "eval_recall": 0.12406610821824768, |
| "eval_runtime": 24.7998, |
| "eval_samples_per_second": 197.3, |
| "eval_steps_per_second": 0.806, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.06806282722513089, |
| "grad_norm": 69.65153503417969, |
| "learning_rate": 8.869565217391306e-06, |
| "loss": 1.4648, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.06806282722513089, |
| "eval_F1_err_corr": 0.8036896045473728, |
| "eval_accuracy": 0.8478267594740913, |
| "eval_correct_accuracy": 0.8494371774369468, |
| "eval_error_accuracy": 0.762617804842102, |
| "eval_f1": 0.38658186806334954, |
| "eval_loss": 0.36472800374031067, |
| "eval_pr_auc": 0.38944473786040484, |
| "eval_precision": 0.4303164908384231, |
| "eval_recall": 0.350916911931175, |
| "eval_runtime": 24.8345, |
| "eval_samples_per_second": 197.025, |
| "eval_steps_per_second": 0.805, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.07329842931937172, |
| "grad_norm": 9.83507251739502, |
| "learning_rate": 9.565217391304349e-06, |
| "loss": 1.3967, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.07329842931937172, |
| "eval_F1_err_corr": 0.8296970467203022, |
| "eval_accuracy": 0.8688321732405259, |
| "eval_correct_accuracy": 0.9181484628791651, |
| "eval_error_accuracy": 0.7567903433781845, |
| "eval_f1": 0.29262595929262597, |
| "eval_loss": 0.33204466104507446, |
| "eval_pr_auc": 0.41163972835541246, |
| "eval_precision": 0.5561192136968929, |
| "eval_recall": 0.19855105275073578, |
| "eval_runtime": 24.8293, |
| "eval_samples_per_second": 197.066, |
| "eval_steps_per_second": 0.805, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.07853403141361257, |
| "grad_norm": 16.36752700805664, |
| "learning_rate": 1.0260869565217393e-05, |
| "loss": 1.2944, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07853403141361257, |
| "eval_F1_err_corr": 0.8421678884358804, |
| "eval_accuracy": 0.8736581593194123, |
| "eval_correct_accuracy": 0.9276144097142335, |
| "eval_error_accuracy": 0.7711353234659966, |
| "eval_f1": 0.38438347904733194, |
| "eval_loss": 0.31788310408592224, |
| "eval_pr_auc": 0.4608372769142751, |
| "eval_precision": 0.5751014884979703, |
| "eval_recall": 0.28865745981435365, |
| "eval_runtime": 24.8595, |
| "eval_samples_per_second": 196.826, |
| "eval_steps_per_second": 0.805, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.08376963350785341, |
| "grad_norm": 17.503982543945312, |
| "learning_rate": 1.0956521739130435e-05, |
| "loss": 1.284, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.08376963350785341, |
| "eval_F1_err_corr": 0.8579401681935629, |
| "eval_accuracy": 0.8769682907965971, |
| "eval_correct_accuracy": 0.9478163203454459, |
| "eval_error_accuracy": 0.7836326415058088, |
| "eval_f1": 0.4385147536354652, |
| "eval_loss": 0.30793023109436035, |
| "eval_pr_auc": 0.49259983395831536, |
| "eval_precision": 0.5825206301575394, |
| "eval_recall": 0.3515961059542676, |
| "eval_runtime": 24.8423, |
| "eval_samples_per_second": 196.962, |
| "eval_steps_per_second": 0.805, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.08900523560209424, |
| "grad_norm": 18.571523666381836, |
| "learning_rate": 1.1652173913043478e-05, |
| "loss": 1.191, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.08900523560209424, |
| "eval_F1_err_corr": 0.8618882109239977, |
| "eval_accuracy": 0.8807424593967518, |
| "eval_correct_accuracy": 0.9652649658606812, |
| "eval_error_accuracy": 0.7785120852969489, |
| "eval_f1": 0.38741458763705705, |
| "eval_loss": 0.30222997069358826, |
| "eval_pr_auc": 0.5105597340335465, |
| "eval_precision": 0.6497867803837953, |
| "eval_recall": 0.2759791713832918, |
| "eval_runtime": 24.8386, |
| "eval_samples_per_second": 196.992, |
| "eval_steps_per_second": 0.805, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.09424083769633508, |
| "grad_norm": 19.51681900024414, |
| "learning_rate": 1.2347826086956523e-05, |
| "loss": 1.1905, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.09424083769633508, |
| "eval_F1_err_corr": 0.8556927040319808, |
| "eval_accuracy": 0.8829079659706109, |
| "eval_correct_accuracy": 0.9252440207528985, |
| "eval_error_accuracy": 0.7958667759923117, |
| "eval_f1": 0.46682631356529086, |
| "eval_loss": 0.2958272099494934, |
| "eval_pr_auc": 0.5272941790977752, |
| "eval_precision": 0.6178225205070843, |
| "eval_recall": 0.37514149875481095, |
| "eval_runtime": 24.8293, |
| "eval_samples_per_second": 197.065, |
| "eval_steps_per_second": 0.805, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.09947643979057591, |
| "grad_norm": 21.868053436279297, |
| "learning_rate": 1.3043478260869566e-05, |
| "loss": 1.1759, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.09947643979057591, |
| "eval_F1_err_corr": 0.8671239387996902, |
| "eval_accuracy": 0.8833720030935808, |
| "eval_correct_accuracy": 0.965744942725071, |
| "eval_error_accuracy": 0.7867787965661607, |
| "eval_f1": 0.43932183224271265, |
| "eval_loss": 0.2900922894477844, |
| "eval_pr_auc": 0.5398410773230814, |
| "eval_precision": 0.6402254009536195, |
| "eval_recall": 0.33438985736925514, |
| "eval_runtime": 24.826, |
| "eval_samples_per_second": 197.092, |
| "eval_steps_per_second": 0.806, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.10471204188481675, |
| "grad_norm": 11.858696937561035, |
| "learning_rate": 1.373913043478261e-05, |
| "loss": 1.1804, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.10471204188481675, |
| "eval_F1_err_corr": 0.8643274810534162, |
| "eval_accuracy": 0.8850425367362722, |
| "eval_correct_accuracy": 0.951882356363745, |
| "eval_error_accuracy": 0.7915226184557567, |
| "eval_f1": 0.4779432424838438, |
| "eval_loss": 0.2871633768081665, |
| "eval_pr_auc": 0.5513156434574297, |
| "eval_precision": 0.6297667530544243, |
| "eval_recall": 0.38510301109350237, |
| "eval_runtime": 24.8425, |
| "eval_samples_per_second": 196.96, |
| "eval_steps_per_second": 0.805, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1099476439790576, |
| "grad_norm": 8.166620254516602, |
| "learning_rate": 1.4434782608695654e-05, |
| "loss": 1.1212, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.11518324607329843, |
| "grad_norm": 4.340336799621582, |
| "learning_rate": 1.5130434782608697e-05, |
| "loss": 1.1325, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.12041884816753927, |
| "grad_norm": 27.051570892333984, |
| "learning_rate": 1.582608695652174e-05, |
| "loss": 1.1218, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1256544502617801, |
| "grad_norm": 22.343820571899414, |
| "learning_rate": 1.6521739130434785e-05, |
| "loss": 1.1068, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.13089005235602094, |
| "grad_norm": 47.00363540649414, |
| "learning_rate": 1.721739130434783e-05, |
| "loss": 1.1034, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.13612565445026178, |
| "grad_norm": 40.41328048706055, |
| "learning_rate": 1.791304347826087e-05, |
| "loss": 1.1235, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.14136125654450263, |
| "grad_norm": 31.55730628967285, |
| "learning_rate": 1.8608695652173912e-05, |
| "loss": 1.0747, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.14659685863874344, |
| "grad_norm": 2.652536392211914, |
| "learning_rate": 1.9304347826086957e-05, |
| "loss": 0.9891, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1518324607329843, |
| "grad_norm": 3.2267162799835205, |
| "learning_rate": 2e-05, |
| "loss": 0.9607, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.15706806282722513, |
| "grad_norm": 21.89421272277832, |
| "learning_rate": 1.9999942480792804e-05, |
| "loss": 1.0643, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.15706806282722513, |
| "eval_F1_err_corr": 0.8734264964691199, |
| "eval_accuracy": 0.8929311678267595, |
| "eval_correct_accuracy": 0.9278371473433551, |
| "eval_error_accuracy": 0.8250438945941904, |
| "eval_f1": 0.5815499939547818, |
| "eval_loss": 0.2678382694721222, |
| "eval_pr_auc": 0.6347920534332054, |
| "eval_precision": 0.6240269849507005, |
| "eval_recall": 0.544487208512565, |
| "eval_runtime": 24.8153, |
| "eval_samples_per_second": 197.177, |
| "eval_steps_per_second": 0.806, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.16230366492146597, |
| "grad_norm": 8.081486701965332, |
| "learning_rate": 1.999976992383291e-05, |
| "loss": 1.0189, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.16753926701570682, |
| "grad_norm": 17.748775482177734, |
| "learning_rate": 1.9999482331105377e-05, |
| "loss": 0.9898, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.17277486910994763, |
| "grad_norm": 41.294334411621094, |
| "learning_rate": 1.9999079705918636e-05, |
| "loss": 1.0795, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.17801047120418848, |
| "grad_norm": 4.425788879394531, |
| "learning_rate": 1.999856205290442e-05, |
| "loss": 1.0274, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.18324607329842932, |
| "grad_norm": 26.085590362548828, |
| "learning_rate": 1.9997929378017723e-05, |
| "loss": 0.9516, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.18848167539267016, |
| "grad_norm": 18.811126708984375, |
| "learning_rate": 1.9997181688536746e-05, |
| "loss": 0.966, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.193717277486911, |
| "grad_norm": 22.464527130126953, |
| "learning_rate": 1.999631899306278e-05, |
| "loss": 0.8932, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.19895287958115182, |
| "grad_norm": 8.309951782226562, |
| "learning_rate": 1.999534130152014e-05, |
| "loss": 0.9756, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.20418848167539266, |
| "grad_norm": 4.516532897949219, |
| "learning_rate": 1.999424862515604e-05, |
| "loss": 0.998, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2094240837696335, |
| "grad_norm": 10.015279769897461, |
| "learning_rate": 1.999304097654045e-05, |
| "loss": 0.9015, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2094240837696335, |
| "eval_F1_err_corr": 0.885087159946509, |
| "eval_accuracy": 0.9020572312451662, |
| "eval_correct_accuracy": 0.95333342698488, |
| "eval_error_accuracy": 0.8259592279571245, |
| "eval_f1": 0.5984271943176053, |
| "eval_loss": 0.24851758778095245, |
| "eval_pr_auc": 0.6675246054619536, |
| "eval_precision": 0.6804153446783963, |
| "eval_recall": 0.5340729001584786, |
| "eval_runtime": 24.8104, |
| "eval_samples_per_second": 197.216, |
| "eval_steps_per_second": 0.806, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.21465968586387435, |
| "grad_norm": 14.583905220031738, |
| "learning_rate": 1.999171836956597e-05, |
| "loss": 0.9587, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.2198952879581152, |
| "grad_norm": 9.168513298034668, |
| "learning_rate": 1.9990280819447662e-05, |
| "loss": 0.9663, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.225130890052356, |
| "grad_norm": 24.278688430786133, |
| "learning_rate": 1.998872834272287e-05, |
| "loss": 0.9679, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.23036649214659685, |
| "grad_norm": 23.693418502807617, |
| "learning_rate": 1.9987060957251047e-05, |
| "loss": 0.9541, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2356020942408377, |
| "grad_norm": 34.47703170776367, |
| "learning_rate": 1.9985278682213525e-05, |
| "loss": 0.8988, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.24083769633507854, |
| "grad_norm": 17.93362045288086, |
| "learning_rate": 1.9983381538113317e-05, |
| "loss": 0.9296, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.24607329842931938, |
| "grad_norm": 23.294275283813477, |
| "learning_rate": 1.998136954677487e-05, |
| "loss": 0.9337, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2513089005235602, |
| "grad_norm": 19.78593635559082, |
| "learning_rate": 1.9979242731343803e-05, |
| "loss": 0.8976, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.25654450261780104, |
| "grad_norm": 16.300464630126953, |
| "learning_rate": 1.9977001116286675e-05, |
| "loss": 0.8705, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.2617801047120419, |
| "grad_norm": 26.935935974121094, |
| "learning_rate": 1.9974644727390665e-05, |
| "loss": 0.8758, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2617801047120419, |
| "eval_F1_err_corr": 0.8910747356279248, |
| "eval_accuracy": 0.9052126836813612, |
| "eval_correct_accuracy": 0.9761037985940583, |
| "eval_error_accuracy": 0.819672508302841, |
| "eval_f1": 0.558119411595039, |
| "eval_loss": 0.24936090409755707, |
| "eval_pr_auc": 0.6830633725429478, |
| "eval_precision": 0.768772348033373, |
| "eval_recall": 0.4380801448947249, |
| "eval_runtime": 24.8593, |
| "eval_samples_per_second": 196.827, |
| "eval_steps_per_second": 0.805, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2670157068062827, |
| "grad_norm": 26.804174423217773, |
| "learning_rate": 1.9972173591763297e-05, |
| "loss": 0.9957, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.27225130890052357, |
| "grad_norm": 12.255861282348633, |
| "learning_rate": 1.996958773783213e-05, |
| "loss": 0.8614, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.2774869109947644, |
| "grad_norm": 10.577012062072754, |
| "learning_rate": 1.9966887195344403e-05, |
| "loss": 0.8539, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.28272251308900526, |
| "grad_norm": 9.850268363952637, |
| "learning_rate": 1.9964071995366744e-05, |
| "loss": 0.8184, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.2879581151832461, |
| "grad_norm": 4.022161960601807, |
| "learning_rate": 1.9961142170284762e-05, |
| "loss": 0.783, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2931937172774869, |
| "grad_norm": 4.174556732177734, |
| "learning_rate": 1.9958097753802693e-05, |
| "loss": 0.8355, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.29842931937172773, |
| "grad_norm": 8.559288024902344, |
| "learning_rate": 1.9954938780943034e-05, |
| "loss": 0.8081, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3036649214659686, |
| "grad_norm": 11.881876945495605, |
| "learning_rate": 1.9951665288046098e-05, |
| "loss": 0.8846, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3089005235602094, |
| "grad_norm": 9.480097770690918, |
| "learning_rate": 1.994827731276963e-05, |
| "loss": 0.869, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.31413612565445026, |
| "grad_norm": 18.96599006652832, |
| "learning_rate": 1.9944774894088367e-05, |
| "loss": 0.9044, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.31413612565445026, |
| "eval_F1_err_corr": 0.8903583524392616, |
| "eval_accuracy": 0.8976334106728538, |
| "eval_correct_accuracy": 0.9422891260099501, |
| "eval_error_accuracy": 0.8438525462118894, |
| "eval_f1": 0.6341625207296849, |
| "eval_loss": 0.25486111640930176, |
| "eval_pr_auc": 0.6936322312463549, |
| "eval_precision": 0.6197061365600691, |
| "eval_recall": 0.6493094860765225, |
| "eval_runtime": 24.7931, |
| "eval_samples_per_second": 197.353, |
| "eval_steps_per_second": 0.807, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3193717277486911, |
| "grad_norm": 7.49755859375, |
| "learning_rate": 1.994115807229357e-05, |
| "loss": 0.8702, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.32460732984293195, |
| "grad_norm": 19.93411636352539, |
| "learning_rate": 1.993742688899259e-05, |
| "loss": 0.8357, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3298429319371728, |
| "grad_norm": 18.435436248779297, |
| "learning_rate": 1.9933581387108358e-05, |
| "loss": 0.8185, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.33507853403141363, |
| "grad_norm": 23.072092056274414, |
| "learning_rate": 1.992962161087893e-05, |
| "loss": 0.8371, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.3403141361256545, |
| "grad_norm": 11.625171661376953, |
| "learning_rate": 1.9925547605856937e-05, |
| "loss": 0.8276, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.34554973821989526, |
| "grad_norm": 18.671037673950195, |
| "learning_rate": 1.992135941890909e-05, |
| "loss": 0.8253, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.3507853403141361, |
| "grad_norm": 15.393129348754883, |
| "learning_rate": 1.9917057098215624e-05, |
| "loss": 0.8245, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.35602094240837695, |
| "grad_norm": 9.267082214355469, |
| "learning_rate": 1.9912640693269754e-05, |
| "loss": 0.8451, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.3612565445026178, |
| "grad_norm": 5.4926252365112305, |
| "learning_rate": 1.9908110254877107e-05, |
| "loss": 0.813, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.36649214659685864, |
| "grad_norm": 6.064371585845947, |
| "learning_rate": 1.9903465835155124e-05, |
| "loss": 0.7553, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.36649214659685864, |
| "eval_F1_err_corr": 0.898106732050316, |
| "eval_accuracy": 0.9078112915699923, |
| "eval_correct_accuracy": 0.9649030769491357, |
| "eval_error_accuracy": 0.8399597119400094, |
| "eval_f1": 0.624117053481332, |
| "eval_loss": 0.23855358362197876, |
| "eval_pr_auc": 0.697922245841014, |
| "eval_precision": 0.704642551979493, |
| "eval_recall": 0.5601086710436948, |
| "eval_runtime": 24.8196, |
| "eval_samples_per_second": 197.143, |
| "eval_steps_per_second": 0.806, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3717277486910995, |
| "grad_norm": 11.443989753723145, |
| "learning_rate": 1.9898707487532475e-05, |
| "loss": 0.7992, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.3769633507853403, |
| "grad_norm": 9.889354705810547, |
| "learning_rate": 1.9893835266748437e-05, |
| "loss": 0.8425, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.38219895287958117, |
| "grad_norm": 6.687994480133057, |
| "learning_rate": 1.9888849228852262e-05, |
| "loss": 0.8465, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.387434554973822, |
| "grad_norm": 3.455092430114746, |
| "learning_rate": 1.988374943120254e-05, |
| "loss": 0.8098, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.39267015706806285, |
| "grad_norm": 4.258669376373291, |
| "learning_rate": 1.987853593246654e-05, |
| "loss": 0.8263, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.39790575916230364, |
| "grad_norm": 5.940682888031006, |
| "learning_rate": 1.9873208792619517e-05, |
| "loss": 0.7651, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.4031413612565445, |
| "grad_norm": 5.644289493560791, |
| "learning_rate": 1.9867768072944047e-05, |
| "loss": 0.7919, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4083769633507853, |
| "grad_norm": 6.426525115966797, |
| "learning_rate": 1.9862213836029308e-05, |
| "loss": 0.7661, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.41361256544502617, |
| "grad_norm": 7.790468215942383, |
| "learning_rate": 1.985654614577036e-05, |
| "loss": 0.7592, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.418848167539267, |
| "grad_norm": 8.240925788879395, |
| "learning_rate": 1.985076506736741e-05, |
| "loss": 0.7935, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.418848167539267, |
| "eval_F1_err_corr": 0.8892707173263128, |
| "eval_accuracy": 0.900108275328693, |
| "eval_correct_accuracy": 0.9416031342860438, |
| "eval_error_accuracy": 0.8424490839609798, |
| "eval_f1": 0.636169014084507, |
| "eval_loss": 0.24991166591644287, |
| "eval_pr_auc": 0.6999774937080984, |
| "eval_precision": 0.6332436069986541, |
| "eval_recall": 0.6391215757301336, |
| "eval_runtime": 24.8123, |
| "eval_samples_per_second": 197.2, |
| "eval_steps_per_second": 0.806, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.42408376963350786, |
| "grad_norm": 6.823334217071533, |
| "learning_rate": 1.9844870667325073e-05, |
| "loss": 0.7347, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4293193717277487, |
| "grad_norm": 4.039069175720215, |
| "learning_rate": 1.9838863013451587e-05, |
| "loss": 0.7886, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.43455497382198954, |
| "grad_norm": 7.6934380531311035, |
| "learning_rate": 1.9832742174858052e-05, |
| "loss": 0.7608, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.4397905759162304, |
| "grad_norm": 9.409914016723633, |
| "learning_rate": 1.9826508221957624e-05, |
| "loss": 0.7466, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.44502617801047123, |
| "grad_norm": 7.726130962371826, |
| "learning_rate": 1.9820161226464708e-05, |
| "loss": 0.7023, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.450261780104712, |
| "grad_norm": 3.726100206375122, |
| "learning_rate": 1.9813701261394136e-05, |
| "loss": 0.7078, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.45549738219895286, |
| "grad_norm": 12.017361640930176, |
| "learning_rate": 1.980712840106032e-05, |
| "loss": 0.7383, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.4607329842931937, |
| "grad_norm": 5.709269046783447, |
| "learning_rate": 1.9800442721076406e-05, |
| "loss": 0.7215, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.46596858638743455, |
| "grad_norm": 12.649430274963379, |
| "learning_rate": 1.979364429835339e-05, |
| "loss": 0.7111, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.4712041884816754, |
| "grad_norm": 16.15489959716797, |
| "learning_rate": 1.9786733211099257e-05, |
| "loss": 0.7764, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4712041884816754, |
| "eval_F1_err_corr": 0.894511960241892, |
| "eval_accuracy": 0.9100077339520495, |
| "eval_correct_accuracy": 0.9712793351142024, |
| "eval_error_accuracy": 0.8289907059644579, |
| "eval_f1": 0.5971472095277662, |
| "eval_loss": 0.2414369434118271, |
| "eval_pr_auc": 0.7108638111158798, |
| "eval_precision": 0.7689015691868759, |
| "eval_recall": 0.48811410459587956, |
| "eval_runtime": 25.0196, |
| "eval_samples_per_second": 195.567, |
| "eval_steps_per_second": 0.799, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.47643979057591623, |
| "grad_norm": 12.530599594116211, |
| "learning_rate": 1.9779709538818052e-05, |
| "loss": 0.7715, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.4816753926701571, |
| "grad_norm": 6.7939605712890625, |
| "learning_rate": 1.9772573362308992e-05, |
| "loss": 0.7522, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.4869109947643979, |
| "grad_norm": 3.4304537773132324, |
| "learning_rate": 1.9765324763665516e-05, |
| "loss": 0.7511, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.49214659685863876, |
| "grad_norm": 6.636844158172607, |
| "learning_rate": 1.9757963826274357e-05, |
| "loss": 0.7121, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4973821989528796, |
| "grad_norm": 4.51839017868042, |
| "learning_rate": 1.975049063481457e-05, |
| "loss": 0.7231, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5026178010471204, |
| "grad_norm": 9.865214347839355, |
| "learning_rate": 1.974290527525657e-05, |
| "loss": 0.762, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5078534031413613, |
| "grad_norm": 3.440359592437744, |
| "learning_rate": 1.9735207834861117e-05, |
| "loss": 0.7169, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.5130890052356021, |
| "grad_norm": 3.5312769412994385, |
| "learning_rate": 1.972739840217836e-05, |
| "loss": 0.73, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.518324607329843, |
| "grad_norm": 4.723533630371094, |
| "learning_rate": 1.9719477067046768e-05, |
| "loss": 0.6783, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.5235602094240838, |
| "grad_norm": 3.5356740951538086, |
| "learning_rate": 1.971144392059212e-05, |
| "loss": 0.7155, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5235602094240838, |
| "eval_F1_err_corr": 0.893120798984817, |
| "eval_accuracy": 0.902954369682908, |
| "eval_correct_accuracy": 0.9461320280124133, |
| "eval_error_accuracy": 0.8457347701138861, |
| "eval_f1": 0.639051892762628, |
| "eval_loss": 0.24243153631687164, |
| "eval_pr_auc": 0.7029855391245526, |
| "eval_precision": 0.6497426298549368, |
| "eval_recall": 0.6287072673760471, |
| "eval_runtime": 24.8233, |
| "eval_samples_per_second": 197.113, |
| "eval_steps_per_second": 0.806, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5287958115183246, |
| "grad_norm": 13.087606430053711, |
| "learning_rate": 1.970329905522647e-05, |
| "loss": 0.7007, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5340314136125655, |
| "grad_norm": 14.260698318481445, |
| "learning_rate": 1.9695042564647045e-05, |
| "loss": 0.6817, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.5392670157068062, |
| "grad_norm": 9.661425590515137, |
| "learning_rate": 1.9686674543835208e-05, |
| "loss": 0.7358, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.5445026178010471, |
| "grad_norm": 5.698840618133545, |
| "learning_rate": 1.9678195089055347e-05, |
| "loss": 0.6646, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.5497382198952879, |
| "grad_norm": 5.9759907722473145, |
| "learning_rate": 1.9669604297853766e-05, |
| "loss": 0.73, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5549738219895288, |
| "grad_norm": 4.276744842529297, |
| "learning_rate": 1.9660902269057558e-05, |
| "loss": 0.712, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.5602094240837696, |
| "grad_norm": 4.572305679321289, |
| "learning_rate": 1.9652089102773487e-05, |
| "loss": 0.7033, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.5654450261780105, |
| "grad_norm": 3.9941539764404297, |
| "learning_rate": 1.9643164900386824e-05, |
| "loss": 0.6695, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.5706806282722513, |
| "grad_norm": 4.321977138519287, |
| "learning_rate": 1.963412976456017e-05, |
| "loss": 0.709, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.5759162303664922, |
| "grad_norm": 4.374669551849365, |
| "learning_rate": 1.96249837992323e-05, |
| "loss": 0.6815, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5759162303664922, |
| "eval_F1_err_corr": 0.8937597915811933, |
| "eval_accuracy": 0.9036968290796598, |
| "eval_correct_accuracy": 0.9500814005540427, |
| "eval_error_accuracy": 0.8437420660571459, |
| "eval_f1": 0.6368832380730199, |
| "eval_loss": 0.24286404252052307, |
| "eval_pr_auc": 0.7035206327309997, |
| "eval_precision": 0.6568816169393648, |
| "eval_recall": 0.618066561014263, |
| "eval_runtime": 24.8231, |
| "eval_samples_per_second": 197.115, |
| "eval_steps_per_second": 0.806, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.581151832460733, |
| "grad_norm": 3.3900415897369385, |
| "learning_rate": 1.961572710961695e-05, |
| "loss": 0.6042, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.5863874345549738, |
| "grad_norm": 3.9020636081695557, |
| "learning_rate": 1.9606359802201608e-05, |
| "loss": 0.6541, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.5916230366492147, |
| "grad_norm": 3.2324304580688477, |
| "learning_rate": 1.9596881984746288e-05, |
| "loss": 0.664, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.5968586387434555, |
| "grad_norm": 3.6972060203552246, |
| "learning_rate": 1.958729376628231e-05, |
| "loss": 0.6325, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.6020942408376964, |
| "grad_norm": 4.679067134857178, |
| "learning_rate": 1.957759525711101e-05, |
| "loss": 0.6851, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6073298429319371, |
| "grad_norm": 6.575286865234375, |
| "learning_rate": 1.9567786568802503e-05, |
| "loss": 0.6266, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.612565445026178, |
| "grad_norm": 6.148586273193359, |
| "learning_rate": 1.9557867814194385e-05, |
| "loss": 0.6887, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.6178010471204188, |
| "grad_norm": 3.9649710655212402, |
| "learning_rate": 1.9547839107390435e-05, |
| "loss": 0.6448, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.6230366492146597, |
| "grad_norm": 3.5095326900482178, |
| "learning_rate": 1.9537700563759303e-05, |
| "loss": 0.6793, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.6282722513089005, |
| "grad_norm": 5.709955215454102, |
| "learning_rate": 1.9527452299933192e-05, |
| "loss": 0.6321, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6282722513089005, |
| "eval_F1_err_corr": 0.8922176723044, |
| "eval_accuracy": 0.8975096674400619, |
| "eval_correct_accuracy": 0.9449689114373253, |
| "eval_error_accuracy": 0.8450445368681248, |
| "eval_f1": 0.6403994355801584, |
| "eval_loss": 0.25328728556632996, |
| "eval_pr_auc": 0.6997538853349474, |
| "eval_precision": 0.6150959132610508, |
| "eval_recall": 0.6678741227077202, |
| "eval_runtime": 24.8167, |
| "eval_samples_per_second": 197.166, |
| "eval_steps_per_second": 0.806, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6335078534031413, |
| "grad_norm": 3.6896157264709473, |
| "learning_rate": 1.95170944338065e-05, |
| "loss": 0.6806, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.6387434554973822, |
| "grad_norm": 4.03073263168335, |
| "learning_rate": 1.9506627084534486e-05, |
| "loss": 0.6133, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.643979057591623, |
| "grad_norm": 6.4314751625061035, |
| "learning_rate": 1.9496050372531864e-05, |
| "loss": 0.6098, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.6492146596858639, |
| "grad_norm": 3.8455100059509277, |
| "learning_rate": 1.9485364419471454e-05, |
| "loss": 0.6306, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.6544502617801047, |
| "grad_norm": 3.8784000873565674, |
| "learning_rate": 1.9474569348282774e-05, |
| "loss": 0.6104, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6596858638743456, |
| "grad_norm": 5.018595218658447, |
| "learning_rate": 1.9463665283150604e-05, |
| "loss": 0.6592, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.6649214659685864, |
| "grad_norm": 3.5282726287841797, |
| "learning_rate": 1.9452652349513587e-05, |
| "loss": 0.621, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.6701570680628273, |
| "grad_norm": 3.4036905765533447, |
| "learning_rate": 1.9441530674062754e-05, |
| "loss": 0.6744, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.675392670157068, |
| "grad_norm": 4.95082950592041, |
| "learning_rate": 1.9430300384740108e-05, |
| "loss": 0.5925, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.680628272251309, |
| "grad_norm": 5.078342437744141, |
| "learning_rate": 1.941896161073711e-05, |
| "loss": 0.5913, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.680628272251309, |
| "eval_F1_err_corr": 0.885156181305656, |
| "eval_accuracy": 0.8942304717710751, |
| "eval_correct_accuracy": 0.9306883336673133, |
| "eval_error_accuracy": 0.8438713827505521, |
| "eval_f1": 0.6393079438759363, |
| "eval_loss": 0.27150195837020874, |
| "eval_pr_auc": 0.6992222071782436, |
| "eval_precision": 0.5985776372975109, |
| "eval_recall": 0.6859859633235228, |
| "eval_runtime": 24.819, |
| "eval_samples_per_second": 197.147, |
| "eval_steps_per_second": 0.806, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6858638743455497, |
| "grad_norm": 5.81033182144165, |
| "learning_rate": 1.9407514482493214e-05, |
| "loss": 0.6133, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.6910994764397905, |
| "grad_norm": 4.901327133178711, |
| "learning_rate": 1.939595913169438e-05, |
| "loss": 0.6121, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.6963350785340314, |
| "grad_norm": 3.7869937419891357, |
| "learning_rate": 1.9384295691271523e-05, |
| "loss": 0.5822, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.7015706806282722, |
| "grad_norm": 3.8648629188537598, |
| "learning_rate": 1.9372524295399014e-05, |
| "loss": 0.6032, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.7068062827225131, |
| "grad_norm": 3.9610342979431152, |
| "learning_rate": 1.9360645079493126e-05, |
| "loss": 0.59, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7120418848167539, |
| "grad_norm": 5.623746395111084, |
| "learning_rate": 1.9348658180210473e-05, |
| "loss": 0.5835, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.7172774869109948, |
| "grad_norm": 6.02370548248291, |
| "learning_rate": 1.933656373544645e-05, |
| "loss": 0.6003, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.7225130890052356, |
| "grad_norm": 5.652750492095947, |
| "learning_rate": 1.932436188433362e-05, |
| "loss": 0.5958, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.7277486910994765, |
| "grad_norm": 7.355208396911621, |
| "learning_rate": 1.9312052767240153e-05, |
| "loss": 0.5677, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.7329842931937173, |
| "grad_norm": 4.652146339416504, |
| "learning_rate": 1.9299636525768176e-05, |
| "loss": 0.5649, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7329842931937173, |
| "eval_F1_err_corr": 0.8974946334360716, |
| "eval_accuracy": 0.9049033255993812, |
| "eval_correct_accuracy": 0.9592731998252757, |
| "eval_error_accuracy": 0.843191870706177, |
| "eval_f1": 0.6410555815039701, |
| "eval_loss": 0.24959486722946167, |
| "eval_pr_auc": 0.6979561382710899, |
| "eval_precision": 0.6619242826139378, |
| "eval_recall": 0.621462531129726, |
| "eval_runtime": 24.817, |
| "eval_samples_per_second": 197.163, |
| "eval_steps_per_second": 0.806, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7382198952879581, |
| "grad_norm": 5.073575019836426, |
| "learning_rate": 1.9287113302752167e-05, |
| "loss": 0.5491, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.743455497382199, |
| "grad_norm": 4.796985149383545, |
| "learning_rate": 1.927448324225729e-05, |
| "loss": 0.5849, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.7486910994764397, |
| "grad_norm": 6.055835247039795, |
| "learning_rate": 1.9261746489577767e-05, |
| "loss": 0.5721, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.7539267015706806, |
| "grad_norm": 7.7210893630981445, |
| "learning_rate": 1.9248903191235177e-05, |
| "loss": 0.5749, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.7591623036649214, |
| "grad_norm": 3.5172553062438965, |
| "learning_rate": 1.9235953494976786e-05, |
| "loss": 0.6009, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7643979057591623, |
| "grad_norm": 5.326947212219238, |
| "learning_rate": 1.922289754977385e-05, |
| "loss": 0.5896, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.7696335078534031, |
| "grad_norm": 3.990248203277588, |
| "learning_rate": 1.920973550581989e-05, |
| "loss": 0.578, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.774869109947644, |
| "grad_norm": 3.6598334312438965, |
| "learning_rate": 1.9196467514528973e-05, |
| "loss": 0.567, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.7801047120418848, |
| "grad_norm": 5.096114635467529, |
| "learning_rate": 1.9183093728533966e-05, |
| "loss": 0.5847, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.7853403141361257, |
| "grad_norm": 5.4809889793396, |
| "learning_rate": 1.9169614301684786e-05, |
| "loss": 0.5934, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7853403141361257, |
| "eval_F1_err_corr": 0.8959803504098618, |
| "eval_accuracy": 0.9018097447795823, |
| "eval_correct_accuracy": 0.9504131731842577, |
| "eval_error_accuracy": 0.8474448138009186, |
| "eval_f1": 0.6463115667483842, |
| "eval_loss": 0.2541360855102539, |
| "eval_pr_auc": 0.7031337927296945, |
| "eval_precision": 0.6363835856923414, |
| "eval_recall": 0.6565542223228436, |
| "eval_runtime": 24.8027, |
| "eval_samples_per_second": 197.277, |
| "eval_steps_per_second": 0.806, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7905759162303665, |
| "grad_norm": 3.492452621459961, |
| "learning_rate": 1.915602938904662e-05, |
| "loss": 0.5974, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.7958115183246073, |
| "grad_norm": 4.485317707061768, |
| "learning_rate": 1.914233914689815e-05, |
| "loss": 0.5269, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.8010471204188482, |
| "grad_norm": 4.36208438873291, |
| "learning_rate": 1.912854373272975e-05, |
| "loss": 0.5794, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.806282722513089, |
| "grad_norm": 4.126212120056152, |
| "learning_rate": 1.9114643305241678e-05, |
| "loss": 0.5454, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.8115183246073299, |
| "grad_norm": 3.9140942096710205, |
| "learning_rate": 1.9100638024342245e-05, |
| "loss": 0.5615, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8167539267015707, |
| "grad_norm": 9.218249320983887, |
| "learning_rate": 1.908652805114598e-05, |
| "loss": 0.564, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.8219895287958116, |
| "grad_norm": 4.118100166320801, |
| "learning_rate": 1.907231354797179e-05, |
| "loss": 0.5406, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.8272251308900523, |
| "grad_norm": 3.917045831680298, |
| "learning_rate": 1.9057994678341053e-05, |
| "loss": 0.5581, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.8324607329842932, |
| "grad_norm": 4.272670745849609, |
| "learning_rate": 1.9043571606975776e-05, |
| "loss": 0.5761, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.837696335078534, |
| "grad_norm": 4.809320449829102, |
| "learning_rate": 1.902904449979669e-05, |
| "loss": 0.5422, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.837696335078534, |
| "eval_F1_err_corr": 0.899383774542208, |
| "eval_accuracy": 0.905769528228925, |
| "eval_correct_accuracy": 0.9610494803595725, |
| "eval_error_accuracy": 0.8451544680769811, |
| "eval_f1": 0.6363419293218721, |
| "eval_loss": 0.2484092116355896, |
| "eval_pr_auc": 0.6976824941932482, |
| "eval_precision": 0.673149785299318, |
| "eval_recall": 0.6033506905139234, |
| "eval_runtime": 24.8065, |
| "eval_samples_per_second": 197.247, |
| "eval_steps_per_second": 0.806, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8429319371727748, |
| "grad_norm": 5.909646511077881, |
| "learning_rate": 1.901441352392133e-05, |
| "loss": 0.5825, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.8481675392670157, |
| "grad_norm": 4.255792140960693, |
| "learning_rate": 1.8999678847662124e-05, |
| "loss": 0.5576, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.8534031413612565, |
| "grad_norm": 6.5200114250183105, |
| "learning_rate": 1.8984840640524445e-05, |
| "loss": 0.5296, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.8586387434554974, |
| "grad_norm": 8.32865047454834, |
| "learning_rate": 1.8969899073204687e-05, |
| "loss": 0.5655, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.8638743455497382, |
| "grad_norm": 9.28367805480957, |
| "learning_rate": 1.8954854317588262e-05, |
| "loss": 0.5791, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8691099476439791, |
| "grad_norm": 4.166441917419434, |
| "learning_rate": 1.8939706546747656e-05, |
| "loss": 0.5214, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.8743455497382199, |
| "grad_norm": 3.7278671264648438, |
| "learning_rate": 1.8924455934940424e-05, |
| "loss": 0.5087, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.8795811518324608, |
| "grad_norm": 6.253541469573975, |
| "learning_rate": 1.8909102657607182e-05, |
| "loss": 0.5476, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.8848167539267016, |
| "grad_norm": 9.273209571838379, |
| "learning_rate": 1.88936468913696e-05, |
| "loss": 0.4928, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.8900523560209425, |
| "grad_norm": 5.4465532302856445, |
| "learning_rate": 1.8878088814028365e-05, |
| "loss": 0.4909, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8900523560209425, |
| "eval_F1_err_corr": 0.8973571707111299, |
| "eval_accuracy": 0.9004485692188708, |
| "eval_correct_accuracy": 0.9515640305646176, |
| "eval_error_accuracy": 0.8489933585798806, |
| "eval_f1": 0.6449691085613416, |
| "eval_loss": 0.25420647859573364, |
| "eval_pr_auc": 0.7006737583541583, |
| "eval_precision": 0.6290079621261029, |
| "eval_recall": 0.6617613764998868, |
| "eval_runtime": 24.8354, |
| "eval_samples_per_second": 197.017, |
| "eval_steps_per_second": 0.805, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8952879581151832, |
| "grad_norm": 3.929280996322632, |
| "learning_rate": 1.886242860456113e-05, |
| "loss": 0.518, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.900523560209424, |
| "grad_norm": 3.3221724033355713, |
| "learning_rate": 1.884666644312046e-05, |
| "loss": 0.474, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.9057591623036649, |
| "grad_norm": 4.1775126457214355, |
| "learning_rate": 1.8830802511031763e-05, |
| "loss": 0.513, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.9109947643979057, |
| "grad_norm": 4.372125148773193, |
| "learning_rate": 1.88148369907912e-05, |
| "loss": 0.4958, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.9162303664921466, |
| "grad_norm": 4.19729471206665, |
| "learning_rate": 1.8798770066063577e-05, |
| "loss": 0.5178, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9214659685863874, |
| "grad_norm": 4.332755088806152, |
| "learning_rate": 1.8782601921680258e-05, |
| "loss": 0.525, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.9267015706806283, |
| "grad_norm": 4.065849304199219, |
| "learning_rate": 1.8766332743637002e-05, |
| "loss": 0.4692, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.9319371727748691, |
| "grad_norm": 4.974046230316162, |
| "learning_rate": 1.8749962719091864e-05, |
| "loss": 0.4973, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.93717277486911, |
| "grad_norm": 4.961699962615967, |
| "learning_rate": 1.8733492036363007e-05, |
| "loss": 0.5204, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.9424083769633508, |
| "grad_norm": 4.140364646911621, |
| "learning_rate": 1.871692088492655e-05, |
| "loss": 0.4905, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9424083769633508, |
| "eval_F1_err_corr": 0.8932916712717729, |
| "eval_accuracy": 0.8947254447022428, |
| "eval_correct_accuracy": 0.9452793616476387, |
| "eval_error_accuracy": 0.8467242340670772, |
| "eval_f1": 0.6396272371068517, |
| "eval_loss": 0.2594238817691803, |
| "eval_pr_auc": 0.7027911559368634, |
| "eval_precision": 0.6008754476721051, |
| "eval_recall": 0.6837219832465474, |
| "eval_runtime": 24.8417, |
| "eval_samples_per_second": 196.967, |
| "eval_steps_per_second": 0.805, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9476439790575916, |
| "grad_norm": 8.625274658203125, |
| "learning_rate": 1.8700249455414394e-05, |
| "loss": 0.4686, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.9528795811518325, |
| "grad_norm": 6.383296966552734, |
| "learning_rate": 1.8683477939612024e-05, |
| "loss": 0.4764, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.9581151832460733, |
| "grad_norm": 7.345070838928223, |
| "learning_rate": 1.866660653045629e-05, |
| "loss": 0.4823, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.9633507853403142, |
| "grad_norm": 4.40362548828125, |
| "learning_rate": 1.8649635422033218e-05, |
| "loss": 0.49, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.9685863874345549, |
| "grad_norm": 3.8177592754364014, |
| "learning_rate": 1.863256480957574e-05, |
| "loss": 0.5004, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9738219895287958, |
| "grad_norm": 3.5552761554718018, |
| "learning_rate": 1.861539488946148e-05, |
| "loss": 0.4967, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.9790575916230366, |
| "grad_norm": 3.948543071746826, |
| "learning_rate": 1.8598125859210475e-05, |
| "loss": 0.5106, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.9842931937172775, |
| "grad_norm": 4.415132999420166, |
| "learning_rate": 1.858075791748291e-05, |
| "loss": 0.4919, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.9895287958115183, |
| "grad_norm": 4.514105319976807, |
| "learning_rate": 1.8563291264076834e-05, |
| "loss": 0.4947, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.9947643979057592, |
| "grad_norm": 6.685056209564209, |
| "learning_rate": 1.854572609992586e-05, |
| "loss": 0.4892, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.9947643979057592, |
| "eval_F1_err_corr": 0.9005018183708923, |
| "eval_accuracy": 0.9076256767208043, |
| "eval_correct_accuracy": 0.9694615035570632, |
| "eval_error_accuracy": 0.8407011107412775, |
| "eval_f1": 0.6246857717445953, |
| "eval_loss": 0.24942660331726074, |
| "eval_pr_auc": 0.6972885689682531, |
| "eval_precision": 0.7021757558632382, |
| "eval_recall": 0.5625990491283677, |
| "eval_runtime": 24.7945, |
| "eval_samples_per_second": 197.342, |
| "eval_steps_per_second": 0.807, |
| "step": 760 |
| } |
| ], |
| "logging_steps": 4, |
| "max_steps": 3820, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 16, |
| "stateful_callbacks": { |
| "MinEpochEarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.001 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.37033143972266e+17, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|