{
  "best_global_step": null,
  "best_metric": 0.9005018183708923,
  "best_model_checkpoint": null,
  "epoch": 0.9947643979057592,
  "eval_steps": 16,
  "global_step": 760,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005235602094240838,
      "grad_norm": 433.5572204589844,
      "learning_rate": 5.217391304347826e-07,
      "loss": 5.5489,
      "step": 4
    },
    {
      "epoch": 0.005235602094240838,
      "eval_F1_err_corr": 0.2899344909681993,
      "eval_accuracy": 0.33964423820572315,
      "eval_correct_accuracy": 0.21863927522501062,
      "eval_error_accuracy": 0.4302247894550517,
      "eval_f1": 0.2573237770510055,
      "eval_loss": 1.461071491241455,
      "eval_pr_auc": 0.16429768646454848,
      "eval_precision": 0.15202466598150052,
      "eval_recall": 0.8372198324654743,
      "eval_runtime": 24.9385,
      "eval_samples_per_second": 196.202,
      "eval_steps_per_second": 0.802,
      "step": 4
    },
    {
      "epoch": 0.010471204188481676,
      "grad_norm": 424.1092834472656,
      "learning_rate": 1.2173913043478262e-06,
      "loss": 5.392,
      "step": 8
    },
    {
      "epoch": 0.010471204188481676,
      "eval_F1_err_corr": 0.321098900363111,
      "eval_accuracy": 0.3666511987625677,
      "eval_correct_accuracy": 0.2498037913412515,
      "eval_error_accuracy": 0.4493435370426137,
      "eval_f1": 0.25948565848012445,
      "eval_loss": 1.3678739070892334,
      "eval_pr_auc": 0.16391947366452397,
      "eval_precision": 0.15441239776151527,
      "eval_recall": 0.8120896536110482,
      "eval_runtime": 24.8266,
      "eval_samples_per_second": 197.087,
      "eval_steps_per_second": 0.806,
      "step": 8
    },
    {
      "epoch": 0.015706806282722512,
      "grad_norm": 328.165771484375,
      "learning_rate": 1.9130434782608697e-06,
      "loss": 4.6674,
      "step": 12
    },
    {
      "epoch": 0.015706806282722512,
      "eval_F1_err_corr": 0.42193034247158306,
      "eval_accuracy": 0.4537664346481052,
      "eval_correct_accuracy": 0.36420961710522887,
      "eval_error_accuracy": 0.5013920328557274,
      "eval_f1": 0.25876327610091937,
      "eval_loss": 1.08595609664917,
      "eval_pr_auc": 0.16343108665970787,
      "eval_precision": 0.15883323026180168,
      "eval_recall": 0.6977586597237945,
      "eval_runtime": 24.7964,
      "eval_samples_per_second": 197.327,
      "eval_steps_per_second": 0.807,
      "step": 12
    },
    {
      "epoch": 0.020942408376963352,
      "grad_norm": 121.70630645751953,
      "learning_rate": 2.6086956521739132e-06,
      "loss": 3.0944,
      "step": 16
    },
    {
      "epoch": 0.020942408376963352,
      "eval_F1_err_corr": 0.721921189250657,
      "eval_accuracy": 0.7358391337973704,
      "eval_correct_accuracy": 0.7870115524045516,
      "eval_error_accuracy": 0.6667750717278245,
      "eval_f1": 0.1906928253246138,
      "eval_loss": 0.5721015334129333,
      "eval_pr_auc": 0.16146374192556026,
      "eval_precision": 0.16400391261819366,
      "eval_recall": 0.22775639574371745,
      "eval_runtime": 24.8528,
      "eval_samples_per_second": 196.879,
      "eval_steps_per_second": 0.805,
      "step": 16
    },
    {
      "epoch": 0.02617801047120419,
      "grad_norm": 18.458759307861328,
      "learning_rate": 3.3043478260869567e-06,
      "loss": 2.002,
      "step": 20
    },
    {
      "epoch": 0.02617801047120419,
      "eval_F1_err_corr": 0.8402340555989792,
      "eval_accuracy": 0.8622119102861562,
      "eval_correct_accuracy": 0.9922276019965096,
      "eval_error_accuracy": 0.728620881787505,
      "eval_f1": 0.0035794183445190158,
      "eval_loss": 0.7684443593025208,
      "eval_pr_auc": 0.17251592682571912,
      "eval_precision": 0.1509433962264151,
      "eval_recall": 0.001811184061580258,
      "eval_runtime": 24.8601,
      "eval_samples_per_second": 196.822,
      "eval_steps_per_second": 0.805,
      "step": 20
    },
    {
      "epoch": 0.031413612565445025,
      "grad_norm": 115.0860595703125,
      "learning_rate": 4.000000000000001e-06,
      "loss": 3.383,
      "step": 24
    },
    {
      "epoch": 0.031413612565445025,
      "eval_F1_err_corr": 0.8378682847250856,
      "eval_accuracy": 0.8620881670533642,
      "eval_correct_accuracy": 0.9842169035446346,
      "eval_error_accuracy": 0.7294085238385144,
      "eval_f1": 0.008451957295373666,
      "eval_loss": 0.8194268941879272,
      "eval_pr_auc": 0.18009572056722223,
      "eval_precision": 0.24050632911392406,
      "eval_recall": 0.004301562146253113,
      "eval_runtime": 24.8294,
      "eval_samples_per_second": 197.065,
      "eval_steps_per_second": 0.805,
      "step": 24
    },
    {
      "epoch": 0.03664921465968586,
      "grad_norm": 99.96025848388672,
      "learning_rate": 4.695652173913044e-06,
      "loss": 3.1857,
      "step": 28
    },
    {
      "epoch": 0.03664921465968586,
      "eval_F1_err_corr": 0.8207683484774771,
      "eval_accuracy": 0.8591492652745553,
      "eval_correct_accuracy": 0.9328157189465495,
      "eval_error_accuracy": 0.7327520943454843,
      "eval_f1": 0.060268317853457175,
      "eval_loss": 0.6408756971359253,
      "eval_pr_auc": 0.19809419361676622,
      "eval_precision": 0.3411214953271028,
      "eval_recall": 0.03305410912383971,
      "eval_runtime": 24.8048,
      "eval_samples_per_second": 197.261,
      "eval_steps_per_second": 0.806,
      "step": 28
    },
    {
      "epoch": 0.041884816753926704,
      "grad_norm": 48.16204071044922,
      "learning_rate": 5.391304347826088e-06,
      "loss": 2.1498,
      "step": 32
    },
    {
      "epoch": 0.041884816753926704,
      "eval_F1_err_corr": 0.7694538304703895,
      "eval_accuracy": 0.837370456303171,
      "eval_correct_accuracy": 0.8088768689020601,
      "eval_error_accuracy": 0.7336950054749075,
      "eval_f1": 0.21148942552872357,
      "eval_loss": 0.44809016585350037,
      "eval_pr_auc": 0.2318064013915099,
      "eval_precision": 0.31333333333333335,
      "eval_recall": 0.15961059542676023,
      "eval_runtime": 24.8313,
      "eval_samples_per_second": 197.05,
      "eval_steps_per_second": 0.805,
      "step": 32
    },
    {
      "epoch": 0.04712041884816754,
      "grad_norm": 169.05667114257812,
      "learning_rate": 6.086956521739132e-06,
      "loss": 1.9176,
      "step": 36
    },
    {
      "epoch": 0.04712041884816754,
      "eval_F1_err_corr": 0.6219027098098009,
      "eval_accuracy": 0.7121732405259087,
      "eval_correct_accuracy": 0.5674362971053744,
      "eval_error_accuracy": 0.687935454975843,
      "eval_f1": 0.33722752528850264,
      "eval_loss": 0.5731640458106995,
      "eval_pr_auc": 0.2788670530837179,
      "eval_precision": 0.2460243217960711,
      "eval_recall": 0.5358840842200588,
      "eval_runtime": 24.8463,
      "eval_samples_per_second": 196.931,
      "eval_steps_per_second": 0.805,
      "step": 36
    },
    {
      "epoch": 0.05235602094240838,
      "grad_norm": 114.60936737060547,
      "learning_rate": 6.782608695652174e-06,
      "loss": 2.0171,
      "step": 40
    },
    {
      "epoch": 0.05235602094240838,
      "eval_F1_err_corr": 0.7430224807525013,
      "eval_accuracy": 0.7990719257540603,
      "eval_correct_accuracy": 0.7547544657206874,
      "eval_error_accuracy": 0.7316496396965659,
      "eval_f1": 0.35211970074812965,
      "eval_loss": 0.4375106692314148,
      "eval_pr_auc": 0.3012571706540082,
      "eval_precision": 0.3147289586305278,
      "eval_recall": 0.39959248358614446,
      "eval_runtime": 24.8039,
      "eval_samples_per_second": 197.268,
      "eval_steps_per_second": 0.806,
      "step": 40
    },
    {
      "epoch": 0.05759162303664921,
      "grad_norm": 32.457340240478516,
      "learning_rate": 7.478260869565218e-06,
      "loss": 1.611,
      "step": 44
    },
    {
      "epoch": 0.05759162303664921,
      "eval_F1_err_corr": 0.8419241583637731,
      "eval_accuracy": 0.862830626450116,
      "eval_correct_accuracy": 0.9871360561186291,
      "eval_error_accuracy": 0.7339561045659535,
      "eval_f1": 0.09914668833807395,
      "eval_loss": 0.38381654024124146,
      "eval_pr_auc": 0.3152015073109494,
      "eval_precision": 0.48316831683168315,
      "eval_recall": 0.05524111387819787,
      "eval_runtime": 24.832,
      "eval_samples_per_second": 197.044,
      "eval_steps_per_second": 0.805,
      "step": 44
    },
    {
      "epoch": 0.06282722513089005,
      "grad_norm": 22.770957946777344,
      "learning_rate": 8.173913043478263e-06,
      "loss": 1.5516,
      "step": 48
    },
    {
      "epoch": 0.06282722513089005,
      "eval_F1_err_corr": 0.8403483674599067,
      "eval_accuracy": 0.859891724671307,
      "eval_correct_accuracy": 0.9726090387657613,
      "eval_error_accuracy": 0.7397527603961362,
      "eval_f1": 0.19484444444444443,
      "eval_loss": 0.355484277009964,
      "eval_pr_auc": 0.3399032342610957,
      "eval_precision": 0.45364238410596025,
      "eval_recall": 0.12406610821824768,
      "eval_runtime": 24.7998,
      "eval_samples_per_second": 197.3,
      "eval_steps_per_second": 0.806,
      "step": 48
    },
    {
      "epoch": 0.06806282722513089,
      "grad_norm": 69.65153503417969,
      "learning_rate": 8.869565217391306e-06,
      "loss": 1.4648,
      "step": 52
    },
    {
      "epoch": 0.06806282722513089,
      "eval_F1_err_corr": 0.8036896045473728,
      "eval_accuracy": 0.8478267594740913,
      "eval_correct_accuracy": 0.8494371774369468,
      "eval_error_accuracy": 0.762617804842102,
      "eval_f1": 0.38658186806334954,
      "eval_loss": 0.36472800374031067,
      "eval_pr_auc": 0.38944473786040484,
      "eval_precision": 0.4303164908384231,
      "eval_recall": 0.350916911931175,
      "eval_runtime": 24.8345,
      "eval_samples_per_second": 197.025,
      "eval_steps_per_second": 0.805,
      "step": 52
    },
    {
      "epoch": 0.07329842931937172,
      "grad_norm": 9.83507251739502,
      "learning_rate": 9.565217391304349e-06,
      "loss": 1.3967,
      "step": 56
    },
    {
      "epoch": 0.07329842931937172,
      "eval_F1_err_corr": 0.8296970467203022,
      "eval_accuracy": 0.8688321732405259,
      "eval_correct_accuracy": 0.9181484628791651,
      "eval_error_accuracy": 0.7567903433781845,
      "eval_f1": 0.29262595929262597,
      "eval_loss": 0.33204466104507446,
      "eval_pr_auc": 0.41163972835541246,
      "eval_precision": 0.5561192136968929,
      "eval_recall": 0.19855105275073578,
      "eval_runtime": 24.8293,
      "eval_samples_per_second": 197.066,
      "eval_steps_per_second": 0.805,
      "step": 56
    },
    {
      "epoch": 0.07853403141361257,
      "grad_norm": 16.36752700805664,
      "learning_rate": 1.0260869565217393e-05,
      "loss": 1.2944,
      "step": 60
    },
    {
      "epoch": 0.07853403141361257,
      "eval_F1_err_corr": 0.8421678884358804,
      "eval_accuracy": 0.8736581593194123,
      "eval_correct_accuracy": 0.9276144097142335,
      "eval_error_accuracy": 0.7711353234659966,
      "eval_f1": 0.38438347904733194,
      "eval_loss": 0.31788310408592224,
      "eval_pr_auc": 0.4608372769142751,
      "eval_precision": 0.5751014884979703,
      "eval_recall": 0.28865745981435365,
      "eval_runtime": 24.8595,
      "eval_samples_per_second": 196.826,
      "eval_steps_per_second": 0.805,
      "step": 60
    },
    {
      "epoch": 0.08376963350785341,
      "grad_norm": 17.503982543945312,
      "learning_rate": 1.0956521739130435e-05,
      "loss": 1.284,
      "step": 64
    },
    {
      "epoch": 0.08376963350785341,
      "eval_F1_err_corr": 0.8579401681935629,
      "eval_accuracy": 0.8769682907965971,
      "eval_correct_accuracy": 0.9478163203454459,
      "eval_error_accuracy": 0.7836326415058088,
      "eval_f1": 0.4385147536354652,
      "eval_loss": 0.30793023109436035,
      "eval_pr_auc": 0.49259983395831536,
      "eval_precision": 0.5825206301575394,
      "eval_recall": 0.3515961059542676,
      "eval_runtime": 24.8423,
      "eval_samples_per_second": 196.962,
      "eval_steps_per_second": 0.805,
      "step": 64
    },
    {
      "epoch": 0.08900523560209424,
      "grad_norm": 18.571523666381836,
      "learning_rate": 1.1652173913043478e-05,
      "loss": 1.191,
      "step": 68
    },
    {
      "epoch": 0.08900523560209424,
      "eval_F1_err_corr": 0.8618882109239977,
      "eval_accuracy": 0.8807424593967518,
      "eval_correct_accuracy": 0.9652649658606812,
      "eval_error_accuracy": 0.7785120852969489,
      "eval_f1": 0.38741458763705705,
      "eval_loss": 0.30222997069358826,
      "eval_pr_auc": 0.5105597340335465,
      "eval_precision": 0.6497867803837953,
      "eval_recall": 0.2759791713832918,
      "eval_runtime": 24.8386,
      "eval_samples_per_second": 196.992,
      "eval_steps_per_second": 0.805,
      "step": 68
    },
    {
      "epoch": 0.09424083769633508,
      "grad_norm": 19.51681900024414,
      "learning_rate": 1.2347826086956523e-05,
      "loss": 1.1905,
      "step": 72
    },
    {
      "epoch": 0.09424083769633508,
      "eval_F1_err_corr": 0.8556927040319808,
      "eval_accuracy": 0.8829079659706109,
      "eval_correct_accuracy": 0.9252440207528985,
      "eval_error_accuracy": 0.7958667759923117,
      "eval_f1": 0.46682631356529086,
      "eval_loss": 0.2958272099494934,
      "eval_pr_auc": 0.5272941790977752,
      "eval_precision": 0.6178225205070843,
      "eval_recall": 0.37514149875481095,
      "eval_runtime": 24.8293,
      "eval_samples_per_second": 197.065,
      "eval_steps_per_second": 0.805,
      "step": 72
    },
    {
      "epoch": 0.09947643979057591,
      "grad_norm": 21.868053436279297,
      "learning_rate": 1.3043478260869566e-05,
      "loss": 1.1759,
      "step": 76
    },
    {
      "epoch": 0.09947643979057591,
      "eval_F1_err_corr": 0.8671239387996902,
      "eval_accuracy": 0.8833720030935808,
      "eval_correct_accuracy": 0.965744942725071,
      "eval_error_accuracy": 0.7867787965661607,
      "eval_f1": 0.43932183224271265,
      "eval_loss": 0.2900922894477844,
      "eval_pr_auc": 0.5398410773230814,
      "eval_precision": 0.6402254009536195,
      "eval_recall": 0.33438985736925514,
      "eval_runtime": 24.826,
      "eval_samples_per_second": 197.092,
      "eval_steps_per_second": 0.806,
      "step": 76
    },
    {
      "epoch": 0.10471204188481675,
      "grad_norm": 11.858696937561035,
      "learning_rate": 1.373913043478261e-05,
      "loss": 1.1804,
      "step": 80
    },
    {
      "epoch": 0.10471204188481675,
      "eval_F1_err_corr": 0.8643274810534162,
      "eval_accuracy": 0.8850425367362722,
      "eval_correct_accuracy": 0.951882356363745,
      "eval_error_accuracy": 0.7915226184557567,
      "eval_f1": 0.4779432424838438,
      "eval_loss": 0.2871633768081665,
      "eval_pr_auc": 0.5513156434574297,
      "eval_precision": 0.6297667530544243,
      "eval_recall": 0.38510301109350237,
      "eval_runtime": 24.8425,
      "eval_samples_per_second": 196.96,
      "eval_steps_per_second": 0.805,
      "step": 80
    },
    {
      "epoch": 0.1099476439790576,
      "grad_norm": 8.166620254516602,
      "learning_rate": 1.4434782608695654e-05,
      "loss": 1.1212,
      "step": 84
    },
    {
      "epoch": 0.11518324607329843,
      "grad_norm": 4.340336799621582,
      "learning_rate": 1.5130434782608697e-05,
      "loss": 1.1325,
      "step": 88
    },
    {
      "epoch": 0.12041884816753927,
      "grad_norm": 27.051570892333984,
      "learning_rate": 1.582608695652174e-05,
      "loss": 1.1218,
      "step": 92
    },
    {
      "epoch": 0.1256544502617801,
      "grad_norm": 22.343820571899414,
      "learning_rate": 1.6521739130434785e-05,
      "loss": 1.1068,
      "step": 96
    },
    {
      "epoch": 0.13089005235602094,
      "grad_norm": 47.00363540649414,
      "learning_rate": 1.721739130434783e-05,
      "loss": 1.1034,
      "step": 100
    },
    {
      "epoch": 0.13612565445026178,
      "grad_norm": 40.41328048706055,
      "learning_rate": 1.791304347826087e-05,
      "loss": 1.1235,
      "step": 104
    },
    {
      "epoch": 0.14136125654450263,
      "grad_norm": 31.55730628967285,
      "learning_rate": 1.8608695652173912e-05,
      "loss": 1.0747,
      "step": 108
    },
    {
      "epoch": 0.14659685863874344,
      "grad_norm": 2.652536392211914,
      "learning_rate": 1.9304347826086957e-05,
      "loss": 0.9891,
      "step": 112
    },
    {
      "epoch": 0.1518324607329843,
      "grad_norm": 3.2267162799835205,
      "learning_rate": 2e-05,
      "loss": 0.9607,
      "step": 116
    },
    {
      "epoch": 0.15706806282722513,
      "grad_norm": 21.89421272277832,
      "learning_rate": 1.9999942480792804e-05,
      "loss": 1.0643,
      "step": 120
    },
    {
      "epoch": 0.15706806282722513,
      "eval_F1_err_corr": 0.8734264964691199,
      "eval_accuracy": 0.8929311678267595,
      "eval_correct_accuracy": 0.9278371473433551,
      "eval_error_accuracy": 0.8250438945941904,
      "eval_f1": 0.5815499939547818,
      "eval_loss": 0.2678382694721222,
      "eval_pr_auc": 0.6347920534332054,
      "eval_precision": 0.6240269849507005,
      "eval_recall": 0.544487208512565,
      "eval_runtime": 24.8153,
      "eval_samples_per_second": 197.177,
      "eval_steps_per_second": 0.806,
      "step": 120
    },
    {
      "epoch": 0.16230366492146597,
      "grad_norm": 8.081486701965332,
      "learning_rate": 1.999976992383291e-05,
      "loss": 1.0189,
      "step": 124
    },
    {
      "epoch": 0.16753926701570682,
      "grad_norm": 17.748775482177734,
      "learning_rate": 1.9999482331105377e-05,
      "loss": 0.9898,
      "step": 128
    },
    {
      "epoch": 0.17277486910994763,
      "grad_norm": 41.294334411621094,
      "learning_rate": 1.9999079705918636e-05,
      "loss": 1.0795,
      "step": 132
    },
    {
      "epoch": 0.17801047120418848,
      "grad_norm": 4.425788879394531,
      "learning_rate": 1.999856205290442e-05,
      "loss": 1.0274,
      "step": 136
    },
    {
      "epoch": 0.18324607329842932,
      "grad_norm": 26.085590362548828,
      "learning_rate": 1.9997929378017723e-05,
      "loss": 0.9516,
      "step": 140
    },
    {
      "epoch": 0.18848167539267016,
      "grad_norm": 18.811126708984375,
      "learning_rate": 1.9997181688536746e-05,
      "loss": 0.966,
      "step": 144
    },
    {
      "epoch": 0.193717277486911,
      "grad_norm": 22.464527130126953,
      "learning_rate": 1.999631899306278e-05,
      "loss": 0.8932,
      "step": 148
    },
    {
      "epoch": 0.19895287958115182,
      "grad_norm": 8.309951782226562,
      "learning_rate": 1.999534130152014e-05,
      "loss": 0.9756,
      "step": 152
    },
    {
      "epoch": 0.20418848167539266,
      "grad_norm": 4.516532897949219,
      "learning_rate": 1.999424862515604e-05,
      "loss": 0.998,
      "step": 156
    },
    {
      "epoch": 0.2094240837696335,
      "grad_norm": 10.015279769897461,
      "learning_rate": 1.999304097654045e-05,
      "loss": 0.9015,
      "step": 160
    },
    {
      "epoch": 0.2094240837696335,
      "eval_F1_err_corr": 0.885087159946509,
      "eval_accuracy": 0.9020572312451662,
      "eval_correct_accuracy": 0.95333342698488,
      "eval_error_accuracy": 0.8259592279571245,
      "eval_f1": 0.5984271943176053,
      "eval_loss": 0.24851758778095245,
      "eval_pr_auc": 0.6675246054619536,
      "eval_precision": 0.6804153446783963,
      "eval_recall": 0.5340729001584786,
      "eval_runtime": 24.8104,
      "eval_samples_per_second": 197.216,
      "eval_steps_per_second": 0.806,
      "step": 160
    },
    {
      "epoch": 0.21465968586387435,
      "grad_norm": 14.583905220031738,
      "learning_rate": 1.999171836956597e-05,
      "loss": 0.9587,
      "step": 164
    },
    {
      "epoch": 0.2198952879581152,
      "grad_norm": 9.168513298034668,
      "learning_rate": 1.9990280819447662e-05,
      "loss": 0.9663,
      "step": 168
    },
    {
      "epoch": 0.225130890052356,
      "grad_norm": 24.278688430786133,
      "learning_rate": 1.998872834272287e-05,
      "loss": 0.9679,
      "step": 172
    },
    {
      "epoch": 0.23036649214659685,
      "grad_norm": 23.693418502807617,
      "learning_rate": 1.9987060957251047e-05,
      "loss": 0.9541,
      "step": 176
    },
    {
      "epoch": 0.2356020942408377,
      "grad_norm": 34.47703170776367,
      "learning_rate": 1.9985278682213525e-05,
      "loss": 0.8988,
      "step": 180
    },
    {
      "epoch": 0.24083769633507854,
      "grad_norm": 17.93362045288086,
      "learning_rate": 1.9983381538113317e-05,
      "loss": 0.9296,
      "step": 184
    },
    {
      "epoch": 0.24607329842931938,
      "grad_norm": 23.294275283813477,
      "learning_rate": 1.998136954677487e-05,
      "loss": 0.9337,
      "step": 188
    },
    {
      "epoch": 0.2513089005235602,
      "grad_norm": 19.78593635559082,
      "learning_rate": 1.9979242731343803e-05,
      "loss": 0.8976,
      "step": 192
    },
    {
      "epoch": 0.25654450261780104,
      "grad_norm": 16.300464630126953,
      "learning_rate": 1.9977001116286675e-05,
      "loss": 0.8705,
      "step": 196
    },
    {
      "epoch": 0.2617801047120419,
      "grad_norm": 26.935935974121094,
      "learning_rate": 1.9974644727390665e-05,
      "loss": 0.8758,
      "step": 200
    },
    {
      "epoch": 0.2617801047120419,
      "eval_F1_err_corr": 0.8910747356279248,
      "eval_accuracy": 0.9052126836813612,
      "eval_correct_accuracy": 0.9761037985940583,
      "eval_error_accuracy": 0.819672508302841,
      "eval_f1": 0.558119411595039,
      "eval_loss": 0.24936090409755707,
      "eval_pr_auc": 0.6830633725429478,
      "eval_precision": 0.768772348033373,
      "eval_recall": 0.4380801448947249,
      "eval_runtime": 24.8593,
      "eval_samples_per_second": 196.827,
      "eval_steps_per_second": 0.805,
      "step": 200
    },
    {
      "epoch": 0.2670157068062827,
      "grad_norm": 26.804174423217773,
      "learning_rate": 1.9972173591763297e-05,
      "loss": 0.9957,
      "step": 204
    },
    {
      "epoch": 0.27225130890052357,
      "grad_norm": 12.255861282348633,
      "learning_rate": 1.996958773783213e-05,
      "loss": 0.8614,
      "step": 208
    },
    {
      "epoch": 0.2774869109947644,
      "grad_norm": 10.577012062072754,
      "learning_rate": 1.9966887195344403e-05,
      "loss": 0.8539,
      "step": 212
    },
    {
      "epoch": 0.28272251308900526,
      "grad_norm": 9.850268363952637,
      "learning_rate": 1.9964071995366744e-05,
      "loss": 0.8184,
      "step": 216
    },
    {
      "epoch": 0.2879581151832461,
      "grad_norm": 4.022161960601807,
      "learning_rate": 1.9961142170284762e-05,
      "loss": 0.783,
      "step": 220
    },
    {
      "epoch": 0.2931937172774869,
      "grad_norm": 4.174556732177734,
      "learning_rate": 1.9958097753802693e-05,
      "loss": 0.8355,
      "step": 224
    },
    {
      "epoch": 0.29842931937172773,
      "grad_norm": 8.559288024902344,
      "learning_rate": 1.9954938780943034e-05,
      "loss": 0.8081,
      "step": 228
    },
    {
      "epoch": 0.3036649214659686,
      "grad_norm": 11.881876945495605,
      "learning_rate": 1.9951665288046098e-05,
      "loss": 0.8846,
      "step": 232
    },
    {
      "epoch": 0.3089005235602094,
      "grad_norm": 9.480097770690918,
      "learning_rate": 1.994827731276963e-05,
      "loss": 0.869,
      "step": 236
    },
    {
      "epoch": 0.31413612565445026,
      "grad_norm": 18.96599006652832,
      "learning_rate": 1.9944774894088367e-05,
      "loss": 0.9044,
      "step": 240
    },
    {
      "epoch": 0.31413612565445026,
      "eval_F1_err_corr": 0.8903583524392616,
      "eval_accuracy": 0.8976334106728538,
      "eval_correct_accuracy": 0.9422891260099501,
      "eval_error_accuracy": 0.8438525462118894,
      "eval_f1": 0.6341625207296849,
      "eval_loss": 0.25486111640930176,
      "eval_pr_auc": 0.6936322312463549,
      "eval_precision": 0.6197061365600691,
      "eval_recall": 0.6493094860765225,
      "eval_runtime": 24.7931,
      "eval_samples_per_second": 197.353,
      "eval_steps_per_second": 0.807,
      "step": 240
    },
    {
      "epoch": 0.3193717277486911,
      "grad_norm": 7.49755859375,
      "learning_rate": 1.994115807229357e-05,
      "loss": 0.8702,
      "step": 244
    },
    {
      "epoch": 0.32460732984293195,
      "grad_norm": 19.93411636352539,
      "learning_rate": 1.993742688899259e-05,
      "loss": 0.8357,
      "step": 248
    },
    {
      "epoch": 0.3298429319371728,
      "grad_norm": 18.435436248779297,
      "learning_rate": 1.9933581387108358e-05,
      "loss": 0.8185,
      "step": 252
    },
    {
      "epoch": 0.33507853403141363,
      "grad_norm": 23.072092056274414,
      "learning_rate": 1.992962161087893e-05,
      "loss": 0.8371,
      "step": 256
    },
    {
      "epoch": 0.3403141361256545,
      "grad_norm": 11.625171661376953,
      "learning_rate": 1.9925547605856937e-05,
      "loss": 0.8276,
      "step": 260
    },
    {
      "epoch": 0.34554973821989526,
      "grad_norm": 18.671037673950195,
      "learning_rate": 1.992135941890909e-05,
      "loss": 0.8253,
      "step": 264
    },
    {
      "epoch": 0.3507853403141361,
      "grad_norm": 15.393129348754883,
      "learning_rate": 1.9917057098215624e-05,
      "loss": 0.8245,
      "step": 268
    },
    {
      "epoch": 0.35602094240837695,
      "grad_norm": 9.267082214355469,
      "learning_rate": 1.9912640693269754e-05,
      "loss": 0.8451,
      "step": 272
    },
    {
      "epoch": 0.3612565445026178,
      "grad_norm": 5.4926252365112305,
      "learning_rate": 1.9908110254877107e-05,
      "loss": 0.813,
      "step": 276
    },
    {
      "epoch": 0.36649214659685864,
      "grad_norm": 6.064371585845947,
      "learning_rate": 1.9903465835155124e-05,
      "loss": 0.7553,
      "step": 280
    },
    {
      "epoch": 0.36649214659685864,
      "eval_F1_err_corr": 0.898106732050316,
      "eval_accuracy": 0.9078112915699923,
      "eval_correct_accuracy": 0.9649030769491357,
      "eval_error_accuracy": 0.8399597119400094,
      "eval_f1": 0.624117053481332,
      "eval_loss": 0.23855358362197876,
      "eval_pr_auc": 0.697922245841014,
      "eval_precision": 0.704642551979493,
      "eval_recall": 0.5601086710436948,
      "eval_runtime": 24.8196,
      "eval_samples_per_second": 197.143,
      "eval_steps_per_second": 0.806,
      "step": 280
    },
    {
      "epoch": 0.3717277486910995,
      "grad_norm": 11.443989753723145,
      "learning_rate": 1.9898707487532475e-05,
      "loss": 0.7992,
      "step": 284
    },
    {
      "epoch": 0.3769633507853403,
      "grad_norm": 9.889354705810547,
      "learning_rate": 1.9893835266748437e-05,
      "loss": 0.8425,
      "step": 288
    },
    {
      "epoch": 0.38219895287958117,
      "grad_norm": 6.687994480133057,
      "learning_rate": 1.9888849228852262e-05,
      "loss": 0.8465,
      "step": 292
    },
    {
      "epoch": 0.387434554973822,
      "grad_norm": 3.455092430114746,
      "learning_rate": 1.988374943120254e-05,
      "loss": 0.8098,
      "step": 296
    },
    {
      "epoch": 0.39267015706806285,
      "grad_norm": 4.258669376373291,
      "learning_rate": 1.987853593246654e-05,
      "loss": 0.8263,
      "step": 300
    },
    {
      "epoch": 0.39790575916230364,
      "grad_norm": 5.940682888031006,
      "learning_rate": 1.9873208792619517e-05,
      "loss": 0.7651,
      "step": 304
    },
    {
      "epoch": 0.4031413612565445,
      "grad_norm": 5.644289493560791,
      "learning_rate": 1.9867768072944047e-05,
      "loss": 0.7919,
      "step": 308
    },
    {
      "epoch": 0.4083769633507853,
      "grad_norm": 6.426525115966797,
      "learning_rate": 1.9862213836029308e-05,
      "loss": 0.7661,
      "step": 312
    },
    {
      "epoch": 0.41361256544502617,
      "grad_norm": 7.790468215942383,
      "learning_rate": 1.985654614577036e-05,
      "loss": 0.7592,
      "step": 316
    },
    {
      "epoch": 0.418848167539267,
      "grad_norm": 8.240925788879395,
      "learning_rate": 1.985076506736741e-05,
      "loss": 0.7935,
      "step": 320
    },
    {
      "epoch": 0.418848167539267,
      "eval_F1_err_corr": 0.8892707173263128,
      "eval_accuracy": 0.900108275328693,
      "eval_correct_accuracy": 0.9416031342860438,
      "eval_error_accuracy": 0.8424490839609798,
      "eval_f1": 0.636169014084507,
      "eval_loss": 0.24991166591644287,
      "eval_pr_auc": 0.6999774937080984,
      "eval_precision": 0.6332436069986541,
      "eval_recall": 0.6391215757301336,
      "eval_runtime": 24.8123,
      "eval_samples_per_second": 197.2,
      "eval_steps_per_second": 0.806,
      "step": 320
    },
    {
      "epoch": 0.42408376963350786,
      "grad_norm": 6.823334217071533,
      "learning_rate": 1.9844870667325073e-05,
      "loss": 0.7347,
      "step": 324
    },
    {
      "epoch": 0.4293193717277487,
      "grad_norm": 4.039069175720215,
      "learning_rate": 1.9838863013451587e-05,
      "loss": 0.7886,
      "step": 328
    },
    {
      "epoch": 0.43455497382198954,
      "grad_norm": 7.6934380531311035,
      "learning_rate": 1.9832742174858052e-05,
      "loss": 0.7608,
      "step": 332
    },
    {
      "epoch": 0.4397905759162304,
      "grad_norm": 9.409914016723633,
      "learning_rate": 1.9826508221957624e-05,
      "loss": 0.7466,
      "step": 336
    },
    {
      "epoch": 0.44502617801047123,
      "grad_norm": 7.726130962371826,
      "learning_rate": 1.9820161226464708e-05,
      "loss": 0.7023,
      "step": 340
    },
    {
      "epoch": 0.450261780104712,
      "grad_norm": 3.726100206375122,
      "learning_rate": 1.9813701261394136e-05,
      "loss": 0.7078,
      "step": 344
    },
    {
      "epoch": 0.45549738219895286,
      "grad_norm": 12.017361640930176,
      "learning_rate": 1.980712840106032e-05,
      "loss": 0.7383,
      "step": 348
    },
    {
      "epoch": 0.4607329842931937,
      "grad_norm": 5.709269046783447,
      "learning_rate": 1.9800442721076406e-05,
      "loss": 0.7215,
      "step": 352
    },
    {
      "epoch": 0.46596858638743455,
      "grad_norm": 12.649430274963379,
      "learning_rate": 1.979364429835339e-05,
      "loss": 0.7111,
      "step": 356
    },
    {
      "epoch": 0.4712041884816754,
      "grad_norm": 16.15489959716797,
      "learning_rate": 1.9786733211099257e-05,
      "loss": 0.7764,
      "step": 360
    },
    {
      "epoch": 0.4712041884816754,
      "eval_F1_err_corr": 0.894511960241892,
      "eval_accuracy": 0.9100077339520495,
      "eval_correct_accuracy": 0.9712793351142024,
      "eval_error_accuracy": 0.8289907059644579,
      "eval_f1": 0.5971472095277662,
      "eval_loss": 0.2414369434118271,
      "eval_pr_auc": 0.7108638111158798,
      "eval_precision": 0.7689015691868759,
      "eval_recall": 0.48811410459587956,
      "eval_runtime": 25.0196,
      "eval_samples_per_second": 195.567,
      "eval_steps_per_second": 0.799,
      "step": 360
    },
    {
      "epoch": 0.47643979057591623,
      "grad_norm": 12.530599594116211,
      "learning_rate": 1.9779709538818052e-05,
      "loss": 0.7715,
      "step": 364
    },
    {
      "epoch": 0.4816753926701571,
      "grad_norm": 6.7939605712890625,
      "learning_rate": 1.9772573362308992e-05,
      "loss": 0.7522,
      "step": 368
    },
    {
      "epoch": 0.4869109947643979,
      "grad_norm": 3.4304537773132324,
      "learning_rate": 1.9765324763665516e-05,
      "loss": 0.7511,
      "step": 372
    },
    {
      "epoch": 0.49214659685863876,
      "grad_norm": 6.636844158172607,
      "learning_rate": 1.9757963826274357e-05,
      "loss": 0.7121,
      "step": 376
    },
    {
      "epoch": 0.4973821989528796,
      "grad_norm": 4.51839017868042,
      "learning_rate": 1.975049063481457e-05,
      "loss": 0.7231,
      "step": 380
    },
    {
      "epoch": 0.5026178010471204,
      "grad_norm": 9.865214347839355,
      "learning_rate": 1.974290527525657e-05,
      "loss": 0.762,
      "step": 384
    },
    {
      "epoch": 0.5078534031413613,
      "grad_norm": 3.440359592437744,
      "learning_rate": 1.9735207834861117e-05,
      "loss": 0.7169,
      "step": 388
    },
    {
      "epoch": 0.5130890052356021,
      "grad_norm": 3.5312769412994385,
      "learning_rate": 1.972739840217836e-05,
      "loss": 0.73,
      "step": 392
    },
    {
      "epoch": 0.518324607329843,
      "grad_norm": 4.723533630371094,
      "learning_rate": 1.9719477067046768e-05,
      "loss": 0.6783,
      "step": 396
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 3.5356740951538086,
      "learning_rate": 1.971144392059212e-05,
      "loss": 0.7155,
      "step": 400
    },
    {
      "epoch": 0.5235602094240838,
      "eval_F1_err_corr": 0.893120798984817,
      "eval_accuracy": 0.902954369682908,
      "eval_correct_accuracy": 0.9461320280124133,
      "eval_error_accuracy": 0.8457347701138861,
      "eval_f1": 0.639051892762628,
      "eval_loss": 0.24243153631687164,
      "eval_pr_auc": 0.7029855391245526,
      "eval_precision": 0.6497426298549368,
      "eval_recall": 0.6287072673760471,
      "eval_runtime": 24.8233,
      "eval_samples_per_second": 197.113,
| "eval_steps_per_second": 0.806, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5287958115183246, | |
| "grad_norm": 13.087606430053711, | |
| "learning_rate": 1.970329905522647e-05, | |
| "loss": 0.7007, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5340314136125655, | |
| "grad_norm": 14.260698318481445, | |
| "learning_rate": 1.9695042564647045e-05, | |
| "loss": 0.6817, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5392670157068062, | |
| "grad_norm": 9.661425590515137, | |
| "learning_rate": 1.9686674543835208e-05, | |
| "loss": 0.7358, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5445026178010471, | |
| "grad_norm": 5.698840618133545, | |
| "learning_rate": 1.9678195089055347e-05, | |
| "loss": 0.6646, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5497382198952879, | |
| "grad_norm": 5.9759907722473145, | |
| "learning_rate": 1.9669604297853766e-05, | |
| "loss": 0.73, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5549738219895288, | |
| "grad_norm": 4.276744842529297, | |
| "learning_rate": 1.9660902269057558e-05, | |
| "loss": 0.712, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5602094240837696, | |
| "grad_norm": 4.572305679321289, | |
| "learning_rate": 1.9652089102773487e-05, | |
| "loss": 0.7033, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.5654450261780105, | |
| "grad_norm": 3.9941539764404297, | |
| "learning_rate": 1.9643164900386824e-05, | |
| "loss": 0.6695, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5706806282722513, | |
| "grad_norm": 4.321977138519287, | |
| "learning_rate": 1.963412976456017e-05, | |
| "loss": 0.709, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5759162303664922, | |
| "grad_norm": 4.374669551849365, | |
| "learning_rate": 1.96249837992323e-05, | |
| "loss": 0.6815, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5759162303664922, | |
| "eval_F1_err_corr": 0.8937597915811933, | |
| "eval_accuracy": 0.9036968290796598, | |
| "eval_correct_accuracy": 0.9500814005540427, | |
| "eval_error_accuracy": 0.8437420660571459, | |
| "eval_f1": 0.6368832380730199, | |
| "eval_loss": 0.24286404252052307, | |
| "eval_pr_auc": 0.7035206327309997, | |
| "eval_precision": 0.6568816169393648, | |
| "eval_recall": 0.618066561014263, | |
| "eval_runtime": 24.8231, | |
| "eval_samples_per_second": 197.115, | |
| "eval_steps_per_second": 0.806, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.581151832460733, | |
| "grad_norm": 3.3900415897369385, | |
| "learning_rate": 1.961572710961695e-05, | |
| "loss": 0.6042, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5863874345549738, | |
| "grad_norm": 3.9020636081695557, | |
| "learning_rate": 1.9606359802201608e-05, | |
| "loss": 0.6541, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5916230366492147, | |
| "grad_norm": 3.2324304580688477, | |
| "learning_rate": 1.9596881984746288e-05, | |
| "loss": 0.664, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5968586387434555, | |
| "grad_norm": 3.6972060203552246, | |
| "learning_rate": 1.958729376628231e-05, | |
| "loss": 0.6325, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.6020942408376964, | |
| "grad_norm": 4.679067134857178, | |
| "learning_rate": 1.957759525711101e-05, | |
| "loss": 0.6851, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6073298429319371, | |
| "grad_norm": 6.575286865234375, | |
| "learning_rate": 1.9567786568802503e-05, | |
| "loss": 0.6266, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.612565445026178, | |
| "grad_norm": 6.148586273193359, | |
| "learning_rate": 1.9557867814194385e-05, | |
| "loss": 0.6887, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6178010471204188, | |
| "grad_norm": 3.9649710655212402, | |
| "learning_rate": 1.9547839107390435e-05, | |
| "loss": 0.6448, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.6230366492146597, | |
| "grad_norm": 3.5095326900482178, | |
| "learning_rate": 1.9537700563759303e-05, | |
| "loss": 0.6793, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6282722513089005, | |
| "grad_norm": 5.709955215454102, | |
| "learning_rate": 1.9527452299933192e-05, | |
| "loss": 0.6321, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6282722513089005, | |
| "eval_F1_err_corr": 0.8922176723044, | |
| "eval_accuracy": 0.8975096674400619, | |
| "eval_correct_accuracy": 0.9449689114373253, | |
| "eval_error_accuracy": 0.8450445368681248, | |
| "eval_f1": 0.6403994355801584, | |
| "eval_loss": 0.25328728556632996, | |
| "eval_pr_auc": 0.6997538853349474, | |
| "eval_precision": 0.6150959132610508, | |
| "eval_recall": 0.6678741227077202, | |
| "eval_runtime": 24.8167, | |
| "eval_samples_per_second": 197.166, | |
| "eval_steps_per_second": 0.806, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6335078534031413, | |
| "grad_norm": 3.6896157264709473, | |
| "learning_rate": 1.95170944338065e-05, | |
| "loss": 0.6806, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6387434554973822, | |
| "grad_norm": 4.03073263168335, | |
| "learning_rate": 1.9506627084534486e-05, | |
| "loss": 0.6133, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.643979057591623, | |
| "grad_norm": 6.4314751625061035, | |
| "learning_rate": 1.9496050372531864e-05, | |
| "loss": 0.6098, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6492146596858639, | |
| "grad_norm": 3.8455100059509277, | |
| "learning_rate": 1.9485364419471454e-05, | |
| "loss": 0.6306, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.6544502617801047, | |
| "grad_norm": 3.8784000873565674, | |
| "learning_rate": 1.9474569348282774e-05, | |
| "loss": 0.6104, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6596858638743456, | |
| "grad_norm": 5.018595218658447, | |
| "learning_rate": 1.9463665283150604e-05, | |
| "loss": 0.6592, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6649214659685864, | |
| "grad_norm": 3.5282726287841797, | |
| "learning_rate": 1.9452652349513587e-05, | |
| "loss": 0.621, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6701570680628273, | |
| "grad_norm": 3.4036905765533447, | |
| "learning_rate": 1.9441530674062754e-05, | |
| "loss": 0.6744, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.675392670157068, | |
| "grad_norm": 4.95082950592041, | |
| "learning_rate": 1.9430300384740108e-05, | |
| "loss": 0.5925, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.680628272251309, | |
| "grad_norm": 5.078342437744141, | |
| "learning_rate": 1.941896161073711e-05, | |
| "loss": 0.5913, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.680628272251309, | |
| "eval_F1_err_corr": 0.885156181305656, | |
| "eval_accuracy": 0.8942304717710751, | |
| "eval_correct_accuracy": 0.9306883336673133, | |
| "eval_error_accuracy": 0.8438713827505521, | |
| "eval_f1": 0.6393079438759363, | |
| "eval_loss": 0.27150195837020874, | |
| "eval_pr_auc": 0.6992222071782436, | |
| "eval_precision": 0.5985776372975109, | |
| "eval_recall": 0.6859859633235228, | |
| "eval_runtime": 24.819, | |
| "eval_samples_per_second": 197.147, | |
| "eval_steps_per_second": 0.806, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6858638743455497, | |
| "grad_norm": 5.81033182144165, | |
| "learning_rate": 1.9407514482493214e-05, | |
| "loss": 0.6133, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6910994764397905, | |
| "grad_norm": 4.901327133178711, | |
| "learning_rate": 1.939595913169438e-05, | |
| "loss": 0.6121, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6963350785340314, | |
| "grad_norm": 3.7869937419891357, | |
| "learning_rate": 1.9384295691271523e-05, | |
| "loss": 0.5822, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.7015706806282722, | |
| "grad_norm": 3.8648629188537598, | |
| "learning_rate": 1.9372524295399014e-05, | |
| "loss": 0.6032, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7068062827225131, | |
| "grad_norm": 3.9610342979431152, | |
| "learning_rate": 1.9360645079493126e-05, | |
| "loss": 0.59, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7120418848167539, | |
| "grad_norm": 5.623746395111084, | |
| "learning_rate": 1.9348658180210473e-05, | |
| "loss": 0.5835, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7172774869109948, | |
| "grad_norm": 6.02370548248291, | |
| "learning_rate": 1.933656373544645e-05, | |
| "loss": 0.6003, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.7225130890052356, | |
| "grad_norm": 5.652750492095947, | |
| "learning_rate": 1.932436188433362e-05, | |
| "loss": 0.5958, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.7277486910994765, | |
| "grad_norm": 7.355208396911621, | |
| "learning_rate": 1.9312052767240153e-05, | |
| "loss": 0.5677, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.7329842931937173, | |
| "grad_norm": 4.652146339416504, | |
| "learning_rate": 1.9299636525768176e-05, | |
| "loss": 0.5649, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7329842931937173, | |
| "eval_F1_err_corr": 0.8974946334360716, | |
| "eval_accuracy": 0.9049033255993812, | |
| "eval_correct_accuracy": 0.9592731998252757, | |
| "eval_error_accuracy": 0.843191870706177, | |
| "eval_f1": 0.6410555815039701, | |
| "eval_loss": 0.24959486722946167, | |
| "eval_pr_auc": 0.6979561382710899, | |
| "eval_precision": 0.6619242826139378, | |
| "eval_recall": 0.621462531129726, | |
| "eval_runtime": 24.817, | |
| "eval_samples_per_second": 197.163, | |
| "eval_steps_per_second": 0.806, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7382198952879581, | |
| "grad_norm": 5.073575019836426, | |
| "learning_rate": 1.9287113302752167e-05, | |
| "loss": 0.5491, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.743455497382199, | |
| "grad_norm": 4.796985149383545, | |
| "learning_rate": 1.927448324225729e-05, | |
| "loss": 0.5849, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.7486910994764397, | |
| "grad_norm": 6.055835247039795, | |
| "learning_rate": 1.9261746489577767e-05, | |
| "loss": 0.5721, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7539267015706806, | |
| "grad_norm": 7.7210893630981445, | |
| "learning_rate": 1.9248903191235177e-05, | |
| "loss": 0.5749, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7591623036649214, | |
| "grad_norm": 3.5172553062438965, | |
| "learning_rate": 1.9235953494976786e-05, | |
| "loss": 0.6009, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7643979057591623, | |
| "grad_norm": 5.326947212219238, | |
| "learning_rate": 1.922289754977385e-05, | |
| "loss": 0.5896, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7696335078534031, | |
| "grad_norm": 3.990248203277588, | |
| "learning_rate": 1.920973550581989e-05, | |
| "loss": 0.578, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.774869109947644, | |
| "grad_norm": 3.6598334312438965, | |
| "learning_rate": 1.9196467514528973e-05, | |
| "loss": 0.567, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7801047120418848, | |
| "grad_norm": 5.096114635467529, | |
| "learning_rate": 1.9183093728533966e-05, | |
| "loss": 0.5847, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7853403141361257, | |
| "grad_norm": 5.4809889793396, | |
| "learning_rate": 1.9169614301684786e-05, | |
| "loss": 0.5934, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7853403141361257, | |
| "eval_F1_err_corr": 0.8959803504098618, | |
| "eval_accuracy": 0.9018097447795823, | |
| "eval_correct_accuracy": 0.9504131731842577, | |
| "eval_error_accuracy": 0.8474448138009186, | |
| "eval_f1": 0.6463115667483842, | |
| "eval_loss": 0.2541360855102539, | |
| "eval_pr_auc": 0.7031337927296945, | |
| "eval_precision": 0.6363835856923414, | |
| "eval_recall": 0.6565542223228436, | |
| "eval_runtime": 24.8027, | |
| "eval_samples_per_second": 197.277, | |
| "eval_steps_per_second": 0.806, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7905759162303665, | |
| "grad_norm": 3.492452621459961, | |
| "learning_rate": 1.915602938904662e-05, | |
| "loss": 0.5974, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.7958115183246073, | |
| "grad_norm": 4.485317707061768, | |
| "learning_rate": 1.914233914689815e-05, | |
| "loss": 0.5269, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.8010471204188482, | |
| "grad_norm": 4.36208438873291, | |
| "learning_rate": 1.912854373272975e-05, | |
| "loss": 0.5794, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.806282722513089, | |
| "grad_norm": 4.126212120056152, | |
| "learning_rate": 1.9114643305241678e-05, | |
| "loss": 0.5454, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.8115183246073299, | |
| "grad_norm": 3.9140942096710205, | |
| "learning_rate": 1.9100638024342245e-05, | |
| "loss": 0.5615, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8167539267015707, | |
| "grad_norm": 9.218249320983887, | |
| "learning_rate": 1.908652805114598e-05, | |
| "loss": 0.564, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.8219895287958116, | |
| "grad_norm": 4.118100166320801, | |
| "learning_rate": 1.907231354797179e-05, | |
| "loss": 0.5406, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.8272251308900523, | |
| "grad_norm": 3.917045831680298, | |
| "learning_rate": 1.9057994678341053e-05, | |
| "loss": 0.5581, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.8324607329842932, | |
| "grad_norm": 4.272670745849609, | |
| "learning_rate": 1.9043571606975776e-05, | |
| "loss": 0.5761, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.837696335078534, | |
| "grad_norm": 4.809320449829102, | |
| "learning_rate": 1.902904449979669e-05, | |
| "loss": 0.5422, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.837696335078534, | |
| "eval_F1_err_corr": 0.899383774542208, | |
| "eval_accuracy": 0.905769528228925, | |
| "eval_correct_accuracy": 0.9610494803595725, | |
| "eval_error_accuracy": 0.8451544680769811, | |
| "eval_f1": 0.6363419293218721, | |
| "eval_loss": 0.2484092116355896, | |
| "eval_pr_auc": 0.6976824941932482, | |
| "eval_precision": 0.673149785299318, | |
| "eval_recall": 0.6033506905139234, | |
| "eval_runtime": 24.8065, | |
| "eval_samples_per_second": 197.247, | |
| "eval_steps_per_second": 0.806, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.8429319371727748, | |
| "grad_norm": 5.909646511077881, | |
| "learning_rate": 1.901441352392133e-05, | |
| "loss": 0.5825, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.8481675392670157, | |
| "grad_norm": 4.255792140960693, | |
| "learning_rate": 1.8999678847662124e-05, | |
| "loss": 0.5576, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.8534031413612565, | |
| "grad_norm": 6.5200114250183105, | |
| "learning_rate": 1.8984840640524445e-05, | |
| "loss": 0.5296, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.8586387434554974, | |
| "grad_norm": 8.32865047454834, | |
| "learning_rate": 1.8969899073204687e-05, | |
| "loss": 0.5655, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8638743455497382, | |
| "grad_norm": 9.28367805480957, | |
| "learning_rate": 1.8954854317588262e-05, | |
| "loss": 0.5791, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8691099476439791, | |
| "grad_norm": 4.166441917419434, | |
| "learning_rate": 1.8939706546747656e-05, | |
| "loss": 0.5214, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8743455497382199, | |
| "grad_norm": 3.7278671264648438, | |
| "learning_rate": 1.8924455934940424e-05, | |
| "loss": 0.5087, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.8795811518324608, | |
| "grad_norm": 6.253541469573975, | |
| "learning_rate": 1.8909102657607182e-05, | |
| "loss": 0.5476, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8848167539267016, | |
| "grad_norm": 9.273209571838379, | |
| "learning_rate": 1.88936468913696e-05, | |
| "loss": 0.4928, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8900523560209425, | |
| "grad_norm": 5.4465532302856445, | |
| "learning_rate": 1.8878088814028365e-05, | |
| "loss": 0.4909, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8900523560209425, | |
| "eval_F1_err_corr": 0.8973571707111299, | |
| "eval_accuracy": 0.9004485692188708, | |
| "eval_correct_accuracy": 0.9515640305646176, | |
| "eval_error_accuracy": 0.8489933585798806, | |
| "eval_f1": 0.6449691085613416, | |
| "eval_loss": 0.25420647859573364, | |
| "eval_pr_auc": 0.7006737583541583, | |
| "eval_precision": 0.6290079621261029, | |
| "eval_recall": 0.6617613764998868, | |
| "eval_runtime": 24.8354, | |
| "eval_samples_per_second": 197.017, | |
| "eval_steps_per_second": 0.805, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8952879581151832, | |
| "grad_norm": 3.929280996322632, | |
| "learning_rate": 1.886242860456113e-05, | |
| "loss": 0.518, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.900523560209424, | |
| "grad_norm": 3.3221724033355713, | |
| "learning_rate": 1.884666644312046e-05, | |
| "loss": 0.474, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.9057591623036649, | |
| "grad_norm": 4.1775126457214355, | |
| "learning_rate": 1.8830802511031763e-05, | |
| "loss": 0.513, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.9109947643979057, | |
| "grad_norm": 4.372125148773193, | |
| "learning_rate": 1.88148369907912e-05, | |
| "loss": 0.4958, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.9162303664921466, | |
| "grad_norm": 4.19729471206665, | |
| "learning_rate": 1.8798770066063577e-05, | |
| "loss": 0.5178, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9214659685863874, | |
| "grad_norm": 4.332755088806152, | |
| "learning_rate": 1.8782601921680258e-05, | |
| "loss": 0.525, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.9267015706806283, | |
| "grad_norm": 4.065849304199219, | |
| "learning_rate": 1.8766332743637002e-05, | |
| "loss": 0.4692, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.9319371727748691, | |
| "grad_norm": 4.974046230316162, | |
| "learning_rate": 1.8749962719091864e-05, | |
| "loss": 0.4973, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.93717277486911, | |
| "grad_norm": 4.961699962615967, | |
| "learning_rate": 1.8733492036363007e-05, | |
| "loss": 0.5204, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.9424083769633508, | |
| "grad_norm": 4.140364646911621, | |
| "learning_rate": 1.871692088492655e-05, | |
| "loss": 0.4905, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9424083769633508, | |
| "eval_F1_err_corr": 0.8932916712717729, | |
| "eval_accuracy": 0.8947254447022428, | |
| "eval_correct_accuracy": 0.9452793616476387, | |
| "eval_error_accuracy": 0.8467242340670772, | |
| "eval_f1": 0.6396272371068517, | |
| "eval_loss": 0.2594238817691803, | |
| "eval_pr_auc": 0.7027911559368634, | |
| "eval_precision": 0.6008754476721051, | |
| "eval_recall": 0.6837219832465474, | |
| "eval_runtime": 24.8417, | |
| "eval_samples_per_second": 196.967, | |
| "eval_steps_per_second": 0.805, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9476439790575916, | |
| "grad_norm": 8.625274658203125, | |
| "learning_rate": 1.8700249455414394e-05, | |
| "loss": 0.4686, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.9528795811518325, | |
| "grad_norm": 6.383296966552734, | |
| "learning_rate": 1.8683477939612024e-05, | |
| "loss": 0.4764, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.9581151832460733, | |
| "grad_norm": 7.345070838928223, | |
| "learning_rate": 1.866660653045629e-05, | |
| "loss": 0.4823, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.9633507853403142, | |
| "grad_norm": 4.40362548828125, | |
| "learning_rate": 1.8649635422033218e-05, | |
| "loss": 0.49, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9685863874345549, | |
| "grad_norm": 3.8177592754364014, | |
| "learning_rate": 1.863256480957574e-05, | |
| "loss": 0.5004, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9738219895287958, | |
| "grad_norm": 3.5552761554718018, | |
| "learning_rate": 1.861539488946148e-05, | |
| "loss": 0.4967, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9790575916230366, | |
| "grad_norm": 3.948543071746826, | |
| "learning_rate": 1.8598125859210475e-05, | |
| "loss": 0.5106, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9842931937172775, | |
| "grad_norm": 4.415132999420166, | |
| "learning_rate": 1.858075791748291e-05, | |
| "loss": 0.4919, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9895287958115183, | |
| "grad_norm": 4.514105319976807, | |
| "learning_rate": 1.8563291264076834e-05, | |
| "loss": 0.4947, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9947643979057592, | |
| "grad_norm": 6.685056209564209, | |
| "learning_rate": 1.854572609992586e-05, | |
| "loss": 0.4892, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9947643979057592, | |
| "eval_F1_err_corr": 0.9005018183708923, | |
| "eval_accuracy": 0.9076256767208043, | |
| "eval_correct_accuracy": 0.9694615035570632, | |
| "eval_error_accuracy": 0.8407011107412775, | |
| "eval_f1": 0.6246857717445953, | |
| "eval_loss": 0.24942660331726074, | |
| "eval_pr_auc": 0.6972885689682531, | |
| "eval_precision": 0.7021757558632382, | |
| "eval_recall": 0.5625990491283677, | |
| "eval_runtime": 24.7945, | |
| "eval_samples_per_second": 197.342, | |
| "eval_steps_per_second": 0.807, | |
| "step": 760 | |
| } | |
| ], | |
| "logging_steps": 4, | |
| "max_steps": 3820, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 16, | |
| "stateful_callbacks": { | |
| "MinEpochEarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.001 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.37033143972266e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |