| { | |
| "best_global_step": null, | |
| "best_metric": 0.9040361144490907, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9947643979057592, | |
| "eval_steps": 16, | |
| "global_step": 760, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005235602094240838, | |
| "grad_norm": 43.404293060302734, | |
| "learning_rate": 5.217391304347826e-07, | |
| "loss": 5.0279, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.010471204188481676, | |
| "grad_norm": 49.279701232910156, | |
| "learning_rate": 1.2173913043478262e-06, | |
| "loss": 4.7533, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.010471204188481676, | |
| "eval_F1_err_corr": 0.7856240609830496, | |
| "eval_accuracy": 0.8077030162412993, | |
| "eval_correct_accuracy": 0.9044467268195442, | |
| "eval_error_accuracy": 0.69439691596995, | |
| "eval_f1": 0.11022044088176353, | |
| "eval_loss": 0.6095216870307922, | |
| "eval_pr_auc": 0.14219526881257386, | |
| "eval_precision": 0.14986376021798364, | |
| "eval_recall": 0.08716323296354993, | |
| "eval_runtime": 73.3028, | |
| "eval_samples_per_second": 66.751, | |
| "eval_steps_per_second": 0.532, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.015706806282722512, | |
| "grad_norm": 68.26057434082031, | |
| "learning_rate": 1.9130434782608697e-06, | |
| "loss": 4.998, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.020942408376963352, | |
| "grad_norm": 36.13338088989258, | |
| "learning_rate": 2.6086956521739132e-06, | |
| "loss": 4.5877, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.020942408376963352, | |
| "eval_F1_err_corr": 0.7534917056196923, | |
| "eval_accuracy": 0.8118174787316319, | |
| "eval_correct_accuracy": 0.7896249617333692, | |
| "eval_error_accuracy": 0.7205206637453856, | |
| "eval_f1": 0.19526392379944438, | |
| "eval_loss": 0.5342526435852051, | |
| "eval_pr_auc": 0.20049527075325158, | |
| "eval_precision": 0.23488224061107574, | |
| "eval_recall": 0.1670817296807788, | |
| "eval_runtime": 73.2246, | |
| "eval_samples_per_second": 66.822, | |
| "eval_steps_per_second": 0.533, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02617801047120419, | |
| "grad_norm": 47.67475128173828, | |
| "learning_rate": 3.3043478260869567e-06, | |
| "loss": 3.9689, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.031413612565445025, | |
| "grad_norm": 37.08432388305664, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 3.4487, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.031413612565445025, | |
| "eval_F1_err_corr": 0.8365777452640643, | |
| "eval_accuracy": 0.859860788863109, | |
| "eval_correct_accuracy": 0.9621554711899274, | |
| "eval_error_accuracy": 0.7399956688054159, | |
| "eval_f1": 0.1793478260869565, | |
| "eval_loss": 0.407941073179245, | |
| "eval_pr_auc": 0.3074633641681041, | |
| "eval_precision": 0.4487760652765186, | |
| "eval_recall": 0.11206701381027848, | |
| "eval_runtime": 73.2248, | |
| "eval_samples_per_second": 66.822, | |
| "eval_steps_per_second": 0.533, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.03664921465968586, | |
| "grad_norm": 25.600910186767578, | |
| "learning_rate": 4.695652173913044e-06, | |
| "loss": 3.0438, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.041884816753926704, | |
| "grad_norm": 34.34440612792969, | |
| "learning_rate": 5.391304347826088e-06, | |
| "loss": 2.7638, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.041884816753926704, | |
| "eval_F1_err_corr": 0.8428950058877069, | |
| "eval_accuracy": 0.8726991492652746, | |
| "eval_correct_accuracy": 0.9294179033702546, | |
| "eval_error_accuracy": 0.7711096044925128, | |
| "eval_f1": 0.3288207470233241, | |
| "eval_loss": 0.3438774645328522, | |
| "eval_pr_auc": 0.4404164901818042, | |
| "eval_precision": 0.588098016336056, | |
| "eval_recall": 0.22820919175911253, | |
| "eval_runtime": 73.4011, | |
| "eval_samples_per_second": 66.661, | |
| "eval_steps_per_second": 0.531, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04712041884816754, | |
| "grad_norm": 41.228370666503906, | |
| "learning_rate": 6.086956521739132e-06, | |
| "loss": 2.7747, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05235602094240838, | |
| "grad_norm": 32.323543548583984, | |
| "learning_rate": 6.782608695652174e-06, | |
| "loss": 2.5846, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05235602094240838, | |
| "eval_F1_err_corr": 0.8543599553580162, | |
| "eval_accuracy": 0.8773395204949729, | |
| "eval_correct_accuracy": 0.9330410885418402, | |
| "eval_error_accuracy": 0.7879168072379804, | |
| "eval_f1": 0.45929360425473886, | |
| "eval_loss": 0.31489187479019165, | |
| "eval_pr_auc": 0.5089374415932274, | |
| "eval_precision": 0.5775034293552812, | |
| "eval_recall": 0.3812542449626443, | |
| "eval_runtime": 73.3517, | |
| "eval_samples_per_second": 66.706, | |
| "eval_steps_per_second": 0.532, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05759162303664921, | |
| "grad_norm": 30.525619506835938, | |
| "learning_rate": 7.478260869565218e-06, | |
| "loss": 2.4505, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06282722513089005, | |
| "grad_norm": 73.03593444824219, | |
| "learning_rate": 8.173913043478263e-06, | |
| "loss": 2.5538, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06282722513089005, | |
| "eval_F1_err_corr": 0.8642908344403837, | |
| "eval_accuracy": 0.8819180201082754, | |
| "eval_correct_accuracy": 0.9289934972548709, | |
| "eval_error_accuracy": 0.8080141568385051, | |
| "eval_f1": 0.5086883768824817, | |
| "eval_loss": 0.300601601600647, | |
| "eval_pr_auc": 0.5509099723979306, | |
| "eval_precision": 0.5894988066825776, | |
| "eval_recall": 0.44736246321032375, | |
| "eval_runtime": 73.2719, | |
| "eval_samples_per_second": 66.779, | |
| "eval_steps_per_second": 0.532, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06806282722513089, | |
| "grad_norm": 20.755630493164062, | |
| "learning_rate": 8.869565217391306e-06, | |
| "loss": 2.3355, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07329842931937172, | |
| "grad_norm": 21.251449584960938, | |
| "learning_rate": 9.565217391304349e-06, | |
| "loss": 2.3184, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07329842931937172, | |
| "eval_F1_err_corr": 0.8654107678235833, | |
| "eval_accuracy": 0.888631090487239, | |
| "eval_correct_accuracy": 0.9337635011162132, | |
| "eval_error_accuracy": 0.8063824940859462, | |
| "eval_f1": 0.49622166246851385, | |
| "eval_loss": 0.28497353196144104, | |
| "eval_pr_auc": 0.575085189348315, | |
| "eval_precision": 0.6496885305972884, | |
| "eval_recall": 0.4014036676477247, | |
| "eval_runtime": 73.2577, | |
| "eval_samples_per_second": 66.792, | |
| "eval_steps_per_second": 0.532, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07853403141361257, | |
| "grad_norm": 13.29970645904541, | |
| "learning_rate": 1.0260869565217393e-05, | |
| "loss": 2.1286, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08376963350785341, | |
| "grad_norm": 7.41117000579834, | |
| "learning_rate": 1.0956521739130435e-05, | |
| "loss": 2.145, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08376963350785341, | |
| "eval_F1_err_corr": 0.8744012492187797, | |
| "eval_accuracy": 0.8954988399071926, | |
| "eval_correct_accuracy": 0.9417514666141938, | |
| "eval_error_accuracy": 0.8160413087302195, | |
| "eval_f1": 0.5564600840336135, | |
| "eval_loss": 0.269569456577301, | |
| "eval_pr_auc": 0.616943123420949, | |
| "eval_precision": 0.6623944982807127, | |
| "eval_recall": 0.4797373783110709, | |
| "eval_runtime": 73.2419, | |
| "eval_samples_per_second": 66.806, | |
| "eval_steps_per_second": 0.532, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08900523560209424, | |
| "grad_norm": 15.101433753967285, | |
| "learning_rate": 1.1652173913043478e-05, | |
| "loss": 2.0576, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.09424083769633508, | |
| "grad_norm": 23.230924606323242, | |
| "learning_rate": 1.2347826086956523e-05, | |
| "loss": 2.0721, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09424083769633508, | |
| "eval_F1_err_corr": 0.8819337284716499, | |
| "eval_accuracy": 0.902799690641918, | |
| "eval_correct_accuracy": 0.9505328366301462, | |
| "eval_error_accuracy": 0.8225696122378097, | |
| "eval_f1": 0.5853787278965427, | |
| "eval_loss": 0.2565557360649109, | |
| "eval_pr_auc": 0.6563233866294026, | |
| "eval_precision": 0.7016766845934831, | |
| "eval_recall": 0.5021507810731266, | |
| "eval_runtime": 73.2462, | |
| "eval_samples_per_second": 66.802, | |
| "eval_steps_per_second": 0.532, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09947643979057591, | |
| "grad_norm": 42.72426223754883, | |
| "learning_rate": 1.3043478260869566e-05, | |
| "loss": 2.0267, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.10471204188481675, | |
| "grad_norm": 6.76558256149292, | |
| "learning_rate": 1.373913043478261e-05, | |
| "loss": 1.99, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10471204188481675, | |
| "eval_F1_err_corr": 0.8814308259221694, | |
| "eval_accuracy": 0.9006960556844548, | |
| "eval_correct_accuracy": 0.9394550028793558, | |
| "eval_error_accuracy": 0.8301572763056169, | |
| "eval_f1": 0.6051660516605166, | |
| "eval_loss": 0.25570690631866455, | |
| "eval_pr_auc": 0.6715258461977598, | |
| "eval_precision": 0.6625370320495556, | |
| "eval_recall": 0.5569390989359294, | |
| "eval_runtime": 73.2805, | |
| "eval_samples_per_second": 66.771, | |
| "eval_steps_per_second": 0.532, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1099476439790576, | |
| "grad_norm": 10.198799133300781, | |
| "learning_rate": 1.4434782608695654e-05, | |
| "loss": 1.9619, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.11518324607329843, | |
| "grad_norm": 14.052003860473633, | |
| "learning_rate": 1.5130434782608697e-05, | |
| "loss": 1.9286, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.12041884816753927, | |
| "grad_norm": 25.076927185058594, | |
| "learning_rate": 1.582608695652174e-05, | |
| "loss": 1.9326, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.1256544502617801, | |
| "grad_norm": 13.464066505432129, | |
| "learning_rate": 1.6521739130434785e-05, | |
| "loss": 1.9319, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.13089005235602094, | |
| "grad_norm": 8.98434829711914, | |
| "learning_rate": 1.721739130434783e-05, | |
| "loss": 1.8496, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13612565445026178, | |
| "grad_norm": 37.530517578125, | |
| "learning_rate": 1.791304347826087e-05, | |
| "loss": 1.852, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.14136125654450263, | |
| "grad_norm": 59.9710578918457, | |
| "learning_rate": 1.8608695652173912e-05, | |
| "loss": 2.0372, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.14659685863874344, | |
| "grad_norm": 111.6693115234375, | |
| "learning_rate": 1.9304347826086957e-05, | |
| "loss": 2.0572, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1518324607329843, | |
| "grad_norm": 29.88340950012207, | |
| "learning_rate": 2e-05, | |
| "loss": 2.0327, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.15706806282722513, | |
| "grad_norm": 32.678375244140625, | |
| "learning_rate": 1.9999942480792804e-05, | |
| "loss": 1.9213, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.15706806282722513, | |
| "eval_F1_err_corr": 0.8993955274771063, | |
| "eval_accuracy": 0.911152358855375, | |
| "eval_correct_accuracy": 0.9550211716256353, | |
| "eval_error_accuracy": 0.8498931149830868, | |
| "eval_f1": 0.6542258608235011, | |
| "eval_loss": 0.23431183397769928, | |
| "eval_pr_auc": 0.723673140949982, | |
| "eval_precision": 0.6986371817948058, | |
| "eval_recall": 0.6151233869141951, | |
| "eval_runtime": 73.2147, | |
| "eval_samples_per_second": 66.831, | |
| "eval_steps_per_second": 0.533, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.16230366492146597, | |
| "grad_norm": 23.950916290283203, | |
| "learning_rate": 1.999976992383291e-05, | |
| "loss": 1.8462, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.16753926701570682, | |
| "grad_norm": 19.700756072998047, | |
| "learning_rate": 1.9999482331105377e-05, | |
| "loss": 1.7507, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.17277486910994763, | |
| "grad_norm": 40.214717864990234, | |
| "learning_rate": 1.9999079705918636e-05, | |
| "loss": 1.9274, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.17801047120418848, | |
| "grad_norm": 16.36521339416504, | |
| "learning_rate": 1.999856205290442e-05, | |
| "loss": 1.6216, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.18324607329842932, | |
| "grad_norm": 14.577583312988281, | |
| "learning_rate": 1.9997929378017723e-05, | |
| "loss": 1.6965, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.18848167539267016, | |
| "grad_norm": 32.502071380615234, | |
| "learning_rate": 1.9997181688536746e-05, | |
| "loss": 1.6885, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.193717277486911, | |
| "grad_norm": 20.771347045898438, | |
| "learning_rate": 1.999631899306278e-05, | |
| "loss": 1.6577, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.19895287958115182, | |
| "grad_norm": 9.587984085083008, | |
| "learning_rate": 1.999534130152014e-05, | |
| "loss": 1.6842, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.20418848167539266, | |
| "grad_norm": 23.711835861206055, | |
| "learning_rate": 1.999424862515604e-05, | |
| "loss": 1.8278, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2094240837696335, | |
| "grad_norm": 22.35334014892578, | |
| "learning_rate": 1.999304097654045e-05, | |
| "loss": 1.743, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2094240837696335, | |
| "eval_F1_err_corr": 0.8993887871915233, | |
| "eval_accuracy": 0.9107192575406032, | |
| "eval_correct_accuracy": 0.959441733768809, | |
| "eval_error_accuracy": 0.8464106325913571, | |
| "eval_f1": 0.6483061174750183, | |
| "eval_loss": 0.23133407533168793, | |
| "eval_pr_auc": 0.7244312751634396, | |
| "eval_precision": 0.7020321984692531, | |
| "eval_recall": 0.6022187004754358, | |
| "eval_runtime": 73.2104, | |
| "eval_samples_per_second": 66.835, | |
| "eval_steps_per_second": 0.533, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.21465968586387435, | |
| "grad_norm": 18.791339874267578, | |
| "learning_rate": 1.999171836956597e-05, | |
| "loss": 1.6844, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2198952879581152, | |
| "grad_norm": 6.037458896636963, | |
| "learning_rate": 1.9990280819447662e-05, | |
| "loss": 1.6566, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.225130890052356, | |
| "grad_norm": 7.139956951141357, | |
| "learning_rate": 1.998872834272287e-05, | |
| "loss": 1.6759, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.23036649214659685, | |
| "grad_norm": 11.674882888793945, | |
| "learning_rate": 1.9987060957251047e-05, | |
| "loss": 1.6746, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.2356020942408377, | |
| "grad_norm": 6.298519134521484, | |
| "learning_rate": 1.9985278682213525e-05, | |
| "loss": 1.4914, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.24083769633507854, | |
| "grad_norm": 14.267353057861328, | |
| "learning_rate": 1.9983381538113317e-05, | |
| "loss": 1.6223, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.24607329842931938, | |
| "grad_norm": 7.899906158447266, | |
| "learning_rate": 1.998136954677487e-05, | |
| "loss": 1.6448, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2513089005235602, | |
| "grad_norm": 20.155229568481445, | |
| "learning_rate": 1.9979242731343803e-05, | |
| "loss": 1.6562, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.25654450261780104, | |
| "grad_norm": 21.51776123046875, | |
| "learning_rate": 1.9977001116286675e-05, | |
| "loss": 1.6211, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2617801047120419, | |
| "grad_norm": 17.941186904907227, | |
| "learning_rate": 1.9974644727390665e-05, | |
| "loss": 1.6279, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2617801047120419, | |
| "eval_F1_err_corr": 0.8988893377228542, | |
| "eval_accuracy": 0.9054601701469451, | |
| "eval_correct_accuracy": 0.9581679176379972, | |
| "eval_error_accuracy": 0.8465181481131606, | |
| "eval_f1": 0.6507428571428572, | |
| "eval_loss": 0.23669207096099854, | |
| "eval_pr_auc": 0.7193585351305057, | |
| "eval_precision": 0.6570505423494115, | |
| "eval_recall": 0.6445551279148743, | |
| "eval_runtime": 73.2472, | |
| "eval_samples_per_second": 66.801, | |
| "eval_steps_per_second": 0.532, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2670157068062827, | |
| "grad_norm": 22.866968154907227, | |
| "learning_rate": 1.9972173591763297e-05, | |
| "loss": 1.7616, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.27225130890052357, | |
| "grad_norm": 20.52739715576172, | |
| "learning_rate": 1.996958773783213e-05, | |
| "loss": 1.5429, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2774869109947644, | |
| "grad_norm": 8.633818626403809, | |
| "learning_rate": 1.9966887195344403e-05, | |
| "loss": 1.5773, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.28272251308900526, | |
| "grad_norm": 23.69244956970215, | |
| "learning_rate": 1.9964071995366744e-05, | |
| "loss": 1.5115, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2879581151832461, | |
| "grad_norm": 19.777175903320312, | |
| "learning_rate": 1.9961142170284762e-05, | |
| "loss": 1.4079, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2931937172774869, | |
| "grad_norm": 8.477285385131836, | |
| "learning_rate": 1.9958097753802693e-05, | |
| "loss": 1.5007, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.29842931937172773, | |
| "grad_norm": 10.63436508178711, | |
| "learning_rate": 1.9954938780943034e-05, | |
| "loss": 1.4683, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.3036649214659686, | |
| "grad_norm": 7.749914646148682, | |
| "learning_rate": 1.9951665288046098e-05, | |
| "loss": 1.5781, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3089005235602094, | |
| "grad_norm": 8.496782302856445, | |
| "learning_rate": 1.994827731276963e-05, | |
| "loss": 1.5299, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.31413612565445026, | |
| "grad_norm": 19.28988265991211, | |
| "learning_rate": 1.9944774894088367e-05, | |
| "loss": 1.5927, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.31413612565445026, | |
| "eval_F1_err_corr": 0.8974736809922041, | |
| "eval_accuracy": 0.8993039443155453, | |
| "eval_correct_accuracy": 0.9425683761157468, | |
| "eval_error_accuracy": 0.8564968507306808, | |
| "eval_f1": 0.665845395749923, | |
| "eval_loss": 0.2493041455745697, | |
| "eval_pr_auc": 0.733870235874116, | |
| "eval_precision": 0.6091284748309542, | |
| "eval_recall": 0.7342087389630971, | |
| "eval_runtime": 73.2151, | |
| "eval_samples_per_second": 66.83, | |
| "eval_steps_per_second": 0.533, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3193717277486911, | |
| "grad_norm": 15.320127487182617, | |
| "learning_rate": 1.994115807229357e-05, | |
| "loss": 1.5878, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.32460732984293195, | |
| "grad_norm": 11.574797630310059, | |
| "learning_rate": 1.993742688899259e-05, | |
| "loss": 1.5463, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3298429319371728, | |
| "grad_norm": 10.225842475891113, | |
| "learning_rate": 1.9933581387108358e-05, | |
| "loss": 1.38, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.33507853403141363, | |
| "grad_norm": 7.9395623207092285, | |
| "learning_rate": 1.992962161087893e-05, | |
| "loss": 1.4878, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3403141361256545, | |
| "grad_norm": 6.2755937576293945, | |
| "learning_rate": 1.9925547605856937e-05, | |
| "loss": 1.467, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.34554973821989526, | |
| "grad_norm": 12.917094230651855, | |
| "learning_rate": 1.992135941890909e-05, | |
| "loss": 1.437, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3507853403141361, | |
| "grad_norm": 8.741606712341309, | |
| "learning_rate": 1.9917057098215624e-05, | |
| "loss": 1.4992, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.35602094240837695, | |
| "grad_norm": 22.467348098754883, | |
| "learning_rate": 1.9912640693269754e-05, | |
| "loss": 1.5051, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.3612565445026178, | |
| "grad_norm": 22.836654663085938, | |
| "learning_rate": 1.9908110254877107e-05, | |
| "loss": 1.4494, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.36649214659685864, | |
| "grad_norm": 7.070972919464111, | |
| "learning_rate": 1.9903465835155124e-05, | |
| "loss": 1.3466, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.36649214659685864, | |
| "eval_F1_err_corr": 0.904888679332032, | |
| "eval_accuracy": 0.9101005413766434, | |
| "eval_correct_accuracy": 0.9618798467956057, | |
| "eval_error_accuracy": 0.8542731828285801, | |
| "eval_f1": 0.665053019824804, | |
| "eval_loss": 0.2285982072353363, | |
| "eval_pr_auc": 0.7431078398519564, | |
| "eval_precision": 0.6773890584644283, | |
| "eval_recall": 0.6531582522073806, | |
| "eval_runtime": 73.4017, | |
| "eval_samples_per_second": 66.661, | |
| "eval_steps_per_second": 0.531, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3717277486910995, | |
| "grad_norm": 6.082576274871826, | |
| "learning_rate": 1.9898707487532475e-05, | |
| "loss": 1.4552, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3769633507853403, | |
| "grad_norm": 7.502638339996338, | |
| "learning_rate": 1.9893835266748437e-05, | |
| "loss": 1.4758, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.38219895287958117, | |
| "grad_norm": 15.583269119262695, | |
| "learning_rate": 1.9888849228852262e-05, | |
| "loss": 1.4416, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.387434554973822, | |
| "grad_norm": 6.652861595153809, | |
| "learning_rate": 1.988374943120254e-05, | |
| "loss": 1.42, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.39267015706806285, | |
| "grad_norm": 16.742830276489258, | |
| "learning_rate": 1.987853593246654e-05, | |
| "loss": 1.4353, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.39790575916230364, | |
| "grad_norm": 9.651524543762207, | |
| "learning_rate": 1.9873208792619517e-05, | |
| "loss": 1.3519, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.4031413612565445, | |
| "grad_norm": 7.194350719451904, | |
| "learning_rate": 1.9867768072944047e-05, | |
| "loss": 1.4409, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4083769633507853, | |
| "grad_norm": 9.677583694458008, | |
| "learning_rate": 1.9862213836029308e-05, | |
| "loss": 1.3468, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.41361256544502617, | |
| "grad_norm": 10.901875495910645, | |
| "learning_rate": 1.985654614577036e-05, | |
| "loss": 1.3667, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.418848167539267, | |
| "grad_norm": 11.349899291992188, | |
| "learning_rate": 1.985076506736741e-05, | |
| "loss": 1.3641, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.418848167539267, | |
| "eval_F1_err_corr": 0.9031122184334268, | |
| "eval_accuracy": 0.9109667440061872, | |
| "eval_correct_accuracy": 0.9675908606266521, | |
| "eval_error_accuracy": 0.8466901950778333, | |
| "eval_f1": 0.656645192078263, | |
| "eval_loss": 0.22545810043811798, | |
| "eval_pr_auc": 0.7398421046192083, | |
| "eval_precision": 0.6940731399747794, | |
| "eval_recall": 0.6230473171836088, | |
| "eval_runtime": 73.2834, | |
| "eval_samples_per_second": 66.768, | |
| "eval_steps_per_second": 0.532, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.42408376963350786, | |
| "grad_norm": 9.947056770324707, | |
| "learning_rate": 1.9844870667325073e-05, | |
| "loss": 1.3222, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.4293193717277487, | |
| "grad_norm": 9.555671691894531, | |
| "learning_rate": 1.9838863013451587e-05, | |
| "loss": 1.3875, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.43455497382198954, | |
| "grad_norm": 19.172666549682617, | |
| "learning_rate": 1.9832742174858052e-05, | |
| "loss": 1.3089, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4397905759162304, | |
| "grad_norm": 14.9385404586792, | |
| "learning_rate": 1.9826508221957624e-05, | |
| "loss": 1.3496, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.44502617801047123, | |
| "grad_norm": 9.018355369567871, | |
| "learning_rate": 1.9820161226464708e-05, | |
| "loss": 1.2145, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.450261780104712, | |
| "grad_norm": 9.747211456298828, | |
| "learning_rate": 1.9813701261394136e-05, | |
| "loss": 1.3075, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.45549738219895286, | |
| "grad_norm": 6.160175323486328, | |
| "learning_rate": 1.980712840106032e-05, | |
| "loss": 1.2454, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.4607329842931937, | |
| "grad_norm": 16.006214141845703, | |
| "learning_rate": 1.9800442721076406e-05, | |
| "loss": 1.2183, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.46596858638743455, | |
| "grad_norm": 8.38248062133789, | |
| "learning_rate": 1.979364429835339e-05, | |
| "loss": 1.3076, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.4712041884816754, | |
| "grad_norm": 12.295247077941895, | |
| "learning_rate": 1.9786733211099257e-05, | |
| "loss": 1.3584, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4712041884816754, | |
| "eval_F1_err_corr": 0.9053759601017026, | |
| "eval_accuracy": 0.9166589327146172, | |
| "eval_correct_accuracy": 0.9761573206307471, | |
| "eval_error_accuracy": 0.8441653625762615, | |
| "eval_f1": 0.6447784810126582, | |
| "eval_loss": 0.2235945612192154, | |
| "eval_pr_auc": 0.7409747509705941, | |
| "eval_precision": 0.7720239974739501, | |
| "eval_recall": 0.5535431288204664, | |
| "eval_runtime": 73.3925, | |
| "eval_samples_per_second": 66.669, | |
| "eval_steps_per_second": 0.531, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.47643979057591623, | |
| "grad_norm": 16.797306060791016, | |
| "learning_rate": 1.9779709538818052e-05, | |
| "loss": 1.3383, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4816753926701571, | |
| "grad_norm": 8.570252418518066, | |
| "learning_rate": 1.9772573362308992e-05, | |
| "loss": 1.3023, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4869109947643979, | |
| "grad_norm": 12.603636741638184, | |
| "learning_rate": 1.9765324763665516e-05, | |
| "loss": 1.2977, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.49214659685863876, | |
| "grad_norm": 7.162085056304932, | |
| "learning_rate": 1.9757963826274357e-05, | |
| "loss": 1.218, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4973821989528796, | |
| "grad_norm": 7.1061882972717285, | |
| "learning_rate": 1.975049063481457e-05, | |
| "loss": 1.2535, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5026178010471204, | |
| "grad_norm": 10.717151641845703, | |
| "learning_rate": 1.974290527525657e-05, | |
| "loss": 1.3631, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5078534031413613, | |
| "grad_norm": 14.366579055786133, | |
| "learning_rate": 1.9735207834861117e-05, | |
| "loss": 1.2677, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5130890052356021, | |
| "grad_norm": 16.858505249023438, | |
| "learning_rate": 1.972739840217836e-05, | |
| "loss": 1.2859, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.518324607329843, | |
| "grad_norm": 9.74325180053711, | |
| "learning_rate": 1.9719477067046768e-05, | |
| "loss": 1.2454, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5235602094240838, | |
| "grad_norm": 8.15595531463623, | |
| "learning_rate": 1.971144392059212e-05, | |
| "loss": 1.1893, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5235602094240838, | |
| "eval_F1_err_corr": 0.9038263356777348, | |
| "eval_accuracy": 0.911307037896365, | |
| "eval_correct_accuracy": 0.9625925807635642, | |
| "eval_error_accuracy": 0.8518225945701047, | |
| "eval_f1": 0.6579983299534773, | |
| "eval_loss": 0.22870948910713196, | |
| "eval_pr_auc": 0.736284937342107, | |
| "eval_precision": 0.6954109934442764, | |
| "eval_recall": 0.624405705229794, | |
| "eval_runtime": 73.2606, | |
| "eval_samples_per_second": 66.789, | |
| "eval_steps_per_second": 0.532, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5287958115183246, | |
| "grad_norm": 14.265653610229492, | |
| "learning_rate": 1.970329905522647e-05, | |
| "loss": 1.2027, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5340314136125655, | |
| "grad_norm": 11.760555267333984, | |
| "learning_rate": 1.9695042564647045e-05, | |
| "loss": 1.1617, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5392670157068062, | |
| "grad_norm": 6.7435383796691895, | |
| "learning_rate": 1.9686674543835208e-05, | |
| "loss": 1.2307, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5445026178010471, | |
| "grad_norm": 10.695906639099121, | |
| "learning_rate": 1.9678195089055347e-05, | |
| "loss": 1.098, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5497382198952879, | |
| "grad_norm": 7.993613243103027, | |
| "learning_rate": 1.9669604297853766e-05, | |
| "loss": 1.285, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5549738219895288, | |
| "grad_norm": 8.1636962890625, | |
| "learning_rate": 1.9660902269057558e-05, | |
| "loss": 1.2282, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5602094240837696, | |
| "grad_norm": 13.683623313903809, | |
| "learning_rate": 1.9652089102773487e-05, | |
| "loss": 1.2109, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.5654450261780105, | |
| "grad_norm": 7.191526889801025, | |
| "learning_rate": 1.9643164900386824e-05, | |
| "loss": 1.1563, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5706806282722513, | |
| "grad_norm": 14.868063926696777, | |
| "learning_rate": 1.963412976456017e-05, | |
| "loss": 1.2169, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5759162303664922, | |
| "grad_norm": 16.254392623901367, | |
| "learning_rate": 1.96249837992323e-05, | |
| "loss": 1.2315, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5759162303664922, | |
| "eval_F1_err_corr": 0.9032073036079588, | |
| "eval_accuracy": 0.9144934261407579, | |
| "eval_correct_accuracy": 0.9674043692973997, | |
| "eval_error_accuracy": 0.8470002679889722, | |
| "eval_f1": 0.6580900544285008, | |
| "eval_loss": 0.2237122654914856, | |
| "eval_pr_auc": 0.7399598147055758, | |
| "eval_precision": 0.7253886010362695, | |
| "eval_recall": 0.6022187004754358, | |
| "eval_runtime": 73.2075, | |
| "eval_samples_per_second": 66.837, | |
| "eval_steps_per_second": 0.533, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.581151832460733, | |
| "grad_norm": 13.422707557678223, | |
| "learning_rate": 1.961572710961695e-05, | |
| "loss": 1.0837, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5863874345549738, | |
| "grad_norm": 8.574109077453613, | |
| "learning_rate": 1.9606359802201608e-05, | |
| "loss": 1.1012, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5916230366492147, | |
| "grad_norm": 8.540183067321777, | |
| "learning_rate": 1.9596881984746288e-05, | |
| "loss": 1.1766, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5968586387434555, | |
| "grad_norm": 10.323470115661621, | |
| "learning_rate": 1.958729376628231e-05, | |
| "loss": 1.0225, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.6020942408376964, | |
| "grad_norm": 7.158539772033691, | |
| "learning_rate": 1.957759525711101e-05, | |
| "loss": 1.1934, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6073298429319371, | |
| "grad_norm": 11.638604164123535, | |
| "learning_rate": 1.9567786568802503e-05, | |
| "loss": 1.1326, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.612565445026178, | |
| "grad_norm": 8.771400451660156, | |
| "learning_rate": 1.9557867814194385e-05, | |
| "loss": 1.1736, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6178010471204188, | |
| "grad_norm": 7.182300567626953, | |
| "learning_rate": 1.9547839107390435e-05, | |
| "loss": 1.0966, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.6230366492146597, | |
| "grad_norm": 7.561171054840088, | |
| "learning_rate": 1.9537700563759303e-05, | |
| "loss": 1.1993, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6282722513089005, | |
| "grad_norm": 11.45133113861084, | |
| "learning_rate": 1.9527452299933192e-05, | |
| "loss": 1.1176, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6282722513089005, | |
| "eval_F1_err_corr": 0.8976839881538714, | |
| "eval_accuracy": 0.9037896365042537, | |
| "eval_correct_accuracy": 0.9433196060921855, | |
| "eval_error_accuracy": 0.8562601071557276, | |
| "eval_f1": 0.6728382074479277, | |
| "eval_loss": 0.24335815012454987, | |
| "eval_pr_auc": 0.7403424614633654, | |
| "eval_precision": 0.6284142267636078, | |
| "eval_recall": 0.7240208286167081, | |
| "eval_runtime": 73.2495, | |
| "eval_samples_per_second": 66.799, | |
| "eval_steps_per_second": 0.532, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6335078534031413, | |
| "grad_norm": 6.921932697296143, | |
| "learning_rate": 1.95170944338065e-05, | |
| "loss": 1.2263, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6387434554973822, | |
| "grad_norm": 8.900130271911621, | |
| "learning_rate": 1.9506627084534486e-05, | |
| "loss": 1.0606, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.643979057591623, | |
| "grad_norm": 7.923438549041748, | |
| "learning_rate": 1.9496050372531864e-05, | |
| "loss": 1.0479, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6492146596858639, | |
| "grad_norm": 7.973376750946045, | |
| "learning_rate": 1.9485364419471454e-05, | |
| "loss": 1.0766, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.6544502617801047, | |
| "grad_norm": 8.22862434387207, | |
| "learning_rate": 1.9474569348282774e-05, | |
| "loss": 1.0183, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6596858638743456, | |
| "grad_norm": 7.483217716217041, | |
| "learning_rate": 1.9463665283150604e-05, | |
| "loss": 1.1458, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6649214659685864, | |
| "grad_norm": 9.2059907913208, | |
| "learning_rate": 1.9452652349513587e-05, | |
| "loss": 1.0281, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6701570680628273, | |
| "grad_norm": 9.361949920654297, | |
| "learning_rate": 1.9441530674062754e-05, | |
| "loss": 1.1281, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.675392670157068, | |
| "grad_norm": 8.502827644348145, | |
| "learning_rate": 1.9430300384740108e-05, | |
| "loss": 1.0423, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.680628272251309, | |
| "grad_norm": 8.099173545837402, | |
| "learning_rate": 1.941896161073711e-05, | |
| "loss": 1.0373, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.680628272251309, | |
| "eval_F1_err_corr": 0.8887221516150057, | |
| "eval_accuracy": 0.8935189481825213, | |
| "eval_correct_accuracy": 0.9295264018044566, | |
| "eval_error_accuracy": 0.8513496941450817, | |
| "eval_f1": 0.6533037872683319, | |
| "eval_loss": 0.2687327563762665, | |
| "eval_pr_auc": 0.7259388976075478, | |
| "eval_precision": 0.5884594447468698, | |
| "eval_recall": 0.7342087389630971, | |
| "eval_runtime": 73.2743, | |
| "eval_samples_per_second": 66.776, | |
| "eval_steps_per_second": 0.532, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6858638743455497, | |
| "grad_norm": 10.470741271972656, | |
| "learning_rate": 1.9407514482493214e-05, | |
| "loss": 1.1026, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6910994764397905, | |
| "grad_norm": 8.012195587158203, | |
| "learning_rate": 1.939595913169438e-05, | |
| "loss": 1.0198, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6963350785340314, | |
| "grad_norm": 9.073850631713867, | |
| "learning_rate": 1.9384295691271523e-05, | |
| "loss": 1.0025, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.7015706806282722, | |
| "grad_norm": 7.318668365478516, | |
| "learning_rate": 1.9372524295399014e-05, | |
| "loss": 1.0328, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7068062827225131, | |
| "grad_norm": 8.4859037399292, | |
| "learning_rate": 1.9360645079493126e-05, | |
| "loss": 1.017, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7120418848167539, | |
| "grad_norm": 8.274374008178711, | |
| "learning_rate": 1.9348658180210473e-05, | |
| "loss": 0.9398, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7172774869109948, | |
| "grad_norm": 9.273284912109375, | |
| "learning_rate": 1.933656373544645e-05, | |
| "loss": 0.9819, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.7225130890052356, | |
| "grad_norm": 7.139795780181885, | |
| "learning_rate": 1.932436188433362e-05, | |
| "loss": 0.9578, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.7277486910994765, | |
| "grad_norm": 11.788651466369629, | |
| "learning_rate": 1.9312052767240153e-05, | |
| "loss": 0.965, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.7329842931937173, | |
| "grad_norm": 8.076215744018555, | |
| "learning_rate": 1.9299636525768176e-05, | |
| "loss": 0.9347, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7329842931937173, | |
| "eval_F1_err_corr": 0.9048476894056305, | |
| "eval_accuracy": 0.9066666666666666, | |
| "eval_correct_accuracy": 0.9648984138016987, | |
| "eval_error_accuracy": 0.851833586371737, | |
| "eval_f1": 0.663544106167057, | |
| "eval_loss": 0.2468423694372177, | |
| "eval_pr_auc": 0.7270364099943972, | |
| "eval_precision": 0.6538461538461539, | |
| "eval_recall": 0.6735340729001584, | |
| "eval_runtime": 73.26, | |
| "eval_samples_per_second": 66.79, | |
| "eval_steps_per_second": 0.532, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7382198952879581, | |
| "grad_norm": 7.399974346160889, | |
| "learning_rate": 1.9287113302752167e-05, | |
| "loss": 0.9323, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.743455497382199, | |
| "grad_norm": 8.363114356994629, | |
| "learning_rate": 1.927448324225729e-05, | |
| "loss": 0.9909, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.7486910994764397, | |
| "grad_norm": 8.044791221618652, | |
| "learning_rate": 1.9261746489577767e-05, | |
| "loss": 0.9245, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7539267015706806, | |
| "grad_norm": 10.069940567016602, | |
| "learning_rate": 1.9248903191235177e-05, | |
| "loss": 0.9564, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7591623036649214, | |
| "grad_norm": 15.672539710998535, | |
| "learning_rate": 1.9235953494976786e-05, | |
| "loss": 1.0271, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7643979057591623, | |
| "grad_norm": 9.93994140625, | |
| "learning_rate": 1.922289754977385e-05, | |
| "loss": 1.0102, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7696335078534031, | |
| "grad_norm": 11.041925430297852, | |
| "learning_rate": 1.920973550581989e-05, | |
| "loss": 0.9714, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.774869109947644, | |
| "grad_norm": 7.117118835449219, | |
| "learning_rate": 1.9196467514528973e-05, | |
| "loss": 0.9632, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7801047120418848, | |
| "grad_norm": 8.919998168945312, | |
| "learning_rate": 1.9183093728533966e-05, | |
| "loss": 0.9669, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7853403141361257, | |
| "grad_norm": 10.06571102142334, | |
| "learning_rate": 1.9169614301684786e-05, | |
| "loss": 1.0159, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7853403141361257, | |
| "eval_F1_err_corr": 0.8977009019045425, | |
| "eval_accuracy": 0.8963959783449342, | |
| "eval_correct_accuracy": 0.9496939887354193, | |
| "eval_error_accuracy": 0.8511052713135295, | |
| "eval_f1": 0.6557006271203866, | |
| "eval_loss": 0.2661343812942505, | |
| "eval_pr_auc": 0.7220293951604384, | |
| "eval_precision": 0.6005649717514124, | |
| "eval_recall": 0.7219832465474304, | |
| "eval_runtime": 73.2795, | |
| "eval_samples_per_second": 66.772, | |
| "eval_steps_per_second": 0.532, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7905759162303665, | |
| "grad_norm": 8.285188674926758, | |
| "learning_rate": 1.915602938904662e-05, | |
| "loss": 1.0173, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.7958115183246073, | |
| "grad_norm": 7.242547035217285, | |
| "learning_rate": 1.914233914689815e-05, | |
| "loss": 0.9253, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.8010471204188482, | |
| "grad_norm": 8.031044960021973, | |
| "learning_rate": 1.912854373272975e-05, | |
| "loss": 0.9148, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.806282722513089, | |
| "grad_norm": 11.873749732971191, | |
| "learning_rate": 1.9114643305241678e-05, | |
| "loss": 0.9732, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.8115183246073299, | |
| "grad_norm": 9.597413063049316, | |
| "learning_rate": 1.9100638024342245e-05, | |
| "loss": 0.9687, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8167539267015707, | |
| "grad_norm": 8.924697875976562, | |
| "learning_rate": 1.908652805114598e-05, | |
| "loss": 0.9346, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.8219895287958116, | |
| "grad_norm": 7.8740081787109375, | |
| "learning_rate": 1.907231354797179e-05, | |
| "loss": 0.916, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.8272251308900523, | |
| "grad_norm": 8.93918514251709, | |
| "learning_rate": 1.9057994678341053e-05, | |
| "loss": 0.9274, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.8324607329842932, | |
| "grad_norm": 8.441919326782227, | |
| "learning_rate": 1.9043571606975776e-05, | |
| "loss": 0.9868, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.837696335078534, | |
| "grad_norm": 7.841855049133301, | |
| "learning_rate": 1.902904449979669e-05, | |
| "loss": 0.8763, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.837696335078534, | |
| "eval_F1_err_corr": 0.9048139002664162, | |
| "eval_accuracy": 0.9069450889404486, | |
| "eval_correct_accuracy": 0.9629414818645937, | |
| "eval_error_accuracy": 0.8533045110380948, | |
| "eval_f1": 0.6676977463543968, | |
| "eval_loss": 0.24821153283119202, | |
| "eval_pr_auc": 0.7226788592887777, | |
| "eval_precision": 0.6519956850053937, | |
| "eval_recall": 0.6841747792619425, | |
| "eval_runtime": 73.2534, | |
| "eval_samples_per_second": 66.796, | |
| "eval_steps_per_second": 0.532, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.8429319371727748, | |
| "grad_norm": 8.774613380432129, | |
| "learning_rate": 1.901441352392133e-05, | |
| "loss": 0.9297, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.8481675392670157, | |
| "grad_norm": 7.270528316497803, | |
| "learning_rate": 1.8999678847662124e-05, | |
| "loss": 0.961, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.8534031413612565, | |
| "grad_norm": 9.619499206542969, | |
| "learning_rate": 1.8984840640524445e-05, | |
| "loss": 0.8385, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.8586387434554974, | |
| "grad_norm": 9.912280082702637, | |
| "learning_rate": 1.8969899073204687e-05, | |
| "loss": 0.8858, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8638743455497382, | |
| "grad_norm": 7.621968746185303, | |
| "learning_rate": 1.8954854317588262e-05, | |
| "loss": 0.9501, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8691099476439791, | |
| "grad_norm": 6.877166271209717, | |
| "learning_rate": 1.8939706546747656e-05, | |
| "loss": 0.8896, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8743455497382199, | |
| "grad_norm": 12.053464889526367, | |
| "learning_rate": 1.8924455934940424e-05, | |
| "loss": 0.8812, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.8795811518324608, | |
| "grad_norm": 8.33543872833252, | |
| "learning_rate": 1.8909102657607182e-05, | |
| "loss": 0.8902, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8848167539267016, | |
| "grad_norm": 10.474023818969727, | |
| "learning_rate": 1.88936468913696e-05, | |
| "loss": 0.8269, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8900523560209425, | |
| "grad_norm": 6.880638122558594, | |
| "learning_rate": 1.8878088814028365e-05, | |
| "loss": 0.8485, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8900523560209425, | |
| "eval_F1_err_corr": 0.9059432251011823, | |
| "eval_accuracy": 0.9063882443928848, | |
| "eval_correct_accuracy": 0.9703779177060071, | |
| "eval_error_accuracy": 0.8495328444726701, | |
| "eval_f1": 0.6530612244897959, | |
| "eval_loss": 0.24516551196575165, | |
| "eval_pr_auc": 0.7134608408684431, | |
| "eval_precision": 0.6615563298490128, | |
| "eval_recall": 0.6447815259225719, | |
| "eval_runtime": 73.2495, | |
| "eval_samples_per_second": 66.799, | |
| "eval_steps_per_second": 0.532, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8952879581151832, | |
| "grad_norm": 6.686065673828125, | |
| "learning_rate": 1.886242860456113e-05, | |
| "loss": 0.8777, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.900523560209424, | |
| "grad_norm": 7.866757869720459, | |
| "learning_rate": 1.884666644312046e-05, | |
| "loss": 0.8438, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.9057591623036649, | |
| "grad_norm": 8.994976043701172, | |
| "learning_rate": 1.8830802511031763e-05, | |
| "loss": 0.8494, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.9109947643979057, | |
| "grad_norm": 11.257708549499512, | |
| "learning_rate": 1.88148369907912e-05, | |
| "loss": 0.8245, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.9162303664921466, | |
| "grad_norm": 7.707327365875244, | |
| "learning_rate": 1.8798770066063577e-05, | |
| "loss": 0.8057, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9214659685863874, | |
| "grad_norm": 11.17904281616211, | |
| "learning_rate": 1.8782601921680258e-05, | |
| "loss": 0.89, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.9267015706806283, | |
| "grad_norm": 8.393304824829102, | |
| "learning_rate": 1.8766332743637002e-05, | |
| "loss": 0.776, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.9319371727748691, | |
| "grad_norm": 10.385251998901367, | |
| "learning_rate": 1.8749962719091864e-05, | |
| "loss": 0.8372, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.93717277486911, | |
| "grad_norm": 7.805019855499268, | |
| "learning_rate": 1.8733492036363007e-05, | |
| "loss": 0.826, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.9424083769633508, | |
| "grad_norm": 7.322630405426025, | |
| "learning_rate": 1.871692088492655e-05, | |
| "loss": 0.8387, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9424083769633508, | |
| "eval_F1_err_corr": 0.9048057208808482, | |
| "eval_accuracy": 0.9011291569992266, | |
| "eval_correct_accuracy": 0.9635766222061855, | |
| "eval_error_accuracy": 0.8527918542488826, | |
| "eval_f1": 0.6579623287671232, | |
| "eval_loss": 0.25547730922698975, | |
| "eval_pr_auc": 0.7115432250363661, | |
| "eval_precision": 0.6239090724578851, | |
| "eval_recall": 0.6959474756622142, | |
| "eval_runtime": 73.2576, | |
| "eval_samples_per_second": 66.792, | |
| "eval_steps_per_second": 0.532, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9476439790575916, | |
| "grad_norm": 8.483357429504395, | |
| "learning_rate": 1.8700249455414394e-05, | |
| "loss": 0.796, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.9528795811518325, | |
| "grad_norm": 7.0828423500061035, | |
| "learning_rate": 1.8683477939612024e-05, | |
| "loss": 0.7919, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.9581151832460733, | |
| "grad_norm": 7.401891231536865, | |
| "learning_rate": 1.866660653045629e-05, | |
| "loss": 0.7811, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.9633507853403142, | |
| "grad_norm": 7.2336273193359375, | |
| "learning_rate": 1.8649635422033218e-05, | |
| "loss": 0.7897, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9685863874345549, | |
| "grad_norm": 8.54906940460205, | |
| "learning_rate": 1.863256480957574e-05, | |
| "loss": 0.7877, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9738219895287958, | |
| "grad_norm": 6.458954334259033, | |
| "learning_rate": 1.861539488946148e-05, | |
| "loss": 0.8188, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9790575916230366, | |
| "grad_norm": 6.701814651489258, | |
| "learning_rate": 1.8598125859210475e-05, | |
| "loss": 0.8441, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9842931937172775, | |
| "grad_norm": 9.568708419799805, | |
| "learning_rate": 1.858075791748291e-05, | |
| "loss": 0.8249, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9895287958115183, | |
| "grad_norm": 7.8331170082092285, | |
| "learning_rate": 1.8563291264076834e-05, | |
| "loss": 0.7904, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9947643979057592, | |
| "grad_norm": 8.621946334838867, | |
| "learning_rate": 1.854572609992586e-05, | |
| "loss": 0.7826, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9947643979057592, | |
| "eval_F1_err_corr": 0.9040361144490907, | |
| "eval_accuracy": 0.9081515854601702, | |
| "eval_correct_accuracy": 0.9695169328815136, | |
| "eval_error_accuracy": 0.8468407972694707, | |
| "eval_f1": 0.6513211978860833, | |
| "eval_loss": 0.24031765758991241, | |
| "eval_pr_auc": 0.7155756128338253, | |
| "eval_precision": 0.6766715470961444, | |
| "eval_recall": 0.627801675345257, | |
| "eval_runtime": 73.2204, | |
| "eval_samples_per_second": 66.826, | |
| "eval_steps_per_second": 0.533, | |
| "step": 760 | |
| } | |
| ], | |
| "logging_steps": 4, | |
| "max_steps": 3820, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 16, | |
| "stateful_callbacks": { | |
| "MinEpochEarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.001 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5127896664393646e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |