| { |
| "best_global_step": 8775, |
| "best_metric": 2.2569968700408936, |
| "best_model_checkpoint": "./output/bert-base-mti881/checkpoint-8775", |
| "epoch": 15.0, |
| "eval_steps": 500, |
| "global_step": 43875, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.17094017094017094, |
| "grad_norm": 1.6328529119491577, |
| "learning_rate": 4.943019943019943e-05, |
| "loss": 2.469, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3418803418803419, |
| "grad_norm": 1.7880568504333496, |
| "learning_rate": 4.886039886039887e-05, |
| "loss": 2.3525, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 2.440093994140625, |
| "learning_rate": 4.829059829059829e-05, |
| "loss": 2.331, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6837606837606838, |
| "grad_norm": 2.336617946624756, |
| "learning_rate": 4.772079772079772e-05, |
| "loss": 2.3127, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8547008547008547, |
| "grad_norm": 2.22334885597229, |
| "learning_rate": 4.7150997150997157e-05, |
| "loss": 2.2999, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.8566329784624442, |
| "eval_f1": 0.5589353612167302, |
| "eval_loss": 2.286189317703247, |
| "eval_precision": 0.5451906796742724, |
| "eval_recall": 0.5733909946578479, |
| "eval_runtime": 6.4563, |
| "eval_samples_per_second": 452.889, |
| "eval_steps_per_second": 56.689, |
| "step": 2925 |
| }, |
| { |
| "epoch": 1.0256410256410255, |
| "grad_norm": 1.5501340627670288, |
| "learning_rate": 4.6581196581196586e-05, |
| "loss": 2.2831, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.1965811965811965, |
| "grad_norm": 1.7197738885879517, |
| "learning_rate": 4.6011396011396016e-05, |
| "loss": 2.2335, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.3675213675213675, |
| "grad_norm": 2.045734405517578, |
| "learning_rate": 4.544159544159544e-05, |
| "loss": 2.2371, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 1.6353585720062256, |
| "learning_rate": 4.4871794871794874e-05, |
| "loss": 2.2339, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.7094017094017095, |
| "grad_norm": 2.460322141647339, |
| "learning_rate": 4.4301994301994304e-05, |
| "loss": 2.233, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.8803418803418803, |
| "grad_norm": 1.5123356580734253, |
| "learning_rate": 4.3732193732193733e-05, |
| "loss": 2.2263, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8603247804543002, |
| "eval_f1": 0.5899178255372945, |
| "eval_loss": 2.275588274002075, |
| "eval_precision": 0.5522597825282936, |
| "eval_recall": 0.6330874247434919, |
| "eval_runtime": 5.6077, |
| "eval_samples_per_second": 521.421, |
| "eval_steps_per_second": 65.267, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.051282051282051, |
| "grad_norm": 1.0685631036758423, |
| "learning_rate": 4.316239316239317e-05, |
| "loss": 2.2079, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 1.3912978172302246, |
| "learning_rate": 4.259259259259259e-05, |
| "loss": 2.1651, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.393162393162393, |
| "grad_norm": 1.7378512620925903, |
| "learning_rate": 4.202279202279202e-05, |
| "loss": 2.1688, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.564102564102564, |
| "grad_norm": 2.20090913772583, |
| "learning_rate": 4.145299145299146e-05, |
| "loss": 2.1664, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.735042735042735, |
| "grad_norm": 1.6386638879776, |
| "learning_rate": 4.088319088319089e-05, |
| "loss": 2.1683, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.905982905982906, |
| "grad_norm": 0.7773854732513428, |
| "learning_rate": 4.031339031339032e-05, |
| "loss": 2.1624, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.8788941765196487, |
| "eval_f1": 0.6219396959024139, |
| "eval_loss": 2.2569968700408936, |
| "eval_precision": 0.6301679867699539, |
| "eval_recall": 0.6139235139489527, |
| "eval_runtime": 5.624, |
| "eval_samples_per_second": 519.914, |
| "eval_steps_per_second": 65.078, |
| "step": 8775 |
| }, |
| { |
| "epoch": 3.076923076923077, |
| "grad_norm": 2.1296703815460205, |
| "learning_rate": 3.974358974358974e-05, |
| "loss": 2.1407, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.247863247863248, |
| "grad_norm": 3.029876708984375, |
| "learning_rate": 3.9173789173789176e-05, |
| "loss": 2.1139, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.4188034188034186, |
| "grad_norm": 2.393371820449829, |
| "learning_rate": 3.8603988603988605e-05, |
| "loss": 2.117, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.58974358974359, |
| "grad_norm": 3.3726866245269775, |
| "learning_rate": 3.8034188034188035e-05, |
| "loss": 2.1141, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.7606837606837606, |
| "grad_norm": 1.123772382736206, |
| "learning_rate": 3.746438746438747e-05, |
| "loss": 2.1151, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.931623931623932, |
| "grad_norm": 2.8514039516448975, |
| "learning_rate": 3.6894586894586894e-05, |
| "loss": 2.1192, |
| "step": 11500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.8776431339842026, |
| "eval_f1": 0.6255963151834184, |
| "eval_loss": 2.269813299179077, |
| "eval_precision": 0.6073624530863212, |
| "eval_recall": 0.6449588739082507, |
| "eval_runtime": 5.588, |
| "eval_samples_per_second": 523.26, |
| "eval_steps_per_second": 65.497, |
| "step": 11700 |
| }, |
| { |
| "epoch": 4.102564102564102, |
| "grad_norm": 0.6382957100868225, |
| "learning_rate": 3.6324786324786323e-05, |
| "loss": 2.0942, |
| "step": 12000 |
| }, |
| { |
| "epoch": 4.273504273504273, |
| "grad_norm": 2.4572439193725586, |
| "learning_rate": 3.575498575498576e-05, |
| "loss": 2.079, |
| "step": 12500 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 4.030599117279053, |
| "learning_rate": 3.518518518518519e-05, |
| "loss": 2.0824, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "grad_norm": 1.3176660537719727, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 2.0844, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.786324786324786, |
| "grad_norm": 2.164088010787964, |
| "learning_rate": 3.404558404558404e-05, |
| "loss": 2.0804, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.957264957264957, |
| "grad_norm": 6.9171552658081055, |
| "learning_rate": 3.347578347578348e-05, |
| "loss": 2.0896, |
| "step": 14500 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.8791149487317863, |
| "eval_f1": 0.6324646008618922, |
| "eval_loss": 2.2901737689971924, |
| "eval_precision": 0.612869869551384, |
| "eval_recall": 0.6533536843890444, |
| "eval_runtime": 5.621, |
| "eval_samples_per_second": 520.196, |
| "eval_steps_per_second": 65.113, |
| "step": 14625 |
| }, |
| { |
| "epoch": 5.128205128205128, |
| "grad_norm": 2.0550243854522705, |
| "learning_rate": 3.290598290598291e-05, |
| "loss": 2.0631, |
| "step": 15000 |
| }, |
| { |
| "epoch": 5.299145299145299, |
| "grad_norm": 1.2494322061538696, |
| "learning_rate": 3.2336182336182337e-05, |
| "loss": 2.0561, |
| "step": 15500 |
| }, |
| { |
| "epoch": 5.47008547008547, |
| "grad_norm": 2.4397966861724854, |
| "learning_rate": 3.176638176638177e-05, |
| "loss": 2.058, |
| "step": 16000 |
| }, |
| { |
| "epoch": 5.641025641025641, |
| "grad_norm": 2.813675880432129, |
| "learning_rate": 3.1196581196581195e-05, |
| "loss": 2.0611, |
| "step": 16500 |
| }, |
| { |
| "epoch": 5.811965811965812, |
| "grad_norm": 1.493696928024292, |
| "learning_rate": 3.0626780626780625e-05, |
| "loss": 2.0609, |
| "step": 17000 |
| }, |
| { |
| "epoch": 5.982905982905983, |
| "grad_norm": 2.580273389816284, |
| "learning_rate": 3.005698005698006e-05, |
| "loss": 2.0621, |
| "step": 17500 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.8811141637639209, |
| "eval_f1": 0.6368790156637131, |
| "eval_loss": 2.2965099811553955, |
| "eval_precision": 0.630865224625624, |
| "eval_recall": 0.6430085644026117, |
| "eval_runtime": 5.5652, |
| "eval_samples_per_second": 525.412, |
| "eval_steps_per_second": 65.766, |
| "step": 17550 |
| }, |
| { |
| "epoch": 6.153846153846154, |
| "grad_norm": 3.136852741241455, |
| "learning_rate": 2.948717948717949e-05, |
| "loss": 2.0441, |
| "step": 18000 |
| }, |
| { |
| "epoch": 6.3247863247863245, |
| "grad_norm": 1.3632102012634277, |
| "learning_rate": 2.8917378917378917e-05, |
| "loss": 2.0433, |
| "step": 18500 |
| }, |
| { |
| "epoch": 6.495726495726496, |
| "grad_norm": 4.941199779510498, |
| "learning_rate": 2.8347578347578346e-05, |
| "loss": 2.0427, |
| "step": 19000 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 2.8133013248443604, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 2.0436, |
| "step": 19500 |
| }, |
| { |
| "epoch": 6.837606837606837, |
| "grad_norm": 1.1807732582092285, |
| "learning_rate": 2.720797720797721e-05, |
| "loss": 2.0442, |
| "step": 20000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8829661973212971, |
| "eval_f1": 0.6478284496091627, |
| "eval_loss": 2.306105852127075, |
| "eval_precision": 0.6388293487221764, |
| "eval_recall": 0.657084711269397, |
| "eval_runtime": 5.5992, |
| "eval_samples_per_second": 522.219, |
| "eval_steps_per_second": 65.367, |
| "step": 20475 |
| }, |
| { |
| "epoch": 7.0085470085470085, |
| "grad_norm": 1.7212845087051392, |
| "learning_rate": 2.6638176638176638e-05, |
| "loss": 2.0458, |
| "step": 20500 |
| }, |
| { |
| "epoch": 7.17948717948718, |
| "grad_norm": 2.134288787841797, |
| "learning_rate": 2.606837606837607e-05, |
| "loss": 2.0322, |
| "step": 21000 |
| }, |
| { |
| "epoch": 7.35042735042735, |
| "grad_norm": 2.6075599193573, |
| "learning_rate": 2.54985754985755e-05, |
| "loss": 2.033, |
| "step": 21500 |
| }, |
| { |
| "epoch": 7.521367521367521, |
| "grad_norm": 0.940613329410553, |
| "learning_rate": 2.492877492877493e-05, |
| "loss": 2.0315, |
| "step": 22000 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 5.997873783111572, |
| "learning_rate": 2.435897435897436e-05, |
| "loss": 2.0317, |
| "step": 22500 |
| }, |
| { |
| "epoch": 7.863247863247864, |
| "grad_norm": 1.9498519897460938, |
| "learning_rate": 2.3789173789173792e-05, |
| "loss": 2.0301, |
| "step": 23000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.8818132757690232, |
| "eval_f1": 0.6476386036960986, |
| "eval_loss": 2.3260273933410645, |
| "eval_precision": 0.6279366090626742, |
| "eval_recall": 0.6686169761723056, |
| "eval_runtime": 5.5616, |
| "eval_samples_per_second": 525.751, |
| "eval_steps_per_second": 65.809, |
| "step": 23400 |
| }, |
| { |
| "epoch": 8.034188034188034, |
| "grad_norm": 3.1696274280548096, |
| "learning_rate": 2.321937321937322e-05, |
| "loss": 2.0325, |
| "step": 23500 |
| }, |
| { |
| "epoch": 8.205128205128204, |
| "grad_norm": 0.9211856126785278, |
| "learning_rate": 2.264957264957265e-05, |
| "loss": 2.0222, |
| "step": 24000 |
| }, |
| { |
| "epoch": 8.376068376068377, |
| "grad_norm": 2.332916259765625, |
| "learning_rate": 2.207977207977208e-05, |
| "loss": 2.0244, |
| "step": 24500 |
| }, |
| { |
| "epoch": 8.547008547008547, |
| "grad_norm": 1.2731038331985474, |
| "learning_rate": 2.150997150997151e-05, |
| "loss": 2.0242, |
| "step": 25000 |
| }, |
| { |
| "epoch": 8.717948717948717, |
| "grad_norm": 0.8299376964569092, |
| "learning_rate": 2.0940170940170943e-05, |
| "loss": 2.0238, |
| "step": 25500 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "grad_norm": 1.503308892250061, |
| "learning_rate": 2.037037037037037e-05, |
| "loss": 2.0242, |
| "step": 26000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.8830275229357798, |
| "eval_f1": 0.6493926454127109, |
| "eval_loss": 2.3398172855377197, |
| "eval_precision": 0.6353017521090201, |
| "eval_recall": 0.6641227847027897, |
| "eval_runtime": 5.6249, |
| "eval_samples_per_second": 519.833, |
| "eval_steps_per_second": 65.068, |
| "step": 26325 |
| }, |
| { |
| "epoch": 9.05982905982906, |
| "grad_norm": 1.7587120532989502, |
| "learning_rate": 1.9800569800569802e-05, |
| "loss": 2.0226, |
| "step": 26500 |
| }, |
| { |
| "epoch": 9.23076923076923, |
| "grad_norm": 0.7542155385017395, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": 2.0177, |
| "step": 27000 |
| }, |
| { |
| "epoch": 9.401709401709402, |
| "grad_norm": 0.33988329768180847, |
| "learning_rate": 1.866096866096866e-05, |
| "loss": 2.0203, |
| "step": 27500 |
| }, |
| { |
| "epoch": 9.572649572649572, |
| "grad_norm": 1.8626066446304321, |
| "learning_rate": 1.8091168091168094e-05, |
| "loss": 2.0175, |
| "step": 28000 |
| }, |
| { |
| "epoch": 9.743589743589745, |
| "grad_norm": 2.40765118598938, |
| "learning_rate": 1.752136752136752e-05, |
| "loss": 2.0183, |
| "step": 28500 |
| }, |
| { |
| "epoch": 9.914529914529915, |
| "grad_norm": 2.155571222305298, |
| "learning_rate": 1.6951566951566953e-05, |
| "loss": 2.0173, |
| "step": 29000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.8841559142422607, |
| "eval_f1": 0.652157598499062, |
| "eval_loss": 2.3391082286834717, |
| "eval_precision": 0.641486220472441, |
| "eval_recall": 0.6631900279827017, |
| "eval_runtime": 5.5617, |
| "eval_samples_per_second": 525.742, |
| "eval_steps_per_second": 65.808, |
| "step": 29250 |
| }, |
| { |
| "epoch": 10.085470085470085, |
| "grad_norm": 1.149816870689392, |
| "learning_rate": 1.6381766381766382e-05, |
| "loss": 2.0171, |
| "step": 29500 |
| }, |
| { |
| "epoch": 10.256410256410255, |
| "grad_norm": 0.5041487812995911, |
| "learning_rate": 1.581196581196581e-05, |
| "loss": 2.0133, |
| "step": 30000 |
| }, |
| { |
| "epoch": 10.427350427350428, |
| "grad_norm": 6.211667537689209, |
| "learning_rate": 1.5242165242165243e-05, |
| "loss": 2.0144, |
| "step": 30500 |
| }, |
| { |
| "epoch": 10.598290598290598, |
| "grad_norm": 0.1538165956735611, |
| "learning_rate": 1.4672364672364672e-05, |
| "loss": 2.0135, |
| "step": 31000 |
| }, |
| { |
| "epoch": 10.76923076923077, |
| "grad_norm": 1.0518053770065308, |
| "learning_rate": 1.4102564102564104e-05, |
| "loss": 2.0128, |
| "step": 31500 |
| }, |
| { |
| "epoch": 10.94017094017094, |
| "grad_norm": 1.116525650024414, |
| "learning_rate": 1.3532763532763535e-05, |
| "loss": 2.0132, |
| "step": 32000 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.8832973556395035, |
| "eval_f1": 0.6500785318674052, |
| "eval_loss": 2.3498170375823975, |
| "eval_precision": 0.634142407870333, |
| "eval_recall": 0.6668362587975918, |
| "eval_runtime": 5.7697, |
| "eval_samples_per_second": 506.782, |
| "eval_steps_per_second": 63.434, |
| "step": 32175 |
| }, |
| { |
| "epoch": 11.11111111111111, |
| "grad_norm": 0.1830213963985443, |
| "learning_rate": 1.2962962962962962e-05, |
| "loss": 2.0121, |
| "step": 32500 |
| }, |
| { |
| "epoch": 11.282051282051283, |
| "grad_norm": 2.5111734867095947, |
| "learning_rate": 1.2393162393162394e-05, |
| "loss": 2.0103, |
| "step": 33000 |
| }, |
| { |
| "epoch": 11.452991452991453, |
| "grad_norm": 3.7082180976867676, |
| "learning_rate": 1.1823361823361825e-05, |
| "loss": 2.0103, |
| "step": 33500 |
| }, |
| { |
| "epoch": 11.623931623931623, |
| "grad_norm": 1.1296755075454712, |
| "learning_rate": 1.1253561253561254e-05, |
| "loss": 2.011, |
| "step": 34000 |
| }, |
| { |
| "epoch": 11.794871794871796, |
| "grad_norm": 2.4463248252868652, |
| "learning_rate": 1.0683760683760684e-05, |
| "loss": 2.0093, |
| "step": 34500 |
| }, |
| { |
| "epoch": 11.965811965811966, |
| "grad_norm": 0.03058500401675701, |
| "learning_rate": 1.0113960113960115e-05, |
| "loss": 2.0097, |
| "step": 35000 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.8845851935436393, |
| "eval_f1": 0.6505743299483937, |
| "eval_loss": 2.355226993560791, |
| "eval_precision": 0.6388230486309767, |
| "eval_recall": 0.6627660476553888, |
| "eval_runtime": 5.5805, |
| "eval_samples_per_second": 523.964, |
| "eval_steps_per_second": 65.585, |
| "step": 35100 |
| }, |
| { |
| "epoch": 12.136752136752136, |
| "grad_norm": 1.262992024421692, |
| "learning_rate": 9.544159544159544e-06, |
| "loss": 2.0083, |
| "step": 35500 |
| }, |
| { |
| "epoch": 12.307692307692308, |
| "grad_norm": 0.350888192653656, |
| "learning_rate": 8.974358974358976e-06, |
| "loss": 2.0082, |
| "step": 36000 |
| }, |
| { |
| "epoch": 12.478632478632479, |
| "grad_norm": 0.7504994869232178, |
| "learning_rate": 8.404558404558405e-06, |
| "loss": 2.0089, |
| "step": 36500 |
| }, |
| { |
| "epoch": 12.649572649572649, |
| "grad_norm": 2.052617311477661, |
| "learning_rate": 7.834757834757835e-06, |
| "loss": 2.0072, |
| "step": 37000 |
| }, |
| { |
| "epoch": 12.820512820512821, |
| "grad_norm": 0.4613409638404846, |
| "learning_rate": 7.264957264957266e-06, |
| "loss": 2.0073, |
| "step": 37500 |
| }, |
| { |
| "epoch": 12.991452991452991, |
| "grad_norm": 4.136294364929199, |
| "learning_rate": 6.695156695156696e-06, |
| "loss": 2.007, |
| "step": 38000 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.8839228769072266, |
| "eval_f1": 0.6545124566903151, |
| "eval_loss": 2.3634226322174072, |
| "eval_precision": 0.6372178941450486, |
| "eval_recall": 0.6727719833799711, |
| "eval_runtime": 5.622, |
| "eval_samples_per_second": 520.097, |
| "eval_steps_per_second": 65.101, |
| "step": 38025 |
| }, |
| { |
| "epoch": 13.162393162393162, |
| "grad_norm": 0.16694723069667816, |
| "learning_rate": 6.1253561253561255e-06, |
| "loss": 2.0057, |
| "step": 38500 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 0.8811143636703491, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 2.0065, |
| "step": 39000 |
| }, |
| { |
| "epoch": 13.504273504273504, |
| "grad_norm": 0.4992905855178833, |
| "learning_rate": 4.985754985754986e-06, |
| "loss": 2.0068, |
| "step": 39500 |
| }, |
| { |
| "epoch": 13.675213675213675, |
| "grad_norm": 0.6530119180679321, |
| "learning_rate": 4.415954415954416e-06, |
| "loss": 2.0052, |
| "step": 40000 |
| }, |
| { |
| "epoch": 13.846153846153847, |
| "grad_norm": 2.222022771835327, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 2.0062, |
| "step": 40500 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.884462542314674, |
| "eval_f1": 0.6561026065370293, |
| "eval_loss": 2.3629047870635986, |
| "eval_precision": 0.6406237375777653, |
| "eval_recall": 0.6723480030526584, |
| "eval_runtime": 5.6036, |
| "eval_samples_per_second": 521.81, |
| "eval_steps_per_second": 65.316, |
| "step": 40950 |
| }, |
| { |
| "epoch": 14.017094017094017, |
| "grad_norm": 0.11298029124736786, |
| "learning_rate": 3.2763532763532763e-06, |
| "loss": 2.0064, |
| "step": 41000 |
| }, |
| { |
| "epoch": 14.188034188034187, |
| "grad_norm": 0.11808889359235764, |
| "learning_rate": 2.7065527065527066e-06, |
| "loss": 2.0048, |
| "step": 41500 |
| }, |
| { |
| "epoch": 14.35897435897436, |
| "grad_norm": 0.051862556487321854, |
| "learning_rate": 2.136752136752137e-06, |
| "loss": 2.0052, |
| "step": 42000 |
| }, |
| { |
| "epoch": 14.52991452991453, |
| "grad_norm": 0.021300671622157097, |
| "learning_rate": 1.566951566951567e-06, |
| "loss": 2.0053, |
| "step": 42500 |
| }, |
| { |
| "epoch": 14.7008547008547, |
| "grad_norm": 0.11307813972234726, |
| "learning_rate": 9.971509971509971e-07, |
| "loss": 2.005, |
| "step": 43000 |
| }, |
| { |
| "epoch": 14.871794871794872, |
| "grad_norm": 1.3423974514007568, |
| "learning_rate": 4.273504273504274e-07, |
| "loss": 2.0041, |
| "step": 43500 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.8847446401412942, |
| "eval_f1": 0.6565610672834661, |
| "eval_loss": 2.365044116973877, |
| "eval_precision": 0.6400386535674022, |
| "eval_recall": 0.673959128296447, |
| "eval_runtime": 5.6195, |
| "eval_samples_per_second": 520.335, |
| "eval_steps_per_second": 65.131, |
| "step": 43875 |
| }, |
| { |
| "epoch": 15.0, |
| "step": 43875, |
| "total_flos": 1.39563382170006e+16, |
| "train_loss": 2.07735239021323, |
| "train_runtime": 2398.7609, |
| "train_samples_per_second": 146.319, |
| "train_steps_per_second": 18.291 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 43875, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 15, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.39563382170006e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|