{
  "best_metric": 0.0,
  "best_model_checkpoint": "./results/checkpoint-500",
  "epoch": 0.5128205128205128,
  "eval_steps": 500,
  "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003418803418803419,
      "grad_norm": 2.3258378505706787,
      "learning_rate": 4.9943019943019945e-05,
      "loss": 0.6681,
      "step": 10
    },
    {
      "epoch": 0.006837606837606838,
      "grad_norm": 0.7698261737823486,
      "learning_rate": 4.988603988603989e-05,
      "loss": 0.6451,
      "step": 20
    },
    {
      "epoch": 0.010256410256410256,
      "grad_norm": 1.6664257049560547,
      "learning_rate": 4.982905982905983e-05,
      "loss": 0.6485,
      "step": 30
    },
    {
      "epoch": 0.013675213675213675,
      "grad_norm": 0.6200563907623291,
      "learning_rate": 4.9772079772079774e-05,
      "loss": 0.6453,
      "step": 40
    },
    {
      "epoch": 0.017094017094017096,
      "grad_norm": 0.5258885622024536,
      "learning_rate": 4.971509971509972e-05,
      "loss": 0.6569,
      "step": 50
    },
    {
      "epoch": 0.020512820512820513,
      "grad_norm": 0.5715610384941101,
      "learning_rate": 4.965811965811966e-05,
      "loss": 0.6508,
      "step": 60
    },
    {
      "epoch": 0.023931623931623933,
      "grad_norm": 0.5744765400886536,
      "learning_rate": 4.96011396011396e-05,
      "loss": 0.6029,
      "step": 70
    },
    {
      "epoch": 0.02735042735042735,
      "grad_norm": 0.9320403337478638,
      "learning_rate": 4.9544159544159546e-05,
      "loss": 0.6644,
      "step": 80
    },
    {
      "epoch": 0.03076923076923077,
      "grad_norm": 0.5994309186935425,
      "learning_rate": 4.948717948717949e-05,
      "loss": 0.6757,
      "step": 90
    },
    {
      "epoch": 0.03418803418803419,
      "grad_norm": 0.4685361385345459,
      "learning_rate": 4.943019943019943e-05,
      "loss": 0.6372,
      "step": 100
    },
    {
      "epoch": 0.037606837606837605,
      "grad_norm": 0.6897755265235901,
      "learning_rate": 4.9373219373219375e-05,
      "loss": 0.6395,
      "step": 110
    },
    {
      "epoch": 0.041025641025641026,
      "grad_norm": 0.5714218616485596,
      "learning_rate": 4.931623931623932e-05,
      "loss": 0.6323,
      "step": 120
    },
    {
      "epoch": 0.044444444444444446,
      "grad_norm": 0.6862583160400391,
      "learning_rate": 4.925925925925926e-05,
      "loss": 0.6307,
      "step": 130
    },
    {
      "epoch": 0.04786324786324787,
      "grad_norm": 1.1985986232757568,
      "learning_rate": 4.9202279202279204e-05,
      "loss": 0.6353,
      "step": 140
    },
    {
      "epoch": 0.05128205128205128,
      "grad_norm": 0.4656996428966522,
      "learning_rate": 4.9145299145299147e-05,
      "loss": 0.6552,
      "step": 150
    },
    {
      "epoch": 0.0547008547008547,
      "grad_norm": 1.3551446199417114,
      "learning_rate": 4.908831908831909e-05,
      "loss": 0.6484,
      "step": 160
    },
    {
      "epoch": 0.05811965811965812,
      "grad_norm": 1.137487769126892,
      "learning_rate": 4.903133903133903e-05,
      "loss": 0.5905,
      "step": 170
    },
    {
      "epoch": 0.06153846153846154,
      "grad_norm": 0.6064645051956177,
      "learning_rate": 4.8974358974358975e-05,
      "loss": 0.6157,
      "step": 180
    },
    {
      "epoch": 0.06495726495726496,
      "grad_norm": 2.0975794792175293,
      "learning_rate": 4.891737891737892e-05,
      "loss": 0.6701,
      "step": 190
    },
    {
      "epoch": 0.06837606837606838,
      "grad_norm": 0.48940032720565796,
      "learning_rate": 4.886039886039887e-05,
      "loss": 0.6342,
      "step": 200
    },
    {
      "epoch": 0.07179487179487179,
      "grad_norm": 1.2511190176010132,
      "learning_rate": 4.8803418803418804e-05,
      "loss": 0.6521,
      "step": 210
    },
    {
      "epoch": 0.07521367521367521,
      "grad_norm": 0.7074885964393616,
      "learning_rate": 4.874643874643875e-05,
      "loss": 0.6548,
      "step": 220
    },
    {
      "epoch": 0.07863247863247863,
      "grad_norm": 1.152065396308899,
      "learning_rate": 4.868945868945869e-05,
      "loss": 0.6589,
      "step": 230
    },
    {
      "epoch": 0.08205128205128205,
      "grad_norm": 0.39897221326828003,
      "learning_rate": 4.863247863247863e-05,
      "loss": 0.6595,
      "step": 240
    },
    {
      "epoch": 0.08547008547008547,
      "grad_norm": 0.5259735584259033,
      "learning_rate": 4.8575498575498576e-05,
      "loss": 0.665,
      "step": 250
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 0.5097119808197021,
      "learning_rate": 4.851851851851852e-05,
      "loss": 0.6498,
      "step": 260
    },
    {
      "epoch": 0.09230769230769231,
      "grad_norm": 0.48037877678871155,
      "learning_rate": 4.846153846153846e-05,
      "loss": 0.5882,
      "step": 270
    },
    {
      "epoch": 0.09572649572649573,
      "grad_norm": 0.6850088834762573,
      "learning_rate": 4.840455840455841e-05,
      "loss": 0.6329,
      "step": 280
    },
    {
      "epoch": 0.09914529914529914,
      "grad_norm": 0.6092679500579834,
      "learning_rate": 4.834757834757835e-05,
      "loss": 0.6246,
      "step": 290
    },
    {
      "epoch": 0.10256410256410256,
      "grad_norm": 1.0922237634658813,
      "learning_rate": 4.829059829059829e-05,
      "loss": 0.6144,
      "step": 300
    },
    {
      "epoch": 0.10598290598290598,
      "grad_norm": 1.4150214195251465,
      "learning_rate": 4.823361823361824e-05,
      "loss": 0.643,
      "step": 310
    },
    {
      "epoch": 0.1094017094017094,
      "grad_norm": 1.516169548034668,
      "learning_rate": 4.817663817663818e-05,
      "loss": 0.6046,
      "step": 320
    },
    {
      "epoch": 0.11282051282051282,
      "grad_norm": 0.5234593749046326,
      "learning_rate": 4.8119658119658126e-05,
      "loss": 0.6193,
      "step": 330
    },
    {
      "epoch": 0.11623931623931624,
      "grad_norm": 0.6485182046890259,
      "learning_rate": 4.806267806267806e-05,
      "loss": 0.6314,
      "step": 340
    },
    {
      "epoch": 0.11965811965811966,
      "grad_norm": 0.9457536935806274,
      "learning_rate": 4.8005698005698006e-05,
      "loss": 0.5802,
      "step": 350
    },
    {
      "epoch": 0.12307692307692308,
      "grad_norm": 1.2444144487380981,
      "learning_rate": 4.7948717948717955e-05,
      "loss": 0.5927,
      "step": 360
    },
    {
      "epoch": 0.1264957264957265,
      "grad_norm": 0.499647855758667,
      "learning_rate": 4.789173789173789e-05,
      "loss": 0.6358,
      "step": 370
    },
    {
      "epoch": 0.12991452991452992,
      "grad_norm": 2.130183696746826,
      "learning_rate": 4.7834757834757834e-05,
      "loss": 0.6324,
      "step": 380
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 0.6378350257873535,
      "learning_rate": 4.7777777777777784e-05,
      "loss": 0.6061,
      "step": 390
    },
    {
      "epoch": 0.13675213675213677,
      "grad_norm": 0.39135029911994934,
      "learning_rate": 4.772079772079772e-05,
      "loss": 0.6329,
      "step": 400
    },
    {
      "epoch": 0.14017094017094017,
      "grad_norm": 0.5480381846427917,
      "learning_rate": 4.766381766381767e-05,
      "loss": 0.6607,
      "step": 410
    },
    {
      "epoch": 0.14358974358974358,
      "grad_norm": 0.4431852400302887,
      "learning_rate": 4.7606837606837606e-05,
      "loss": 0.6233,
      "step": 420
    },
    {
      "epoch": 0.147008547008547,
      "grad_norm": 0.4828330874443054,
      "learning_rate": 4.754985754985755e-05,
      "loss": 0.6437,
      "step": 430
    },
    {
      "epoch": 0.15042735042735042,
      "grad_norm": 0.5272857546806335,
      "learning_rate": 4.74928774928775e-05,
      "loss": 0.6671,
      "step": 440
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 1.4251387119293213,
      "learning_rate": 4.7435897435897435e-05,
      "loss": 0.658,
      "step": 450
    },
    {
      "epoch": 0.15726495726495726,
      "grad_norm": 0.8041712641716003,
      "learning_rate": 4.737891737891738e-05,
      "loss": 0.6487,
      "step": 460
    },
    {
      "epoch": 0.1606837606837607,
      "grad_norm": 0.7019796371459961,
      "learning_rate": 4.732193732193733e-05,
      "loss": 0.6019,
      "step": 470
    },
    {
      "epoch": 0.1641025641025641,
      "grad_norm": 0.8561422228813171,
      "learning_rate": 4.7264957264957264e-05,
      "loss": 0.6897,
      "step": 480
    },
    {
      "epoch": 0.1675213675213675,
      "grad_norm": 1.0677204132080078,
      "learning_rate": 4.7207977207977214e-05,
      "loss": 0.6848,
      "step": 490
    },
    {
      "epoch": 0.17094017094017094,
      "grad_norm": 0.4762294590473175,
      "learning_rate": 4.7150997150997157e-05,
      "loss": 0.6527,
      "step": 500
    },
    {
      "epoch": 0.17094017094017094,
      "eval_accuracy": 0.661082143772972,
      "eval_f1": 0.0,
      "eval_loss": 0.6433083415031433,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_roc_auc": 0.4981741909669265,
      "eval_runtime": 36.622,
      "eval_samples_per_second": 319.453,
      "eval_steps_per_second": 19.988,
      "step": 500
    },
    {
      "epoch": 0.17435897435897435,
      "grad_norm": 0.4656302034854889,
      "learning_rate": 4.709401709401709e-05,
      "loss": 0.6506,
      "step": 510
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 0.6288455724716187,
      "learning_rate": 4.703703703703704e-05,
      "loss": 0.6422,
      "step": 520
    },
    {
      "epoch": 0.1811965811965812,
      "grad_norm": 0.39913907647132874,
      "learning_rate": 4.698005698005698e-05,
      "loss": 0.6146,
      "step": 530
    },
    {
      "epoch": 0.18461538461538463,
      "grad_norm": 0.40889817476272583,
      "learning_rate": 4.692307692307693e-05,
      "loss": 0.6272,
      "step": 540
    },
    {
      "epoch": 0.18803418803418803,
      "grad_norm": 0.9223109483718872,
      "learning_rate": 4.686609686609687e-05,
      "loss": 0.6391,
      "step": 550
    },
    {
      "epoch": 0.19145299145299147,
      "grad_norm": 0.43170908093452454,
      "learning_rate": 4.680911680911681e-05,
      "loss": 0.6613,
      "step": 560
    },
    {
      "epoch": 0.19487179487179487,
      "grad_norm": 0.6207427978515625,
      "learning_rate": 4.675213675213676e-05,
      "loss": 0.6471,
      "step": 570
    },
    {
      "epoch": 0.19829059829059828,
      "grad_norm": 0.7672275304794312,
      "learning_rate": 4.66951566951567e-05,
      "loss": 0.6629,
      "step": 580
    },
    {
      "epoch": 0.20170940170940171,
      "grad_norm": 0.4669424891471863,
      "learning_rate": 4.6638176638176636e-05,
      "loss": 0.6588,
      "step": 590
    },
    {
      "epoch": 0.20512820512820512,
      "grad_norm": 0.6726049184799194,
      "learning_rate": 4.6581196581196586e-05,
      "loss": 0.6258,
      "step": 600
    },
    {
      "epoch": 0.20854700854700856,
      "grad_norm": 0.7948060035705566,
      "learning_rate": 4.652421652421652e-05,
      "loss": 0.5705,
      "step": 610
    },
    {
      "epoch": 0.21196581196581196,
      "grad_norm": 0.419849693775177,
      "learning_rate": 4.646723646723647e-05,
      "loss": 0.6468,
      "step": 620
    },
    {
      "epoch": 0.2153846153846154,
      "grad_norm": 1.0143113136291504,
      "learning_rate": 4.6410256410256415e-05,
      "loss": 0.6297,
      "step": 630
    },
    {
      "epoch": 0.2188034188034188,
      "grad_norm": 0.7109899520874023,
      "learning_rate": 4.635327635327635e-05,
      "loss": 0.673,
      "step": 640
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.760080099105835,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.6227,
      "step": 650
    },
    {
      "epoch": 0.22564102564102564,
      "grad_norm": 0.7442237138748169,
      "learning_rate": 4.6239316239316244e-05,
      "loss": 0.5715,
      "step": 660
    },
    {
      "epoch": 0.22905982905982905,
      "grad_norm": 0.39145609736442566,
      "learning_rate": 4.618233618233619e-05,
      "loss": 0.6727,
      "step": 670
    },
    {
      "epoch": 0.23247863247863249,
      "grad_norm": 0.868276059627533,
      "learning_rate": 4.612535612535613e-05,
      "loss": 0.6344,
      "step": 680
    },
    {
      "epoch": 0.2358974358974359,
      "grad_norm": 0.6120406985282898,
      "learning_rate": 4.6068376068376066e-05,
      "loss": 0.5954,
      "step": 690
    },
    {
      "epoch": 0.23931623931623933,
      "grad_norm": 0.5536867380142212,
      "learning_rate": 4.6011396011396016e-05,
      "loss": 0.6476,
      "step": 700
    },
    {
      "epoch": 0.24273504273504273,
      "grad_norm": 0.4315416216850281,
      "learning_rate": 4.595441595441596e-05,
      "loss": 0.6215,
      "step": 710
    },
    {
      "epoch": 0.24615384615384617,
      "grad_norm": 0.517528235912323,
      "learning_rate": 4.5897435897435895e-05,
      "loss": 0.6258,
      "step": 720
    },
    {
      "epoch": 0.24957264957264957,
      "grad_norm": 1.3188592195510864,
      "learning_rate": 4.5840455840455844e-05,
      "loss": 0.6469,
      "step": 730
    },
    {
      "epoch": 0.252991452991453,
      "grad_norm": 1.2717797756195068,
      "learning_rate": 4.578347578347579e-05,
      "loss": 0.5683,
      "step": 740
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 1.0561293363571167,
      "learning_rate": 4.572649572649573e-05,
      "loss": 0.6769,
      "step": 750
    },
    {
      "epoch": 0.25982905982905985,
      "grad_norm": 1.4157183170318604,
      "learning_rate": 4.566951566951567e-05,
      "loss": 0.6901,
      "step": 760
    },
    {
      "epoch": 0.26324786324786326,
      "grad_norm": 0.4029109477996826,
      "learning_rate": 4.5612535612535616e-05,
      "loss": 0.593,
      "step": 770
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 1.0039498805999756,
      "learning_rate": 4.555555555555556e-05,
      "loss": 0.6798,
      "step": 780
    },
    {
      "epoch": 0.27008547008547007,
      "grad_norm": 0.6905536651611328,
      "learning_rate": 4.54985754985755e-05,
      "loss": 0.6352,
      "step": 790
    },
    {
      "epoch": 0.27350427350427353,
      "grad_norm": 0.8582714796066284,
      "learning_rate": 4.544159544159544e-05,
      "loss": 0.6438,
      "step": 800
    },
    {
      "epoch": 0.27692307692307694,
      "grad_norm": 0.4063926339149475,
      "learning_rate": 4.538461538461539e-05,
      "loss": 0.6503,
      "step": 810
    },
    {
      "epoch": 0.28034188034188035,
      "grad_norm": 1.0651031732559204,
      "learning_rate": 4.532763532763533e-05,
      "loss": 0.6296,
      "step": 820
    },
    {
      "epoch": 0.28376068376068375,
      "grad_norm": 0.618545651435852,
      "learning_rate": 4.5270655270655274e-05,
      "loss": 0.6695,
      "step": 830
    },
    {
      "epoch": 0.28717948717948716,
      "grad_norm": 1.4270812273025513,
      "learning_rate": 4.521367521367522e-05,
      "loss": 0.588,
      "step": 840
    },
    {
      "epoch": 0.2905982905982906,
      "grad_norm": 1.277422547340393,
      "learning_rate": 4.515669515669516e-05,
      "loss": 0.6822,
      "step": 850
    },
    {
      "epoch": 0.294017094017094,
      "grad_norm": 0.44470494985580444,
      "learning_rate": 4.50997150997151e-05,
      "loss": 0.6401,
      "step": 860
    },
    {
      "epoch": 0.29743589743589743,
      "grad_norm": 0.6381728053092957,
      "learning_rate": 4.5042735042735046e-05,
      "loss": 0.693,
      "step": 870
    },
    {
      "epoch": 0.30085470085470084,
      "grad_norm": 0.4355703294277191,
      "learning_rate": 4.498575498575499e-05,
      "loss": 0.6083,
      "step": 880
    },
    {
      "epoch": 0.30427350427350425,
      "grad_norm": 1.0187709331512451,
      "learning_rate": 4.492877492877493e-05,
      "loss": 0.5236,
      "step": 890
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 0.7143679261207581,
      "learning_rate": 4.4871794871794874e-05,
      "loss": 0.6413,
      "step": 900
    },
    {
      "epoch": 0.3111111111111111,
      "grad_norm": 1.0808229446411133,
      "learning_rate": 4.481481481481482e-05,
      "loss": 0.6026,
      "step": 910
    },
    {
      "epoch": 0.3145299145299145,
      "grad_norm": 0.796187698841095,
      "learning_rate": 4.475783475783476e-05,
      "loss": 0.6812,
      "step": 920
    },
    {
      "epoch": 0.31794871794871793,
      "grad_norm": 0.5163740515708923,
      "learning_rate": 4.47008547008547e-05,
      "loss": 0.6537,
      "step": 930
    },
    {
      "epoch": 0.3213675213675214,
      "grad_norm": 0.7213220596313477,
      "learning_rate": 4.4643874643874646e-05,
      "loss": 0.6765,
      "step": 940
    },
    {
      "epoch": 0.3247863247863248,
      "grad_norm": 0.44362661242485046,
      "learning_rate": 4.458689458689459e-05,
      "loss": 0.6249,
      "step": 950
    },
    {
      "epoch": 0.3282051282051282,
      "grad_norm": 0.4917695224285126,
      "learning_rate": 4.452991452991453e-05,
      "loss": 0.63,
      "step": 960
    },
    {
      "epoch": 0.3316239316239316,
      "grad_norm": 0.709846556186676,
      "learning_rate": 4.4472934472934475e-05,
      "loss": 0.5544,
      "step": 970
    },
    {
      "epoch": 0.335042735042735,
      "grad_norm": 1.065099835395813,
      "learning_rate": 4.441595441595442e-05,
      "loss": 0.6338,
      "step": 980
    },
    {
      "epoch": 0.3384615384615385,
      "grad_norm": 0.42223694920539856,
      "learning_rate": 4.435897435897436e-05,
      "loss": 0.5828,
      "step": 990
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 1.5173028707504272,
      "learning_rate": 4.4301994301994304e-05,
      "loss": 0.6229,
      "step": 1000
    },
    {
      "epoch": 0.3418803418803419,
      "eval_accuracy": 0.661082143772972,
      "eval_f1": 0.0,
      "eval_loss": 0.6458322405815125,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_roc_auc": 0.5011399036892176,
      "eval_runtime": 36.5197,
      "eval_samples_per_second": 320.347,
      "eval_steps_per_second": 20.044,
      "step": 1000
    },
    {
      "epoch": 0.3452991452991453,
      "grad_norm": 0.8043766617774963,
      "learning_rate": 4.424501424501425e-05,
      "loss": 0.6463,
      "step": 1010
    },
    {
      "epoch": 0.3487179487179487,
      "grad_norm": 0.6817493438720703,
      "learning_rate": 4.418803418803419e-05,
      "loss": 0.6266,
      "step": 1020
    },
    {
      "epoch": 0.35213675213675216,
      "grad_norm": 0.6765307784080505,
      "learning_rate": 4.413105413105413e-05,
      "loss": 0.6203,
      "step": 1030
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.6116905808448792,
      "learning_rate": 4.4074074074074076e-05,
      "loss": 0.5933,
      "step": 1040
    },
    {
      "epoch": 0.358974358974359,
      "grad_norm": 0.3634931445121765,
      "learning_rate": 4.401709401709402e-05,
      "loss": 0.6612,
      "step": 1050
    },
    {
      "epoch": 0.3623931623931624,
      "grad_norm": 0.8377366065979004,
      "learning_rate": 4.396011396011396e-05,
      "loss": 0.6933,
      "step": 1060
    },
    {
      "epoch": 0.3658119658119658,
      "grad_norm": 0.7808057069778442,
      "learning_rate": 4.3903133903133905e-05,
      "loss": 0.6101,
      "step": 1070
    },
    {
      "epoch": 0.36923076923076925,
      "grad_norm": 0.5020534992218018,
      "learning_rate": 4.384615384615385e-05,
      "loss": 0.6333,
      "step": 1080
    },
    {
      "epoch": 0.37264957264957266,
      "grad_norm": 0.9217988848686218,
      "learning_rate": 4.378917378917379e-05,
      "loss": 0.652,
      "step": 1090
    },
    {
      "epoch": 0.37606837606837606,
      "grad_norm": 0.426917165517807,
      "learning_rate": 4.3732193732193733e-05,
      "loss": 0.6776,
      "step": 1100
    },
    {
      "epoch": 0.37948717948717947,
      "grad_norm": 1.00786292552948,
      "learning_rate": 4.3675213675213676e-05,
      "loss": 0.6308,
      "step": 1110
    },
    {
      "epoch": 0.38290598290598293,
      "grad_norm": 0.5222122669219971,
      "learning_rate": 4.361823361823362e-05,
      "loss": 0.5881,
      "step": 1120
    },
    {
      "epoch": 0.38632478632478634,
      "grad_norm": 1.309751272201538,
      "learning_rate": 4.356125356125356e-05,
      "loss": 0.6988,
      "step": 1130
    },
    {
      "epoch": 0.38974358974358975,
      "grad_norm": 0.5627844929695129,
      "learning_rate": 4.3504273504273505e-05,
      "loss": 0.6396,
      "step": 1140
    },
    {
      "epoch": 0.39316239316239315,
      "grad_norm": 0.40362900495529175,
      "learning_rate": 4.344729344729345e-05,
      "loss": 0.639,
      "step": 1150
    },
    {
      "epoch": 0.39658119658119656,
      "grad_norm": 0.632331371307373,
      "learning_rate": 4.339031339031339e-05,
      "loss": 0.6187,
      "step": 1160
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1355897188186646,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 0.6317,
      "step": 1170
    },
    {
      "epoch": 0.40341880341880343,
      "grad_norm": 0.8610725998878479,
      "learning_rate": 4.327635327635328e-05,
      "loss": 0.631,
      "step": 1180
    },
    {
      "epoch": 0.40683760683760684,
      "grad_norm": 0.6825465559959412,
      "learning_rate": 4.321937321937322e-05,
      "loss": 0.6825,
      "step": 1190
    },
    {
      "epoch": 0.41025641025641024,
      "grad_norm": 1.3887457847595215,
      "learning_rate": 4.316239316239317e-05,
      "loss": 0.6221,
      "step": 1200
    },
    {
      "epoch": 0.41367521367521365,
      "grad_norm": 0.5809090733528137,
      "learning_rate": 4.3105413105413106e-05,
      "loss": 0.6117,
      "step": 1210
    },
    {
      "epoch": 0.4170940170940171,
      "grad_norm": 0.4157603681087494,
      "learning_rate": 4.304843304843305e-05,
      "loss": 0.613,
      "step": 1220
    },
    {
      "epoch": 0.4205128205128205,
      "grad_norm": 0.4386206269264221,
      "learning_rate": 4.2991452991453e-05,
      "loss": 0.6458,
      "step": 1230
    },
    {
      "epoch": 0.4239316239316239,
      "grad_norm": 1.4249426126480103,
      "learning_rate": 4.2934472934472935e-05,
      "loss": 0.66,
      "step": 1240
    },
    {
      "epoch": 0.42735042735042733,
      "grad_norm": 1.3717528581619263,
      "learning_rate": 4.287749287749288e-05,
      "loss": 0.6497,
      "step": 1250
    },
    {
      "epoch": 0.4307692307692308,
      "grad_norm": 0.6880800724029541,
      "learning_rate": 4.282051282051282e-05,
      "loss": 0.6231,
      "step": 1260
    },
    {
      "epoch": 0.4341880341880342,
      "grad_norm": 0.9455773234367371,
      "learning_rate": 4.2763532763532764e-05,
      "loss": 0.6524,
      "step": 1270
    },
    {
      "epoch": 0.4376068376068376,
      "grad_norm": 1.2795006036758423,
      "learning_rate": 4.270655270655271e-05,
      "loss": 0.6039,
      "step": 1280
    },
    {
      "epoch": 0.441025641025641,
      "grad_norm": 0.4846753776073456,
      "learning_rate": 4.264957264957265e-05,
      "loss": 0.6066,
      "step": 1290
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.49425560235977173,
      "learning_rate": 4.259259259259259e-05,
      "loss": 0.6545,
      "step": 1300
    },
    {
      "epoch": 0.4478632478632479,
      "grad_norm": 0.924453854560852,
      "learning_rate": 4.253561253561254e-05,
      "loss": 0.6406,
      "step": 1310
    },
    {
      "epoch": 0.4512820512820513,
      "grad_norm": 0.46777766942977905,
      "learning_rate": 4.247863247863248e-05,
      "loss": 0.6275,
      "step": 1320
    },
    {
      "epoch": 0.4547008547008547,
      "grad_norm": 0.7829861044883728,
      "learning_rate": 4.242165242165243e-05,
      "loss": 0.6445,
      "step": 1330
    },
    {
      "epoch": 0.4581196581196581,
      "grad_norm": 0.6596978306770325,
      "learning_rate": 4.2364672364672364e-05,
      "loss": 0.648,
      "step": 1340
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 0.9732853770256042,
      "learning_rate": 4.230769230769231e-05,
      "loss": 0.6738,
      "step": 1350
    },
    {
      "epoch": 0.46495726495726497,
      "grad_norm": 0.4845993220806122,
      "learning_rate": 4.225071225071226e-05,
      "loss": 0.6464,
      "step": 1360
    },
    {
      "epoch": 0.4683760683760684,
      "grad_norm": 0.40009310841560364,
      "learning_rate": 4.219373219373219e-05,
      "loss": 0.6193,
      "step": 1370
    },
    {
      "epoch": 0.4717948717948718,
      "grad_norm": 1.296000361442566,
      "learning_rate": 4.2136752136752136e-05,
      "loss": 0.608,
      "step": 1380
    },
    {
      "epoch": 0.4752136752136752,
      "grad_norm": 0.3851681351661682,
      "learning_rate": 4.2079772079772086e-05,
      "loss": 0.636,
      "step": 1390
    },
    {
      "epoch": 0.47863247863247865,
      "grad_norm": 1.5586471557617188,
      "learning_rate": 4.202279202279202e-05,
      "loss": 0.652,
      "step": 1400
    },
    {
      "epoch": 0.48205128205128206,
      "grad_norm": 1.1093754768371582,
      "learning_rate": 4.196581196581197e-05,
      "loss": 0.6397,
      "step": 1410
    },
    {
      "epoch": 0.48547008547008547,
      "grad_norm": 0.6494556665420532,
      "learning_rate": 4.190883190883191e-05,
      "loss": 0.6691,
      "step": 1420
    },
    {
      "epoch": 0.4888888888888889,
      "grad_norm": 0.6842040419578552,
      "learning_rate": 4.185185185185185e-05,
      "loss": 0.653,
      "step": 1430
    },
    {
      "epoch": 0.49230769230769234,
      "grad_norm": 0.39208441972732544,
      "learning_rate": 4.17948717948718e-05,
      "loss": 0.6303,
      "step": 1440
    },
    {
      "epoch": 0.49572649572649574,
      "grad_norm": 0.3755127787590027,
      "learning_rate": 4.1737891737891737e-05,
      "loss": 0.6619,
      "step": 1450
    },
    {
      "epoch": 0.49914529914529915,
      "grad_norm": 0.3358234167098999,
      "learning_rate": 4.168091168091168e-05,
      "loss": 0.6782,
      "step": 1460
    },
    {
      "epoch": 0.5025641025641026,
      "grad_norm": 0.30498063564300537,
      "learning_rate": 4.162393162393163e-05,
      "loss": 0.6582,
      "step": 1470
    },
    {
      "epoch": 0.505982905982906,
      "grad_norm": 0.7140593528747559,
      "learning_rate": 4.1566951566951565e-05,
      "loss": 0.6749,
      "step": 1480
    },
    {
      "epoch": 0.5094017094017094,
      "grad_norm": 0.4288971424102783,
      "learning_rate": 4.1509971509971515e-05,
      "loss": 0.6355,
      "step": 1490
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 0.8717936277389526,
      "learning_rate": 4.145299145299146e-05,
      "loss": 0.6258,
      "step": 1500
    },
    {
      "epoch": 0.5128205128205128,
      "eval_accuracy": 0.661082143772972,
      "eval_f1": 0.0,
      "eval_loss": 0.6402843594551086,
      "eval_precision": 0.0,
      "eval_recall": 0.0,
      "eval_roc_auc": 0.5119639749280213,
      "eval_runtime": 35.8008,
      "eval_samples_per_second": 326.78,
      "eval_steps_per_second": 20.446,
      "step": 1500
    }
  ],
  "logging_steps": 10,
  "max_steps": 8775,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.001
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 794804391936000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}