{ "best_metric": 0.0, "best_model_checkpoint": "./results/checkpoint-500", "epoch": 0.5128205128205128, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003418803418803419, "grad_norm": 2.3258378505706787, "learning_rate": 4.9943019943019945e-05, "loss": 0.6681, "step": 10 }, { "epoch": 0.006837606837606838, "grad_norm": 0.7698261737823486, "learning_rate": 4.988603988603989e-05, "loss": 0.6451, "step": 20 }, { "epoch": 0.010256410256410256, "grad_norm": 1.6664257049560547, "learning_rate": 4.982905982905983e-05, "loss": 0.6485, "step": 30 }, { "epoch": 0.013675213675213675, "grad_norm": 0.6200563907623291, "learning_rate": 4.9772079772079774e-05, "loss": 0.6453, "step": 40 }, { "epoch": 0.017094017094017096, "grad_norm": 0.5258885622024536, "learning_rate": 4.971509971509972e-05, "loss": 0.6569, "step": 50 }, { "epoch": 0.020512820512820513, "grad_norm": 0.5715610384941101, "learning_rate": 4.965811965811966e-05, "loss": 0.6508, "step": 60 }, { "epoch": 0.023931623931623933, "grad_norm": 0.5744765400886536, "learning_rate": 4.96011396011396e-05, "loss": 0.6029, "step": 70 }, { "epoch": 0.02735042735042735, "grad_norm": 0.9320403337478638, "learning_rate": 4.9544159544159546e-05, "loss": 0.6644, "step": 80 }, { "epoch": 0.03076923076923077, "grad_norm": 0.5994309186935425, "learning_rate": 4.948717948717949e-05, "loss": 0.6757, "step": 90 }, { "epoch": 0.03418803418803419, "grad_norm": 0.4685361385345459, "learning_rate": 4.943019943019943e-05, "loss": 0.6372, "step": 100 }, { "epoch": 0.037606837606837605, "grad_norm": 0.6897755265235901, "learning_rate": 4.9373219373219375e-05, "loss": 0.6395, "step": 110 }, { "epoch": 0.041025641025641026, "grad_norm": 0.5714218616485596, "learning_rate": 4.931623931623932e-05, "loss": 0.6323, "step": 120 }, { "epoch": 0.044444444444444446, "grad_norm": 0.6862583160400391, "learning_rate": 4.925925925925926e-05, "loss": 0.6307, "step": 130 }, { "epoch": 0.04786324786324787, "grad_norm": 1.1985986232757568, "learning_rate": 4.9202279202279204e-05, "loss": 0.6353, "step": 140 }, { "epoch": 0.05128205128205128, "grad_norm": 0.4656996428966522, "learning_rate": 4.9145299145299147e-05, "loss": 0.6552, "step": 150 }, { "epoch": 0.0547008547008547, "grad_norm": 1.3551446199417114, "learning_rate": 4.908831908831909e-05, "loss": 0.6484, "step": 160 }, { "epoch": 0.05811965811965812, "grad_norm": 1.137487769126892, "learning_rate": 4.903133903133903e-05, "loss": 0.5905, "step": 170 }, { "epoch": 0.06153846153846154, "grad_norm": 0.6064645051956177, "learning_rate": 4.8974358974358975e-05, "loss": 0.6157, "step": 180 }, { "epoch": 0.06495726495726496, "grad_norm": 2.0975794792175293, "learning_rate": 4.891737891737892e-05, "loss": 0.6701, "step": 190 }, { "epoch": 0.06837606837606838, "grad_norm": 0.48940032720565796, "learning_rate": 4.886039886039887e-05, "loss": 0.6342, "step": 200 }, { "epoch": 0.07179487179487179, "grad_norm": 1.2511190176010132, "learning_rate": 4.8803418803418804e-05, "loss": 0.6521, "step": 210 }, { "epoch": 0.07521367521367521, "grad_norm": 0.7074885964393616, "learning_rate": 4.874643874643875e-05, "loss": 0.6548, "step": 220 }, { "epoch": 0.07863247863247863, "grad_norm": 1.152065396308899, "learning_rate": 4.868945868945869e-05, "loss": 0.6589, "step": 230 }, { "epoch": 0.08205128205128205, "grad_norm": 0.39897221326828003, "learning_rate": 4.863247863247863e-05, "loss": 0.6595, "step": 240 }, { "epoch": 0.08547008547008547, "grad_norm": 0.5259735584259033, "learning_rate": 4.8575498575498576e-05, "loss": 0.665, "step": 250 }, { "epoch": 0.08888888888888889, "grad_norm": 0.5097119808197021, "learning_rate": 4.851851851851852e-05, "loss": 0.6498, "step": 260 }, { "epoch": 0.09230769230769231, "grad_norm": 0.48037877678871155, "learning_rate": 4.846153846153846e-05, "loss": 0.5882, "step": 270 }, { "epoch": 0.09572649572649573, "grad_norm": 0.6850088834762573, "learning_rate": 4.840455840455841e-05, "loss": 0.6329, "step": 280 }, { "epoch": 0.09914529914529914, "grad_norm": 0.6092679500579834, "learning_rate": 4.834757834757835e-05, "loss": 0.6246, "step": 290 }, { "epoch": 0.10256410256410256, "grad_norm": 1.0922237634658813, "learning_rate": 4.829059829059829e-05, "loss": 0.6144, "step": 300 }, { "epoch": 0.10598290598290598, "grad_norm": 1.4150214195251465, "learning_rate": 4.823361823361824e-05, "loss": 0.643, "step": 310 }, { "epoch": 0.1094017094017094, "grad_norm": 1.516169548034668, "learning_rate": 4.817663817663818e-05, "loss": 0.6046, "step": 320 }, { "epoch": 0.11282051282051282, "grad_norm": 0.5234593749046326, "learning_rate": 4.8119658119658126e-05, "loss": 0.6193, "step": 330 }, { "epoch": 0.11623931623931624, "grad_norm": 0.6485182046890259, "learning_rate": 4.806267806267806e-05, "loss": 0.6314, "step": 340 }, { "epoch": 0.11965811965811966, "grad_norm": 0.9457536935806274, "learning_rate": 4.8005698005698006e-05, "loss": 0.5802, "step": 350 }, { "epoch": 0.12307692307692308, "grad_norm": 1.2444144487380981, "learning_rate": 4.7948717948717955e-05, "loss": 0.5927, "step": 360 }, { "epoch": 0.1264957264957265, "grad_norm": 0.499647855758667, "learning_rate": 4.789173789173789e-05, "loss": 0.6358, "step": 370 }, { "epoch": 0.12991452991452992, "grad_norm": 2.130183696746826, "learning_rate": 4.7834757834757834e-05, "loss": 0.6324, "step": 380 }, { "epoch": 0.13333333333333333, "grad_norm": 0.6378350257873535, "learning_rate": 4.7777777777777784e-05, "loss": 0.6061, "step": 390 }, { "epoch": 0.13675213675213677, "grad_norm": 0.39135029911994934, "learning_rate": 4.772079772079772e-05, "loss": 0.6329, "step": 400 }, { "epoch": 0.14017094017094017, "grad_norm": 0.5480381846427917, "learning_rate": 4.766381766381767e-05, "loss": 0.6607, "step": 410 }, { "epoch": 0.14358974358974358, "grad_norm": 0.4431852400302887, "learning_rate": 4.7606837606837606e-05, "loss": 0.6233, "step": 420 }, { "epoch": 0.147008547008547, "grad_norm": 0.4828330874443054, "learning_rate": 4.754985754985755e-05, "loss": 0.6437, "step": 430 }, { "epoch": 0.15042735042735042, "grad_norm": 0.5272857546806335, "learning_rate": 4.74928774928775e-05, "loss": 0.6671, "step": 440 }, { "epoch": 0.15384615384615385, "grad_norm": 1.4251387119293213, "learning_rate": 4.7435897435897435e-05, "loss": 0.658, "step": 450 }, { "epoch": 0.15726495726495726, "grad_norm": 0.8041712641716003, "learning_rate": 4.737891737891738e-05, "loss": 0.6487, "step": 460 }, { "epoch": 0.1606837606837607, "grad_norm": 0.7019796371459961, "learning_rate": 4.732193732193733e-05, "loss": 0.6019, "step": 470 }, { "epoch": 0.1641025641025641, "grad_norm": 0.8561422228813171, "learning_rate": 4.7264957264957264e-05, "loss": 0.6897, "step": 480 }, { "epoch": 0.1675213675213675, "grad_norm": 1.0677204132080078, "learning_rate": 4.7207977207977214e-05, "loss": 0.6848, "step": 490 }, { "epoch": 0.17094017094017094, "grad_norm": 0.4762294590473175, "learning_rate": 4.7150997150997157e-05, "loss": 0.6527, "step": 500 }, { "epoch": 0.17094017094017094, "eval_accuracy": 0.661082143772972, "eval_f1": 0.0, "eval_loss": 0.6433083415031433, "eval_precision": 0.0, "eval_recall": 0.0, "eval_roc_auc": 0.4981741909669265, "eval_runtime": 36.622, "eval_samples_per_second": 319.453, "eval_steps_per_second": 19.988, "step": 500 }, { "epoch": 0.17435897435897435, "grad_norm": 0.4656302034854889, "learning_rate": 4.709401709401709e-05, "loss": 0.6506, "step": 510 }, { "epoch": 0.17777777777777778, "grad_norm": 0.6288455724716187, "learning_rate": 4.703703703703704e-05, "loss": 0.6422, "step": 520 }, { "epoch": 0.1811965811965812, "grad_norm": 0.39913907647132874, "learning_rate": 4.698005698005698e-05, "loss": 0.6146, "step": 530 }, { "epoch": 0.18461538461538463, "grad_norm": 0.40889817476272583, "learning_rate": 4.692307692307693e-05, "loss": 0.6272, "step": 540 }, { "epoch": 0.18803418803418803, "grad_norm": 0.9223109483718872, "learning_rate": 4.686609686609687e-05, "loss": 0.6391, "step": 550 }, { "epoch": 0.19145299145299147, "grad_norm": 0.43170908093452454, "learning_rate": 4.680911680911681e-05, "loss": 0.6613, "step": 560 }, { "epoch": 0.19487179487179487, "grad_norm": 0.6207427978515625, "learning_rate": 4.675213675213676e-05, "loss": 0.6471, "step": 570 }, { "epoch": 0.19829059829059828, "grad_norm": 0.7672275304794312, "learning_rate": 4.66951566951567e-05, "loss": 0.6629, "step": 580 }, { "epoch": 0.20170940170940171, "grad_norm": 0.4669424891471863, "learning_rate": 4.6638176638176636e-05, "loss": 0.6588, "step": 590 }, { "epoch": 0.20512820512820512, "grad_norm": 0.6726049184799194, "learning_rate": 4.6581196581196586e-05, "loss": 0.6258, "step": 600 }, { "epoch": 0.20854700854700856, "grad_norm": 0.7948060035705566, "learning_rate": 4.652421652421652e-05, "loss": 0.5705, "step": 610 }, { "epoch": 0.21196581196581196, "grad_norm": 0.419849693775177, "learning_rate": 4.646723646723647e-05, "loss": 0.6468, "step": 620 }, { "epoch": 0.2153846153846154, "grad_norm": 1.0143113136291504, "learning_rate": 4.6410256410256415e-05, "loss": 0.6297, "step": 630 }, { "epoch": 0.2188034188034188, "grad_norm": 0.7109899520874023, "learning_rate": 4.635327635327635e-05, "loss": 0.673, "step": 640 }, { "epoch": 0.2222222222222222, "grad_norm": 0.760080099105835, "learning_rate": 4.62962962962963e-05, "loss": 0.6227, "step": 650 }, { "epoch": 0.22564102564102564, "grad_norm": 0.7442237138748169, "learning_rate": 4.6239316239316244e-05, "loss": 0.5715, "step": 660 }, { "epoch": 0.22905982905982905, "grad_norm": 0.39145609736442566, "learning_rate": 4.618233618233619e-05, "loss": 0.6727, "step": 670 }, { "epoch": 0.23247863247863249, "grad_norm": 0.868276059627533, "learning_rate": 4.612535612535613e-05, "loss": 0.6344, "step": 680 }, { "epoch": 0.2358974358974359, "grad_norm": 0.6120406985282898, "learning_rate": 4.6068376068376066e-05, "loss": 0.5954, "step": 690 }, { "epoch": 0.23931623931623933, "grad_norm": 0.5536867380142212, "learning_rate": 4.6011396011396016e-05, "loss": 0.6476, "step": 700 }, { "epoch": 0.24273504273504273, "grad_norm": 0.4315416216850281, "learning_rate": 4.595441595441596e-05, "loss": 0.6215, "step": 710 }, { "epoch": 0.24615384615384617, "grad_norm": 0.517528235912323, "learning_rate": 4.5897435897435895e-05, "loss": 0.6258, "step": 720 }, { "epoch": 0.24957264957264957, "grad_norm": 1.3188592195510864, "learning_rate": 4.5840455840455844e-05, "loss": 0.6469, "step": 730 }, { "epoch": 0.252991452991453, "grad_norm": 1.2717797756195068, "learning_rate": 4.578347578347579e-05, "loss": 0.5683, "step": 740 }, { "epoch": 0.2564102564102564, "grad_norm": 1.0561293363571167, "learning_rate": 4.572649572649573e-05, "loss": 0.6769, "step": 750 }, { "epoch": 0.25982905982905985, "grad_norm": 1.4157183170318604, "learning_rate": 4.566951566951567e-05, "loss": 0.6901, "step": 760 }, { "epoch": 0.26324786324786326, "grad_norm": 0.4029109477996826, "learning_rate": 4.5612535612535616e-05, "loss": 0.593, "step": 770 }, { "epoch": 0.26666666666666666, "grad_norm": 1.0039498805999756, "learning_rate": 4.555555555555556e-05, "loss": 0.6798, "step": 780 }, { "epoch": 0.27008547008547007, "grad_norm": 0.6905536651611328, "learning_rate": 4.54985754985755e-05, "loss": 0.6352, "step": 790 }, { "epoch": 0.27350427350427353, "grad_norm": 0.8582714796066284, "learning_rate": 4.544159544159544e-05, "loss": 0.6438, "step": 800 }, { "epoch": 0.27692307692307694, "grad_norm": 0.4063926339149475, "learning_rate": 4.538461538461539e-05, "loss": 0.6503, "step": 810 }, { "epoch": 0.28034188034188035, "grad_norm": 1.0651031732559204, "learning_rate": 4.532763532763533e-05, "loss": 0.6296, "step": 820 }, { "epoch": 0.28376068376068375, "grad_norm": 0.618545651435852, "learning_rate": 4.5270655270655274e-05, "loss": 0.6695, "step": 830 }, { "epoch": 0.28717948717948716, "grad_norm": 1.4270812273025513, "learning_rate": 4.521367521367522e-05, "loss": 0.588, "step": 840 }, { "epoch": 0.2905982905982906, "grad_norm": 1.277422547340393, "learning_rate": 4.515669515669516e-05, "loss": 0.6822, "step": 850 }, { "epoch": 0.294017094017094, "grad_norm": 0.44470494985580444, "learning_rate": 4.50997150997151e-05, "loss": 0.6401, "step": 860 }, { "epoch": 0.29743589743589743, "grad_norm": 0.6381728053092957, "learning_rate": 4.5042735042735046e-05, "loss": 0.693, "step": 870 }, { "epoch": 0.30085470085470084, "grad_norm": 0.4355703294277191, "learning_rate": 4.498575498575499e-05, "loss": 0.6083, "step": 880 }, { "epoch": 0.30427350427350425, "grad_norm": 1.0187709331512451, "learning_rate": 4.492877492877493e-05, "loss": 0.5236, "step": 890 }, { "epoch": 0.3076923076923077, "grad_norm": 0.7143679261207581, "learning_rate": 4.4871794871794874e-05, "loss": 0.6413, "step": 900 }, { "epoch": 0.3111111111111111, "grad_norm": 1.0808229446411133, "learning_rate": 4.481481481481482e-05, "loss": 0.6026, "step": 910 }, { "epoch": 0.3145299145299145, "grad_norm": 0.796187698841095, "learning_rate": 4.475783475783476e-05, "loss": 0.6812, "step": 920 }, { "epoch": 0.31794871794871793, "grad_norm": 0.5163740515708923, "learning_rate": 4.47008547008547e-05, "loss": 0.6537, "step": 930 }, { "epoch": 0.3213675213675214, "grad_norm": 0.7213220596313477, "learning_rate": 4.4643874643874646e-05, "loss": 0.6765, "step": 940 }, { "epoch": 0.3247863247863248, "grad_norm": 0.44362661242485046, "learning_rate": 4.458689458689459e-05, "loss": 0.6249, "step": 950 }, { "epoch": 0.3282051282051282, "grad_norm": 0.4917695224285126, "learning_rate": 4.452991452991453e-05, "loss": 0.63, "step": 960 }, { "epoch": 0.3316239316239316, "grad_norm": 0.709846556186676, "learning_rate": 4.4472934472934475e-05, "loss": 0.5544, "step": 970 }, { "epoch": 0.335042735042735, "grad_norm": 1.065099835395813, "learning_rate": 4.441595441595442e-05, "loss": 0.6338, "step": 980 }, { "epoch": 0.3384615384615385, "grad_norm": 0.42223694920539856, "learning_rate": 4.435897435897436e-05, "loss": 0.5828, "step": 990 }, { "epoch": 0.3418803418803419, "grad_norm": 1.5173028707504272, "learning_rate": 4.4301994301994304e-05, "loss": 0.6229, "step": 1000 }, { "epoch": 0.3418803418803419, "eval_accuracy": 0.661082143772972, "eval_f1": 0.0, "eval_loss": 0.6458322405815125, "eval_precision": 0.0, "eval_recall": 0.0, "eval_roc_auc": 0.5011399036892176, "eval_runtime": 36.5197, "eval_samples_per_second": 320.347, "eval_steps_per_second": 20.044, "step": 1000 }, { "epoch": 0.3452991452991453, "grad_norm": 0.8043766617774963, "learning_rate": 4.424501424501425e-05, "loss": 0.6463, "step": 1010 }, { "epoch": 0.3487179487179487, "grad_norm": 0.6817493438720703, "learning_rate": 4.418803418803419e-05, "loss": 0.6266, "step": 1020 }, { "epoch": 0.35213675213675216, "grad_norm": 0.6765307784080505, "learning_rate": 4.413105413105413e-05, "loss": 0.6203, "step": 1030 }, { "epoch": 0.35555555555555557, "grad_norm": 0.6116905808448792, "learning_rate": 4.4074074074074076e-05, "loss": 0.5933, "step": 1040 }, { "epoch": 0.358974358974359, "grad_norm": 0.3634931445121765, "learning_rate": 4.401709401709402e-05, "loss": 0.6612, "step": 1050 }, { "epoch": 0.3623931623931624, "grad_norm": 0.8377366065979004, "learning_rate": 4.396011396011396e-05, "loss": 0.6933, "step": 1060 }, { "epoch": 0.3658119658119658, "grad_norm": 0.7808057069778442, "learning_rate": 4.3903133903133905e-05, "loss": 0.6101, "step": 1070 }, { "epoch": 0.36923076923076925, "grad_norm": 0.5020534992218018, "learning_rate": 4.384615384615385e-05, "loss": 0.6333, "step": 1080 }, { "epoch": 0.37264957264957266, "grad_norm": 0.9217988848686218, "learning_rate": 4.378917378917379e-05, "loss": 0.652, "step": 1090 }, { "epoch": 0.37606837606837606, "grad_norm": 0.426917165517807, "learning_rate": 4.3732193732193733e-05, "loss": 0.6776, "step": 1100 }, { "epoch": 0.37948717948717947, "grad_norm": 1.00786292552948, "learning_rate": 4.3675213675213676e-05, "loss": 0.6308, "step": 1110 }, { "epoch": 0.38290598290598293, "grad_norm": 0.5222122669219971, "learning_rate": 4.361823361823362e-05, "loss": 0.5881, "step": 1120 }, { "epoch": 0.38632478632478634, "grad_norm": 1.309751272201538, "learning_rate": 4.356125356125356e-05, "loss": 0.6988, "step": 1130 }, { "epoch": 0.38974358974358975, "grad_norm": 0.5627844929695129, "learning_rate": 4.3504273504273505e-05, "loss": 0.6396, "step": 1140 }, { "epoch": 0.39316239316239315, "grad_norm": 0.40362900495529175, "learning_rate": 4.344729344729345e-05, "loss": 0.639, "step": 1150 }, { "epoch": 0.39658119658119656, "grad_norm": 0.632331371307373, "learning_rate": 4.339031339031339e-05, "loss": 0.6187, "step": 1160 }, { "epoch": 0.4, "grad_norm": 1.1355897188186646, "learning_rate": 4.3333333333333334e-05, "loss": 0.6317, "step": 1170 }, { "epoch": 0.40341880341880343, "grad_norm": 0.8610725998878479, "learning_rate": 4.327635327635328e-05, "loss": 0.631, "step": 1180 }, { "epoch": 0.40683760683760684, "grad_norm": 0.6825465559959412, "learning_rate": 4.321937321937322e-05, "loss": 0.6825, "step": 1190 }, { "epoch": 0.41025641025641024, "grad_norm": 1.3887457847595215, "learning_rate": 4.316239316239317e-05, "loss": 0.6221, "step": 1200 }, { "epoch": 0.41367521367521365, "grad_norm": 0.5809090733528137, "learning_rate": 4.3105413105413106e-05, "loss": 0.6117, "step": 1210 }, { "epoch": 0.4170940170940171, "grad_norm": 0.4157603681087494, "learning_rate": 4.304843304843305e-05, "loss": 0.613, "step": 1220 }, { "epoch": 0.4205128205128205, "grad_norm": 0.4386206269264221, "learning_rate": 4.2991452991453e-05, "loss": 0.6458, "step": 1230 }, { "epoch": 0.4239316239316239, "grad_norm": 1.4249426126480103, "learning_rate": 4.2934472934472935e-05, "loss": 0.66, "step": 1240 }, { "epoch": 0.42735042735042733, "grad_norm": 1.3717528581619263, "learning_rate": 4.287749287749288e-05, "loss": 0.6497, "step": 1250 }, { "epoch": 0.4307692307692308, "grad_norm": 0.6880800724029541, "learning_rate": 4.282051282051282e-05, "loss": 0.6231, "step": 1260 }, { "epoch": 0.4341880341880342, "grad_norm": 0.9455773234367371, "learning_rate": 4.2763532763532764e-05, "loss": 0.6524, "step": 1270 }, { "epoch": 0.4376068376068376, "grad_norm": 1.2795006036758423, "learning_rate": 4.270655270655271e-05, "loss": 0.6039, "step": 1280 }, { "epoch": 0.441025641025641, "grad_norm": 0.4846753776073456, "learning_rate": 4.264957264957265e-05, "loss": 0.6066, "step": 1290 }, { "epoch": 0.4444444444444444, "grad_norm": 0.49425560235977173, "learning_rate": 4.259259259259259e-05, "loss": 0.6545, "step": 1300 }, { "epoch": 0.4478632478632479, "grad_norm": 0.924453854560852, "learning_rate": 4.253561253561254e-05, "loss": 0.6406, "step": 1310 }, { "epoch": 0.4512820512820513, "grad_norm": 0.46777766942977905, "learning_rate": 4.247863247863248e-05, "loss": 0.6275, "step": 1320 }, { "epoch": 0.4547008547008547, "grad_norm": 0.7829861044883728, "learning_rate": 4.242165242165243e-05, "loss": 0.6445, "step": 1330 }, { "epoch": 0.4581196581196581, "grad_norm": 0.6596978306770325, "learning_rate": 4.2364672364672364e-05, "loss": 0.648, "step": 1340 }, { "epoch": 0.46153846153846156, "grad_norm": 0.9732853770256042, "learning_rate": 4.230769230769231e-05, "loss": 0.6738, "step": 1350 }, { "epoch": 0.46495726495726497, "grad_norm": 0.4845993220806122, "learning_rate": 4.225071225071226e-05, "loss": 0.6464, "step": 1360 }, { "epoch": 0.4683760683760684, "grad_norm": 0.40009310841560364, "learning_rate": 4.219373219373219e-05, "loss": 0.6193, "step": 1370 }, { "epoch": 0.4717948717948718, "grad_norm": 1.296000361442566, "learning_rate": 4.2136752136752136e-05, "loss": 0.608, "step": 1380 }, { "epoch": 0.4752136752136752, "grad_norm": 0.3851681351661682, "learning_rate": 4.2079772079772086e-05, "loss": 0.636, "step": 1390 }, { "epoch": 0.47863247863247865, "grad_norm": 1.5586471557617188, "learning_rate": 4.202279202279202e-05, "loss": 0.652, "step": 1400 }, { "epoch": 0.48205128205128206, "grad_norm": 1.1093754768371582, "learning_rate": 4.196581196581197e-05, "loss": 0.6397, "step": 1410 }, { "epoch": 0.48547008547008547, "grad_norm": 0.6494556665420532, "learning_rate": 4.190883190883191e-05, "loss": 0.6691, "step": 1420 }, { "epoch": 0.4888888888888889, "grad_norm": 0.6842040419578552, "learning_rate": 4.185185185185185e-05, "loss": 0.653, "step": 1430 }, { "epoch": 0.49230769230769234, "grad_norm": 0.39208441972732544, "learning_rate": 4.17948717948718e-05, "loss": 0.6303, "step": 1440 }, { "epoch": 0.49572649572649574, "grad_norm": 0.3755127787590027, "learning_rate": 4.1737891737891737e-05, "loss": 0.6619, "step": 1450 }, { "epoch": 0.49914529914529915, "grad_norm": 0.3358234167098999, "learning_rate": 4.168091168091168e-05, "loss": 0.6782, "step": 1460 }, { "epoch": 0.5025641025641026, "grad_norm": 0.30498063564300537, "learning_rate": 4.162393162393163e-05, "loss": 0.6582, "step": 1470 }, { "epoch": 0.505982905982906, "grad_norm": 0.7140593528747559, "learning_rate": 4.1566951566951565e-05, "loss": 0.6749, "step": 1480 }, { "epoch": 0.5094017094017094, "grad_norm": 0.4288971424102783, "learning_rate": 4.1509971509971515e-05, "loss": 0.6355, "step": 1490 }, { "epoch": 0.5128205128205128, "grad_norm": 0.8717936277389526, "learning_rate": 4.145299145299146e-05, "loss": 0.6258, "step": 1500 }, { "epoch": 0.5128205128205128, "eval_accuracy": 0.661082143772972, "eval_f1": 0.0, "eval_loss": 0.6402843594551086, "eval_precision": 0.0, "eval_recall": 0.0, "eval_roc_auc": 0.5119639749280213, "eval_runtime": 35.8008, "eval_samples_per_second": 326.78, "eval_steps_per_second": 20.446, "step": 1500 } ], "logging_steps": 10, "max_steps": 8775, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 794804391936000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }