{
"best_metric": 0.0,
"best_model_checkpoint": "./results/checkpoint-500",
"epoch": 0.5128205128205128,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003418803418803419,
"grad_norm": 2.3258378505706787,
"learning_rate": 4.9943019943019945e-05,
"loss": 0.6681,
"step": 10
},
{
"epoch": 0.006837606837606838,
"grad_norm": 0.7698261737823486,
"learning_rate": 4.988603988603989e-05,
"loss": 0.6451,
"step": 20
},
{
"epoch": 0.010256410256410256,
"grad_norm": 1.6664257049560547,
"learning_rate": 4.982905982905983e-05,
"loss": 0.6485,
"step": 30
},
{
"epoch": 0.013675213675213675,
"grad_norm": 0.6200563907623291,
"learning_rate": 4.9772079772079774e-05,
"loss": 0.6453,
"step": 40
},
{
"epoch": 0.017094017094017096,
"grad_norm": 0.5258885622024536,
"learning_rate": 4.971509971509972e-05,
"loss": 0.6569,
"step": 50
},
{
"epoch": 0.020512820512820513,
"grad_norm": 0.5715610384941101,
"learning_rate": 4.965811965811966e-05,
"loss": 0.6508,
"step": 60
},
{
"epoch": 0.023931623931623933,
"grad_norm": 0.5744765400886536,
"learning_rate": 4.96011396011396e-05,
"loss": 0.6029,
"step": 70
},
{
"epoch": 0.02735042735042735,
"grad_norm": 0.9320403337478638,
"learning_rate": 4.9544159544159546e-05,
"loss": 0.6644,
"step": 80
},
{
"epoch": 0.03076923076923077,
"grad_norm": 0.5994309186935425,
"learning_rate": 4.948717948717949e-05,
"loss": 0.6757,
"step": 90
},
{
"epoch": 0.03418803418803419,
"grad_norm": 0.4685361385345459,
"learning_rate": 4.943019943019943e-05,
"loss": 0.6372,
"step": 100
},
{
"epoch": 0.037606837606837605,
"grad_norm": 0.6897755265235901,
"learning_rate": 4.9373219373219375e-05,
"loss": 0.6395,
"step": 110
},
{
"epoch": 0.041025641025641026,
"grad_norm": 0.5714218616485596,
"learning_rate": 4.931623931623932e-05,
"loss": 0.6323,
"step": 120
},
{
"epoch": 0.044444444444444446,
"grad_norm": 0.6862583160400391,
"learning_rate": 4.925925925925926e-05,
"loss": 0.6307,
"step": 130
},
{
"epoch": 0.04786324786324787,
"grad_norm": 1.1985986232757568,
"learning_rate": 4.9202279202279204e-05,
"loss": 0.6353,
"step": 140
},
{
"epoch": 0.05128205128205128,
"grad_norm": 0.4656996428966522,
"learning_rate": 4.9145299145299147e-05,
"loss": 0.6552,
"step": 150
},
{
"epoch": 0.0547008547008547,
"grad_norm": 1.3551446199417114,
"learning_rate": 4.908831908831909e-05,
"loss": 0.6484,
"step": 160
},
{
"epoch": 0.05811965811965812,
"grad_norm": 1.137487769126892,
"learning_rate": 4.903133903133903e-05,
"loss": 0.5905,
"step": 170
},
{
"epoch": 0.06153846153846154,
"grad_norm": 0.6064645051956177,
"learning_rate": 4.8974358974358975e-05,
"loss": 0.6157,
"step": 180
},
{
"epoch": 0.06495726495726496,
"grad_norm": 2.0975794792175293,
"learning_rate": 4.891737891737892e-05,
"loss": 0.6701,
"step": 190
},
{
"epoch": 0.06837606837606838,
"grad_norm": 0.48940032720565796,
"learning_rate": 4.886039886039887e-05,
"loss": 0.6342,
"step": 200
},
{
"epoch": 0.07179487179487179,
"grad_norm": 1.2511190176010132,
"learning_rate": 4.8803418803418804e-05,
"loss": 0.6521,
"step": 210
},
{
"epoch": 0.07521367521367521,
"grad_norm": 0.7074885964393616,
"learning_rate": 4.874643874643875e-05,
"loss": 0.6548,
"step": 220
},
{
"epoch": 0.07863247863247863,
"grad_norm": 1.152065396308899,
"learning_rate": 4.868945868945869e-05,
"loss": 0.6589,
"step": 230
},
{
"epoch": 0.08205128205128205,
"grad_norm": 0.39897221326828003,
"learning_rate": 4.863247863247863e-05,
"loss": 0.6595,
"step": 240
},
{
"epoch": 0.08547008547008547,
"grad_norm": 0.5259735584259033,
"learning_rate": 4.8575498575498576e-05,
"loss": 0.665,
"step": 250
},
{
"epoch": 0.08888888888888889,
"grad_norm": 0.5097119808197021,
"learning_rate": 4.851851851851852e-05,
"loss": 0.6498,
"step": 260
},
{
"epoch": 0.09230769230769231,
"grad_norm": 0.48037877678871155,
"learning_rate": 4.846153846153846e-05,
"loss": 0.5882,
"step": 270
},
{
"epoch": 0.09572649572649573,
"grad_norm": 0.6850088834762573,
"learning_rate": 4.840455840455841e-05,
"loss": 0.6329,
"step": 280
},
{
"epoch": 0.09914529914529914,
"grad_norm": 0.6092679500579834,
"learning_rate": 4.834757834757835e-05,
"loss": 0.6246,
"step": 290
},
{
"epoch": 0.10256410256410256,
"grad_norm": 1.0922237634658813,
"learning_rate": 4.829059829059829e-05,
"loss": 0.6144,
"step": 300
},
{
"epoch": 0.10598290598290598,
"grad_norm": 1.4150214195251465,
"learning_rate": 4.823361823361824e-05,
"loss": 0.643,
"step": 310
},
{
"epoch": 0.1094017094017094,
"grad_norm": 1.516169548034668,
"learning_rate": 4.817663817663818e-05,
"loss": 0.6046,
"step": 320
},
{
"epoch": 0.11282051282051282,
"grad_norm": 0.5234593749046326,
"learning_rate": 4.8119658119658126e-05,
"loss": 0.6193,
"step": 330
},
{
"epoch": 0.11623931623931624,
"grad_norm": 0.6485182046890259,
"learning_rate": 4.806267806267806e-05,
"loss": 0.6314,
"step": 340
},
{
"epoch": 0.11965811965811966,
"grad_norm": 0.9457536935806274,
"learning_rate": 4.8005698005698006e-05,
"loss": 0.5802,
"step": 350
},
{
"epoch": 0.12307692307692308,
"grad_norm": 1.2444144487380981,
"learning_rate": 4.7948717948717955e-05,
"loss": 0.5927,
"step": 360
},
{
"epoch": 0.1264957264957265,
"grad_norm": 0.499647855758667,
"learning_rate": 4.789173789173789e-05,
"loss": 0.6358,
"step": 370
},
{
"epoch": 0.12991452991452992,
"grad_norm": 2.130183696746826,
"learning_rate": 4.7834757834757834e-05,
"loss": 0.6324,
"step": 380
},
{
"epoch": 0.13333333333333333,
"grad_norm": 0.6378350257873535,
"learning_rate": 4.7777777777777784e-05,
"loss": 0.6061,
"step": 390
},
{
"epoch": 0.13675213675213677,
"grad_norm": 0.39135029911994934,
"learning_rate": 4.772079772079772e-05,
"loss": 0.6329,
"step": 400
},
{
"epoch": 0.14017094017094017,
"grad_norm": 0.5480381846427917,
"learning_rate": 4.766381766381767e-05,
"loss": 0.6607,
"step": 410
},
{
"epoch": 0.14358974358974358,
"grad_norm": 0.4431852400302887,
"learning_rate": 4.7606837606837606e-05,
"loss": 0.6233,
"step": 420
},
{
"epoch": 0.147008547008547,
"grad_norm": 0.4828330874443054,
"learning_rate": 4.754985754985755e-05,
"loss": 0.6437,
"step": 430
},
{
"epoch": 0.15042735042735042,
"grad_norm": 0.5272857546806335,
"learning_rate": 4.74928774928775e-05,
"loss": 0.6671,
"step": 440
},
{
"epoch": 0.15384615384615385,
"grad_norm": 1.4251387119293213,
"learning_rate": 4.7435897435897435e-05,
"loss": 0.658,
"step": 450
},
{
"epoch": 0.15726495726495726,
"grad_norm": 0.8041712641716003,
"learning_rate": 4.737891737891738e-05,
"loss": 0.6487,
"step": 460
},
{
"epoch": 0.1606837606837607,
"grad_norm": 0.7019796371459961,
"learning_rate": 4.732193732193733e-05,
"loss": 0.6019,
"step": 470
},
{
"epoch": 0.1641025641025641,
"grad_norm": 0.8561422228813171,
"learning_rate": 4.7264957264957264e-05,
"loss": 0.6897,
"step": 480
},
{
"epoch": 0.1675213675213675,
"grad_norm": 1.0677204132080078,
"learning_rate": 4.7207977207977214e-05,
"loss": 0.6848,
"step": 490
},
{
"epoch": 0.17094017094017094,
"grad_norm": 0.4762294590473175,
"learning_rate": 4.7150997150997157e-05,
"loss": 0.6527,
"step": 500
},
{
"epoch": 0.17094017094017094,
"eval_accuracy": 0.661082143772972,
"eval_f1": 0.0,
"eval_loss": 0.6433083415031433,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_roc_auc": 0.4981741909669265,
"eval_runtime": 36.622,
"eval_samples_per_second": 319.453,
"eval_steps_per_second": 19.988,
"step": 500
},
{
"epoch": 0.17435897435897435,
"grad_norm": 0.4656302034854889,
"learning_rate": 4.709401709401709e-05,
"loss": 0.6506,
"step": 510
},
{
"epoch": 0.17777777777777778,
"grad_norm": 0.6288455724716187,
"learning_rate": 4.703703703703704e-05,
"loss": 0.6422,
"step": 520
},
{
"epoch": 0.1811965811965812,
"grad_norm": 0.39913907647132874,
"learning_rate": 4.698005698005698e-05,
"loss": 0.6146,
"step": 530
},
{
"epoch": 0.18461538461538463,
"grad_norm": 0.40889817476272583,
"learning_rate": 4.692307692307693e-05,
"loss": 0.6272,
"step": 540
},
{
"epoch": 0.18803418803418803,
"grad_norm": 0.9223109483718872,
"learning_rate": 4.686609686609687e-05,
"loss": 0.6391,
"step": 550
},
{
"epoch": 0.19145299145299147,
"grad_norm": 0.43170908093452454,
"learning_rate": 4.680911680911681e-05,
"loss": 0.6613,
"step": 560
},
{
"epoch": 0.19487179487179487,
"grad_norm": 0.6207427978515625,
"learning_rate": 4.675213675213676e-05,
"loss": 0.6471,
"step": 570
},
{
"epoch": 0.19829059829059828,
"grad_norm": 0.7672275304794312,
"learning_rate": 4.66951566951567e-05,
"loss": 0.6629,
"step": 580
},
{
"epoch": 0.20170940170940171,
"grad_norm": 0.4669424891471863,
"learning_rate": 4.6638176638176636e-05,
"loss": 0.6588,
"step": 590
},
{
"epoch": 0.20512820512820512,
"grad_norm": 0.6726049184799194,
"learning_rate": 4.6581196581196586e-05,
"loss": 0.6258,
"step": 600
},
{
"epoch": 0.20854700854700856,
"grad_norm": 0.7948060035705566,
"learning_rate": 4.652421652421652e-05,
"loss": 0.5705,
"step": 610
},
{
"epoch": 0.21196581196581196,
"grad_norm": 0.419849693775177,
"learning_rate": 4.646723646723647e-05,
"loss": 0.6468,
"step": 620
},
{
"epoch": 0.2153846153846154,
"grad_norm": 1.0143113136291504,
"learning_rate": 4.6410256410256415e-05,
"loss": 0.6297,
"step": 630
},
{
"epoch": 0.2188034188034188,
"grad_norm": 0.7109899520874023,
"learning_rate": 4.635327635327635e-05,
"loss": 0.673,
"step": 640
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.760080099105835,
"learning_rate": 4.62962962962963e-05,
"loss": 0.6227,
"step": 650
},
{
"epoch": 0.22564102564102564,
"grad_norm": 0.7442237138748169,
"learning_rate": 4.6239316239316244e-05,
"loss": 0.5715,
"step": 660
},
{
"epoch": 0.22905982905982905,
"grad_norm": 0.39145609736442566,
"learning_rate": 4.618233618233619e-05,
"loss": 0.6727,
"step": 670
},
{
"epoch": 0.23247863247863249,
"grad_norm": 0.868276059627533,
"learning_rate": 4.612535612535613e-05,
"loss": 0.6344,
"step": 680
},
{
"epoch": 0.2358974358974359,
"grad_norm": 0.6120406985282898,
"learning_rate": 4.6068376068376066e-05,
"loss": 0.5954,
"step": 690
},
{
"epoch": 0.23931623931623933,
"grad_norm": 0.5536867380142212,
"learning_rate": 4.6011396011396016e-05,
"loss": 0.6476,
"step": 700
},
{
"epoch": 0.24273504273504273,
"grad_norm": 0.4315416216850281,
"learning_rate": 4.595441595441596e-05,
"loss": 0.6215,
"step": 710
},
{
"epoch": 0.24615384615384617,
"grad_norm": 0.517528235912323,
"learning_rate": 4.5897435897435895e-05,
"loss": 0.6258,
"step": 720
},
{
"epoch": 0.24957264957264957,
"grad_norm": 1.3188592195510864,
"learning_rate": 4.5840455840455844e-05,
"loss": 0.6469,
"step": 730
},
{
"epoch": 0.252991452991453,
"grad_norm": 1.2717797756195068,
"learning_rate": 4.578347578347579e-05,
"loss": 0.5683,
"step": 740
},
{
"epoch": 0.2564102564102564,
"grad_norm": 1.0561293363571167,
"learning_rate": 4.572649572649573e-05,
"loss": 0.6769,
"step": 750
},
{
"epoch": 0.25982905982905985,
"grad_norm": 1.4157183170318604,
"learning_rate": 4.566951566951567e-05,
"loss": 0.6901,
"step": 760
},
{
"epoch": 0.26324786324786326,
"grad_norm": 0.4029109477996826,
"learning_rate": 4.5612535612535616e-05,
"loss": 0.593,
"step": 770
},
{
"epoch": 0.26666666666666666,
"grad_norm": 1.0039498805999756,
"learning_rate": 4.555555555555556e-05,
"loss": 0.6798,
"step": 780
},
{
"epoch": 0.27008547008547007,
"grad_norm": 0.6905536651611328,
"learning_rate": 4.54985754985755e-05,
"loss": 0.6352,
"step": 790
},
{
"epoch": 0.27350427350427353,
"grad_norm": 0.8582714796066284,
"learning_rate": 4.544159544159544e-05,
"loss": 0.6438,
"step": 800
},
{
"epoch": 0.27692307692307694,
"grad_norm": 0.4063926339149475,
"learning_rate": 4.538461538461539e-05,
"loss": 0.6503,
"step": 810
},
{
"epoch": 0.28034188034188035,
"grad_norm": 1.0651031732559204,
"learning_rate": 4.532763532763533e-05,
"loss": 0.6296,
"step": 820
},
{
"epoch": 0.28376068376068375,
"grad_norm": 0.618545651435852,
"learning_rate": 4.5270655270655274e-05,
"loss": 0.6695,
"step": 830
},
{
"epoch": 0.28717948717948716,
"grad_norm": 1.4270812273025513,
"learning_rate": 4.521367521367522e-05,
"loss": 0.588,
"step": 840
},
{
"epoch": 0.2905982905982906,
"grad_norm": 1.277422547340393,
"learning_rate": 4.515669515669516e-05,
"loss": 0.6822,
"step": 850
},
{
"epoch": 0.294017094017094,
"grad_norm": 0.44470494985580444,
"learning_rate": 4.50997150997151e-05,
"loss": 0.6401,
"step": 860
},
{
"epoch": 0.29743589743589743,
"grad_norm": 0.6381728053092957,
"learning_rate": 4.5042735042735046e-05,
"loss": 0.693,
"step": 870
},
{
"epoch": 0.30085470085470084,
"grad_norm": 0.4355703294277191,
"learning_rate": 4.498575498575499e-05,
"loss": 0.6083,
"step": 880
},
{
"epoch": 0.30427350427350425,
"grad_norm": 1.0187709331512451,
"learning_rate": 4.492877492877493e-05,
"loss": 0.5236,
"step": 890
},
{
"epoch": 0.3076923076923077,
"grad_norm": 0.7143679261207581,
"learning_rate": 4.4871794871794874e-05,
"loss": 0.6413,
"step": 900
},
{
"epoch": 0.3111111111111111,
"grad_norm": 1.0808229446411133,
"learning_rate": 4.481481481481482e-05,
"loss": 0.6026,
"step": 910
},
{
"epoch": 0.3145299145299145,
"grad_norm": 0.796187698841095,
"learning_rate": 4.475783475783476e-05,
"loss": 0.6812,
"step": 920
},
{
"epoch": 0.31794871794871793,
"grad_norm": 0.5163740515708923,
"learning_rate": 4.47008547008547e-05,
"loss": 0.6537,
"step": 930
},
{
"epoch": 0.3213675213675214,
"grad_norm": 0.7213220596313477,
"learning_rate": 4.4643874643874646e-05,
"loss": 0.6765,
"step": 940
},
{
"epoch": 0.3247863247863248,
"grad_norm": 0.44362661242485046,
"learning_rate": 4.458689458689459e-05,
"loss": 0.6249,
"step": 950
},
{
"epoch": 0.3282051282051282,
"grad_norm": 0.4917695224285126,
"learning_rate": 4.452991452991453e-05,
"loss": 0.63,
"step": 960
},
{
"epoch": 0.3316239316239316,
"grad_norm": 0.709846556186676,
"learning_rate": 4.4472934472934475e-05,
"loss": 0.5544,
"step": 970
},
{
"epoch": 0.335042735042735,
"grad_norm": 1.065099835395813,
"learning_rate": 4.441595441595442e-05,
"loss": 0.6338,
"step": 980
},
{
"epoch": 0.3384615384615385,
"grad_norm": 0.42223694920539856,
"learning_rate": 4.435897435897436e-05,
"loss": 0.5828,
"step": 990
},
{
"epoch": 0.3418803418803419,
"grad_norm": 1.5173028707504272,
"learning_rate": 4.4301994301994304e-05,
"loss": 0.6229,
"step": 1000
},
{
"epoch": 0.3418803418803419,
"eval_accuracy": 0.661082143772972,
"eval_f1": 0.0,
"eval_loss": 0.6458322405815125,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_roc_auc": 0.5011399036892176,
"eval_runtime": 36.5197,
"eval_samples_per_second": 320.347,
"eval_steps_per_second": 20.044,
"step": 1000
},
{
"epoch": 0.3452991452991453,
"grad_norm": 0.8043766617774963,
"learning_rate": 4.424501424501425e-05,
"loss": 0.6463,
"step": 1010
},
{
"epoch": 0.3487179487179487,
"grad_norm": 0.6817493438720703,
"learning_rate": 4.418803418803419e-05,
"loss": 0.6266,
"step": 1020
},
{
"epoch": 0.35213675213675216,
"grad_norm": 0.6765307784080505,
"learning_rate": 4.413105413105413e-05,
"loss": 0.6203,
"step": 1030
},
{
"epoch": 0.35555555555555557,
"grad_norm": 0.6116905808448792,
"learning_rate": 4.4074074074074076e-05,
"loss": 0.5933,
"step": 1040
},
{
"epoch": 0.358974358974359,
"grad_norm": 0.3634931445121765,
"learning_rate": 4.401709401709402e-05,
"loss": 0.6612,
"step": 1050
},
{
"epoch": 0.3623931623931624,
"grad_norm": 0.8377366065979004,
"learning_rate": 4.396011396011396e-05,
"loss": 0.6933,
"step": 1060
},
{
"epoch": 0.3658119658119658,
"grad_norm": 0.7808057069778442,
"learning_rate": 4.3903133903133905e-05,
"loss": 0.6101,
"step": 1070
},
{
"epoch": 0.36923076923076925,
"grad_norm": 0.5020534992218018,
"learning_rate": 4.384615384615385e-05,
"loss": 0.6333,
"step": 1080
},
{
"epoch": 0.37264957264957266,
"grad_norm": 0.9217988848686218,
"learning_rate": 4.378917378917379e-05,
"loss": 0.652,
"step": 1090
},
{
"epoch": 0.37606837606837606,
"grad_norm": 0.426917165517807,
"learning_rate": 4.3732193732193733e-05,
"loss": 0.6776,
"step": 1100
},
{
"epoch": 0.37948717948717947,
"grad_norm": 1.00786292552948,
"learning_rate": 4.3675213675213676e-05,
"loss": 0.6308,
"step": 1110
},
{
"epoch": 0.38290598290598293,
"grad_norm": 0.5222122669219971,
"learning_rate": 4.361823361823362e-05,
"loss": 0.5881,
"step": 1120
},
{
"epoch": 0.38632478632478634,
"grad_norm": 1.309751272201538,
"learning_rate": 4.356125356125356e-05,
"loss": 0.6988,
"step": 1130
},
{
"epoch": 0.38974358974358975,
"grad_norm": 0.5627844929695129,
"learning_rate": 4.3504273504273505e-05,
"loss": 0.6396,
"step": 1140
},
{
"epoch": 0.39316239316239315,
"grad_norm": 0.40362900495529175,
"learning_rate": 4.344729344729345e-05,
"loss": 0.639,
"step": 1150
},
{
"epoch": 0.39658119658119656,
"grad_norm": 0.632331371307373,
"learning_rate": 4.339031339031339e-05,
"loss": 0.6187,
"step": 1160
},
{
"epoch": 0.4,
"grad_norm": 1.1355897188186646,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.6317,
"step": 1170
},
{
"epoch": 0.40341880341880343,
"grad_norm": 0.8610725998878479,
"learning_rate": 4.327635327635328e-05,
"loss": 0.631,
"step": 1180
},
{
"epoch": 0.40683760683760684,
"grad_norm": 0.6825465559959412,
"learning_rate": 4.321937321937322e-05,
"loss": 0.6825,
"step": 1190
},
{
"epoch": 0.41025641025641024,
"grad_norm": 1.3887457847595215,
"learning_rate": 4.316239316239317e-05,
"loss": 0.6221,
"step": 1200
},
{
"epoch": 0.41367521367521365,
"grad_norm": 0.5809090733528137,
"learning_rate": 4.3105413105413106e-05,
"loss": 0.6117,
"step": 1210
},
{
"epoch": 0.4170940170940171,
"grad_norm": 0.4157603681087494,
"learning_rate": 4.304843304843305e-05,
"loss": 0.613,
"step": 1220
},
{
"epoch": 0.4205128205128205,
"grad_norm": 0.4386206269264221,
"learning_rate": 4.2991452991453e-05,
"loss": 0.6458,
"step": 1230
},
{
"epoch": 0.4239316239316239,
"grad_norm": 1.4249426126480103,
"learning_rate": 4.2934472934472935e-05,
"loss": 0.66,
"step": 1240
},
{
"epoch": 0.42735042735042733,
"grad_norm": 1.3717528581619263,
"learning_rate": 4.287749287749288e-05,
"loss": 0.6497,
"step": 1250
},
{
"epoch": 0.4307692307692308,
"grad_norm": 0.6880800724029541,
"learning_rate": 4.282051282051282e-05,
"loss": 0.6231,
"step": 1260
},
{
"epoch": 0.4341880341880342,
"grad_norm": 0.9455773234367371,
"learning_rate": 4.2763532763532764e-05,
"loss": 0.6524,
"step": 1270
},
{
"epoch": 0.4376068376068376,
"grad_norm": 1.2795006036758423,
"learning_rate": 4.270655270655271e-05,
"loss": 0.6039,
"step": 1280
},
{
"epoch": 0.441025641025641,
"grad_norm": 0.4846753776073456,
"learning_rate": 4.264957264957265e-05,
"loss": 0.6066,
"step": 1290
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.49425560235977173,
"learning_rate": 4.259259259259259e-05,
"loss": 0.6545,
"step": 1300
},
{
"epoch": 0.4478632478632479,
"grad_norm": 0.924453854560852,
"learning_rate": 4.253561253561254e-05,
"loss": 0.6406,
"step": 1310
},
{
"epoch": 0.4512820512820513,
"grad_norm": 0.46777766942977905,
"learning_rate": 4.247863247863248e-05,
"loss": 0.6275,
"step": 1320
},
{
"epoch": 0.4547008547008547,
"grad_norm": 0.7829861044883728,
"learning_rate": 4.242165242165243e-05,
"loss": 0.6445,
"step": 1330
},
{
"epoch": 0.4581196581196581,
"grad_norm": 0.6596978306770325,
"learning_rate": 4.2364672364672364e-05,
"loss": 0.648,
"step": 1340
},
{
"epoch": 0.46153846153846156,
"grad_norm": 0.9732853770256042,
"learning_rate": 4.230769230769231e-05,
"loss": 0.6738,
"step": 1350
},
{
"epoch": 0.46495726495726497,
"grad_norm": 0.4845993220806122,
"learning_rate": 4.225071225071226e-05,
"loss": 0.6464,
"step": 1360
},
{
"epoch": 0.4683760683760684,
"grad_norm": 0.40009310841560364,
"learning_rate": 4.219373219373219e-05,
"loss": 0.6193,
"step": 1370
},
{
"epoch": 0.4717948717948718,
"grad_norm": 1.296000361442566,
"learning_rate": 4.2136752136752136e-05,
"loss": 0.608,
"step": 1380
},
{
"epoch": 0.4752136752136752,
"grad_norm": 0.3851681351661682,
"learning_rate": 4.2079772079772086e-05,
"loss": 0.636,
"step": 1390
},
{
"epoch": 0.47863247863247865,
"grad_norm": 1.5586471557617188,
"learning_rate": 4.202279202279202e-05,
"loss": 0.652,
"step": 1400
},
{
"epoch": 0.48205128205128206,
"grad_norm": 1.1093754768371582,
"learning_rate": 4.196581196581197e-05,
"loss": 0.6397,
"step": 1410
},
{
"epoch": 0.48547008547008547,
"grad_norm": 0.6494556665420532,
"learning_rate": 4.190883190883191e-05,
"loss": 0.6691,
"step": 1420
},
{
"epoch": 0.4888888888888889,
"grad_norm": 0.6842040419578552,
"learning_rate": 4.185185185185185e-05,
"loss": 0.653,
"step": 1430
},
{
"epoch": 0.49230769230769234,
"grad_norm": 0.39208441972732544,
"learning_rate": 4.17948717948718e-05,
"loss": 0.6303,
"step": 1440
},
{
"epoch": 0.49572649572649574,
"grad_norm": 0.3755127787590027,
"learning_rate": 4.1737891737891737e-05,
"loss": 0.6619,
"step": 1450
},
{
"epoch": 0.49914529914529915,
"grad_norm": 0.3358234167098999,
"learning_rate": 4.168091168091168e-05,
"loss": 0.6782,
"step": 1460
},
{
"epoch": 0.5025641025641026,
"grad_norm": 0.30498063564300537,
"learning_rate": 4.162393162393163e-05,
"loss": 0.6582,
"step": 1470
},
{
"epoch": 0.505982905982906,
"grad_norm": 0.7140593528747559,
"learning_rate": 4.1566951566951565e-05,
"loss": 0.6749,
"step": 1480
},
{
"epoch": 0.5094017094017094,
"grad_norm": 0.4288971424102783,
"learning_rate": 4.1509971509971515e-05,
"loss": 0.6355,
"step": 1490
},
{
"epoch": 0.5128205128205128,
"grad_norm": 0.8717936277389526,
"learning_rate": 4.145299145299146e-05,
"loss": 0.6258,
"step": 1500
},
{
"epoch": 0.5128205128205128,
"eval_accuracy": 0.661082143772972,
"eval_f1": 0.0,
"eval_loss": 0.6402843594551086,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_roc_auc": 0.5119639749280213,
"eval_runtime": 35.8008,
"eval_samples_per_second": 326.78,
"eval_steps_per_second": 20.446,
"step": 1500
}
],
"logging_steps": 10,
"max_steps": 8775,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 794804391936000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}