Training in progress, step 1500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +730 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a10a2ecaff875b9c46ad2bbd2fed17c2a0a46c72399b0499d9bca795a82b01a
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d52b5f85bae69987c6e3f333d957b4a00ee3c2c2c4e07e3f04dbb474f7a3832
 size 267832560

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ffdf6481862c29fc9f519ed97553ae9c619649345ac1473ff2b63f00a952157
 size 535727290

 version https://git-lfs.github.com/spec/v1
+oid sha256:35a22e202003692875f84b686ed8ca24482f192a3e635d1468082f0bb00792ff
 size 535727290

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e4a426a8b73c74a38f2a3b1243f7c773cf4681b425c8731e354a12e8672e330
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2847d9d71bf9a8c602c37034ef9e075859f3ed69e4a00df162b9a4a32971121
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cbe50d058b46466dc5c0d3a5f85c97b4ca24f57c286062ca922883cd2d25c9c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:34672d517f99ba83bdc5002baecfd9b3f2d2fcacea6cdddb384020730eba75c1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.0,
   "best_model_checkpoint": "./results/checkpoint-500",
-  "epoch": 0.17094017094017094,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -370,6 +370,732 @@
       "eval_samples_per_second": 319.453,
       "eval_steps_per_second": 19.988,
       "step": 500
     }
   ],
   "logging_steps": 10,
@@ -384,7 +1110,7 @@
         "early_stopping_threshold": 0.001
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -398,7 +1124,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 264934797312000.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.0,
   "best_model_checkpoint": "./results/checkpoint-500",
+  "epoch": 0.5128205128205128,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 319.453,
       "eval_steps_per_second": 19.988,
       "step": 500
+    },
+    {
+      "epoch": 0.17435897435897435,
+      "grad_norm": 0.4656302034854889,
+      "learning_rate": 4.709401709401709e-05,
+      "loss": 0.6506,
+      "step": 510
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 0.6288455724716187,
+      "learning_rate": 4.703703703703704e-05,
+      "loss": 0.6422,
+      "step": 520
+    },
+    {
+      "epoch": 0.1811965811965812,
+      "grad_norm": 0.39913907647132874,
+      "learning_rate": 4.698005698005698e-05,
+      "loss": 0.6146,
+      "step": 530
+    },
+    {
+      "epoch": 0.18461538461538463,
+      "grad_norm": 0.40889817476272583,
+      "learning_rate": 4.692307692307693e-05,
+      "loss": 0.6272,
+      "step": 540
+    },
+    {
+      "epoch": 0.18803418803418803,
+      "grad_norm": 0.9223109483718872,
+      "learning_rate": 4.686609686609687e-05,
+      "loss": 0.6391,
+      "step": 550
+    },
+    {
+      "epoch": 0.19145299145299147,
+      "grad_norm": 0.43170908093452454,
+      "learning_rate": 4.680911680911681e-05,
+      "loss": 0.6613,
+      "step": 560
+    },
+    {
+      "epoch": 0.19487179487179487,
+      "grad_norm": 0.6207427978515625,
+      "learning_rate": 4.675213675213676e-05,
+      "loss": 0.6471,
+      "step": 570
+    },
+    {
+      "epoch": 0.19829059829059828,
+      "grad_norm": 0.7672275304794312,
+      "learning_rate": 4.66951566951567e-05,
+      "loss": 0.6629,
+      "step": 580
+    },
+    {
+      "epoch": 0.20170940170940171,
+      "grad_norm": 0.4669424891471863,
+      "learning_rate": 4.6638176638176636e-05,
+      "loss": 0.6588,
+      "step": 590
+    },
+    {
+      "epoch": 0.20512820512820512,
+      "grad_norm": 0.6726049184799194,
+      "learning_rate": 4.6581196581196586e-05,
+      "loss": 0.6258,
+      "step": 600
+    },
+    {
+      "epoch": 0.20854700854700856,
+      "grad_norm": 0.7948060035705566,
+      "learning_rate": 4.652421652421652e-05,
+      "loss": 0.5705,
+      "step": 610
+    },
+    {
+      "epoch": 0.21196581196581196,
+      "grad_norm": 0.419849693775177,
+      "learning_rate": 4.646723646723647e-05,
+      "loss": 0.6468,
+      "step": 620
+    },
+    {
+      "epoch": 0.2153846153846154,
+      "grad_norm": 1.0143113136291504,
+      "learning_rate": 4.6410256410256415e-05,
+      "loss": 0.6297,
+      "step": 630
+    },
+    {
+      "epoch": 0.2188034188034188,
+      "grad_norm": 0.7109899520874023,
+      "learning_rate": 4.635327635327635e-05,
+      "loss": 0.673,
+      "step": 640
+    },
+    {
+      "epoch": 0.2222222222222222,
+      "grad_norm": 0.760080099105835,
+      "learning_rate": 4.62962962962963e-05,
+      "loss": 0.6227,
+      "step": 650
+    },
+    {
+      "epoch": 0.22564102564102564,
+      "grad_norm": 0.7442237138748169,
+      "learning_rate": 4.6239316239316244e-05,
+      "loss": 0.5715,
+      "step": 660
+    },
+    {
+      "epoch": 0.22905982905982905,
+      "grad_norm": 0.39145609736442566,
+      "learning_rate": 4.618233618233619e-05,
+      "loss": 0.6727,
+      "step": 670
+    },
+    {
+      "epoch": 0.23247863247863249,
+      "grad_norm": 0.868276059627533,
+      "learning_rate": 4.612535612535613e-05,
+      "loss": 0.6344,
+      "step": 680
+    },
+    {
+      "epoch": 0.2358974358974359,
+      "grad_norm": 0.6120406985282898,
+      "learning_rate": 4.6068376068376066e-05,
+      "loss": 0.5954,
+      "step": 690
+    },
+    {
+      "epoch": 0.23931623931623933,
+      "grad_norm": 0.5536867380142212,
+      "learning_rate": 4.6011396011396016e-05,
+      "loss": 0.6476,
+      "step": 700
+    },
+    {
+      "epoch": 0.24273504273504273,
+      "grad_norm": 0.4315416216850281,
+      "learning_rate": 4.595441595441596e-05,
+      "loss": 0.6215,
+      "step": 710
+    },
+    {
+      "epoch": 0.24615384615384617,
+      "grad_norm": 0.517528235912323,
+      "learning_rate": 4.5897435897435895e-05,
+      "loss": 0.6258,
+      "step": 720
+    },
+    {
+      "epoch": 0.24957264957264957,
+      "grad_norm": 1.3188592195510864,
+      "learning_rate": 4.5840455840455844e-05,
+      "loss": 0.6469,
+      "step": 730
+    },
+    {
+      "epoch": 0.252991452991453,
+      "grad_norm": 1.2717797756195068,
+      "learning_rate": 4.578347578347579e-05,
+      "loss": 0.5683,
+      "step": 740
+    },
+    {
+      "epoch": 0.2564102564102564,
+      "grad_norm": 1.0561293363571167,
+      "learning_rate": 4.572649572649573e-05,
+      "loss": 0.6769,
+      "step": 750
+    },
+    {
+      "epoch": 0.25982905982905985,
+      "grad_norm": 1.4157183170318604,
+      "learning_rate": 4.566951566951567e-05,
+      "loss": 0.6901,
+      "step": 760
+    },
+    {
+      "epoch": 0.26324786324786326,
+      "grad_norm": 0.4029109477996826,
+      "learning_rate": 4.5612535612535616e-05,
+      "loss": 0.593,
+      "step": 770
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 1.0039498805999756,
+      "learning_rate": 4.555555555555556e-05,
+      "loss": 0.6798,
+      "step": 780
+    },
+    {
+      "epoch": 0.27008547008547007,
+      "grad_norm": 0.6905536651611328,
+      "learning_rate": 4.54985754985755e-05,
+      "loss": 0.6352,
+      "step": 790
+    },
+    {
+      "epoch": 0.27350427350427353,
+      "grad_norm": 0.8582714796066284,
+      "learning_rate": 4.544159544159544e-05,
+      "loss": 0.6438,
+      "step": 800
+    },
+    {
+      "epoch": 0.27692307692307694,
+      "grad_norm": 0.4063926339149475,
+      "learning_rate": 4.538461538461539e-05,
+      "loss": 0.6503,
+      "step": 810
+    },
+    {
+      "epoch": 0.28034188034188035,
+      "grad_norm": 1.0651031732559204,
+      "learning_rate": 4.532763532763533e-05,
+      "loss": 0.6296,
+      "step": 820
+    },
+    {
+      "epoch": 0.28376068376068375,
+      "grad_norm": 0.618545651435852,
+      "learning_rate": 4.5270655270655274e-05,
+      "loss": 0.6695,
+      "step": 830
+    },
+    {
+      "epoch": 0.28717948717948716,
+      "grad_norm": 1.4270812273025513,
+      "learning_rate": 4.521367521367522e-05,
+      "loss": 0.588,
+      "step": 840
+    },
+    {
+      "epoch": 0.2905982905982906,
+      "grad_norm": 1.277422547340393,
+      "learning_rate": 4.515669515669516e-05,
+      "loss": 0.6822,
+      "step": 850
+    },
+    {
+      "epoch": 0.294017094017094,
+      "grad_norm": 0.44470494985580444,
+      "learning_rate": 4.50997150997151e-05,
+      "loss": 0.6401,
+      "step": 860
+    },
+    {
+      "epoch": 0.29743589743589743,
+      "grad_norm": 0.6381728053092957,
+      "learning_rate": 4.5042735042735046e-05,
+      "loss": 0.693,
+      "step": 870
+    },
+    {
+      "epoch": 0.30085470085470084,
+      "grad_norm": 0.4355703294277191,
+      "learning_rate": 4.498575498575499e-05,
+      "loss": 0.6083,
+      "step": 880
+    },
+    {
+      "epoch": 0.30427350427350425,
+      "grad_norm": 1.0187709331512451,
+      "learning_rate": 4.492877492877493e-05,
+      "loss": 0.5236,
+      "step": 890
+    },
+    {
+      "epoch": 0.3076923076923077,
+      "grad_norm": 0.7143679261207581,
+      "learning_rate": 4.4871794871794874e-05,
+      "loss": 0.6413,
+      "step": 900
+    },
+    {
+      "epoch": 0.3111111111111111,
+      "grad_norm": 1.0808229446411133,
+      "learning_rate": 4.481481481481482e-05,
+      "loss": 0.6026,
+      "step": 910
+    },
+    {
+      "epoch": 0.3145299145299145,
+      "grad_norm": 0.796187698841095,
+      "learning_rate": 4.475783475783476e-05,
+      "loss": 0.6812,
+      "step": 920
+    },
+    {
+      "epoch": 0.31794871794871793,
+      "grad_norm": 0.5163740515708923,
+      "learning_rate": 4.47008547008547e-05,
+      "loss": 0.6537,
+      "step": 930
+    },
+    {
+      "epoch": 0.3213675213675214,
+      "grad_norm": 0.7213220596313477,
+      "learning_rate": 4.4643874643874646e-05,
+      "loss": 0.6765,
+      "step": 940
+    },
+    {
+      "epoch": 0.3247863247863248,
+      "grad_norm": 0.44362661242485046,
+      "learning_rate": 4.458689458689459e-05,
+      "loss": 0.6249,
+      "step": 950
+    },
+    {
+      "epoch": 0.3282051282051282,
+      "grad_norm": 0.4917695224285126,
+      "learning_rate": 4.452991452991453e-05,
+      "loss": 0.63,
+      "step": 960
+    },
+    {
+      "epoch": 0.3316239316239316,
+      "grad_norm": 0.709846556186676,
+      "learning_rate": 4.4472934472934475e-05,
+      "loss": 0.5544,
+      "step": 970
+    },
+    {
+      "epoch": 0.335042735042735,
+      "grad_norm": 1.065099835395813,
+      "learning_rate": 4.441595441595442e-05,
+      "loss": 0.6338,
+      "step": 980
+    },
+    {
+      "epoch": 0.3384615384615385,
+      "grad_norm": 0.42223694920539856,
+      "learning_rate": 4.435897435897436e-05,
+      "loss": 0.5828,
+      "step": 990
+    },
+    {
+      "epoch": 0.3418803418803419,
+      "grad_norm": 1.5173028707504272,
+      "learning_rate": 4.4301994301994304e-05,
+      "loss": 0.6229,
+      "step": 1000
+    },
+    {
+      "epoch": 0.3418803418803419,
+      "eval_accuracy": 0.661082143772972,
+      "eval_f1": 0.0,
+      "eval_loss": 0.6458322405815125,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_roc_auc": 0.5011399036892176,
+      "eval_runtime": 36.5197,
+      "eval_samples_per_second": 320.347,
+      "eval_steps_per_second": 20.044,
+      "step": 1000
+    },
+    {
+      "epoch": 0.3452991452991453,
+      "grad_norm": 0.8043766617774963,
+      "learning_rate": 4.424501424501425e-05,
+      "loss": 0.6463,
+      "step": 1010
+    },
+    {
+      "epoch": 0.3487179487179487,
+      "grad_norm": 0.6817493438720703,
+      "learning_rate": 4.418803418803419e-05,
+      "loss": 0.6266,
+      "step": 1020
+    },
+    {
+      "epoch": 0.35213675213675216,
+      "grad_norm": 0.6765307784080505,
+      "learning_rate": 4.413105413105413e-05,
+      "loss": 0.6203,
+      "step": 1030
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "grad_norm": 0.6116905808448792,
+      "learning_rate": 4.4074074074074076e-05,
+      "loss": 0.5933,
+      "step": 1040
+    },
+    {
+      "epoch": 0.358974358974359,
+      "grad_norm": 0.3634931445121765,
+      "learning_rate": 4.401709401709402e-05,
+      "loss": 0.6612,
+      "step": 1050
+    },
+    {
+      "epoch": 0.3623931623931624,
+      "grad_norm": 0.8377366065979004,
+      "learning_rate": 4.396011396011396e-05,
+      "loss": 0.6933,
+      "step": 1060
+    },
+    {
+      "epoch": 0.3658119658119658,
+      "grad_norm": 0.7808057069778442,
+      "learning_rate": 4.3903133903133905e-05,
+      "loss": 0.6101,
+      "step": 1070
+    },
+    {
+      "epoch": 0.36923076923076925,
+      "grad_norm": 0.5020534992218018,
+      "learning_rate": 4.384615384615385e-05,
+      "loss": 0.6333,
+      "step": 1080
+    },
+    {
+      "epoch": 0.37264957264957266,
+      "grad_norm": 0.9217988848686218,
+      "learning_rate": 4.378917378917379e-05,
+      "loss": 0.652,
+      "step": 1090
+    },
+    {
+      "epoch": 0.37606837606837606,
+      "grad_norm": 0.426917165517807,
+      "learning_rate": 4.3732193732193733e-05,
+      "loss": 0.6776,
+      "step": 1100
+    },
+    {
+      "epoch": 0.37948717948717947,
+      "grad_norm": 1.00786292552948,
+      "learning_rate": 4.3675213675213676e-05,
+      "loss": 0.6308,
+      "step": 1110
+    },
+    {
+      "epoch": 0.38290598290598293,
+      "grad_norm": 0.5222122669219971,
+      "learning_rate": 4.361823361823362e-05,
+      "loss": 0.5881,
+      "step": 1120
+    },
+    {
+      "epoch": 0.38632478632478634,
+      "grad_norm": 1.309751272201538,
+      "learning_rate": 4.356125356125356e-05,
+      "loss": 0.6988,
+      "step": 1130
+    },
+    {
+      "epoch": 0.38974358974358975,
+      "grad_norm": 0.5627844929695129,
+      "learning_rate": 4.3504273504273505e-05,
+      "loss": 0.6396,
+      "step": 1140
+    },
+    {
+      "epoch": 0.39316239316239315,
+      "grad_norm": 0.40362900495529175,
+      "learning_rate": 4.344729344729345e-05,
+      "loss": 0.639,
+      "step": 1150
+    },
+    {
+      "epoch": 0.39658119658119656,
+      "grad_norm": 0.632331371307373,
+      "learning_rate": 4.339031339031339e-05,
+      "loss": 0.6187,
+      "step": 1160
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 1.1355897188186646,
+      "learning_rate": 4.3333333333333334e-05,
+      "loss": 0.6317,
+      "step": 1170
+    },
+    {
+      "epoch": 0.40341880341880343,
+      "grad_norm": 0.8610725998878479,
+      "learning_rate": 4.327635327635328e-05,
+      "loss": 0.631,
+      "step": 1180
+    },
+    {
+      "epoch": 0.40683760683760684,
+      "grad_norm": 0.6825465559959412,
+      "learning_rate": 4.321937321937322e-05,
+      "loss": 0.6825,
+      "step": 1190
+    },
+    {
+      "epoch": 0.41025641025641024,
+      "grad_norm": 1.3887457847595215,
+      "learning_rate": 4.316239316239317e-05,
+      "loss": 0.6221,
+      "step": 1200
+    },
+    {
+      "epoch": 0.41367521367521365,
+      "grad_norm": 0.5809090733528137,
+      "learning_rate": 4.3105413105413106e-05,
+      "loss": 0.6117,
+      "step": 1210
+    },
+    {
+      "epoch": 0.4170940170940171,
+      "grad_norm": 0.4157603681087494,
+      "learning_rate": 4.304843304843305e-05,
+      "loss": 0.613,
+      "step": 1220
+    },
+    {
+      "epoch": 0.4205128205128205,
+      "grad_norm": 0.4386206269264221,
+      "learning_rate": 4.2991452991453e-05,
+      "loss": 0.6458,
+      "step": 1230
+    },
+    {
+      "epoch": 0.4239316239316239,
+      "grad_norm": 1.4249426126480103,
+      "learning_rate": 4.2934472934472935e-05,
+      "loss": 0.66,
+      "step": 1240
+    },
+    {
+      "epoch": 0.42735042735042733,
+      "grad_norm": 1.3717528581619263,
+      "learning_rate": 4.287749287749288e-05,
+      "loss": 0.6497,
+      "step": 1250
+    },
+    {
+      "epoch": 0.4307692307692308,
+      "grad_norm": 0.6880800724029541,
+      "learning_rate": 4.282051282051282e-05,
+      "loss": 0.6231,
+      "step": 1260
+    },
+    {
+      "epoch": 0.4341880341880342,
+      "grad_norm": 0.9455773234367371,
+      "learning_rate": 4.2763532763532764e-05,
+      "loss": 0.6524,
+      "step": 1270
+    },
+    {
+      "epoch": 0.4376068376068376,
+      "grad_norm": 1.2795006036758423,
+      "learning_rate": 4.270655270655271e-05,
+      "loss": 0.6039,
+      "step": 1280
+    },
+    {
+      "epoch": 0.441025641025641,
+      "grad_norm": 0.4846753776073456,
+      "learning_rate": 4.264957264957265e-05,
+      "loss": 0.6066,
+      "step": 1290
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 0.49425560235977173,
+      "learning_rate": 4.259259259259259e-05,
+      "loss": 0.6545,
+      "step": 1300
+    },
+    {
+      "epoch": 0.4478632478632479,
+      "grad_norm": 0.924453854560852,
+      "learning_rate": 4.253561253561254e-05,
+      "loss": 0.6406,
+      "step": 1310
+    },
+    {
+      "epoch": 0.4512820512820513,
+      "grad_norm": 0.46777766942977905,
+      "learning_rate": 4.247863247863248e-05,
+      "loss": 0.6275,
+      "step": 1320
+    },
+    {
+      "epoch": 0.4547008547008547,
+      "grad_norm": 0.7829861044883728,
+      "learning_rate": 4.242165242165243e-05,
+      "loss": 0.6445,
+      "step": 1330
+    },
+    {
+      "epoch": 0.4581196581196581,
+      "grad_norm": 0.6596978306770325,
+      "learning_rate": 4.2364672364672364e-05,
+      "loss": 0.648,
+      "step": 1340
+    },
+    {
+      "epoch": 0.46153846153846156,
+      "grad_norm": 0.9732853770256042,
+      "learning_rate": 4.230769230769231e-05,
+      "loss": 0.6738,
+      "step": 1350
+    },
+    {
+      "epoch": 0.46495726495726497,
+      "grad_norm": 0.4845993220806122,
+      "learning_rate": 4.225071225071226e-05,
+      "loss": 0.6464,
+      "step": 1360
+    },
+    {
+      "epoch": 0.4683760683760684,
+      "grad_norm": 0.40009310841560364,
+      "learning_rate": 4.219373219373219e-05,
+      "loss": 0.6193,
+      "step": 1370
+    },
+    {
+      "epoch": 0.4717948717948718,
+      "grad_norm": 1.296000361442566,
+      "learning_rate": 4.2136752136752136e-05,
+      "loss": 0.608,
+      "step": 1380
+    },
+    {
+      "epoch": 0.4752136752136752,
+      "grad_norm": 0.3851681351661682,
+      "learning_rate": 4.2079772079772086e-05,
+      "loss": 0.636,
+      "step": 1390
+    },
+    {
+      "epoch": 0.47863247863247865,
+      "grad_norm": 1.5586471557617188,
+      "learning_rate": 4.202279202279202e-05,
+      "loss": 0.652,
+      "step": 1400
+    },
+    {
+      "epoch": 0.48205128205128206,
+      "grad_norm": 1.1093754768371582,
+      "learning_rate": 4.196581196581197e-05,
+      "loss": 0.6397,
+      "step": 1410
+    },
+    {
+      "epoch": 0.48547008547008547,
+      "grad_norm": 0.6494556665420532,
+      "learning_rate": 4.190883190883191e-05,
+      "loss": 0.6691,
+      "step": 1420
+    },
+    {
+      "epoch": 0.4888888888888889,
+      "grad_norm": 0.6842040419578552,
+      "learning_rate": 4.185185185185185e-05,
+      "loss": 0.653,
+      "step": 1430
+    },
+    {
+      "epoch": 0.49230769230769234,
+      "grad_norm": 0.39208441972732544,
+      "learning_rate": 4.17948717948718e-05,
+      "loss": 0.6303,
+      "step": 1440
+    },
+    {
+      "epoch": 0.49572649572649574,
+      "grad_norm": 0.3755127787590027,
+      "learning_rate": 4.1737891737891737e-05,
+      "loss": 0.6619,
+      "step": 1450
+    },
+    {
+      "epoch": 0.49914529914529915,
+      "grad_norm": 0.3358234167098999,
+      "learning_rate": 4.168091168091168e-05,
+      "loss": 0.6782,
+      "step": 1460
+    },
+    {
+      "epoch": 0.5025641025641026,
+      "grad_norm": 0.30498063564300537,
+      "learning_rate": 4.162393162393163e-05,
+      "loss": 0.6582,
+      "step": 1470
+    },
+    {
+      "epoch": 0.505982905982906,
+      "grad_norm": 0.7140593528747559,
+      "learning_rate": 4.1566951566951565e-05,
+      "loss": 0.6749,
+      "step": 1480
+    },
+    {
+      "epoch": 0.5094017094017094,
+      "grad_norm": 0.4288971424102783,
+      "learning_rate": 4.1509971509971515e-05,
+      "loss": 0.6355,
+      "step": 1490
+    },
+    {
+      "epoch": 0.5128205128205128,
+      "grad_norm": 0.8717936277389526,
+      "learning_rate": 4.145299145299146e-05,
+      "loss": 0.6258,
+      "step": 1500
+    },
+    {
+      "epoch": 0.5128205128205128,
+      "eval_accuracy": 0.661082143772972,
+      "eval_f1": 0.0,
+      "eval_loss": 0.6402843594551086,
+      "eval_precision": 0.0,
+      "eval_recall": 0.0,
+      "eval_roc_auc": 0.5119639749280213,
+      "eval_runtime": 35.8008,
+      "eval_samples_per_second": 326.78,
+      "eval_steps_per_second": 20.446,
+      "step": 1500
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.001
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 794804391936000.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null