End of training

Browse files

Files changed (4) hide show

all_results.json +8 -0
runs/May26_05-44-34_e54c14a66b63/events.out.tfevents.1748246726.e54c14a66b63.2476.1 +2 -2
test_results.json +8 -0
trainer_state.json +911 -0

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.1,
+    "eval_accuracy": 0.8502994011976048,
+    "eval_loss": 0.6543134450912476,
+    "eval_runtime": 119.6956,
+    "eval_samples_per_second": 4.186,
+    "eval_steps_per_second": 0.526
+}

runs/May26_05-44-34_e54c14a66b63/events.out.tfevents.1748246726.e54c14a66b63.2476.1 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8d3fcc4f1f96c3f2c387d983c2c08e336153d1a3531c5323870793572a0c097
-size 411

 version https://git-lfs.github.com/spec/v1
+oid sha256:12c313847dc678add0e6340d243661b895b4f8e6a0c74c00852b20ce6316468f
+size 734

test_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.1,
+    "eval_accuracy": 0.8502994011976048,
+    "eval_loss": 0.6543134450912476,
+    "eval_runtime": 119.6956,
+    "eval_samples_per_second": 4.186,
+    "eval_steps_per_second": 0.526
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,911 @@

+{
+  "best_global_step": 576,
+  "best_metric": 0.8495934959349594,
+  "best_model_checkpoint": "ALL_RGBCROP_Aug16F-8B16F/checkpoint-576",
+  "epoch": 3.1,
+  "eval_steps": 500,
+  "global_step": 1152,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.003472222222222222,
+      "grad_norm": 4.709033489227295,
+      "learning_rate": 1.5625e-06,
+      "loss": 0.712,
+      "step": 10
+    },
+    {
+      "epoch": 0.006944444444444444,
+      "grad_norm": 5.104576587677002,
+      "learning_rate": 3.2986111111111115e-06,
+      "loss": 0.7204,
+      "step": 20
+    },
+    {
+      "epoch": 0.010416666666666666,
+      "grad_norm": 4.670226573944092,
+      "learning_rate": 5.034722222222222e-06,
+      "loss": 0.7194,
+      "step": 30
+    },
+    {
+      "epoch": 0.013888888888888888,
+      "grad_norm": 5.235384941101074,
+      "learning_rate": 6.770833333333333e-06,
+      "loss": 0.6691,
+      "step": 40
+    },
+    {
+      "epoch": 0.017361111111111112,
+      "grad_norm": 5.082781791687012,
+      "learning_rate": 8.506944444444445e-06,
+      "loss": 0.6538,
+      "step": 50
+    },
+    {
+      "epoch": 0.020833333333333332,
+      "grad_norm": 4.47755765914917,
+      "learning_rate": 1.0243055555555556e-05,
+      "loss": 0.6051,
+      "step": 60
+    },
+    {
+      "epoch": 0.024305555555555556,
+      "grad_norm": 7.663668632507324,
+      "learning_rate": 1.1979166666666667e-05,
+      "loss": 0.6125,
+      "step": 70
+    },
+    {
+      "epoch": 0.027777777777777776,
+      "grad_norm": 6.4383978843688965,
+      "learning_rate": 1.371527777777778e-05,
+      "loss": 0.5258,
+      "step": 80
+    },
+    {
+      "epoch": 0.03125,
+      "grad_norm": 6.019410610198975,
+      "learning_rate": 1.545138888888889e-05,
+      "loss": 0.532,
+      "step": 90
+    },
+    {
+      "epoch": 0.034722222222222224,
+      "grad_norm": 6.127612590789795,
+      "learning_rate": 1.71875e-05,
+      "loss": 0.4173,
+      "step": 100
+    },
+    {
+      "epoch": 0.03819444444444445,
+      "grad_norm": 6.65797233581543,
+      "learning_rate": 1.8923611111111112e-05,
+      "loss": 0.4882,
+      "step": 110
+    },
+    {
+      "epoch": 0.041666666666666664,
+      "grad_norm": 8.373489379882812,
+      "learning_rate": 2.0659722222222223e-05,
+      "loss": 0.4912,
+      "step": 120
+    },
+    {
+      "epoch": 0.04513888888888889,
+      "grad_norm": 5.64392614364624,
+      "learning_rate": 2.2395833333333337e-05,
+      "loss": 0.3324,
+      "step": 130
+    },
+    {
+      "epoch": 0.04861111111111111,
+      "grad_norm": 6.752640724182129,
+      "learning_rate": 2.4131944444444448e-05,
+      "loss": 0.3179,
+      "step": 140
+    },
+    {
+      "epoch": 0.052083333333333336,
+      "grad_norm": 6.095239162445068,
+      "learning_rate": 2.5868055555555558e-05,
+      "loss": 0.2183,
+      "step": 150
+    },
+    {
+      "epoch": 0.05555555555555555,
+      "grad_norm": 11.378706932067871,
+      "learning_rate": 2.760416666666667e-05,
+      "loss": 0.2803,
+      "step": 160
+    },
+    {
+      "epoch": 0.059027777777777776,
+      "grad_norm": 3.435918092727661,
+      "learning_rate": 2.934027777777778e-05,
+      "loss": 0.2328,
+      "step": 170
+    },
+    {
+      "epoch": 0.0625,
+      "grad_norm": 9.458333015441895,
+      "learning_rate": 3.107638888888889e-05,
+      "loss": 0.2167,
+      "step": 180
+    },
+    {
+      "epoch": 0.06597222222222222,
+      "grad_norm": 36.986854553222656,
+      "learning_rate": 3.2812500000000005e-05,
+      "loss": 0.2547,
+      "step": 190
+    },
+    {
+      "epoch": 0.06944444444444445,
+      "grad_norm": 9.727249145507812,
+      "learning_rate": 3.454861111111111e-05,
+      "loss": 0.2211,
+      "step": 200
+    },
+    {
+      "epoch": 0.07291666666666667,
+      "grad_norm": 8.362494468688965,
+      "learning_rate": 3.628472222222222e-05,
+      "loss": 0.2203,
+      "step": 210
+    },
+    {
+      "epoch": 0.0763888888888889,
+      "grad_norm": 8.82453727722168,
+      "learning_rate": 3.8020833333333334e-05,
+      "loss": 0.1637,
+      "step": 220
+    },
+    {
+      "epoch": 0.0798611111111111,
+      "grad_norm": 4.613597393035889,
+      "learning_rate": 3.975694444444444e-05,
+      "loss": 0.3848,
+      "step": 230
+    },
+    {
+      "epoch": 0.08333333333333333,
+      "grad_norm": 12.759675025939941,
+      "learning_rate": 4.149305555555556e-05,
+      "loss": 0.2376,
+      "step": 240
+    },
+    {
+      "epoch": 0.08680555555555555,
+      "grad_norm": 1.1166067123413086,
+      "learning_rate": 4.322916666666667e-05,
+      "loss": 0.2498,
+      "step": 250
+    },
+    {
+      "epoch": 0.09027777777777778,
+      "grad_norm": 6.368658542633057,
+      "learning_rate": 4.4965277777777784e-05,
+      "loss": 0.155,
+      "step": 260
+    },
+    {
+      "epoch": 0.09375,
+      "grad_norm": 28.25998306274414,
+      "learning_rate": 4.670138888888889e-05,
+      "loss": 0.3182,
+      "step": 270
+    },
+    {
+      "epoch": 0.09722222222222222,
+      "grad_norm": 9.867981910705566,
+      "learning_rate": 4.8437500000000005e-05,
+      "loss": 0.3887,
+      "step": 280
+    },
+    {
+      "epoch": 0.1,
+      "eval_accuracy": 0.8109756097560976,
+      "eval_loss": 0.6568520665168762,
+      "eval_runtime": 553.1651,
+      "eval_samples_per_second": 0.889,
+      "eval_steps_per_second": 0.112,
+      "step": 288
+    },
+    {
+      "epoch": 1.0006944444444446,
+      "grad_norm": 11.57461929321289,
+      "learning_rate": 4.9980709876543215e-05,
+      "loss": 0.1709,
+      "step": 290
+    },
+    {
+      "epoch": 1.0041666666666667,
+      "grad_norm": 15.742400169372559,
+      "learning_rate": 4.978780864197531e-05,
+      "loss": 0.1815,
+      "step": 300
+    },
+    {
+      "epoch": 1.007638888888889,
+      "grad_norm": 0.1386760175228119,
+      "learning_rate": 4.959490740740741e-05,
+      "loss": 0.0484,
+      "step": 310
+    },
+    {
+      "epoch": 1.011111111111111,
+      "grad_norm": 0.07092873752117157,
+      "learning_rate": 4.940200617283951e-05,
+      "loss": 0.0473,
+      "step": 320
+    },
+    {
+      "epoch": 1.0145833333333334,
+      "grad_norm": 9.820779800415039,
+      "learning_rate": 4.92091049382716e-05,
+      "loss": 0.1945,
+      "step": 330
+    },
+    {
+      "epoch": 1.0180555555555555,
+      "grad_norm": 18.48552131652832,
+      "learning_rate": 4.9016203703703705e-05,
+      "loss": 0.0859,
+      "step": 340
+    },
+    {
+      "epoch": 1.0215277777777778,
+      "grad_norm": 0.23785443603992462,
+      "learning_rate": 4.882330246913581e-05,
+      "loss": 0.2122,
+      "step": 350
+    },
+    {
+      "epoch": 1.025,
+      "grad_norm": 0.36233341693878174,
+      "learning_rate": 4.86304012345679e-05,
+      "loss": 0.066,
+      "step": 360
+    },
+    {
+      "epoch": 1.0284722222222222,
+      "grad_norm": 1.8070086240768433,
+      "learning_rate": 4.8437500000000005e-05,
+      "loss": 0.0808,
+      "step": 370
+    },
+    {
+      "epoch": 1.0319444444444446,
+      "grad_norm": 6.958967685699463,
+      "learning_rate": 4.82445987654321e-05,
+      "loss": 0.155,
+      "step": 380
+    },
+    {
+      "epoch": 1.0354166666666667,
+      "grad_norm": 7.139225959777832,
+      "learning_rate": 4.8051697530864196e-05,
+      "loss": 0.0155,
+      "step": 390
+    },
+    {
+      "epoch": 1.038888888888889,
+      "grad_norm": 35.38884353637695,
+      "learning_rate": 4.78587962962963e-05,
+      "loss": 0.1301,
+      "step": 400
+    },
+    {
+      "epoch": 1.042361111111111,
+      "grad_norm": 0.03199063986539841,
+      "learning_rate": 4.76658950617284e-05,
+      "loss": 0.269,
+      "step": 410
+    },
+    {
+      "epoch": 1.0458333333333334,
+      "grad_norm": 18.229887008666992,
+      "learning_rate": 4.7472993827160496e-05,
+      "loss": 0.2097,
+      "step": 420
+    },
+    {
+      "epoch": 1.0493055555555555,
+      "grad_norm": 14.07356071472168,
+      "learning_rate": 4.72800925925926e-05,
+      "loss": 0.2068,
+      "step": 430
+    },
+    {
+      "epoch": 1.0527777777777778,
+      "grad_norm": 12.532169342041016,
+      "learning_rate": 4.708719135802469e-05,
+      "loss": 0.0579,
+      "step": 440
+    },
+    {
+      "epoch": 1.05625,
+      "grad_norm": 0.1333385556936264,
+      "learning_rate": 4.689429012345679e-05,
+      "loss": 0.0325,
+      "step": 450
+    },
+    {
+      "epoch": 1.0597222222222222,
+      "grad_norm": 0.01660008542239666,
+      "learning_rate": 4.670138888888889e-05,
+      "loss": 0.0696,
+      "step": 460
+    },
+    {
+      "epoch": 1.0631944444444446,
+      "grad_norm": 6.17806339263916,
+      "learning_rate": 4.650848765432099e-05,
+      "loss": 0.0105,
+      "step": 470
+    },
+    {
+      "epoch": 1.0666666666666667,
+      "grad_norm": 15.657581329345703,
+      "learning_rate": 4.631558641975309e-05,
+      "loss": 0.1608,
+      "step": 480
+    },
+    {
+      "epoch": 1.070138888888889,
+      "grad_norm": 0.2093486487865448,
+      "learning_rate": 4.612268518518519e-05,
+      "loss": 0.1097,
+      "step": 490
+    },
+    {
+      "epoch": 1.073611111111111,
+      "grad_norm": 0.8768461346626282,
+      "learning_rate": 4.5929783950617286e-05,
+      "loss": 0.006,
+      "step": 500
+    },
+    {
+      "epoch": 1.0770833333333334,
+      "grad_norm": 0.07378463447093964,
+      "learning_rate": 4.573688271604938e-05,
+      "loss": 0.1396,
+      "step": 510
+    },
+    {
+      "epoch": 1.0805555555555555,
+      "grad_norm": 0.10317941755056381,
+      "learning_rate": 4.5543981481481484e-05,
+      "loss": 0.104,
+      "step": 520
+    },
+    {
+      "epoch": 1.0840277777777778,
+      "grad_norm": 30.956636428833008,
+      "learning_rate": 4.535108024691358e-05,
+      "loss": 0.1163,
+      "step": 530
+    },
+    {
+      "epoch": 1.0875,
+      "grad_norm": 0.4652070701122284,
+      "learning_rate": 4.515817901234568e-05,
+      "loss": 0.0798,
+      "step": 540
+    },
+    {
+      "epoch": 1.0909722222222222,
+      "grad_norm": 0.08043529838323593,
+      "learning_rate": 4.4965277777777784e-05,
+      "loss": 0.0093,
+      "step": 550
+    },
+    {
+      "epoch": 1.0944444444444446,
+      "grad_norm": 0.0071389698423445225,
+      "learning_rate": 4.477237654320988e-05,
+      "loss": 0.0612,
+      "step": 560
+    },
+    {
+      "epoch": 1.0979166666666667,
+      "grad_norm": 29.914186477661133,
+      "learning_rate": 4.4579475308641974e-05,
+      "loss": 0.0345,
+      "step": 570
+    },
+    {
+      "epoch": 1.1,
+      "eval_accuracy": 0.8495934959349594,
+      "eval_loss": 0.8915717005729675,
+      "eval_runtime": 117.2393,
+      "eval_samples_per_second": 4.197,
+      "eval_steps_per_second": 0.529,
+      "step": 576
+    },
+    {
+      "epoch": 2.001388888888889,
+      "grad_norm": 0.0068489667028188705,
+      "learning_rate": 4.4386574074074077e-05,
+      "loss": 0.0045,
+      "step": 580
+    },
+    {
+      "epoch": 2.004861111111111,
+      "grad_norm": 0.01323405746370554,
+      "learning_rate": 4.419367283950617e-05,
+      "loss": 0.0197,
+      "step": 590
+    },
+    {
+      "epoch": 2.0083333333333333,
+      "grad_norm": 7.431529998779297,
+      "learning_rate": 4.4000771604938274e-05,
+      "loss": 0.0124,
+      "step": 600
+    },
+    {
+      "epoch": 2.0118055555555556,
+      "grad_norm": 0.0020631514489650726,
+      "learning_rate": 4.3807870370370376e-05,
+      "loss": 0.0004,
+      "step": 610
+    },
+    {
+      "epoch": 2.015277777777778,
+      "grad_norm": 0.0061363764107227325,
+      "learning_rate": 4.361496913580247e-05,
+      "loss": 0.0008,
+      "step": 620
+    },
+    {
+      "epoch": 2.01875,
+      "grad_norm": 0.005492759868502617,
+      "learning_rate": 4.342206790123457e-05,
+      "loss": 0.0134,
+      "step": 630
+    },
+    {
+      "epoch": 2.022222222222222,
+      "grad_norm": 0.008418605662882328,
+      "learning_rate": 4.322916666666667e-05,
+      "loss": 0.0012,
+      "step": 640
+    },
+    {
+      "epoch": 2.0256944444444445,
+      "grad_norm": 0.23562128841876984,
+      "learning_rate": 4.3036265432098765e-05,
+      "loss": 0.0248,
+      "step": 650
+    },
+    {
+      "epoch": 2.029166666666667,
+      "grad_norm": 0.0033202702179551125,
+      "learning_rate": 4.284336419753087e-05,
+      "loss": 0.0005,
+      "step": 660
+    },
+    {
+      "epoch": 2.032638888888889,
+      "grad_norm": 0.011993384920060635,
+      "learning_rate": 4.265046296296297e-05,
+      "loss": 0.0003,
+      "step": 670
+    },
+    {
+      "epoch": 2.036111111111111,
+      "grad_norm": 0.02084469608962536,
+      "learning_rate": 4.2457561728395065e-05,
+      "loss": 0.001,
+      "step": 680
+    },
+    {
+      "epoch": 2.0395833333333333,
+      "grad_norm": 0.0016855947906151414,
+      "learning_rate": 4.226466049382716e-05,
+      "loss": 0.0004,
+      "step": 690
+    },
+    {
+      "epoch": 2.0430555555555556,
+      "grad_norm": 0.016722600907087326,
+      "learning_rate": 4.207175925925926e-05,
+      "loss": 0.0005,
+      "step": 700
+    },
+    {
+      "epoch": 2.046527777777778,
+      "grad_norm": 0.013101249001920223,
+      "learning_rate": 4.187885802469136e-05,
+      "loss": 0.0077,
+      "step": 710
+    },
+    {
+      "epoch": 2.05,
+      "grad_norm": 0.004911151714622974,
+      "learning_rate": 4.168595679012346e-05,
+      "loss": 0.0088,
+      "step": 720
+    },
+    {
+      "epoch": 2.053472222222222,
+      "grad_norm": 0.0035500626545399427,
+      "learning_rate": 4.149305555555556e-05,
+      "loss": 0.0004,
+      "step": 730
+    },
+    {
+      "epoch": 2.0569444444444445,
+      "grad_norm": 0.003059967188164592,
+      "learning_rate": 4.130015432098766e-05,
+      "loss": 0.0009,
+      "step": 740
+    },
+    {
+      "epoch": 2.060416666666667,
+      "grad_norm": 2.3829972743988037,
+      "learning_rate": 4.110725308641975e-05,
+      "loss": 0.0007,
+      "step": 750
+    },
+    {
+      "epoch": 2.063888888888889,
+      "grad_norm": 0.0023200404830276966,
+      "learning_rate": 4.0914351851851855e-05,
+      "loss": 0.0479,
+      "step": 760
+    },
+    {
+      "epoch": 2.067361111111111,
+      "grad_norm": 0.0032272222451865673,
+      "learning_rate": 4.072145061728395e-05,
+      "loss": 0.0001,
+      "step": 770
+    },
+    {
+      "epoch": 2.0708333333333333,
+      "grad_norm": 0.0031554449815303087,
+      "learning_rate": 4.052854938271605e-05,
+      "loss": 0.0102,
+      "step": 780
+    },
+    {
+      "epoch": 2.0743055555555556,
+      "grad_norm": 0.6856481432914734,
+      "learning_rate": 4.033564814814815e-05,
+      "loss": 0.0003,
+      "step": 790
+    },
+    {
+      "epoch": 2.077777777777778,
+      "grad_norm": 0.12966661155223846,
+      "learning_rate": 4.014274691358025e-05,
+      "loss": 0.0002,
+      "step": 800
+    },
+    {
+      "epoch": 2.08125,
+      "grad_norm": 0.005961376242339611,
+      "learning_rate": 3.9949845679012346e-05,
+      "loss": 0.1566,
+      "step": 810
+    },
+    {
+      "epoch": 2.084722222222222,
+      "grad_norm": 0.0027580575551837683,
+      "learning_rate": 3.975694444444444e-05,
+      "loss": 0.0522,
+      "step": 820
+    },
+    {
+      "epoch": 2.0881944444444445,
+      "grad_norm": 0.006767706014215946,
+      "learning_rate": 3.956404320987654e-05,
+      "loss": 0.0002,
+      "step": 830
+    },
+    {
+      "epoch": 2.091666666666667,
+      "grad_norm": 0.0025393294636160135,
+      "learning_rate": 3.9371141975308645e-05,
+      "loss": 0.0002,
+      "step": 840
+    },
+    {
+      "epoch": 2.095138888888889,
+      "grad_norm": 0.0009938559960573912,
+      "learning_rate": 3.917824074074074e-05,
+      "loss": 0.0001,
+      "step": 850
+    },
+    {
+      "epoch": 2.098611111111111,
+      "grad_norm": 0.001436732243746519,
+      "learning_rate": 3.898533950617284e-05,
+      "loss": 0.0117,
+      "step": 860
+    },
+    {
+      "epoch": 2.1,
+      "eval_accuracy": 0.8373983739837398,
+      "eval_loss": 1.0666677951812744,
+      "eval_runtime": 120.4352,
+      "eval_samples_per_second": 4.085,
+      "eval_steps_per_second": 0.515,
+      "step": 864
+    },
+    {
+      "epoch": 3.002083333333333,
+      "grad_norm": 0.03854840621352196,
+      "learning_rate": 3.879243827160494e-05,
+      "loss": 0.0022,
+      "step": 870
+    },
+    {
+      "epoch": 3.0055555555555555,
+      "grad_norm": 0.0014166207984089851,
+      "learning_rate": 3.8599537037037034e-05,
+      "loss": 0.0001,
+      "step": 880
+    },
+    {
+      "epoch": 3.009027777777778,
+      "grad_norm": 0.0014319154433906078,
+      "learning_rate": 3.8406635802469136e-05,
+      "loss": 0.0001,
+      "step": 890
+    },
+    {
+      "epoch": 3.0125,
+      "grad_norm": 0.0013770597288385034,
+      "learning_rate": 3.821373456790124e-05,
+      "loss": 0.0003,
+      "step": 900
+    },
+    {
+      "epoch": 3.015972222222222,
+      "grad_norm": 0.006136466283351183,
+      "learning_rate": 3.8020833333333334e-05,
+      "loss": 0.0001,
+      "step": 910
+    },
+    {
+      "epoch": 3.0194444444444444,
+      "grad_norm": 0.0012927391799166799,
+      "learning_rate": 3.7827932098765436e-05,
+      "loss": 0.0001,
+      "step": 920
+    },
+    {
+      "epoch": 3.0229166666666667,
+      "grad_norm": 0.0009090897510759532,
+      "learning_rate": 3.763503086419753e-05,
+      "loss": 0.0001,
+      "step": 930
+    },
+    {
+      "epoch": 3.026388888888889,
+      "grad_norm": 0.007072359789162874,
+      "learning_rate": 3.744212962962963e-05,
+      "loss": 0.0001,
+      "step": 940
+    },
+    {
+      "epoch": 3.029861111111111,
+      "grad_norm": 0.000913340481929481,
+      "learning_rate": 3.724922839506173e-05,
+      "loss": 0.0001,
+      "step": 950
+    },
+    {
+      "epoch": 3.033333333333333,
+      "grad_norm": 0.0012041399022564292,
+      "learning_rate": 3.705632716049383e-05,
+      "loss": 0.0001,
+      "step": 960
+    },
+    {
+      "epoch": 3.0368055555555555,
+      "grad_norm": 0.0008073053904809058,
+      "learning_rate": 3.6863425925925926e-05,
+      "loss": 0.0,
+      "step": 970
+    },
+    {
+      "epoch": 3.040277777777778,
+      "grad_norm": 0.0016801038291305304,
+      "learning_rate": 3.667052469135803e-05,
+      "loss": 0.0002,
+      "step": 980
+    },
+    {
+      "epoch": 3.04375,
+      "grad_norm": 2.7720425128936768,
+      "learning_rate": 3.647762345679013e-05,
+      "loss": 0.0095,
+      "step": 990
+    },
+    {
+      "epoch": 3.047222222222222,
+      "grad_norm": 0.0007318072021007538,
+      "learning_rate": 3.628472222222222e-05,
+      "loss": 0.0001,
+      "step": 1000
+    },
+    {
+      "epoch": 3.0506944444444444,
+      "grad_norm": 0.0022503375075757504,
+      "learning_rate": 3.609182098765432e-05,
+      "loss": 0.0005,
+      "step": 1010
+    },
+    {
+      "epoch": 3.0541666666666667,
+      "grad_norm": 0.0006915331468917429,
+      "learning_rate": 3.5898919753086424e-05,
+      "loss": 0.0001,
+      "step": 1020
+    },
+    {
+      "epoch": 3.057638888888889,
+      "grad_norm": 0.0023397556506097317,
+      "learning_rate": 3.570601851851852e-05,
+      "loss": 0.0002,
+      "step": 1030
+    },
+    {
+      "epoch": 3.061111111111111,
+      "grad_norm": 0.0009047880303114653,
+      "learning_rate": 3.551311728395062e-05,
+      "loss": 0.0028,
+      "step": 1040
+    },
+    {
+      "epoch": 3.064583333333333,
+      "grad_norm": 0.003123590722680092,
+      "learning_rate": 3.532021604938272e-05,
+      "loss": 0.0,
+      "step": 1050
+    },
+    {
+      "epoch": 3.0680555555555555,
+      "grad_norm": 0.003919905982911587,
+      "learning_rate": 3.512731481481481e-05,
+      "loss": 0.0002,
+      "step": 1060
+    },
+    {
+      "epoch": 3.071527777777778,
+      "grad_norm": 0.001332888612523675,
+      "learning_rate": 3.4934413580246915e-05,
+      "loss": 0.003,
+      "step": 1070
+    },
+    {
+      "epoch": 3.075,
+      "grad_norm": 0.000786742486525327,
+      "learning_rate": 3.474151234567901e-05,
+      "loss": 0.0219,
+      "step": 1080
+    },
+    {
+      "epoch": 3.078472222222222,
+      "grad_norm": 0.0009092329419218004,
+      "learning_rate": 3.454861111111111e-05,
+      "loss": 0.0001,
+      "step": 1090
+    },
+    {
+      "epoch": 3.0819444444444444,
+      "grad_norm": 0.022490588948130608,
+      "learning_rate": 3.4355709876543214e-05,
+      "loss": 0.0001,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0854166666666667,
+      "grad_norm": 0.0010289773344993591,
+      "learning_rate": 3.416280864197531e-05,
+      "loss": 0.0,
+      "step": 1110
+    },
+    {
+      "epoch": 3.088888888888889,
+      "grad_norm": 0.03139325976371765,
+      "learning_rate": 3.396990740740741e-05,
+      "loss": 0.0,
+      "step": 1120
+    },
+    {
+      "epoch": 3.092361111111111,
+      "grad_norm": 1.5481621026992798,
+      "learning_rate": 3.377700617283951e-05,
+      "loss": 0.0007,
+      "step": 1130
+    },
+    {
+      "epoch": 3.095833333333333,
+      "grad_norm": 0.4089558720588684,
+      "learning_rate": 3.35841049382716e-05,
+      "loss": 0.0017,
+      "step": 1140
+    },
+    {
+      "epoch": 3.0993055555555555,
+      "grad_norm": 0.024518415331840515,
+      "learning_rate": 3.3391203703703705e-05,
+      "loss": 0.0001,
+      "step": 1150
+    },
+    {
+      "epoch": 3.1,
+      "eval_accuracy": 0.7764227642276422,
+      "eval_loss": 1.549495816230774,
+      "eval_runtime": 119.7871,
+      "eval_samples_per_second": 4.107,
+      "eval_steps_per_second": 0.518,
+      "step": 1152
+    },
+    {
+      "epoch": 3.1,
+      "step": 1152,
+      "total_flos": 1.1483731256571593e+19,
+      "train_loss": 0.12870611023062553,
+      "train_runtime": 7836.725,
+      "train_samples_per_second": 2.94,
+      "train_steps_per_second": 0.368
+    },
+    {
+      "epoch": 3.1,
+      "eval_accuracy": 0.8502994011976048,
+      "eval_loss": 0.6543134450912476,
+      "eval_runtime": 569.5516,
+      "eval_samples_per_second": 0.88,
+      "eval_steps_per_second": 0.111,
+      "step": 1152
+    },
+    {
+      "epoch": 3.1,
+      "eval_accuracy": 0.8502994011976048,
+      "eval_loss": 0.6543134450912476,
+      "eval_runtime": 119.6956,
+      "eval_samples_per_second": 4.186,
+      "eval_steps_per_second": 0.526,
+      "step": 1152
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2880,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 9223372036854775807,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 2,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 2
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1483731256571593e+19,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}