dermascan / model /trainer_state.json
manan05's picture
Add all
01353e5
{
"best_metric": 0.7673663168415792,
"best_model_checkpoint": "convnextv2-base-22k-224-finetuned-dermnet-large/checkpoint-9730",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 9730,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 5.13874614594039e-07,
"loss": 2.0737,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 1.027749229188078e-06,
"loss": 1.9478,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 1.5416238437821174e-06,
"loss": 2.0247,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 2.055498458376156e-06,
"loss": 2.2624,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 2.5693730729701954e-06,
"loss": 1.9465,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 3.0832476875642348e-06,
"loss": 1.8196,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 3.5971223021582732e-06,
"loss": 1.7665,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 4.110996916752312e-06,
"loss": 1.7605,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 4.6248715313463515e-06,
"loss": 1.7962,
"step": 90
},
{
"epoch": 0.1,
"learning_rate": 5.138746145940391e-06,
"loss": 1.6105,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 5.65262076053443e-06,
"loss": 1.9133,
"step": 110
},
{
"epoch": 0.12,
"learning_rate": 6.1664953751284695e-06,
"loss": 1.5583,
"step": 120
},
{
"epoch": 0.13,
"learning_rate": 6.680369989722508e-06,
"loss": 1.7534,
"step": 130
},
{
"epoch": 0.14,
"learning_rate": 7.1942446043165465e-06,
"loss": 1.6163,
"step": 140
},
{
"epoch": 0.15,
"learning_rate": 7.708119218910587e-06,
"loss": 1.7714,
"step": 150
},
{
"epoch": 0.16,
"learning_rate": 8.221993833504624e-06,
"loss": 1.6438,
"step": 160
},
{
"epoch": 0.17,
"learning_rate": 8.735868448098665e-06,
"loss": 1.5433,
"step": 170
},
{
"epoch": 0.18,
"learning_rate": 9.249743062692703e-06,
"loss": 1.5969,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 9.763617677286742e-06,
"loss": 1.5838,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 1.0277492291880782e-05,
"loss": 1.6643,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 1.0791366906474821e-05,
"loss": 1.435,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 1.130524152106886e-05,
"loss": 1.357,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 1.1819116135662898e-05,
"loss": 1.3119,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 1.2332990750256939e-05,
"loss": 1.2994,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 1.2846865364850977e-05,
"loss": 1.3524,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 1.3360739979445016e-05,
"loss": 1.3284,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 1.3874614594039057e-05,
"loss": 1.3976,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 1.4388489208633093e-05,
"loss": 1.3794,
"step": 280
},
{
"epoch": 0.3,
"learning_rate": 1.4902363823227134e-05,
"loss": 1.3362,
"step": 290
},
{
"epoch": 0.31,
"learning_rate": 1.5416238437821173e-05,
"loss": 1.2241,
"step": 300
},
{
"epoch": 0.32,
"learning_rate": 1.593011305241521e-05,
"loss": 1.2597,
"step": 310
},
{
"epoch": 0.33,
"learning_rate": 1.644398766700925e-05,
"loss": 1.1817,
"step": 320
},
{
"epoch": 0.34,
"learning_rate": 1.695786228160329e-05,
"loss": 1.1404,
"step": 330
},
{
"epoch": 0.35,
"learning_rate": 1.747173689619733e-05,
"loss": 1.0917,
"step": 340
},
{
"epoch": 0.36,
"learning_rate": 1.7985611510791367e-05,
"loss": 1.233,
"step": 350
},
{
"epoch": 0.37,
"learning_rate": 1.8499486125385406e-05,
"loss": 1.1605,
"step": 360
},
{
"epoch": 0.38,
"learning_rate": 1.9013360739979445e-05,
"loss": 1.0923,
"step": 370
},
{
"epoch": 0.39,
"learning_rate": 1.9527235354573485e-05,
"loss": 1.12,
"step": 380
},
{
"epoch": 0.4,
"learning_rate": 2.0041109969167524e-05,
"loss": 1.0117,
"step": 390
},
{
"epoch": 0.41,
"learning_rate": 2.0554984583761563e-05,
"loss": 1.0255,
"step": 400
},
{
"epoch": 0.42,
"learning_rate": 2.1068859198355603e-05,
"loss": 1.0407,
"step": 410
},
{
"epoch": 0.43,
"learning_rate": 2.1582733812949642e-05,
"loss": 1.1729,
"step": 420
},
{
"epoch": 0.44,
"learning_rate": 2.209660842754368e-05,
"loss": 1.0569,
"step": 430
},
{
"epoch": 0.45,
"learning_rate": 2.261048304213772e-05,
"loss": 1.0047,
"step": 440
},
{
"epoch": 0.46,
"learning_rate": 2.312435765673176e-05,
"loss": 0.9317,
"step": 450
},
{
"epoch": 0.47,
"learning_rate": 2.3638232271325796e-05,
"loss": 0.7828,
"step": 460
},
{
"epoch": 0.48,
"learning_rate": 2.4152106885919835e-05,
"loss": 0.931,
"step": 470
},
{
"epoch": 0.49,
"learning_rate": 2.4665981500513878e-05,
"loss": 1.5736,
"step": 480
},
{
"epoch": 0.5,
"learning_rate": 2.5179856115107914e-05,
"loss": 2.1362,
"step": 490
},
{
"epoch": 0.51,
"learning_rate": 2.5693730729701953e-05,
"loss": 1.8763,
"step": 500
},
{
"epoch": 0.52,
"learning_rate": 2.6207605344295993e-05,
"loss": 1.8256,
"step": 510
},
{
"epoch": 0.53,
"learning_rate": 2.6721479958890032e-05,
"loss": 1.8017,
"step": 520
},
{
"epoch": 0.54,
"learning_rate": 2.7235354573484075e-05,
"loss": 1.7021,
"step": 530
},
{
"epoch": 0.55,
"learning_rate": 2.7749229188078114e-05,
"loss": 1.7462,
"step": 540
},
{
"epoch": 0.57,
"learning_rate": 2.8263103802672147e-05,
"loss": 1.9649,
"step": 550
},
{
"epoch": 0.58,
"learning_rate": 2.8776978417266186e-05,
"loss": 1.823,
"step": 560
},
{
"epoch": 0.59,
"learning_rate": 2.929085303186023e-05,
"loss": 1.6174,
"step": 570
},
{
"epoch": 0.6,
"learning_rate": 2.9804727646454268e-05,
"loss": 1.6988,
"step": 580
},
{
"epoch": 0.61,
"learning_rate": 3.0318602261048307e-05,
"loss": 1.5515,
"step": 590
},
{
"epoch": 0.62,
"learning_rate": 3.083247687564235e-05,
"loss": 2.0989,
"step": 600
},
{
"epoch": 0.63,
"learning_rate": 3.1346351490236386e-05,
"loss": 1.8226,
"step": 610
},
{
"epoch": 0.64,
"learning_rate": 3.186022610483042e-05,
"loss": 1.8034,
"step": 620
},
{
"epoch": 0.65,
"learning_rate": 3.237410071942446e-05,
"loss": 1.8546,
"step": 630
},
{
"epoch": 0.66,
"learning_rate": 3.28879753340185e-05,
"loss": 1.7997,
"step": 640
},
{
"epoch": 0.67,
"learning_rate": 3.340184994861254e-05,
"loss": 1.7317,
"step": 650
},
{
"epoch": 0.68,
"learning_rate": 3.391572456320658e-05,
"loss": 1.6874,
"step": 660
},
{
"epoch": 0.69,
"learning_rate": 3.442959917780062e-05,
"loss": 2.0054,
"step": 670
},
{
"epoch": 0.7,
"learning_rate": 3.494347379239466e-05,
"loss": 1.7331,
"step": 680
},
{
"epoch": 0.71,
"learning_rate": 3.5457348406988694e-05,
"loss": 1.8541,
"step": 690
},
{
"epoch": 0.72,
"learning_rate": 3.597122302158273e-05,
"loss": 1.8074,
"step": 700
},
{
"epoch": 0.73,
"learning_rate": 3.648509763617677e-05,
"loss": 1.888,
"step": 710
},
{
"epoch": 0.74,
"learning_rate": 3.699897225077081e-05,
"loss": 1.8043,
"step": 720
},
{
"epoch": 0.75,
"learning_rate": 3.751284686536485e-05,
"loss": 1.9325,
"step": 730
},
{
"epoch": 0.76,
"learning_rate": 3.802672147995889e-05,
"loss": 1.7977,
"step": 740
},
{
"epoch": 0.77,
"learning_rate": 3.854059609455293e-05,
"loss": 1.7316,
"step": 750
},
{
"epoch": 0.78,
"learning_rate": 3.905447070914697e-05,
"loss": 1.7019,
"step": 760
},
{
"epoch": 0.79,
"learning_rate": 3.956834532374101e-05,
"loss": 1.839,
"step": 770
},
{
"epoch": 0.8,
"learning_rate": 4.008221993833505e-05,
"loss": 1.5638,
"step": 780
},
{
"epoch": 0.81,
"learning_rate": 4.059609455292909e-05,
"loss": 1.7119,
"step": 790
},
{
"epoch": 0.82,
"learning_rate": 4.110996916752313e-05,
"loss": 1.5968,
"step": 800
},
{
"epoch": 0.83,
"learning_rate": 4.1623843782117166e-05,
"loss": 1.811,
"step": 810
},
{
"epoch": 0.84,
"learning_rate": 4.2137718396711205e-05,
"loss": 1.7695,
"step": 820
},
{
"epoch": 0.85,
"learning_rate": 4.265159301130524e-05,
"loss": 1.6472,
"step": 830
},
{
"epoch": 0.86,
"learning_rate": 4.3165467625899284e-05,
"loss": 1.9663,
"step": 840
},
{
"epoch": 0.87,
"learning_rate": 4.367934224049332e-05,
"loss": 1.8655,
"step": 850
},
{
"epoch": 0.88,
"learning_rate": 4.419321685508736e-05,
"loss": 1.7894,
"step": 860
},
{
"epoch": 0.89,
"learning_rate": 4.47070914696814e-05,
"loss": 1.7302,
"step": 870
},
{
"epoch": 0.9,
"learning_rate": 4.522096608427544e-05,
"loss": 1.8222,
"step": 880
},
{
"epoch": 0.91,
"learning_rate": 4.573484069886948e-05,
"loss": 1.6887,
"step": 890
},
{
"epoch": 0.92,
"learning_rate": 4.624871531346352e-05,
"loss": 1.4496,
"step": 900
},
{
"epoch": 0.94,
"learning_rate": 4.676258992805755e-05,
"loss": 1.6265,
"step": 910
},
{
"epoch": 0.95,
"learning_rate": 4.727646454265159e-05,
"loss": 1.7892,
"step": 920
},
{
"epoch": 0.96,
"learning_rate": 4.779033915724563e-05,
"loss": 1.7873,
"step": 930
},
{
"epoch": 0.97,
"learning_rate": 4.830421377183967e-05,
"loss": 1.6579,
"step": 940
},
{
"epoch": 0.98,
"learning_rate": 4.881808838643372e-05,
"loss": 1.7282,
"step": 950
},
{
"epoch": 0.99,
"learning_rate": 4.9331963001027756e-05,
"loss": 1.7008,
"step": 960
},
{
"epoch": 1.0,
"learning_rate": 4.9845837615621795e-05,
"loss": 1.7339,
"step": 970
},
{
"epoch": 1.0,
"eval_accuracy": 0.5409795102448776,
"eval_loss": 1.5397833585739136,
"eval_runtime": 103.1363,
"eval_samples_per_second": 38.803,
"eval_steps_per_second": 2.434,
"step": 973
},
{
"epoch": 1.01,
"learning_rate": 4.996003197442047e-05,
"loss": 1.6536,
"step": 980
},
{
"epoch": 1.02,
"learning_rate": 4.9902934795021126e-05,
"loss": 1.3931,
"step": 990
},
{
"epoch": 1.03,
"learning_rate": 4.9845837615621795e-05,
"loss": 1.4986,
"step": 1000
},
{
"epoch": 1.04,
"learning_rate": 4.978874043622245e-05,
"loss": 1.4395,
"step": 1010
},
{
"epoch": 1.05,
"learning_rate": 4.9731643256823115e-05,
"loss": 1.4261,
"step": 1020
},
{
"epoch": 1.06,
"learning_rate": 4.967454607742378e-05,
"loss": 1.3916,
"step": 1030
},
{
"epoch": 1.07,
"learning_rate": 4.961744889802444e-05,
"loss": 1.5453,
"step": 1040
},
{
"epoch": 1.08,
"learning_rate": 4.9560351718625104e-05,
"loss": 1.5387,
"step": 1050
},
{
"epoch": 1.09,
"learning_rate": 4.950325453922576e-05,
"loss": 1.6456,
"step": 1060
},
{
"epoch": 1.1,
"learning_rate": 4.944615735982643e-05,
"loss": 1.5404,
"step": 1070
},
{
"epoch": 1.11,
"learning_rate": 4.9389060180427086e-05,
"loss": 1.5847,
"step": 1080
},
{
"epoch": 1.12,
"learning_rate": 4.9331963001027756e-05,
"loss": 1.6313,
"step": 1090
},
{
"epoch": 1.13,
"learning_rate": 4.927486582162841e-05,
"loss": 1.5878,
"step": 1100
},
{
"epoch": 1.14,
"learning_rate": 4.9217768642229075e-05,
"loss": 1.3887,
"step": 1110
},
{
"epoch": 1.15,
"learning_rate": 4.916067146282974e-05,
"loss": 1.5207,
"step": 1120
},
{
"epoch": 1.16,
"learning_rate": 4.91035742834304e-05,
"loss": 1.7028,
"step": 1130
},
{
"epoch": 1.17,
"learning_rate": 4.9046477104031064e-05,
"loss": 1.4021,
"step": 1140
},
{
"epoch": 1.18,
"learning_rate": 4.898937992463172e-05,
"loss": 1.3693,
"step": 1150
},
{
"epoch": 1.19,
"learning_rate": 4.893228274523239e-05,
"loss": 1.4952,
"step": 1160
},
{
"epoch": 1.2,
"learning_rate": 4.887518556583305e-05,
"loss": 1.5499,
"step": 1170
},
{
"epoch": 1.21,
"learning_rate": 4.881808838643372e-05,
"loss": 1.3218,
"step": 1180
},
{
"epoch": 1.22,
"learning_rate": 4.876099120703437e-05,
"loss": 1.3657,
"step": 1190
},
{
"epoch": 1.23,
"learning_rate": 4.8703894027635036e-05,
"loss": 1.4617,
"step": 1200
},
{
"epoch": 1.24,
"learning_rate": 4.86467968482357e-05,
"loss": 1.3737,
"step": 1210
},
{
"epoch": 1.25,
"learning_rate": 4.858969966883636e-05,
"loss": 1.4115,
"step": 1220
},
{
"epoch": 1.26,
"learning_rate": 4.8532602489437025e-05,
"loss": 1.5131,
"step": 1230
},
{
"epoch": 1.27,
"learning_rate": 4.847550531003769e-05,
"loss": 1.2952,
"step": 1240
},
{
"epoch": 1.28,
"learning_rate": 4.841840813063835e-05,
"loss": 1.4604,
"step": 1250
},
{
"epoch": 1.29,
"learning_rate": 4.836131095123901e-05,
"loss": 1.3625,
"step": 1260
},
{
"epoch": 1.31,
"learning_rate": 4.830421377183967e-05,
"loss": 1.3646,
"step": 1270
},
{
"epoch": 1.32,
"learning_rate": 4.8247116592440334e-05,
"loss": 1.5038,
"step": 1280
},
{
"epoch": 1.33,
"learning_rate": 4.8190019413041e-05,
"loss": 1.3761,
"step": 1290
},
{
"epoch": 1.34,
"learning_rate": 4.813292223364166e-05,
"loss": 1.4109,
"step": 1300
},
{
"epoch": 1.35,
"learning_rate": 4.807582505424232e-05,
"loss": 1.4483,
"step": 1310
},
{
"epoch": 1.36,
"learning_rate": 4.8018727874842986e-05,
"loss": 1.519,
"step": 1320
},
{
"epoch": 1.37,
"learning_rate": 4.796163069544365e-05,
"loss": 1.3254,
"step": 1330
},
{
"epoch": 1.38,
"learning_rate": 4.790453351604431e-05,
"loss": 1.4592,
"step": 1340
},
{
"epoch": 1.39,
"learning_rate": 4.7847436336644975e-05,
"loss": 1.5981,
"step": 1350
},
{
"epoch": 1.4,
"learning_rate": 4.779033915724563e-05,
"loss": 1.4727,
"step": 1360
},
{
"epoch": 1.41,
"learning_rate": 4.77332419778463e-05,
"loss": 1.4419,
"step": 1370
},
{
"epoch": 1.42,
"learning_rate": 4.767614479844696e-05,
"loss": 1.4821,
"step": 1380
},
{
"epoch": 1.43,
"learning_rate": 4.761904761904762e-05,
"loss": 1.2204,
"step": 1390
},
{
"epoch": 1.44,
"learning_rate": 4.7561950439648283e-05,
"loss": 1.4889,
"step": 1400
},
{
"epoch": 1.45,
"learning_rate": 4.7504853260248946e-05,
"loss": 1.4497,
"step": 1410
},
{
"epoch": 1.46,
"learning_rate": 4.744775608084961e-05,
"loss": 1.2922,
"step": 1420
},
{
"epoch": 1.47,
"learning_rate": 4.7390658901450266e-05,
"loss": 1.3788,
"step": 1430
},
{
"epoch": 1.48,
"learning_rate": 4.7333561722050936e-05,
"loss": 1.3694,
"step": 1440
},
{
"epoch": 1.49,
"learning_rate": 4.727646454265159e-05,
"loss": 1.6264,
"step": 1450
},
{
"epoch": 1.5,
"learning_rate": 4.721936736325226e-05,
"loss": 1.3413,
"step": 1460
},
{
"epoch": 1.51,
"learning_rate": 4.716227018385292e-05,
"loss": 1.3771,
"step": 1470
},
{
"epoch": 1.52,
"learning_rate": 4.710517300445358e-05,
"loss": 1.4509,
"step": 1480
},
{
"epoch": 1.53,
"learning_rate": 4.7048075825054244e-05,
"loss": 1.2803,
"step": 1490
},
{
"epoch": 1.54,
"learning_rate": 4.699097864565491e-05,
"loss": 1.4746,
"step": 1500
},
{
"epoch": 1.55,
"learning_rate": 4.693388146625557e-05,
"loss": 1.5008,
"step": 1510
},
{
"epoch": 1.56,
"learning_rate": 4.6876784286856226e-05,
"loss": 1.4823,
"step": 1520
},
{
"epoch": 1.57,
"learning_rate": 4.6819687107456896e-05,
"loss": 1.3437,
"step": 1530
},
{
"epoch": 1.58,
"learning_rate": 4.676258992805755e-05,
"loss": 1.4004,
"step": 1540
},
{
"epoch": 1.59,
"learning_rate": 4.670549274865822e-05,
"loss": 1.3797,
"step": 1550
},
{
"epoch": 1.6,
"learning_rate": 4.664839556925888e-05,
"loss": 1.5489,
"step": 1560
},
{
"epoch": 1.61,
"learning_rate": 4.659129838985954e-05,
"loss": 1.3143,
"step": 1570
},
{
"epoch": 1.62,
"learning_rate": 4.6534201210460205e-05,
"loss": 1.4755,
"step": 1580
},
{
"epoch": 1.63,
"learning_rate": 4.647710403106087e-05,
"loss": 1.4333,
"step": 1590
},
{
"epoch": 1.64,
"learning_rate": 4.642000685166153e-05,
"loss": 1.4428,
"step": 1600
},
{
"epoch": 1.65,
"learning_rate": 4.6362909672262194e-05,
"loss": 1.2202,
"step": 1610
},
{
"epoch": 1.66,
"learning_rate": 4.630581249286286e-05,
"loss": 1.347,
"step": 1620
},
{
"epoch": 1.68,
"learning_rate": 4.624871531346352e-05,
"loss": 1.3844,
"step": 1630
},
{
"epoch": 1.69,
"learning_rate": 4.6191618134064176e-05,
"loss": 1.2675,
"step": 1640
},
{
"epoch": 1.7,
"learning_rate": 4.613452095466484e-05,
"loss": 1.3218,
"step": 1650
},
{
"epoch": 1.71,
"learning_rate": 4.60774237752655e-05,
"loss": 1.4464,
"step": 1660
},
{
"epoch": 1.72,
"learning_rate": 4.6020326595866165e-05,
"loss": 1.2809,
"step": 1670
},
{
"epoch": 1.73,
"learning_rate": 4.596322941646683e-05,
"loss": 1.2388,
"step": 1680
},
{
"epoch": 1.74,
"learning_rate": 4.590613223706749e-05,
"loss": 1.4035,
"step": 1690
},
{
"epoch": 1.75,
"learning_rate": 4.5849035057668155e-05,
"loss": 1.3908,
"step": 1700
},
{
"epoch": 1.76,
"learning_rate": 4.579193787826882e-05,
"loss": 1.1645,
"step": 1710
},
{
"epoch": 1.77,
"learning_rate": 4.573484069886948e-05,
"loss": 1.1677,
"step": 1720
},
{
"epoch": 1.78,
"learning_rate": 4.567774351947014e-05,
"loss": 1.4892,
"step": 1730
},
{
"epoch": 1.79,
"learning_rate": 4.562064634007081e-05,
"loss": 1.3076,
"step": 1740
},
{
"epoch": 1.8,
"learning_rate": 4.556354916067146e-05,
"loss": 1.3253,
"step": 1750
},
{
"epoch": 1.81,
"learning_rate": 4.5506451981272126e-05,
"loss": 1.4176,
"step": 1760
},
{
"epoch": 1.82,
"learning_rate": 4.544935480187279e-05,
"loss": 1.2577,
"step": 1770
},
{
"epoch": 1.83,
"learning_rate": 4.539225762247345e-05,
"loss": 1.3966,
"step": 1780
},
{
"epoch": 1.84,
"learning_rate": 4.5335160443074115e-05,
"loss": 1.282,
"step": 1790
},
{
"epoch": 1.85,
"learning_rate": 4.527806326367477e-05,
"loss": 1.5133,
"step": 1800
},
{
"epoch": 1.86,
"learning_rate": 4.522096608427544e-05,
"loss": 1.263,
"step": 1810
},
{
"epoch": 1.87,
"learning_rate": 4.51638689048761e-05,
"loss": 1.4085,
"step": 1820
},
{
"epoch": 1.88,
"learning_rate": 4.510677172547677e-05,
"loss": 1.2634,
"step": 1830
},
{
"epoch": 1.89,
"learning_rate": 4.5049674546077424e-05,
"loss": 1.184,
"step": 1840
},
{
"epoch": 1.9,
"learning_rate": 4.4992577366678094e-05,
"loss": 1.2223,
"step": 1850
},
{
"epoch": 1.91,
"learning_rate": 4.493548018727875e-05,
"loss": 1.3867,
"step": 1860
},
{
"epoch": 1.92,
"learning_rate": 4.487838300787941e-05,
"loss": 1.1988,
"step": 1870
},
{
"epoch": 1.93,
"learning_rate": 4.4821285828480076e-05,
"loss": 1.3555,
"step": 1880
},
{
"epoch": 1.94,
"learning_rate": 4.476418864908073e-05,
"loss": 1.1373,
"step": 1890
},
{
"epoch": 1.95,
"learning_rate": 4.47070914696814e-05,
"loss": 1.3534,
"step": 1900
},
{
"epoch": 1.96,
"learning_rate": 4.464999429028206e-05,
"loss": 1.287,
"step": 1910
},
{
"epoch": 1.97,
"learning_rate": 4.459289711088273e-05,
"loss": 1.2433,
"step": 1920
},
{
"epoch": 1.98,
"learning_rate": 4.4535799931483384e-05,
"loss": 1.5231,
"step": 1930
},
{
"epoch": 1.99,
"learning_rate": 4.447870275208405e-05,
"loss": 1.2566,
"step": 1940
},
{
"epoch": 2.0,
"eval_accuracy": 0.627936031984008,
"eval_loss": 1.2302356958389282,
"eval_runtime": 78.2932,
"eval_samples_per_second": 51.116,
"eval_steps_per_second": 3.206,
"step": 1946
},
{
"epoch": 2.0,
"learning_rate": 4.442160557268471e-05,
"loss": 1.2635,
"step": 1950
},
{
"epoch": 2.01,
"learning_rate": 4.4364508393285373e-05,
"loss": 1.06,
"step": 1960
},
{
"epoch": 2.02,
"learning_rate": 4.4307411213886037e-05,
"loss": 1.0296,
"step": 1970
},
{
"epoch": 2.03,
"learning_rate": 4.42503140344867e-05,
"loss": 0.9412,
"step": 1980
},
{
"epoch": 2.05,
"learning_rate": 4.419321685508736e-05,
"loss": 1.0654,
"step": 1990
},
{
"epoch": 2.06,
"learning_rate": 4.4136119675688026e-05,
"loss": 1.0277,
"step": 2000
},
{
"epoch": 2.07,
"learning_rate": 4.407902249628869e-05,
"loss": 1.1426,
"step": 2010
},
{
"epoch": 2.08,
"learning_rate": 4.4021925316889345e-05,
"loss": 1.0303,
"step": 2020
},
{
"epoch": 2.09,
"learning_rate": 4.396482813749001e-05,
"loss": 1.034,
"step": 2030
},
{
"epoch": 2.1,
"learning_rate": 4.390773095809067e-05,
"loss": 0.9853,
"step": 2040
},
{
"epoch": 2.11,
"learning_rate": 4.3850633778691334e-05,
"loss": 1.0274,
"step": 2050
},
{
"epoch": 2.12,
"learning_rate": 4.3793536599292e-05,
"loss": 1.2666,
"step": 2060
},
{
"epoch": 2.13,
"learning_rate": 4.373643941989266e-05,
"loss": 1.0593,
"step": 2070
},
{
"epoch": 2.14,
"learning_rate": 4.367934224049332e-05,
"loss": 1.1989,
"step": 2080
},
{
"epoch": 2.15,
"learning_rate": 4.3622245061093986e-05,
"loss": 0.8752,
"step": 2090
},
{
"epoch": 2.16,
"learning_rate": 4.356514788169464e-05,
"loss": 1.0088,
"step": 2100
},
{
"epoch": 2.17,
"learning_rate": 4.350805070229531e-05,
"loss": 0.9801,
"step": 2110
},
{
"epoch": 2.18,
"learning_rate": 4.345095352289597e-05,
"loss": 1.0773,
"step": 2120
},
{
"epoch": 2.19,
"learning_rate": 4.339385634349664e-05,
"loss": 0.9073,
"step": 2130
},
{
"epoch": 2.2,
"learning_rate": 4.3336759164097295e-05,
"loss": 1.0323,
"step": 2140
},
{
"epoch": 2.21,
"learning_rate": 4.327966198469796e-05,
"loss": 0.9896,
"step": 2150
},
{
"epoch": 2.22,
"learning_rate": 4.322256480529862e-05,
"loss": 1.0167,
"step": 2160
},
{
"epoch": 2.23,
"learning_rate": 4.3165467625899284e-05,
"loss": 0.9743,
"step": 2170
},
{
"epoch": 2.24,
"learning_rate": 4.310837044649995e-05,
"loss": 1.0166,
"step": 2180
},
{
"epoch": 2.25,
"learning_rate": 4.30512732671006e-05,
"loss": 1.134,
"step": 2190
},
{
"epoch": 2.26,
"learning_rate": 4.299417608770127e-05,
"loss": 1.1469,
"step": 2200
},
{
"epoch": 2.27,
"learning_rate": 4.293707890830193e-05,
"loss": 0.9072,
"step": 2210
},
{
"epoch": 2.28,
"learning_rate": 4.28799817289026e-05,
"loss": 1.0896,
"step": 2220
},
{
"epoch": 2.29,
"learning_rate": 4.2822884549503255e-05,
"loss": 1.1482,
"step": 2230
},
{
"epoch": 2.3,
"learning_rate": 4.276578737010392e-05,
"loss": 1.1971,
"step": 2240
},
{
"epoch": 2.31,
"learning_rate": 4.270869019070458e-05,
"loss": 1.1036,
"step": 2250
},
{
"epoch": 2.32,
"learning_rate": 4.265159301130524e-05,
"loss": 0.9868,
"step": 2260
},
{
"epoch": 2.33,
"learning_rate": 4.259449583190591e-05,
"loss": 0.9487,
"step": 2270
},
{
"epoch": 2.34,
"learning_rate": 4.2537398652506564e-05,
"loss": 0.9881,
"step": 2280
},
{
"epoch": 2.35,
"learning_rate": 4.2480301473107234e-05,
"loss": 1.1292,
"step": 2290
},
{
"epoch": 2.36,
"learning_rate": 4.242320429370789e-05,
"loss": 0.9419,
"step": 2300
},
{
"epoch": 2.37,
"learning_rate": 4.236610711430855e-05,
"loss": 1.0623,
"step": 2310
},
{
"epoch": 2.38,
"learning_rate": 4.2309009934909216e-05,
"loss": 1.1257,
"step": 2320
},
{
"epoch": 2.39,
"learning_rate": 4.225191275550988e-05,
"loss": 0.9547,
"step": 2330
},
{
"epoch": 2.4,
"learning_rate": 4.219481557611054e-05,
"loss": 0.9799,
"step": 2340
},
{
"epoch": 2.42,
"learning_rate": 4.2137718396711205e-05,
"loss": 0.8543,
"step": 2350
},
{
"epoch": 2.43,
"learning_rate": 4.208062121731187e-05,
"loss": 0.9839,
"step": 2360
},
{
"epoch": 2.44,
"learning_rate": 4.202352403791253e-05,
"loss": 1.1034,
"step": 2370
},
{
"epoch": 2.45,
"learning_rate": 4.1966426858513194e-05,
"loss": 0.9854,
"step": 2380
},
{
"epoch": 2.46,
"learning_rate": 4.190932967911385e-05,
"loss": 1.1817,
"step": 2390
},
{
"epoch": 2.47,
"learning_rate": 4.1852232499714514e-05,
"loss": 1.0864,
"step": 2400
},
{
"epoch": 2.48,
"learning_rate": 4.179513532031518e-05,
"loss": 1.2053,
"step": 2410
},
{
"epoch": 2.49,
"learning_rate": 4.173803814091584e-05,
"loss": 1.1929,
"step": 2420
},
{
"epoch": 2.5,
"learning_rate": 4.16809409615165e-05,
"loss": 1.187,
"step": 2430
},
{
"epoch": 2.51,
"learning_rate": 4.1623843782117166e-05,
"loss": 1.0885,
"step": 2440
},
{
"epoch": 2.52,
"learning_rate": 4.156674660271783e-05,
"loss": 0.8845,
"step": 2450
},
{
"epoch": 2.53,
"learning_rate": 4.150964942331849e-05,
"loss": 1.0721,
"step": 2460
},
{
"epoch": 2.54,
"learning_rate": 4.145255224391915e-05,
"loss": 1.005,
"step": 2470
},
{
"epoch": 2.55,
"learning_rate": 4.139545506451982e-05,
"loss": 1.0132,
"step": 2480
},
{
"epoch": 2.56,
"learning_rate": 4.1338357885120474e-05,
"loss": 1.0567,
"step": 2490
},
{
"epoch": 2.57,
"learning_rate": 4.1281260705721144e-05,
"loss": 0.9447,
"step": 2500
},
{
"epoch": 2.58,
"learning_rate": 4.12241635263218e-05,
"loss": 0.9953,
"step": 2510
},
{
"epoch": 2.59,
"learning_rate": 4.1167066346922464e-05,
"loss": 1.0493,
"step": 2520
},
{
"epoch": 2.6,
"learning_rate": 4.110996916752313e-05,
"loss": 1.0847,
"step": 2530
},
{
"epoch": 2.61,
"learning_rate": 4.105287198812379e-05,
"loss": 0.8683,
"step": 2540
},
{
"epoch": 2.62,
"learning_rate": 4.099577480872445e-05,
"loss": 0.9047,
"step": 2550
},
{
"epoch": 2.63,
"learning_rate": 4.093867762932511e-05,
"loss": 1.039,
"step": 2560
},
{
"epoch": 2.64,
"learning_rate": 4.088158044992578e-05,
"loss": 0.9386,
"step": 2570
},
{
"epoch": 2.65,
"learning_rate": 4.0824483270526435e-05,
"loss": 1.2603,
"step": 2580
},
{
"epoch": 2.66,
"learning_rate": 4.0767386091127105e-05,
"loss": 1.0625,
"step": 2590
},
{
"epoch": 2.67,
"learning_rate": 4.071028891172776e-05,
"loss": 1.0527,
"step": 2600
},
{
"epoch": 2.68,
"learning_rate": 4.0653191732328424e-05,
"loss": 0.965,
"step": 2610
},
{
"epoch": 2.69,
"learning_rate": 4.059609455292909e-05,
"loss": 0.7905,
"step": 2620
},
{
"epoch": 2.7,
"learning_rate": 4.0538997373529744e-05,
"loss": 0.9921,
"step": 2630
},
{
"epoch": 2.71,
"learning_rate": 4.048190019413041e-05,
"loss": 1.02,
"step": 2640
},
{
"epoch": 2.72,
"learning_rate": 4.042480301473107e-05,
"loss": 1.0053,
"step": 2650
},
{
"epoch": 2.73,
"learning_rate": 4.036770583533174e-05,
"loss": 1.1715,
"step": 2660
},
{
"epoch": 2.74,
"learning_rate": 4.0310608655932396e-05,
"loss": 0.9042,
"step": 2670
},
{
"epoch": 2.75,
"learning_rate": 4.0253511476533066e-05,
"loss": 1.0459,
"step": 2680
},
{
"epoch": 2.76,
"learning_rate": 4.019641429713372e-05,
"loss": 1.0154,
"step": 2690
},
{
"epoch": 2.77,
"learning_rate": 4.0139317117734385e-05,
"loss": 1.0837,
"step": 2700
},
{
"epoch": 2.79,
"learning_rate": 4.008221993833505e-05,
"loss": 0.9708,
"step": 2710
},
{
"epoch": 2.8,
"learning_rate": 4.002512275893571e-05,
"loss": 1.1382,
"step": 2720
},
{
"epoch": 2.81,
"learning_rate": 3.9968025579536374e-05,
"loss": 0.9227,
"step": 2730
},
{
"epoch": 2.82,
"learning_rate": 3.991092840013704e-05,
"loss": 1.1343,
"step": 2740
},
{
"epoch": 2.83,
"learning_rate": 3.98538312207377e-05,
"loss": 0.92,
"step": 2750
},
{
"epoch": 2.84,
"learning_rate": 3.9796734041338356e-05,
"loss": 1.0399,
"step": 2760
},
{
"epoch": 2.85,
"learning_rate": 3.973963686193902e-05,
"loss": 1.1288,
"step": 2770
},
{
"epoch": 2.86,
"learning_rate": 3.968253968253968e-05,
"loss": 0.9115,
"step": 2780
},
{
"epoch": 2.87,
"learning_rate": 3.9625442503140346e-05,
"loss": 1.1428,
"step": 2790
},
{
"epoch": 2.88,
"learning_rate": 3.956834532374101e-05,
"loss": 0.9391,
"step": 2800
},
{
"epoch": 2.89,
"learning_rate": 3.951124814434167e-05,
"loss": 0.9768,
"step": 2810
},
{
"epoch": 2.9,
"learning_rate": 3.9454150964942335e-05,
"loss": 0.9446,
"step": 2820
},
{
"epoch": 2.91,
"learning_rate": 3.9397053785543e-05,
"loss": 0.9711,
"step": 2830
},
{
"epoch": 2.92,
"learning_rate": 3.933995660614366e-05,
"loss": 0.9887,
"step": 2840
},
{
"epoch": 2.93,
"learning_rate": 3.9282859426744324e-05,
"loss": 1.0373,
"step": 2850
},
{
"epoch": 2.94,
"learning_rate": 3.922576224734498e-05,
"loss": 0.8822,
"step": 2860
},
{
"epoch": 2.95,
"learning_rate": 3.916866506794565e-05,
"loss": 1.1377,
"step": 2870
},
{
"epoch": 2.96,
"learning_rate": 3.9111567888546306e-05,
"loss": 0.8674,
"step": 2880
},
{
"epoch": 2.97,
"learning_rate": 3.905447070914697e-05,
"loss": 0.9848,
"step": 2890
},
{
"epoch": 2.98,
"learning_rate": 3.899737352974763e-05,
"loss": 1.0722,
"step": 2900
},
{
"epoch": 2.99,
"learning_rate": 3.8940276350348295e-05,
"loss": 1.1855,
"step": 2910
},
{
"epoch": 3.0,
"eval_accuracy": 0.6856571714142928,
"eval_loss": 1.036320447921753,
"eval_runtime": 86.7468,
"eval_samples_per_second": 46.134,
"eval_steps_per_second": 2.893,
"step": 2919
},
{
"epoch": 3.0,
"learning_rate": 3.888317917094896e-05,
"loss": 1.0147,
"step": 2920
},
{
"epoch": 3.01,
"learning_rate": 3.8826081991549615e-05,
"loss": 0.6069,
"step": 2930
},
{
"epoch": 3.02,
"learning_rate": 3.8768984812150285e-05,
"loss": 0.8573,
"step": 2940
},
{
"epoch": 3.03,
"learning_rate": 3.871188763275094e-05,
"loss": 0.7542,
"step": 2950
},
{
"epoch": 3.04,
"learning_rate": 3.865479045335161e-05,
"loss": 0.8753,
"step": 2960
},
{
"epoch": 3.05,
"learning_rate": 3.859769327395227e-05,
"loss": 0.7049,
"step": 2970
},
{
"epoch": 3.06,
"learning_rate": 3.854059609455293e-05,
"loss": 0.585,
"step": 2980
},
{
"epoch": 3.07,
"learning_rate": 3.848349891515359e-05,
"loss": 0.7805,
"step": 2990
},
{
"epoch": 3.08,
"learning_rate": 3.8426401735754256e-05,
"loss": 0.6462,
"step": 3000
},
{
"epoch": 3.09,
"learning_rate": 3.836930455635492e-05,
"loss": 0.7899,
"step": 3010
},
{
"epoch": 3.1,
"learning_rate": 3.8312207376955575e-05,
"loss": 0.8974,
"step": 3020
},
{
"epoch": 3.11,
"learning_rate": 3.8255110197556245e-05,
"loss": 0.6812,
"step": 3030
},
{
"epoch": 3.12,
"learning_rate": 3.81980130181569e-05,
"loss": 0.9342,
"step": 3040
},
{
"epoch": 3.13,
"learning_rate": 3.814091583875757e-05,
"loss": 0.6933,
"step": 3050
},
{
"epoch": 3.14,
"learning_rate": 3.808381865935823e-05,
"loss": 0.8281,
"step": 3060
},
{
"epoch": 3.16,
"learning_rate": 3.802672147995889e-05,
"loss": 0.7896,
"step": 3070
},
{
"epoch": 3.17,
"learning_rate": 3.7969624300559554e-05,
"loss": 0.7623,
"step": 3080
},
{
"epoch": 3.18,
"learning_rate": 3.791252712116022e-05,
"loss": 0.8253,
"step": 3090
},
{
"epoch": 3.19,
"learning_rate": 3.785542994176088e-05,
"loss": 0.6962,
"step": 3100
},
{
"epoch": 3.2,
"learning_rate": 3.779833276236154e-05,
"loss": 0.8646,
"step": 3110
},
{
"epoch": 3.21,
"learning_rate": 3.7741235582962206e-05,
"loss": 0.7083,
"step": 3120
},
{
"epoch": 3.22,
"learning_rate": 3.768413840356286e-05,
"loss": 0.7984,
"step": 3130
},
{
"epoch": 3.23,
"learning_rate": 3.7627041224163525e-05,
"loss": 0.8558,
"step": 3140
},
{
"epoch": 3.24,
"learning_rate": 3.756994404476419e-05,
"loss": 0.7339,
"step": 3150
},
{
"epoch": 3.25,
"learning_rate": 3.751284686536485e-05,
"loss": 0.6585,
"step": 3160
},
{
"epoch": 3.26,
"learning_rate": 3.7455749685965514e-05,
"loss": 0.6563,
"step": 3170
},
{
"epoch": 3.27,
"learning_rate": 3.739865250656618e-05,
"loss": 0.8558,
"step": 3180
},
{
"epoch": 3.28,
"learning_rate": 3.734155532716684e-05,
"loss": 0.8081,
"step": 3190
},
{
"epoch": 3.29,
"learning_rate": 3.7284458147767503e-05,
"loss": 0.9351,
"step": 3200
},
{
"epoch": 3.3,
"learning_rate": 3.7227360968368166e-05,
"loss": 0.7624,
"step": 3210
},
{
"epoch": 3.31,
"learning_rate": 3.717026378896883e-05,
"loss": 0.7248,
"step": 3220
},
{
"epoch": 3.32,
"learning_rate": 3.7113166609569486e-05,
"loss": 0.817,
"step": 3230
},
{
"epoch": 3.33,
"learning_rate": 3.7056069430170156e-05,
"loss": 0.6947,
"step": 3240
},
{
"epoch": 3.34,
"learning_rate": 3.699897225077081e-05,
"loss": 0.759,
"step": 3250
},
{
"epoch": 3.35,
"learning_rate": 3.6941875071371475e-05,
"loss": 0.7159,
"step": 3260
},
{
"epoch": 3.36,
"learning_rate": 3.688477789197214e-05,
"loss": 0.7979,
"step": 3270
},
{
"epoch": 3.37,
"learning_rate": 3.68276807125728e-05,
"loss": 0.8609,
"step": 3280
},
{
"epoch": 3.38,
"learning_rate": 3.6770583533173464e-05,
"loss": 0.617,
"step": 3290
},
{
"epoch": 3.39,
"learning_rate": 3.671348635377412e-05,
"loss": 0.8382,
"step": 3300
},
{
"epoch": 3.4,
"learning_rate": 3.665638917437479e-05,
"loss": 0.9262,
"step": 3310
},
{
"epoch": 3.41,
"learning_rate": 3.6599291994975446e-05,
"loss": 0.8049,
"step": 3320
},
{
"epoch": 3.42,
"learning_rate": 3.6542194815576116e-05,
"loss": 0.7247,
"step": 3330
},
{
"epoch": 3.43,
"learning_rate": 3.648509763617677e-05,
"loss": 0.7857,
"step": 3340
},
{
"epoch": 3.44,
"learning_rate": 3.642800045677744e-05,
"loss": 0.7727,
"step": 3350
},
{
"epoch": 3.45,
"learning_rate": 3.63709032773781e-05,
"loss": 0.6476,
"step": 3360
},
{
"epoch": 3.46,
"learning_rate": 3.631380609797876e-05,
"loss": 0.8037,
"step": 3370
},
{
"epoch": 3.47,
"learning_rate": 3.6256708918579425e-05,
"loss": 0.7828,
"step": 3380
},
{
"epoch": 3.48,
"learning_rate": 3.619961173918008e-05,
"loss": 0.9081,
"step": 3390
},
{
"epoch": 3.49,
"learning_rate": 3.614251455978075e-05,
"loss": 0.7654,
"step": 3400
},
{
"epoch": 3.5,
"learning_rate": 3.608541738038141e-05,
"loss": 0.7513,
"step": 3410
},
{
"epoch": 3.51,
"learning_rate": 3.602832020098208e-05,
"loss": 0.6636,
"step": 3420
},
{
"epoch": 3.53,
"learning_rate": 3.597122302158273e-05,
"loss": 0.828,
"step": 3430
},
{
"epoch": 3.54,
"learning_rate": 3.5914125842183396e-05,
"loss": 0.8404,
"step": 3440
},
{
"epoch": 3.55,
"learning_rate": 3.585702866278406e-05,
"loss": 0.6785,
"step": 3450
},
{
"epoch": 3.56,
"learning_rate": 3.579993148338472e-05,
"loss": 0.7086,
"step": 3460
},
{
"epoch": 3.57,
"learning_rate": 3.5742834303985385e-05,
"loss": 0.819,
"step": 3470
},
{
"epoch": 3.58,
"learning_rate": 3.568573712458605e-05,
"loss": 0.7055,
"step": 3480
},
{
"epoch": 3.59,
"learning_rate": 3.562863994518671e-05,
"loss": 0.7075,
"step": 3490
},
{
"epoch": 3.6,
"learning_rate": 3.5571542765787375e-05,
"loss": 0.5664,
"step": 3500
},
{
"epoch": 3.61,
"learning_rate": 3.551444558638804e-05,
"loss": 0.7915,
"step": 3510
},
{
"epoch": 3.62,
"learning_rate": 3.5457348406988694e-05,
"loss": 0.7425,
"step": 3520
},
{
"epoch": 3.63,
"learning_rate": 3.540025122758936e-05,
"loss": 0.6031,
"step": 3530
},
{
"epoch": 3.64,
"learning_rate": 3.534315404819002e-05,
"loss": 0.695,
"step": 3540
},
{
"epoch": 3.65,
"learning_rate": 3.528605686879068e-05,
"loss": 0.7757,
"step": 3550
},
{
"epoch": 3.66,
"learning_rate": 3.5228959689391346e-05,
"loss": 0.9496,
"step": 3560
},
{
"epoch": 3.67,
"learning_rate": 3.517186250999201e-05,
"loss": 0.8422,
"step": 3570
},
{
"epoch": 3.68,
"learning_rate": 3.511476533059267e-05,
"loss": 0.526,
"step": 3580
},
{
"epoch": 3.69,
"learning_rate": 3.5057668151193335e-05,
"loss": 0.7195,
"step": 3590
},
{
"epoch": 3.7,
"learning_rate": 3.500057097179399e-05,
"loss": 0.6729,
"step": 3600
},
{
"epoch": 3.71,
"learning_rate": 3.494347379239466e-05,
"loss": 0.8048,
"step": 3610
},
{
"epoch": 3.72,
"learning_rate": 3.488637661299532e-05,
"loss": 0.8051,
"step": 3620
},
{
"epoch": 3.73,
"learning_rate": 3.482927943359598e-05,
"loss": 0.8161,
"step": 3630
},
{
"epoch": 3.74,
"learning_rate": 3.4772182254196644e-05,
"loss": 0.8085,
"step": 3640
},
{
"epoch": 3.75,
"learning_rate": 3.471508507479731e-05,
"loss": 0.846,
"step": 3650
},
{
"epoch": 3.76,
"learning_rate": 3.465798789539797e-05,
"loss": 0.7425,
"step": 3660
},
{
"epoch": 3.77,
"learning_rate": 3.460089071599863e-05,
"loss": 0.6626,
"step": 3670
},
{
"epoch": 3.78,
"learning_rate": 3.4543793536599296e-05,
"loss": 0.7278,
"step": 3680
},
{
"epoch": 3.79,
"learning_rate": 3.448669635719995e-05,
"loss": 0.854,
"step": 3690
},
{
"epoch": 3.8,
"learning_rate": 3.442959917780062e-05,
"loss": 0.9847,
"step": 3700
},
{
"epoch": 3.81,
"learning_rate": 3.437250199840128e-05,
"loss": 0.6927,
"step": 3710
},
{
"epoch": 3.82,
"learning_rate": 3.431540481900195e-05,
"loss": 0.6668,
"step": 3720
},
{
"epoch": 3.83,
"learning_rate": 3.4258307639602604e-05,
"loss": 0.6964,
"step": 3730
},
{
"epoch": 3.84,
"learning_rate": 3.420121046020327e-05,
"loss": 0.6407,
"step": 3740
},
{
"epoch": 3.85,
"learning_rate": 3.414411328080393e-05,
"loss": 0.6686,
"step": 3750
},
{
"epoch": 3.86,
"learning_rate": 3.408701610140459e-05,
"loss": 0.835,
"step": 3760
},
{
"epoch": 3.87,
"learning_rate": 3.4029918922005257e-05,
"loss": 0.8278,
"step": 3770
},
{
"epoch": 3.88,
"learning_rate": 3.397282174260591e-05,
"loss": 0.7366,
"step": 3780
},
{
"epoch": 3.9,
"learning_rate": 3.391572456320658e-05,
"loss": 0.7879,
"step": 3790
},
{
"epoch": 3.91,
"learning_rate": 3.385862738380724e-05,
"loss": 0.757,
"step": 3800
},
{
"epoch": 3.92,
"learning_rate": 3.38015302044079e-05,
"loss": 0.7843,
"step": 3810
},
{
"epoch": 3.93,
"learning_rate": 3.3744433025008565e-05,
"loss": 0.6959,
"step": 3820
},
{
"epoch": 3.94,
"learning_rate": 3.368733584560923e-05,
"loss": 0.6813,
"step": 3830
},
{
"epoch": 3.95,
"learning_rate": 3.363023866620989e-05,
"loss": 0.8886,
"step": 3840
},
{
"epoch": 3.96,
"learning_rate": 3.3573141486810554e-05,
"loss": 0.8847,
"step": 3850
},
{
"epoch": 3.97,
"learning_rate": 3.351604430741122e-05,
"loss": 0.7488,
"step": 3860
},
{
"epoch": 3.98,
"learning_rate": 3.345894712801188e-05,
"loss": 0.6671,
"step": 3870
},
{
"epoch": 3.99,
"learning_rate": 3.340184994861254e-05,
"loss": 0.6329,
"step": 3880
},
{
"epoch": 4.0,
"learning_rate": 3.33447527692132e-05,
"loss": 0.7105,
"step": 3890
},
{
"epoch": 4.0,
"eval_accuracy": 0.711144427786107,
"eval_loss": 0.9586681127548218,
"eval_runtime": 77.637,
"eval_samples_per_second": 51.548,
"eval_steps_per_second": 3.233,
"step": 3892
},
{
"epoch": 4.01,
"learning_rate": 3.328765558981386e-05,
"loss": 0.6649,
"step": 3900
},
{
"epoch": 4.02,
"learning_rate": 3.3230558410414526e-05,
"loss": 0.5029,
"step": 3910
},
{
"epoch": 4.03,
"learning_rate": 3.317346123101519e-05,
"loss": 0.5073,
"step": 3920
},
{
"epoch": 4.04,
"learning_rate": 3.311636405161585e-05,
"loss": 0.5827,
"step": 3930
},
{
"epoch": 4.05,
"learning_rate": 3.3059266872216515e-05,
"loss": 0.5721,
"step": 3940
},
{
"epoch": 4.06,
"learning_rate": 3.300216969281718e-05,
"loss": 0.4412,
"step": 3950
},
{
"epoch": 4.07,
"learning_rate": 3.294507251341784e-05,
"loss": 0.6078,
"step": 3960
},
{
"epoch": 4.08,
"learning_rate": 3.28879753340185e-05,
"loss": 0.4646,
"step": 3970
},
{
"epoch": 4.09,
"learning_rate": 3.283087815461917e-05,
"loss": 0.5959,
"step": 3980
},
{
"epoch": 4.1,
"learning_rate": 3.277378097521982e-05,
"loss": 0.5176,
"step": 3990
},
{
"epoch": 4.11,
"learning_rate": 3.2716683795820486e-05,
"loss": 0.4482,
"step": 4000
},
{
"epoch": 4.12,
"learning_rate": 3.265958661642115e-05,
"loss": 0.5811,
"step": 4010
},
{
"epoch": 4.13,
"learning_rate": 3.260248943702181e-05,
"loss": 0.6701,
"step": 4020
},
{
"epoch": 4.14,
"learning_rate": 3.2545392257622476e-05,
"loss": 0.7067,
"step": 4030
},
{
"epoch": 4.15,
"learning_rate": 3.248829507822314e-05,
"loss": 0.5683,
"step": 4040
},
{
"epoch": 4.16,
"learning_rate": 3.24311978988238e-05,
"loss": 0.4745,
"step": 4050
},
{
"epoch": 4.17,
"learning_rate": 3.237410071942446e-05,
"loss": 0.4521,
"step": 4060
},
{
"epoch": 4.18,
"learning_rate": 3.231700354002513e-05,
"loss": 0.4244,
"step": 4070
},
{
"epoch": 4.19,
"learning_rate": 3.2259906360625784e-05,
"loss": 0.5912,
"step": 4080
},
{
"epoch": 4.2,
"learning_rate": 3.2202809181226454e-05,
"loss": 0.5982,
"step": 4090
},
{
"epoch": 4.21,
"learning_rate": 3.214571200182711e-05,
"loss": 0.5074,
"step": 4100
},
{
"epoch": 4.22,
"learning_rate": 3.208861482242777e-05,
"loss": 0.4708,
"step": 4110
},
{
"epoch": 4.23,
"learning_rate": 3.2031517643028436e-05,
"loss": 0.4193,
"step": 4120
},
{
"epoch": 4.24,
"learning_rate": 3.197442046362909e-05,
"loss": 0.5634,
"step": 4130
},
{
"epoch": 4.25,
"learning_rate": 3.191732328422976e-05,
"loss": 0.4596,
"step": 4140
},
{
"epoch": 4.27,
"learning_rate": 3.186022610483042e-05,
"loss": 0.5762,
"step": 4150
},
{
"epoch": 4.28,
"learning_rate": 3.180312892543109e-05,
"loss": 0.6591,
"step": 4160
},
{
"epoch": 4.29,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.6099,
"step": 4170
},
{
"epoch": 4.3,
"learning_rate": 3.1688934566632414e-05,
"loss": 0.6084,
"step": 4180
},
{
"epoch": 4.31,
"learning_rate": 3.163183738723307e-05,
"loss": 0.4838,
"step": 4190
},
{
"epoch": 4.32,
"learning_rate": 3.1574740207833734e-05,
"loss": 0.5271,
"step": 4200
},
{
"epoch": 4.33,
"learning_rate": 3.15176430284344e-05,
"loss": 0.5699,
"step": 4210
},
{
"epoch": 4.34,
"learning_rate": 3.146054584903506e-05,
"loss": 0.5429,
"step": 4220
},
{
"epoch": 4.35,
"learning_rate": 3.140344866963572e-05,
"loss": 0.575,
"step": 4230
},
{
"epoch": 4.36,
"learning_rate": 3.1346351490236386e-05,
"loss": 0.6351,
"step": 4240
},
{
"epoch": 4.37,
"learning_rate": 3.128925431083705e-05,
"loss": 0.5762,
"step": 4250
},
{
"epoch": 4.38,
"learning_rate": 3.1232157131437705e-05,
"loss": 0.5703,
"step": 4260
},
{
"epoch": 4.39,
"learning_rate": 3.117505995203837e-05,
"loss": 0.6424,
"step": 4270
},
{
"epoch": 4.4,
"learning_rate": 3.111796277263903e-05,
"loss": 0.6194,
"step": 4280
},
{
"epoch": 4.41,
"learning_rate": 3.1060865593239694e-05,
"loss": 0.5795,
"step": 4290
},
{
"epoch": 4.42,
"learning_rate": 3.100376841384036e-05,
"loss": 0.6398,
"step": 4300
},
{
"epoch": 4.43,
"learning_rate": 3.094667123444102e-05,
"loss": 0.4502,
"step": 4310
},
{
"epoch": 4.44,
"learning_rate": 3.0889574055041684e-05,
"loss": 0.5962,
"step": 4320
},
{
"epoch": 4.45,
"learning_rate": 3.083247687564235e-05,
"loss": 0.558,
"step": 4330
},
{
"epoch": 4.46,
"learning_rate": 3.077537969624301e-05,
"loss": 0.4236,
"step": 4340
},
{
"epoch": 4.47,
"learning_rate": 3.071828251684367e-05,
"loss": 0.5991,
"step": 4350
},
{
"epoch": 4.48,
"learning_rate": 3.066118533744433e-05,
"loss": 0.5379,
"step": 4360
},
{
"epoch": 4.49,
"learning_rate": 3.0604088158045e-05,
"loss": 0.5784,
"step": 4370
},
{
"epoch": 4.5,
"learning_rate": 3.0546990978645655e-05,
"loss": 0.572,
"step": 4380
},
{
"epoch": 4.51,
"learning_rate": 3.0489893799246315e-05,
"loss": 0.548,
"step": 4390
},
{
"epoch": 4.52,
"learning_rate": 3.043279661984698e-05,
"loss": 0.6229,
"step": 4400
},
{
"epoch": 4.53,
"learning_rate": 3.037569944044764e-05,
"loss": 0.6826,
"step": 4410
},
{
"epoch": 4.54,
"learning_rate": 3.0318602261048307e-05,
"loss": 0.5678,
"step": 4420
},
{
"epoch": 4.55,
"learning_rate": 3.0261505081648967e-05,
"loss": 0.7231,
"step": 4430
},
{
"epoch": 4.56,
"learning_rate": 3.0204407902249633e-05,
"loss": 0.5012,
"step": 4440
},
{
"epoch": 4.57,
"learning_rate": 3.0147310722850293e-05,
"loss": 0.627,
"step": 4450
},
{
"epoch": 4.58,
"learning_rate": 3.0090213543450956e-05,
"loss": 0.6946,
"step": 4460
},
{
"epoch": 4.59,
"learning_rate": 3.0033116364051616e-05,
"loss": 0.667,
"step": 4470
},
{
"epoch": 4.6,
"learning_rate": 2.9976019184652282e-05,
"loss": 0.6744,
"step": 4480
},
{
"epoch": 4.61,
"learning_rate": 2.9918922005252942e-05,
"loss": 0.622,
"step": 4490
},
{
"epoch": 4.62,
"learning_rate": 2.98618248258536e-05,
"loss": 0.5412,
"step": 4500
},
{
"epoch": 4.64,
"learning_rate": 2.9804727646454268e-05,
"loss": 0.6783,
"step": 4510
},
{
"epoch": 4.65,
"learning_rate": 2.9747630467054928e-05,
"loss": 0.6158,
"step": 4520
},
{
"epoch": 4.66,
"learning_rate": 2.969053328765559e-05,
"loss": 0.5068,
"step": 4530
},
{
"epoch": 4.67,
"learning_rate": 2.963343610825625e-05,
"loss": 0.5774,
"step": 4540
},
{
"epoch": 4.68,
"learning_rate": 2.9576338928856917e-05,
"loss": 0.4798,
"step": 4550
},
{
"epoch": 4.69,
"learning_rate": 2.9519241749457576e-05,
"loss": 0.5738,
"step": 4560
},
{
"epoch": 4.7,
"learning_rate": 2.9462144570058243e-05,
"loss": 0.6501,
"step": 4570
},
{
"epoch": 4.71,
"learning_rate": 2.9405047390658903e-05,
"loss": 0.6111,
"step": 4580
},
{
"epoch": 4.72,
"learning_rate": 2.9347950211259566e-05,
"loss": 0.6347,
"step": 4590
},
{
"epoch": 4.73,
"learning_rate": 2.929085303186023e-05,
"loss": 0.449,
"step": 4600
},
{
"epoch": 4.74,
"learning_rate": 2.923375585246089e-05,
"loss": 0.7129,
"step": 4610
},
{
"epoch": 4.75,
"learning_rate": 2.917665867306155e-05,
"loss": 0.6683,
"step": 4620
},
{
"epoch": 4.76,
"learning_rate": 2.911956149366221e-05,
"loss": 0.545,
"step": 4630
},
{
"epoch": 4.77,
"learning_rate": 2.9062464314262877e-05,
"loss": 0.4887,
"step": 4640
},
{
"epoch": 4.78,
"learning_rate": 2.9005367134863537e-05,
"loss": 0.6314,
"step": 4650
},
{
"epoch": 4.79,
"learning_rate": 2.8948269955464204e-05,
"loss": 0.6196,
"step": 4660
},
{
"epoch": 4.8,
"learning_rate": 2.8891172776064863e-05,
"loss": 0.6465,
"step": 4670
},
{
"epoch": 4.81,
"learning_rate": 2.8834075596665526e-05,
"loss": 0.3738,
"step": 4680
},
{
"epoch": 4.82,
"learning_rate": 2.8776978417266186e-05,
"loss": 0.4967,
"step": 4690
},
{
"epoch": 4.83,
"learning_rate": 2.8719881237866852e-05,
"loss": 0.5445,
"step": 4700
},
{
"epoch": 4.84,
"learning_rate": 2.8662784058467512e-05,
"loss": 0.4294,
"step": 4710
},
{
"epoch": 4.85,
"learning_rate": 2.860568687906818e-05,
"loss": 0.5878,
"step": 4720
},
{
"epoch": 4.86,
"learning_rate": 2.8548589699668838e-05,
"loss": 0.535,
"step": 4730
},
{
"epoch": 4.87,
"learning_rate": 2.84914925202695e-05,
"loss": 0.4343,
"step": 4740
},
{
"epoch": 4.88,
"learning_rate": 2.843439534087016e-05,
"loss": 0.6452,
"step": 4750
},
{
"epoch": 4.89,
"learning_rate": 2.8377298161470824e-05,
"loss": 0.5162,
"step": 4760
},
{
"epoch": 4.9,
"learning_rate": 2.8320200982071487e-05,
"loss": 0.5307,
"step": 4770
},
{
"epoch": 4.91,
"learning_rate": 2.8263103802672147e-05,
"loss": 0.6837,
"step": 4780
},
{
"epoch": 4.92,
"learning_rate": 2.8206006623272813e-05,
"loss": 0.5875,
"step": 4790
},
{
"epoch": 4.93,
"learning_rate": 2.8148909443873473e-05,
"loss": 0.4295,
"step": 4800
},
{
"epoch": 4.94,
"learning_rate": 2.809181226447414e-05,
"loss": 0.4948,
"step": 4810
},
{
"epoch": 4.95,
"learning_rate": 2.80347150850748e-05,
"loss": 0.527,
"step": 4820
},
{
"epoch": 4.96,
"learning_rate": 2.7977617905675462e-05,
"loss": 0.6179,
"step": 4830
},
{
"epoch": 4.97,
"learning_rate": 2.792052072627612e-05,
"loss": 0.5411,
"step": 4840
},
{
"epoch": 4.98,
"learning_rate": 2.7863423546876788e-05,
"loss": 0.5768,
"step": 4850
},
{
"epoch": 4.99,
"learning_rate": 2.7806326367477448e-05,
"loss": 0.6063,
"step": 4860
},
{
"epoch": 5.0,
"eval_accuracy": 0.7373813093453273,
"eval_loss": 0.8927831053733826,
"eval_runtime": 75.0091,
"eval_samples_per_second": 53.354,
"eval_steps_per_second": 3.346,
"step": 4865
},
{
"epoch": 5.01,
"learning_rate": 2.7749229188078114e-05,
"loss": 0.5726,
"step": 4870
},
{
"epoch": 5.02,
"learning_rate": 2.7692132008678774e-05,
"loss": 0.3877,
"step": 4880
},
{
"epoch": 5.03,
"learning_rate": 2.7635034829279433e-05,
"loss": 0.4006,
"step": 4890
},
{
"epoch": 5.04,
"learning_rate": 2.7577937649880096e-05,
"loss": 0.4685,
"step": 4900
},
{
"epoch": 5.05,
"learning_rate": 2.7520840470480756e-05,
"loss": 0.4393,
"step": 4910
},
{
"epoch": 5.06,
"learning_rate": 2.7463743291081422e-05,
"loss": 0.3602,
"step": 4920
},
{
"epoch": 5.07,
"learning_rate": 2.7406646111682082e-05,
"loss": 0.4899,
"step": 4930
},
{
"epoch": 5.08,
"learning_rate": 2.734954893228275e-05,
"loss": 0.3739,
"step": 4940
},
{
"epoch": 5.09,
"learning_rate": 2.7292451752883408e-05,
"loss": 0.5111,
"step": 4950
},
{
"epoch": 5.1,
"learning_rate": 2.7235354573484075e-05,
"loss": 0.5569,
"step": 4960
},
{
"epoch": 5.11,
"learning_rate": 2.7178257394084734e-05,
"loss": 0.3131,
"step": 4970
},
{
"epoch": 5.12,
"learning_rate": 2.7121160214685397e-05,
"loss": 0.4263,
"step": 4980
},
{
"epoch": 5.13,
"learning_rate": 2.7064063035286057e-05,
"loss": 0.3337,
"step": 4990
},
{
"epoch": 5.14,
"learning_rate": 2.7006965855886717e-05,
"loss": 0.2748,
"step": 5000
},
{
"epoch": 5.15,
"learning_rate": 2.6949868676487383e-05,
"loss": 0.3704,
"step": 5010
},
{
"epoch": 5.16,
"learning_rate": 2.6892771497088043e-05,
"loss": 0.3749,
"step": 5020
},
{
"epoch": 5.17,
"learning_rate": 2.683567431768871e-05,
"loss": 0.4065,
"step": 5030
},
{
"epoch": 5.18,
"learning_rate": 2.677857713828937e-05,
"loss": 0.3415,
"step": 5040
},
{
"epoch": 5.19,
"learning_rate": 2.6721479958890032e-05,
"loss": 0.4751,
"step": 5050
},
{
"epoch": 5.2,
"learning_rate": 2.666438277949069e-05,
"loss": 0.2972,
"step": 5060
},
{
"epoch": 5.21,
"learning_rate": 2.6607285600091358e-05,
"loss": 0.4076,
"step": 5070
},
{
"epoch": 5.22,
"learning_rate": 2.6550188420692018e-05,
"loss": 0.3959,
"step": 5080
},
{
"epoch": 5.23,
"learning_rate": 2.6493091241292684e-05,
"loss": 0.3548,
"step": 5090
},
{
"epoch": 5.24,
"learning_rate": 2.6435994061893344e-05,
"loss": 0.4791,
"step": 5100
},
{
"epoch": 5.25,
"learning_rate": 2.637889688249401e-05,
"loss": 0.4068,
"step": 5110
},
{
"epoch": 5.26,
"learning_rate": 2.632179970309467e-05,
"loss": 0.3196,
"step": 5120
},
{
"epoch": 5.27,
"learning_rate": 2.626470252369533e-05,
"loss": 0.5445,
"step": 5130
},
{
"epoch": 5.28,
"learning_rate": 2.6207605344295993e-05,
"loss": 0.5241,
"step": 5140
},
{
"epoch": 5.29,
"learning_rate": 2.6150508164896652e-05,
"loss": 0.4467,
"step": 5150
},
{
"epoch": 5.3,
"learning_rate": 2.609341098549732e-05,
"loss": 0.6068,
"step": 5160
},
{
"epoch": 5.31,
"learning_rate": 2.603631380609798e-05,
"loss": 0.3883,
"step": 5170
},
{
"epoch": 5.32,
"learning_rate": 2.5979216626698645e-05,
"loss": 0.3926,
"step": 5180
},
{
"epoch": 5.33,
"learning_rate": 2.5922119447299304e-05,
"loss": 0.3871,
"step": 5190
},
{
"epoch": 5.34,
"learning_rate": 2.5865022267899968e-05,
"loss": 0.5836,
"step": 5200
},
{
"epoch": 5.35,
"learning_rate": 2.5807925088500627e-05,
"loss": 0.392,
"step": 5210
},
{
"epoch": 5.36,
"learning_rate": 2.5750827909101294e-05,
"loss": 0.4085,
"step": 5220
},
{
"epoch": 5.38,
"learning_rate": 2.5693730729701953e-05,
"loss": 0.4794,
"step": 5230
},
{
"epoch": 5.39,
"learning_rate": 2.563663355030262e-05,
"loss": 0.3662,
"step": 5240
},
{
"epoch": 5.4,
"learning_rate": 2.557953637090328e-05,
"loss": 0.3993,
"step": 5250
},
{
"epoch": 5.41,
"learning_rate": 2.552243919150394e-05,
"loss": 0.5316,
"step": 5260
},
{
"epoch": 5.42,
"learning_rate": 2.5465342012104605e-05,
"loss": 0.4612,
"step": 5270
},
{
"epoch": 5.43,
"learning_rate": 2.5408244832705265e-05,
"loss": 0.3946,
"step": 5280
},
{
"epoch": 5.44,
"learning_rate": 2.5351147653305928e-05,
"loss": 0.3912,
"step": 5290
},
{
"epoch": 5.45,
"learning_rate": 2.5294050473906588e-05,
"loss": 0.3888,
"step": 5300
},
{
"epoch": 5.46,
"learning_rate": 2.5236953294507254e-05,
"loss": 0.4966,
"step": 5310
},
{
"epoch": 5.47,
"learning_rate": 2.5179856115107914e-05,
"loss": 0.3926,
"step": 5320
},
{
"epoch": 5.48,
"learning_rate": 2.512275893570858e-05,
"loss": 0.3809,
"step": 5330
},
{
"epoch": 5.49,
"learning_rate": 2.506566175630924e-05,
"loss": 0.4826,
"step": 5340
},
{
"epoch": 5.5,
"learning_rate": 2.5008564576909903e-05,
"loss": 0.4125,
"step": 5350
},
{
"epoch": 5.51,
"learning_rate": 2.4951467397510563e-05,
"loss": 0.493,
"step": 5360
},
{
"epoch": 5.52,
"learning_rate": 2.4894370218111226e-05,
"loss": 0.5023,
"step": 5370
},
{
"epoch": 5.53,
"learning_rate": 2.483727303871189e-05,
"loss": 0.5726,
"step": 5380
},
{
"epoch": 5.54,
"learning_rate": 2.4780175859312552e-05,
"loss": 0.3551,
"step": 5390
},
{
"epoch": 5.55,
"learning_rate": 2.4723078679913215e-05,
"loss": 0.5638,
"step": 5400
},
{
"epoch": 5.56,
"learning_rate": 2.4665981500513878e-05,
"loss": 0.3479,
"step": 5410
},
{
"epoch": 5.57,
"learning_rate": 2.4608884321114538e-05,
"loss": 0.4529,
"step": 5420
},
{
"epoch": 5.58,
"learning_rate": 2.45517871417152e-05,
"loss": 0.526,
"step": 5430
},
{
"epoch": 5.59,
"learning_rate": 2.449468996231586e-05,
"loss": 0.5076,
"step": 5440
},
{
"epoch": 5.6,
"learning_rate": 2.4437592782916523e-05,
"loss": 0.3853,
"step": 5450
},
{
"epoch": 5.61,
"learning_rate": 2.4380495603517186e-05,
"loss": 0.5106,
"step": 5460
},
{
"epoch": 5.62,
"learning_rate": 2.432339842411785e-05,
"loss": 0.4372,
"step": 5470
},
{
"epoch": 5.63,
"learning_rate": 2.4266301244718513e-05,
"loss": 0.3792,
"step": 5480
},
{
"epoch": 5.64,
"learning_rate": 2.4209204065319176e-05,
"loss": 0.4477,
"step": 5490
},
{
"epoch": 5.65,
"learning_rate": 2.4152106885919835e-05,
"loss": 0.5813,
"step": 5500
},
{
"epoch": 5.66,
"learning_rate": 2.40950097065205e-05,
"loss": 0.3895,
"step": 5510
},
{
"epoch": 5.67,
"learning_rate": 2.403791252712116e-05,
"loss": 0.434,
"step": 5520
},
{
"epoch": 5.68,
"learning_rate": 2.3980815347721824e-05,
"loss": 0.4687,
"step": 5530
},
{
"epoch": 5.69,
"learning_rate": 2.3923718168322487e-05,
"loss": 0.5047,
"step": 5540
},
{
"epoch": 5.7,
"learning_rate": 2.386662098892315e-05,
"loss": 0.3522,
"step": 5550
},
{
"epoch": 5.71,
"learning_rate": 2.380952380952381e-05,
"loss": 0.5933,
"step": 5560
},
{
"epoch": 5.72,
"learning_rate": 2.3752426630124473e-05,
"loss": 0.4048,
"step": 5570
},
{
"epoch": 5.73,
"learning_rate": 2.3695329450725133e-05,
"loss": 0.4535,
"step": 5580
},
{
"epoch": 5.75,
"learning_rate": 2.3638232271325796e-05,
"loss": 0.5233,
"step": 5590
},
{
"epoch": 5.76,
"learning_rate": 2.358113509192646e-05,
"loss": 0.4555,
"step": 5600
},
{
"epoch": 5.77,
"learning_rate": 2.3524037912527122e-05,
"loss": 0.5052,
"step": 5610
},
{
"epoch": 5.78,
"learning_rate": 2.3466940733127785e-05,
"loss": 0.344,
"step": 5620
},
{
"epoch": 5.79,
"learning_rate": 2.3409843553728448e-05,
"loss": 0.4227,
"step": 5630
},
{
"epoch": 5.8,
"learning_rate": 2.335274637432911e-05,
"loss": 0.4903,
"step": 5640
},
{
"epoch": 5.81,
"learning_rate": 2.329564919492977e-05,
"loss": 0.4492,
"step": 5650
},
{
"epoch": 5.82,
"learning_rate": 2.3238552015530434e-05,
"loss": 0.4573,
"step": 5660
},
{
"epoch": 5.83,
"learning_rate": 2.3181454836131097e-05,
"loss": 0.4255,
"step": 5670
},
{
"epoch": 5.84,
"learning_rate": 2.312435765673176e-05,
"loss": 0.5569,
"step": 5680
},
{
"epoch": 5.85,
"learning_rate": 2.306726047733242e-05,
"loss": 0.3653,
"step": 5690
},
{
"epoch": 5.86,
"learning_rate": 2.3010163297933083e-05,
"loss": 0.4681,
"step": 5700
},
{
"epoch": 5.87,
"learning_rate": 2.2953066118533746e-05,
"loss": 0.5671,
"step": 5710
},
{
"epoch": 5.88,
"learning_rate": 2.289596893913441e-05,
"loss": 0.4827,
"step": 5720
},
{
"epoch": 5.89,
"learning_rate": 2.283887175973507e-05,
"loss": 0.3532,
"step": 5730
},
{
"epoch": 5.9,
"learning_rate": 2.278177458033573e-05,
"loss": 0.3967,
"step": 5740
},
{
"epoch": 5.91,
"learning_rate": 2.2724677400936395e-05,
"loss": 0.4732,
"step": 5750
},
{
"epoch": 5.92,
"learning_rate": 2.2667580221537058e-05,
"loss": 0.51,
"step": 5760
},
{
"epoch": 5.93,
"learning_rate": 2.261048304213772e-05,
"loss": 0.448,
"step": 5770
},
{
"epoch": 5.94,
"learning_rate": 2.2553385862738384e-05,
"loss": 0.3952,
"step": 5780
},
{
"epoch": 5.95,
"learning_rate": 2.2496288683339047e-05,
"loss": 0.5458,
"step": 5790
},
{
"epoch": 5.96,
"learning_rate": 2.2439191503939706e-05,
"loss": 0.4016,
"step": 5800
},
{
"epoch": 5.97,
"learning_rate": 2.2382094324540366e-05,
"loss": 0.4725,
"step": 5810
},
{
"epoch": 5.98,
"learning_rate": 2.232499714514103e-05,
"loss": 0.4798,
"step": 5820
},
{
"epoch": 5.99,
"learning_rate": 2.2267899965741692e-05,
"loss": 0.3221,
"step": 5830
},
{
"epoch": 6.0,
"eval_accuracy": 0.7478760619690155,
"eval_loss": 0.8848473429679871,
"eval_runtime": 76.1229,
"eval_samples_per_second": 52.573,
"eval_steps_per_second": 3.297,
"step": 5838
},
{
"epoch": 6.0,
"learning_rate": 2.2210802786342355e-05,
"loss": 0.3877,
"step": 5840
},
{
"epoch": 6.01,
"learning_rate": 2.2153705606943018e-05,
"loss": 0.3256,
"step": 5850
},
{
"epoch": 6.02,
"learning_rate": 2.209660842754368e-05,
"loss": 0.3953,
"step": 5860
},
{
"epoch": 6.03,
"learning_rate": 2.2039511248144344e-05,
"loss": 0.3124,
"step": 5870
},
{
"epoch": 6.04,
"learning_rate": 2.1982414068745004e-05,
"loss": 0.3513,
"step": 5880
},
{
"epoch": 6.05,
"learning_rate": 2.1925316889345667e-05,
"loss": 0.3144,
"step": 5890
},
{
"epoch": 6.06,
"learning_rate": 2.186821970994633e-05,
"loss": 0.3416,
"step": 5900
},
{
"epoch": 6.07,
"learning_rate": 2.1811122530546993e-05,
"loss": 0.32,
"step": 5910
},
{
"epoch": 6.08,
"learning_rate": 2.1754025351147656e-05,
"loss": 0.2815,
"step": 5920
},
{
"epoch": 6.09,
"learning_rate": 2.169692817174832e-05,
"loss": 0.4166,
"step": 5930
},
{
"epoch": 6.1,
"learning_rate": 2.163983099234898e-05,
"loss": 0.4271,
"step": 5940
},
{
"epoch": 6.12,
"learning_rate": 2.1582733812949642e-05,
"loss": 0.4052,
"step": 5950
},
{
"epoch": 6.13,
"learning_rate": 2.15256366335503e-05,
"loss": 0.3513,
"step": 5960
},
{
"epoch": 6.14,
"learning_rate": 2.1468539454150965e-05,
"loss": 0.3415,
"step": 5970
},
{
"epoch": 6.15,
"learning_rate": 2.1411442274751628e-05,
"loss": 0.4111,
"step": 5980
},
{
"epoch": 6.16,
"learning_rate": 2.135434509535229e-05,
"loss": 0.3873,
"step": 5990
},
{
"epoch": 6.17,
"learning_rate": 2.1297247915952954e-05,
"loss": 0.2613,
"step": 6000
},
{
"epoch": 6.18,
"learning_rate": 2.1240150736553617e-05,
"loss": 0.2936,
"step": 6010
},
{
"epoch": 6.19,
"learning_rate": 2.1183053557154277e-05,
"loss": 0.3673,
"step": 6020
},
{
"epoch": 6.2,
"learning_rate": 2.112595637775494e-05,
"loss": 0.4002,
"step": 6030
},
{
"epoch": 6.21,
"learning_rate": 2.1068859198355603e-05,
"loss": 0.3748,
"step": 6040
},
{
"epoch": 6.22,
"learning_rate": 2.1011762018956266e-05,
"loss": 0.373,
"step": 6050
},
{
"epoch": 6.23,
"learning_rate": 2.0954664839556925e-05,
"loss": 0.4076,
"step": 6060
},
{
"epoch": 6.24,
"learning_rate": 2.089756766015759e-05,
"loss": 0.313,
"step": 6070
},
{
"epoch": 6.25,
"learning_rate": 2.084047048075825e-05,
"loss": 0.4081,
"step": 6080
},
{
"epoch": 6.26,
"learning_rate": 2.0783373301358914e-05,
"loss": 0.4025,
"step": 6090
},
{
"epoch": 6.27,
"learning_rate": 2.0726276121959574e-05,
"loss": 0.3625,
"step": 6100
},
{
"epoch": 6.28,
"learning_rate": 2.0669178942560237e-05,
"loss": 0.2957,
"step": 6110
},
{
"epoch": 6.29,
"learning_rate": 2.06120817631609e-05,
"loss": 0.3891,
"step": 6120
},
{
"epoch": 6.3,
"learning_rate": 2.0554984583761563e-05,
"loss": 0.4241,
"step": 6130
},
{
"epoch": 6.31,
"learning_rate": 2.0497887404362226e-05,
"loss": 0.3527,
"step": 6140
},
{
"epoch": 6.32,
"learning_rate": 2.044079022496289e-05,
"loss": 0.3201,
"step": 6150
},
{
"epoch": 6.33,
"learning_rate": 2.0383693045563552e-05,
"loss": 0.2978,
"step": 6160
},
{
"epoch": 6.34,
"learning_rate": 2.0326595866164212e-05,
"loss": 0.3845,
"step": 6170
},
{
"epoch": 6.35,
"learning_rate": 2.0269498686764872e-05,
"loss": 0.5311,
"step": 6180
},
{
"epoch": 6.36,
"learning_rate": 2.0212401507365535e-05,
"loss": 0.3621,
"step": 6190
},
{
"epoch": 6.37,
"learning_rate": 2.0155304327966198e-05,
"loss": 0.3857,
"step": 6200
},
{
"epoch": 6.38,
"learning_rate": 2.009820714856686e-05,
"loss": 0.3401,
"step": 6210
},
{
"epoch": 6.39,
"learning_rate": 2.0041109969167524e-05,
"loss": 0.3093,
"step": 6220
},
{
"epoch": 6.4,
"learning_rate": 1.9984012789768187e-05,
"loss": 0.2847,
"step": 6230
},
{
"epoch": 6.41,
"learning_rate": 1.992691561036885e-05,
"loss": 0.3002,
"step": 6240
},
{
"epoch": 6.42,
"learning_rate": 1.986981843096951e-05,
"loss": 0.2641,
"step": 6250
},
{
"epoch": 6.43,
"learning_rate": 1.9812721251570173e-05,
"loss": 0.325,
"step": 6260
},
{
"epoch": 6.44,
"learning_rate": 1.9755624072170836e-05,
"loss": 0.3443,
"step": 6270
},
{
"epoch": 6.45,
"learning_rate": 1.96985268927715e-05,
"loss": 0.3127,
"step": 6280
},
{
"epoch": 6.46,
"learning_rate": 1.9641429713372162e-05,
"loss": 0.331,
"step": 6290
},
{
"epoch": 6.47,
"learning_rate": 1.9584332533972825e-05,
"loss": 0.3095,
"step": 6300
},
{
"epoch": 6.49,
"learning_rate": 1.9527235354573485e-05,
"loss": 0.4524,
"step": 6310
},
{
"epoch": 6.5,
"learning_rate": 1.9470138175174148e-05,
"loss": 0.2787,
"step": 6320
},
{
"epoch": 6.51,
"learning_rate": 1.9413040995774807e-05,
"loss": 0.2727,
"step": 6330
},
{
"epoch": 6.52,
"learning_rate": 1.935594381637547e-05,
"loss": 0.4067,
"step": 6340
},
{
"epoch": 6.53,
"learning_rate": 1.9298846636976133e-05,
"loss": 0.2386,
"step": 6350
},
{
"epoch": 6.54,
"learning_rate": 1.9241749457576796e-05,
"loss": 0.3437,
"step": 6360
},
{
"epoch": 6.55,
"learning_rate": 1.918465227817746e-05,
"loss": 0.3871,
"step": 6370
},
{
"epoch": 6.56,
"learning_rate": 1.9127555098778123e-05,
"loss": 0.3844,
"step": 6380
},
{
"epoch": 6.57,
"learning_rate": 1.9070457919378786e-05,
"loss": 0.2102,
"step": 6390
},
{
"epoch": 6.58,
"learning_rate": 1.9013360739979445e-05,
"loss": 0.2859,
"step": 6400
},
{
"epoch": 6.59,
"learning_rate": 1.895626356058011e-05,
"loss": 0.3515,
"step": 6410
},
{
"epoch": 6.6,
"learning_rate": 1.889916638118077e-05,
"loss": 0.4626,
"step": 6420
},
{
"epoch": 6.61,
"learning_rate": 1.884206920178143e-05,
"loss": 0.435,
"step": 6430
},
{
"epoch": 6.62,
"learning_rate": 1.8784972022382094e-05,
"loss": 0.4085,
"step": 6440
},
{
"epoch": 6.63,
"learning_rate": 1.8727874842982757e-05,
"loss": 0.3809,
"step": 6450
},
{
"epoch": 6.64,
"learning_rate": 1.867077766358342e-05,
"loss": 0.398,
"step": 6460
},
{
"epoch": 6.65,
"learning_rate": 1.8613680484184083e-05,
"loss": 0.3326,
"step": 6470
},
{
"epoch": 6.66,
"learning_rate": 1.8556583304784743e-05,
"loss": 0.4435,
"step": 6480
},
{
"epoch": 6.67,
"learning_rate": 1.8499486125385406e-05,
"loss": 0.4116,
"step": 6490
},
{
"epoch": 6.68,
"learning_rate": 1.844238894598607e-05,
"loss": 0.3593,
"step": 6500
},
{
"epoch": 6.69,
"learning_rate": 1.8385291766586732e-05,
"loss": 0.2717,
"step": 6510
},
{
"epoch": 6.7,
"learning_rate": 1.8328194587187395e-05,
"loss": 0.3265,
"step": 6520
},
{
"epoch": 6.71,
"learning_rate": 1.8271097407788058e-05,
"loss": 0.346,
"step": 6530
},
{
"epoch": 6.72,
"learning_rate": 1.821400022838872e-05,
"loss": 0.2315,
"step": 6540
},
{
"epoch": 6.73,
"learning_rate": 1.815690304898938e-05,
"loss": 0.3481,
"step": 6550
},
{
"epoch": 6.74,
"learning_rate": 1.809980586959004e-05,
"loss": 0.5586,
"step": 6560
},
{
"epoch": 6.75,
"learning_rate": 1.8042708690190704e-05,
"loss": 0.4025,
"step": 6570
},
{
"epoch": 6.76,
"learning_rate": 1.7985611510791367e-05,
"loss": 0.4215,
"step": 6580
},
{
"epoch": 6.77,
"learning_rate": 1.792851433139203e-05,
"loss": 0.3219,
"step": 6590
},
{
"epoch": 6.78,
"learning_rate": 1.7871417151992693e-05,
"loss": 0.3005,
"step": 6600
},
{
"epoch": 6.79,
"learning_rate": 1.7814319972593356e-05,
"loss": 0.2727,
"step": 6610
},
{
"epoch": 6.8,
"learning_rate": 1.775722279319402e-05,
"loss": 0.2769,
"step": 6620
},
{
"epoch": 6.81,
"learning_rate": 1.770012561379468e-05,
"loss": 0.3262,
"step": 6630
},
{
"epoch": 6.82,
"learning_rate": 1.764302843439534e-05,
"loss": 0.3398,
"step": 6640
},
{
"epoch": 6.83,
"learning_rate": 1.7585931254996005e-05,
"loss": 0.3723,
"step": 6650
},
{
"epoch": 6.84,
"learning_rate": 1.7528834075596668e-05,
"loss": 0.4217,
"step": 6660
},
{
"epoch": 6.86,
"learning_rate": 1.747173689619733e-05,
"loss": 0.3878,
"step": 6670
},
{
"epoch": 6.87,
"learning_rate": 1.741463971679799e-05,
"loss": 0.4208,
"step": 6680
},
{
"epoch": 6.88,
"learning_rate": 1.7357542537398653e-05,
"loss": 0.3066,
"step": 6690
},
{
"epoch": 6.89,
"learning_rate": 1.7300445357999316e-05,
"loss": 0.3075,
"step": 6700
},
{
"epoch": 6.9,
"learning_rate": 1.7243348178599976e-05,
"loss": 0.2159,
"step": 6710
},
{
"epoch": 6.91,
"learning_rate": 1.718625099920064e-05,
"loss": 0.347,
"step": 6720
},
{
"epoch": 6.92,
"learning_rate": 1.7129153819801302e-05,
"loss": 0.1409,
"step": 6730
},
{
"epoch": 6.93,
"learning_rate": 1.7072056640401965e-05,
"loss": 0.4728,
"step": 6740
},
{
"epoch": 6.94,
"learning_rate": 1.7014959461002628e-05,
"loss": 0.422,
"step": 6750
},
{
"epoch": 6.95,
"learning_rate": 1.695786228160329e-05,
"loss": 0.3282,
"step": 6760
},
{
"epoch": 6.96,
"learning_rate": 1.690076510220395e-05,
"loss": 0.3912,
"step": 6770
},
{
"epoch": 6.97,
"learning_rate": 1.6843667922804614e-05,
"loss": 0.3438,
"step": 6780
},
{
"epoch": 6.98,
"learning_rate": 1.6786570743405277e-05,
"loss": 0.3683,
"step": 6790
},
{
"epoch": 6.99,
"learning_rate": 1.672947356400594e-05,
"loss": 0.2954,
"step": 6800
},
{
"epoch": 7.0,
"learning_rate": 1.66723763846066e-05,
"loss": 0.4932,
"step": 6810
},
{
"epoch": 7.0,
"eval_accuracy": 0.7528735632183908,
"eval_loss": 0.8778512477874756,
"eval_runtime": 79.1516,
"eval_samples_per_second": 50.561,
"eval_steps_per_second": 3.171,
"step": 6811
},
{
"epoch": 7.01,
"learning_rate": 1.6615279205207263e-05,
"loss": 0.4063,
"step": 6820
},
{
"epoch": 7.02,
"learning_rate": 1.6558182025807926e-05,
"loss": 0.2402,
"step": 6830
},
{
"epoch": 7.03,
"learning_rate": 1.650108484640859e-05,
"loss": 0.3374,
"step": 6840
},
{
"epoch": 7.04,
"learning_rate": 1.644398766700925e-05,
"loss": 0.2592,
"step": 6850
},
{
"epoch": 7.05,
"learning_rate": 1.638689048760991e-05,
"loss": 0.3549,
"step": 6860
},
{
"epoch": 7.06,
"learning_rate": 1.6329793308210575e-05,
"loss": 0.3334,
"step": 6870
},
{
"epoch": 7.07,
"learning_rate": 1.6272696128811238e-05,
"loss": 0.3014,
"step": 6880
},
{
"epoch": 7.08,
"learning_rate": 1.62155989494119e-05,
"loss": 0.2526,
"step": 6890
},
{
"epoch": 7.09,
"learning_rate": 1.6158501770012564e-05,
"loss": 0.187,
"step": 6900
},
{
"epoch": 7.1,
"learning_rate": 1.6101404590613227e-05,
"loss": 0.2323,
"step": 6910
},
{
"epoch": 7.11,
"learning_rate": 1.6044307411213887e-05,
"loss": 0.2563,
"step": 6920
},
{
"epoch": 7.12,
"learning_rate": 1.5987210231814546e-05,
"loss": 0.2672,
"step": 6930
},
{
"epoch": 7.13,
"learning_rate": 1.593011305241521e-05,
"loss": 0.2626,
"step": 6940
},
{
"epoch": 7.14,
"learning_rate": 1.5873015873015872e-05,
"loss": 0.3739,
"step": 6950
},
{
"epoch": 7.15,
"learning_rate": 1.5815918693616535e-05,
"loss": 0.2507,
"step": 6960
},
{
"epoch": 7.16,
"learning_rate": 1.57588215142172e-05,
"loss": 0.2795,
"step": 6970
},
{
"epoch": 7.17,
"learning_rate": 1.570172433481786e-05,
"loss": 0.2093,
"step": 6980
},
{
"epoch": 7.18,
"learning_rate": 1.5644627155418525e-05,
"loss": 0.3191,
"step": 6990
},
{
"epoch": 7.19,
"learning_rate": 1.5587529976019184e-05,
"loss": 0.2568,
"step": 7000
},
{
"epoch": 7.2,
"learning_rate": 1.5530432796619847e-05,
"loss": 0.2473,
"step": 7010
},
{
"epoch": 7.21,
"learning_rate": 1.547333561722051e-05,
"loss": 0.2847,
"step": 7020
},
{
"epoch": 7.23,
"learning_rate": 1.5416238437821173e-05,
"loss": 0.378,
"step": 7030
},
{
"epoch": 7.24,
"learning_rate": 1.5359141258421836e-05,
"loss": 0.3038,
"step": 7040
},
{
"epoch": 7.25,
"learning_rate": 1.53020440790225e-05,
"loss": 0.3276,
"step": 7050
},
{
"epoch": 7.26,
"learning_rate": 1.5244946899623157e-05,
"loss": 0.3149,
"step": 7060
},
{
"epoch": 7.27,
"learning_rate": 1.518784972022382e-05,
"loss": 0.3424,
"step": 7070
},
{
"epoch": 7.28,
"learning_rate": 1.5130752540824483e-05,
"loss": 0.3672,
"step": 7080
},
{
"epoch": 7.29,
"learning_rate": 1.5073655361425147e-05,
"loss": 0.3536,
"step": 7090
},
{
"epoch": 7.3,
"learning_rate": 1.5016558182025808e-05,
"loss": 0.2377,
"step": 7100
},
{
"epoch": 7.31,
"learning_rate": 1.4959461002626471e-05,
"loss": 0.4243,
"step": 7110
},
{
"epoch": 7.32,
"learning_rate": 1.4902363823227134e-05,
"loss": 0.2583,
"step": 7120
},
{
"epoch": 7.33,
"learning_rate": 1.4845266643827795e-05,
"loss": 0.3068,
"step": 7130
},
{
"epoch": 7.34,
"learning_rate": 1.4788169464428458e-05,
"loss": 0.2399,
"step": 7140
},
{
"epoch": 7.35,
"learning_rate": 1.4731072285029121e-05,
"loss": 0.2499,
"step": 7150
},
{
"epoch": 7.36,
"learning_rate": 1.4673975105629783e-05,
"loss": 0.2531,
"step": 7160
},
{
"epoch": 7.37,
"learning_rate": 1.4616877926230446e-05,
"loss": 0.3785,
"step": 7170
},
{
"epoch": 7.38,
"learning_rate": 1.4559780746831106e-05,
"loss": 0.2174,
"step": 7180
},
{
"epoch": 7.39,
"learning_rate": 1.4502683567431769e-05,
"loss": 0.4069,
"step": 7190
},
{
"epoch": 7.4,
"learning_rate": 1.4445586388032432e-05,
"loss": 0.2942,
"step": 7200
},
{
"epoch": 7.41,
"learning_rate": 1.4388489208633093e-05,
"loss": 0.373,
"step": 7210
},
{
"epoch": 7.42,
"learning_rate": 1.4331392029233756e-05,
"loss": 0.1789,
"step": 7220
},
{
"epoch": 7.43,
"learning_rate": 1.4274294849834419e-05,
"loss": 0.3505,
"step": 7230
},
{
"epoch": 7.44,
"learning_rate": 1.421719767043508e-05,
"loss": 0.3121,
"step": 7240
},
{
"epoch": 7.45,
"learning_rate": 1.4160100491035743e-05,
"loss": 0.2677,
"step": 7250
},
{
"epoch": 7.46,
"learning_rate": 1.4103003311636407e-05,
"loss": 0.3118,
"step": 7260
},
{
"epoch": 7.47,
"learning_rate": 1.404590613223707e-05,
"loss": 0.3632,
"step": 7270
},
{
"epoch": 7.48,
"learning_rate": 1.3988808952837731e-05,
"loss": 0.3108,
"step": 7280
},
{
"epoch": 7.49,
"learning_rate": 1.3931711773438394e-05,
"loss": 0.2599,
"step": 7290
},
{
"epoch": 7.5,
"learning_rate": 1.3874614594039057e-05,
"loss": 0.2582,
"step": 7300
},
{
"epoch": 7.51,
"learning_rate": 1.3817517414639717e-05,
"loss": 0.3127,
"step": 7310
},
{
"epoch": 7.52,
"learning_rate": 1.3760420235240378e-05,
"loss": 0.3364,
"step": 7320
},
{
"epoch": 7.53,
"learning_rate": 1.3703323055841041e-05,
"loss": 0.314,
"step": 7330
},
{
"epoch": 7.54,
"learning_rate": 1.3646225876441704e-05,
"loss": 0.2049,
"step": 7340
},
{
"epoch": 7.55,
"learning_rate": 1.3589128697042367e-05,
"loss": 0.1941,
"step": 7350
},
{
"epoch": 7.56,
"learning_rate": 1.3532031517643029e-05,
"loss": 0.3185,
"step": 7360
},
{
"epoch": 7.57,
"learning_rate": 1.3474934338243692e-05,
"loss": 0.2818,
"step": 7370
},
{
"epoch": 7.58,
"learning_rate": 1.3417837158844355e-05,
"loss": 0.4293,
"step": 7380
},
{
"epoch": 7.6,
"learning_rate": 1.3360739979445016e-05,
"loss": 0.2607,
"step": 7390
},
{
"epoch": 7.61,
"learning_rate": 1.3303642800045679e-05,
"loss": 0.3304,
"step": 7400
},
{
"epoch": 7.62,
"learning_rate": 1.3246545620646342e-05,
"loss": 0.2465,
"step": 7410
},
{
"epoch": 7.63,
"learning_rate": 1.3189448441247005e-05,
"loss": 0.3255,
"step": 7420
},
{
"epoch": 7.64,
"learning_rate": 1.3132351261847665e-05,
"loss": 0.2682,
"step": 7430
},
{
"epoch": 7.65,
"learning_rate": 1.3075254082448326e-05,
"loss": 0.3535,
"step": 7440
},
{
"epoch": 7.66,
"learning_rate": 1.301815690304899e-05,
"loss": 0.3849,
"step": 7450
},
{
"epoch": 7.67,
"learning_rate": 1.2961059723649652e-05,
"loss": 0.3847,
"step": 7460
},
{
"epoch": 7.68,
"learning_rate": 1.2903962544250314e-05,
"loss": 0.3054,
"step": 7470
},
{
"epoch": 7.69,
"learning_rate": 1.2846865364850977e-05,
"loss": 0.3864,
"step": 7480
},
{
"epoch": 7.7,
"learning_rate": 1.278976818545164e-05,
"loss": 0.3128,
"step": 7490
},
{
"epoch": 7.71,
"learning_rate": 1.2732671006052303e-05,
"loss": 0.243,
"step": 7500
},
{
"epoch": 7.72,
"learning_rate": 1.2675573826652964e-05,
"loss": 0.2103,
"step": 7510
},
{
"epoch": 7.73,
"learning_rate": 1.2618476647253627e-05,
"loss": 0.2757,
"step": 7520
},
{
"epoch": 7.74,
"learning_rate": 1.256137946785429e-05,
"loss": 0.2814,
"step": 7530
},
{
"epoch": 7.75,
"learning_rate": 1.2504282288454952e-05,
"loss": 0.1796,
"step": 7540
},
{
"epoch": 7.76,
"learning_rate": 1.2447185109055613e-05,
"loss": 0.1797,
"step": 7550
},
{
"epoch": 7.77,
"learning_rate": 1.2390087929656276e-05,
"loss": 0.4496,
"step": 7560
},
{
"epoch": 7.78,
"learning_rate": 1.2332990750256939e-05,
"loss": 0.3054,
"step": 7570
},
{
"epoch": 7.79,
"learning_rate": 1.22758935708576e-05,
"loss": 0.3379,
"step": 7580
},
{
"epoch": 7.8,
"learning_rate": 1.2218796391458262e-05,
"loss": 0.2622,
"step": 7590
},
{
"epoch": 7.81,
"learning_rate": 1.2161699212058925e-05,
"loss": 0.2826,
"step": 7600
},
{
"epoch": 7.82,
"learning_rate": 1.2104602032659588e-05,
"loss": 0.403,
"step": 7610
},
{
"epoch": 7.83,
"learning_rate": 1.204750485326025e-05,
"loss": 0.3228,
"step": 7620
},
{
"epoch": 7.84,
"learning_rate": 1.1990407673860912e-05,
"loss": 0.2298,
"step": 7630
},
{
"epoch": 7.85,
"learning_rate": 1.1933310494461575e-05,
"loss": 0.337,
"step": 7640
},
{
"epoch": 7.86,
"learning_rate": 1.1876213315062237e-05,
"loss": 0.238,
"step": 7650
},
{
"epoch": 7.87,
"learning_rate": 1.1819116135662898e-05,
"loss": 0.3083,
"step": 7660
},
{
"epoch": 7.88,
"learning_rate": 1.1762018956263561e-05,
"loss": 0.355,
"step": 7670
},
{
"epoch": 7.89,
"learning_rate": 1.1704921776864224e-05,
"loss": 0.2095,
"step": 7680
},
{
"epoch": 7.9,
"learning_rate": 1.1647824597464885e-05,
"loss": 0.2722,
"step": 7690
},
{
"epoch": 7.91,
"learning_rate": 1.1590727418065548e-05,
"loss": 0.2877,
"step": 7700
},
{
"epoch": 7.92,
"learning_rate": 1.153363023866621e-05,
"loss": 0.4122,
"step": 7710
},
{
"epoch": 7.93,
"learning_rate": 1.1476533059266873e-05,
"loss": 0.2895,
"step": 7720
},
{
"epoch": 7.94,
"learning_rate": 1.1419435879867534e-05,
"loss": 0.2573,
"step": 7730
},
{
"epoch": 7.95,
"learning_rate": 1.1362338700468197e-05,
"loss": 0.282,
"step": 7740
},
{
"epoch": 7.97,
"learning_rate": 1.130524152106886e-05,
"loss": 0.2968,
"step": 7750
},
{
"epoch": 7.98,
"learning_rate": 1.1248144341669523e-05,
"loss": 0.4063,
"step": 7760
},
{
"epoch": 7.99,
"learning_rate": 1.1191047162270183e-05,
"loss": 0.32,
"step": 7770
},
{
"epoch": 8.0,
"learning_rate": 1.1133949982870846e-05,
"loss": 0.2617,
"step": 7780
},
{
"epoch": 8.0,
"eval_accuracy": 0.7611194402798601,
"eval_loss": 0.8758827447891235,
"eval_runtime": 75.9337,
"eval_samples_per_second": 52.704,
"eval_steps_per_second": 3.306,
"step": 7784
},
{
"epoch": 8.01,
"learning_rate": 1.1076852803471509e-05,
"loss": 0.3641,
"step": 7790
},
{
"epoch": 8.02,
"learning_rate": 1.1019755624072172e-05,
"loss": 0.178,
"step": 7800
},
{
"epoch": 8.03,
"learning_rate": 1.0962658444672834e-05,
"loss": 0.3221,
"step": 7810
},
{
"epoch": 8.04,
"learning_rate": 1.0905561265273497e-05,
"loss": 0.2942,
"step": 7820
},
{
"epoch": 8.05,
"learning_rate": 1.084846408587416e-05,
"loss": 0.1911,
"step": 7830
},
{
"epoch": 8.06,
"learning_rate": 1.0791366906474821e-05,
"loss": 0.3745,
"step": 7840
},
{
"epoch": 8.07,
"learning_rate": 1.0734269727075482e-05,
"loss": 0.2101,
"step": 7850
},
{
"epoch": 8.08,
"learning_rate": 1.0677172547676145e-05,
"loss": 0.1982,
"step": 7860
},
{
"epoch": 8.09,
"learning_rate": 1.0620075368276808e-05,
"loss": 0.3596,
"step": 7870
},
{
"epoch": 8.1,
"learning_rate": 1.056297818887747e-05,
"loss": 0.2472,
"step": 7880
},
{
"epoch": 8.11,
"learning_rate": 1.0505881009478133e-05,
"loss": 0.2363,
"step": 7890
},
{
"epoch": 8.12,
"learning_rate": 1.0448783830078794e-05,
"loss": 0.2522,
"step": 7900
},
{
"epoch": 8.13,
"learning_rate": 1.0391686650679457e-05,
"loss": 0.3441,
"step": 7910
},
{
"epoch": 8.14,
"learning_rate": 1.0334589471280119e-05,
"loss": 0.2186,
"step": 7920
},
{
"epoch": 8.15,
"learning_rate": 1.0277492291880782e-05,
"loss": 0.2811,
"step": 7930
},
{
"epoch": 8.16,
"learning_rate": 1.0220395112481445e-05,
"loss": 0.286,
"step": 7940
},
{
"epoch": 8.17,
"learning_rate": 1.0163297933082106e-05,
"loss": 0.3196,
"step": 7950
},
{
"epoch": 8.18,
"learning_rate": 1.0106200753682767e-05,
"loss": 0.2615,
"step": 7960
},
{
"epoch": 8.19,
"learning_rate": 1.004910357428343e-05,
"loss": 0.3869,
"step": 7970
},
{
"epoch": 8.2,
"learning_rate": 9.992006394884094e-06,
"loss": 0.1821,
"step": 7980
},
{
"epoch": 8.21,
"learning_rate": 9.934909215484755e-06,
"loss": 0.2395,
"step": 7990
},
{
"epoch": 8.22,
"learning_rate": 9.877812036085418e-06,
"loss": 0.1865,
"step": 8000
},
{
"epoch": 8.23,
"learning_rate": 9.820714856686081e-06,
"loss": 0.2793,
"step": 8010
},
{
"epoch": 8.24,
"learning_rate": 9.763617677286742e-06,
"loss": 0.254,
"step": 8020
},
{
"epoch": 8.25,
"learning_rate": 9.706520497887404e-06,
"loss": 0.2022,
"step": 8030
},
{
"epoch": 8.26,
"learning_rate": 9.649423318488067e-06,
"loss": 0.2775,
"step": 8040
},
{
"epoch": 8.27,
"learning_rate": 9.59232613908873e-06,
"loss": 0.1804,
"step": 8050
},
{
"epoch": 8.28,
"learning_rate": 9.535228959689393e-06,
"loss": 0.3502,
"step": 8060
},
{
"epoch": 8.29,
"learning_rate": 9.478131780290054e-06,
"loss": 0.3351,
"step": 8070
},
{
"epoch": 8.3,
"learning_rate": 9.421034600890716e-06,
"loss": 0.2204,
"step": 8080
},
{
"epoch": 8.31,
"learning_rate": 9.363937421491379e-06,
"loss": 0.338,
"step": 8090
},
{
"epoch": 8.32,
"learning_rate": 9.306840242092042e-06,
"loss": 0.1702,
"step": 8100
},
{
"epoch": 8.34,
"learning_rate": 9.249743062692703e-06,
"loss": 0.3316,
"step": 8110
},
{
"epoch": 8.35,
"learning_rate": 9.192645883293366e-06,
"loss": 0.1557,
"step": 8120
},
{
"epoch": 8.36,
"learning_rate": 9.135548703894029e-06,
"loss": 0.3038,
"step": 8130
},
{
"epoch": 8.37,
"learning_rate": 9.07845152449469e-06,
"loss": 0.2708,
"step": 8140
},
{
"epoch": 8.38,
"learning_rate": 9.021354345095352e-06,
"loss": 0.3314,
"step": 8150
},
{
"epoch": 8.39,
"learning_rate": 8.964257165696015e-06,
"loss": 0.259,
"step": 8160
},
{
"epoch": 8.4,
"learning_rate": 8.907159986296678e-06,
"loss": 0.2734,
"step": 8170
},
{
"epoch": 8.41,
"learning_rate": 8.85006280689734e-06,
"loss": 0.3141,
"step": 8180
},
{
"epoch": 8.42,
"learning_rate": 8.792965627498002e-06,
"loss": 0.3305,
"step": 8190
},
{
"epoch": 8.43,
"learning_rate": 8.735868448098665e-06,
"loss": 0.3442,
"step": 8200
},
{
"epoch": 8.44,
"learning_rate": 8.678771268699327e-06,
"loss": 0.1473,
"step": 8210
},
{
"epoch": 8.45,
"learning_rate": 8.621674089299988e-06,
"loss": 0.215,
"step": 8220
},
{
"epoch": 8.46,
"learning_rate": 8.564576909900651e-06,
"loss": 0.3342,
"step": 8230
},
{
"epoch": 8.47,
"learning_rate": 8.507479730501314e-06,
"loss": 0.2517,
"step": 8240
},
{
"epoch": 8.48,
"learning_rate": 8.450382551101975e-06,
"loss": 0.2465,
"step": 8250
},
{
"epoch": 8.49,
"learning_rate": 8.393285371702639e-06,
"loss": 0.2631,
"step": 8260
},
{
"epoch": 8.5,
"learning_rate": 8.3361881923033e-06,
"loss": 0.2456,
"step": 8270
},
{
"epoch": 8.51,
"learning_rate": 8.279091012903963e-06,
"loss": 0.1236,
"step": 8280
},
{
"epoch": 8.52,
"learning_rate": 8.221993833504624e-06,
"loss": 0.1322,
"step": 8290
},
{
"epoch": 8.53,
"learning_rate": 8.164896654105287e-06,
"loss": 0.3307,
"step": 8300
},
{
"epoch": 8.54,
"learning_rate": 8.10779947470595e-06,
"loss": 0.2435,
"step": 8310
},
{
"epoch": 8.55,
"learning_rate": 8.050702295306613e-06,
"loss": 0.2027,
"step": 8320
},
{
"epoch": 8.56,
"learning_rate": 7.993605115907273e-06,
"loss": 0.3071,
"step": 8330
},
{
"epoch": 8.57,
"learning_rate": 7.936507936507936e-06,
"loss": 0.2156,
"step": 8340
},
{
"epoch": 8.58,
"learning_rate": 7.8794107571086e-06,
"loss": 0.2249,
"step": 8350
},
{
"epoch": 8.59,
"learning_rate": 7.822313577709262e-06,
"loss": 0.2273,
"step": 8360
},
{
"epoch": 8.6,
"learning_rate": 7.765216398309924e-06,
"loss": 0.2881,
"step": 8370
},
{
"epoch": 8.61,
"learning_rate": 7.708119218910587e-06,
"loss": 0.2833,
"step": 8380
},
{
"epoch": 8.62,
"learning_rate": 7.65102203951125e-06,
"loss": 0.2244,
"step": 8390
},
{
"epoch": 8.63,
"learning_rate": 7.59392486011191e-06,
"loss": 0.3647,
"step": 8400
},
{
"epoch": 8.64,
"learning_rate": 7.536827680712573e-06,
"loss": 0.2832,
"step": 8410
},
{
"epoch": 8.65,
"learning_rate": 7.4797305013132355e-06,
"loss": 0.2852,
"step": 8420
},
{
"epoch": 8.66,
"learning_rate": 7.422633321913898e-06,
"loss": 0.1909,
"step": 8430
},
{
"epoch": 8.67,
"learning_rate": 7.365536142514561e-06,
"loss": 0.1812,
"step": 8440
},
{
"epoch": 8.68,
"learning_rate": 7.308438963115223e-06,
"loss": 0.1603,
"step": 8450
},
{
"epoch": 8.69,
"learning_rate": 7.251341783715884e-06,
"loss": 0.3101,
"step": 8460
},
{
"epoch": 8.71,
"learning_rate": 7.1942446043165465e-06,
"loss": 0.2295,
"step": 8470
},
{
"epoch": 8.72,
"learning_rate": 7.1371474249172095e-06,
"loss": 0.1954,
"step": 8480
},
{
"epoch": 8.73,
"learning_rate": 7.080050245517872e-06,
"loss": 0.3214,
"step": 8490
},
{
"epoch": 8.74,
"learning_rate": 7.022953066118535e-06,
"loss": 0.2829,
"step": 8500
},
{
"epoch": 8.75,
"learning_rate": 6.965855886719197e-06,
"loss": 0.195,
"step": 8510
},
{
"epoch": 8.76,
"learning_rate": 6.908758707319858e-06,
"loss": 0.2034,
"step": 8520
},
{
"epoch": 8.77,
"learning_rate": 6.8516615279205205e-06,
"loss": 0.302,
"step": 8530
},
{
"epoch": 8.78,
"learning_rate": 6.794564348521184e-06,
"loss": 0.179,
"step": 8540
},
{
"epoch": 8.79,
"learning_rate": 6.737467169121846e-06,
"loss": 0.2493,
"step": 8550
},
{
"epoch": 8.8,
"learning_rate": 6.680369989722508e-06,
"loss": 0.2454,
"step": 8560
},
{
"epoch": 8.81,
"learning_rate": 6.623272810323171e-06,
"loss": 0.3168,
"step": 8570
},
{
"epoch": 8.82,
"learning_rate": 6.566175630923832e-06,
"loss": 0.3508,
"step": 8580
},
{
"epoch": 8.83,
"learning_rate": 6.509078451524495e-06,
"loss": 0.2957,
"step": 8590
},
{
"epoch": 8.84,
"learning_rate": 6.451981272125157e-06,
"loss": 0.2076,
"step": 8600
},
{
"epoch": 8.85,
"learning_rate": 6.39488409272582e-06,
"loss": 0.2547,
"step": 8610
},
{
"epoch": 8.86,
"learning_rate": 6.337786913326482e-06,
"loss": 0.309,
"step": 8620
},
{
"epoch": 8.87,
"learning_rate": 6.280689733927145e-06,
"loss": 0.1687,
"step": 8630
},
{
"epoch": 8.88,
"learning_rate": 6.2235925545278065e-06,
"loss": 0.3392,
"step": 8640
},
{
"epoch": 8.89,
"learning_rate": 6.1664953751284695e-06,
"loss": 0.2131,
"step": 8650
},
{
"epoch": 8.9,
"learning_rate": 6.109398195729131e-06,
"loss": 0.1998,
"step": 8660
},
{
"epoch": 8.91,
"learning_rate": 6.052301016329794e-06,
"loss": 0.3418,
"step": 8670
},
{
"epoch": 8.92,
"learning_rate": 5.995203836930456e-06,
"loss": 0.3297,
"step": 8680
},
{
"epoch": 8.93,
"learning_rate": 5.938106657531118e-06,
"loss": 0.2749,
"step": 8690
},
{
"epoch": 8.94,
"learning_rate": 5.8810094781317805e-06,
"loss": 0.2044,
"step": 8700
},
{
"epoch": 8.95,
"learning_rate": 5.823912298732443e-06,
"loss": 0.2422,
"step": 8710
},
{
"epoch": 8.96,
"learning_rate": 5.766815119333105e-06,
"loss": 0.2068,
"step": 8720
},
{
"epoch": 8.97,
"learning_rate": 5.709717939933767e-06,
"loss": 0.2952,
"step": 8730
},
{
"epoch": 8.98,
"learning_rate": 5.65262076053443e-06,
"loss": 0.3472,
"step": 8740
},
{
"epoch": 8.99,
"learning_rate": 5.5955235811350915e-06,
"loss": 0.2058,
"step": 8750
},
{
"epoch": 9.0,
"eval_accuracy": 0.764367816091954,
"eval_loss": 0.88155198097229,
"eval_runtime": 79.0277,
"eval_samples_per_second": 50.64,
"eval_steps_per_second": 3.176,
"step": 8757
},
{
"epoch": 9.0,
"learning_rate": 5.5384264017357546e-06,
"loss": 0.2606,
"step": 8760
},
{
"epoch": 9.01,
"learning_rate": 5.481329222336417e-06,
"loss": 0.1715,
"step": 8770
},
{
"epoch": 9.02,
"learning_rate": 5.42423204293708e-06,
"loss": 0.195,
"step": 8780
},
{
"epoch": 9.03,
"learning_rate": 5.367134863537741e-06,
"loss": 0.1734,
"step": 8790
},
{
"epoch": 9.04,
"learning_rate": 5.310037684138404e-06,
"loss": 0.3185,
"step": 8800
},
{
"epoch": 9.05,
"learning_rate": 5.252940504739066e-06,
"loss": 0.3128,
"step": 8810
},
{
"epoch": 9.06,
"learning_rate": 5.195843325339729e-06,
"loss": 0.2999,
"step": 8820
},
{
"epoch": 9.08,
"learning_rate": 5.138746145940391e-06,
"loss": 0.2472,
"step": 8830
},
{
"epoch": 9.09,
"learning_rate": 5.081648966541053e-06,
"loss": 0.2307,
"step": 8840
},
{
"epoch": 9.1,
"learning_rate": 5.024551787141715e-06,
"loss": 0.2071,
"step": 8850
},
{
"epoch": 9.11,
"learning_rate": 4.9674546077423774e-06,
"loss": 0.1363,
"step": 8860
},
{
"epoch": 9.12,
"learning_rate": 4.9103574283430405e-06,
"loss": 0.2243,
"step": 8870
},
{
"epoch": 9.13,
"learning_rate": 4.853260248943702e-06,
"loss": 0.2443,
"step": 8880
},
{
"epoch": 9.14,
"learning_rate": 4.796163069544365e-06,
"loss": 0.1662,
"step": 8890
},
{
"epoch": 9.15,
"learning_rate": 4.739065890145027e-06,
"loss": 0.1759,
"step": 8900
},
{
"epoch": 9.16,
"learning_rate": 4.681968710745689e-06,
"loss": 0.2733,
"step": 8910
},
{
"epoch": 9.17,
"learning_rate": 4.6248715313463515e-06,
"loss": 0.2764,
"step": 8920
},
{
"epoch": 9.18,
"learning_rate": 4.5677743519470145e-06,
"loss": 0.173,
"step": 8930
},
{
"epoch": 9.19,
"learning_rate": 4.510677172547676e-06,
"loss": 0.1549,
"step": 8940
},
{
"epoch": 9.2,
"learning_rate": 4.453579993148339e-06,
"loss": 0.2386,
"step": 8950
},
{
"epoch": 9.21,
"learning_rate": 4.396482813749001e-06,
"loss": 0.1738,
"step": 8960
},
{
"epoch": 9.22,
"learning_rate": 4.339385634349663e-06,
"loss": 0.2113,
"step": 8970
},
{
"epoch": 9.23,
"learning_rate": 4.2822884549503255e-06,
"loss": 0.2004,
"step": 8980
},
{
"epoch": 9.24,
"learning_rate": 4.225191275550988e-06,
"loss": 0.1507,
"step": 8990
},
{
"epoch": 9.25,
"learning_rate": 4.16809409615165e-06,
"loss": 0.1872,
"step": 9000
},
{
"epoch": 9.26,
"learning_rate": 4.110996916752312e-06,
"loss": 0.1939,
"step": 9010
},
{
"epoch": 9.27,
"learning_rate": 4.053899737352975e-06,
"loss": 0.1814,
"step": 9020
},
{
"epoch": 9.28,
"learning_rate": 3.9968025579536366e-06,
"loss": 0.1386,
"step": 9030
},
{
"epoch": 9.29,
"learning_rate": 3.9397053785543e-06,
"loss": 0.2234,
"step": 9040
},
{
"epoch": 9.3,
"learning_rate": 3.882608199154962e-06,
"loss": 0.2506,
"step": 9050
},
{
"epoch": 9.31,
"learning_rate": 3.825511019755625e-06,
"loss": 0.1923,
"step": 9060
},
{
"epoch": 9.32,
"learning_rate": 3.7684138403562866e-06,
"loss": 0.2455,
"step": 9070
},
{
"epoch": 9.33,
"learning_rate": 3.711316660956949e-06,
"loss": 0.2709,
"step": 9080
},
{
"epoch": 9.34,
"learning_rate": 3.6542194815576115e-06,
"loss": 0.1799,
"step": 9090
},
{
"epoch": 9.35,
"learning_rate": 3.5971223021582732e-06,
"loss": 0.1779,
"step": 9100
},
{
"epoch": 9.36,
"learning_rate": 3.540025122758936e-06,
"loss": 0.1892,
"step": 9110
},
{
"epoch": 9.37,
"learning_rate": 3.4829279433595985e-06,
"loss": 0.2063,
"step": 9120
},
{
"epoch": 9.38,
"learning_rate": 3.4258307639602603e-06,
"loss": 0.263,
"step": 9130
},
{
"epoch": 9.39,
"learning_rate": 3.368733584560923e-06,
"loss": 0.2038,
"step": 9140
},
{
"epoch": 9.4,
"learning_rate": 3.3116364051615855e-06,
"loss": 0.2361,
"step": 9150
},
{
"epoch": 9.41,
"learning_rate": 3.2545392257622473e-06,
"loss": 0.2218,
"step": 9160
},
{
"epoch": 9.42,
"learning_rate": 3.19744204636291e-06,
"loss": 0.2983,
"step": 9170
},
{
"epoch": 9.43,
"learning_rate": 3.1403448669635725e-06,
"loss": 0.2649,
"step": 9180
},
{
"epoch": 9.45,
"learning_rate": 3.0832476875642348e-06,
"loss": 0.3045,
"step": 9190
},
{
"epoch": 9.46,
"learning_rate": 3.026150508164897e-06,
"loss": 0.2163,
"step": 9200
},
{
"epoch": 9.47,
"learning_rate": 2.969053328765559e-06,
"loss": 0.174,
"step": 9210
},
{
"epoch": 9.48,
"learning_rate": 2.9119561493662214e-06,
"loss": 0.1959,
"step": 9220
},
{
"epoch": 9.49,
"learning_rate": 2.8548589699668836e-06,
"loss": 0.3003,
"step": 9230
},
{
"epoch": 9.5,
"learning_rate": 2.7977617905675458e-06,
"loss": 0.1453,
"step": 9240
},
{
"epoch": 9.51,
"learning_rate": 2.7406646111682084e-06,
"loss": 0.2132,
"step": 9250
},
{
"epoch": 9.52,
"learning_rate": 2.6835674317688706e-06,
"loss": 0.2701,
"step": 9260
},
{
"epoch": 9.53,
"learning_rate": 2.626470252369533e-06,
"loss": 0.201,
"step": 9270
},
{
"epoch": 9.54,
"learning_rate": 2.5693730729701954e-06,
"loss": 0.2415,
"step": 9280
},
{
"epoch": 9.55,
"learning_rate": 2.5122758935708576e-06,
"loss": 0.3035,
"step": 9290
},
{
"epoch": 9.56,
"learning_rate": 2.4551787141715202e-06,
"loss": 0.1869,
"step": 9300
},
{
"epoch": 9.57,
"learning_rate": 2.3980815347721824e-06,
"loss": 0.1831,
"step": 9310
},
{
"epoch": 9.58,
"learning_rate": 2.3409843553728446e-06,
"loss": 0.3404,
"step": 9320
},
{
"epoch": 9.59,
"learning_rate": 2.2838871759735073e-06,
"loss": 0.2532,
"step": 9330
},
{
"epoch": 9.6,
"learning_rate": 2.2267899965741695e-06,
"loss": 0.1948,
"step": 9340
},
{
"epoch": 9.61,
"learning_rate": 2.1696928171748317e-06,
"loss": 0.2346,
"step": 9350
},
{
"epoch": 9.62,
"learning_rate": 2.112595637775494e-06,
"loss": 0.2772,
"step": 9360
},
{
"epoch": 9.63,
"learning_rate": 2.055498458376156e-06,
"loss": 0.2794,
"step": 9370
},
{
"epoch": 9.64,
"learning_rate": 1.9984012789768183e-06,
"loss": 0.2529,
"step": 9380
},
{
"epoch": 9.65,
"learning_rate": 1.941304099577481e-06,
"loss": 0.1766,
"step": 9390
},
{
"epoch": 9.66,
"learning_rate": 1.8842069201781433e-06,
"loss": 0.263,
"step": 9400
},
{
"epoch": 9.67,
"learning_rate": 1.8271097407788057e-06,
"loss": 0.2428,
"step": 9410
},
{
"epoch": 9.68,
"learning_rate": 1.770012561379468e-06,
"loss": 0.2133,
"step": 9420
},
{
"epoch": 9.69,
"learning_rate": 1.7129153819801301e-06,
"loss": 0.1734,
"step": 9430
},
{
"epoch": 9.7,
"learning_rate": 1.6558182025807928e-06,
"loss": 0.2164,
"step": 9440
},
{
"epoch": 9.71,
"learning_rate": 1.598721023181455e-06,
"loss": 0.2505,
"step": 9450
},
{
"epoch": 9.72,
"learning_rate": 1.5416238437821174e-06,
"loss": 0.2556,
"step": 9460
},
{
"epoch": 9.73,
"learning_rate": 1.4845266643827796e-06,
"loss": 0.3275,
"step": 9470
},
{
"epoch": 9.74,
"learning_rate": 1.4274294849834418e-06,
"loss": 0.2799,
"step": 9480
},
{
"epoch": 9.75,
"learning_rate": 1.3703323055841042e-06,
"loss": 0.2848,
"step": 9490
},
{
"epoch": 9.76,
"learning_rate": 1.3132351261847666e-06,
"loss": 0.175,
"step": 9500
},
{
"epoch": 9.77,
"learning_rate": 1.2561379467854288e-06,
"loss": 0.2494,
"step": 9510
},
{
"epoch": 9.78,
"learning_rate": 1.1990407673860912e-06,
"loss": 0.1938,
"step": 9520
},
{
"epoch": 9.79,
"learning_rate": 1.1419435879867536e-06,
"loss": 0.2322,
"step": 9530
},
{
"epoch": 9.8,
"learning_rate": 1.0848464085874158e-06,
"loss": 0.1531,
"step": 9540
},
{
"epoch": 9.82,
"learning_rate": 1.027749229188078e-06,
"loss": 0.2093,
"step": 9550
},
{
"epoch": 9.83,
"learning_rate": 9.706520497887405e-07,
"loss": 0.2247,
"step": 9560
},
{
"epoch": 9.84,
"learning_rate": 9.135548703894029e-07,
"loss": 0.1895,
"step": 9570
},
{
"epoch": 9.85,
"learning_rate": 8.564576909900651e-07,
"loss": 0.1653,
"step": 9580
},
{
"epoch": 9.86,
"learning_rate": 7.993605115907275e-07,
"loss": 0.2792,
"step": 9590
},
{
"epoch": 9.87,
"learning_rate": 7.422633321913898e-07,
"loss": 0.2381,
"step": 9600
},
{
"epoch": 9.88,
"learning_rate": 6.851661527920521e-07,
"loss": 0.2989,
"step": 9610
},
{
"epoch": 9.89,
"learning_rate": 6.280689733927144e-07,
"loss": 0.1886,
"step": 9620
},
{
"epoch": 9.9,
"learning_rate": 5.709717939933768e-07,
"loss": 0.1844,
"step": 9630
},
{
"epoch": 9.91,
"learning_rate": 5.13874614594039e-07,
"loss": 0.3018,
"step": 9640
},
{
"epoch": 9.92,
"learning_rate": 4.5677743519470143e-07,
"loss": 0.2131,
"step": 9650
},
{
"epoch": 9.93,
"learning_rate": 3.9968025579536374e-07,
"loss": 0.2389,
"step": 9660
},
{
"epoch": 9.94,
"learning_rate": 3.4258307639602605e-07,
"loss": 0.2018,
"step": 9670
},
{
"epoch": 9.95,
"learning_rate": 2.854858969966884e-07,
"loss": 0.2221,
"step": 9680
},
{
"epoch": 9.96,
"learning_rate": 2.2838871759735072e-07,
"loss": 0.1782,
"step": 9690
},
{
"epoch": 9.97,
"learning_rate": 1.7129153819801302e-07,
"loss": 0.16,
"step": 9700
},
{
"epoch": 9.98,
"learning_rate": 1.1419435879867536e-07,
"loss": 0.2135,
"step": 9710
},
{
"epoch": 9.99,
"learning_rate": 5.709717939933768e-08,
"loss": 0.1635,
"step": 9720
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 0.1797,
"step": 9730
},
{
"epoch": 10.0,
"eval_accuracy": 0.7673663168415792,
"eval_loss": 0.8717091679573059,
"eval_runtime": 77.3108,
"eval_samples_per_second": 51.765,
"eval_steps_per_second": 3.247,
"step": 9730
},
{
"epoch": 10.0,
"step": 9730,
"total_flos": 1.232466345637632e+19,
"train_loss": 0.6940609043573182,
"train_runtime": 7099.3654,
"train_samples_per_second": 21.913,
"train_steps_per_second": 1.371
}
],
"logging_steps": 10,
"max_steps": 9730,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.232466345637632e+19,
"trial_name": null,
"trial_params": null
}