wav2vec2-present / trainer_state.json
MatsRooth's picture
End of training
e12b174 verified
raw
history blame
190 kB
{
"best_metric": 0.985280588776449,
"best_model_checkpoint": "wav2vec2-present/checkpoint-15390",
"epoch": 9.996752192270218,
"eval_steps": 500,
"global_step": 15390,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9493177387914228e-07,
"loss": 2.564,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.8986354775828457e-07,
"loss": 2.558,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 5.847953216374269e-07,
"loss": 2.5645,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 7.797270955165691e-07,
"loss": 2.568,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 9.746588693957115e-07,
"loss": 2.5642,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 1.1695906432748538e-06,
"loss": 2.5622,
"step": 60
},
{
"epoch": 0.05,
"learning_rate": 1.364522417153996e-06,
"loss": 2.5609,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 1.5594541910331383e-06,
"loss": 2.5598,
"step": 80
},
{
"epoch": 0.06,
"learning_rate": 1.7543859649122807e-06,
"loss": 2.552,
"step": 90
},
{
"epoch": 0.06,
"learning_rate": 1.949317738791423e-06,
"loss": 2.5565,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 2.1442495126705653e-06,
"loss": 2.5584,
"step": 110
},
{
"epoch": 0.08,
"learning_rate": 2.3391812865497075e-06,
"loss": 2.5492,
"step": 120
},
{
"epoch": 0.08,
"learning_rate": 2.5341130604288498e-06,
"loss": 2.5532,
"step": 130
},
{
"epoch": 0.09,
"learning_rate": 2.729044834307992e-06,
"loss": 2.5497,
"step": 140
},
{
"epoch": 0.1,
"learning_rate": 2.9239766081871343e-06,
"loss": 2.549,
"step": 150
},
{
"epoch": 0.1,
"learning_rate": 3.1189083820662765e-06,
"loss": 2.546,
"step": 160
},
{
"epoch": 0.11,
"learning_rate": 3.313840155945419e-06,
"loss": 2.5487,
"step": 170
},
{
"epoch": 0.12,
"learning_rate": 3.5087719298245615e-06,
"loss": 2.5346,
"step": 180
},
{
"epoch": 0.12,
"learning_rate": 3.7037037037037037e-06,
"loss": 2.5267,
"step": 190
},
{
"epoch": 0.13,
"learning_rate": 3.898635477582846e-06,
"loss": 2.529,
"step": 200
},
{
"epoch": 0.14,
"learning_rate": 4.093567251461988e-06,
"loss": 2.5306,
"step": 210
},
{
"epoch": 0.14,
"learning_rate": 4.2884990253411305e-06,
"loss": 2.5213,
"step": 220
},
{
"epoch": 0.15,
"learning_rate": 4.483430799220272e-06,
"loss": 2.512,
"step": 230
},
{
"epoch": 0.16,
"learning_rate": 4.678362573099415e-06,
"loss": 2.5016,
"step": 240
},
{
"epoch": 0.16,
"learning_rate": 4.873294346978558e-06,
"loss": 2.5006,
"step": 250
},
{
"epoch": 0.17,
"learning_rate": 5.0682261208576995e-06,
"loss": 2.5025,
"step": 260
},
{
"epoch": 0.18,
"learning_rate": 5.263157894736842e-06,
"loss": 2.498,
"step": 270
},
{
"epoch": 0.18,
"learning_rate": 5.458089668615984e-06,
"loss": 2.4611,
"step": 280
},
{
"epoch": 0.19,
"learning_rate": 5.653021442495127e-06,
"loss": 2.4517,
"step": 290
},
{
"epoch": 0.19,
"learning_rate": 5.8479532163742686e-06,
"loss": 2.4287,
"step": 300
},
{
"epoch": 0.2,
"learning_rate": 6.042884990253411e-06,
"loss": 2.4189,
"step": 310
},
{
"epoch": 0.21,
"learning_rate": 6.237816764132553e-06,
"loss": 2.3796,
"step": 320
},
{
"epoch": 0.21,
"learning_rate": 6.432748538011696e-06,
"loss": 2.3646,
"step": 330
},
{
"epoch": 0.22,
"learning_rate": 6.627680311890838e-06,
"loss": 2.3719,
"step": 340
},
{
"epoch": 0.23,
"learning_rate": 6.82261208576998e-06,
"loss": 2.3398,
"step": 350
},
{
"epoch": 0.23,
"learning_rate": 7.017543859649123e-06,
"loss": 2.2933,
"step": 360
},
{
"epoch": 0.24,
"learning_rate": 7.212475633528265e-06,
"loss": 2.3268,
"step": 370
},
{
"epoch": 0.25,
"learning_rate": 7.4074074074074075e-06,
"loss": 2.2196,
"step": 380
},
{
"epoch": 0.25,
"learning_rate": 7.602339181286549e-06,
"loss": 2.1905,
"step": 390
},
{
"epoch": 0.26,
"learning_rate": 7.797270955165692e-06,
"loss": 2.2123,
"step": 400
},
{
"epoch": 0.27,
"learning_rate": 7.992202729044834e-06,
"loss": 2.1655,
"step": 410
},
{
"epoch": 0.27,
"learning_rate": 8.187134502923976e-06,
"loss": 2.1171,
"step": 420
},
{
"epoch": 0.28,
"learning_rate": 8.38206627680312e-06,
"loss": 2.056,
"step": 430
},
{
"epoch": 0.29,
"learning_rate": 8.576998050682261e-06,
"loss": 2.0953,
"step": 440
},
{
"epoch": 0.29,
"learning_rate": 8.771929824561403e-06,
"loss": 2.0215,
"step": 450
},
{
"epoch": 0.3,
"learning_rate": 8.966861598440545e-06,
"loss": 1.9845,
"step": 460
},
{
"epoch": 0.31,
"learning_rate": 9.161793372319688e-06,
"loss": 1.9909,
"step": 470
},
{
"epoch": 0.31,
"learning_rate": 9.35672514619883e-06,
"loss": 1.9478,
"step": 480
},
{
"epoch": 0.32,
"learning_rate": 9.551656920077972e-06,
"loss": 1.9098,
"step": 490
},
{
"epoch": 0.32,
"learning_rate": 9.746588693957115e-06,
"loss": 1.8296,
"step": 500
},
{
"epoch": 0.33,
"learning_rate": 9.941520467836257e-06,
"loss": 1.7486,
"step": 510
},
{
"epoch": 0.34,
"learning_rate": 1.0136452241715399e-05,
"loss": 1.7578,
"step": 520
},
{
"epoch": 0.34,
"learning_rate": 1.0331384015594541e-05,
"loss": 1.708,
"step": 530
},
{
"epoch": 0.35,
"learning_rate": 1.0526315789473684e-05,
"loss": 1.5767,
"step": 540
},
{
"epoch": 0.36,
"learning_rate": 1.0721247563352826e-05,
"loss": 1.7068,
"step": 550
},
{
"epoch": 0.36,
"learning_rate": 1.0916179337231968e-05,
"loss": 1.6142,
"step": 560
},
{
"epoch": 0.37,
"learning_rate": 1.111111111111111e-05,
"loss": 1.584,
"step": 570
},
{
"epoch": 0.38,
"learning_rate": 1.1306042884990253e-05,
"loss": 1.523,
"step": 580
},
{
"epoch": 0.38,
"learning_rate": 1.1500974658869395e-05,
"loss": 1.4793,
"step": 590
},
{
"epoch": 0.39,
"learning_rate": 1.1695906432748537e-05,
"loss": 1.4863,
"step": 600
},
{
"epoch": 0.4,
"learning_rate": 1.1871345029239766e-05,
"loss": 1.4778,
"step": 610
},
{
"epoch": 0.4,
"learning_rate": 1.2066276803118908e-05,
"loss": 1.3872,
"step": 620
},
{
"epoch": 0.41,
"learning_rate": 1.226120857699805e-05,
"loss": 1.412,
"step": 630
},
{
"epoch": 0.42,
"learning_rate": 1.2456140350877193e-05,
"loss": 1.4054,
"step": 640
},
{
"epoch": 0.42,
"learning_rate": 1.2651072124756335e-05,
"loss": 1.4,
"step": 650
},
{
"epoch": 0.43,
"learning_rate": 1.2846003898635477e-05,
"loss": 1.2976,
"step": 660
},
{
"epoch": 0.44,
"learning_rate": 1.3040935672514619e-05,
"loss": 1.3261,
"step": 670
},
{
"epoch": 0.44,
"learning_rate": 1.3235867446393762e-05,
"loss": 1.2583,
"step": 680
},
{
"epoch": 0.45,
"learning_rate": 1.3430799220272904e-05,
"loss": 1.3031,
"step": 690
},
{
"epoch": 0.45,
"learning_rate": 1.3625730994152046e-05,
"loss": 1.1888,
"step": 700
},
{
"epoch": 0.46,
"learning_rate": 1.3820662768031188e-05,
"loss": 1.2747,
"step": 710
},
{
"epoch": 0.47,
"learning_rate": 1.4015594541910331e-05,
"loss": 1.1909,
"step": 720
},
{
"epoch": 0.47,
"learning_rate": 1.4210526315789473e-05,
"loss": 1.1954,
"step": 730
},
{
"epoch": 0.48,
"learning_rate": 1.4405458089668615e-05,
"loss": 1.1938,
"step": 740
},
{
"epoch": 0.49,
"learning_rate": 1.4600389863547759e-05,
"loss": 1.1446,
"step": 750
},
{
"epoch": 0.49,
"learning_rate": 1.47953216374269e-05,
"loss": 1.0875,
"step": 760
},
{
"epoch": 0.5,
"learning_rate": 1.4990253411306042e-05,
"loss": 1.1029,
"step": 770
},
{
"epoch": 0.51,
"learning_rate": 1.5185185185185186e-05,
"loss": 0.9905,
"step": 780
},
{
"epoch": 0.51,
"learning_rate": 1.538011695906433e-05,
"loss": 1.175,
"step": 790
},
{
"epoch": 0.52,
"learning_rate": 1.5555555555555555e-05,
"loss": 1.0142,
"step": 800
},
{
"epoch": 0.53,
"learning_rate": 1.5750487329434697e-05,
"loss": 1.054,
"step": 810
},
{
"epoch": 0.53,
"learning_rate": 1.594541910331384e-05,
"loss": 1.019,
"step": 820
},
{
"epoch": 0.54,
"learning_rate": 1.614035087719298e-05,
"loss": 0.98,
"step": 830
},
{
"epoch": 0.55,
"learning_rate": 1.6335282651072126e-05,
"loss": 0.8901,
"step": 840
},
{
"epoch": 0.55,
"learning_rate": 1.6530214424951268e-05,
"loss": 0.9091,
"step": 850
},
{
"epoch": 0.56,
"learning_rate": 1.672514619883041e-05,
"loss": 0.8969,
"step": 860
},
{
"epoch": 0.57,
"learning_rate": 1.692007797270955e-05,
"loss": 0.8246,
"step": 870
},
{
"epoch": 0.57,
"learning_rate": 1.7115009746588693e-05,
"loss": 1.0441,
"step": 880
},
{
"epoch": 0.58,
"learning_rate": 1.7309941520467835e-05,
"loss": 0.8832,
"step": 890
},
{
"epoch": 0.58,
"learning_rate": 1.7504873294346977e-05,
"loss": 0.8898,
"step": 900
},
{
"epoch": 0.59,
"learning_rate": 1.7699805068226122e-05,
"loss": 0.8001,
"step": 910
},
{
"epoch": 0.6,
"learning_rate": 1.7894736842105264e-05,
"loss": 0.7232,
"step": 920
},
{
"epoch": 0.6,
"learning_rate": 1.8089668615984406e-05,
"loss": 0.8311,
"step": 930
},
{
"epoch": 0.61,
"learning_rate": 1.8284600389863547e-05,
"loss": 0.7449,
"step": 940
},
{
"epoch": 0.62,
"learning_rate": 1.847953216374269e-05,
"loss": 0.8162,
"step": 950
},
{
"epoch": 0.62,
"learning_rate": 1.867446393762183e-05,
"loss": 0.7072,
"step": 960
},
{
"epoch": 0.63,
"learning_rate": 1.8869395711500973e-05,
"loss": 0.7233,
"step": 970
},
{
"epoch": 0.64,
"learning_rate": 1.9064327485380115e-05,
"loss": 0.8823,
"step": 980
},
{
"epoch": 0.64,
"learning_rate": 1.925925925925926e-05,
"loss": 0.71,
"step": 990
},
{
"epoch": 0.65,
"learning_rate": 1.9454191033138402e-05,
"loss": 0.6826,
"step": 1000
},
{
"epoch": 0.66,
"learning_rate": 1.9649122807017544e-05,
"loss": 0.7723,
"step": 1010
},
{
"epoch": 0.66,
"learning_rate": 1.9844054580896685e-05,
"loss": 0.7333,
"step": 1020
},
{
"epoch": 0.67,
"learning_rate": 2.0038986354775827e-05,
"loss": 0.6399,
"step": 1030
},
{
"epoch": 0.68,
"learning_rate": 2.023391812865497e-05,
"loss": 0.5773,
"step": 1040
},
{
"epoch": 0.68,
"learning_rate": 2.042884990253411e-05,
"loss": 0.5524,
"step": 1050
},
{
"epoch": 0.69,
"learning_rate": 2.0623781676413256e-05,
"loss": 0.6107,
"step": 1060
},
{
"epoch": 0.7,
"learning_rate": 2.0818713450292398e-05,
"loss": 0.515,
"step": 1070
},
{
"epoch": 0.7,
"learning_rate": 2.101364522417154e-05,
"loss": 0.5981,
"step": 1080
},
{
"epoch": 0.71,
"learning_rate": 2.120857699805068e-05,
"loss": 0.6844,
"step": 1090
},
{
"epoch": 0.71,
"learning_rate": 2.1403508771929824e-05,
"loss": 0.4908,
"step": 1100
},
{
"epoch": 0.72,
"learning_rate": 2.1598440545808965e-05,
"loss": 0.5733,
"step": 1110
},
{
"epoch": 0.73,
"learning_rate": 2.1793372319688107e-05,
"loss": 0.4578,
"step": 1120
},
{
"epoch": 0.73,
"learning_rate": 2.1988304093567252e-05,
"loss": 0.4443,
"step": 1130
},
{
"epoch": 0.74,
"learning_rate": 2.2183235867446394e-05,
"loss": 0.4854,
"step": 1140
},
{
"epoch": 0.75,
"learning_rate": 2.2378167641325536e-05,
"loss": 0.4891,
"step": 1150
},
{
"epoch": 0.75,
"learning_rate": 2.2573099415204678e-05,
"loss": 0.4287,
"step": 1160
},
{
"epoch": 0.76,
"learning_rate": 2.2748538011695907e-05,
"loss": 0.7449,
"step": 1170
},
{
"epoch": 0.77,
"learning_rate": 2.294346978557505e-05,
"loss": 0.4233,
"step": 1180
},
{
"epoch": 0.77,
"learning_rate": 2.313840155945419e-05,
"loss": 0.749,
"step": 1190
},
{
"epoch": 0.78,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.4584,
"step": 1200
},
{
"epoch": 0.79,
"learning_rate": 2.3528265107212478e-05,
"loss": 0.379,
"step": 1210
},
{
"epoch": 0.79,
"learning_rate": 2.372319688109162e-05,
"loss": 0.5038,
"step": 1220
},
{
"epoch": 0.8,
"learning_rate": 2.391812865497076e-05,
"loss": 0.4659,
"step": 1230
},
{
"epoch": 0.81,
"learning_rate": 2.4113060428849903e-05,
"loss": 0.5737,
"step": 1240
},
{
"epoch": 0.81,
"learning_rate": 2.4307992202729045e-05,
"loss": 0.4579,
"step": 1250
},
{
"epoch": 0.82,
"learning_rate": 2.4502923976608187e-05,
"loss": 0.4404,
"step": 1260
},
{
"epoch": 0.82,
"learning_rate": 2.4697855750487332e-05,
"loss": 0.3332,
"step": 1270
},
{
"epoch": 0.83,
"learning_rate": 2.4892787524366474e-05,
"loss": 0.4733,
"step": 1280
},
{
"epoch": 0.84,
"learning_rate": 2.5087719298245616e-05,
"loss": 0.4872,
"step": 1290
},
{
"epoch": 0.84,
"learning_rate": 2.5282651072124758e-05,
"loss": 0.473,
"step": 1300
},
{
"epoch": 0.85,
"learning_rate": 2.54775828460039e-05,
"loss": 0.4366,
"step": 1310
},
{
"epoch": 0.86,
"learning_rate": 2.567251461988304e-05,
"loss": 0.3755,
"step": 1320
},
{
"epoch": 0.86,
"learning_rate": 2.5867446393762183e-05,
"loss": 0.6943,
"step": 1330
},
{
"epoch": 0.87,
"learning_rate": 2.6062378167641325e-05,
"loss": 0.3036,
"step": 1340
},
{
"epoch": 0.88,
"learning_rate": 2.625730994152047e-05,
"loss": 0.4545,
"step": 1350
},
{
"epoch": 0.88,
"learning_rate": 2.6452241715399612e-05,
"loss": 0.3032,
"step": 1360
},
{
"epoch": 0.89,
"learning_rate": 2.6647173489278754e-05,
"loss": 0.3492,
"step": 1370
},
{
"epoch": 0.9,
"learning_rate": 2.6842105263157896e-05,
"loss": 0.2846,
"step": 1380
},
{
"epoch": 0.9,
"learning_rate": 2.7037037037037037e-05,
"loss": 0.5078,
"step": 1390
},
{
"epoch": 0.91,
"learning_rate": 2.7212475633528263e-05,
"loss": 0.3074,
"step": 1400
},
{
"epoch": 0.92,
"learning_rate": 2.7407407407407408e-05,
"loss": 0.2837,
"step": 1410
},
{
"epoch": 0.92,
"learning_rate": 2.760233918128655e-05,
"loss": 0.4813,
"step": 1420
},
{
"epoch": 0.93,
"learning_rate": 2.7797270955165692e-05,
"loss": 0.3597,
"step": 1430
},
{
"epoch": 0.94,
"learning_rate": 2.7992202729044834e-05,
"loss": 0.1955,
"step": 1440
},
{
"epoch": 0.94,
"learning_rate": 2.8187134502923976e-05,
"loss": 0.5268,
"step": 1450
},
{
"epoch": 0.95,
"learning_rate": 2.8382066276803117e-05,
"loss": 0.5699,
"step": 1460
},
{
"epoch": 0.95,
"learning_rate": 2.857699805068226e-05,
"loss": 0.3092,
"step": 1470
},
{
"epoch": 0.96,
"learning_rate": 2.87719298245614e-05,
"loss": 0.3738,
"step": 1480
},
{
"epoch": 0.97,
"learning_rate": 2.8966861598440546e-05,
"loss": 0.5783,
"step": 1490
},
{
"epoch": 0.97,
"learning_rate": 2.9161793372319688e-05,
"loss": 0.3053,
"step": 1500
},
{
"epoch": 0.98,
"learning_rate": 2.935672514619883e-05,
"loss": 0.3565,
"step": 1510
},
{
"epoch": 0.99,
"learning_rate": 2.9551656920077972e-05,
"loss": 0.3092,
"step": 1520
},
{
"epoch": 0.99,
"learning_rate": 2.9746588693957114e-05,
"loss": 0.5208,
"step": 1530
},
{
"epoch": 1.0,
"eval_accuracy": 0.9425022999080037,
"eval_loss": 0.22455397248268127,
"eval_runtime": 13.2379,
"eval_samples_per_second": 164.226,
"eval_steps_per_second": 41.094,
"step": 1539
},
{
"epoch": 1.0,
"learning_rate": 2.9941520467836255e-05,
"loss": 0.4893,
"step": 1540
},
{
"epoch": 1.01,
"learning_rate": 2.99848386398094e-05,
"loss": 0.2747,
"step": 1550
},
{
"epoch": 1.01,
"learning_rate": 2.9963179553822827e-05,
"loss": 0.6032,
"step": 1560
},
{
"epoch": 1.02,
"learning_rate": 2.9941520467836255e-05,
"loss": 0.265,
"step": 1570
},
{
"epoch": 1.03,
"learning_rate": 2.9919861381849688e-05,
"loss": 0.4532,
"step": 1580
},
{
"epoch": 1.03,
"learning_rate": 2.9898202295863116e-05,
"loss": 0.3197,
"step": 1590
},
{
"epoch": 1.04,
"learning_rate": 2.9876543209876545e-05,
"loss": 0.3362,
"step": 1600
},
{
"epoch": 1.05,
"learning_rate": 2.9854884123889974e-05,
"loss": 0.3188,
"step": 1610
},
{
"epoch": 1.05,
"learning_rate": 2.9833225037903402e-05,
"loss": 0.3177,
"step": 1620
},
{
"epoch": 1.06,
"learning_rate": 2.981156595191683e-05,
"loss": 0.4515,
"step": 1630
},
{
"epoch": 1.07,
"learning_rate": 2.9789906865930256e-05,
"loss": 0.2545,
"step": 1640
},
{
"epoch": 1.07,
"learning_rate": 2.9768247779943685e-05,
"loss": 0.2263,
"step": 1650
},
{
"epoch": 1.08,
"learning_rate": 2.9746588693957114e-05,
"loss": 0.4115,
"step": 1660
},
{
"epoch": 1.08,
"learning_rate": 2.9724929607970546e-05,
"loss": 0.2845,
"step": 1670
},
{
"epoch": 1.09,
"learning_rate": 2.9703270521983974e-05,
"loss": 0.3104,
"step": 1680
},
{
"epoch": 1.1,
"learning_rate": 2.9681611435997403e-05,
"loss": 0.3711,
"step": 1690
},
{
"epoch": 1.1,
"learning_rate": 2.9659952350010832e-05,
"loss": 0.3236,
"step": 1700
},
{
"epoch": 1.11,
"learning_rate": 2.9638293264024257e-05,
"loss": 0.216,
"step": 1710
},
{
"epoch": 1.12,
"learning_rate": 2.9616634178037686e-05,
"loss": 0.3934,
"step": 1720
},
{
"epoch": 1.12,
"learning_rate": 2.9594975092051114e-05,
"loss": 0.6872,
"step": 1730
},
{
"epoch": 1.13,
"learning_rate": 2.9573316006064543e-05,
"loss": 0.5262,
"step": 1740
},
{
"epoch": 1.14,
"learning_rate": 2.9551656920077972e-05,
"loss": 0.3703,
"step": 1750
},
{
"epoch": 1.14,
"learning_rate": 2.9529997834091404e-05,
"loss": 0.3206,
"step": 1760
},
{
"epoch": 1.15,
"learning_rate": 2.9508338748104833e-05,
"loss": 0.3157,
"step": 1770
},
{
"epoch": 1.16,
"learning_rate": 2.948667966211826e-05,
"loss": 0.3127,
"step": 1780
},
{
"epoch": 1.16,
"learning_rate": 2.9465020576131687e-05,
"loss": 0.2422,
"step": 1790
},
{
"epoch": 1.17,
"learning_rate": 2.9443361490145115e-05,
"loss": 0.6096,
"step": 1800
},
{
"epoch": 1.18,
"learning_rate": 2.9421702404158544e-05,
"loss": 0.4274,
"step": 1810
},
{
"epoch": 1.18,
"learning_rate": 2.9400043318171973e-05,
"loss": 0.4009,
"step": 1820
},
{
"epoch": 1.19,
"learning_rate": 2.93783842321854e-05,
"loss": 0.4787,
"step": 1830
},
{
"epoch": 1.2,
"learning_rate": 2.935672514619883e-05,
"loss": 0.4098,
"step": 1840
},
{
"epoch": 1.2,
"learning_rate": 2.9335066060212262e-05,
"loss": 0.4029,
"step": 1850
},
{
"epoch": 1.21,
"learning_rate": 2.931340697422569e-05,
"loss": 0.2639,
"step": 1860
},
{
"epoch": 1.21,
"learning_rate": 2.9291747888239116e-05,
"loss": 0.4326,
"step": 1870
},
{
"epoch": 1.22,
"learning_rate": 2.9270088802252545e-05,
"loss": 0.6265,
"step": 1880
},
{
"epoch": 1.23,
"learning_rate": 2.9248429716265973e-05,
"loss": 0.453,
"step": 1890
},
{
"epoch": 1.23,
"learning_rate": 2.9226770630279402e-05,
"loss": 0.3825,
"step": 1900
},
{
"epoch": 1.24,
"learning_rate": 2.920511154429283e-05,
"loss": 0.567,
"step": 1910
},
{
"epoch": 1.25,
"learning_rate": 2.918345245830626e-05,
"loss": 0.3227,
"step": 1920
},
{
"epoch": 1.25,
"learning_rate": 2.9161793372319688e-05,
"loss": 0.4686,
"step": 1930
},
{
"epoch": 1.26,
"learning_rate": 2.914013428633312e-05,
"loss": 0.4481,
"step": 1940
},
{
"epoch": 1.27,
"learning_rate": 2.9118475200346546e-05,
"loss": 0.3457,
"step": 1950
},
{
"epoch": 1.27,
"learning_rate": 2.9096816114359974e-05,
"loss": 0.3616,
"step": 1960
},
{
"epoch": 1.28,
"learning_rate": 2.9075157028373403e-05,
"loss": 0.3722,
"step": 1970
},
{
"epoch": 1.29,
"learning_rate": 2.905349794238683e-05,
"loss": 0.4628,
"step": 1980
},
{
"epoch": 1.29,
"learning_rate": 2.903183885640026e-05,
"loss": 0.1889,
"step": 1990
},
{
"epoch": 1.3,
"learning_rate": 2.901017977041369e-05,
"loss": 0.4129,
"step": 2000
},
{
"epoch": 1.31,
"learning_rate": 2.8988520684427118e-05,
"loss": 0.2512,
"step": 2010
},
{
"epoch": 1.31,
"learning_rate": 2.8966861598440546e-05,
"loss": 0.3462,
"step": 2020
},
{
"epoch": 1.32,
"learning_rate": 2.8945202512453975e-05,
"loss": 0.2496,
"step": 2030
},
{
"epoch": 1.33,
"learning_rate": 2.8923543426467404e-05,
"loss": 0.2584,
"step": 2040
},
{
"epoch": 1.33,
"learning_rate": 2.8901884340480832e-05,
"loss": 0.4408,
"step": 2050
},
{
"epoch": 1.34,
"learning_rate": 2.888022525449426e-05,
"loss": 0.1791,
"step": 2060
},
{
"epoch": 1.34,
"learning_rate": 2.885856616850769e-05,
"loss": 0.2065,
"step": 2070
},
{
"epoch": 1.35,
"learning_rate": 2.883690708252112e-05,
"loss": 0.4957,
"step": 2080
},
{
"epoch": 1.36,
"learning_rate": 2.8815247996534547e-05,
"loss": 0.1517,
"step": 2090
},
{
"epoch": 1.36,
"learning_rate": 2.8793588910547976e-05,
"loss": 0.2041,
"step": 2100
},
{
"epoch": 1.37,
"learning_rate": 2.87719298245614e-05,
"loss": 0.2251,
"step": 2110
},
{
"epoch": 1.38,
"learning_rate": 2.8750270738574833e-05,
"loss": 0.1827,
"step": 2120
},
{
"epoch": 1.38,
"learning_rate": 2.8730777561186917e-05,
"loss": 0.1692,
"step": 2130
},
{
"epoch": 1.39,
"learning_rate": 2.8709118475200346e-05,
"loss": 0.2938,
"step": 2140
},
{
"epoch": 1.4,
"learning_rate": 2.8687459389213774e-05,
"loss": 0.2557,
"step": 2150
},
{
"epoch": 1.4,
"learning_rate": 2.8665800303227203e-05,
"loss": 0.2886,
"step": 2160
},
{
"epoch": 1.41,
"learning_rate": 2.8644141217240635e-05,
"loss": 0.3589,
"step": 2170
},
{
"epoch": 1.42,
"learning_rate": 2.8622482131254064e-05,
"loss": 0.3187,
"step": 2180
},
{
"epoch": 1.42,
"learning_rate": 2.8600823045267492e-05,
"loss": 0.2109,
"step": 2190
},
{
"epoch": 1.43,
"learning_rate": 2.857916395928092e-05,
"loss": 0.3777,
"step": 2200
},
{
"epoch": 1.44,
"learning_rate": 2.8557504873294346e-05,
"loss": 0.3847,
"step": 2210
},
{
"epoch": 1.44,
"learning_rate": 2.8535845787307775e-05,
"loss": 0.5642,
"step": 2220
},
{
"epoch": 1.45,
"learning_rate": 2.8514186701321204e-05,
"loss": 0.3966,
"step": 2230
},
{
"epoch": 1.46,
"learning_rate": 2.8492527615334633e-05,
"loss": 0.1387,
"step": 2240
},
{
"epoch": 1.46,
"learning_rate": 2.847086852934806e-05,
"loss": 0.3419,
"step": 2250
},
{
"epoch": 1.47,
"learning_rate": 2.8449209443361493e-05,
"loss": 0.3061,
"step": 2260
},
{
"epoch": 1.47,
"learning_rate": 2.8427550357374922e-05,
"loss": 0.6589,
"step": 2270
},
{
"epoch": 1.48,
"learning_rate": 2.840589127138835e-05,
"loss": 0.2016,
"step": 2280
},
{
"epoch": 1.49,
"learning_rate": 2.8384232185401776e-05,
"loss": 0.2931,
"step": 2290
},
{
"epoch": 1.49,
"learning_rate": 2.8362573099415205e-05,
"loss": 0.2773,
"step": 2300
},
{
"epoch": 1.5,
"learning_rate": 2.8340914013428633e-05,
"loss": 0.2194,
"step": 2310
},
{
"epoch": 1.51,
"learning_rate": 2.8319254927442062e-05,
"loss": 0.2638,
"step": 2320
},
{
"epoch": 1.51,
"learning_rate": 2.829759584145549e-05,
"loss": 0.3591,
"step": 2330
},
{
"epoch": 1.52,
"learning_rate": 2.827593675546892e-05,
"loss": 0.2546,
"step": 2340
},
{
"epoch": 1.53,
"learning_rate": 2.825427766948235e-05,
"loss": 0.3646,
"step": 2350
},
{
"epoch": 1.53,
"learning_rate": 2.8232618583495777e-05,
"loss": 0.458,
"step": 2360
},
{
"epoch": 1.54,
"learning_rate": 2.8210959497509205e-05,
"loss": 0.313,
"step": 2370
},
{
"epoch": 1.55,
"learning_rate": 2.8189300411522634e-05,
"loss": 0.3892,
"step": 2380
},
{
"epoch": 1.55,
"learning_rate": 2.8167641325536063e-05,
"loss": 0.4741,
"step": 2390
},
{
"epoch": 1.56,
"learning_rate": 2.814598223954949e-05,
"loss": 0.2889,
"step": 2400
},
{
"epoch": 1.57,
"learning_rate": 2.812432315356292e-05,
"loss": 0.5176,
"step": 2410
},
{
"epoch": 1.57,
"learning_rate": 2.810266406757635e-05,
"loss": 0.2147,
"step": 2420
},
{
"epoch": 1.58,
"learning_rate": 2.8081004981589778e-05,
"loss": 0.1541,
"step": 2430
},
{
"epoch": 1.58,
"learning_rate": 2.8059345895603206e-05,
"loss": 0.3722,
"step": 2440
},
{
"epoch": 1.59,
"learning_rate": 2.8037686809616635e-05,
"loss": 0.1849,
"step": 2450
},
{
"epoch": 1.6,
"learning_rate": 2.8016027723630064e-05,
"loss": 0.3595,
"step": 2460
},
{
"epoch": 1.6,
"learning_rate": 2.7994368637643492e-05,
"loss": 0.2309,
"step": 2470
},
{
"epoch": 1.61,
"learning_rate": 2.797270955165692e-05,
"loss": 0.2493,
"step": 2480
},
{
"epoch": 1.62,
"learning_rate": 2.795105046567035e-05,
"loss": 0.5766,
"step": 2490
},
{
"epoch": 1.62,
"learning_rate": 2.792939137968378e-05,
"loss": 0.4037,
"step": 2500
},
{
"epoch": 1.63,
"learning_rate": 2.7907732293697207e-05,
"loss": 0.4681,
"step": 2510
},
{
"epoch": 1.64,
"learning_rate": 2.7886073207710632e-05,
"loss": 0.1419,
"step": 2520
},
{
"epoch": 1.64,
"learning_rate": 2.7864414121724064e-05,
"loss": 0.477,
"step": 2530
},
{
"epoch": 1.65,
"learning_rate": 2.7842755035737493e-05,
"loss": 0.4341,
"step": 2540
},
{
"epoch": 1.66,
"learning_rate": 2.7821095949750922e-05,
"loss": 0.2381,
"step": 2550
},
{
"epoch": 1.66,
"learning_rate": 2.779943686376435e-05,
"loss": 0.302,
"step": 2560
},
{
"epoch": 1.67,
"learning_rate": 2.777777777777778e-05,
"loss": 0.2982,
"step": 2570
},
{
"epoch": 1.68,
"learning_rate": 2.7756118691791208e-05,
"loss": 0.4892,
"step": 2580
},
{
"epoch": 1.68,
"learning_rate": 2.7734459605804636e-05,
"loss": 0.3519,
"step": 2590
},
{
"epoch": 1.69,
"learning_rate": 2.7712800519818062e-05,
"loss": 0.207,
"step": 2600
},
{
"epoch": 1.7,
"learning_rate": 2.769114143383149e-05,
"loss": 0.1381,
"step": 2610
},
{
"epoch": 1.7,
"learning_rate": 2.7669482347844923e-05,
"loss": 0.3716,
"step": 2620
},
{
"epoch": 1.71,
"learning_rate": 2.764782326185835e-05,
"loss": 0.4123,
"step": 2630
},
{
"epoch": 1.71,
"learning_rate": 2.762616417587178e-05,
"loss": 0.2551,
"step": 2640
},
{
"epoch": 1.72,
"learning_rate": 2.760450508988521e-05,
"loss": 0.2666,
"step": 2650
},
{
"epoch": 1.73,
"learning_rate": 2.7582846003898637e-05,
"loss": 0.1336,
"step": 2660
},
{
"epoch": 1.73,
"learning_rate": 2.7561186917912066e-05,
"loss": 0.5134,
"step": 2670
},
{
"epoch": 1.74,
"learning_rate": 2.753952783192549e-05,
"loss": 0.2047,
"step": 2680
},
{
"epoch": 1.75,
"learning_rate": 2.751786874593892e-05,
"loss": 0.2533,
"step": 2690
},
{
"epoch": 1.75,
"learning_rate": 2.749620965995235e-05,
"loss": 0.0704,
"step": 2700
},
{
"epoch": 1.76,
"learning_rate": 2.747455057396578e-05,
"loss": 0.269,
"step": 2710
},
{
"epoch": 1.77,
"learning_rate": 2.745289148797921e-05,
"loss": 0.1492,
"step": 2720
},
{
"epoch": 1.77,
"learning_rate": 2.7431232401992638e-05,
"loss": 0.1962,
"step": 2730
},
{
"epoch": 1.78,
"learning_rate": 2.7409573316006067e-05,
"loss": 0.251,
"step": 2740
},
{
"epoch": 1.79,
"learning_rate": 2.7387914230019495e-05,
"loss": 0.5038,
"step": 2750
},
{
"epoch": 1.79,
"learning_rate": 2.736625514403292e-05,
"loss": 0.0198,
"step": 2760
},
{
"epoch": 1.8,
"learning_rate": 2.734459605804635e-05,
"loss": 0.3041,
"step": 2770
},
{
"epoch": 1.81,
"learning_rate": 2.7322936972059778e-05,
"loss": 0.3146,
"step": 2780
},
{
"epoch": 1.81,
"learning_rate": 2.7301277886073207e-05,
"loss": 0.2433,
"step": 2790
},
{
"epoch": 1.82,
"learning_rate": 2.727961880008664e-05,
"loss": 0.5248,
"step": 2800
},
{
"epoch": 1.83,
"learning_rate": 2.7257959714100068e-05,
"loss": 0.2415,
"step": 2810
},
{
"epoch": 1.83,
"learning_rate": 2.7236300628113496e-05,
"loss": 0.3897,
"step": 2820
},
{
"epoch": 1.84,
"learning_rate": 2.7214641542126925e-05,
"loss": 0.4774,
"step": 2830
},
{
"epoch": 1.84,
"learning_rate": 2.719298245614035e-05,
"loss": 0.1535,
"step": 2840
},
{
"epoch": 1.85,
"learning_rate": 2.717132337015378e-05,
"loss": 0.7375,
"step": 2850
},
{
"epoch": 1.86,
"learning_rate": 2.7149664284167208e-05,
"loss": 0.3043,
"step": 2860
},
{
"epoch": 1.86,
"learning_rate": 2.7128005198180636e-05,
"loss": 0.1723,
"step": 2870
},
{
"epoch": 1.87,
"learning_rate": 2.7106346112194065e-05,
"loss": 0.2247,
"step": 2880
},
{
"epoch": 1.88,
"learning_rate": 2.7084687026207497e-05,
"loss": 0.4143,
"step": 2890
},
{
"epoch": 1.88,
"learning_rate": 2.7063027940220926e-05,
"loss": 0.3145,
"step": 2900
},
{
"epoch": 1.89,
"learning_rate": 2.7041368854234354e-05,
"loss": 0.2796,
"step": 2910
},
{
"epoch": 1.9,
"learning_rate": 2.701970976824778e-05,
"loss": 0.1485,
"step": 2920
},
{
"epoch": 1.9,
"learning_rate": 2.699805068226121e-05,
"loss": 0.3664,
"step": 2930
},
{
"epoch": 1.91,
"learning_rate": 2.6976391596274637e-05,
"loss": 0.298,
"step": 2940
},
{
"epoch": 1.92,
"learning_rate": 2.6954732510288066e-05,
"loss": 0.5404,
"step": 2950
},
{
"epoch": 1.92,
"learning_rate": 2.6933073424301494e-05,
"loss": 0.0554,
"step": 2960
},
{
"epoch": 1.93,
"learning_rate": 2.6911414338314923e-05,
"loss": 0.3279,
"step": 2970
},
{
"epoch": 1.94,
"learning_rate": 2.6889755252328355e-05,
"loss": 0.2289,
"step": 2980
},
{
"epoch": 1.94,
"learning_rate": 2.6868096166341784e-05,
"loss": 0.257,
"step": 2990
},
{
"epoch": 1.95,
"learning_rate": 2.684643708035521e-05,
"loss": 0.5012,
"step": 3000
},
{
"epoch": 1.96,
"learning_rate": 2.6824777994368638e-05,
"loss": 0.2418,
"step": 3010
},
{
"epoch": 1.96,
"learning_rate": 2.6803118908382067e-05,
"loss": 0.0826,
"step": 3020
},
{
"epoch": 1.97,
"learning_rate": 2.6781459822395495e-05,
"loss": 0.204,
"step": 3030
},
{
"epoch": 1.97,
"learning_rate": 2.6759800736408924e-05,
"loss": 0.2902,
"step": 3040
},
{
"epoch": 1.98,
"learning_rate": 2.6738141650422353e-05,
"loss": 0.3245,
"step": 3050
},
{
"epoch": 1.99,
"learning_rate": 2.671648256443578e-05,
"loss": 0.3886,
"step": 3060
},
{
"epoch": 1.99,
"learning_rate": 2.6694823478449213e-05,
"loss": 0.1232,
"step": 3070
},
{
"epoch": 2.0,
"eval_accuracy": 0.9655013799448022,
"eval_loss": 0.1605590134859085,
"eval_runtime": 12.979,
"eval_samples_per_second": 167.501,
"eval_steps_per_second": 41.914,
"step": 3079
},
{
"epoch": 2.0,
"learning_rate": 2.667316439246264e-05,
"loss": 0.1282,
"step": 3080
},
{
"epoch": 2.01,
"learning_rate": 2.6651505306476067e-05,
"loss": 0.4082,
"step": 3090
},
{
"epoch": 2.01,
"learning_rate": 2.6629846220489496e-05,
"loss": 0.5166,
"step": 3100
},
{
"epoch": 2.02,
"learning_rate": 2.6608187134502925e-05,
"loss": 0.3364,
"step": 3110
},
{
"epoch": 2.03,
"learning_rate": 2.6586528048516353e-05,
"loss": 0.2218,
"step": 3120
},
{
"epoch": 2.03,
"learning_rate": 2.6564868962529782e-05,
"loss": 0.285,
"step": 3130
},
{
"epoch": 2.04,
"learning_rate": 2.654320987654321e-05,
"loss": 0.214,
"step": 3140
},
{
"epoch": 2.05,
"learning_rate": 2.652155079055664e-05,
"loss": 0.547,
"step": 3150
},
{
"epoch": 2.05,
"learning_rate": 2.6499891704570065e-05,
"loss": 0.2743,
"step": 3160
},
{
"epoch": 2.06,
"learning_rate": 2.6478232618583497e-05,
"loss": 0.4205,
"step": 3170
},
{
"epoch": 2.07,
"learning_rate": 2.6456573532596926e-05,
"loss": 0.1901,
"step": 3180
},
{
"epoch": 2.07,
"learning_rate": 2.6434914446610354e-05,
"loss": 0.1482,
"step": 3190
},
{
"epoch": 2.08,
"learning_rate": 2.6413255360623783e-05,
"loss": 0.1439,
"step": 3200
},
{
"epoch": 2.09,
"learning_rate": 2.639159627463721e-05,
"loss": 0.3271,
"step": 3210
},
{
"epoch": 2.09,
"learning_rate": 2.636993718865064e-05,
"loss": 0.237,
"step": 3220
},
{
"epoch": 2.1,
"learning_rate": 2.634827810266407e-05,
"loss": 0.3626,
"step": 3230
},
{
"epoch": 2.1,
"learning_rate": 2.6326619016677494e-05,
"loss": 0.2673,
"step": 3240
},
{
"epoch": 2.11,
"learning_rate": 2.6304959930690923e-05,
"loss": 0.0236,
"step": 3250
},
{
"epoch": 2.12,
"learning_rate": 2.6283300844704355e-05,
"loss": 0.3381,
"step": 3260
},
{
"epoch": 2.12,
"learning_rate": 2.6261641758717784e-05,
"loss": 0.0661,
"step": 3270
},
{
"epoch": 2.13,
"learning_rate": 2.6239982672731212e-05,
"loss": 0.4211,
"step": 3280
},
{
"epoch": 2.14,
"learning_rate": 2.621832358674464e-05,
"loss": 0.3107,
"step": 3290
},
{
"epoch": 2.14,
"learning_rate": 2.619666450075807e-05,
"loss": 0.3576,
"step": 3300
},
{
"epoch": 2.15,
"learning_rate": 2.61750054147715e-05,
"loss": 0.4687,
"step": 3310
},
{
"epoch": 2.16,
"learning_rate": 2.6153346328784924e-05,
"loss": 0.1991,
"step": 3320
},
{
"epoch": 2.16,
"learning_rate": 2.6131687242798352e-05,
"loss": 0.5119,
"step": 3330
},
{
"epoch": 2.17,
"learning_rate": 2.611002815681178e-05,
"loss": 0.1439,
"step": 3340
},
{
"epoch": 2.18,
"learning_rate": 2.6088369070825213e-05,
"loss": 0.29,
"step": 3350
},
{
"epoch": 2.18,
"learning_rate": 2.6066709984838642e-05,
"loss": 0.1705,
"step": 3360
},
{
"epoch": 2.19,
"learning_rate": 2.604505089885207e-05,
"loss": 0.1561,
"step": 3370
},
{
"epoch": 2.2,
"learning_rate": 2.60233918128655e-05,
"loss": 0.5298,
"step": 3380
},
{
"epoch": 2.2,
"learning_rate": 2.6001732726878928e-05,
"loss": 0.2279,
"step": 3390
},
{
"epoch": 2.21,
"learning_rate": 2.5980073640892353e-05,
"loss": 0.1045,
"step": 3400
},
{
"epoch": 2.22,
"learning_rate": 2.5958414554905782e-05,
"loss": 0.2926,
"step": 3410
},
{
"epoch": 2.22,
"learning_rate": 2.593675546891921e-05,
"loss": 0.1234,
"step": 3420
},
{
"epoch": 2.23,
"learning_rate": 2.591509638293264e-05,
"loss": 0.4732,
"step": 3430
},
{
"epoch": 2.23,
"learning_rate": 2.589343729694607e-05,
"loss": 0.2754,
"step": 3440
},
{
"epoch": 2.24,
"learning_rate": 2.58717782109595e-05,
"loss": 0.3542,
"step": 3450
},
{
"epoch": 2.25,
"learning_rate": 2.585011912497293e-05,
"loss": 0.5329,
"step": 3460
},
{
"epoch": 2.25,
"learning_rate": 2.5828460038986354e-05,
"loss": 0.2176,
"step": 3470
},
{
"epoch": 2.26,
"learning_rate": 2.5806800952999783e-05,
"loss": 0.1245,
"step": 3480
},
{
"epoch": 2.27,
"learning_rate": 2.578514186701321e-05,
"loss": 0.1505,
"step": 3490
},
{
"epoch": 2.27,
"learning_rate": 2.576348278102664e-05,
"loss": 0.5114,
"step": 3500
},
{
"epoch": 2.28,
"learning_rate": 2.574182369504007e-05,
"loss": 0.2739,
"step": 3510
},
{
"epoch": 2.29,
"learning_rate": 2.5720164609053497e-05,
"loss": 0.4071,
"step": 3520
},
{
"epoch": 2.29,
"learning_rate": 2.569850552306693e-05,
"loss": 0.1159,
"step": 3530
},
{
"epoch": 2.3,
"learning_rate": 2.5676846437080358e-05,
"loss": 0.3694,
"step": 3540
},
{
"epoch": 2.31,
"learning_rate": 2.5655187351093783e-05,
"loss": 0.4572,
"step": 3550
},
{
"epoch": 2.31,
"learning_rate": 2.5633528265107212e-05,
"loss": 0.2517,
"step": 3560
},
{
"epoch": 2.32,
"learning_rate": 2.561186917912064e-05,
"loss": 0.1947,
"step": 3570
},
{
"epoch": 2.33,
"learning_rate": 2.559021009313407e-05,
"loss": 0.2947,
"step": 3580
},
{
"epoch": 2.33,
"learning_rate": 2.5568551007147498e-05,
"loss": 0.1922,
"step": 3590
},
{
"epoch": 2.34,
"learning_rate": 2.5546891921160927e-05,
"loss": 0.3169,
"step": 3600
},
{
"epoch": 2.34,
"learning_rate": 2.5525232835174356e-05,
"loss": 0.065,
"step": 3610
},
{
"epoch": 2.35,
"learning_rate": 2.5503573749187788e-05,
"loss": 0.4879,
"step": 3620
},
{
"epoch": 2.36,
"learning_rate": 2.5481914663201213e-05,
"loss": 0.1695,
"step": 3630
},
{
"epoch": 2.36,
"learning_rate": 2.546025557721464e-05,
"loss": 0.381,
"step": 3640
},
{
"epoch": 2.37,
"learning_rate": 2.543859649122807e-05,
"loss": 0.2645,
"step": 3650
},
{
"epoch": 2.38,
"learning_rate": 2.54169374052415e-05,
"loss": 0.2559,
"step": 3660
},
{
"epoch": 2.38,
"learning_rate": 2.5395278319254928e-05,
"loss": 0.0956,
"step": 3670
},
{
"epoch": 2.39,
"learning_rate": 2.5373619233268356e-05,
"loss": 0.3473,
"step": 3680
},
{
"epoch": 2.4,
"learning_rate": 2.5351960147281785e-05,
"loss": 0.25,
"step": 3690
},
{
"epoch": 2.4,
"learning_rate": 2.5330301061295214e-05,
"loss": 0.3441,
"step": 3700
},
{
"epoch": 2.41,
"learning_rate": 2.5308641975308642e-05,
"loss": 0.2912,
"step": 3710
},
{
"epoch": 2.42,
"learning_rate": 2.528698288932207e-05,
"loss": 0.2895,
"step": 3720
},
{
"epoch": 2.42,
"learning_rate": 2.52653238033355e-05,
"loss": 0.1336,
"step": 3730
},
{
"epoch": 2.43,
"learning_rate": 2.524366471734893e-05,
"loss": 0.1,
"step": 3740
},
{
"epoch": 2.44,
"learning_rate": 2.5222005631362357e-05,
"loss": 0.0469,
"step": 3750
},
{
"epoch": 2.44,
"learning_rate": 2.5200346545375786e-05,
"loss": 0.1433,
"step": 3760
},
{
"epoch": 2.45,
"learning_rate": 2.5178687459389215e-05,
"loss": 0.1602,
"step": 3770
},
{
"epoch": 2.46,
"learning_rate": 2.5157028373402643e-05,
"loss": 0.3086,
"step": 3780
},
{
"epoch": 2.46,
"learning_rate": 2.513536928741607e-05,
"loss": 0.112,
"step": 3790
},
{
"epoch": 2.47,
"learning_rate": 2.51137102014295e-05,
"loss": 0.3675,
"step": 3800
},
{
"epoch": 2.47,
"learning_rate": 2.509205111544293e-05,
"loss": 0.1579,
"step": 3810
},
{
"epoch": 2.48,
"learning_rate": 2.5070392029456358e-05,
"loss": 0.3057,
"step": 3820
},
{
"epoch": 2.49,
"learning_rate": 2.5048732943469787e-05,
"loss": 0.3646,
"step": 3830
},
{
"epoch": 2.49,
"learning_rate": 2.5027073857483215e-05,
"loss": 0.2384,
"step": 3840
},
{
"epoch": 2.5,
"learning_rate": 2.5005414771496644e-05,
"loss": 0.3239,
"step": 3850
},
{
"epoch": 2.51,
"learning_rate": 2.4983755685510073e-05,
"loss": 0.2883,
"step": 3860
},
{
"epoch": 2.51,
"learning_rate": 2.4962096599523498e-05,
"loss": 0.2288,
"step": 3870
},
{
"epoch": 2.52,
"learning_rate": 2.4940437513536927e-05,
"loss": 0.3717,
"step": 3880
},
{
"epoch": 2.53,
"learning_rate": 2.491877842755036e-05,
"loss": 0.2852,
"step": 3890
},
{
"epoch": 2.53,
"learning_rate": 2.4897119341563787e-05,
"loss": 0.6269,
"step": 3900
},
{
"epoch": 2.54,
"learning_rate": 2.4875460255577216e-05,
"loss": 0.2522,
"step": 3910
},
{
"epoch": 2.55,
"learning_rate": 2.4853801169590645e-05,
"loss": 0.3426,
"step": 3920
},
{
"epoch": 2.55,
"learning_rate": 2.4832142083604074e-05,
"loss": 0.2613,
"step": 3930
},
{
"epoch": 2.56,
"learning_rate": 2.4810482997617502e-05,
"loss": 0.3396,
"step": 3940
},
{
"epoch": 2.57,
"learning_rate": 2.4788823911630927e-05,
"loss": 0.225,
"step": 3950
},
{
"epoch": 2.57,
"learning_rate": 2.4767164825644356e-05,
"loss": 0.204,
"step": 3960
},
{
"epoch": 2.58,
"learning_rate": 2.4745505739657785e-05,
"loss": 0.6812,
"step": 3970
},
{
"epoch": 2.59,
"learning_rate": 2.4723846653671217e-05,
"loss": 0.5684,
"step": 3980
},
{
"epoch": 2.59,
"learning_rate": 2.4702187567684646e-05,
"loss": 0.1882,
"step": 3990
},
{
"epoch": 2.6,
"learning_rate": 2.4680528481698074e-05,
"loss": 0.2217,
"step": 4000
},
{
"epoch": 2.6,
"learning_rate": 2.4658869395711503e-05,
"loss": 0.0704,
"step": 4010
},
{
"epoch": 2.61,
"learning_rate": 2.463721030972493e-05,
"loss": 0.3599,
"step": 4020
},
{
"epoch": 2.62,
"learning_rate": 2.4615551223738357e-05,
"loss": 0.2779,
"step": 4030
},
{
"epoch": 2.62,
"learning_rate": 2.4593892137751786e-05,
"loss": 0.4508,
"step": 4040
},
{
"epoch": 2.63,
"learning_rate": 2.4572233051765214e-05,
"loss": 0.3388,
"step": 4050
},
{
"epoch": 2.64,
"learning_rate": 2.4550573965778643e-05,
"loss": 0.3605,
"step": 4060
},
{
"epoch": 2.64,
"learning_rate": 2.4528914879792075e-05,
"loss": 0.3589,
"step": 4070
},
{
"epoch": 2.65,
"learning_rate": 2.4507255793805504e-05,
"loss": 0.2711,
"step": 4080
},
{
"epoch": 2.66,
"learning_rate": 2.4485596707818932e-05,
"loss": 0.1015,
"step": 4090
},
{
"epoch": 2.66,
"learning_rate": 2.446393762183236e-05,
"loss": 0.4934,
"step": 4100
},
{
"epoch": 2.67,
"learning_rate": 2.4442278535845786e-05,
"loss": 0.5104,
"step": 4110
},
{
"epoch": 2.68,
"learning_rate": 2.4420619449859215e-05,
"loss": 0.2386,
"step": 4120
},
{
"epoch": 2.68,
"learning_rate": 2.4398960363872644e-05,
"loss": 0.3044,
"step": 4130
},
{
"epoch": 2.69,
"learning_rate": 2.4377301277886073e-05,
"loss": 0.4077,
"step": 4140
},
{
"epoch": 2.7,
"learning_rate": 2.43556421918995e-05,
"loss": 0.2545,
"step": 4150
},
{
"epoch": 2.7,
"learning_rate": 2.4333983105912933e-05,
"loss": 0.2894,
"step": 4160
},
{
"epoch": 2.71,
"learning_rate": 2.4312324019926362e-05,
"loss": 0.3408,
"step": 4170
},
{
"epoch": 2.72,
"learning_rate": 2.429066493393979e-05,
"loss": 0.1438,
"step": 4180
},
{
"epoch": 2.72,
"learning_rate": 2.4269005847953216e-05,
"loss": 0.2897,
"step": 4190
},
{
"epoch": 2.73,
"learning_rate": 2.4247346761966645e-05,
"loss": 0.2148,
"step": 4200
},
{
"epoch": 2.73,
"learning_rate": 2.4225687675980073e-05,
"loss": 0.5298,
"step": 4210
},
{
"epoch": 2.74,
"learning_rate": 2.4204028589993502e-05,
"loss": 0.1486,
"step": 4220
},
{
"epoch": 2.75,
"learning_rate": 2.418236950400693e-05,
"loss": 0.4059,
"step": 4230
},
{
"epoch": 2.75,
"learning_rate": 2.416071041802036e-05,
"loss": 0.1608,
"step": 4240
},
{
"epoch": 2.76,
"learning_rate": 2.413905133203379e-05,
"loss": 0.2226,
"step": 4250
},
{
"epoch": 2.77,
"learning_rate": 2.411739224604722e-05,
"loss": 0.3627,
"step": 4260
},
{
"epoch": 2.77,
"learning_rate": 2.4095733160060645e-05,
"loss": 0.3048,
"step": 4270
},
{
"epoch": 2.78,
"learning_rate": 2.4074074074074074e-05,
"loss": 0.2084,
"step": 4280
},
{
"epoch": 2.79,
"learning_rate": 2.4052414988087503e-05,
"loss": 0.128,
"step": 4290
},
{
"epoch": 2.79,
"learning_rate": 2.403075590210093e-05,
"loss": 0.4311,
"step": 4300
},
{
"epoch": 2.8,
"learning_rate": 2.400909681611436e-05,
"loss": 0.2711,
"step": 4310
},
{
"epoch": 2.81,
"learning_rate": 2.398743773012779e-05,
"loss": 0.2327,
"step": 4320
},
{
"epoch": 2.81,
"learning_rate": 2.3965778644141218e-05,
"loss": 0.467,
"step": 4330
},
{
"epoch": 2.82,
"learning_rate": 2.394411955815465e-05,
"loss": 0.5556,
"step": 4340
},
{
"epoch": 2.83,
"learning_rate": 2.3922460472168075e-05,
"loss": 0.2591,
"step": 4350
},
{
"epoch": 2.83,
"learning_rate": 2.3900801386181504e-05,
"loss": 0.2438,
"step": 4360
},
{
"epoch": 2.84,
"learning_rate": 2.3879142300194932e-05,
"loss": 0.2504,
"step": 4370
},
{
"epoch": 2.85,
"learning_rate": 2.385748321420836e-05,
"loss": 0.0638,
"step": 4380
},
{
"epoch": 2.85,
"learning_rate": 2.383582412822179e-05,
"loss": 0.2644,
"step": 4390
},
{
"epoch": 2.86,
"learning_rate": 2.381416504223522e-05,
"loss": 0.1309,
"step": 4400
},
{
"epoch": 2.86,
"learning_rate": 2.3792505956248647e-05,
"loss": 0.2903,
"step": 4410
},
{
"epoch": 2.87,
"learning_rate": 2.3770846870262076e-05,
"loss": 0.3967,
"step": 4420
},
{
"epoch": 2.88,
"learning_rate": 2.3749187784275504e-05,
"loss": 0.1492,
"step": 4430
},
{
"epoch": 2.88,
"learning_rate": 2.3727528698288933e-05,
"loss": 0.2094,
"step": 4440
},
{
"epoch": 2.89,
"learning_rate": 2.3705869612302362e-05,
"loss": 0.1203,
"step": 4450
},
{
"epoch": 2.9,
"learning_rate": 2.368421052631579e-05,
"loss": 0.2751,
"step": 4460
},
{
"epoch": 2.9,
"learning_rate": 2.366255144032922e-05,
"loss": 0.3923,
"step": 4470
},
{
"epoch": 2.91,
"learning_rate": 2.3640892354342648e-05,
"loss": 0.3586,
"step": 4480
},
{
"epoch": 2.92,
"learning_rate": 2.3619233268356076e-05,
"loss": 0.1712,
"step": 4490
},
{
"epoch": 2.92,
"learning_rate": 2.3597574182369505e-05,
"loss": 0.2187,
"step": 4500
},
{
"epoch": 2.93,
"learning_rate": 2.357591509638293e-05,
"loss": 0.2134,
"step": 4510
},
{
"epoch": 2.94,
"learning_rate": 2.3554256010396363e-05,
"loss": 0.1906,
"step": 4520
},
{
"epoch": 2.94,
"learning_rate": 2.353259692440979e-05,
"loss": 0.2102,
"step": 4530
},
{
"epoch": 2.95,
"learning_rate": 2.351093783842322e-05,
"loss": 0.1499,
"step": 4540
},
{
"epoch": 2.96,
"learning_rate": 2.348927875243665e-05,
"loss": 0.1415,
"step": 4550
},
{
"epoch": 2.96,
"learning_rate": 2.3467619666450077e-05,
"loss": 0.146,
"step": 4560
},
{
"epoch": 2.97,
"learning_rate": 2.3445960580463506e-05,
"loss": 0.4851,
"step": 4570
},
{
"epoch": 2.97,
"learning_rate": 2.342430149447693e-05,
"loss": 0.1137,
"step": 4580
},
{
"epoch": 2.98,
"learning_rate": 2.340264240849036e-05,
"loss": 0.474,
"step": 4590
},
{
"epoch": 2.99,
"learning_rate": 2.338098332250379e-05,
"loss": 0.4388,
"step": 4600
},
{
"epoch": 2.99,
"learning_rate": 2.335932423651722e-05,
"loss": 0.2855,
"step": 4610
},
{
"epoch": 3.0,
"eval_accuracy": 0.9696412143514259,
"eval_loss": 0.11773567646741867,
"eval_runtime": 12.9815,
"eval_samples_per_second": 167.469,
"eval_steps_per_second": 41.906,
"step": 4618
},
{
"epoch": 3.0,
"learning_rate": 2.333766515053065e-05,
"loss": 0.333,
"step": 4620
},
{
"epoch": 3.01,
"learning_rate": 2.3316006064544078e-05,
"loss": 0.1692,
"step": 4630
},
{
"epoch": 3.01,
"learning_rate": 2.3294346978557507e-05,
"loss": 0.1158,
"step": 4640
},
{
"epoch": 3.02,
"learning_rate": 2.3272687892570935e-05,
"loss": 0.0129,
"step": 4650
},
{
"epoch": 3.03,
"learning_rate": 2.325102880658436e-05,
"loss": 0.2642,
"step": 4660
},
{
"epoch": 3.03,
"learning_rate": 2.322936972059779e-05,
"loss": 0.5089,
"step": 4670
},
{
"epoch": 3.04,
"learning_rate": 2.3207710634611218e-05,
"loss": 0.0762,
"step": 4680
},
{
"epoch": 3.05,
"learning_rate": 2.3186051548624647e-05,
"loss": 0.236,
"step": 4690
},
{
"epoch": 3.05,
"learning_rate": 2.316439246263808e-05,
"loss": 0.263,
"step": 4700
},
{
"epoch": 3.06,
"learning_rate": 2.3142733376651508e-05,
"loss": 0.0997,
"step": 4710
},
{
"epoch": 3.07,
"learning_rate": 2.3121074290664936e-05,
"loss": 0.3237,
"step": 4720
},
{
"epoch": 3.07,
"learning_rate": 2.3099415204678365e-05,
"loss": 0.0913,
"step": 4730
},
{
"epoch": 3.08,
"learning_rate": 2.307775611869179e-05,
"loss": 0.154,
"step": 4740
},
{
"epoch": 3.09,
"learning_rate": 2.305609703270522e-05,
"loss": 0.1299,
"step": 4750
},
{
"epoch": 3.09,
"learning_rate": 2.3034437946718648e-05,
"loss": 0.2991,
"step": 4760
},
{
"epoch": 3.1,
"learning_rate": 2.3012778860732076e-05,
"loss": 0.2063,
"step": 4770
},
{
"epoch": 3.1,
"learning_rate": 2.2991119774745505e-05,
"loss": 0.4323,
"step": 4780
},
{
"epoch": 3.11,
"learning_rate": 2.2969460688758937e-05,
"loss": 0.3903,
"step": 4790
},
{
"epoch": 3.12,
"learning_rate": 2.2947801602772366e-05,
"loss": 0.2083,
"step": 4800
},
{
"epoch": 3.12,
"learning_rate": 2.2926142516785794e-05,
"loss": 0.2063,
"step": 4810
},
{
"epoch": 3.13,
"learning_rate": 2.290448343079922e-05,
"loss": 0.4494,
"step": 4820
},
{
"epoch": 3.14,
"learning_rate": 2.288282434481265e-05,
"loss": 0.5917,
"step": 4830
},
{
"epoch": 3.14,
"learning_rate": 2.2861165258826077e-05,
"loss": 0.175,
"step": 4840
},
{
"epoch": 3.15,
"learning_rate": 2.2839506172839506e-05,
"loss": 0.1678,
"step": 4850
},
{
"epoch": 3.16,
"learning_rate": 2.2817847086852934e-05,
"loss": 0.397,
"step": 4860
},
{
"epoch": 3.16,
"learning_rate": 2.2796188000866363e-05,
"loss": 0.3491,
"step": 4870
},
{
"epoch": 3.17,
"learning_rate": 2.2774528914879795e-05,
"loss": 0.1067,
"step": 4880
},
{
"epoch": 3.18,
"learning_rate": 2.2752869828893224e-05,
"loss": 0.2016,
"step": 4890
},
{
"epoch": 3.18,
"learning_rate": 2.273121074290665e-05,
"loss": 0.2037,
"step": 4900
},
{
"epoch": 3.19,
"learning_rate": 2.2709551656920078e-05,
"loss": 0.2404,
"step": 4910
},
{
"epoch": 3.2,
"learning_rate": 2.2687892570933507e-05,
"loss": 0.1275,
"step": 4920
},
{
"epoch": 3.2,
"learning_rate": 2.2666233484946935e-05,
"loss": 0.0866,
"step": 4930
},
{
"epoch": 3.21,
"learning_rate": 2.2644574398960364e-05,
"loss": 0.2417,
"step": 4940
},
{
"epoch": 3.22,
"learning_rate": 2.2622915312973793e-05,
"loss": 0.0546,
"step": 4950
},
{
"epoch": 3.22,
"learning_rate": 2.260125622698722e-05,
"loss": 0.2839,
"step": 4960
},
{
"epoch": 3.23,
"learning_rate": 2.2579597141000653e-05,
"loss": 0.1398,
"step": 4970
},
{
"epoch": 3.23,
"learning_rate": 2.255793805501408e-05,
"loss": 0.3108,
"step": 4980
},
{
"epoch": 3.24,
"learning_rate": 2.2536278969027507e-05,
"loss": 0.4113,
"step": 4990
},
{
"epoch": 3.25,
"learning_rate": 2.2514619883040936e-05,
"loss": 0.15,
"step": 5000
},
{
"epoch": 3.25,
"learning_rate": 2.2492960797054365e-05,
"loss": 0.1093,
"step": 5010
},
{
"epoch": 3.26,
"learning_rate": 2.2471301711067793e-05,
"loss": 0.3478,
"step": 5020
},
{
"epoch": 3.27,
"learning_rate": 2.2449642625081222e-05,
"loss": 0.229,
"step": 5030
},
{
"epoch": 3.27,
"learning_rate": 2.242798353909465e-05,
"loss": 0.2711,
"step": 5040
},
{
"epoch": 3.28,
"learning_rate": 2.240632445310808e-05,
"loss": 0.1047,
"step": 5050
},
{
"epoch": 3.29,
"learning_rate": 2.2384665367121508e-05,
"loss": 0.1989,
"step": 5060
},
{
"epoch": 3.29,
"learning_rate": 2.2363006281134937e-05,
"loss": 0.1658,
"step": 5070
},
{
"epoch": 3.3,
"learning_rate": 2.2341347195148366e-05,
"loss": 0.1486,
"step": 5080
},
{
"epoch": 3.31,
"learning_rate": 2.2319688109161794e-05,
"loss": 0.1316,
"step": 5090
},
{
"epoch": 3.31,
"learning_rate": 2.2298029023175223e-05,
"loss": 0.087,
"step": 5100
},
{
"epoch": 3.32,
"learning_rate": 2.227636993718865e-05,
"loss": 0.3231,
"step": 5110
},
{
"epoch": 3.33,
"learning_rate": 2.225471085120208e-05,
"loss": 0.1468,
"step": 5120
},
{
"epoch": 3.33,
"learning_rate": 2.223305176521551e-05,
"loss": 0.1442,
"step": 5130
},
{
"epoch": 3.34,
"learning_rate": 2.2211392679228934e-05,
"loss": 0.3304,
"step": 5140
},
{
"epoch": 3.35,
"learning_rate": 2.2189733593242366e-05,
"loss": 0.2766,
"step": 5150
},
{
"epoch": 3.35,
"learning_rate": 2.2168074507255795e-05,
"loss": 0.1563,
"step": 5160
},
{
"epoch": 3.36,
"learning_rate": 2.2146415421269224e-05,
"loss": 0.1921,
"step": 5170
},
{
"epoch": 3.36,
"learning_rate": 2.2124756335282652e-05,
"loss": 0.1017,
"step": 5180
},
{
"epoch": 3.37,
"learning_rate": 2.210309724929608e-05,
"loss": 0.2529,
"step": 5190
},
{
"epoch": 3.38,
"learning_rate": 2.208143816330951e-05,
"loss": 0.0399,
"step": 5200
},
{
"epoch": 3.38,
"learning_rate": 2.205977907732294e-05,
"loss": 0.417,
"step": 5210
},
{
"epoch": 3.39,
"learning_rate": 2.2038119991336364e-05,
"loss": 0.6172,
"step": 5220
},
{
"epoch": 3.4,
"learning_rate": 2.2016460905349792e-05,
"loss": 0.1713,
"step": 5230
},
{
"epoch": 3.4,
"learning_rate": 2.1994801819363224e-05,
"loss": 0.4274,
"step": 5240
},
{
"epoch": 3.41,
"learning_rate": 2.1973142733376653e-05,
"loss": 0.3577,
"step": 5250
},
{
"epoch": 3.42,
"learning_rate": 2.1951483647390082e-05,
"loss": 0.2066,
"step": 5260
},
{
"epoch": 3.42,
"learning_rate": 2.192982456140351e-05,
"loss": 0.3221,
"step": 5270
},
{
"epoch": 3.43,
"learning_rate": 2.190816547541694e-05,
"loss": 0.1747,
"step": 5280
},
{
"epoch": 3.44,
"learning_rate": 2.1886506389430368e-05,
"loss": 0.1228,
"step": 5290
},
{
"epoch": 3.44,
"learning_rate": 2.1864847303443793e-05,
"loss": 0.1929,
"step": 5300
},
{
"epoch": 3.45,
"learning_rate": 2.1843188217457222e-05,
"loss": 0.2565,
"step": 5310
},
{
"epoch": 3.46,
"learning_rate": 2.182152913147065e-05,
"loss": 0.159,
"step": 5320
},
{
"epoch": 3.46,
"learning_rate": 2.1799870045484083e-05,
"loss": 0.1915,
"step": 5330
},
{
"epoch": 3.47,
"learning_rate": 2.177821095949751e-05,
"loss": 0.2584,
"step": 5340
},
{
"epoch": 3.48,
"learning_rate": 2.175655187351094e-05,
"loss": 0.162,
"step": 5350
},
{
"epoch": 3.48,
"learning_rate": 2.173489278752437e-05,
"loss": 0.1473,
"step": 5360
},
{
"epoch": 3.49,
"learning_rate": 2.1713233701537797e-05,
"loss": 0.4753,
"step": 5370
},
{
"epoch": 3.49,
"learning_rate": 2.1691574615551223e-05,
"loss": 0.1635,
"step": 5380
},
{
"epoch": 3.5,
"learning_rate": 2.166991552956465e-05,
"loss": 0.2713,
"step": 5390
},
{
"epoch": 3.51,
"learning_rate": 2.164825644357808e-05,
"loss": 0.4024,
"step": 5400
},
{
"epoch": 3.51,
"learning_rate": 2.162659735759151e-05,
"loss": 0.3595,
"step": 5410
},
{
"epoch": 3.52,
"learning_rate": 2.160493827160494e-05,
"loss": 0.2345,
"step": 5420
},
{
"epoch": 3.53,
"learning_rate": 2.158327918561837e-05,
"loss": 0.2353,
"step": 5430
},
{
"epoch": 3.53,
"learning_rate": 2.1561620099631798e-05,
"loss": 0.3144,
"step": 5440
},
{
"epoch": 3.54,
"learning_rate": 2.1539961013645227e-05,
"loss": 0.558,
"step": 5450
},
{
"epoch": 3.55,
"learning_rate": 2.1518301927658652e-05,
"loss": 0.1033,
"step": 5460
},
{
"epoch": 3.55,
"learning_rate": 2.149664284167208e-05,
"loss": 0.2233,
"step": 5470
},
{
"epoch": 3.56,
"learning_rate": 2.147498375568551e-05,
"loss": 0.2178,
"step": 5480
},
{
"epoch": 3.57,
"learning_rate": 2.1453324669698938e-05,
"loss": 0.0916,
"step": 5490
},
{
"epoch": 3.57,
"learning_rate": 2.1431665583712367e-05,
"loss": 0.051,
"step": 5500
},
{
"epoch": 3.58,
"learning_rate": 2.14100064977258e-05,
"loss": 0.3294,
"step": 5510
},
{
"epoch": 3.59,
"learning_rate": 2.1388347411739228e-05,
"loss": 0.1862,
"step": 5520
},
{
"epoch": 3.59,
"learning_rate": 2.1366688325752653e-05,
"loss": 0.2174,
"step": 5530
},
{
"epoch": 3.6,
"learning_rate": 2.134502923976608e-05,
"loss": 0.2679,
"step": 5540
},
{
"epoch": 3.61,
"learning_rate": 2.132337015377951e-05,
"loss": 0.1024,
"step": 5550
},
{
"epoch": 3.61,
"learning_rate": 2.130171106779294e-05,
"loss": 0.2108,
"step": 5560
},
{
"epoch": 3.62,
"learning_rate": 2.1280051981806368e-05,
"loss": 0.1707,
"step": 5570
},
{
"epoch": 3.62,
"learning_rate": 2.1258392895819796e-05,
"loss": 0.1842,
"step": 5580
},
{
"epoch": 3.63,
"learning_rate": 2.1236733809833225e-05,
"loss": 0.0681,
"step": 5590
},
{
"epoch": 3.64,
"learning_rate": 2.1215074723846657e-05,
"loss": 0.3176,
"step": 5600
},
{
"epoch": 3.64,
"learning_rate": 2.1193415637860082e-05,
"loss": 0.3924,
"step": 5610
},
{
"epoch": 3.65,
"learning_rate": 2.117175655187351e-05,
"loss": 0.307,
"step": 5620
},
{
"epoch": 3.66,
"learning_rate": 2.115009746588694e-05,
"loss": 0.164,
"step": 5630
},
{
"epoch": 3.66,
"learning_rate": 2.112843837990037e-05,
"loss": 0.1099,
"step": 5640
},
{
"epoch": 3.67,
"learning_rate": 2.1106779293913797e-05,
"loss": 0.3696,
"step": 5650
},
{
"epoch": 3.68,
"learning_rate": 2.1085120207927226e-05,
"loss": 0.2124,
"step": 5660
},
{
"epoch": 3.68,
"learning_rate": 2.1063461121940655e-05,
"loss": 0.212,
"step": 5670
},
{
"epoch": 3.69,
"learning_rate": 2.1041802035954083e-05,
"loss": 0.2727,
"step": 5680
},
{
"epoch": 3.7,
"learning_rate": 2.1020142949967512e-05,
"loss": 0.2698,
"step": 5690
},
{
"epoch": 3.7,
"learning_rate": 2.099848386398094e-05,
"loss": 0.4189,
"step": 5700
},
{
"epoch": 3.71,
"learning_rate": 2.097682477799437e-05,
"loss": 0.1711,
"step": 5710
},
{
"epoch": 3.72,
"learning_rate": 2.0955165692007798e-05,
"loss": 0.1526,
"step": 5720
},
{
"epoch": 3.72,
"learning_rate": 2.0933506606021227e-05,
"loss": 0.1894,
"step": 5730
},
{
"epoch": 3.73,
"learning_rate": 2.0911847520034655e-05,
"loss": 0.2144,
"step": 5740
},
{
"epoch": 3.73,
"learning_rate": 2.0890188434048084e-05,
"loss": 0.1443,
"step": 5750
},
{
"epoch": 3.74,
"learning_rate": 2.0868529348061513e-05,
"loss": 0.0814,
"step": 5760
},
{
"epoch": 3.75,
"learning_rate": 2.0846870262074938e-05,
"loss": 0.2765,
"step": 5770
},
{
"epoch": 3.75,
"learning_rate": 2.082521117608837e-05,
"loss": 0.1244,
"step": 5780
},
{
"epoch": 3.76,
"learning_rate": 2.08035520901018e-05,
"loss": 0.2107,
"step": 5790
},
{
"epoch": 3.77,
"learning_rate": 2.0781893004115227e-05,
"loss": 0.0134,
"step": 5800
},
{
"epoch": 3.77,
"learning_rate": 2.0760233918128656e-05,
"loss": 0.1578,
"step": 5810
},
{
"epoch": 3.78,
"learning_rate": 2.074074074074074e-05,
"loss": 0.1113,
"step": 5820
},
{
"epoch": 3.79,
"learning_rate": 2.071908165475417e-05,
"loss": 0.1734,
"step": 5830
},
{
"epoch": 3.79,
"learning_rate": 2.06974225687676e-05,
"loss": 0.2593,
"step": 5840
},
{
"epoch": 3.8,
"learning_rate": 2.067576348278103e-05,
"loss": 0.4005,
"step": 5850
},
{
"epoch": 3.81,
"learning_rate": 2.0654104396794458e-05,
"loss": 0.1157,
"step": 5860
},
{
"epoch": 3.81,
"learning_rate": 2.0632445310807883e-05,
"loss": 0.1248,
"step": 5870
},
{
"epoch": 3.82,
"learning_rate": 2.0610786224821312e-05,
"loss": 0.1108,
"step": 5880
},
{
"epoch": 3.83,
"learning_rate": 2.058912713883474e-05,
"loss": 0.1242,
"step": 5890
},
{
"epoch": 3.83,
"learning_rate": 2.056746805284817e-05,
"loss": 0.1443,
"step": 5900
},
{
"epoch": 3.84,
"learning_rate": 2.0545808966861598e-05,
"loss": 0.1957,
"step": 5910
},
{
"epoch": 3.85,
"learning_rate": 2.0524149880875027e-05,
"loss": 0.323,
"step": 5920
},
{
"epoch": 3.85,
"learning_rate": 2.050249079488846e-05,
"loss": 0.3315,
"step": 5930
},
{
"epoch": 3.86,
"learning_rate": 2.0480831708901888e-05,
"loss": 0.311,
"step": 5940
},
{
"epoch": 3.86,
"learning_rate": 2.0459172622915313e-05,
"loss": 0.1231,
"step": 5950
},
{
"epoch": 3.87,
"learning_rate": 2.043751353692874e-05,
"loss": 0.1074,
"step": 5960
},
{
"epoch": 3.88,
"learning_rate": 2.041585445094217e-05,
"loss": 0.1604,
"step": 5970
},
{
"epoch": 3.88,
"learning_rate": 2.03941953649556e-05,
"loss": 0.3031,
"step": 5980
},
{
"epoch": 3.89,
"learning_rate": 2.0372536278969028e-05,
"loss": 0.4937,
"step": 5990
},
{
"epoch": 3.9,
"learning_rate": 2.0350877192982456e-05,
"loss": 0.1419,
"step": 6000
},
{
"epoch": 3.9,
"learning_rate": 2.0329218106995885e-05,
"loss": 0.2078,
"step": 6010
},
{
"epoch": 3.91,
"learning_rate": 2.0307559021009317e-05,
"loss": 0.1859,
"step": 6020
},
{
"epoch": 3.92,
"learning_rate": 2.0285899935022742e-05,
"loss": 0.2744,
"step": 6030
},
{
"epoch": 3.92,
"learning_rate": 2.026424084903617e-05,
"loss": 0.039,
"step": 6040
},
{
"epoch": 3.93,
"learning_rate": 2.02425817630496e-05,
"loss": 0.2164,
"step": 6050
},
{
"epoch": 3.94,
"learning_rate": 2.022092267706303e-05,
"loss": 0.231,
"step": 6060
},
{
"epoch": 3.94,
"learning_rate": 2.0199263591076457e-05,
"loss": 0.182,
"step": 6070
},
{
"epoch": 3.95,
"learning_rate": 2.0177604505089886e-05,
"loss": 0.0891,
"step": 6080
},
{
"epoch": 3.96,
"learning_rate": 2.0155945419103314e-05,
"loss": 0.2888,
"step": 6090
},
{
"epoch": 3.96,
"learning_rate": 2.0134286333116743e-05,
"loss": 0.2661,
"step": 6100
},
{
"epoch": 3.97,
"learning_rate": 2.0112627247130172e-05,
"loss": 0.2692,
"step": 6110
},
{
"epoch": 3.98,
"learning_rate": 2.00909681611436e-05,
"loss": 0.1445,
"step": 6120
},
{
"epoch": 3.98,
"learning_rate": 2.006930907515703e-05,
"loss": 0.2447,
"step": 6130
},
{
"epoch": 3.99,
"learning_rate": 2.0047649989170458e-05,
"loss": 0.1887,
"step": 6140
},
{
"epoch": 3.99,
"learning_rate": 2.0025990903183887e-05,
"loss": 0.2405,
"step": 6150
},
{
"epoch": 4.0,
"eval_accuracy": 0.9793008279668813,
"eval_loss": 0.10150198638439178,
"eval_runtime": 12.9625,
"eval_samples_per_second": 167.715,
"eval_steps_per_second": 41.967,
"step": 6158
},
{
"epoch": 4.0,
"learning_rate": 2.0004331817197315e-05,
"loss": 0.172,
"step": 6160
},
{
"epoch": 4.01,
"learning_rate": 1.9982672731210744e-05,
"loss": 0.2265,
"step": 6170
},
{
"epoch": 4.01,
"learning_rate": 1.9961013645224173e-05,
"loss": 0.1351,
"step": 6180
},
{
"epoch": 4.02,
"learning_rate": 1.9939354559237598e-05,
"loss": 0.2882,
"step": 6190
},
{
"epoch": 4.03,
"learning_rate": 1.991769547325103e-05,
"loss": 0.2578,
"step": 6200
},
{
"epoch": 4.03,
"learning_rate": 1.989603638726446e-05,
"loss": 0.2349,
"step": 6210
},
{
"epoch": 4.04,
"learning_rate": 1.9874377301277887e-05,
"loss": 0.3212,
"step": 6220
},
{
"epoch": 4.05,
"learning_rate": 1.9852718215291316e-05,
"loss": 0.2314,
"step": 6230
},
{
"epoch": 4.05,
"learning_rate": 1.9831059129304745e-05,
"loss": 0.1418,
"step": 6240
},
{
"epoch": 4.06,
"learning_rate": 1.9809400043318173e-05,
"loss": 0.1055,
"step": 6250
},
{
"epoch": 4.07,
"learning_rate": 1.9787740957331602e-05,
"loss": 0.1424,
"step": 6260
},
{
"epoch": 4.07,
"learning_rate": 1.9766081871345027e-05,
"loss": 0.1625,
"step": 6270
},
{
"epoch": 4.08,
"learning_rate": 1.9744422785358456e-05,
"loss": 0.2511,
"step": 6280
},
{
"epoch": 4.09,
"learning_rate": 1.9722763699371888e-05,
"loss": 0.2538,
"step": 6290
},
{
"epoch": 4.09,
"learning_rate": 1.9701104613385317e-05,
"loss": 0.0055,
"step": 6300
},
{
"epoch": 4.1,
"learning_rate": 1.9679445527398745e-05,
"loss": 0.2292,
"step": 6310
},
{
"epoch": 4.11,
"learning_rate": 1.9657786441412174e-05,
"loss": 0.1772,
"step": 6320
},
{
"epoch": 4.11,
"learning_rate": 1.9636127355425603e-05,
"loss": 0.2582,
"step": 6330
},
{
"epoch": 4.12,
"learning_rate": 1.9614468269439028e-05,
"loss": 0.1229,
"step": 6340
},
{
"epoch": 4.12,
"learning_rate": 1.9592809183452457e-05,
"loss": 0.0285,
"step": 6350
},
{
"epoch": 4.13,
"learning_rate": 1.9571150097465886e-05,
"loss": 0.1899,
"step": 6360
},
{
"epoch": 4.14,
"learning_rate": 1.9549491011479314e-05,
"loss": 0.4994,
"step": 6370
},
{
"epoch": 4.14,
"learning_rate": 1.9527831925492746e-05,
"loss": 0.2171,
"step": 6380
},
{
"epoch": 4.15,
"learning_rate": 1.9506172839506175e-05,
"loss": 0.2057,
"step": 6390
},
{
"epoch": 4.16,
"learning_rate": 1.9484513753519604e-05,
"loss": 0.2704,
"step": 6400
},
{
"epoch": 4.16,
"learning_rate": 1.9462854667533032e-05,
"loss": 0.1213,
"step": 6410
},
{
"epoch": 4.17,
"learning_rate": 1.9441195581546458e-05,
"loss": 0.2396,
"step": 6420
},
{
"epoch": 4.18,
"learning_rate": 1.9419536495559886e-05,
"loss": 0.0396,
"step": 6430
},
{
"epoch": 4.18,
"learning_rate": 1.9397877409573315e-05,
"loss": 0.2299,
"step": 6440
},
{
"epoch": 4.19,
"learning_rate": 1.9376218323586744e-05,
"loss": 0.0109,
"step": 6450
},
{
"epoch": 4.2,
"learning_rate": 1.9354559237600172e-05,
"loss": 0.0679,
"step": 6460
},
{
"epoch": 4.2,
"learning_rate": 1.9332900151613604e-05,
"loss": 0.1354,
"step": 6470
},
{
"epoch": 4.21,
"learning_rate": 1.9311241065627033e-05,
"loss": 0.1422,
"step": 6480
},
{
"epoch": 4.22,
"learning_rate": 1.9289581979640462e-05,
"loss": 0.3022,
"step": 6490
},
{
"epoch": 4.22,
"learning_rate": 1.9267922893653887e-05,
"loss": 0.274,
"step": 6500
},
{
"epoch": 4.23,
"learning_rate": 1.9246263807667316e-05,
"loss": 0.1733,
"step": 6510
},
{
"epoch": 4.24,
"learning_rate": 1.9224604721680744e-05,
"loss": 0.4914,
"step": 6520
},
{
"epoch": 4.24,
"learning_rate": 1.9202945635694173e-05,
"loss": 0.1881,
"step": 6530
},
{
"epoch": 4.25,
"learning_rate": 1.9181286549707602e-05,
"loss": 0.1131,
"step": 6540
},
{
"epoch": 4.25,
"learning_rate": 1.915962746372103e-05,
"loss": 0.3501,
"step": 6550
},
{
"epoch": 4.26,
"learning_rate": 1.9137968377734463e-05,
"loss": 0.1951,
"step": 6560
},
{
"epoch": 4.27,
"learning_rate": 1.911630929174789e-05,
"loss": 0.1627,
"step": 6570
},
{
"epoch": 4.27,
"learning_rate": 1.9094650205761317e-05,
"loss": 0.0676,
"step": 6580
},
{
"epoch": 4.28,
"learning_rate": 1.9072991119774745e-05,
"loss": 0.2998,
"step": 6590
},
{
"epoch": 4.29,
"learning_rate": 1.9051332033788174e-05,
"loss": 0.1903,
"step": 6600
},
{
"epoch": 4.29,
"learning_rate": 1.9029672947801603e-05,
"loss": 0.1043,
"step": 6610
},
{
"epoch": 4.3,
"learning_rate": 1.900801386181503e-05,
"loss": 0.2515,
"step": 6620
},
{
"epoch": 4.31,
"learning_rate": 1.898635477582846e-05,
"loss": 0.3629,
"step": 6630
},
{
"epoch": 4.31,
"learning_rate": 1.896469568984189e-05,
"loss": 0.1478,
"step": 6640
},
{
"epoch": 4.32,
"learning_rate": 1.894303660385532e-05,
"loss": 0.1651,
"step": 6650
},
{
"epoch": 4.33,
"learning_rate": 1.8921377517868746e-05,
"loss": 0.1962,
"step": 6660
},
{
"epoch": 4.33,
"learning_rate": 1.8899718431882175e-05,
"loss": 0.0931,
"step": 6670
},
{
"epoch": 4.34,
"learning_rate": 1.8878059345895603e-05,
"loss": 0.1451,
"step": 6680
},
{
"epoch": 4.35,
"learning_rate": 1.8856400259909032e-05,
"loss": 0.2826,
"step": 6690
},
{
"epoch": 4.35,
"learning_rate": 1.883474117392246e-05,
"loss": 0.0117,
"step": 6700
},
{
"epoch": 4.36,
"learning_rate": 1.881308208793589e-05,
"loss": 0.2076,
"step": 6710
},
{
"epoch": 4.37,
"learning_rate": 1.8791423001949318e-05,
"loss": 0.2932,
"step": 6720
},
{
"epoch": 4.37,
"learning_rate": 1.8769763915962747e-05,
"loss": 0.0393,
"step": 6730
},
{
"epoch": 4.38,
"learning_rate": 1.8748104829976176e-05,
"loss": 0.222,
"step": 6740
},
{
"epoch": 4.38,
"learning_rate": 1.8726445743989604e-05,
"loss": 0.2544,
"step": 6750
},
{
"epoch": 4.39,
"learning_rate": 1.8704786658003033e-05,
"loss": 0.0992,
"step": 6760
},
{
"epoch": 4.4,
"learning_rate": 1.868312757201646e-05,
"loss": 0.1865,
"step": 6770
},
{
"epoch": 4.4,
"learning_rate": 1.866146848602989e-05,
"loss": 0.0594,
"step": 6780
},
{
"epoch": 4.41,
"learning_rate": 1.863980940004332e-05,
"loss": 0.1626,
"step": 6790
},
{
"epoch": 4.42,
"learning_rate": 1.8618150314056748e-05,
"loss": 0.2632,
"step": 6800
},
{
"epoch": 4.42,
"learning_rate": 1.8596491228070176e-05,
"loss": 0.409,
"step": 6810
},
{
"epoch": 4.43,
"learning_rate": 1.85748321420836e-05,
"loss": 0.2442,
"step": 6820
},
{
"epoch": 4.44,
"learning_rate": 1.8553173056097034e-05,
"loss": 0.1129,
"step": 6830
},
{
"epoch": 4.44,
"learning_rate": 1.8531513970110462e-05,
"loss": 0.2521,
"step": 6840
},
{
"epoch": 4.45,
"learning_rate": 1.850985488412389e-05,
"loss": 0.2092,
"step": 6850
},
{
"epoch": 4.46,
"learning_rate": 1.848819579813732e-05,
"loss": 0.1882,
"step": 6860
},
{
"epoch": 4.46,
"learning_rate": 1.846653671215075e-05,
"loss": 0.3425,
"step": 6870
},
{
"epoch": 4.47,
"learning_rate": 1.8444877626164177e-05,
"loss": 0.2234,
"step": 6880
},
{
"epoch": 4.48,
"learning_rate": 1.8423218540177606e-05,
"loss": 0.0481,
"step": 6890
},
{
"epoch": 4.48,
"learning_rate": 1.840155945419103e-05,
"loss": 0.0574,
"step": 6900
},
{
"epoch": 4.49,
"learning_rate": 1.837990036820446e-05,
"loss": 0.0049,
"step": 6910
},
{
"epoch": 4.49,
"learning_rate": 1.8358241282217892e-05,
"loss": 0.0997,
"step": 6920
},
{
"epoch": 4.5,
"learning_rate": 1.833658219623132e-05,
"loss": 0.3253,
"step": 6930
},
{
"epoch": 4.51,
"learning_rate": 1.831492311024475e-05,
"loss": 0.1426,
"step": 6940
},
{
"epoch": 4.51,
"learning_rate": 1.8293264024258178e-05,
"loss": 0.0564,
"step": 6950
},
{
"epoch": 4.52,
"learning_rate": 1.8271604938271607e-05,
"loss": 0.3849,
"step": 6960
},
{
"epoch": 4.53,
"learning_rate": 1.8249945852285035e-05,
"loss": 0.0362,
"step": 6970
},
{
"epoch": 4.53,
"learning_rate": 1.822828676629846e-05,
"loss": 0.1942,
"step": 6980
},
{
"epoch": 4.54,
"learning_rate": 1.820662768031189e-05,
"loss": 0.2048,
"step": 6990
},
{
"epoch": 4.55,
"learning_rate": 1.8184968594325318e-05,
"loss": 0.1466,
"step": 7000
},
{
"epoch": 4.55,
"learning_rate": 1.816330950833875e-05,
"loss": 0.2379,
"step": 7010
},
{
"epoch": 4.56,
"learning_rate": 1.814165042235218e-05,
"loss": 0.1948,
"step": 7020
},
{
"epoch": 4.57,
"learning_rate": 1.8119991336365607e-05,
"loss": 0.0612,
"step": 7030
},
{
"epoch": 4.57,
"learning_rate": 1.8098332250379036e-05,
"loss": 0.1641,
"step": 7040
},
{
"epoch": 4.58,
"learning_rate": 1.8076673164392465e-05,
"loss": 0.0618,
"step": 7050
},
{
"epoch": 4.59,
"learning_rate": 1.805501407840589e-05,
"loss": 0.1693,
"step": 7060
},
{
"epoch": 4.59,
"learning_rate": 1.803335499241932e-05,
"loss": 0.134,
"step": 7070
},
{
"epoch": 4.6,
"learning_rate": 1.8011695906432747e-05,
"loss": 0.3439,
"step": 7080
},
{
"epoch": 4.61,
"learning_rate": 1.7990036820446176e-05,
"loss": 0.259,
"step": 7090
},
{
"epoch": 4.61,
"learning_rate": 1.7968377734459608e-05,
"loss": 0.114,
"step": 7100
},
{
"epoch": 4.62,
"learning_rate": 1.7946718648473037e-05,
"loss": 0.3295,
"step": 7110
},
{
"epoch": 4.62,
"learning_rate": 1.7925059562486466e-05,
"loss": 0.259,
"step": 7120
},
{
"epoch": 4.63,
"learning_rate": 1.7903400476499894e-05,
"loss": 0.3889,
"step": 7130
},
{
"epoch": 4.64,
"learning_rate": 1.788174139051332e-05,
"loss": 0.1208,
"step": 7140
},
{
"epoch": 4.64,
"learning_rate": 1.7860082304526748e-05,
"loss": 0.1495,
"step": 7150
},
{
"epoch": 4.65,
"learning_rate": 1.7838423218540177e-05,
"loss": 0.2276,
"step": 7160
},
{
"epoch": 4.66,
"learning_rate": 1.7816764132553606e-05,
"loss": 0.4401,
"step": 7170
},
{
"epoch": 4.66,
"learning_rate": 1.7795105046567034e-05,
"loss": 0.18,
"step": 7180
},
{
"epoch": 4.67,
"learning_rate": 1.7773445960580466e-05,
"loss": 0.1499,
"step": 7190
},
{
"epoch": 4.68,
"learning_rate": 1.7751786874593895e-05,
"loss": 0.1996,
"step": 7200
},
{
"epoch": 4.68,
"learning_rate": 1.7730127788607324e-05,
"loss": 0.3012,
"step": 7210
},
{
"epoch": 4.69,
"learning_rate": 1.770846870262075e-05,
"loss": 0.1885,
"step": 7220
},
{
"epoch": 4.7,
"learning_rate": 1.7686809616634178e-05,
"loss": 0.119,
"step": 7230
},
{
"epoch": 4.7,
"learning_rate": 1.7665150530647606e-05,
"loss": 0.2105,
"step": 7240
},
{
"epoch": 4.71,
"learning_rate": 1.7643491444661035e-05,
"loss": 0.1406,
"step": 7250
},
{
"epoch": 4.72,
"learning_rate": 1.7621832358674464e-05,
"loss": 0.358,
"step": 7260
},
{
"epoch": 4.72,
"learning_rate": 1.7600173272687892e-05,
"loss": 0.1969,
"step": 7270
},
{
"epoch": 4.73,
"learning_rate": 1.7578514186701325e-05,
"loss": 0.0513,
"step": 7280
},
{
"epoch": 4.74,
"learning_rate": 1.755685510071475e-05,
"loss": 0.1709,
"step": 7290
},
{
"epoch": 4.74,
"learning_rate": 1.753519601472818e-05,
"loss": 0.316,
"step": 7300
},
{
"epoch": 4.75,
"learning_rate": 1.7513536928741607e-05,
"loss": 0.0858,
"step": 7310
},
{
"epoch": 4.75,
"learning_rate": 1.7491877842755036e-05,
"loss": 0.1269,
"step": 7320
},
{
"epoch": 4.76,
"learning_rate": 1.7470218756768465e-05,
"loss": 0.1272,
"step": 7330
},
{
"epoch": 4.77,
"learning_rate": 1.7448559670781893e-05,
"loss": 0.2098,
"step": 7340
},
{
"epoch": 4.77,
"learning_rate": 1.7426900584795322e-05,
"loss": 0.1316,
"step": 7350
},
{
"epoch": 4.78,
"learning_rate": 1.740524149880875e-05,
"loss": 0.1895,
"step": 7360
},
{
"epoch": 4.79,
"learning_rate": 1.738358241282218e-05,
"loss": 0.1917,
"step": 7370
},
{
"epoch": 4.79,
"learning_rate": 1.7361923326835608e-05,
"loss": 0.2667,
"step": 7380
},
{
"epoch": 4.8,
"learning_rate": 1.7340264240849037e-05,
"loss": 0.0624,
"step": 7390
},
{
"epoch": 4.81,
"learning_rate": 1.7318605154862465e-05,
"loss": 0.1343,
"step": 7400
},
{
"epoch": 4.81,
"learning_rate": 1.7296946068875894e-05,
"loss": 0.1578,
"step": 7410
},
{
"epoch": 4.82,
"learning_rate": 1.7275286982889323e-05,
"loss": 0.0942,
"step": 7420
},
{
"epoch": 4.83,
"learning_rate": 1.725362789690275e-05,
"loss": 0.1821,
"step": 7430
},
{
"epoch": 4.83,
"learning_rate": 1.723196881091618e-05,
"loss": 0.1143,
"step": 7440
},
{
"epoch": 4.84,
"learning_rate": 1.7210309724929605e-05,
"loss": 0.2072,
"step": 7450
},
{
"epoch": 4.85,
"learning_rate": 1.7188650638943038e-05,
"loss": 0.3436,
"step": 7460
},
{
"epoch": 4.85,
"learning_rate": 1.7166991552956466e-05,
"loss": 0.2031,
"step": 7470
},
{
"epoch": 4.86,
"learning_rate": 1.7145332466969895e-05,
"loss": 0.1608,
"step": 7480
},
{
"epoch": 4.87,
"learning_rate": 1.7123673380983324e-05,
"loss": 0.1898,
"step": 7490
},
{
"epoch": 4.87,
"learning_rate": 1.7102014294996752e-05,
"loss": 0.2166,
"step": 7500
},
{
"epoch": 4.88,
"learning_rate": 1.708035520901018e-05,
"loss": 0.1145,
"step": 7510
},
{
"epoch": 4.88,
"learning_rate": 1.705869612302361e-05,
"loss": 0.3105,
"step": 7520
},
{
"epoch": 4.89,
"learning_rate": 1.7037037037037035e-05,
"loss": 0.2545,
"step": 7530
},
{
"epoch": 4.9,
"learning_rate": 1.7015377951050464e-05,
"loss": 0.016,
"step": 7540
},
{
"epoch": 4.9,
"learning_rate": 1.6993718865063896e-05,
"loss": 0.3899,
"step": 7550
},
{
"epoch": 4.91,
"learning_rate": 1.6972059779077324e-05,
"loss": 0.1974,
"step": 7560
},
{
"epoch": 4.92,
"learning_rate": 1.6950400693090753e-05,
"loss": 0.0903,
"step": 7570
},
{
"epoch": 4.92,
"learning_rate": 1.6928741607104182e-05,
"loss": 0.1623,
"step": 7580
},
{
"epoch": 4.93,
"learning_rate": 1.690708252111761e-05,
"loss": 0.2203,
"step": 7590
},
{
"epoch": 4.94,
"learning_rate": 1.688542343513104e-05,
"loss": 0.2525,
"step": 7600
},
{
"epoch": 4.94,
"learning_rate": 1.6863764349144464e-05,
"loss": 0.3311,
"step": 7610
},
{
"epoch": 4.95,
"learning_rate": 1.6842105263157893e-05,
"loss": 0.0228,
"step": 7620
},
{
"epoch": 4.96,
"learning_rate": 1.6820446177171322e-05,
"loss": 0.2533,
"step": 7630
},
{
"epoch": 4.96,
"learning_rate": 1.6798787091184754e-05,
"loss": 0.1275,
"step": 7640
},
{
"epoch": 4.97,
"learning_rate": 1.6777128005198183e-05,
"loss": 0.0841,
"step": 7650
},
{
"epoch": 4.98,
"learning_rate": 1.675546891921161e-05,
"loss": 0.2046,
"step": 7660
},
{
"epoch": 4.98,
"learning_rate": 1.673380983322504e-05,
"loss": 0.2394,
"step": 7670
},
{
"epoch": 4.99,
"learning_rate": 1.671215074723847e-05,
"loss": 0.1007,
"step": 7680
},
{
"epoch": 5.0,
"learning_rate": 1.6690491661251894e-05,
"loss": 0.0272,
"step": 7690
},
{
"epoch": 5.0,
"eval_accuracy": 0.9829806807727691,
"eval_loss": 0.08533850312232971,
"eval_runtime": 12.8473,
"eval_samples_per_second": 169.218,
"eval_steps_per_second": 42.343,
"step": 7697
},
{
"epoch": 5.0,
"learning_rate": 1.6668832575265323e-05,
"loss": 0.1804,
"step": 7700
},
{
"epoch": 5.01,
"learning_rate": 1.664717348927875e-05,
"loss": 0.0881,
"step": 7710
},
{
"epoch": 5.01,
"learning_rate": 1.662551440329218e-05,
"loss": 0.3207,
"step": 7720
},
{
"epoch": 5.02,
"learning_rate": 1.660385531730561e-05,
"loss": 0.046,
"step": 7730
},
{
"epoch": 5.03,
"learning_rate": 1.658219623131904e-05,
"loss": 0.0632,
"step": 7740
},
{
"epoch": 5.03,
"learning_rate": 1.656053714533247e-05,
"loss": 0.492,
"step": 7750
},
{
"epoch": 5.04,
"learning_rate": 1.6538878059345898e-05,
"loss": 0.1749,
"step": 7760
},
{
"epoch": 5.05,
"learning_rate": 1.6517218973359323e-05,
"loss": 0.0785,
"step": 7770
},
{
"epoch": 5.05,
"learning_rate": 1.6495559887372752e-05,
"loss": 0.0355,
"step": 7780
},
{
"epoch": 5.06,
"learning_rate": 1.647390080138618e-05,
"loss": 0.2957,
"step": 7790
},
{
"epoch": 5.07,
"learning_rate": 1.645224171539961e-05,
"loss": 0.3177,
"step": 7800
},
{
"epoch": 5.07,
"learning_rate": 1.6430582629413038e-05,
"loss": 0.1878,
"step": 7810
},
{
"epoch": 5.08,
"learning_rate": 1.6408923543426467e-05,
"loss": 0.1447,
"step": 7820
},
{
"epoch": 5.09,
"learning_rate": 1.63872644574399e-05,
"loss": 0.2844,
"step": 7830
},
{
"epoch": 5.09,
"learning_rate": 1.6365605371453328e-05,
"loss": 0.0256,
"step": 7840
},
{
"epoch": 5.1,
"learning_rate": 1.6343946285466753e-05,
"loss": 0.0758,
"step": 7850
},
{
"epoch": 5.11,
"learning_rate": 1.632228719948018e-05,
"loss": 0.1041,
"step": 7860
},
{
"epoch": 5.11,
"learning_rate": 1.630062811349361e-05,
"loss": 0.1667,
"step": 7870
},
{
"epoch": 5.12,
"learning_rate": 1.627896902750704e-05,
"loss": 0.2194,
"step": 7880
},
{
"epoch": 5.13,
"learning_rate": 1.6257309941520468e-05,
"loss": 0.2497,
"step": 7890
},
{
"epoch": 5.13,
"learning_rate": 1.6235650855533896e-05,
"loss": 0.2496,
"step": 7900
},
{
"epoch": 5.14,
"learning_rate": 1.6213991769547325e-05,
"loss": 0.2154,
"step": 7910
},
{
"epoch": 5.14,
"learning_rate": 1.6192332683560757e-05,
"loss": 0.219,
"step": 7920
},
{
"epoch": 5.15,
"learning_rate": 1.6170673597574182e-05,
"loss": 0.0798,
"step": 7930
},
{
"epoch": 5.16,
"learning_rate": 1.614901451158761e-05,
"loss": 0.2998,
"step": 7940
},
{
"epoch": 5.16,
"learning_rate": 1.612735542560104e-05,
"loss": 0.0871,
"step": 7950
},
{
"epoch": 5.17,
"learning_rate": 1.610569633961447e-05,
"loss": 0.1863,
"step": 7960
},
{
"epoch": 5.18,
"learning_rate": 1.6084037253627897e-05,
"loss": 0.1953,
"step": 7970
},
{
"epoch": 5.18,
"learning_rate": 1.6062378167641326e-05,
"loss": 0.2771,
"step": 7980
},
{
"epoch": 5.19,
"learning_rate": 1.6040719081654754e-05,
"loss": 0.1713,
"step": 7990
},
{
"epoch": 5.2,
"learning_rate": 1.602122590426684e-05,
"loss": 0.4941,
"step": 8000
},
{
"epoch": 5.2,
"learning_rate": 1.599956681828027e-05,
"loss": 0.0853,
"step": 8010
},
{
"epoch": 5.21,
"learning_rate": 1.5977907732293696e-05,
"loss": 0.1868,
"step": 8020
},
{
"epoch": 5.22,
"learning_rate": 1.5956248646307124e-05,
"loss": 0.2171,
"step": 8030
},
{
"epoch": 5.22,
"learning_rate": 1.5934589560320553e-05,
"loss": 0.0884,
"step": 8040
},
{
"epoch": 5.23,
"learning_rate": 1.591293047433398e-05,
"loss": 0.2246,
"step": 8050
},
{
"epoch": 5.24,
"learning_rate": 1.5891271388347414e-05,
"loss": 0.1167,
"step": 8060
},
{
"epoch": 5.24,
"learning_rate": 1.5869612302360842e-05,
"loss": 0.2971,
"step": 8070
},
{
"epoch": 5.25,
"learning_rate": 1.584795321637427e-05,
"loss": 0.1353,
"step": 8080
},
{
"epoch": 5.25,
"learning_rate": 1.58262941303877e-05,
"loss": 0.1755,
"step": 8090
},
{
"epoch": 5.26,
"learning_rate": 1.5804635044401125e-05,
"loss": 0.1807,
"step": 8100
},
{
"epoch": 5.27,
"learning_rate": 1.5782975958414554e-05,
"loss": 0.1284,
"step": 8110
},
{
"epoch": 5.27,
"learning_rate": 1.5761316872427982e-05,
"loss": 0.113,
"step": 8120
},
{
"epoch": 5.28,
"learning_rate": 1.573965778644141e-05,
"loss": 0.163,
"step": 8130
},
{
"epoch": 5.29,
"learning_rate": 1.571799870045484e-05,
"loss": 0.2105,
"step": 8140
},
{
"epoch": 5.29,
"learning_rate": 1.5696339614468272e-05,
"loss": 0.2947,
"step": 8150
},
{
"epoch": 5.3,
"learning_rate": 1.56746805284817e-05,
"loss": 0.0778,
"step": 8160
},
{
"epoch": 5.31,
"learning_rate": 1.565302144249513e-05,
"loss": 0.2409,
"step": 8170
},
{
"epoch": 5.31,
"learning_rate": 1.5631362356508555e-05,
"loss": 0.2679,
"step": 8180
},
{
"epoch": 5.32,
"learning_rate": 1.5609703270521983e-05,
"loss": 0.1457,
"step": 8190
},
{
"epoch": 5.33,
"learning_rate": 1.5588044184535412e-05,
"loss": 0.2009,
"step": 8200
},
{
"epoch": 5.33,
"learning_rate": 1.556638509854884e-05,
"loss": 0.1151,
"step": 8210
},
{
"epoch": 5.34,
"learning_rate": 1.554472601256227e-05,
"loss": 0.1441,
"step": 8220
},
{
"epoch": 5.35,
"learning_rate": 1.5523066926575698e-05,
"loss": 0.043,
"step": 8230
},
{
"epoch": 5.35,
"learning_rate": 1.550140784058913e-05,
"loss": 0.344,
"step": 8240
},
{
"epoch": 5.36,
"learning_rate": 1.547974875460256e-05,
"loss": 0.1488,
"step": 8250
},
{
"epoch": 5.37,
"learning_rate": 1.5458089668615984e-05,
"loss": 0.3524,
"step": 8260
},
{
"epoch": 5.37,
"learning_rate": 1.5436430582629413e-05,
"loss": 0.1053,
"step": 8270
},
{
"epoch": 5.38,
"learning_rate": 1.541477149664284e-05,
"loss": 0.3589,
"step": 8280
},
{
"epoch": 5.38,
"learning_rate": 1.539311241065627e-05,
"loss": 0.1829,
"step": 8290
},
{
"epoch": 5.39,
"learning_rate": 1.53714533246697e-05,
"loss": 0.2131,
"step": 8300
},
{
"epoch": 5.4,
"learning_rate": 1.5349794238683127e-05,
"loss": 0.0702,
"step": 8310
},
{
"epoch": 5.4,
"learning_rate": 1.5328135152696556e-05,
"loss": 0.0808,
"step": 8320
},
{
"epoch": 5.41,
"learning_rate": 1.5306476066709988e-05,
"loss": 0.184,
"step": 8330
},
{
"epoch": 5.42,
"learning_rate": 1.5284816980723414e-05,
"loss": 0.3782,
"step": 8340
},
{
"epoch": 5.42,
"learning_rate": 1.5263157894736842e-05,
"loss": 0.021,
"step": 8350
},
{
"epoch": 5.43,
"learning_rate": 1.5241498808750271e-05,
"loss": 0.1233,
"step": 8360
},
{
"epoch": 5.44,
"learning_rate": 1.52198397227637e-05,
"loss": 0.2307,
"step": 8370
},
{
"epoch": 5.44,
"learning_rate": 1.5198180636777128e-05,
"loss": 0.3145,
"step": 8380
},
{
"epoch": 5.45,
"learning_rate": 1.5176521550790557e-05,
"loss": 0.1034,
"step": 8390
},
{
"epoch": 5.46,
"learning_rate": 1.5154862464803984e-05,
"loss": 0.0123,
"step": 8400
},
{
"epoch": 5.46,
"learning_rate": 1.5133203378817413e-05,
"loss": 0.1678,
"step": 8410
},
{
"epoch": 5.47,
"learning_rate": 1.5111544292830845e-05,
"loss": 0.1051,
"step": 8420
},
{
"epoch": 5.48,
"learning_rate": 1.5089885206844273e-05,
"loss": 0.0742,
"step": 8430
},
{
"epoch": 5.48,
"learning_rate": 1.50682261208577e-05,
"loss": 0.2037,
"step": 8440
},
{
"epoch": 5.49,
"learning_rate": 1.5046567034871129e-05,
"loss": 0.3068,
"step": 8450
},
{
"epoch": 5.5,
"learning_rate": 1.5024907948884558e-05,
"loss": 0.2932,
"step": 8460
},
{
"epoch": 5.5,
"learning_rate": 1.5003248862897986e-05,
"loss": 0.0899,
"step": 8470
},
{
"epoch": 5.51,
"learning_rate": 1.4981589776911413e-05,
"loss": 0.1688,
"step": 8480
},
{
"epoch": 5.51,
"learning_rate": 1.4959930690924844e-05,
"loss": 0.2213,
"step": 8490
},
{
"epoch": 5.52,
"learning_rate": 1.4938271604938272e-05,
"loss": 0.1739,
"step": 8500
},
{
"epoch": 5.53,
"learning_rate": 1.4916612518951701e-05,
"loss": 0.1328,
"step": 8510
},
{
"epoch": 5.53,
"learning_rate": 1.4894953432965128e-05,
"loss": 0.0799,
"step": 8520
},
{
"epoch": 5.54,
"learning_rate": 1.4873294346978557e-05,
"loss": 0.0648,
"step": 8530
},
{
"epoch": 5.55,
"learning_rate": 1.4851635260991987e-05,
"loss": 0.1861,
"step": 8540
},
{
"epoch": 5.55,
"learning_rate": 1.4829976175005416e-05,
"loss": 0.1486,
"step": 8550
},
{
"epoch": 5.56,
"learning_rate": 1.4808317089018843e-05,
"loss": 0.3231,
"step": 8560
},
{
"epoch": 5.57,
"learning_rate": 1.4786658003032272e-05,
"loss": 0.0632,
"step": 8570
},
{
"epoch": 5.57,
"learning_rate": 1.4764998917045702e-05,
"loss": 0.0212,
"step": 8580
},
{
"epoch": 5.58,
"learning_rate": 1.474333983105913e-05,
"loss": 0.0212,
"step": 8590
},
{
"epoch": 5.59,
"learning_rate": 1.4721680745072558e-05,
"loss": 0.0259,
"step": 8600
},
{
"epoch": 5.59,
"learning_rate": 1.4700021659085986e-05,
"loss": 0.2181,
"step": 8610
},
{
"epoch": 5.6,
"learning_rate": 1.4678362573099415e-05,
"loss": 0.1272,
"step": 8620
},
{
"epoch": 5.61,
"learning_rate": 1.4656703487112845e-05,
"loss": 0.3194,
"step": 8630
},
{
"epoch": 5.61,
"learning_rate": 1.4635044401126272e-05,
"loss": 0.1792,
"step": 8640
},
{
"epoch": 5.62,
"learning_rate": 1.4613385315139701e-05,
"loss": 0.1701,
"step": 8650
},
{
"epoch": 5.63,
"learning_rate": 1.459172622915313e-05,
"loss": 0.2487,
"step": 8660
},
{
"epoch": 5.63,
"learning_rate": 1.457006714316656e-05,
"loss": 0.0454,
"step": 8670
},
{
"epoch": 5.64,
"learning_rate": 1.4548408057179987e-05,
"loss": 0.1839,
"step": 8680
},
{
"epoch": 5.64,
"learning_rate": 1.4526748971193416e-05,
"loss": 0.2466,
"step": 8690
},
{
"epoch": 5.65,
"learning_rate": 1.4505089885206844e-05,
"loss": 0.2125,
"step": 8700
},
{
"epoch": 5.66,
"learning_rate": 1.4483430799220273e-05,
"loss": 0.1713,
"step": 8710
},
{
"epoch": 5.66,
"learning_rate": 1.4461771713233702e-05,
"loss": 0.1824,
"step": 8720
},
{
"epoch": 5.67,
"learning_rate": 1.444011262724713e-05,
"loss": 0.1248,
"step": 8730
},
{
"epoch": 5.68,
"learning_rate": 1.441845354126056e-05,
"loss": 0.0942,
"step": 8740
},
{
"epoch": 5.68,
"learning_rate": 1.4396794455273988e-05,
"loss": 0.2349,
"step": 8750
},
{
"epoch": 5.69,
"learning_rate": 1.4375135369287417e-05,
"loss": 0.1787,
"step": 8760
},
{
"epoch": 5.7,
"learning_rate": 1.4353476283300845e-05,
"loss": 0.3789,
"step": 8770
},
{
"epoch": 5.7,
"learning_rate": 1.4331817197314274e-05,
"loss": 0.166,
"step": 8780
},
{
"epoch": 5.71,
"learning_rate": 1.4310158111327703e-05,
"loss": 0.2508,
"step": 8790
},
{
"epoch": 5.72,
"learning_rate": 1.428849902534113e-05,
"loss": 0.3274,
"step": 8800
},
{
"epoch": 5.72,
"learning_rate": 1.426683993935456e-05,
"loss": 0.0026,
"step": 8810
},
{
"epoch": 5.73,
"learning_rate": 1.4245180853367989e-05,
"loss": 0.3321,
"step": 8820
},
{
"epoch": 5.74,
"learning_rate": 1.4223521767381417e-05,
"loss": 0.4053,
"step": 8830
},
{
"epoch": 5.74,
"learning_rate": 1.4201862681394844e-05,
"loss": 0.2369,
"step": 8840
},
{
"epoch": 5.75,
"learning_rate": 1.4180203595408275e-05,
"loss": 0.0657,
"step": 8850
},
{
"epoch": 5.76,
"learning_rate": 1.4158544509421703e-05,
"loss": 0.1151,
"step": 8860
},
{
"epoch": 5.76,
"learning_rate": 1.4136885423435132e-05,
"loss": 0.2244,
"step": 8870
},
{
"epoch": 5.77,
"learning_rate": 1.4115226337448559e-05,
"loss": 0.0992,
"step": 8880
},
{
"epoch": 5.77,
"learning_rate": 1.4093567251461988e-05,
"loss": 0.0408,
"step": 8890
},
{
"epoch": 5.78,
"learning_rate": 1.4071908165475418e-05,
"loss": 0.1377,
"step": 8900
},
{
"epoch": 5.79,
"learning_rate": 1.4050249079488847e-05,
"loss": 0.2829,
"step": 8910
},
{
"epoch": 5.79,
"learning_rate": 1.4028589993502274e-05,
"loss": 0.2788,
"step": 8920
},
{
"epoch": 5.8,
"learning_rate": 1.4006930907515703e-05,
"loss": 0.1138,
"step": 8930
},
{
"epoch": 5.81,
"learning_rate": 1.3985271821529133e-05,
"loss": 0.0994,
"step": 8940
},
{
"epoch": 5.81,
"learning_rate": 1.3963612735542562e-05,
"loss": 0.1967,
"step": 8950
},
{
"epoch": 5.82,
"learning_rate": 1.3941953649555989e-05,
"loss": 0.0688,
"step": 8960
},
{
"epoch": 5.83,
"learning_rate": 1.3920294563569417e-05,
"loss": 0.1298,
"step": 8970
},
{
"epoch": 5.83,
"learning_rate": 1.3898635477582846e-05,
"loss": 0.1009,
"step": 8980
},
{
"epoch": 5.84,
"learning_rate": 1.3876976391596276e-05,
"loss": 0.2893,
"step": 8990
},
{
"epoch": 5.85,
"learning_rate": 1.3855317305609703e-05,
"loss": 0.2114,
"step": 9000
},
{
"epoch": 5.85,
"learning_rate": 1.3833658219623132e-05,
"loss": 0.306,
"step": 9010
},
{
"epoch": 5.86,
"learning_rate": 1.381199913363656e-05,
"loss": 0.0636,
"step": 9020
},
{
"epoch": 5.87,
"learning_rate": 1.379034004764999e-05,
"loss": 0.1042,
"step": 9030
},
{
"epoch": 5.87,
"learning_rate": 1.3768680961663418e-05,
"loss": 0.0434,
"step": 9040
},
{
"epoch": 5.88,
"learning_rate": 1.3747021875676847e-05,
"loss": 0.2535,
"step": 9050
},
{
"epoch": 5.89,
"learning_rate": 1.3725362789690275e-05,
"loss": 0.2026,
"step": 9060
},
{
"epoch": 5.89,
"learning_rate": 1.3703703703703704e-05,
"loss": 0.2205,
"step": 9070
},
{
"epoch": 5.9,
"learning_rate": 1.3682044617717133e-05,
"loss": 0.0956,
"step": 9080
},
{
"epoch": 5.9,
"learning_rate": 1.3660385531730562e-05,
"loss": 0.2372,
"step": 9090
},
{
"epoch": 5.91,
"learning_rate": 1.363872644574399e-05,
"loss": 0.0798,
"step": 9100
},
{
"epoch": 5.92,
"learning_rate": 1.3617067359757417e-05,
"loss": 0.0851,
"step": 9110
},
{
"epoch": 5.92,
"learning_rate": 1.3595408273770848e-05,
"loss": 0.1327,
"step": 9120
},
{
"epoch": 5.93,
"learning_rate": 1.3573749187784276e-05,
"loss": 0.4017,
"step": 9130
},
{
"epoch": 5.94,
"learning_rate": 1.3552090101797705e-05,
"loss": 0.0811,
"step": 9140
},
{
"epoch": 5.94,
"learning_rate": 1.3530431015811132e-05,
"loss": 0.4282,
"step": 9150
},
{
"epoch": 5.95,
"learning_rate": 1.350877192982456e-05,
"loss": 0.1739,
"step": 9160
},
{
"epoch": 5.96,
"learning_rate": 1.3487112843837991e-05,
"loss": 0.0615,
"step": 9170
},
{
"epoch": 5.96,
"learning_rate": 1.346545375785142e-05,
"loss": 0.2251,
"step": 9180
},
{
"epoch": 5.97,
"learning_rate": 1.3443794671864847e-05,
"loss": 0.0924,
"step": 9190
},
{
"epoch": 5.98,
"learning_rate": 1.3422135585878275e-05,
"loss": 0.1031,
"step": 9200
},
{
"epoch": 5.98,
"learning_rate": 1.3400476499891706e-05,
"loss": 0.1001,
"step": 9210
},
{
"epoch": 5.99,
"learning_rate": 1.3378817413905134e-05,
"loss": 0.2866,
"step": 9220
},
{
"epoch": 6.0,
"learning_rate": 1.3357158327918561e-05,
"loss": 0.1269,
"step": 9230
},
{
"epoch": 6.0,
"eval_accuracy": 0.9806807727690893,
"eval_loss": 0.10807611048221588,
"eval_runtime": 13.2387,
"eval_samples_per_second": 164.215,
"eval_steps_per_second": 41.092,
"step": 9237
},
{
"epoch": 6.0,
"learning_rate": 1.333549924193199e-05,
"loss": 0.1506,
"step": 9240
},
{
"epoch": 6.01,
"learning_rate": 1.3313840155945419e-05,
"loss": 0.3085,
"step": 9250
},
{
"epoch": 6.01,
"learning_rate": 1.3292181069958849e-05,
"loss": 0.1471,
"step": 9260
},
{
"epoch": 6.02,
"learning_rate": 1.3270521983972276e-05,
"loss": 0.0518,
"step": 9270
},
{
"epoch": 6.03,
"learning_rate": 1.3248862897985705e-05,
"loss": 0.1339,
"step": 9280
},
{
"epoch": 6.03,
"learning_rate": 1.3227203811999134e-05,
"loss": 0.0966,
"step": 9290
},
{
"epoch": 6.04,
"learning_rate": 1.3205544726012564e-05,
"loss": 0.2445,
"step": 9300
},
{
"epoch": 6.05,
"learning_rate": 1.3183885640025991e-05,
"loss": 0.3489,
"step": 9310
},
{
"epoch": 6.05,
"learning_rate": 1.316222655403942e-05,
"loss": 0.127,
"step": 9320
},
{
"epoch": 6.06,
"learning_rate": 1.3140567468052848e-05,
"loss": 0.1226,
"step": 9330
},
{
"epoch": 6.07,
"learning_rate": 1.3118908382066277e-05,
"loss": 0.2297,
"step": 9340
},
{
"epoch": 6.07,
"learning_rate": 1.3097249296079706e-05,
"loss": 0.0035,
"step": 9350
},
{
"epoch": 6.08,
"learning_rate": 1.3075590210093134e-05,
"loss": 0.1268,
"step": 9360
},
{
"epoch": 6.09,
"learning_rate": 1.3053931124106563e-05,
"loss": 0.1522,
"step": 9370
},
{
"epoch": 6.09,
"learning_rate": 1.3032272038119992e-05,
"loss": 0.193,
"step": 9380
},
{
"epoch": 6.1,
"learning_rate": 1.301061295213342e-05,
"loss": 0.0452,
"step": 9390
},
{
"epoch": 6.11,
"learning_rate": 1.2988953866146849e-05,
"loss": 0.0601,
"step": 9400
},
{
"epoch": 6.11,
"learning_rate": 1.2967294780160278e-05,
"loss": 0.1202,
"step": 9410
},
{
"epoch": 6.12,
"learning_rate": 1.2945635694173706e-05,
"loss": 0.0262,
"step": 9420
},
{
"epoch": 6.13,
"learning_rate": 1.2923976608187133e-05,
"loss": 0.1947,
"step": 9430
},
{
"epoch": 6.13,
"learning_rate": 1.2902317522200564e-05,
"loss": 0.0498,
"step": 9440
},
{
"epoch": 6.14,
"learning_rate": 1.2880658436213992e-05,
"loss": 0.1465,
"step": 9450
},
{
"epoch": 6.14,
"learning_rate": 1.2858999350227421e-05,
"loss": 0.148,
"step": 9460
},
{
"epoch": 6.15,
"learning_rate": 1.2837340264240848e-05,
"loss": 0.2404,
"step": 9470
},
{
"epoch": 6.16,
"learning_rate": 1.2815681178254279e-05,
"loss": 0.4131,
"step": 9480
},
{
"epoch": 6.16,
"learning_rate": 1.2794022092267707e-05,
"loss": 0.0116,
"step": 9490
},
{
"epoch": 6.17,
"learning_rate": 1.2772363006281136e-05,
"loss": 0.2536,
"step": 9500
},
{
"epoch": 6.18,
"learning_rate": 1.2750703920294563e-05,
"loss": 0.275,
"step": 9510
},
{
"epoch": 6.18,
"learning_rate": 1.2729044834307992e-05,
"loss": 0.1243,
"step": 9520
},
{
"epoch": 6.19,
"learning_rate": 1.2707385748321422e-05,
"loss": 0.0021,
"step": 9530
},
{
"epoch": 6.2,
"learning_rate": 1.268572666233485e-05,
"loss": 0.3057,
"step": 9540
},
{
"epoch": 6.2,
"learning_rate": 1.2664067576348278e-05,
"loss": 0.163,
"step": 9550
},
{
"epoch": 6.21,
"learning_rate": 1.2642408490361706e-05,
"loss": 0.0507,
"step": 9560
},
{
"epoch": 6.22,
"learning_rate": 1.2620749404375137e-05,
"loss": 0.2131,
"step": 9570
},
{
"epoch": 6.22,
"learning_rate": 1.2599090318388565e-05,
"loss": 0.0124,
"step": 9580
},
{
"epoch": 6.23,
"learning_rate": 1.2577431232401992e-05,
"loss": 0.0315,
"step": 9590
},
{
"epoch": 6.24,
"learning_rate": 1.2555772146415421e-05,
"loss": 0.3318,
"step": 9600
},
{
"epoch": 6.24,
"learning_rate": 1.253411306042885e-05,
"loss": 0.0796,
"step": 9610
},
{
"epoch": 6.25,
"learning_rate": 1.251245397444228e-05,
"loss": 0.108,
"step": 9620
},
{
"epoch": 6.26,
"learning_rate": 1.2490794888455707e-05,
"loss": 0.1974,
"step": 9630
},
{
"epoch": 6.26,
"learning_rate": 1.2469135802469136e-05,
"loss": 0.2185,
"step": 9640
},
{
"epoch": 6.27,
"learning_rate": 1.2447476716482564e-05,
"loss": 0.1898,
"step": 9650
},
{
"epoch": 6.27,
"learning_rate": 1.2425817630495995e-05,
"loss": 0.0438,
"step": 9660
},
{
"epoch": 6.28,
"learning_rate": 1.2404158544509422e-05,
"loss": 0.0117,
"step": 9670
},
{
"epoch": 6.29,
"learning_rate": 1.238249945852285e-05,
"loss": 0.2061,
"step": 9680
},
{
"epoch": 6.29,
"learning_rate": 1.236084037253628e-05,
"loss": 0.0047,
"step": 9690
},
{
"epoch": 6.3,
"learning_rate": 1.2339181286549708e-05,
"loss": 0.0662,
"step": 9700
},
{
"epoch": 6.31,
"learning_rate": 1.2317522200563137e-05,
"loss": 0.0885,
"step": 9710
},
{
"epoch": 6.31,
"learning_rate": 1.2295863114576565e-05,
"loss": 0.0855,
"step": 9720
},
{
"epoch": 6.32,
"learning_rate": 1.2274204028589994e-05,
"loss": 0.1832,
"step": 9730
},
{
"epoch": 6.33,
"learning_rate": 1.2252544942603423e-05,
"loss": 0.0117,
"step": 9740
},
{
"epoch": 6.33,
"learning_rate": 1.2230885856616851e-05,
"loss": 0.1825,
"step": 9750
},
{
"epoch": 6.34,
"learning_rate": 1.220922677063028e-05,
"loss": 0.0776,
"step": 9760
},
{
"epoch": 6.35,
"learning_rate": 1.2187567684643709e-05,
"loss": 0.276,
"step": 9770
},
{
"epoch": 6.35,
"learning_rate": 1.2165908598657137e-05,
"loss": 0.0053,
"step": 9780
},
{
"epoch": 6.36,
"learning_rate": 1.2144249512670564e-05,
"loss": 0.2128,
"step": 9790
},
{
"epoch": 6.37,
"learning_rate": 1.2122590426683995e-05,
"loss": 0.2099,
"step": 9800
},
{
"epoch": 6.37,
"learning_rate": 1.2100931340697423e-05,
"loss": 0.0055,
"step": 9810
},
{
"epoch": 6.38,
"learning_rate": 1.2079272254710852e-05,
"loss": 0.122,
"step": 9820
},
{
"epoch": 6.39,
"learning_rate": 1.2057613168724279e-05,
"loss": 0.0016,
"step": 9830
},
{
"epoch": 6.39,
"learning_rate": 1.203595408273771e-05,
"loss": 0.0019,
"step": 9840
},
{
"epoch": 6.4,
"learning_rate": 1.2014294996751138e-05,
"loss": 0.0679,
"step": 9850
},
{
"epoch": 6.4,
"learning_rate": 1.1992635910764567e-05,
"loss": 0.2834,
"step": 9860
},
{
"epoch": 6.41,
"learning_rate": 1.1970976824777994e-05,
"loss": 0.3688,
"step": 9870
},
{
"epoch": 6.42,
"learning_rate": 1.1949317738791423e-05,
"loss": 0.1276,
"step": 9880
},
{
"epoch": 6.42,
"learning_rate": 1.1927658652804853e-05,
"loss": 0.1572,
"step": 9890
},
{
"epoch": 6.43,
"learning_rate": 1.1905999566818282e-05,
"loss": 0.2999,
"step": 9900
},
{
"epoch": 6.44,
"learning_rate": 1.1884340480831709e-05,
"loss": 0.0549,
"step": 9910
},
{
"epoch": 6.44,
"learning_rate": 1.1862681394845137e-05,
"loss": 0.0244,
"step": 9920
},
{
"epoch": 6.45,
"learning_rate": 1.1841022308858568e-05,
"loss": 0.0144,
"step": 9930
},
{
"epoch": 6.46,
"learning_rate": 1.1819363222871996e-05,
"loss": 0.0892,
"step": 9940
},
{
"epoch": 6.46,
"learning_rate": 1.1797704136885423e-05,
"loss": 0.0441,
"step": 9950
},
{
"epoch": 6.47,
"learning_rate": 1.1776045050898852e-05,
"loss": 0.1115,
"step": 9960
},
{
"epoch": 6.48,
"learning_rate": 1.175438596491228e-05,
"loss": 0.1608,
"step": 9970
},
{
"epoch": 6.48,
"learning_rate": 1.1734892787524366e-05,
"loss": 0.2287,
"step": 9980
},
{
"epoch": 6.49,
"learning_rate": 1.1713233701537797e-05,
"loss": 0.0351,
"step": 9990
},
{
"epoch": 6.5,
"learning_rate": 1.1691574615551224e-05,
"loss": 0.0526,
"step": 10000
},
{
"epoch": 6.5,
"learning_rate": 1.1669915529564652e-05,
"loss": 0.0966,
"step": 10010
},
{
"epoch": 6.51,
"learning_rate": 1.1648256443578081e-05,
"loss": 0.2692,
"step": 10020
},
{
"epoch": 6.52,
"learning_rate": 1.162659735759151e-05,
"loss": 0.0803,
"step": 10030
},
{
"epoch": 6.52,
"learning_rate": 1.1604938271604938e-05,
"loss": 0.3985,
"step": 10040
},
{
"epoch": 6.53,
"learning_rate": 1.1583279185618367e-05,
"loss": 0.1525,
"step": 10050
},
{
"epoch": 6.53,
"learning_rate": 1.1561620099631796e-05,
"loss": 0.1275,
"step": 10060
},
{
"epoch": 6.54,
"learning_rate": 1.1539961013645224e-05,
"loss": 0.0422,
"step": 10070
},
{
"epoch": 6.55,
"learning_rate": 1.1518301927658653e-05,
"loss": 0.1179,
"step": 10080
},
{
"epoch": 6.55,
"learning_rate": 1.1496642841672082e-05,
"loss": 0.2389,
"step": 10090
},
{
"epoch": 6.56,
"learning_rate": 1.147498375568551e-05,
"loss": 0.0017,
"step": 10100
},
{
"epoch": 6.57,
"learning_rate": 1.1453324669698939e-05,
"loss": 0.5718,
"step": 10110
},
{
"epoch": 6.57,
"learning_rate": 1.1431665583712366e-05,
"loss": 0.1237,
"step": 10120
},
{
"epoch": 6.58,
"learning_rate": 1.1410006497725796e-05,
"loss": 0.1591,
"step": 10130
},
{
"epoch": 6.59,
"learning_rate": 1.1388347411739225e-05,
"loss": 0.0555,
"step": 10140
},
{
"epoch": 6.59,
"learning_rate": 1.1366688325752654e-05,
"loss": 0.0478,
"step": 10150
},
{
"epoch": 6.6,
"learning_rate": 1.134502923976608e-05,
"loss": 0.1472,
"step": 10160
},
{
"epoch": 6.61,
"learning_rate": 1.1323370153779511e-05,
"loss": 0.1414,
"step": 10170
},
{
"epoch": 6.61,
"learning_rate": 1.130171106779294e-05,
"loss": 0.1624,
"step": 10180
},
{
"epoch": 6.62,
"learning_rate": 1.1280051981806369e-05,
"loss": 0.4795,
"step": 10190
},
{
"epoch": 6.63,
"learning_rate": 1.1258392895819796e-05,
"loss": 0.2595,
"step": 10200
},
{
"epoch": 6.63,
"learning_rate": 1.1236733809833224e-05,
"loss": 0.0409,
"step": 10210
},
{
"epoch": 6.64,
"learning_rate": 1.1215074723846655e-05,
"loss": 0.0421,
"step": 10220
},
{
"epoch": 6.65,
"learning_rate": 1.1193415637860083e-05,
"loss": 0.1452,
"step": 10230
},
{
"epoch": 6.65,
"learning_rate": 1.117175655187351e-05,
"loss": 0.1621,
"step": 10240
},
{
"epoch": 6.66,
"learning_rate": 1.1150097465886939e-05,
"loss": 0.1155,
"step": 10250
},
{
"epoch": 6.66,
"learning_rate": 1.112843837990037e-05,
"loss": 0.1035,
"step": 10260
},
{
"epoch": 6.67,
"learning_rate": 1.1106779293913798e-05,
"loss": 0.1507,
"step": 10270
},
{
"epoch": 6.68,
"learning_rate": 1.1085120207927225e-05,
"loss": 0.1554,
"step": 10280
},
{
"epoch": 6.68,
"learning_rate": 1.1063461121940654e-05,
"loss": 0.0084,
"step": 10290
},
{
"epoch": 6.69,
"learning_rate": 1.1041802035954082e-05,
"loss": 0.2307,
"step": 10300
},
{
"epoch": 6.7,
"learning_rate": 1.1020142949967513e-05,
"loss": 0.197,
"step": 10310
},
{
"epoch": 6.7,
"learning_rate": 1.099848386398094e-05,
"loss": 0.2854,
"step": 10320
},
{
"epoch": 6.71,
"learning_rate": 1.0976824777994368e-05,
"loss": 0.1157,
"step": 10330
},
{
"epoch": 6.72,
"learning_rate": 1.0955165692007797e-05,
"loss": 0.1403,
"step": 10340
},
{
"epoch": 6.72,
"learning_rate": 1.0933506606021228e-05,
"loss": 0.1204,
"step": 10350
},
{
"epoch": 6.73,
"learning_rate": 1.0911847520034655e-05,
"loss": 0.2161,
"step": 10360
},
{
"epoch": 6.74,
"learning_rate": 1.0890188434048083e-05,
"loss": 0.2184,
"step": 10370
},
{
"epoch": 6.74,
"learning_rate": 1.0868529348061512e-05,
"loss": 0.2235,
"step": 10380
},
{
"epoch": 6.75,
"learning_rate": 1.084687026207494e-05,
"loss": 0.0236,
"step": 10390
},
{
"epoch": 6.76,
"learning_rate": 1.082521117608837e-05,
"loss": 0.2451,
"step": 10400
},
{
"epoch": 6.76,
"learning_rate": 1.0803552090101798e-05,
"loss": 0.1368,
"step": 10410
},
{
"epoch": 6.77,
"learning_rate": 1.0781893004115227e-05,
"loss": 0.166,
"step": 10420
},
{
"epoch": 6.77,
"learning_rate": 1.0760233918128655e-05,
"loss": 0.0876,
"step": 10430
},
{
"epoch": 6.78,
"learning_rate": 1.0738574832142084e-05,
"loss": 0.1326,
"step": 10440
},
{
"epoch": 6.79,
"learning_rate": 1.0716915746155513e-05,
"loss": 0.1558,
"step": 10450
},
{
"epoch": 6.79,
"learning_rate": 1.0695256660168941e-05,
"loss": 0.2767,
"step": 10460
},
{
"epoch": 6.8,
"learning_rate": 1.067359757418237e-05,
"loss": 0.2269,
"step": 10470
},
{
"epoch": 6.81,
"learning_rate": 1.0651938488195797e-05,
"loss": 0.2727,
"step": 10480
},
{
"epoch": 6.81,
"learning_rate": 1.0630279402209227e-05,
"loss": 0.1883,
"step": 10490
},
{
"epoch": 6.82,
"learning_rate": 1.0608620316222656e-05,
"loss": 0.3771,
"step": 10500
},
{
"epoch": 6.83,
"learning_rate": 1.0586961230236085e-05,
"loss": 0.1681,
"step": 10510
},
{
"epoch": 6.83,
"learning_rate": 1.0565302144249512e-05,
"loss": 0.0599,
"step": 10520
},
{
"epoch": 6.84,
"learning_rate": 1.0543643058262942e-05,
"loss": 0.2127,
"step": 10530
},
{
"epoch": 6.85,
"learning_rate": 1.0521983972276371e-05,
"loss": 0.1691,
"step": 10540
},
{
"epoch": 6.85,
"learning_rate": 1.05003248862898e-05,
"loss": 0.2349,
"step": 10550
},
{
"epoch": 6.86,
"learning_rate": 1.0478665800303227e-05,
"loss": 0.2292,
"step": 10560
},
{
"epoch": 6.87,
"learning_rate": 1.0457006714316655e-05,
"loss": 0.3338,
"step": 10570
},
{
"epoch": 6.87,
"learning_rate": 1.0435347628330086e-05,
"loss": 0.1032,
"step": 10580
},
{
"epoch": 6.88,
"learning_rate": 1.0413688542343514e-05,
"loss": 0.059,
"step": 10590
},
{
"epoch": 6.89,
"learning_rate": 1.0392029456356941e-05,
"loss": 0.272,
"step": 10600
},
{
"epoch": 6.89,
"learning_rate": 1.037037037037037e-05,
"loss": 0.0206,
"step": 10610
},
{
"epoch": 6.9,
"learning_rate": 1.03487112843838e-05,
"loss": 0.2188,
"step": 10620
},
{
"epoch": 6.9,
"learning_rate": 1.0327052198397229e-05,
"loss": 0.1537,
"step": 10630
},
{
"epoch": 6.91,
"learning_rate": 1.0305393112410656e-05,
"loss": 0.2969,
"step": 10640
},
{
"epoch": 6.92,
"learning_rate": 1.0283734026424085e-05,
"loss": 0.1091,
"step": 10650
},
{
"epoch": 6.92,
"learning_rate": 1.0262074940437513e-05,
"loss": 0.1372,
"step": 10660
},
{
"epoch": 6.93,
"learning_rate": 1.0240415854450944e-05,
"loss": 0.1222,
"step": 10670
},
{
"epoch": 6.94,
"learning_rate": 1.021875676846437e-05,
"loss": 0.0026,
"step": 10680
},
{
"epoch": 6.94,
"learning_rate": 1.01970976824778e-05,
"loss": 0.0525,
"step": 10690
},
{
"epoch": 6.95,
"learning_rate": 1.0175438596491228e-05,
"loss": 0.2373,
"step": 10700
},
{
"epoch": 6.96,
"learning_rate": 1.0153779510504659e-05,
"loss": 0.1444,
"step": 10710
},
{
"epoch": 6.96,
"learning_rate": 1.0132120424518086e-05,
"loss": 0.1887,
"step": 10720
},
{
"epoch": 6.97,
"learning_rate": 1.0110461338531514e-05,
"loss": 0.0863,
"step": 10730
},
{
"epoch": 6.98,
"learning_rate": 1.0088802252544943e-05,
"loss": 0.1191,
"step": 10740
},
{
"epoch": 6.98,
"learning_rate": 1.0067143166558372e-05,
"loss": 0.2556,
"step": 10750
},
{
"epoch": 6.99,
"learning_rate": 1.00454840805718e-05,
"loss": 0.2222,
"step": 10760
},
{
"epoch": 7.0,
"learning_rate": 1.0023824994585229e-05,
"loss": 0.031,
"step": 10770
},
{
"epoch": 7.0,
"eval_accuracy": 0.984820607175713,
"eval_loss": 0.08112048357725143,
"eval_runtime": 12.6945,
"eval_samples_per_second": 171.255,
"eval_steps_per_second": 42.853,
"step": 10776
},
{
"epoch": 7.0,
"learning_rate": 1.0002165908598658e-05,
"loss": 0.0887,
"step": 10780
},
{
"epoch": 7.01,
"learning_rate": 9.980506822612086e-06,
"loss": 0.195,
"step": 10790
},
{
"epoch": 7.02,
"learning_rate": 9.958847736625515e-06,
"loss": 0.176,
"step": 10800
},
{
"epoch": 7.02,
"learning_rate": 9.937188650638944e-06,
"loss": 0.1994,
"step": 10810
},
{
"epoch": 7.03,
"learning_rate": 9.915529564652372e-06,
"loss": 0.0794,
"step": 10820
},
{
"epoch": 7.03,
"learning_rate": 9.893870478665801e-06,
"loss": 0.0675,
"step": 10830
},
{
"epoch": 7.04,
"learning_rate": 9.872211392679228e-06,
"loss": 0.1071,
"step": 10840
},
{
"epoch": 7.05,
"learning_rate": 9.850552306692658e-06,
"loss": 0.033,
"step": 10850
},
{
"epoch": 7.05,
"learning_rate": 9.828893220706087e-06,
"loss": 0.0126,
"step": 10860
},
{
"epoch": 7.06,
"learning_rate": 9.807234134719514e-06,
"loss": 0.2058,
"step": 10870
},
{
"epoch": 7.07,
"learning_rate": 9.785575048732943e-06,
"loss": 0.2196,
"step": 10880
},
{
"epoch": 7.07,
"learning_rate": 9.763915962746373e-06,
"loss": 0.0015,
"step": 10890
},
{
"epoch": 7.08,
"learning_rate": 9.742256876759802e-06,
"loss": 0.0216,
"step": 10900
},
{
"epoch": 7.09,
"learning_rate": 9.720597790773229e-06,
"loss": 0.008,
"step": 10910
},
{
"epoch": 7.09,
"learning_rate": 9.698938704786658e-06,
"loss": 0.3726,
"step": 10920
},
{
"epoch": 7.1,
"learning_rate": 9.677279618800086e-06,
"loss": 0.1296,
"step": 10930
},
{
"epoch": 7.11,
"learning_rate": 9.655620532813517e-06,
"loss": 0.034,
"step": 10940
},
{
"epoch": 7.11,
"learning_rate": 9.633961446826944e-06,
"loss": 0.0329,
"step": 10950
},
{
"epoch": 7.12,
"learning_rate": 9.612302360840372e-06,
"loss": 0.1104,
"step": 10960
},
{
"epoch": 7.13,
"learning_rate": 9.590643274853801e-06,
"loss": 0.0433,
"step": 10970
},
{
"epoch": 7.13,
"learning_rate": 9.568984188867231e-06,
"loss": 0.0929,
"step": 10980
},
{
"epoch": 7.14,
"learning_rate": 9.547325102880658e-06,
"loss": 0.0112,
"step": 10990
},
{
"epoch": 7.15,
"learning_rate": 9.525666016894087e-06,
"loss": 0.076,
"step": 11000
},
{
"epoch": 7.15,
"learning_rate": 9.504006930907516e-06,
"loss": 0.0653,
"step": 11010
},
{
"epoch": 7.16,
"learning_rate": 9.482347844920944e-06,
"loss": 0.3116,
"step": 11020
},
{
"epoch": 7.16,
"learning_rate": 9.460688758934373e-06,
"loss": 0.0533,
"step": 11030
},
{
"epoch": 7.17,
"learning_rate": 9.439029672947802e-06,
"loss": 0.0627,
"step": 11040
},
{
"epoch": 7.18,
"learning_rate": 9.41737058696123e-06,
"loss": 0.0206,
"step": 11050
},
{
"epoch": 7.18,
"learning_rate": 9.395711500974659e-06,
"loss": 0.1429,
"step": 11060
},
{
"epoch": 7.19,
"learning_rate": 9.374052414988088e-06,
"loss": 0.1702,
"step": 11070
},
{
"epoch": 7.2,
"learning_rate": 9.352393329001516e-06,
"loss": 0.0337,
"step": 11080
},
{
"epoch": 7.2,
"learning_rate": 9.330734243014945e-06,
"loss": 0.1393,
"step": 11090
},
{
"epoch": 7.21,
"learning_rate": 9.309075157028374e-06,
"loss": 0.1942,
"step": 11100
},
{
"epoch": 7.22,
"learning_rate": 9.2874160710418e-06,
"loss": 0.2188,
"step": 11110
},
{
"epoch": 7.22,
"learning_rate": 9.265756985055231e-06,
"loss": 0.0614,
"step": 11120
},
{
"epoch": 7.23,
"learning_rate": 9.24409789906866e-06,
"loss": 0.2601,
"step": 11130
},
{
"epoch": 7.24,
"learning_rate": 9.222438813082089e-06,
"loss": 0.2971,
"step": 11140
},
{
"epoch": 7.24,
"learning_rate": 9.200779727095516e-06,
"loss": 0.1568,
"step": 11150
},
{
"epoch": 7.25,
"learning_rate": 9.179120641108946e-06,
"loss": 0.2477,
"step": 11160
},
{
"epoch": 7.26,
"learning_rate": 9.157461555122375e-06,
"loss": 0.3047,
"step": 11170
},
{
"epoch": 7.26,
"learning_rate": 9.135802469135803e-06,
"loss": 0.1162,
"step": 11180
},
{
"epoch": 7.27,
"learning_rate": 9.11414338314923e-06,
"loss": 0.112,
"step": 11190
},
{
"epoch": 7.28,
"learning_rate": 9.092484297162659e-06,
"loss": 0.1143,
"step": 11200
},
{
"epoch": 7.28,
"learning_rate": 9.07082521117609e-06,
"loss": 0.0668,
"step": 11210
},
{
"epoch": 7.29,
"learning_rate": 9.049166125189518e-06,
"loss": 0.2082,
"step": 11220
},
{
"epoch": 7.29,
"learning_rate": 9.027507039202945e-06,
"loss": 0.1127,
"step": 11230
},
{
"epoch": 7.3,
"learning_rate": 9.005847953216374e-06,
"loss": 0.0592,
"step": 11240
},
{
"epoch": 7.31,
"learning_rate": 8.984188867229804e-06,
"loss": 0.1539,
"step": 11250
},
{
"epoch": 7.31,
"learning_rate": 8.962529781243233e-06,
"loss": 0.1297,
"step": 11260
},
{
"epoch": 7.32,
"learning_rate": 8.94087069525666e-06,
"loss": 0.1659,
"step": 11270
},
{
"epoch": 7.33,
"learning_rate": 8.919211609270088e-06,
"loss": 0.1437,
"step": 11280
},
{
"epoch": 7.33,
"learning_rate": 8.897552523283517e-06,
"loss": 0.0978,
"step": 11290
},
{
"epoch": 7.34,
"learning_rate": 8.875893437296948e-06,
"loss": 0.228,
"step": 11300
},
{
"epoch": 7.35,
"learning_rate": 8.854234351310375e-06,
"loss": 0.0906,
"step": 11310
},
{
"epoch": 7.35,
"learning_rate": 8.832575265323803e-06,
"loss": 0.173,
"step": 11320
},
{
"epoch": 7.36,
"learning_rate": 8.810916179337232e-06,
"loss": 0.0306,
"step": 11330
},
{
"epoch": 7.37,
"learning_rate": 8.789257093350662e-06,
"loss": 0.1072,
"step": 11340
},
{
"epoch": 7.37,
"learning_rate": 8.76759800736409e-06,
"loss": 0.0704,
"step": 11350
},
{
"epoch": 7.38,
"learning_rate": 8.745938921377518e-06,
"loss": 0.0096,
"step": 11360
},
{
"epoch": 7.39,
"learning_rate": 8.724279835390947e-06,
"loss": 0.4016,
"step": 11370
},
{
"epoch": 7.39,
"learning_rate": 8.702620749404375e-06,
"loss": 0.2172,
"step": 11380
},
{
"epoch": 7.4,
"learning_rate": 8.680961663417804e-06,
"loss": 0.2804,
"step": 11390
},
{
"epoch": 7.41,
"learning_rate": 8.659302577431233e-06,
"loss": 0.1273,
"step": 11400
},
{
"epoch": 7.41,
"learning_rate": 8.637643491444661e-06,
"loss": 0.0394,
"step": 11410
},
{
"epoch": 7.42,
"learning_rate": 8.61598440545809e-06,
"loss": 0.0711,
"step": 11420
},
{
"epoch": 7.42,
"learning_rate": 8.594325319471519e-06,
"loss": 0.188,
"step": 11430
},
{
"epoch": 7.43,
"learning_rate": 8.572666233484947e-06,
"loss": 0.046,
"step": 11440
},
{
"epoch": 7.44,
"learning_rate": 8.551007147498376e-06,
"loss": 0.1146,
"step": 11450
},
{
"epoch": 7.44,
"learning_rate": 8.529348061511805e-06,
"loss": 0.039,
"step": 11460
},
{
"epoch": 7.45,
"learning_rate": 8.507688975525232e-06,
"loss": 0.1031,
"step": 11470
},
{
"epoch": 7.46,
"learning_rate": 8.486029889538662e-06,
"loss": 0.003,
"step": 11480
},
{
"epoch": 7.46,
"learning_rate": 8.464370803552091e-06,
"loss": 0.018,
"step": 11490
},
{
"epoch": 7.47,
"learning_rate": 8.44271171756552e-06,
"loss": 0.2962,
"step": 11500
},
{
"epoch": 7.48,
"learning_rate": 8.421052631578947e-06,
"loss": 0.053,
"step": 11510
},
{
"epoch": 7.48,
"learning_rate": 8.399393545592377e-06,
"loss": 0.0209,
"step": 11520
},
{
"epoch": 7.49,
"learning_rate": 8.377734459605806e-06,
"loss": 0.0926,
"step": 11530
},
{
"epoch": 7.5,
"learning_rate": 8.356075373619234e-06,
"loss": 0.1764,
"step": 11540
},
{
"epoch": 7.5,
"learning_rate": 8.334416287632661e-06,
"loss": 0.0722,
"step": 11550
},
{
"epoch": 7.51,
"learning_rate": 8.31275720164609e-06,
"loss": 0.1181,
"step": 11560
},
{
"epoch": 7.52,
"learning_rate": 8.29109811565952e-06,
"loss": 0.1074,
"step": 11570
},
{
"epoch": 7.52,
"learning_rate": 8.269439029672949e-06,
"loss": 0.1395,
"step": 11580
},
{
"epoch": 7.53,
"learning_rate": 8.247779943686376e-06,
"loss": 0.095,
"step": 11590
},
{
"epoch": 7.53,
"learning_rate": 8.226120857699805e-06,
"loss": 0.0765,
"step": 11600
},
{
"epoch": 7.54,
"learning_rate": 8.204461771713233e-06,
"loss": 0.4006,
"step": 11610
},
{
"epoch": 7.55,
"learning_rate": 8.182802685726664e-06,
"loss": 0.0065,
"step": 11620
},
{
"epoch": 7.55,
"learning_rate": 8.16114359974009e-06,
"loss": 0.1837,
"step": 11630
},
{
"epoch": 7.56,
"learning_rate": 8.13948451375352e-06,
"loss": 0.1463,
"step": 11640
},
{
"epoch": 7.57,
"learning_rate": 8.117825427766948e-06,
"loss": 0.2499,
"step": 11650
},
{
"epoch": 7.57,
"learning_rate": 8.096166341780379e-06,
"loss": 0.1759,
"step": 11660
},
{
"epoch": 7.58,
"learning_rate": 8.074507255793806e-06,
"loss": 0.0632,
"step": 11670
},
{
"epoch": 7.59,
"learning_rate": 8.052848169807234e-06,
"loss": 0.046,
"step": 11680
},
{
"epoch": 7.59,
"learning_rate": 8.031189083820663e-06,
"loss": 0.1882,
"step": 11690
},
{
"epoch": 7.6,
"learning_rate": 8.009529997834092e-06,
"loss": 0.2443,
"step": 11700
},
{
"epoch": 7.61,
"learning_rate": 7.98787091184752e-06,
"loss": 0.0786,
"step": 11710
},
{
"epoch": 7.61,
"learning_rate": 7.966211825860949e-06,
"loss": 0.0354,
"step": 11720
},
{
"epoch": 7.62,
"learning_rate": 7.944552739874378e-06,
"loss": 0.1392,
"step": 11730
},
{
"epoch": 7.63,
"learning_rate": 7.922893653887806e-06,
"loss": 0.1614,
"step": 11740
},
{
"epoch": 7.63,
"learning_rate": 7.901234567901235e-06,
"loss": 0.0229,
"step": 11750
},
{
"epoch": 7.64,
"learning_rate": 7.879575481914664e-06,
"loss": 0.0742,
"step": 11760
},
{
"epoch": 7.65,
"learning_rate": 7.857916395928092e-06,
"loss": 0.0247,
"step": 11770
},
{
"epoch": 7.65,
"learning_rate": 7.836257309941521e-06,
"loss": 0.0734,
"step": 11780
},
{
"epoch": 7.66,
"learning_rate": 7.814598223954948e-06,
"loss": 0.161,
"step": 11790
},
{
"epoch": 7.66,
"learning_rate": 7.792939137968378e-06,
"loss": 0.1736,
"step": 11800
},
{
"epoch": 7.67,
"learning_rate": 7.771280051981807e-06,
"loss": 0.1407,
"step": 11810
},
{
"epoch": 7.68,
"learning_rate": 7.749620965995236e-06,
"loss": 0.104,
"step": 11820
},
{
"epoch": 7.68,
"learning_rate": 7.727961880008663e-06,
"loss": 0.1501,
"step": 11830
},
{
"epoch": 7.69,
"learning_rate": 7.706302794022093e-06,
"loss": 0.1137,
"step": 11840
},
{
"epoch": 7.7,
"learning_rate": 7.684643708035522e-06,
"loss": 0.1669,
"step": 11850
},
{
"epoch": 7.7,
"learning_rate": 7.662984622048949e-06,
"loss": 0.0013,
"step": 11860
},
{
"epoch": 7.71,
"learning_rate": 7.641325536062378e-06,
"loss": 0.1373,
"step": 11870
},
{
"epoch": 7.72,
"learning_rate": 7.619666450075806e-06,
"loss": 0.1723,
"step": 11880
},
{
"epoch": 7.72,
"learning_rate": 7.598007364089236e-06,
"loss": 0.0015,
"step": 11890
},
{
"epoch": 7.73,
"learning_rate": 7.576348278102664e-06,
"loss": 0.0992,
"step": 11900
},
{
"epoch": 7.74,
"learning_rate": 7.554689192116092e-06,
"loss": 0.0976,
"step": 11910
},
{
"epoch": 7.74,
"learning_rate": 7.533030106129521e-06,
"loss": 0.0918,
"step": 11920
},
{
"epoch": 7.75,
"learning_rate": 7.5113710201429505e-06,
"loss": 0.0709,
"step": 11930
},
{
"epoch": 7.76,
"learning_rate": 7.489711934156379e-06,
"loss": 0.2004,
"step": 11940
},
{
"epoch": 7.76,
"learning_rate": 7.468052848169807e-06,
"loss": 0.0302,
"step": 11950
},
{
"epoch": 7.77,
"learning_rate": 7.446393762183236e-06,
"loss": 0.3603,
"step": 11960
},
{
"epoch": 7.78,
"learning_rate": 7.424734676196664e-06,
"loss": 0.0168,
"step": 11970
},
{
"epoch": 7.78,
"learning_rate": 7.403075590210093e-06,
"loss": 0.0821,
"step": 11980
},
{
"epoch": 7.79,
"learning_rate": 7.381416504223522e-06,
"loss": 0.1954,
"step": 11990
},
{
"epoch": 7.79,
"learning_rate": 7.35975741823695e-06,
"loss": 0.1345,
"step": 12000
},
{
"epoch": 7.8,
"learning_rate": 7.338098332250379e-06,
"loss": 0.0208,
"step": 12010
},
{
"epoch": 7.81,
"learning_rate": 7.316439246263808e-06,
"loss": 0.1959,
"step": 12020
},
{
"epoch": 7.81,
"learning_rate": 7.2947801602772365e-06,
"loss": 0.0016,
"step": 12030
},
{
"epoch": 7.82,
"learning_rate": 7.273121074290665e-06,
"loss": 0.2701,
"step": 12040
},
{
"epoch": 7.83,
"learning_rate": 7.251461988304093e-06,
"loss": 0.0076,
"step": 12050
},
{
"epoch": 7.83,
"learning_rate": 7.2298029023175225e-06,
"loss": 0.0192,
"step": 12060
},
{
"epoch": 7.84,
"learning_rate": 7.20814381633095e-06,
"loss": 0.0733,
"step": 12070
},
{
"epoch": 7.85,
"learning_rate": 7.18648473034438e-06,
"loss": 0.0395,
"step": 12080
},
{
"epoch": 7.85,
"learning_rate": 7.164825644357808e-06,
"loss": 0.044,
"step": 12090
},
{
"epoch": 7.86,
"learning_rate": 7.143166558371237e-06,
"loss": 0.1878,
"step": 12100
},
{
"epoch": 7.87,
"learning_rate": 7.121507472384665e-06,
"loss": 0.233,
"step": 12110
},
{
"epoch": 7.87,
"learning_rate": 7.099848386398095e-06,
"loss": 0.1157,
"step": 12120
},
{
"epoch": 7.88,
"learning_rate": 7.0781893004115225e-06,
"loss": 0.098,
"step": 12130
},
{
"epoch": 7.89,
"learning_rate": 7.056530214424951e-06,
"loss": 0.1827,
"step": 12140
},
{
"epoch": 7.89,
"learning_rate": 7.03487112843838e-06,
"loss": 0.2982,
"step": 12150
},
{
"epoch": 7.9,
"learning_rate": 7.0132120424518085e-06,
"loss": 0.1493,
"step": 12160
},
{
"epoch": 7.91,
"learning_rate": 6.991552956465237e-06,
"loss": 0.1547,
"step": 12170
},
{
"epoch": 7.91,
"learning_rate": 6.969893870478666e-06,
"loss": 0.0282,
"step": 12180
},
{
"epoch": 7.92,
"learning_rate": 6.948234784492095e-06,
"loss": 0.2962,
"step": 12190
},
{
"epoch": 7.92,
"learning_rate": 6.926575698505523e-06,
"loss": 0.0203,
"step": 12200
},
{
"epoch": 7.93,
"learning_rate": 6.904916612518952e-06,
"loss": 0.3606,
"step": 12210
},
{
"epoch": 7.94,
"learning_rate": 6.883257526532381e-06,
"loss": 0.2018,
"step": 12220
},
{
"epoch": 7.94,
"learning_rate": 6.8615984405458085e-06,
"loss": 0.1647,
"step": 12230
},
{
"epoch": 7.95,
"learning_rate": 6.839939354559238e-06,
"loss": 0.0782,
"step": 12240
},
{
"epoch": 7.96,
"learning_rate": 6.818280268572666e-06,
"loss": 0.0471,
"step": 12250
},
{
"epoch": 7.96,
"learning_rate": 6.796621182586095e-06,
"loss": 0.0793,
"step": 12260
},
{
"epoch": 7.97,
"learning_rate": 6.774962096599523e-06,
"loss": 0.1777,
"step": 12270
},
{
"epoch": 7.98,
"learning_rate": 6.753303010612953e-06,
"loss": 0.1029,
"step": 12280
},
{
"epoch": 7.98,
"learning_rate": 6.731643924626381e-06,
"loss": 0.0013,
"step": 12290
},
{
"epoch": 7.99,
"learning_rate": 6.70998483863981e-06,
"loss": 0.1963,
"step": 12300
},
{
"epoch": 8.0,
"learning_rate": 6.688325752653238e-06,
"loss": 0.0781,
"step": 12310
},
{
"epoch": 8.0,
"eval_accuracy": 0.984820607175713,
"eval_loss": 0.07072959840297699,
"eval_runtime": 13.0034,
"eval_samples_per_second": 167.187,
"eval_steps_per_second": 41.835,
"step": 12316
},
{
"epoch": 8.0,
"learning_rate": 6.666666666666667e-06,
"loss": 0.1161,
"step": 12320
},
{
"epoch": 8.01,
"learning_rate": 6.645007580680095e-06,
"loss": 0.2413,
"step": 12330
},
{
"epoch": 8.02,
"learning_rate": 6.623348494693524e-06,
"loss": 0.1146,
"step": 12340
},
{
"epoch": 8.02,
"learning_rate": 6.601689408706953e-06,
"loss": 0.0088,
"step": 12350
},
{
"epoch": 8.03,
"learning_rate": 6.580030322720381e-06,
"loss": 0.116,
"step": 12360
},
{
"epoch": 8.04,
"learning_rate": 6.55837123673381e-06,
"loss": 0.0927,
"step": 12370
},
{
"epoch": 8.04,
"learning_rate": 6.536712150747239e-06,
"loss": 0.3197,
"step": 12380
},
{
"epoch": 8.05,
"learning_rate": 6.5150530647606674e-06,
"loss": 0.1136,
"step": 12390
},
{
"epoch": 8.05,
"learning_rate": 6.493393978774096e-06,
"loss": 0.2585,
"step": 12400
},
{
"epoch": 8.06,
"learning_rate": 6.471734892787524e-06,
"loss": 0.127,
"step": 12410
},
{
"epoch": 8.07,
"learning_rate": 6.4500758068009535e-06,
"loss": 0.0578,
"step": 12420
},
{
"epoch": 8.07,
"learning_rate": 6.428416720814381e-06,
"loss": 0.0706,
"step": 12430
},
{
"epoch": 8.08,
"learning_rate": 6.406757634827811e-06,
"loss": 0.1134,
"step": 12440
},
{
"epoch": 8.09,
"learning_rate": 6.385098548841239e-06,
"loss": 0.0335,
"step": 12450
},
{
"epoch": 8.09,
"learning_rate": 6.363439462854668e-06,
"loss": 0.0277,
"step": 12460
},
{
"epoch": 8.1,
"learning_rate": 6.341780376868096e-06,
"loss": 0.1412,
"step": 12470
},
{
"epoch": 8.11,
"learning_rate": 6.320121290881526e-06,
"loss": 0.07,
"step": 12480
},
{
"epoch": 8.11,
"learning_rate": 6.2984622048949534e-06,
"loss": 0.1496,
"step": 12490
},
{
"epoch": 8.12,
"learning_rate": 6.276803118908382e-06,
"loss": 0.0603,
"step": 12500
},
{
"epoch": 8.13,
"learning_rate": 6.255144032921811e-06,
"loss": 0.2813,
"step": 12510
},
{
"epoch": 8.13,
"learning_rate": 6.2334849469352395e-06,
"loss": 0.1433,
"step": 12520
},
{
"epoch": 8.14,
"learning_rate": 6.211825860948668e-06,
"loss": 0.3679,
"step": 12530
},
{
"epoch": 8.15,
"learning_rate": 6.190166774962097e-06,
"loss": 0.0328,
"step": 12540
},
{
"epoch": 8.15,
"learning_rate": 6.1685076889755256e-06,
"loss": 0.1398,
"step": 12550
},
{
"epoch": 8.16,
"learning_rate": 6.146848602988954e-06,
"loss": 0.1047,
"step": 12560
},
{
"epoch": 8.16,
"learning_rate": 6.125189517002383e-06,
"loss": 0.1391,
"step": 12570
},
{
"epoch": 8.17,
"learning_rate": 6.103530431015812e-06,
"loss": 0.0941,
"step": 12580
},
{
"epoch": 8.18,
"learning_rate": 6.0818713450292395e-06,
"loss": 0.0813,
"step": 12590
},
{
"epoch": 8.18,
"learning_rate": 6.060212259042669e-06,
"loss": 0.0015,
"step": 12600
},
{
"epoch": 8.19,
"learning_rate": 6.038553173056097e-06,
"loss": 0.1206,
"step": 12610
},
{
"epoch": 8.2,
"learning_rate": 6.016894087069526e-06,
"loss": 0.158,
"step": 12620
},
{
"epoch": 8.2,
"learning_rate": 5.995235001082954e-06,
"loss": 0.0406,
"step": 12630
},
{
"epoch": 8.21,
"learning_rate": 5.973575915096384e-06,
"loss": 0.1629,
"step": 12640
},
{
"epoch": 8.22,
"learning_rate": 5.9519168291098116e-06,
"loss": 0.0426,
"step": 12650
},
{
"epoch": 8.22,
"learning_rate": 5.930257743123241e-06,
"loss": 0.0673,
"step": 12660
},
{
"epoch": 8.23,
"learning_rate": 5.908598657136669e-06,
"loss": 0.037,
"step": 12670
},
{
"epoch": 8.24,
"learning_rate": 5.886939571150098e-06,
"loss": 0.0649,
"step": 12680
},
{
"epoch": 8.24,
"learning_rate": 5.865280485163526e-06,
"loss": 0.2742,
"step": 12690
},
{
"epoch": 8.25,
"learning_rate": 5.843621399176955e-06,
"loss": 0.2166,
"step": 12700
},
{
"epoch": 8.26,
"learning_rate": 5.821962313190384e-06,
"loss": 0.0853,
"step": 12710
},
{
"epoch": 8.26,
"learning_rate": 5.800303227203812e-06,
"loss": 0.0083,
"step": 12720
},
{
"epoch": 8.27,
"learning_rate": 5.778644141217241e-06,
"loss": 0.1324,
"step": 12730
},
{
"epoch": 8.28,
"learning_rate": 5.75698505523067e-06,
"loss": 0.0543,
"step": 12740
},
{
"epoch": 8.28,
"learning_rate": 5.7353259692440976e-06,
"loss": 0.1978,
"step": 12750
},
{
"epoch": 8.29,
"learning_rate": 5.713666883257527e-06,
"loss": 0.112,
"step": 12760
},
{
"epoch": 8.29,
"learning_rate": 5.692007797270955e-06,
"loss": 0.1806,
"step": 12770
},
{
"epoch": 8.3,
"learning_rate": 5.6703487112843845e-06,
"loss": 0.0336,
"step": 12780
},
{
"epoch": 8.31,
"learning_rate": 5.648689625297812e-06,
"loss": 0.2045,
"step": 12790
},
{
"epoch": 8.31,
"learning_rate": 5.627030539311242e-06,
"loss": 0.0968,
"step": 12800
},
{
"epoch": 8.32,
"learning_rate": 5.60537145332467e-06,
"loss": 0.2564,
"step": 12810
},
{
"epoch": 8.33,
"learning_rate": 5.583712367338099e-06,
"loss": 0.078,
"step": 12820
},
{
"epoch": 8.33,
"learning_rate": 5.562053281351527e-06,
"loss": 0.2211,
"step": 12830
},
{
"epoch": 8.34,
"learning_rate": 5.540394195364956e-06,
"loss": 0.0235,
"step": 12840
},
{
"epoch": 8.35,
"learning_rate": 5.518735109378384e-06,
"loss": 0.0086,
"step": 12850
},
{
"epoch": 8.35,
"learning_rate": 5.497076023391813e-06,
"loss": 0.0448,
"step": 12860
},
{
"epoch": 8.36,
"learning_rate": 5.475416937405242e-06,
"loss": 0.0874,
"step": 12870
},
{
"epoch": 8.37,
"learning_rate": 5.4537578514186705e-06,
"loss": 0.1144,
"step": 12880
},
{
"epoch": 8.37,
"learning_rate": 5.432098765432099e-06,
"loss": 0.0012,
"step": 12890
},
{
"epoch": 8.38,
"learning_rate": 5.410439679445528e-06,
"loss": 0.1392,
"step": 12900
},
{
"epoch": 8.39,
"learning_rate": 5.3887805934589565e-06,
"loss": 0.0546,
"step": 12910
},
{
"epoch": 8.39,
"learning_rate": 5.367121507472384e-06,
"loss": 0.0058,
"step": 12920
},
{
"epoch": 8.4,
"learning_rate": 5.345462421485813e-06,
"loss": 0.1225,
"step": 12930
},
{
"epoch": 8.41,
"learning_rate": 5.323803335499242e-06,
"loss": 0.0963,
"step": 12940
},
{
"epoch": 8.41,
"learning_rate": 5.30214424951267e-06,
"loss": 0.1001,
"step": 12950
},
{
"epoch": 8.42,
"learning_rate": 5.280485163526099e-06,
"loss": 0.1365,
"step": 12960
},
{
"epoch": 8.42,
"learning_rate": 5.258826077539528e-06,
"loss": 0.1296,
"step": 12970
},
{
"epoch": 8.43,
"learning_rate": 5.2371669915529565e-06,
"loss": 0.0279,
"step": 12980
},
{
"epoch": 8.44,
"learning_rate": 5.215507905566385e-06,
"loss": 0.0445,
"step": 12990
},
{
"epoch": 8.44,
"learning_rate": 5.193848819579814e-06,
"loss": 0.064,
"step": 13000
},
{
"epoch": 8.45,
"learning_rate": 5.1721897335932425e-06,
"loss": 0.0262,
"step": 13010
},
{
"epoch": 8.46,
"learning_rate": 5.15053064760667e-06,
"loss": 0.2856,
"step": 13020
},
{
"epoch": 8.46,
"learning_rate": 5.1288715616201e-06,
"loss": 0.1308,
"step": 13030
},
{
"epoch": 8.47,
"learning_rate": 5.107212475633528e-06,
"loss": 0.1229,
"step": 13040
},
{
"epoch": 8.48,
"learning_rate": 5.085553389646957e-06,
"loss": 0.1929,
"step": 13050
},
{
"epoch": 8.48,
"learning_rate": 5.063894303660385e-06,
"loss": 0.1193,
"step": 13060
},
{
"epoch": 8.49,
"learning_rate": 5.042235217673815e-06,
"loss": 0.059,
"step": 13070
},
{
"epoch": 8.5,
"learning_rate": 5.0205761316872425e-06,
"loss": 0.1365,
"step": 13080
},
{
"epoch": 8.5,
"learning_rate": 4.998917045700672e-06,
"loss": 0.052,
"step": 13090
},
{
"epoch": 8.51,
"learning_rate": 4.9772579597141e-06,
"loss": 0.0587,
"step": 13100
},
{
"epoch": 8.52,
"learning_rate": 4.9555988737275285e-06,
"loss": 0.1808,
"step": 13110
},
{
"epoch": 8.52,
"learning_rate": 4.933939787740957e-06,
"loss": 0.071,
"step": 13120
},
{
"epoch": 8.53,
"learning_rate": 4.912280701754386e-06,
"loss": 0.0762,
"step": 13130
},
{
"epoch": 8.54,
"learning_rate": 4.890621615767815e-06,
"loss": 0.0501,
"step": 13140
},
{
"epoch": 8.54,
"learning_rate": 4.868962529781243e-06,
"loss": 0.1167,
"step": 13150
},
{
"epoch": 8.55,
"learning_rate": 4.847303443794672e-06,
"loss": 0.2363,
"step": 13160
},
{
"epoch": 8.55,
"learning_rate": 4.825644357808101e-06,
"loss": 0.1063,
"step": 13170
},
{
"epoch": 8.56,
"learning_rate": 4.803985271821529e-06,
"loss": 0.0592,
"step": 13180
},
{
"epoch": 8.57,
"learning_rate": 4.782326185834958e-06,
"loss": 0.1423,
"step": 13190
},
{
"epoch": 8.57,
"learning_rate": 4.760667099848386e-06,
"loss": 0.1531,
"step": 13200
},
{
"epoch": 8.58,
"learning_rate": 4.739008013861815e-06,
"loss": 0.0018,
"step": 13210
},
{
"epoch": 8.59,
"learning_rate": 4.717348927875243e-06,
"loss": 0.1259,
"step": 13220
},
{
"epoch": 8.59,
"learning_rate": 4.695689841888673e-06,
"loss": 0.0096,
"step": 13230
},
{
"epoch": 8.6,
"learning_rate": 4.674030755902101e-06,
"loss": 0.1893,
"step": 13240
},
{
"epoch": 8.61,
"learning_rate": 4.65237166991553e-06,
"loss": 0.1007,
"step": 13250
},
{
"epoch": 8.61,
"learning_rate": 4.630712583928958e-06,
"loss": 0.0186,
"step": 13260
},
{
"epoch": 8.62,
"learning_rate": 4.6090534979423875e-06,
"loss": 0.1633,
"step": 13270
},
{
"epoch": 8.63,
"learning_rate": 4.587394411955815e-06,
"loss": 0.1736,
"step": 13280
},
{
"epoch": 8.63,
"learning_rate": 4.565735325969244e-06,
"loss": 0.1066,
"step": 13290
},
{
"epoch": 8.64,
"learning_rate": 4.544076239982673e-06,
"loss": 0.2152,
"step": 13300
},
{
"epoch": 8.65,
"learning_rate": 4.522417153996101e-06,
"loss": 0.1314,
"step": 13310
},
{
"epoch": 8.65,
"learning_rate": 4.50075806800953e-06,
"loss": 0.1044,
"step": 13320
},
{
"epoch": 8.66,
"learning_rate": 4.479098982022959e-06,
"loss": 0.0462,
"step": 13330
},
{
"epoch": 8.67,
"learning_rate": 4.4574398960363874e-06,
"loss": 0.1103,
"step": 13340
},
{
"epoch": 8.67,
"learning_rate": 4.435780810049816e-06,
"loss": 0.1436,
"step": 13350
},
{
"epoch": 8.68,
"learning_rate": 4.414121724063245e-06,
"loss": 0.0242,
"step": 13360
},
{
"epoch": 8.68,
"learning_rate": 4.3924626380766735e-06,
"loss": 0.1488,
"step": 13370
},
{
"epoch": 8.69,
"learning_rate": 4.370803552090101e-06,
"loss": 0.2303,
"step": 13380
},
{
"epoch": 8.7,
"learning_rate": 4.349144466103531e-06,
"loss": 0.1304,
"step": 13390
},
{
"epoch": 8.7,
"learning_rate": 4.327485380116959e-06,
"loss": 0.0573,
"step": 13400
},
{
"epoch": 8.71,
"learning_rate": 4.305826294130388e-06,
"loss": 0.0173,
"step": 13410
},
{
"epoch": 8.72,
"learning_rate": 4.284167208143816e-06,
"loss": 0.0028,
"step": 13420
},
{
"epoch": 8.72,
"learning_rate": 4.262508122157246e-06,
"loss": 0.0278,
"step": 13430
},
{
"epoch": 8.73,
"learning_rate": 4.2408490361706735e-06,
"loss": 0.0493,
"step": 13440
},
{
"epoch": 8.74,
"learning_rate": 4.219189950184103e-06,
"loss": 0.0705,
"step": 13450
},
{
"epoch": 8.74,
"learning_rate": 4.197530864197531e-06,
"loss": 0.0259,
"step": 13460
},
{
"epoch": 8.75,
"learning_rate": 4.1758717782109595e-06,
"loss": 0.1553,
"step": 13470
},
{
"epoch": 8.76,
"learning_rate": 4.154212692224388e-06,
"loss": 0.107,
"step": 13480
},
{
"epoch": 8.76,
"learning_rate": 4.132553606237817e-06,
"loss": 0.0314,
"step": 13490
},
{
"epoch": 8.77,
"learning_rate": 4.1108945202512456e-06,
"loss": 0.1201,
"step": 13500
},
{
"epoch": 8.78,
"learning_rate": 4.089235434264674e-06,
"loss": 0.0249,
"step": 13510
},
{
"epoch": 8.78,
"learning_rate": 4.067576348278103e-06,
"loss": 0.0269,
"step": 13520
},
{
"epoch": 8.79,
"learning_rate": 4.045917262291532e-06,
"loss": 0.1419,
"step": 13530
},
{
"epoch": 8.8,
"learning_rate": 4.0242581763049595e-06,
"loss": 0.2158,
"step": 13540
},
{
"epoch": 8.8,
"learning_rate": 4.002599090318389e-06,
"loss": 0.1973,
"step": 13550
},
{
"epoch": 8.81,
"learning_rate": 3.980940004331817e-06,
"loss": 0.2355,
"step": 13560
},
{
"epoch": 8.81,
"learning_rate": 3.959280918345246e-06,
"loss": 0.0418,
"step": 13570
},
{
"epoch": 8.82,
"learning_rate": 3.937621832358674e-06,
"loss": 0.1702,
"step": 13580
},
{
"epoch": 8.83,
"learning_rate": 3.915962746372104e-06,
"loss": 0.1813,
"step": 13590
},
{
"epoch": 8.83,
"learning_rate": 3.8943036603855316e-06,
"loss": 0.089,
"step": 13600
},
{
"epoch": 8.84,
"learning_rate": 3.872644574398961e-06,
"loss": 0.0309,
"step": 13610
},
{
"epoch": 8.85,
"learning_rate": 3.850985488412389e-06,
"loss": 0.0278,
"step": 13620
},
{
"epoch": 8.85,
"learning_rate": 3.829326402425818e-06,
"loss": 0.0381,
"step": 13630
},
{
"epoch": 8.86,
"learning_rate": 3.8076673164392463e-06,
"loss": 0.014,
"step": 13640
},
{
"epoch": 8.87,
"learning_rate": 3.7860082304526746e-06,
"loss": 0.109,
"step": 13650
},
{
"epoch": 8.87,
"learning_rate": 3.7643491444661037e-06,
"loss": 0.3331,
"step": 13660
},
{
"epoch": 8.88,
"learning_rate": 3.742690058479532e-06,
"loss": 0.0013,
"step": 13670
},
{
"epoch": 8.89,
"learning_rate": 3.7210309724929606e-06,
"loss": 0.016,
"step": 13680
},
{
"epoch": 8.89,
"learning_rate": 3.6993718865063893e-06,
"loss": 0.1149,
"step": 13690
},
{
"epoch": 8.9,
"learning_rate": 3.677712800519818e-06,
"loss": 0.3126,
"step": 13700
},
{
"epoch": 8.91,
"learning_rate": 3.6560537145332467e-06,
"loss": 0.2456,
"step": 13710
},
{
"epoch": 8.91,
"learning_rate": 3.6343946285466754e-06,
"loss": 0.0794,
"step": 13720
},
{
"epoch": 8.92,
"learning_rate": 3.612735542560104e-06,
"loss": 0.1407,
"step": 13730
},
{
"epoch": 8.92,
"learning_rate": 3.5910764565735327e-06,
"loss": 0.1953,
"step": 13740
},
{
"epoch": 8.93,
"learning_rate": 3.5694173705869614e-06,
"loss": 0.2692,
"step": 13750
},
{
"epoch": 8.94,
"learning_rate": 3.5477582846003897e-06,
"loss": 0.235,
"step": 13760
},
{
"epoch": 8.94,
"learning_rate": 3.5260991986138184e-06,
"loss": 0.1149,
"step": 13770
},
{
"epoch": 8.95,
"learning_rate": 3.504440112627247e-06,
"loss": 0.1316,
"step": 13780
},
{
"epoch": 8.96,
"learning_rate": 3.4827810266406757e-06,
"loss": 0.0752,
"step": 13790
},
{
"epoch": 8.96,
"learning_rate": 3.4611219406541044e-06,
"loss": 0.0281,
"step": 13800
},
{
"epoch": 8.97,
"learning_rate": 3.439462854667533e-06,
"loss": 0.161,
"step": 13810
},
{
"epoch": 8.98,
"learning_rate": 3.417803768680962e-06,
"loss": 0.0827,
"step": 13820
},
{
"epoch": 8.98,
"learning_rate": 3.3961446826943905e-06,
"loss": 0.0172,
"step": 13830
},
{
"epoch": 8.99,
"learning_rate": 3.374485596707819e-06,
"loss": 0.1149,
"step": 13840
},
{
"epoch": 9.0,
"learning_rate": 3.3528265107212474e-06,
"loss": 0.0317,
"step": 13850
},
{
"epoch": 9.0,
"eval_accuracy": 0.984820607175713,
"eval_loss": 0.0851309597492218,
"eval_runtime": 12.8784,
"eval_samples_per_second": 168.809,
"eval_steps_per_second": 42.241,
"step": 13855
},
{
"epoch": 9.0,
"learning_rate": 3.331167424734676e-06,
"loss": 0.2191,
"step": 13860
},
{
"epoch": 9.01,
"learning_rate": 3.309508338748105e-06,
"loss": 0.1574,
"step": 13870
},
{
"epoch": 9.02,
"learning_rate": 3.2878492527615335e-06,
"loss": 0.0723,
"step": 13880
},
{
"epoch": 9.02,
"learning_rate": 3.266190166774962e-06,
"loss": 0.0881,
"step": 13890
},
{
"epoch": 9.03,
"learning_rate": 3.244531080788391e-06,
"loss": 0.1085,
"step": 13900
},
{
"epoch": 9.04,
"learning_rate": 3.2228719948018195e-06,
"loss": 0.097,
"step": 13910
},
{
"epoch": 9.04,
"learning_rate": 3.2012129088152482e-06,
"loss": 0.0165,
"step": 13920
},
{
"epoch": 9.05,
"learning_rate": 3.179553822828677e-06,
"loss": 0.1146,
"step": 13930
},
{
"epoch": 9.05,
"learning_rate": 3.157894736842105e-06,
"loss": 0.0318,
"step": 13940
},
{
"epoch": 9.06,
"learning_rate": 3.136235650855534e-06,
"loss": 0.0328,
"step": 13950
},
{
"epoch": 9.07,
"learning_rate": 3.1145765648689625e-06,
"loss": 0.0659,
"step": 13960
},
{
"epoch": 9.07,
"learning_rate": 3.0929174788823912e-06,
"loss": 0.0173,
"step": 13970
},
{
"epoch": 9.08,
"learning_rate": 3.07125839289582e-06,
"loss": 0.0772,
"step": 13980
},
{
"epoch": 9.09,
"learning_rate": 3.0495993069092486e-06,
"loss": 0.002,
"step": 13990
},
{
"epoch": 9.09,
"learning_rate": 3.0279402209226773e-06,
"loss": 0.326,
"step": 14000
},
{
"epoch": 9.1,
"learning_rate": 3.006281134936106e-06,
"loss": 0.0963,
"step": 14010
},
{
"epoch": 9.11,
"learning_rate": 2.9846220489495346e-06,
"loss": 0.0407,
"step": 14020
},
{
"epoch": 9.11,
"learning_rate": 2.962962962962963e-06,
"loss": 0.0449,
"step": 14030
},
{
"epoch": 9.12,
"learning_rate": 2.9413038769763916e-06,
"loss": 0.196,
"step": 14040
},
{
"epoch": 9.13,
"learning_rate": 2.9196447909898203e-06,
"loss": 0.0058,
"step": 14050
},
{
"epoch": 9.13,
"learning_rate": 2.897985705003249e-06,
"loss": 0.0504,
"step": 14060
},
{
"epoch": 9.14,
"learning_rate": 2.8763266190166776e-06,
"loss": 0.2405,
"step": 14070
},
{
"epoch": 9.15,
"learning_rate": 2.8546675330301063e-06,
"loss": 0.2004,
"step": 14080
},
{
"epoch": 9.15,
"learning_rate": 2.833008447043535e-06,
"loss": 0.1501,
"step": 14090
},
{
"epoch": 9.16,
"learning_rate": 2.8113493610569637e-06,
"loss": 0.0012,
"step": 14100
},
{
"epoch": 9.17,
"learning_rate": 2.789690275070392e-06,
"loss": 0.0938,
"step": 14110
},
{
"epoch": 9.17,
"learning_rate": 2.7680311890838206e-06,
"loss": 0.1683,
"step": 14120
},
{
"epoch": 9.18,
"learning_rate": 2.7463721030972493e-06,
"loss": 0.0288,
"step": 14130
},
{
"epoch": 9.18,
"learning_rate": 2.724713017110678e-06,
"loss": 0.0936,
"step": 14140
},
{
"epoch": 9.19,
"learning_rate": 2.7030539311241067e-06,
"loss": 0.0241,
"step": 14150
},
{
"epoch": 9.2,
"learning_rate": 2.6813948451375354e-06,
"loss": 0.1308,
"step": 14160
},
{
"epoch": 9.2,
"learning_rate": 2.659735759150964e-06,
"loss": 0.0011,
"step": 14170
},
{
"epoch": 9.21,
"learning_rate": 2.6380766731643928e-06,
"loss": 0.0892,
"step": 14180
},
{
"epoch": 9.22,
"learning_rate": 2.6164175871778214e-06,
"loss": 0.0683,
"step": 14190
},
{
"epoch": 9.22,
"learning_rate": 2.5947585011912497e-06,
"loss": 0.0821,
"step": 14200
},
{
"epoch": 9.23,
"learning_rate": 2.5730994152046784e-06,
"loss": 0.1705,
"step": 14210
},
{
"epoch": 9.24,
"learning_rate": 2.551440329218107e-06,
"loss": 0.2017,
"step": 14220
},
{
"epoch": 9.24,
"learning_rate": 2.5297812432315358e-06,
"loss": 0.2258,
"step": 14230
},
{
"epoch": 9.25,
"learning_rate": 2.5081221572449644e-06,
"loss": 0.0655,
"step": 14240
},
{
"epoch": 9.26,
"learning_rate": 2.486463071258393e-06,
"loss": 0.0019,
"step": 14250
},
{
"epoch": 9.26,
"learning_rate": 2.464803985271822e-06,
"loss": 0.1095,
"step": 14260
},
{
"epoch": 9.27,
"learning_rate": 2.4431448992852505e-06,
"loss": 0.0099,
"step": 14270
},
{
"epoch": 9.28,
"learning_rate": 2.421485813298679e-06,
"loss": 0.1379,
"step": 14280
},
{
"epoch": 9.28,
"learning_rate": 2.3998267273121074e-06,
"loss": 0.0783,
"step": 14290
},
{
"epoch": 9.29,
"learning_rate": 2.378167641325536e-06,
"loss": 0.0081,
"step": 14300
},
{
"epoch": 9.3,
"learning_rate": 2.356508555338965e-06,
"loss": 0.0195,
"step": 14310
},
{
"epoch": 9.3,
"learning_rate": 2.3348494693523935e-06,
"loss": 0.0019,
"step": 14320
},
{
"epoch": 9.31,
"learning_rate": 2.313190383365822e-06,
"loss": 0.0339,
"step": 14330
},
{
"epoch": 9.31,
"learning_rate": 2.291531297379251e-06,
"loss": 0.0434,
"step": 14340
},
{
"epoch": 9.32,
"learning_rate": 2.2698722113926796e-06,
"loss": 0.1738,
"step": 14350
},
{
"epoch": 9.33,
"learning_rate": 2.2482131254061082e-06,
"loss": 0.1285,
"step": 14360
},
{
"epoch": 9.33,
"learning_rate": 2.226554039419537e-06,
"loss": 0.0907,
"step": 14370
},
{
"epoch": 9.34,
"learning_rate": 2.204894953432965e-06,
"loss": 0.0349,
"step": 14380
},
{
"epoch": 9.35,
"learning_rate": 2.183235867446394e-06,
"loss": 0.0509,
"step": 14390
},
{
"epoch": 9.35,
"learning_rate": 2.1615767814598226e-06,
"loss": 0.0015,
"step": 14400
},
{
"epoch": 9.36,
"learning_rate": 2.1399176954732512e-06,
"loss": 0.0035,
"step": 14410
},
{
"epoch": 9.37,
"learning_rate": 2.11825860948668e-06,
"loss": 0.1029,
"step": 14420
},
{
"epoch": 9.37,
"learning_rate": 2.0965995235001086e-06,
"loss": 0.1722,
"step": 14430
},
{
"epoch": 9.38,
"learning_rate": 2.0749404375135373e-06,
"loss": 0.1884,
"step": 14440
},
{
"epoch": 9.39,
"learning_rate": 2.053281351526966e-06,
"loss": 0.153,
"step": 14450
},
{
"epoch": 9.39,
"learning_rate": 2.0316222655403942e-06,
"loss": 0.0016,
"step": 14460
},
{
"epoch": 9.4,
"learning_rate": 2.0099631795538225e-06,
"loss": 0.1055,
"step": 14470
},
{
"epoch": 9.41,
"learning_rate": 1.988304093567251e-06,
"loss": 0.0803,
"step": 14480
},
{
"epoch": 9.41,
"learning_rate": 1.96664500758068e-06,
"loss": 0.0262,
"step": 14490
},
{
"epoch": 9.42,
"learning_rate": 1.9449859215941086e-06,
"loss": 0.0813,
"step": 14500
},
{
"epoch": 9.43,
"learning_rate": 1.9233268356075373e-06,
"loss": 0.1877,
"step": 14510
},
{
"epoch": 9.43,
"learning_rate": 1.9016677496209661e-06,
"loss": 0.0272,
"step": 14520
},
{
"epoch": 9.44,
"learning_rate": 1.8800086636343948e-06,
"loss": 0.1979,
"step": 14530
},
{
"epoch": 9.44,
"learning_rate": 1.8583495776478233e-06,
"loss": 0.1209,
"step": 14540
},
{
"epoch": 9.45,
"learning_rate": 1.836690491661252e-06,
"loss": 0.0729,
"step": 14550
},
{
"epoch": 9.46,
"learning_rate": 1.8150314056746807e-06,
"loss": 0.1729,
"step": 14560
},
{
"epoch": 9.46,
"learning_rate": 1.7933723196881092e-06,
"loss": 0.1455,
"step": 14570
},
{
"epoch": 9.47,
"learning_rate": 1.7717132337015378e-06,
"loss": 0.0725,
"step": 14580
},
{
"epoch": 9.48,
"learning_rate": 1.7500541477149665e-06,
"loss": 0.0148,
"step": 14590
},
{
"epoch": 9.48,
"learning_rate": 1.7283950617283952e-06,
"loss": 0.0011,
"step": 14600
},
{
"epoch": 9.49,
"learning_rate": 1.7067359757418239e-06,
"loss": 0.002,
"step": 14610
},
{
"epoch": 9.5,
"learning_rate": 1.6850768897552524e-06,
"loss": 0.2557,
"step": 14620
},
{
"epoch": 9.5,
"learning_rate": 1.663417803768681e-06,
"loss": 0.2756,
"step": 14630
},
{
"epoch": 9.51,
"learning_rate": 1.6417587177821097e-06,
"loss": 0.0519,
"step": 14640
},
{
"epoch": 9.52,
"learning_rate": 1.6200996317955384e-06,
"loss": 0.0122,
"step": 14650
},
{
"epoch": 9.52,
"learning_rate": 1.5984405458089669e-06,
"loss": 0.0782,
"step": 14660
},
{
"epoch": 9.53,
"learning_rate": 1.5767814598223956e-06,
"loss": 0.181,
"step": 14670
},
{
"epoch": 9.54,
"learning_rate": 1.5551223738358243e-06,
"loss": 0.0997,
"step": 14680
},
{
"epoch": 9.54,
"learning_rate": 1.533463287849253e-06,
"loss": 0.1639,
"step": 14690
},
{
"epoch": 9.55,
"learning_rate": 1.5118042018626816e-06,
"loss": 0.0852,
"step": 14700
},
{
"epoch": 9.56,
"learning_rate": 1.4901451158761101e-06,
"loss": 0.001,
"step": 14710
},
{
"epoch": 9.56,
"learning_rate": 1.4684860298895388e-06,
"loss": 0.2217,
"step": 14720
},
{
"epoch": 9.57,
"learning_rate": 1.4468269439029673e-06,
"loss": 0.1359,
"step": 14730
},
{
"epoch": 9.57,
"learning_rate": 1.425167857916396e-06,
"loss": 0.1462,
"step": 14740
},
{
"epoch": 9.58,
"learning_rate": 1.4035087719298244e-06,
"loss": 0.002,
"step": 14750
},
{
"epoch": 9.59,
"learning_rate": 1.3818496859432531e-06,
"loss": 0.0147,
"step": 14760
},
{
"epoch": 9.59,
"learning_rate": 1.3601905999566818e-06,
"loss": 0.0607,
"step": 14770
},
{
"epoch": 9.6,
"learning_rate": 1.3385315139701105e-06,
"loss": 0.0903,
"step": 14780
},
{
"epoch": 9.61,
"learning_rate": 1.316872427983539e-06,
"loss": 0.1384,
"step": 14790
},
{
"epoch": 9.61,
"learning_rate": 1.2952133419969676e-06,
"loss": 0.1349,
"step": 14800
},
{
"epoch": 9.62,
"learning_rate": 1.2735542560103963e-06,
"loss": 0.145,
"step": 14810
},
{
"epoch": 9.63,
"learning_rate": 1.251895170023825e-06,
"loss": 0.0957,
"step": 14820
},
{
"epoch": 9.63,
"learning_rate": 1.2302360840372537e-06,
"loss": 0.0971,
"step": 14830
},
{
"epoch": 9.64,
"learning_rate": 1.2085769980506822e-06,
"loss": 0.0578,
"step": 14840
},
{
"epoch": 9.65,
"learning_rate": 1.1869179120641109e-06,
"loss": 0.0793,
"step": 14850
},
{
"epoch": 9.65,
"learning_rate": 1.1652588260775395e-06,
"loss": 0.0914,
"step": 14860
},
{
"epoch": 9.66,
"learning_rate": 1.1435997400909682e-06,
"loss": 0.0207,
"step": 14870
},
{
"epoch": 9.67,
"learning_rate": 1.1219406541043967e-06,
"loss": 0.0121,
"step": 14880
},
{
"epoch": 9.67,
"learning_rate": 1.1002815681178254e-06,
"loss": 0.0013,
"step": 14890
},
{
"epoch": 9.68,
"learning_rate": 1.078622482131254e-06,
"loss": 0.1251,
"step": 14900
},
{
"epoch": 9.68,
"learning_rate": 1.0569633961446828e-06,
"loss": 0.0711,
"step": 14910
},
{
"epoch": 9.69,
"learning_rate": 1.0353043101581112e-06,
"loss": 0.032,
"step": 14920
},
{
"epoch": 9.7,
"learning_rate": 1.01364522417154e-06,
"loss": 0.1033,
"step": 14930
},
{
"epoch": 9.7,
"learning_rate": 9.919861381849686e-07,
"loss": 0.0706,
"step": 14940
},
{
"epoch": 9.71,
"learning_rate": 9.703270521983973e-07,
"loss": 0.0011,
"step": 14950
},
{
"epoch": 9.72,
"learning_rate": 9.48667966211826e-07,
"loss": 0.1376,
"step": 14960
},
{
"epoch": 9.72,
"learning_rate": 9.270088802252545e-07,
"loss": 0.0821,
"step": 14970
},
{
"epoch": 9.73,
"learning_rate": 9.053497942386831e-07,
"loss": 0.0325,
"step": 14980
},
{
"epoch": 9.74,
"learning_rate": 8.836907082521118e-07,
"loss": 0.1046,
"step": 14990
},
{
"epoch": 9.74,
"learning_rate": 8.620316222655404e-07,
"loss": 0.001,
"step": 15000
},
{
"epoch": 9.75,
"learning_rate": 8.403725362789691e-07,
"loss": 0.0316,
"step": 15010
},
{
"epoch": 9.76,
"learning_rate": 8.187134502923977e-07,
"loss": 0.0583,
"step": 15020
},
{
"epoch": 9.76,
"learning_rate": 7.970543643058263e-07,
"loss": 0.3065,
"step": 15030
},
{
"epoch": 9.77,
"learning_rate": 7.75395278319255e-07,
"loss": 0.0385,
"step": 15040
},
{
"epoch": 9.78,
"learning_rate": 7.537361923326836e-07,
"loss": 0.0798,
"step": 15050
},
{
"epoch": 9.78,
"learning_rate": 7.320771063461123e-07,
"loss": 0.0639,
"step": 15060
},
{
"epoch": 9.79,
"learning_rate": 7.104180203595409e-07,
"loss": 0.1085,
"step": 15070
},
{
"epoch": 9.8,
"learning_rate": 6.887589343729695e-07,
"loss": 0.0518,
"step": 15080
},
{
"epoch": 9.8,
"learning_rate": 6.670998483863981e-07,
"loss": 0.16,
"step": 15090
},
{
"epoch": 9.81,
"learning_rate": 6.454407623998268e-07,
"loss": 0.0204,
"step": 15100
},
{
"epoch": 9.81,
"learning_rate": 6.237816764132554e-07,
"loss": 0.1528,
"step": 15110
},
{
"epoch": 9.82,
"learning_rate": 6.02122590426684e-07,
"loss": 0.1124,
"step": 15120
},
{
"epoch": 9.83,
"learning_rate": 5.804635044401126e-07,
"loss": 0.169,
"step": 15130
},
{
"epoch": 9.83,
"learning_rate": 5.588044184535412e-07,
"loss": 0.0796,
"step": 15140
},
{
"epoch": 9.84,
"learning_rate": 5.371453324669699e-07,
"loss": 0.1531,
"step": 15150
},
{
"epoch": 9.85,
"learning_rate": 5.154862464803985e-07,
"loss": 0.1019,
"step": 15160
},
{
"epoch": 9.85,
"learning_rate": 4.938271604938272e-07,
"loss": 0.0018,
"step": 15170
},
{
"epoch": 9.86,
"learning_rate": 4.7216807450725577e-07,
"loss": 0.021,
"step": 15180
},
{
"epoch": 9.87,
"learning_rate": 4.505089885206844e-07,
"loss": 0.0517,
"step": 15190
},
{
"epoch": 9.87,
"learning_rate": 4.2884990253411303e-07,
"loss": 0.0759,
"step": 15200
},
{
"epoch": 9.88,
"learning_rate": 4.071908165475417e-07,
"loss": 0.2548,
"step": 15210
},
{
"epoch": 9.89,
"learning_rate": 3.8553173056097035e-07,
"loss": 0.1891,
"step": 15220
},
{
"epoch": 9.89,
"learning_rate": 3.63872644574399e-07,
"loss": 0.1289,
"step": 15230
},
{
"epoch": 9.9,
"learning_rate": 3.422135585878276e-07,
"loss": 0.0315,
"step": 15240
},
{
"epoch": 9.91,
"learning_rate": 3.2055447260125624e-07,
"loss": 0.0379,
"step": 15250
},
{
"epoch": 9.91,
"learning_rate": 2.988953866146849e-07,
"loss": 0.1247,
"step": 15260
},
{
"epoch": 9.92,
"learning_rate": 2.772363006281135e-07,
"loss": 0.1522,
"step": 15270
},
{
"epoch": 9.93,
"learning_rate": 2.5557721464154214e-07,
"loss": 0.1842,
"step": 15280
},
{
"epoch": 9.93,
"learning_rate": 2.3391812865497075e-07,
"loss": 0.1641,
"step": 15290
},
{
"epoch": 9.94,
"learning_rate": 2.122590426683994e-07,
"loss": 0.0641,
"step": 15300
},
{
"epoch": 9.94,
"learning_rate": 1.9059995668182804e-07,
"loss": 0.0768,
"step": 15310
},
{
"epoch": 9.95,
"learning_rate": 1.6894087069525667e-07,
"loss": 0.0525,
"step": 15320
},
{
"epoch": 9.96,
"learning_rate": 1.472817847086853e-07,
"loss": 0.0982,
"step": 15330
},
{
"epoch": 9.96,
"learning_rate": 1.256226987221139e-07,
"loss": 0.197,
"step": 15340
},
{
"epoch": 9.97,
"learning_rate": 1.0396361273554257e-07,
"loss": 0.1669,
"step": 15350
},
{
"epoch": 9.98,
"learning_rate": 8.230452674897118e-08,
"loss": 0.0749,
"step": 15360
},
{
"epoch": 9.98,
"learning_rate": 6.064544076239983e-08,
"loss": 0.1194,
"step": 15370
},
{
"epoch": 9.99,
"learning_rate": 3.898635477582846e-08,
"loss": 0.1512,
"step": 15380
},
{
"epoch": 10.0,
"learning_rate": 1.7327268789257094e-08,
"loss": 0.068,
"step": 15390
},
{
"epoch": 10.0,
"eval_accuracy": 0.985280588776449,
"eval_loss": 0.0865689218044281,
"eval_runtime": 13.3643,
"eval_samples_per_second": 162.673,
"eval_steps_per_second": 40.706,
"step": 15390
},
{
"epoch": 10.0,
"step": 15390,
"total_flos": 1.1936517061793697e+18,
"train_loss": 0.29998462175571583,
"train_runtime": 2944.6409,
"train_samples_per_second": 41.825,
"train_steps_per_second": 5.226
}
],
"logging_steps": 10,
"max_steps": 15390,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.1936517061793697e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}