ppicazo's picture
Upload 8 files
ed5a13e
{
"best_metric": 0.0011723055504262447,
"best_model_checkpoint": "./ap_train_outputs/checkpoint-10918",
"epoch": 106.0,
"eval_steps": 500,
"global_step": 10918,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 1.999223300970874e-05,
"loss": 2.0179,
"step": 10
},
{
"epoch": 0.19,
"learning_rate": 1.9984466019417477e-05,
"loss": 1.9319,
"step": 20
},
{
"epoch": 0.29,
"learning_rate": 1.9976699029126216e-05,
"loss": 1.7818,
"step": 30
},
{
"epoch": 0.39,
"learning_rate": 1.9968932038834955e-05,
"loss": 1.7205,
"step": 40
},
{
"epoch": 0.49,
"learning_rate": 1.996116504854369e-05,
"loss": 1.5862,
"step": 50
},
{
"epoch": 0.58,
"learning_rate": 1.995339805825243e-05,
"loss": 1.5051,
"step": 60
},
{
"epoch": 0.68,
"learning_rate": 1.9945631067961166e-05,
"loss": 1.3624,
"step": 70
},
{
"epoch": 0.78,
"learning_rate": 1.9937864077669905e-05,
"loss": 1.3245,
"step": 80
},
{
"epoch": 0.87,
"learning_rate": 1.993009708737864e-05,
"loss": 1.2896,
"step": 90
},
{
"epoch": 0.97,
"learning_rate": 1.992233009708738e-05,
"loss": 1.1133,
"step": 100
},
{
"epoch": 1.0,
"eval_accuracy": 0.8561643835616438,
"eval_loss": 1.1050430536270142,
"eval_runtime": 1.0213,
"eval_samples_per_second": 142.949,
"eval_steps_per_second": 18.603,
"step": 103
},
{
"epoch": 1.07,
"learning_rate": 1.991456310679612e-05,
"loss": 1.1258,
"step": 110
},
{
"epoch": 1.17,
"learning_rate": 1.9906796116504855e-05,
"loss": 1.0101,
"step": 120
},
{
"epoch": 1.26,
"learning_rate": 1.9899029126213594e-05,
"loss": 0.8295,
"step": 130
},
{
"epoch": 1.36,
"learning_rate": 1.989126213592233e-05,
"loss": 0.8112,
"step": 140
},
{
"epoch": 1.46,
"learning_rate": 1.988349514563107e-05,
"loss": 0.7511,
"step": 150
},
{
"epoch": 1.55,
"learning_rate": 1.987572815533981e-05,
"loss": 0.7514,
"step": 160
},
{
"epoch": 1.65,
"learning_rate": 1.9867961165048548e-05,
"loss": 0.7797,
"step": 170
},
{
"epoch": 1.75,
"learning_rate": 1.9860194174757283e-05,
"loss": 0.6666,
"step": 180
},
{
"epoch": 1.84,
"learning_rate": 1.9852427184466022e-05,
"loss": 0.7185,
"step": 190
},
{
"epoch": 1.94,
"learning_rate": 1.9844660194174758e-05,
"loss": 0.6564,
"step": 200
},
{
"epoch": 2.0,
"eval_accuracy": 0.9452054794520548,
"eval_loss": 0.6278233528137207,
"eval_runtime": 1.018,
"eval_samples_per_second": 143.418,
"eval_steps_per_second": 18.664,
"step": 206
},
{
"epoch": 2.04,
"learning_rate": 1.9836893203883497e-05,
"loss": 0.4882,
"step": 210
},
{
"epoch": 2.14,
"learning_rate": 1.9829126213592233e-05,
"loss": 0.5629,
"step": 220
},
{
"epoch": 2.23,
"learning_rate": 1.9821359223300972e-05,
"loss": 0.5384,
"step": 230
},
{
"epoch": 2.33,
"learning_rate": 1.981359223300971e-05,
"loss": 0.445,
"step": 240
},
{
"epoch": 2.43,
"learning_rate": 1.9805825242718447e-05,
"loss": 0.5185,
"step": 250
},
{
"epoch": 2.52,
"learning_rate": 1.9798058252427187e-05,
"loss": 0.4958,
"step": 260
},
{
"epoch": 2.62,
"learning_rate": 1.9790291262135922e-05,
"loss": 0.4132,
"step": 270
},
{
"epoch": 2.72,
"learning_rate": 1.978252427184466e-05,
"loss": 0.4397,
"step": 280
},
{
"epoch": 2.82,
"learning_rate": 1.97747572815534e-05,
"loss": 0.4415,
"step": 290
},
{
"epoch": 2.91,
"learning_rate": 1.9766990291262137e-05,
"loss": 0.4004,
"step": 300
},
{
"epoch": 3.0,
"eval_accuracy": 0.9657534246575342,
"eval_loss": 0.3876227140426636,
"eval_runtime": 0.9913,
"eval_samples_per_second": 147.283,
"eval_steps_per_second": 19.167,
"step": 309
},
{
"epoch": 3.01,
"learning_rate": 1.9759223300970876e-05,
"loss": 0.3367,
"step": 310
},
{
"epoch": 3.11,
"learning_rate": 1.975145631067961e-05,
"loss": 0.3328,
"step": 320
},
{
"epoch": 3.2,
"learning_rate": 1.974368932038835e-05,
"loss": 0.3767,
"step": 330
},
{
"epoch": 3.3,
"learning_rate": 1.9735922330097087e-05,
"loss": 0.3029,
"step": 340
},
{
"epoch": 3.4,
"learning_rate": 1.972815533980583e-05,
"loss": 0.3183,
"step": 350
},
{
"epoch": 3.5,
"learning_rate": 1.9720388349514565e-05,
"loss": 0.239,
"step": 360
},
{
"epoch": 3.59,
"learning_rate": 1.9712621359223304e-05,
"loss": 0.3627,
"step": 370
},
{
"epoch": 3.69,
"learning_rate": 1.970485436893204e-05,
"loss": 0.3516,
"step": 380
},
{
"epoch": 3.79,
"learning_rate": 1.969708737864078e-05,
"loss": 0.2904,
"step": 390
},
{
"epoch": 3.88,
"learning_rate": 1.9689320388349515e-05,
"loss": 0.2362,
"step": 400
},
{
"epoch": 3.98,
"learning_rate": 1.9681553398058254e-05,
"loss": 0.2624,
"step": 410
},
{
"epoch": 4.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.24633407592773438,
"eval_runtime": 0.9705,
"eval_samples_per_second": 150.434,
"eval_steps_per_second": 19.577,
"step": 412
},
{
"epoch": 4.08,
"learning_rate": 1.9673786407766993e-05,
"loss": 0.2748,
"step": 420
},
{
"epoch": 4.17,
"learning_rate": 1.966601941747573e-05,
"loss": 0.2072,
"step": 430
},
{
"epoch": 4.27,
"learning_rate": 1.965825242718447e-05,
"loss": 0.209,
"step": 440
},
{
"epoch": 4.37,
"learning_rate": 1.9650485436893204e-05,
"loss": 0.2567,
"step": 450
},
{
"epoch": 4.47,
"learning_rate": 1.9642718446601943e-05,
"loss": 0.194,
"step": 460
},
{
"epoch": 4.56,
"learning_rate": 1.963495145631068e-05,
"loss": 0.2188,
"step": 470
},
{
"epoch": 4.66,
"learning_rate": 1.962718446601942e-05,
"loss": 0.2031,
"step": 480
},
{
"epoch": 4.76,
"learning_rate": 1.9619417475728157e-05,
"loss": 0.2943,
"step": 490
},
{
"epoch": 4.85,
"learning_rate": 1.9611650485436893e-05,
"loss": 0.1883,
"step": 500
},
{
"epoch": 4.95,
"learning_rate": 1.9603883495145632e-05,
"loss": 0.2074,
"step": 510
},
{
"epoch": 5.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.19891677796840668,
"eval_runtime": 0.9232,
"eval_samples_per_second": 158.14,
"eval_steps_per_second": 20.58,
"step": 515
},
{
"epoch": 5.05,
"learning_rate": 1.959611650485437e-05,
"loss": 0.2058,
"step": 520
},
{
"epoch": 5.15,
"learning_rate": 1.9588349514563107e-05,
"loss": 0.1468,
"step": 530
},
{
"epoch": 5.24,
"learning_rate": 1.9580582524271847e-05,
"loss": 0.2338,
"step": 540
},
{
"epoch": 5.34,
"learning_rate": 1.9572815533980586e-05,
"loss": 0.2376,
"step": 550
},
{
"epoch": 5.44,
"learning_rate": 1.956504854368932e-05,
"loss": 0.2948,
"step": 560
},
{
"epoch": 5.53,
"learning_rate": 1.955728155339806e-05,
"loss": 0.191,
"step": 570
},
{
"epoch": 5.63,
"learning_rate": 1.9549514563106797e-05,
"loss": 0.1313,
"step": 580
},
{
"epoch": 5.73,
"learning_rate": 1.9541747572815536e-05,
"loss": 0.1462,
"step": 590
},
{
"epoch": 5.83,
"learning_rate": 1.9533980582524275e-05,
"loss": 0.239,
"step": 600
},
{
"epoch": 5.92,
"learning_rate": 1.952621359223301e-05,
"loss": 0.141,
"step": 610
},
{
"epoch": 6.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.17079336941242218,
"eval_runtime": 0.9678,
"eval_samples_per_second": 150.859,
"eval_steps_per_second": 19.632,
"step": 618
},
{
"epoch": 6.02,
"learning_rate": 1.951844660194175e-05,
"loss": 0.1518,
"step": 620
},
{
"epoch": 6.12,
"learning_rate": 1.9510679611650486e-05,
"loss": 0.1385,
"step": 630
},
{
"epoch": 6.21,
"learning_rate": 1.9502912621359225e-05,
"loss": 0.1632,
"step": 640
},
{
"epoch": 6.31,
"learning_rate": 1.949514563106796e-05,
"loss": 0.216,
"step": 650
},
{
"epoch": 6.41,
"learning_rate": 1.94873786407767e-05,
"loss": 0.3242,
"step": 660
},
{
"epoch": 6.5,
"learning_rate": 1.947961165048544e-05,
"loss": 0.1218,
"step": 670
},
{
"epoch": 6.6,
"learning_rate": 1.947184466019418e-05,
"loss": 0.1637,
"step": 680
},
{
"epoch": 6.7,
"learning_rate": 1.9464077669902914e-05,
"loss": 0.1651,
"step": 690
},
{
"epoch": 6.8,
"learning_rate": 1.9456310679611653e-05,
"loss": 0.181,
"step": 700
},
{
"epoch": 6.89,
"learning_rate": 1.944854368932039e-05,
"loss": 0.186,
"step": 710
},
{
"epoch": 6.99,
"learning_rate": 1.944077669902913e-05,
"loss": 0.1338,
"step": 720
},
{
"epoch": 7.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.17499177157878876,
"eval_runtime": 0.9605,
"eval_samples_per_second": 152.011,
"eval_steps_per_second": 19.782,
"step": 721
},
{
"epoch": 7.09,
"learning_rate": 1.9433009708737868e-05,
"loss": 0.1857,
"step": 730
},
{
"epoch": 7.18,
"learning_rate": 1.9425242718446603e-05,
"loss": 0.2048,
"step": 740
},
{
"epoch": 7.28,
"learning_rate": 1.9417475728155343e-05,
"loss": 0.1958,
"step": 750
},
{
"epoch": 7.38,
"learning_rate": 1.940970873786408e-05,
"loss": 0.2379,
"step": 760
},
{
"epoch": 7.48,
"learning_rate": 1.9401941747572818e-05,
"loss": 0.1159,
"step": 770
},
{
"epoch": 7.57,
"learning_rate": 1.9394174757281553e-05,
"loss": 0.1961,
"step": 780
},
{
"epoch": 7.67,
"learning_rate": 1.9386407766990292e-05,
"loss": 0.1297,
"step": 790
},
{
"epoch": 7.77,
"learning_rate": 1.937864077669903e-05,
"loss": 0.2555,
"step": 800
},
{
"epoch": 7.86,
"learning_rate": 1.9370873786407767e-05,
"loss": 0.134,
"step": 810
},
{
"epoch": 7.96,
"learning_rate": 1.9363106796116507e-05,
"loss": 0.1343,
"step": 820
},
{
"epoch": 8.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.12730906903743744,
"eval_runtime": 0.993,
"eval_samples_per_second": 147.023,
"eval_steps_per_second": 19.133,
"step": 824
},
{
"epoch": 8.06,
"learning_rate": 1.9355339805825242e-05,
"loss": 0.2206,
"step": 830
},
{
"epoch": 8.16,
"learning_rate": 1.934757281553398e-05,
"loss": 0.1735,
"step": 840
},
{
"epoch": 8.25,
"learning_rate": 1.9339805825242717e-05,
"loss": 0.137,
"step": 850
},
{
"epoch": 8.35,
"learning_rate": 1.933203883495146e-05,
"loss": 0.1378,
"step": 860
},
{
"epoch": 8.45,
"learning_rate": 1.9324271844660196e-05,
"loss": 0.1671,
"step": 870
},
{
"epoch": 8.54,
"learning_rate": 1.9316504854368935e-05,
"loss": 0.212,
"step": 880
},
{
"epoch": 8.64,
"learning_rate": 1.930873786407767e-05,
"loss": 0.1598,
"step": 890
},
{
"epoch": 8.74,
"learning_rate": 1.930097087378641e-05,
"loss": 0.1813,
"step": 900
},
{
"epoch": 8.83,
"learning_rate": 1.9293203883495146e-05,
"loss": 0.0725,
"step": 910
},
{
"epoch": 8.93,
"learning_rate": 1.9285436893203885e-05,
"loss": 0.1558,
"step": 920
},
{
"epoch": 9.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.11657154560089111,
"eval_runtime": 0.9702,
"eval_samples_per_second": 150.486,
"eval_steps_per_second": 19.584,
"step": 927
},
{
"epoch": 9.03,
"learning_rate": 1.9277669902912624e-05,
"loss": 0.094,
"step": 930
},
{
"epoch": 9.13,
"learning_rate": 1.926990291262136e-05,
"loss": 0.1341,
"step": 940
},
{
"epoch": 9.22,
"learning_rate": 1.92621359223301e-05,
"loss": 0.196,
"step": 950
},
{
"epoch": 9.32,
"learning_rate": 1.9254368932038835e-05,
"loss": 0.1028,
"step": 960
},
{
"epoch": 9.42,
"learning_rate": 1.9246601941747574e-05,
"loss": 0.178,
"step": 970
},
{
"epoch": 9.51,
"learning_rate": 1.9238834951456313e-05,
"loss": 0.2527,
"step": 980
},
{
"epoch": 9.61,
"learning_rate": 1.923106796116505e-05,
"loss": 0.2609,
"step": 990
},
{
"epoch": 9.71,
"learning_rate": 1.922330097087379e-05,
"loss": 0.1518,
"step": 1000
},
{
"epoch": 9.81,
"learning_rate": 1.9215533980582528e-05,
"loss": 0.1383,
"step": 1010
},
{
"epoch": 9.9,
"learning_rate": 1.9207766990291263e-05,
"loss": 0.075,
"step": 1020
},
{
"epoch": 10.0,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.0799,
"step": 1030
},
{
"epoch": 10.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.1238846480846405,
"eval_runtime": 0.9773,
"eval_samples_per_second": 149.397,
"eval_steps_per_second": 19.442,
"step": 1030
},
{
"epoch": 10.1,
"learning_rate": 1.919223300970874e-05,
"loss": 0.1866,
"step": 1040
},
{
"epoch": 10.19,
"learning_rate": 1.9184466019417478e-05,
"loss": 0.1154,
"step": 1050
},
{
"epoch": 10.29,
"learning_rate": 1.9176699029126217e-05,
"loss": 0.1123,
"step": 1060
},
{
"epoch": 10.39,
"learning_rate": 1.9168932038834952e-05,
"loss": 0.1085,
"step": 1070
},
{
"epoch": 10.49,
"learning_rate": 1.916116504854369e-05,
"loss": 0.2075,
"step": 1080
},
{
"epoch": 10.58,
"learning_rate": 1.9153398058252427e-05,
"loss": 0.0624,
"step": 1090
},
{
"epoch": 10.68,
"learning_rate": 1.9145631067961167e-05,
"loss": 0.1116,
"step": 1100
},
{
"epoch": 10.78,
"learning_rate": 1.9137864077669906e-05,
"loss": 0.0523,
"step": 1110
},
{
"epoch": 10.87,
"learning_rate": 1.913009708737864e-05,
"loss": 0.1655,
"step": 1120
},
{
"epoch": 10.97,
"learning_rate": 1.912233009708738e-05,
"loss": 0.1677,
"step": 1130
},
{
"epoch": 11.0,
"eval_accuracy": 0.9726027397260274,
"eval_loss": 0.15431082248687744,
"eval_runtime": 0.948,
"eval_samples_per_second": 154.005,
"eval_steps_per_second": 20.042,
"step": 1133
},
{
"epoch": 11.07,
"learning_rate": 1.9114563106796117e-05,
"loss": 0.1516,
"step": 1140
},
{
"epoch": 11.17,
"learning_rate": 1.9106796116504856e-05,
"loss": 0.1927,
"step": 1150
},
{
"epoch": 11.26,
"learning_rate": 1.909902912621359e-05,
"loss": 0.0548,
"step": 1160
},
{
"epoch": 11.36,
"learning_rate": 1.9091262135922334e-05,
"loss": 0.085,
"step": 1170
},
{
"epoch": 11.46,
"learning_rate": 1.908349514563107e-05,
"loss": 0.202,
"step": 1180
},
{
"epoch": 11.55,
"learning_rate": 1.907572815533981e-05,
"loss": 0.0741,
"step": 1190
},
{
"epoch": 11.65,
"learning_rate": 1.9067961165048545e-05,
"loss": 0.1373,
"step": 1200
},
{
"epoch": 11.75,
"learning_rate": 1.9060194174757284e-05,
"loss": 0.219,
"step": 1210
},
{
"epoch": 11.84,
"learning_rate": 1.905242718446602e-05,
"loss": 0.169,
"step": 1220
},
{
"epoch": 11.94,
"learning_rate": 1.904466019417476e-05,
"loss": 0.1969,
"step": 1230
},
{
"epoch": 12.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.09193126857280731,
"eval_runtime": 0.9434,
"eval_samples_per_second": 154.754,
"eval_steps_per_second": 20.139,
"step": 1236
},
{
"epoch": 12.04,
"learning_rate": 1.90368932038835e-05,
"loss": 0.0957,
"step": 1240
},
{
"epoch": 12.14,
"learning_rate": 1.9029126213592234e-05,
"loss": 0.1396,
"step": 1250
},
{
"epoch": 12.23,
"learning_rate": 1.9021359223300973e-05,
"loss": 0.1004,
"step": 1260
},
{
"epoch": 12.33,
"learning_rate": 1.901359223300971e-05,
"loss": 0.0796,
"step": 1270
},
{
"epoch": 12.43,
"learning_rate": 1.900582524271845e-05,
"loss": 0.225,
"step": 1280
},
{
"epoch": 12.52,
"learning_rate": 1.8998058252427184e-05,
"loss": 0.1395,
"step": 1290
},
{
"epoch": 12.62,
"learning_rate": 1.8990291262135923e-05,
"loss": 0.0571,
"step": 1300
},
{
"epoch": 12.72,
"learning_rate": 1.8982524271844663e-05,
"loss": 0.0472,
"step": 1310
},
{
"epoch": 12.82,
"learning_rate": 1.89747572815534e-05,
"loss": 0.1276,
"step": 1320
},
{
"epoch": 12.91,
"learning_rate": 1.8966990291262138e-05,
"loss": 0.0533,
"step": 1330
},
{
"epoch": 13.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.07145330309867859,
"eval_runtime": 0.9485,
"eval_samples_per_second": 153.935,
"eval_steps_per_second": 20.033,
"step": 1339
},
{
"epoch": 13.01,
"learning_rate": 1.8959223300970873e-05,
"loss": 0.0895,
"step": 1340
},
{
"epoch": 13.11,
"learning_rate": 1.8951456310679613e-05,
"loss": 0.0545,
"step": 1350
},
{
"epoch": 13.2,
"learning_rate": 1.894368932038835e-05,
"loss": 0.0889,
"step": 1360
},
{
"epoch": 13.3,
"learning_rate": 1.893592233009709e-05,
"loss": 0.0444,
"step": 1370
},
{
"epoch": 13.4,
"learning_rate": 1.8928155339805827e-05,
"loss": 0.131,
"step": 1380
},
{
"epoch": 13.5,
"learning_rate": 1.8920388349514566e-05,
"loss": 0.1644,
"step": 1390
},
{
"epoch": 13.59,
"learning_rate": 1.89126213592233e-05,
"loss": 0.1254,
"step": 1400
},
{
"epoch": 13.69,
"learning_rate": 1.890485436893204e-05,
"loss": 0.1407,
"step": 1410
},
{
"epoch": 13.79,
"learning_rate": 1.8897087378640777e-05,
"loss": 0.1139,
"step": 1420
},
{
"epoch": 13.88,
"learning_rate": 1.8889320388349516e-05,
"loss": 0.0703,
"step": 1430
},
{
"epoch": 13.98,
"learning_rate": 1.8881553398058255e-05,
"loss": 0.1645,
"step": 1440
},
{
"epoch": 14.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.050846148282289505,
"eval_runtime": 0.9651,
"eval_samples_per_second": 151.283,
"eval_steps_per_second": 19.688,
"step": 1442
},
{
"epoch": 14.08,
"learning_rate": 1.887378640776699e-05,
"loss": 0.1136,
"step": 1450
},
{
"epoch": 14.17,
"learning_rate": 1.886601941747573e-05,
"loss": 0.0542,
"step": 1460
},
{
"epoch": 14.27,
"learning_rate": 1.8858252427184466e-05,
"loss": 0.1973,
"step": 1470
},
{
"epoch": 14.37,
"learning_rate": 1.8850485436893205e-05,
"loss": 0.1312,
"step": 1480
},
{
"epoch": 14.47,
"learning_rate": 1.8842718446601944e-05,
"loss": 0.1627,
"step": 1490
},
{
"epoch": 14.56,
"learning_rate": 1.883495145631068e-05,
"loss": 0.1452,
"step": 1500
},
{
"epoch": 14.66,
"learning_rate": 1.882718446601942e-05,
"loss": 0.1142,
"step": 1510
},
{
"epoch": 14.76,
"learning_rate": 1.881941747572816e-05,
"loss": 0.0803,
"step": 1520
},
{
"epoch": 14.85,
"learning_rate": 1.8811650485436894e-05,
"loss": 0.1401,
"step": 1530
},
{
"epoch": 14.95,
"learning_rate": 1.8803883495145633e-05,
"loss": 0.1036,
"step": 1540
},
{
"epoch": 15.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.06801381707191467,
"eval_runtime": 0.9536,
"eval_samples_per_second": 153.104,
"eval_steps_per_second": 19.924,
"step": 1545
},
{
"epoch": 15.05,
"learning_rate": 1.8796116504854373e-05,
"loss": 0.1774,
"step": 1550
},
{
"epoch": 15.15,
"learning_rate": 1.878834951456311e-05,
"loss": 0.0533,
"step": 1560
},
{
"epoch": 15.24,
"learning_rate": 1.8780582524271848e-05,
"loss": 0.0623,
"step": 1570
},
{
"epoch": 15.34,
"learning_rate": 1.8772815533980583e-05,
"loss": 0.1694,
"step": 1580
},
{
"epoch": 15.44,
"learning_rate": 1.8765048543689323e-05,
"loss": 0.1773,
"step": 1590
},
{
"epoch": 15.53,
"learning_rate": 1.875728155339806e-05,
"loss": 0.2231,
"step": 1600
},
{
"epoch": 15.63,
"learning_rate": 1.8749514563106798e-05,
"loss": 0.0794,
"step": 1610
},
{
"epoch": 15.73,
"learning_rate": 1.8741747572815537e-05,
"loss": 0.0464,
"step": 1620
},
{
"epoch": 15.83,
"learning_rate": 1.8733980582524273e-05,
"loss": 0.1643,
"step": 1630
},
{
"epoch": 15.92,
"learning_rate": 1.8726213592233012e-05,
"loss": 0.0442,
"step": 1640
},
{
"epoch": 16.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.08464141935110092,
"eval_runtime": 0.9534,
"eval_samples_per_second": 153.139,
"eval_steps_per_second": 19.929,
"step": 1648
},
{
"epoch": 16.02,
"learning_rate": 1.8718446601941747e-05,
"loss": 0.0626,
"step": 1650
},
{
"epoch": 16.12,
"learning_rate": 1.8710679611650487e-05,
"loss": 0.0868,
"step": 1660
},
{
"epoch": 16.21,
"learning_rate": 1.8702912621359222e-05,
"loss": 0.2295,
"step": 1670
},
{
"epoch": 16.31,
"learning_rate": 1.8695145631067965e-05,
"loss": 0.089,
"step": 1680
},
{
"epoch": 16.41,
"learning_rate": 1.86873786407767e-05,
"loss": 0.0727,
"step": 1690
},
{
"epoch": 16.5,
"learning_rate": 1.867961165048544e-05,
"loss": 0.0831,
"step": 1700
},
{
"epoch": 16.6,
"learning_rate": 1.8671844660194176e-05,
"loss": 0.1162,
"step": 1710
},
{
"epoch": 16.7,
"learning_rate": 1.8664077669902915e-05,
"loss": 0.0484,
"step": 1720
},
{
"epoch": 16.8,
"learning_rate": 1.865631067961165e-05,
"loss": 0.0411,
"step": 1730
},
{
"epoch": 16.89,
"learning_rate": 1.864854368932039e-05,
"loss": 0.0726,
"step": 1740
},
{
"epoch": 16.99,
"learning_rate": 1.864077669902913e-05,
"loss": 0.065,
"step": 1750
},
{
"epoch": 17.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.056606147438287735,
"eval_runtime": 0.9748,
"eval_samples_per_second": 149.773,
"eval_steps_per_second": 19.491,
"step": 1751
},
{
"epoch": 17.09,
"learning_rate": 1.8633009708737865e-05,
"loss": 0.2112,
"step": 1760
},
{
"epoch": 17.18,
"learning_rate": 1.8625242718446604e-05,
"loss": 0.0992,
"step": 1770
},
{
"epoch": 17.28,
"learning_rate": 1.861747572815534e-05,
"loss": 0.1154,
"step": 1780
},
{
"epoch": 17.38,
"learning_rate": 1.860970873786408e-05,
"loss": 0.0843,
"step": 1790
},
{
"epoch": 17.48,
"learning_rate": 1.860194174757282e-05,
"loss": 0.0969,
"step": 1800
},
{
"epoch": 17.57,
"learning_rate": 1.8594174757281554e-05,
"loss": 0.2324,
"step": 1810
},
{
"epoch": 17.67,
"learning_rate": 1.8586407766990293e-05,
"loss": 0.1239,
"step": 1820
},
{
"epoch": 17.77,
"learning_rate": 1.857864077669903e-05,
"loss": 0.1081,
"step": 1830
},
{
"epoch": 17.86,
"learning_rate": 1.857087378640777e-05,
"loss": 0.0354,
"step": 1840
},
{
"epoch": 17.96,
"learning_rate": 1.8563106796116504e-05,
"loss": 0.1437,
"step": 1850
},
{
"epoch": 18.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.04979800060391426,
"eval_runtime": 0.9429,
"eval_samples_per_second": 154.848,
"eval_steps_per_second": 20.151,
"step": 1854
},
{
"epoch": 18.06,
"learning_rate": 1.8555339805825243e-05,
"loss": 0.1741,
"step": 1860
},
{
"epoch": 18.16,
"learning_rate": 1.8547572815533983e-05,
"loss": 0.0442,
"step": 1870
},
{
"epoch": 18.25,
"learning_rate": 1.8539805825242722e-05,
"loss": 0.0778,
"step": 1880
},
{
"epoch": 18.35,
"learning_rate": 1.8532038834951458e-05,
"loss": 0.132,
"step": 1890
},
{
"epoch": 18.45,
"learning_rate": 1.8524271844660197e-05,
"loss": 0.2408,
"step": 1900
},
{
"epoch": 18.54,
"learning_rate": 1.8516504854368933e-05,
"loss": 0.0852,
"step": 1910
},
{
"epoch": 18.64,
"learning_rate": 1.8508737864077672e-05,
"loss": 0.149,
"step": 1920
},
{
"epoch": 18.74,
"learning_rate": 1.850097087378641e-05,
"loss": 0.0426,
"step": 1930
},
{
"epoch": 18.83,
"learning_rate": 1.8493203883495147e-05,
"loss": 0.1248,
"step": 1940
},
{
"epoch": 18.93,
"learning_rate": 1.8485436893203886e-05,
"loss": 0.1527,
"step": 1950
},
{
"epoch": 19.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.0702158659696579,
"eval_runtime": 0.9693,
"eval_samples_per_second": 150.63,
"eval_steps_per_second": 19.603,
"step": 1957
},
{
"epoch": 19.03,
"learning_rate": 1.847766990291262e-05,
"loss": 0.0963,
"step": 1960
},
{
"epoch": 19.13,
"learning_rate": 1.846990291262136e-05,
"loss": 0.1038,
"step": 1970
},
{
"epoch": 19.22,
"learning_rate": 1.8462135922330097e-05,
"loss": 0.0276,
"step": 1980
},
{
"epoch": 19.32,
"learning_rate": 1.8454368932038836e-05,
"loss": 0.2018,
"step": 1990
},
{
"epoch": 19.42,
"learning_rate": 1.8446601941747575e-05,
"loss": 0.1405,
"step": 2000
},
{
"epoch": 19.51,
"learning_rate": 1.843883495145631e-05,
"loss": 0.0337,
"step": 2010
},
{
"epoch": 19.61,
"learning_rate": 1.843106796116505e-05,
"loss": 0.1076,
"step": 2020
},
{
"epoch": 19.71,
"learning_rate": 1.842330097087379e-05,
"loss": 0.1037,
"step": 2030
},
{
"epoch": 19.81,
"learning_rate": 1.8415533980582525e-05,
"loss": 0.0665,
"step": 2040
},
{
"epoch": 19.9,
"learning_rate": 1.8407766990291264e-05,
"loss": 0.1567,
"step": 2050
},
{
"epoch": 20.0,
"learning_rate": 1.8400000000000003e-05,
"loss": 0.0682,
"step": 2060
},
{
"epoch": 20.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.06622537225484848,
"eval_runtime": 0.9584,
"eval_samples_per_second": 152.335,
"eval_steps_per_second": 19.824,
"step": 2060
},
{
"epoch": 20.1,
"learning_rate": 1.839223300970874e-05,
"loss": 0.0995,
"step": 2070
},
{
"epoch": 20.19,
"learning_rate": 1.838446601941748e-05,
"loss": 0.0921,
"step": 2080
},
{
"epoch": 20.29,
"learning_rate": 1.8376699029126214e-05,
"loss": 0.2157,
"step": 2090
},
{
"epoch": 20.39,
"learning_rate": 1.8368932038834953e-05,
"loss": 0.1321,
"step": 2100
},
{
"epoch": 20.49,
"learning_rate": 1.836116504854369e-05,
"loss": 0.0957,
"step": 2110
},
{
"epoch": 20.58,
"learning_rate": 1.835339805825243e-05,
"loss": 0.2989,
"step": 2120
},
{
"epoch": 20.68,
"learning_rate": 1.8345631067961168e-05,
"loss": 0.2302,
"step": 2130
},
{
"epoch": 20.78,
"learning_rate": 1.8337864077669903e-05,
"loss": 0.1633,
"step": 2140
},
{
"epoch": 20.87,
"learning_rate": 1.8330097087378643e-05,
"loss": 0.0726,
"step": 2150
},
{
"epoch": 20.97,
"learning_rate": 1.832233009708738e-05,
"loss": 0.1013,
"step": 2160
},
{
"epoch": 21.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.07291552424430847,
"eval_runtime": 0.958,
"eval_samples_per_second": 152.399,
"eval_steps_per_second": 19.833,
"step": 2163
},
{
"epoch": 21.07,
"learning_rate": 1.8314563106796118e-05,
"loss": 0.0254,
"step": 2170
},
{
"epoch": 21.17,
"learning_rate": 1.8306796116504857e-05,
"loss": 0.1409,
"step": 2180
},
{
"epoch": 21.26,
"learning_rate": 1.8299029126213596e-05,
"loss": 0.0853,
"step": 2190
},
{
"epoch": 21.36,
"learning_rate": 1.8291262135922332e-05,
"loss": 0.131,
"step": 2200
},
{
"epoch": 21.46,
"learning_rate": 1.828349514563107e-05,
"loss": 0.1024,
"step": 2210
},
{
"epoch": 21.55,
"learning_rate": 1.8275728155339807e-05,
"loss": 0.0212,
"step": 2220
},
{
"epoch": 21.65,
"learning_rate": 1.8267961165048546e-05,
"loss": 0.0578,
"step": 2230
},
{
"epoch": 21.75,
"learning_rate": 1.826019417475728e-05,
"loss": 0.1048,
"step": 2240
},
{
"epoch": 21.84,
"learning_rate": 1.825242718446602e-05,
"loss": 0.0612,
"step": 2250
},
{
"epoch": 21.94,
"learning_rate": 1.824466019417476e-05,
"loss": 0.0807,
"step": 2260
},
{
"epoch": 22.0,
"eval_accuracy": 1.0,
"eval_loss": 0.02733495458960533,
"eval_runtime": 0.9501,
"eval_samples_per_second": 153.673,
"eval_steps_per_second": 19.999,
"step": 2266
},
{
"epoch": 22.04,
"learning_rate": 1.8236893203883496e-05,
"loss": 0.1788,
"step": 2270
},
{
"epoch": 22.14,
"learning_rate": 1.8229126213592235e-05,
"loss": 0.1037,
"step": 2280
},
{
"epoch": 22.23,
"learning_rate": 1.822135922330097e-05,
"loss": 0.0909,
"step": 2290
},
{
"epoch": 22.33,
"learning_rate": 1.821359223300971e-05,
"loss": 0.0597,
"step": 2300
},
{
"epoch": 22.43,
"learning_rate": 1.820582524271845e-05,
"loss": 0.0693,
"step": 2310
},
{
"epoch": 22.52,
"learning_rate": 1.8198058252427185e-05,
"loss": 0.0684,
"step": 2320
},
{
"epoch": 22.62,
"learning_rate": 1.8190291262135924e-05,
"loss": 0.1146,
"step": 2330
},
{
"epoch": 22.72,
"learning_rate": 1.818252427184466e-05,
"loss": 0.0753,
"step": 2340
},
{
"epoch": 22.82,
"learning_rate": 1.81747572815534e-05,
"loss": 0.0691,
"step": 2350
},
{
"epoch": 22.91,
"learning_rate": 1.8166990291262135e-05,
"loss": 0.0803,
"step": 2360
},
{
"epoch": 23.0,
"eval_accuracy": 0.9657534246575342,
"eval_loss": 0.13809683918952942,
"eval_runtime": 0.9466,
"eval_samples_per_second": 154.239,
"eval_steps_per_second": 20.072,
"step": 2369
},
{
"epoch": 23.01,
"learning_rate": 1.8159223300970878e-05,
"loss": 0.1052,
"step": 2370
},
{
"epoch": 23.11,
"learning_rate": 1.8151456310679613e-05,
"loss": 0.1529,
"step": 2380
},
{
"epoch": 23.2,
"learning_rate": 1.8143689320388353e-05,
"loss": 0.0929,
"step": 2390
},
{
"epoch": 23.3,
"learning_rate": 1.813592233009709e-05,
"loss": 0.1057,
"step": 2400
},
{
"epoch": 23.4,
"learning_rate": 1.8128155339805828e-05,
"loss": 0.0545,
"step": 2410
},
{
"epoch": 23.5,
"learning_rate": 1.8120388349514563e-05,
"loss": 0.0632,
"step": 2420
},
{
"epoch": 23.59,
"learning_rate": 1.8112621359223303e-05,
"loss": 0.0276,
"step": 2430
},
{
"epoch": 23.69,
"learning_rate": 1.8104854368932042e-05,
"loss": 0.0976,
"step": 2440
},
{
"epoch": 23.79,
"learning_rate": 1.8097087378640778e-05,
"loss": 0.087,
"step": 2450
},
{
"epoch": 23.88,
"learning_rate": 1.8089320388349517e-05,
"loss": 0.248,
"step": 2460
},
{
"epoch": 23.98,
"learning_rate": 1.8081553398058253e-05,
"loss": 0.0972,
"step": 2470
},
{
"epoch": 24.0,
"eval_accuracy": 1.0,
"eval_loss": 0.025691555812954903,
"eval_runtime": 0.9246,
"eval_samples_per_second": 157.902,
"eval_steps_per_second": 20.549,
"step": 2472
},
{
"epoch": 24.08,
"learning_rate": 1.8073786407766992e-05,
"loss": 0.1732,
"step": 2480
},
{
"epoch": 24.17,
"learning_rate": 1.8066019417475728e-05,
"loss": 0.0377,
"step": 2490
},
{
"epoch": 24.27,
"learning_rate": 1.8058252427184467e-05,
"loss": 0.0686,
"step": 2500
},
{
"epoch": 24.37,
"learning_rate": 1.8050485436893206e-05,
"loss": 0.0926,
"step": 2510
},
{
"epoch": 24.47,
"learning_rate": 1.8042718446601942e-05,
"loss": 0.079,
"step": 2520
},
{
"epoch": 24.56,
"learning_rate": 1.803495145631068e-05,
"loss": 0.0477,
"step": 2530
},
{
"epoch": 24.66,
"learning_rate": 1.802718446601942e-05,
"loss": 0.0554,
"step": 2540
},
{
"epoch": 24.76,
"learning_rate": 1.8019417475728156e-05,
"loss": 0.0732,
"step": 2550
},
{
"epoch": 24.85,
"learning_rate": 1.8011650485436895e-05,
"loss": 0.0215,
"step": 2560
},
{
"epoch": 24.95,
"learning_rate": 1.8003883495145634e-05,
"loss": 0.0173,
"step": 2570
},
{
"epoch": 25.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.05056421086192131,
"eval_runtime": 0.9345,
"eval_samples_per_second": 156.232,
"eval_steps_per_second": 20.332,
"step": 2575
},
{
"epoch": 25.05,
"learning_rate": 1.799611650485437e-05,
"loss": 0.0749,
"step": 2580
},
{
"epoch": 25.15,
"learning_rate": 1.798834951456311e-05,
"loss": 0.0437,
"step": 2590
},
{
"epoch": 25.24,
"learning_rate": 1.7980582524271845e-05,
"loss": 0.0748,
"step": 2600
},
{
"epoch": 25.34,
"learning_rate": 1.7972815533980584e-05,
"loss": 0.04,
"step": 2610
},
{
"epoch": 25.44,
"learning_rate": 1.7965048543689323e-05,
"loss": 0.0721,
"step": 2620
},
{
"epoch": 25.53,
"learning_rate": 1.795728155339806e-05,
"loss": 0.143,
"step": 2630
},
{
"epoch": 25.63,
"learning_rate": 1.79495145631068e-05,
"loss": 0.185,
"step": 2640
},
{
"epoch": 25.73,
"learning_rate": 1.7941747572815534e-05,
"loss": 0.0379,
"step": 2650
},
{
"epoch": 25.83,
"learning_rate": 1.7933980582524273e-05,
"loss": 0.1209,
"step": 2660
},
{
"epoch": 25.92,
"learning_rate": 1.792621359223301e-05,
"loss": 0.075,
"step": 2670
},
{
"epoch": 26.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.09800746291875839,
"eval_runtime": 0.9365,
"eval_samples_per_second": 155.906,
"eval_steps_per_second": 20.289,
"step": 2678
},
{
"epoch": 26.02,
"learning_rate": 1.791844660194175e-05,
"loss": 0.0662,
"step": 2680
},
{
"epoch": 26.12,
"learning_rate": 1.7910679611650488e-05,
"loss": 0.0206,
"step": 2690
},
{
"epoch": 26.21,
"learning_rate": 1.7902912621359227e-05,
"loss": 0.0576,
"step": 2700
},
{
"epoch": 26.31,
"learning_rate": 1.7895145631067963e-05,
"loss": 0.0479,
"step": 2710
},
{
"epoch": 26.41,
"learning_rate": 1.7887378640776702e-05,
"loss": 0.039,
"step": 2720
},
{
"epoch": 26.5,
"learning_rate": 1.7879611650485438e-05,
"loss": 0.0851,
"step": 2730
},
{
"epoch": 26.6,
"learning_rate": 1.7871844660194177e-05,
"loss": 0.1626,
"step": 2740
},
{
"epoch": 26.7,
"learning_rate": 1.7864077669902916e-05,
"loss": 0.079,
"step": 2750
},
{
"epoch": 26.8,
"learning_rate": 1.7856310679611652e-05,
"loss": 0.0268,
"step": 2760
},
{
"epoch": 26.89,
"learning_rate": 1.784854368932039e-05,
"loss": 0.0656,
"step": 2770
},
{
"epoch": 26.99,
"learning_rate": 1.7840776699029127e-05,
"loss": 0.1103,
"step": 2780
},
{
"epoch": 27.0,
"eval_accuracy": 0.958904109589041,
"eval_loss": 0.10835416615009308,
"eval_runtime": 0.9504,
"eval_samples_per_second": 153.614,
"eval_steps_per_second": 19.991,
"step": 2781
},
{
"epoch": 27.09,
"learning_rate": 1.7833009708737866e-05,
"loss": 0.0675,
"step": 2790
},
{
"epoch": 27.18,
"learning_rate": 1.7825242718446602e-05,
"loss": 0.183,
"step": 2800
},
{
"epoch": 27.28,
"learning_rate": 1.781747572815534e-05,
"loss": 0.0763,
"step": 2810
},
{
"epoch": 27.38,
"learning_rate": 1.780970873786408e-05,
"loss": 0.0298,
"step": 2820
},
{
"epoch": 27.48,
"learning_rate": 1.7801941747572816e-05,
"loss": 0.1811,
"step": 2830
},
{
"epoch": 27.57,
"learning_rate": 1.7794174757281555e-05,
"loss": 0.0562,
"step": 2840
},
{
"epoch": 27.67,
"learning_rate": 1.778640776699029e-05,
"loss": 0.146,
"step": 2850
},
{
"epoch": 27.77,
"learning_rate": 1.777864077669903e-05,
"loss": 0.0538,
"step": 2860
},
{
"epoch": 27.86,
"learning_rate": 1.7770873786407766e-05,
"loss": 0.1454,
"step": 2870
},
{
"epoch": 27.96,
"learning_rate": 1.776310679611651e-05,
"loss": 0.0622,
"step": 2880
},
{
"epoch": 28.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.02398013137280941,
"eval_runtime": 0.9144,
"eval_samples_per_second": 159.669,
"eval_steps_per_second": 20.779,
"step": 2884
},
{
"epoch": 28.06,
"learning_rate": 1.7755339805825244e-05,
"loss": 0.1488,
"step": 2890
},
{
"epoch": 28.16,
"learning_rate": 1.7747572815533983e-05,
"loss": 0.0624,
"step": 2900
},
{
"epoch": 28.25,
"learning_rate": 1.773980582524272e-05,
"loss": 0.0144,
"step": 2910
},
{
"epoch": 28.35,
"learning_rate": 1.773203883495146e-05,
"loss": 0.0935,
"step": 2920
},
{
"epoch": 28.45,
"learning_rate": 1.7724271844660194e-05,
"loss": 0.1088,
"step": 2930
},
{
"epoch": 28.54,
"learning_rate": 1.7716504854368933e-05,
"loss": 0.0121,
"step": 2940
},
{
"epoch": 28.64,
"learning_rate": 1.7708737864077673e-05,
"loss": 0.0328,
"step": 2950
},
{
"epoch": 28.74,
"learning_rate": 1.770097087378641e-05,
"loss": 0.0876,
"step": 2960
},
{
"epoch": 28.83,
"learning_rate": 1.7693203883495148e-05,
"loss": 0.0872,
"step": 2970
},
{
"epoch": 28.93,
"learning_rate": 1.7685436893203883e-05,
"loss": 0.0126,
"step": 2980
},
{
"epoch": 29.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.03914155438542366,
"eval_runtime": 0.9262,
"eval_samples_per_second": 157.634,
"eval_steps_per_second": 20.514,
"step": 2987
},
{
"epoch": 29.03,
"learning_rate": 1.7677669902912623e-05,
"loss": 0.0274,
"step": 2990
},
{
"epoch": 29.13,
"learning_rate": 1.7669902912621362e-05,
"loss": 0.0635,
"step": 3000
},
{
"epoch": 29.22,
"learning_rate": 1.7662135922330098e-05,
"loss": 0.1097,
"step": 3010
},
{
"epoch": 29.32,
"learning_rate": 1.7654368932038837e-05,
"loss": 0.1493,
"step": 3020
},
{
"epoch": 29.42,
"learning_rate": 1.7646601941747576e-05,
"loss": 0.0423,
"step": 3030
},
{
"epoch": 29.51,
"learning_rate": 1.7638834951456312e-05,
"loss": 0.1211,
"step": 3040
},
{
"epoch": 29.61,
"learning_rate": 1.763106796116505e-05,
"loss": 0.0614,
"step": 3050
},
{
"epoch": 29.71,
"learning_rate": 1.7623300970873787e-05,
"loss": 0.0644,
"step": 3060
},
{
"epoch": 29.81,
"learning_rate": 1.7615533980582526e-05,
"loss": 0.0784,
"step": 3070
},
{
"epoch": 29.9,
"learning_rate": 1.7607766990291265e-05,
"loss": 0.156,
"step": 3080
},
{
"epoch": 30.0,
"learning_rate": 1.76e-05,
"loss": 0.082,
"step": 3090
},
{
"epoch": 30.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.08485659211874008,
"eval_runtime": 0.9382,
"eval_samples_per_second": 155.618,
"eval_steps_per_second": 20.252,
"step": 3090
},
{
"epoch": 30.1,
"learning_rate": 1.759223300970874e-05,
"loss": 0.2005,
"step": 3100
},
{
"epoch": 30.19,
"learning_rate": 1.7584466019417476e-05,
"loss": 0.322,
"step": 3110
},
{
"epoch": 30.29,
"learning_rate": 1.7576699029126215e-05,
"loss": 0.0765,
"step": 3120
},
{
"epoch": 30.39,
"learning_rate": 1.7568932038834954e-05,
"loss": 0.0799,
"step": 3130
},
{
"epoch": 30.49,
"learning_rate": 1.756116504854369e-05,
"loss": 0.0892,
"step": 3140
},
{
"epoch": 30.58,
"learning_rate": 1.755339805825243e-05,
"loss": 0.0932,
"step": 3150
},
{
"epoch": 30.68,
"learning_rate": 1.7545631067961165e-05,
"loss": 0.0882,
"step": 3160
},
{
"epoch": 30.78,
"learning_rate": 1.7537864077669904e-05,
"loss": 0.0714,
"step": 3170
},
{
"epoch": 30.87,
"learning_rate": 1.753009708737864e-05,
"loss": 0.0537,
"step": 3180
},
{
"epoch": 30.97,
"learning_rate": 1.7522330097087383e-05,
"loss": 0.0203,
"step": 3190
},
{
"epoch": 31.0,
"eval_accuracy": 1.0,
"eval_loss": 0.019243279471993446,
"eval_runtime": 0.9242,
"eval_samples_per_second": 157.974,
"eval_steps_per_second": 20.558,
"step": 3193
},
{
"epoch": 31.07,
"learning_rate": 1.751456310679612e-05,
"loss": 0.1567,
"step": 3200
},
{
"epoch": 31.17,
"learning_rate": 1.7506796116504858e-05,
"loss": 0.0829,
"step": 3210
},
{
"epoch": 31.26,
"learning_rate": 1.7499029126213593e-05,
"loss": 0.0259,
"step": 3220
},
{
"epoch": 31.36,
"learning_rate": 1.7491262135922333e-05,
"loss": 0.0337,
"step": 3230
},
{
"epoch": 31.46,
"learning_rate": 1.748349514563107e-05,
"loss": 0.0407,
"step": 3240
},
{
"epoch": 31.55,
"learning_rate": 1.7475728155339808e-05,
"loss": 0.1494,
"step": 3250
},
{
"epoch": 31.65,
"learning_rate": 1.7467961165048547e-05,
"loss": 0.1308,
"step": 3260
},
{
"epoch": 31.75,
"learning_rate": 1.7460194174757283e-05,
"loss": 0.0744,
"step": 3270
},
{
"epoch": 31.84,
"learning_rate": 1.7452427184466022e-05,
"loss": 0.059,
"step": 3280
},
{
"epoch": 31.94,
"learning_rate": 1.7444660194174758e-05,
"loss": 0.1044,
"step": 3290
},
{
"epoch": 32.0,
"eval_accuracy": 0.9657534246575342,
"eval_loss": 0.11390157043933868,
"eval_runtime": 0.9599,
"eval_samples_per_second": 152.1,
"eval_steps_per_second": 19.794,
"step": 3296
},
{
"epoch": 32.04,
"learning_rate": 1.7436893203883497e-05,
"loss": 0.1431,
"step": 3300
},
{
"epoch": 32.14,
"learning_rate": 1.7429126213592233e-05,
"loss": 0.0207,
"step": 3310
},
{
"epoch": 32.23,
"learning_rate": 1.7421359223300972e-05,
"loss": 0.1726,
"step": 3320
},
{
"epoch": 32.33,
"learning_rate": 1.741359223300971e-05,
"loss": 0.0813,
"step": 3330
},
{
"epoch": 32.43,
"learning_rate": 1.7405825242718447e-05,
"loss": 0.1417,
"step": 3340
},
{
"epoch": 32.52,
"learning_rate": 1.7398058252427186e-05,
"loss": 0.036,
"step": 3350
},
{
"epoch": 32.62,
"learning_rate": 1.7390291262135922e-05,
"loss": 0.065,
"step": 3360
},
{
"epoch": 32.72,
"learning_rate": 1.738252427184466e-05,
"loss": 0.0654,
"step": 3370
},
{
"epoch": 32.82,
"learning_rate": 1.73747572815534e-05,
"loss": 0.0311,
"step": 3380
},
{
"epoch": 32.91,
"learning_rate": 1.736699029126214e-05,
"loss": 0.0134,
"step": 3390
},
{
"epoch": 33.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.057724058628082275,
"eval_runtime": 0.9176,
"eval_samples_per_second": 159.114,
"eval_steps_per_second": 20.707,
"step": 3399
},
{
"epoch": 33.01,
"learning_rate": 1.7359223300970875e-05,
"loss": 0.0149,
"step": 3400
},
{
"epoch": 33.11,
"learning_rate": 1.7351456310679614e-05,
"loss": 0.1328,
"step": 3410
},
{
"epoch": 33.2,
"learning_rate": 1.734368932038835e-05,
"loss": 0.0303,
"step": 3420
},
{
"epoch": 33.3,
"learning_rate": 1.733592233009709e-05,
"loss": 0.0764,
"step": 3430
},
{
"epoch": 33.4,
"learning_rate": 1.732815533980583e-05,
"loss": 0.0176,
"step": 3440
},
{
"epoch": 33.5,
"learning_rate": 1.7320388349514564e-05,
"loss": 0.0133,
"step": 3450
},
{
"epoch": 33.59,
"learning_rate": 1.7312621359223303e-05,
"loss": 0.2035,
"step": 3460
},
{
"epoch": 33.69,
"learning_rate": 1.730485436893204e-05,
"loss": 0.0844,
"step": 3470
},
{
"epoch": 33.79,
"learning_rate": 1.729708737864078e-05,
"loss": 0.062,
"step": 3480
},
{
"epoch": 33.88,
"learning_rate": 1.7289320388349514e-05,
"loss": 0.0381,
"step": 3490
},
{
"epoch": 33.98,
"learning_rate": 1.7281553398058253e-05,
"loss": 0.0923,
"step": 3500
},
{
"epoch": 34.0,
"eval_accuracy": 0.958904109589041,
"eval_loss": 0.182390034198761,
"eval_runtime": 0.9423,
"eval_samples_per_second": 154.946,
"eval_steps_per_second": 20.164,
"step": 3502
},
{
"epoch": 34.08,
"learning_rate": 1.7273786407766993e-05,
"loss": 0.044,
"step": 3510
},
{
"epoch": 34.17,
"learning_rate": 1.726601941747573e-05,
"loss": 0.0188,
"step": 3520
},
{
"epoch": 34.27,
"learning_rate": 1.7258252427184468e-05,
"loss": 0.0863,
"step": 3530
},
{
"epoch": 34.37,
"learning_rate": 1.7250485436893207e-05,
"loss": 0.1034,
"step": 3540
},
{
"epoch": 34.47,
"learning_rate": 1.7242718446601943e-05,
"loss": 0.0148,
"step": 3550
},
{
"epoch": 34.56,
"learning_rate": 1.7234951456310682e-05,
"loss": 0.0083,
"step": 3560
},
{
"epoch": 34.66,
"learning_rate": 1.722718446601942e-05,
"loss": 0.0313,
"step": 3570
},
{
"epoch": 34.76,
"learning_rate": 1.7219417475728157e-05,
"loss": 0.1949,
"step": 3580
},
{
"epoch": 34.85,
"learning_rate": 1.7211650485436896e-05,
"loss": 0.0643,
"step": 3590
},
{
"epoch": 34.95,
"learning_rate": 1.7203883495145632e-05,
"loss": 0.1156,
"step": 3600
},
{
"epoch": 35.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.022417498752474785,
"eval_runtime": 0.9325,
"eval_samples_per_second": 156.574,
"eval_steps_per_second": 20.376,
"step": 3605
},
{
"epoch": 35.05,
"learning_rate": 1.719611650485437e-05,
"loss": 0.1949,
"step": 3610
},
{
"epoch": 35.15,
"learning_rate": 1.7188349514563107e-05,
"loss": 0.0527,
"step": 3620
},
{
"epoch": 35.24,
"learning_rate": 1.7180582524271846e-05,
"loss": 0.0105,
"step": 3630
},
{
"epoch": 35.34,
"learning_rate": 1.7172815533980585e-05,
"loss": 0.1182,
"step": 3640
},
{
"epoch": 35.44,
"learning_rate": 1.716504854368932e-05,
"loss": 0.0799,
"step": 3650
},
{
"epoch": 35.53,
"learning_rate": 1.715728155339806e-05,
"loss": 0.1506,
"step": 3660
},
{
"epoch": 35.63,
"learning_rate": 1.7149514563106796e-05,
"loss": 0.1022,
"step": 3670
},
{
"epoch": 35.73,
"learning_rate": 1.7141747572815535e-05,
"loss": 0.0591,
"step": 3680
},
{
"epoch": 35.83,
"learning_rate": 1.713398058252427e-05,
"loss": 0.0083,
"step": 3690
},
{
"epoch": 35.92,
"learning_rate": 1.7126213592233013e-05,
"loss": 0.0161,
"step": 3700
},
{
"epoch": 36.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.03223036974668503,
"eval_runtime": 0.926,
"eval_samples_per_second": 157.661,
"eval_steps_per_second": 20.518,
"step": 3708
},
{
"epoch": 36.02,
"learning_rate": 1.711844660194175e-05,
"loss": 0.1174,
"step": 3710
},
{
"epoch": 36.12,
"learning_rate": 1.711067961165049e-05,
"loss": 0.0884,
"step": 3720
},
{
"epoch": 36.21,
"learning_rate": 1.7102912621359224e-05,
"loss": 0.0085,
"step": 3730
},
{
"epoch": 36.31,
"learning_rate": 1.7095145631067963e-05,
"loss": 0.1102,
"step": 3740
},
{
"epoch": 36.41,
"learning_rate": 1.70873786407767e-05,
"loss": 0.1087,
"step": 3750
},
{
"epoch": 36.5,
"learning_rate": 1.707961165048544e-05,
"loss": 0.0612,
"step": 3760
},
{
"epoch": 36.6,
"learning_rate": 1.7071844660194178e-05,
"loss": 0.0383,
"step": 3770
},
{
"epoch": 36.7,
"learning_rate": 1.7064077669902913e-05,
"loss": 0.021,
"step": 3780
},
{
"epoch": 36.8,
"learning_rate": 1.7056310679611653e-05,
"loss": 0.1188,
"step": 3790
},
{
"epoch": 36.89,
"learning_rate": 1.704854368932039e-05,
"loss": 0.0805,
"step": 3800
},
{
"epoch": 36.99,
"learning_rate": 1.7040776699029128e-05,
"loss": 0.0754,
"step": 3810
},
{
"epoch": 37.0,
"eval_accuracy": 0.9726027397260274,
"eval_loss": 0.10264816880226135,
"eval_runtime": 0.9208,
"eval_samples_per_second": 158.561,
"eval_steps_per_second": 20.635,
"step": 3811
},
{
"epoch": 37.09,
"learning_rate": 1.7033009708737867e-05,
"loss": 0.012,
"step": 3820
},
{
"epoch": 37.18,
"learning_rate": 1.7025242718446603e-05,
"loss": 0.0646,
"step": 3830
},
{
"epoch": 37.28,
"learning_rate": 1.7017475728155342e-05,
"loss": 0.0136,
"step": 3840
},
{
"epoch": 37.38,
"learning_rate": 1.7009708737864078e-05,
"loss": 0.0409,
"step": 3850
},
{
"epoch": 37.48,
"learning_rate": 1.7001941747572817e-05,
"loss": 0.109,
"step": 3860
},
{
"epoch": 37.57,
"learning_rate": 1.6994174757281553e-05,
"loss": 0.0205,
"step": 3870
},
{
"epoch": 37.67,
"learning_rate": 1.6986407766990292e-05,
"loss": 0.1262,
"step": 3880
},
{
"epoch": 37.77,
"learning_rate": 1.697864077669903e-05,
"loss": 0.1535,
"step": 3890
},
{
"epoch": 37.86,
"learning_rate": 1.697087378640777e-05,
"loss": 0.0292,
"step": 3900
},
{
"epoch": 37.96,
"learning_rate": 1.6963106796116506e-05,
"loss": 0.0356,
"step": 3910
},
{
"epoch": 38.0,
"eval_accuracy": 0.952054794520548,
"eval_loss": 0.25439009070396423,
"eval_runtime": 0.9243,
"eval_samples_per_second": 157.958,
"eval_steps_per_second": 20.556,
"step": 3914
},
{
"epoch": 38.06,
"learning_rate": 1.6955339805825245e-05,
"loss": 0.1842,
"step": 3920
},
{
"epoch": 38.16,
"learning_rate": 1.694757281553398e-05,
"loss": 0.1801,
"step": 3930
},
{
"epoch": 38.25,
"learning_rate": 1.693980582524272e-05,
"loss": 0.0089,
"step": 3940
},
{
"epoch": 38.35,
"learning_rate": 1.693203883495146e-05,
"loss": 0.0289,
"step": 3950
},
{
"epoch": 38.45,
"learning_rate": 1.6924271844660195e-05,
"loss": 0.0536,
"step": 3960
},
{
"epoch": 38.54,
"learning_rate": 1.6916504854368934e-05,
"loss": 0.1036,
"step": 3970
},
{
"epoch": 38.64,
"learning_rate": 1.690873786407767e-05,
"loss": 0.0112,
"step": 3980
},
{
"epoch": 38.74,
"learning_rate": 1.690097087378641e-05,
"loss": 0.008,
"step": 3990
},
{
"epoch": 38.83,
"learning_rate": 1.6893203883495145e-05,
"loss": 0.0183,
"step": 4000
},
{
"epoch": 38.93,
"learning_rate": 1.6885436893203884e-05,
"loss": 0.008,
"step": 4010
},
{
"epoch": 39.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.10272617638111115,
"eval_runtime": 0.9261,
"eval_samples_per_second": 157.656,
"eval_steps_per_second": 20.517,
"step": 4017
},
{
"epoch": 39.03,
"learning_rate": 1.6877669902912623e-05,
"loss": 0.1074,
"step": 4020
},
{
"epoch": 39.13,
"learning_rate": 1.686990291262136e-05,
"loss": 0.1753,
"step": 4030
},
{
"epoch": 39.22,
"learning_rate": 1.68621359223301e-05,
"loss": 0.0181,
"step": 4040
},
{
"epoch": 39.32,
"learning_rate": 1.6854368932038838e-05,
"loss": 0.0541,
"step": 4050
},
{
"epoch": 39.42,
"learning_rate": 1.6846601941747573e-05,
"loss": 0.1461,
"step": 4060
},
{
"epoch": 39.51,
"learning_rate": 1.6838834951456313e-05,
"loss": 0.1556,
"step": 4070
},
{
"epoch": 39.61,
"learning_rate": 1.6831067961165052e-05,
"loss": 0.0072,
"step": 4080
},
{
"epoch": 39.71,
"learning_rate": 1.6823300970873788e-05,
"loss": 0.1443,
"step": 4090
},
{
"epoch": 39.81,
"learning_rate": 1.6815533980582527e-05,
"loss": 0.0272,
"step": 4100
},
{
"epoch": 39.9,
"learning_rate": 1.6807766990291263e-05,
"loss": 0.0404,
"step": 4110
},
{
"epoch": 40.0,
"learning_rate": 1.6800000000000002e-05,
"loss": 0.1293,
"step": 4120
},
{
"epoch": 40.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.06097627803683281,
"eval_runtime": 0.961,
"eval_samples_per_second": 151.929,
"eval_steps_per_second": 19.772,
"step": 4120
},
{
"epoch": 40.1,
"learning_rate": 1.6792233009708738e-05,
"loss": 0.0466,
"step": 4130
},
{
"epoch": 40.19,
"learning_rate": 1.6784466019417477e-05,
"loss": 0.0499,
"step": 4140
},
{
"epoch": 40.29,
"learning_rate": 1.6776699029126216e-05,
"loss": 0.1867,
"step": 4150
},
{
"epoch": 40.39,
"learning_rate": 1.6768932038834952e-05,
"loss": 0.0198,
"step": 4160
},
{
"epoch": 40.49,
"learning_rate": 1.676116504854369e-05,
"loss": 0.0074,
"step": 4170
},
{
"epoch": 40.58,
"learning_rate": 1.6753398058252427e-05,
"loss": 0.0624,
"step": 4180
},
{
"epoch": 40.68,
"learning_rate": 1.6745631067961166e-05,
"loss": 0.0607,
"step": 4190
},
{
"epoch": 40.78,
"learning_rate": 1.6737864077669905e-05,
"loss": 0.0749,
"step": 4200
},
{
"epoch": 40.87,
"learning_rate": 1.6730097087378644e-05,
"loss": 0.0066,
"step": 4210
},
{
"epoch": 40.97,
"learning_rate": 1.672233009708738e-05,
"loss": 0.0578,
"step": 4220
},
{
"epoch": 41.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.08583226799964905,
"eval_runtime": 0.9345,
"eval_samples_per_second": 156.24,
"eval_steps_per_second": 20.333,
"step": 4223
},
{
"epoch": 41.07,
"learning_rate": 1.671456310679612e-05,
"loss": 0.1032,
"step": 4230
},
{
"epoch": 41.17,
"learning_rate": 1.6706796116504855e-05,
"loss": 0.0544,
"step": 4240
},
{
"epoch": 41.26,
"learning_rate": 1.6699029126213594e-05,
"loss": 0.1737,
"step": 4250
},
{
"epoch": 41.36,
"learning_rate": 1.669126213592233e-05,
"loss": 0.0807,
"step": 4260
},
{
"epoch": 41.46,
"learning_rate": 1.668349514563107e-05,
"loss": 0.0775,
"step": 4270
},
{
"epoch": 41.55,
"learning_rate": 1.667572815533981e-05,
"loss": 0.0672,
"step": 4280
},
{
"epoch": 41.65,
"learning_rate": 1.6667961165048544e-05,
"loss": 0.049,
"step": 4290
},
{
"epoch": 41.75,
"learning_rate": 1.6660194174757283e-05,
"loss": 0.1505,
"step": 4300
},
{
"epoch": 41.84,
"learning_rate": 1.665242718446602e-05,
"loss": 0.1309,
"step": 4310
},
{
"epoch": 41.94,
"learning_rate": 1.664466019417476e-05,
"loss": 0.0528,
"step": 4320
},
{
"epoch": 42.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.09928789734840393,
"eval_runtime": 0.9576,
"eval_samples_per_second": 152.464,
"eval_steps_per_second": 19.841,
"step": 4326
},
{
"epoch": 42.04,
"learning_rate": 1.6636893203883498e-05,
"loss": 0.1204,
"step": 4330
},
{
"epoch": 42.14,
"learning_rate": 1.6629126213592233e-05,
"loss": 0.0681,
"step": 4340
},
{
"epoch": 42.23,
"learning_rate": 1.6621359223300973e-05,
"loss": 0.1064,
"step": 4350
},
{
"epoch": 42.33,
"learning_rate": 1.661359223300971e-05,
"loss": 0.0672,
"step": 4360
},
{
"epoch": 42.43,
"learning_rate": 1.6605825242718448e-05,
"loss": 0.0494,
"step": 4370
},
{
"epoch": 42.52,
"learning_rate": 1.6598058252427183e-05,
"loss": 0.0866,
"step": 4380
},
{
"epoch": 42.62,
"learning_rate": 1.6590291262135926e-05,
"loss": 0.0385,
"step": 4390
},
{
"epoch": 42.72,
"learning_rate": 1.6582524271844662e-05,
"loss": 0.0143,
"step": 4400
},
{
"epoch": 42.82,
"learning_rate": 1.65747572815534e-05,
"loss": 0.014,
"step": 4410
},
{
"epoch": 42.91,
"learning_rate": 1.6566990291262137e-05,
"loss": 0.0886,
"step": 4420
},
{
"epoch": 43.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.03256027027964592,
"eval_runtime": 0.9428,
"eval_samples_per_second": 154.855,
"eval_steps_per_second": 20.152,
"step": 4429
},
{
"epoch": 43.01,
"learning_rate": 1.6559223300970876e-05,
"loss": 0.0291,
"step": 4430
},
{
"epoch": 43.11,
"learning_rate": 1.6551456310679612e-05,
"loss": 0.0058,
"step": 4440
},
{
"epoch": 43.2,
"learning_rate": 1.654368932038835e-05,
"loss": 0.0331,
"step": 4450
},
{
"epoch": 43.3,
"learning_rate": 1.653592233009709e-05,
"loss": 0.2086,
"step": 4460
},
{
"epoch": 43.4,
"learning_rate": 1.6528155339805826e-05,
"loss": 0.0081,
"step": 4470
},
{
"epoch": 43.5,
"learning_rate": 1.6520388349514565e-05,
"loss": 0.0051,
"step": 4480
},
{
"epoch": 43.59,
"learning_rate": 1.65126213592233e-05,
"loss": 0.2006,
"step": 4490
},
{
"epoch": 43.69,
"learning_rate": 1.650485436893204e-05,
"loss": 0.0789,
"step": 4500
},
{
"epoch": 43.79,
"learning_rate": 1.6497087378640776e-05,
"loss": 0.0571,
"step": 4510
},
{
"epoch": 43.88,
"learning_rate": 1.6489320388349515e-05,
"loss": 0.0509,
"step": 4520
},
{
"epoch": 43.98,
"learning_rate": 1.6481553398058254e-05,
"loss": 0.0254,
"step": 4530
},
{
"epoch": 44.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.03951861709356308,
"eval_runtime": 0.9204,
"eval_samples_per_second": 158.634,
"eval_steps_per_second": 20.644,
"step": 4532
},
{
"epoch": 44.08,
"learning_rate": 1.647378640776699e-05,
"loss": 0.0554,
"step": 4540
},
{
"epoch": 44.17,
"learning_rate": 1.646601941747573e-05,
"loss": 0.1147,
"step": 4550
},
{
"epoch": 44.27,
"learning_rate": 1.645825242718447e-05,
"loss": 0.0991,
"step": 4560
},
{
"epoch": 44.37,
"learning_rate": 1.6450485436893204e-05,
"loss": 0.0387,
"step": 4570
},
{
"epoch": 44.47,
"learning_rate": 1.6442718446601943e-05,
"loss": 0.0065,
"step": 4580
},
{
"epoch": 44.56,
"learning_rate": 1.6434951456310683e-05,
"loss": 0.0062,
"step": 4590
},
{
"epoch": 44.66,
"learning_rate": 1.642718446601942e-05,
"loss": 0.0337,
"step": 4600
},
{
"epoch": 44.76,
"learning_rate": 1.6419417475728158e-05,
"loss": 0.0792,
"step": 4610
},
{
"epoch": 44.85,
"learning_rate": 1.6411650485436893e-05,
"loss": 0.1339,
"step": 4620
},
{
"epoch": 44.95,
"learning_rate": 1.6403883495145633e-05,
"loss": 0.0087,
"step": 4630
},
{
"epoch": 45.0,
"eval_accuracy": 0.958904109589041,
"eval_loss": 0.1797976791858673,
"eval_runtime": 0.9137,
"eval_samples_per_second": 159.792,
"eval_steps_per_second": 20.795,
"step": 4635
},
{
"epoch": 45.05,
"learning_rate": 1.6396116504854372e-05,
"loss": 0.0471,
"step": 4640
},
{
"epoch": 45.15,
"learning_rate": 1.6388349514563108e-05,
"loss": 0.2771,
"step": 4650
},
{
"epoch": 45.24,
"learning_rate": 1.6380582524271847e-05,
"loss": 0.0076,
"step": 4660
},
{
"epoch": 45.34,
"learning_rate": 1.6372815533980583e-05,
"loss": 0.0712,
"step": 4670
},
{
"epoch": 45.44,
"learning_rate": 1.6365048543689322e-05,
"loss": 0.0652,
"step": 4680
},
{
"epoch": 45.53,
"learning_rate": 1.6357281553398058e-05,
"loss": 0.0174,
"step": 4690
},
{
"epoch": 45.63,
"learning_rate": 1.6349514563106797e-05,
"loss": 0.0869,
"step": 4700
},
{
"epoch": 45.73,
"learning_rate": 1.6341747572815536e-05,
"loss": 0.0263,
"step": 4710
},
{
"epoch": 45.83,
"learning_rate": 1.6333980582524275e-05,
"loss": 0.1847,
"step": 4720
},
{
"epoch": 45.92,
"learning_rate": 1.632621359223301e-05,
"loss": 0.0736,
"step": 4730
},
{
"epoch": 46.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.0322740375995636,
"eval_runtime": 0.9527,
"eval_samples_per_second": 153.246,
"eval_steps_per_second": 19.943,
"step": 4738
},
{
"epoch": 46.02,
"learning_rate": 1.631844660194175e-05,
"loss": 0.0529,
"step": 4740
},
{
"epoch": 46.12,
"learning_rate": 1.6310679611650486e-05,
"loss": 0.0063,
"step": 4750
},
{
"epoch": 46.21,
"learning_rate": 1.6302912621359225e-05,
"loss": 0.0516,
"step": 4760
},
{
"epoch": 46.31,
"learning_rate": 1.6295145631067964e-05,
"loss": 0.0302,
"step": 4770
},
{
"epoch": 46.41,
"learning_rate": 1.62873786407767e-05,
"loss": 0.0122,
"step": 4780
},
{
"epoch": 46.5,
"learning_rate": 1.627961165048544e-05,
"loss": 0.1734,
"step": 4790
},
{
"epoch": 46.6,
"learning_rate": 1.6271844660194175e-05,
"loss": 0.0983,
"step": 4800
},
{
"epoch": 46.7,
"learning_rate": 1.6264077669902914e-05,
"loss": 0.1147,
"step": 4810
},
{
"epoch": 46.8,
"learning_rate": 1.625631067961165e-05,
"loss": 0.0177,
"step": 4820
},
{
"epoch": 46.89,
"learning_rate": 1.624854368932039e-05,
"loss": 0.0644,
"step": 4830
},
{
"epoch": 46.99,
"learning_rate": 1.624077669902913e-05,
"loss": 0.0427,
"step": 4840
},
{
"epoch": 47.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.03598255664110184,
"eval_runtime": 0.9233,
"eval_samples_per_second": 158.128,
"eval_steps_per_second": 20.578,
"step": 4841
},
{
"epoch": 47.09,
"learning_rate": 1.6233009708737864e-05,
"loss": 0.0146,
"step": 4850
},
{
"epoch": 47.18,
"learning_rate": 1.6225242718446603e-05,
"loss": 0.0297,
"step": 4860
},
{
"epoch": 47.28,
"learning_rate": 1.621747572815534e-05,
"loss": 0.0042,
"step": 4870
},
{
"epoch": 47.38,
"learning_rate": 1.620970873786408e-05,
"loss": 0.0157,
"step": 4880
},
{
"epoch": 47.48,
"learning_rate": 1.6201941747572814e-05,
"loss": 0.0753,
"step": 4890
},
{
"epoch": 47.57,
"learning_rate": 1.6194174757281557e-05,
"loss": 0.1388,
"step": 4900
},
{
"epoch": 47.67,
"learning_rate": 1.6186407766990293e-05,
"loss": 0.0326,
"step": 4910
},
{
"epoch": 47.77,
"learning_rate": 1.6178640776699032e-05,
"loss": 0.1892,
"step": 4920
},
{
"epoch": 47.86,
"learning_rate": 1.6170873786407768e-05,
"loss": 0.0573,
"step": 4930
},
{
"epoch": 47.96,
"learning_rate": 1.6163106796116507e-05,
"loss": 0.0322,
"step": 4940
},
{
"epoch": 48.0,
"eval_accuracy": 0.9657534246575342,
"eval_loss": 0.05355680733919144,
"eval_runtime": 0.9463,
"eval_samples_per_second": 154.291,
"eval_steps_per_second": 20.079,
"step": 4944
},
{
"epoch": 48.06,
"learning_rate": 1.6155339805825243e-05,
"loss": 0.1167,
"step": 4950
},
{
"epoch": 48.16,
"learning_rate": 1.6147572815533982e-05,
"loss": 0.0333,
"step": 4960
},
{
"epoch": 48.25,
"learning_rate": 1.613980582524272e-05,
"loss": 0.2643,
"step": 4970
},
{
"epoch": 48.35,
"learning_rate": 1.6132038834951457e-05,
"loss": 0.0314,
"step": 4980
},
{
"epoch": 48.45,
"learning_rate": 1.6124271844660196e-05,
"loss": 0.0281,
"step": 4990
},
{
"epoch": 48.54,
"learning_rate": 1.6116504854368932e-05,
"loss": 0.0865,
"step": 5000
},
{
"epoch": 48.64,
"learning_rate": 1.610873786407767e-05,
"loss": 0.1104,
"step": 5010
},
{
"epoch": 48.74,
"learning_rate": 1.610097087378641e-05,
"loss": 0.0059,
"step": 5020
},
{
"epoch": 48.83,
"learning_rate": 1.6093203883495146e-05,
"loss": 0.0419,
"step": 5030
},
{
"epoch": 48.93,
"learning_rate": 1.6085436893203885e-05,
"loss": 0.0499,
"step": 5040
},
{
"epoch": 49.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.035887837409973145,
"eval_runtime": 0.9776,
"eval_samples_per_second": 149.343,
"eval_steps_per_second": 19.435,
"step": 5047
},
{
"epoch": 49.03,
"learning_rate": 1.6077669902912624e-05,
"loss": 0.062,
"step": 5050
},
{
"epoch": 49.13,
"learning_rate": 1.606990291262136e-05,
"loss": 0.0114,
"step": 5060
},
{
"epoch": 49.22,
"learning_rate": 1.60621359223301e-05,
"loss": 0.1428,
"step": 5070
},
{
"epoch": 49.32,
"learning_rate": 1.6054368932038835e-05,
"loss": 0.0259,
"step": 5080
},
{
"epoch": 49.42,
"learning_rate": 1.6046601941747574e-05,
"loss": 0.0047,
"step": 5090
},
{
"epoch": 49.51,
"learning_rate": 1.6038834951456313e-05,
"loss": 0.0825,
"step": 5100
},
{
"epoch": 49.61,
"learning_rate": 1.603106796116505e-05,
"loss": 0.1709,
"step": 5110
},
{
"epoch": 49.71,
"learning_rate": 1.602330097087379e-05,
"loss": 0.0343,
"step": 5120
},
{
"epoch": 49.81,
"learning_rate": 1.6015533980582524e-05,
"loss": 0.0675,
"step": 5130
},
{
"epoch": 49.9,
"learning_rate": 1.6007766990291263e-05,
"loss": 0.0356,
"step": 5140
},
{
"epoch": 50.0,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0234,
"step": 5150
},
{
"epoch": 50.0,
"eval_accuracy": 1.0,
"eval_loss": 0.01597742550075054,
"eval_runtime": 0.9767,
"eval_samples_per_second": 149.48,
"eval_steps_per_second": 19.453,
"step": 5150
},
{
"epoch": 50.1,
"learning_rate": 1.599223300970874e-05,
"loss": 0.1435,
"step": 5160
},
{
"epoch": 50.19,
"learning_rate": 1.5984466019417478e-05,
"loss": 0.1082,
"step": 5170
},
{
"epoch": 50.29,
"learning_rate": 1.5976699029126213e-05,
"loss": 0.0888,
"step": 5180
},
{
"epoch": 50.39,
"learning_rate": 1.5968932038834953e-05,
"loss": 0.0825,
"step": 5190
},
{
"epoch": 50.49,
"learning_rate": 1.596116504854369e-05,
"loss": 0.1063,
"step": 5200
},
{
"epoch": 50.58,
"learning_rate": 1.595339805825243e-05,
"loss": 0.0379,
"step": 5210
},
{
"epoch": 50.68,
"learning_rate": 1.5945631067961167e-05,
"loss": 0.0053,
"step": 5220
},
{
"epoch": 50.78,
"learning_rate": 1.5937864077669906e-05,
"loss": 0.0691,
"step": 5230
},
{
"epoch": 50.87,
"learning_rate": 1.5930097087378642e-05,
"loss": 0.0763,
"step": 5240
},
{
"epoch": 50.97,
"learning_rate": 1.592233009708738e-05,
"loss": 0.0896,
"step": 5250
},
{
"epoch": 51.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.054553717374801636,
"eval_runtime": 0.9405,
"eval_samples_per_second": 155.229,
"eval_steps_per_second": 20.201,
"step": 5253
},
{
"epoch": 51.07,
"learning_rate": 1.5914563106796117e-05,
"loss": 0.0095,
"step": 5260
},
{
"epoch": 51.17,
"learning_rate": 1.5906796116504856e-05,
"loss": 0.0832,
"step": 5270
},
{
"epoch": 51.26,
"learning_rate": 1.5899029126213595e-05,
"loss": 0.0357,
"step": 5280
},
{
"epoch": 51.36,
"learning_rate": 1.589126213592233e-05,
"loss": 0.0705,
"step": 5290
},
{
"epoch": 51.46,
"learning_rate": 1.588349514563107e-05,
"loss": 0.0749,
"step": 5300
},
{
"epoch": 51.55,
"learning_rate": 1.5875728155339806e-05,
"loss": 0.0785,
"step": 5310
},
{
"epoch": 51.65,
"learning_rate": 1.5867961165048545e-05,
"loss": 0.0107,
"step": 5320
},
{
"epoch": 51.75,
"learning_rate": 1.586019417475728e-05,
"loss": 0.0292,
"step": 5330
},
{
"epoch": 51.84,
"learning_rate": 1.585242718446602e-05,
"loss": 0.0349,
"step": 5340
},
{
"epoch": 51.94,
"learning_rate": 1.584466019417476e-05,
"loss": 0.1089,
"step": 5350
},
{
"epoch": 52.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.06735321879386902,
"eval_runtime": 0.9363,
"eval_samples_per_second": 155.931,
"eval_steps_per_second": 20.292,
"step": 5356
},
{
"epoch": 52.04,
"learning_rate": 1.5836893203883495e-05,
"loss": 0.0729,
"step": 5360
},
{
"epoch": 52.14,
"learning_rate": 1.5829126213592234e-05,
"loss": 0.226,
"step": 5370
},
{
"epoch": 52.23,
"learning_rate": 1.582135922330097e-05,
"loss": 0.1495,
"step": 5380
},
{
"epoch": 52.33,
"learning_rate": 1.581359223300971e-05,
"loss": 0.1037,
"step": 5390
},
{
"epoch": 52.43,
"learning_rate": 1.580582524271845e-05,
"loss": 0.0258,
"step": 5400
},
{
"epoch": 52.52,
"learning_rate": 1.5798058252427188e-05,
"loss": 0.0822,
"step": 5410
},
{
"epoch": 52.62,
"learning_rate": 1.5790291262135923e-05,
"loss": 0.0039,
"step": 5420
},
{
"epoch": 52.72,
"learning_rate": 1.5782524271844663e-05,
"loss": 0.1656,
"step": 5430
},
{
"epoch": 52.82,
"learning_rate": 1.57747572815534e-05,
"loss": 0.0786,
"step": 5440
},
{
"epoch": 52.91,
"learning_rate": 1.5766990291262138e-05,
"loss": 0.1473,
"step": 5450
},
{
"epoch": 53.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.03693979233503342,
"eval_runtime": 0.9352,
"eval_samples_per_second": 156.119,
"eval_steps_per_second": 20.317,
"step": 5459
},
{
"epoch": 53.01,
"learning_rate": 1.5759223300970877e-05,
"loss": 0.1319,
"step": 5460
},
{
"epoch": 53.11,
"learning_rate": 1.5751456310679613e-05,
"loss": 0.0548,
"step": 5470
},
{
"epoch": 53.2,
"learning_rate": 1.5743689320388352e-05,
"loss": 0.1589,
"step": 5480
},
{
"epoch": 53.3,
"learning_rate": 1.5735922330097088e-05,
"loss": 0.0182,
"step": 5490
},
{
"epoch": 53.4,
"learning_rate": 1.5728155339805827e-05,
"loss": 0.0687,
"step": 5500
},
{
"epoch": 53.5,
"learning_rate": 1.5720388349514563e-05,
"loss": 0.0477,
"step": 5510
},
{
"epoch": 53.59,
"learning_rate": 1.5712621359223302e-05,
"loss": 0.0341,
"step": 5520
},
{
"epoch": 53.69,
"learning_rate": 1.570485436893204e-05,
"loss": 0.0338,
"step": 5530
},
{
"epoch": 53.79,
"learning_rate": 1.5697087378640777e-05,
"loss": 0.0927,
"step": 5540
},
{
"epoch": 53.88,
"learning_rate": 1.5689320388349516e-05,
"loss": 0.119,
"step": 5550
},
{
"epoch": 53.98,
"learning_rate": 1.5681553398058255e-05,
"loss": 0.0346,
"step": 5560
},
{
"epoch": 54.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.03953592851758003,
"eval_runtime": 0.9234,
"eval_samples_per_second": 158.116,
"eval_steps_per_second": 20.577,
"step": 5562
},
{
"epoch": 54.08,
"learning_rate": 1.567378640776699e-05,
"loss": 0.0527,
"step": 5570
},
{
"epoch": 54.17,
"learning_rate": 1.566601941747573e-05,
"loss": 0.17,
"step": 5580
},
{
"epoch": 54.27,
"learning_rate": 1.565825242718447e-05,
"loss": 0.0102,
"step": 5590
},
{
"epoch": 54.37,
"learning_rate": 1.5650485436893205e-05,
"loss": 0.0073,
"step": 5600
},
{
"epoch": 54.47,
"learning_rate": 1.5642718446601944e-05,
"loss": 0.0288,
"step": 5610
},
{
"epoch": 54.56,
"learning_rate": 1.563495145631068e-05,
"loss": 0.0455,
"step": 5620
},
{
"epoch": 54.66,
"learning_rate": 1.562718446601942e-05,
"loss": 0.0343,
"step": 5630
},
{
"epoch": 54.76,
"learning_rate": 1.5619417475728155e-05,
"loss": 0.0179,
"step": 5640
},
{
"epoch": 54.85,
"learning_rate": 1.5611650485436894e-05,
"loss": 0.0116,
"step": 5650
},
{
"epoch": 54.95,
"learning_rate": 1.5603883495145634e-05,
"loss": 0.1742,
"step": 5660
},
{
"epoch": 55.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.03901122882962227,
"eval_runtime": 0.9237,
"eval_samples_per_second": 158.058,
"eval_steps_per_second": 20.569,
"step": 5665
},
{
"epoch": 55.05,
"learning_rate": 1.559611650485437e-05,
"loss": 0.0043,
"step": 5670
},
{
"epoch": 55.15,
"learning_rate": 1.558834951456311e-05,
"loss": 0.0422,
"step": 5680
},
{
"epoch": 55.24,
"learning_rate": 1.5580582524271844e-05,
"loss": 0.0032,
"step": 5690
},
{
"epoch": 55.34,
"learning_rate": 1.5572815533980583e-05,
"loss": 0.0043,
"step": 5700
},
{
"epoch": 55.44,
"learning_rate": 1.556504854368932e-05,
"loss": 0.099,
"step": 5710
},
{
"epoch": 55.53,
"learning_rate": 1.5557281553398062e-05,
"loss": 0.0195,
"step": 5720
},
{
"epoch": 55.63,
"learning_rate": 1.5549514563106798e-05,
"loss": 0.1299,
"step": 5730
},
{
"epoch": 55.73,
"learning_rate": 1.5541747572815537e-05,
"loss": 0.0038,
"step": 5740
},
{
"epoch": 55.83,
"learning_rate": 1.5533980582524273e-05,
"loss": 0.0479,
"step": 5750
},
{
"epoch": 55.92,
"learning_rate": 1.5526213592233012e-05,
"loss": 0.0788,
"step": 5760
},
{
"epoch": 56.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.055711355060338974,
"eval_runtime": 0.9412,
"eval_samples_per_second": 155.121,
"eval_steps_per_second": 20.187,
"step": 5768
},
{
"epoch": 56.02,
"learning_rate": 1.5518446601941748e-05,
"loss": 0.1459,
"step": 5770
},
{
"epoch": 56.12,
"learning_rate": 1.5510679611650487e-05,
"loss": 0.067,
"step": 5780
},
{
"epoch": 56.21,
"learning_rate": 1.5502912621359226e-05,
"loss": 0.056,
"step": 5790
},
{
"epoch": 56.31,
"learning_rate": 1.5495145631067962e-05,
"loss": 0.0233,
"step": 5800
},
{
"epoch": 56.41,
"learning_rate": 1.54873786407767e-05,
"loss": 0.1362,
"step": 5810
},
{
"epoch": 56.5,
"learning_rate": 1.5479611650485437e-05,
"loss": 0.092,
"step": 5820
},
{
"epoch": 56.6,
"learning_rate": 1.5471844660194176e-05,
"loss": 0.0779,
"step": 5830
},
{
"epoch": 56.7,
"learning_rate": 1.5464077669902915e-05,
"loss": 0.0401,
"step": 5840
},
{
"epoch": 56.8,
"learning_rate": 1.545631067961165e-05,
"loss": 0.0743,
"step": 5850
},
{
"epoch": 56.89,
"learning_rate": 1.544854368932039e-05,
"loss": 0.0751,
"step": 5860
},
{
"epoch": 56.99,
"learning_rate": 1.5440776699029126e-05,
"loss": 0.12,
"step": 5870
},
{
"epoch": 57.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.04083726927638054,
"eval_runtime": 0.9215,
"eval_samples_per_second": 158.439,
"eval_steps_per_second": 20.619,
"step": 5871
},
{
"epoch": 57.09,
"learning_rate": 1.5433009708737865e-05,
"loss": 0.0554,
"step": 5880
},
{
"epoch": 57.18,
"learning_rate": 1.54252427184466e-05,
"loss": 0.0277,
"step": 5890
},
{
"epoch": 57.28,
"learning_rate": 1.541747572815534e-05,
"loss": 0.0297,
"step": 5900
},
{
"epoch": 57.38,
"learning_rate": 1.540970873786408e-05,
"loss": 0.0304,
"step": 5910
},
{
"epoch": 57.48,
"learning_rate": 1.540194174757282e-05,
"loss": 0.1162,
"step": 5920
},
{
"epoch": 57.57,
"learning_rate": 1.5394174757281554e-05,
"loss": 0.0031,
"step": 5930
},
{
"epoch": 57.67,
"learning_rate": 1.5386407766990294e-05,
"loss": 0.0263,
"step": 5940
},
{
"epoch": 57.77,
"learning_rate": 1.537864077669903e-05,
"loss": 0.0255,
"step": 5950
},
{
"epoch": 57.86,
"learning_rate": 1.537087378640777e-05,
"loss": 0.1066,
"step": 5960
},
{
"epoch": 57.96,
"learning_rate": 1.5363106796116508e-05,
"loss": 0.0952,
"step": 5970
},
{
"epoch": 58.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.04665667563676834,
"eval_runtime": 0.9277,
"eval_samples_per_second": 157.377,
"eval_steps_per_second": 20.481,
"step": 5974
},
{
"epoch": 58.06,
"learning_rate": 1.5355339805825243e-05,
"loss": 0.0915,
"step": 5980
},
{
"epoch": 58.16,
"learning_rate": 1.5347572815533983e-05,
"loss": 0.0818,
"step": 5990
},
{
"epoch": 58.25,
"learning_rate": 1.533980582524272e-05,
"loss": 0.1942,
"step": 6000
},
{
"epoch": 58.35,
"learning_rate": 1.5332038834951458e-05,
"loss": 0.0257,
"step": 6010
},
{
"epoch": 58.45,
"learning_rate": 1.5324271844660193e-05,
"loss": 0.2663,
"step": 6020
},
{
"epoch": 58.54,
"learning_rate": 1.5316504854368933e-05,
"loss": 0.1124,
"step": 6030
},
{
"epoch": 58.64,
"learning_rate": 1.5308737864077672e-05,
"loss": 0.1751,
"step": 6040
},
{
"epoch": 58.74,
"learning_rate": 1.5300970873786408e-05,
"loss": 0.06,
"step": 6050
},
{
"epoch": 58.83,
"learning_rate": 1.5293203883495147e-05,
"loss": 0.069,
"step": 6060
},
{
"epoch": 58.93,
"learning_rate": 1.5285436893203886e-05,
"loss": 0.0119,
"step": 6070
},
{
"epoch": 59.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.09355130046606064,
"eval_runtime": 0.9076,
"eval_samples_per_second": 160.86,
"eval_steps_per_second": 20.934,
"step": 6077
},
{
"epoch": 59.03,
"learning_rate": 1.5277669902912622e-05,
"loss": 0.0037,
"step": 6080
},
{
"epoch": 59.13,
"learning_rate": 1.526990291262136e-05,
"loss": 0.0265,
"step": 6090
},
{
"epoch": 59.22,
"learning_rate": 1.52621359223301e-05,
"loss": 0.0376,
"step": 6100
},
{
"epoch": 59.32,
"learning_rate": 1.5254368932038836e-05,
"loss": 0.0088,
"step": 6110
},
{
"epoch": 59.42,
"learning_rate": 1.5246601941747573e-05,
"loss": 0.0406,
"step": 6120
},
{
"epoch": 59.51,
"learning_rate": 1.5238834951456311e-05,
"loss": 0.2535,
"step": 6130
},
{
"epoch": 59.61,
"learning_rate": 1.5231067961165048e-05,
"loss": 0.0168,
"step": 6140
},
{
"epoch": 59.71,
"learning_rate": 1.5223300970873786e-05,
"loss": 0.0041,
"step": 6150
},
{
"epoch": 59.81,
"learning_rate": 1.5215533980582527e-05,
"loss": 0.0073,
"step": 6160
},
{
"epoch": 59.9,
"learning_rate": 1.5207766990291264e-05,
"loss": 0.0921,
"step": 6170
},
{
"epoch": 60.0,
"learning_rate": 1.5200000000000002e-05,
"loss": 0.0381,
"step": 6180
},
{
"epoch": 60.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.058126527816057205,
"eval_runtime": 0.9237,
"eval_samples_per_second": 158.052,
"eval_steps_per_second": 20.568,
"step": 6180
},
{
"epoch": 60.1,
"learning_rate": 1.519223300970874e-05,
"loss": 0.0028,
"step": 6190
},
{
"epoch": 60.19,
"learning_rate": 1.5184466019417477e-05,
"loss": 0.0349,
"step": 6200
},
{
"epoch": 60.29,
"learning_rate": 1.5176699029126214e-05,
"loss": 0.0037,
"step": 6210
},
{
"epoch": 60.39,
"learning_rate": 1.5168932038834954e-05,
"loss": 0.0028,
"step": 6220
},
{
"epoch": 60.49,
"learning_rate": 1.5161165048543691e-05,
"loss": 0.0764,
"step": 6230
},
{
"epoch": 60.58,
"learning_rate": 1.5153398058252429e-05,
"loss": 0.1693,
"step": 6240
},
{
"epoch": 60.68,
"learning_rate": 1.5145631067961166e-05,
"loss": 0.0965,
"step": 6250
},
{
"epoch": 60.78,
"learning_rate": 1.5137864077669904e-05,
"loss": 0.0026,
"step": 6260
},
{
"epoch": 60.87,
"learning_rate": 1.5130097087378641e-05,
"loss": 0.0157,
"step": 6270
},
{
"epoch": 60.97,
"learning_rate": 1.512233009708738e-05,
"loss": 0.0147,
"step": 6280
},
{
"epoch": 61.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.08475180715322495,
"eval_runtime": 0.9312,
"eval_samples_per_second": 156.787,
"eval_steps_per_second": 20.404,
"step": 6283
},
{
"epoch": 61.07,
"learning_rate": 1.5114563106796118e-05,
"loss": 0.0033,
"step": 6290
},
{
"epoch": 61.17,
"learning_rate": 1.5106796116504855e-05,
"loss": 0.0149,
"step": 6300
},
{
"epoch": 61.26,
"learning_rate": 1.5099029126213593e-05,
"loss": 0.0366,
"step": 6310
},
{
"epoch": 61.36,
"learning_rate": 1.5091262135922332e-05,
"loss": 0.0854,
"step": 6320
},
{
"epoch": 61.46,
"learning_rate": 1.508349514563107e-05,
"loss": 0.0727,
"step": 6330
},
{
"epoch": 61.55,
"learning_rate": 1.5075728155339807e-05,
"loss": 0.0502,
"step": 6340
},
{
"epoch": 61.65,
"learning_rate": 1.5067961165048546e-05,
"loss": 0.0878,
"step": 6350
},
{
"epoch": 61.75,
"learning_rate": 1.5060194174757284e-05,
"loss": 0.0988,
"step": 6360
},
{
"epoch": 61.84,
"learning_rate": 1.5052427184466021e-05,
"loss": 0.0054,
"step": 6370
},
{
"epoch": 61.94,
"learning_rate": 1.5044660194174759e-05,
"loss": 0.028,
"step": 6380
},
{
"epoch": 62.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.0554812066257,
"eval_runtime": 0.9252,
"eval_samples_per_second": 157.804,
"eval_steps_per_second": 20.536,
"step": 6386
},
{
"epoch": 62.04,
"learning_rate": 1.5036893203883496e-05,
"loss": 0.0038,
"step": 6390
},
{
"epoch": 62.14,
"learning_rate": 1.5029126213592234e-05,
"loss": 0.1136,
"step": 6400
},
{
"epoch": 62.23,
"learning_rate": 1.5021359223300973e-05,
"loss": 0.0329,
"step": 6410
},
{
"epoch": 62.33,
"learning_rate": 1.501359223300971e-05,
"loss": 0.2072,
"step": 6420
},
{
"epoch": 62.43,
"learning_rate": 1.5005825242718448e-05,
"loss": 0.028,
"step": 6430
},
{
"epoch": 62.52,
"learning_rate": 1.4998058252427185e-05,
"loss": 0.2947,
"step": 6440
},
{
"epoch": 62.62,
"learning_rate": 1.4990291262135923e-05,
"loss": 0.0525,
"step": 6450
},
{
"epoch": 62.72,
"learning_rate": 1.498252427184466e-05,
"loss": 0.0433,
"step": 6460
},
{
"epoch": 62.82,
"learning_rate": 1.4974757281553401e-05,
"loss": 0.0054,
"step": 6470
},
{
"epoch": 62.91,
"learning_rate": 1.4966990291262139e-05,
"loss": 0.0108,
"step": 6480
},
{
"epoch": 63.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.02103378064930439,
"eval_runtime": 0.9293,
"eval_samples_per_second": 157.104,
"eval_steps_per_second": 20.445,
"step": 6489
},
{
"epoch": 63.01,
"learning_rate": 1.4959223300970876e-05,
"loss": 0.0574,
"step": 6490
},
{
"epoch": 63.11,
"learning_rate": 1.4951456310679614e-05,
"loss": 0.1288,
"step": 6500
},
{
"epoch": 63.2,
"learning_rate": 1.4943689320388351e-05,
"loss": 0.0043,
"step": 6510
},
{
"epoch": 63.3,
"learning_rate": 1.4935922330097089e-05,
"loss": 0.0602,
"step": 6520
},
{
"epoch": 63.4,
"learning_rate": 1.4928155339805826e-05,
"loss": 0.087,
"step": 6530
},
{
"epoch": 63.5,
"learning_rate": 1.4920388349514565e-05,
"loss": 0.0581,
"step": 6540
},
{
"epoch": 63.59,
"learning_rate": 1.4912621359223303e-05,
"loss": 0.0797,
"step": 6550
},
{
"epoch": 63.69,
"learning_rate": 1.490485436893204e-05,
"loss": 0.1079,
"step": 6560
},
{
"epoch": 63.79,
"learning_rate": 1.4897087378640778e-05,
"loss": 0.0026,
"step": 6570
},
{
"epoch": 63.88,
"learning_rate": 1.4889320388349515e-05,
"loss": 0.004,
"step": 6580
},
{
"epoch": 63.98,
"learning_rate": 1.4881553398058253e-05,
"loss": 0.0845,
"step": 6590
},
{
"epoch": 64.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.01815168187022209,
"eval_runtime": 0.957,
"eval_samples_per_second": 152.56,
"eval_steps_per_second": 19.854,
"step": 6592
},
{
"epoch": 64.08,
"learning_rate": 1.4873786407766992e-05,
"loss": 0.0624,
"step": 6600
},
{
"epoch": 64.17,
"learning_rate": 1.486601941747573e-05,
"loss": 0.0056,
"step": 6610
},
{
"epoch": 64.27,
"learning_rate": 1.4858252427184467e-05,
"loss": 0.0028,
"step": 6620
},
{
"epoch": 64.37,
"learning_rate": 1.4850485436893204e-05,
"loss": 0.0151,
"step": 6630
},
{
"epoch": 64.47,
"learning_rate": 1.4842718446601942e-05,
"loss": 0.0057,
"step": 6640
},
{
"epoch": 64.56,
"learning_rate": 1.483495145631068e-05,
"loss": 0.0037,
"step": 6650
},
{
"epoch": 64.66,
"learning_rate": 1.482718446601942e-05,
"loss": 0.0025,
"step": 6660
},
{
"epoch": 64.76,
"learning_rate": 1.4819417475728158e-05,
"loss": 0.0439,
"step": 6670
},
{
"epoch": 64.85,
"learning_rate": 1.4811650485436895e-05,
"loss": 0.0049,
"step": 6680
},
{
"epoch": 64.95,
"learning_rate": 1.4803883495145633e-05,
"loss": 0.0027,
"step": 6690
},
{
"epoch": 65.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.021542595699429512,
"eval_runtime": 0.9233,
"eval_samples_per_second": 158.125,
"eval_steps_per_second": 20.578,
"step": 6695
},
{
"epoch": 65.05,
"learning_rate": 1.479611650485437e-05,
"loss": 0.0547,
"step": 6700
},
{
"epoch": 65.15,
"learning_rate": 1.4788349514563108e-05,
"loss": 0.0319,
"step": 6710
},
{
"epoch": 65.24,
"learning_rate": 1.4780582524271845e-05,
"loss": 0.0903,
"step": 6720
},
{
"epoch": 65.34,
"learning_rate": 1.4772815533980584e-05,
"loss": 0.1014,
"step": 6730
},
{
"epoch": 65.44,
"learning_rate": 1.4765048543689322e-05,
"loss": 0.0021,
"step": 6740
},
{
"epoch": 65.53,
"learning_rate": 1.475728155339806e-05,
"loss": 0.0146,
"step": 6750
},
{
"epoch": 65.63,
"learning_rate": 1.4749514563106797e-05,
"loss": 0.0188,
"step": 6760
},
{
"epoch": 65.73,
"learning_rate": 1.4741747572815534e-05,
"loss": 0.1414,
"step": 6770
},
{
"epoch": 65.83,
"learning_rate": 1.4733980582524272e-05,
"loss": 0.0898,
"step": 6780
},
{
"epoch": 65.92,
"learning_rate": 1.4726213592233011e-05,
"loss": 0.0852,
"step": 6790
},
{
"epoch": 66.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.03675241023302078,
"eval_runtime": 0.9838,
"eval_samples_per_second": 148.411,
"eval_steps_per_second": 19.314,
"step": 6798
},
{
"epoch": 66.02,
"learning_rate": 1.4718446601941749e-05,
"loss": 0.0604,
"step": 6800
},
{
"epoch": 66.12,
"learning_rate": 1.4710679611650486e-05,
"loss": 0.0943,
"step": 6810
},
{
"epoch": 66.21,
"learning_rate": 1.4702912621359225e-05,
"loss": 0.1165,
"step": 6820
},
{
"epoch": 66.31,
"learning_rate": 1.4695145631067963e-05,
"loss": 0.0357,
"step": 6830
},
{
"epoch": 66.41,
"learning_rate": 1.46873786407767e-05,
"loss": 0.0481,
"step": 6840
},
{
"epoch": 66.5,
"learning_rate": 1.467961165048544e-05,
"loss": 0.0041,
"step": 6850
},
{
"epoch": 66.6,
"learning_rate": 1.4671844660194177e-05,
"loss": 0.019,
"step": 6860
},
{
"epoch": 66.7,
"learning_rate": 1.4664077669902914e-05,
"loss": 0.0022,
"step": 6870
},
{
"epoch": 66.8,
"learning_rate": 1.4656310679611652e-05,
"loss": 0.0822,
"step": 6880
},
{
"epoch": 66.89,
"learning_rate": 1.464854368932039e-05,
"loss": 0.0585,
"step": 6890
},
{
"epoch": 66.99,
"learning_rate": 1.4640776699029127e-05,
"loss": 0.0022,
"step": 6900
},
{
"epoch": 67.0,
"eval_accuracy": 0.958904109589041,
"eval_loss": 0.16548927128314972,
"eval_runtime": 0.927,
"eval_samples_per_second": 157.494,
"eval_steps_per_second": 20.496,
"step": 6901
},
{
"epoch": 67.09,
"learning_rate": 1.4633009708737864e-05,
"loss": 0.0252,
"step": 6910
},
{
"epoch": 67.18,
"learning_rate": 1.4625242718446604e-05,
"loss": 0.1552,
"step": 6920
},
{
"epoch": 67.28,
"learning_rate": 1.4617475728155341e-05,
"loss": 0.0023,
"step": 6930
},
{
"epoch": 67.38,
"learning_rate": 1.4609708737864079e-05,
"loss": 0.0022,
"step": 6940
},
{
"epoch": 67.48,
"learning_rate": 1.4601941747572816e-05,
"loss": 0.0952,
"step": 6950
},
{
"epoch": 67.57,
"learning_rate": 1.4594174757281554e-05,
"loss": 0.0205,
"step": 6960
},
{
"epoch": 67.67,
"learning_rate": 1.4586407766990291e-05,
"loss": 0.0487,
"step": 6970
},
{
"epoch": 67.77,
"learning_rate": 1.4578640776699032e-05,
"loss": 0.002,
"step": 6980
},
{
"epoch": 67.86,
"learning_rate": 1.457087378640777e-05,
"loss": 0.0832,
"step": 6990
},
{
"epoch": 67.96,
"learning_rate": 1.4563106796116507e-05,
"loss": 0.0757,
"step": 7000
},
{
"epoch": 68.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.034192681312561035,
"eval_runtime": 0.8958,
"eval_samples_per_second": 162.982,
"eval_steps_per_second": 21.21,
"step": 7004
},
{
"epoch": 68.06,
"learning_rate": 1.4555339805825244e-05,
"loss": 0.0993,
"step": 7010
},
{
"epoch": 68.16,
"learning_rate": 1.4547572815533982e-05,
"loss": 0.0503,
"step": 7020
},
{
"epoch": 68.25,
"learning_rate": 1.453980582524272e-05,
"loss": 0.0605,
"step": 7030
},
{
"epoch": 68.35,
"learning_rate": 1.4532038834951459e-05,
"loss": 0.1539,
"step": 7040
},
{
"epoch": 68.45,
"learning_rate": 1.4524271844660196e-05,
"loss": 0.0486,
"step": 7050
},
{
"epoch": 68.54,
"learning_rate": 1.4516504854368934e-05,
"loss": 0.0159,
"step": 7060
},
{
"epoch": 68.64,
"learning_rate": 1.4508737864077671e-05,
"loss": 0.0608,
"step": 7070
},
{
"epoch": 68.74,
"learning_rate": 1.4500970873786409e-05,
"loss": 0.1876,
"step": 7080
},
{
"epoch": 68.83,
"learning_rate": 1.4493203883495146e-05,
"loss": 0.0214,
"step": 7090
},
{
"epoch": 68.93,
"learning_rate": 1.4485436893203884e-05,
"loss": 0.0823,
"step": 7100
},
{
"epoch": 69.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.02801397815346718,
"eval_runtime": 0.9051,
"eval_samples_per_second": 161.3,
"eval_steps_per_second": 20.991,
"step": 7107
},
{
"epoch": 69.03,
"learning_rate": 1.4477669902912623e-05,
"loss": 0.0045,
"step": 7110
},
{
"epoch": 69.13,
"learning_rate": 1.446990291262136e-05,
"loss": 0.0027,
"step": 7120
},
{
"epoch": 69.22,
"learning_rate": 1.4462135922330098e-05,
"loss": 0.08,
"step": 7130
},
{
"epoch": 69.32,
"learning_rate": 1.4454368932038835e-05,
"loss": 0.1202,
"step": 7140
},
{
"epoch": 69.42,
"learning_rate": 1.4446601941747573e-05,
"loss": 0.0255,
"step": 7150
},
{
"epoch": 69.51,
"learning_rate": 1.443883495145631e-05,
"loss": 0.0071,
"step": 7160
},
{
"epoch": 69.61,
"learning_rate": 1.4431067961165051e-05,
"loss": 0.0873,
"step": 7170
},
{
"epoch": 69.71,
"learning_rate": 1.4423300970873789e-05,
"loss": 0.0846,
"step": 7180
},
{
"epoch": 69.81,
"learning_rate": 1.4415533980582526e-05,
"loss": 0.1357,
"step": 7190
},
{
"epoch": 69.9,
"learning_rate": 1.4407766990291264e-05,
"loss": 0.082,
"step": 7200
},
{
"epoch": 70.0,
"learning_rate": 1.4400000000000001e-05,
"loss": 0.1071,
"step": 7210
},
{
"epoch": 70.0,
"eval_accuracy": 1.0,
"eval_loss": 0.014005626551806927,
"eval_runtime": 0.9022,
"eval_samples_per_second": 161.82,
"eval_steps_per_second": 21.059,
"step": 7210
},
{
"epoch": 70.1,
"learning_rate": 1.4392233009708739e-05,
"loss": 0.0503,
"step": 7220
},
{
"epoch": 70.19,
"learning_rate": 1.4384466019417478e-05,
"loss": 0.1342,
"step": 7230
},
{
"epoch": 70.29,
"learning_rate": 1.4376699029126215e-05,
"loss": 0.0553,
"step": 7240
},
{
"epoch": 70.39,
"learning_rate": 1.4368932038834953e-05,
"loss": 0.0535,
"step": 7250
},
{
"epoch": 70.49,
"learning_rate": 1.436116504854369e-05,
"loss": 0.0376,
"step": 7260
},
{
"epoch": 70.58,
"learning_rate": 1.4353398058252428e-05,
"loss": 0.0154,
"step": 7270
},
{
"epoch": 70.68,
"learning_rate": 1.4345631067961165e-05,
"loss": 0.0047,
"step": 7280
},
{
"epoch": 70.78,
"learning_rate": 1.4337864077669904e-05,
"loss": 0.0018,
"step": 7290
},
{
"epoch": 70.87,
"learning_rate": 1.4330097087378642e-05,
"loss": 0.0908,
"step": 7300
},
{
"epoch": 70.97,
"learning_rate": 1.432233009708738e-05,
"loss": 0.0832,
"step": 7310
},
{
"epoch": 71.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.03868861868977547,
"eval_runtime": 0.9162,
"eval_samples_per_second": 159.35,
"eval_steps_per_second": 20.737,
"step": 7313
},
{
"epoch": 71.07,
"learning_rate": 1.4314563106796117e-05,
"loss": 0.054,
"step": 7320
},
{
"epoch": 71.17,
"learning_rate": 1.4306796116504856e-05,
"loss": 0.0967,
"step": 7330
},
{
"epoch": 71.26,
"learning_rate": 1.4299029126213594e-05,
"loss": 0.0186,
"step": 7340
},
{
"epoch": 71.36,
"learning_rate": 1.4291262135922331e-05,
"loss": 0.0503,
"step": 7350
},
{
"epoch": 71.46,
"learning_rate": 1.428349514563107e-05,
"loss": 0.1007,
"step": 7360
},
{
"epoch": 71.55,
"learning_rate": 1.4275728155339808e-05,
"loss": 0.0525,
"step": 7370
},
{
"epoch": 71.65,
"learning_rate": 1.4267961165048545e-05,
"loss": 0.0239,
"step": 7380
},
{
"epoch": 71.75,
"learning_rate": 1.4260194174757283e-05,
"loss": 0.0964,
"step": 7390
},
{
"epoch": 71.84,
"learning_rate": 1.425242718446602e-05,
"loss": 0.0359,
"step": 7400
},
{
"epoch": 71.94,
"learning_rate": 1.4244660194174758e-05,
"loss": 0.0417,
"step": 7410
},
{
"epoch": 72.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.0697227343916893,
"eval_runtime": 0.9023,
"eval_samples_per_second": 161.815,
"eval_steps_per_second": 21.058,
"step": 7416
},
{
"epoch": 72.04,
"learning_rate": 1.4236893203883497e-05,
"loss": 0.0933,
"step": 7420
},
{
"epoch": 72.14,
"learning_rate": 1.4229126213592234e-05,
"loss": 0.1749,
"step": 7430
},
{
"epoch": 72.23,
"learning_rate": 1.4221359223300972e-05,
"loss": 0.0145,
"step": 7440
},
{
"epoch": 72.33,
"learning_rate": 1.421359223300971e-05,
"loss": 0.0321,
"step": 7450
},
{
"epoch": 72.43,
"learning_rate": 1.4205825242718447e-05,
"loss": 0.0879,
"step": 7460
},
{
"epoch": 72.52,
"learning_rate": 1.4198058252427184e-05,
"loss": 0.0227,
"step": 7470
},
{
"epoch": 72.62,
"learning_rate": 1.4190291262135925e-05,
"loss": 0.1572,
"step": 7480
},
{
"epoch": 72.72,
"learning_rate": 1.4182524271844663e-05,
"loss": 0.0728,
"step": 7490
},
{
"epoch": 72.82,
"learning_rate": 1.41747572815534e-05,
"loss": 0.0045,
"step": 7500
},
{
"epoch": 72.91,
"learning_rate": 1.4166990291262138e-05,
"loss": 0.1208,
"step": 7510
},
{
"epoch": 73.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.07754257321357727,
"eval_runtime": 0.8991,
"eval_samples_per_second": 162.384,
"eval_steps_per_second": 21.132,
"step": 7519
},
{
"epoch": 73.01,
"learning_rate": 1.4159223300970875e-05,
"loss": 0.0482,
"step": 7520
},
{
"epoch": 73.11,
"learning_rate": 1.4151456310679613e-05,
"loss": 0.0655,
"step": 7530
},
{
"epoch": 73.2,
"learning_rate": 1.414368932038835e-05,
"loss": 0.0787,
"step": 7540
},
{
"epoch": 73.3,
"learning_rate": 1.413592233009709e-05,
"loss": 0.0483,
"step": 7550
},
{
"epoch": 73.4,
"learning_rate": 1.4128155339805827e-05,
"loss": 0.068,
"step": 7560
},
{
"epoch": 73.5,
"learning_rate": 1.4120388349514564e-05,
"loss": 0.0395,
"step": 7570
},
{
"epoch": 73.59,
"learning_rate": 1.4112621359223302e-05,
"loss": 0.0644,
"step": 7580
},
{
"epoch": 73.69,
"learning_rate": 1.410485436893204e-05,
"loss": 0.0126,
"step": 7590
},
{
"epoch": 73.79,
"learning_rate": 1.4097087378640777e-05,
"loss": 0.0531,
"step": 7600
},
{
"epoch": 73.88,
"learning_rate": 1.4089320388349516e-05,
"loss": 0.0767,
"step": 7610
},
{
"epoch": 73.98,
"learning_rate": 1.4081553398058254e-05,
"loss": 0.0083,
"step": 7620
},
{
"epoch": 74.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.052464041858911514,
"eval_runtime": 0.9146,
"eval_samples_per_second": 159.624,
"eval_steps_per_second": 20.773,
"step": 7622
},
{
"epoch": 74.08,
"learning_rate": 1.4073786407766991e-05,
"loss": 0.0115,
"step": 7630
},
{
"epoch": 74.17,
"learning_rate": 1.4066019417475729e-05,
"loss": 0.0252,
"step": 7640
},
{
"epoch": 74.27,
"learning_rate": 1.4058252427184466e-05,
"loss": 0.0033,
"step": 7650
},
{
"epoch": 74.37,
"learning_rate": 1.4050485436893204e-05,
"loss": 0.0883,
"step": 7660
},
{
"epoch": 74.47,
"learning_rate": 1.4042718446601944e-05,
"loss": 0.0103,
"step": 7670
},
{
"epoch": 74.56,
"learning_rate": 1.4034951456310682e-05,
"loss": 0.1416,
"step": 7680
},
{
"epoch": 74.66,
"learning_rate": 1.402718446601942e-05,
"loss": 0.0526,
"step": 7690
},
{
"epoch": 74.76,
"learning_rate": 1.4019417475728157e-05,
"loss": 0.0104,
"step": 7700
},
{
"epoch": 74.85,
"learning_rate": 1.4011650485436894e-05,
"loss": 0.1323,
"step": 7710
},
{
"epoch": 74.95,
"learning_rate": 1.4003883495145632e-05,
"loss": 0.0017,
"step": 7720
},
{
"epoch": 75.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.04071873798966408,
"eval_runtime": 0.9191,
"eval_samples_per_second": 158.853,
"eval_steps_per_second": 20.673,
"step": 7725
},
{
"epoch": 75.05,
"learning_rate": 1.399611650485437e-05,
"loss": 0.0016,
"step": 7730
},
{
"epoch": 75.15,
"learning_rate": 1.3988349514563109e-05,
"loss": 0.0279,
"step": 7740
},
{
"epoch": 75.24,
"learning_rate": 1.3980582524271846e-05,
"loss": 0.0353,
"step": 7750
},
{
"epoch": 75.34,
"learning_rate": 1.3972815533980584e-05,
"loss": 0.0031,
"step": 7760
},
{
"epoch": 75.44,
"learning_rate": 1.3965048543689321e-05,
"loss": 0.0166,
"step": 7770
},
{
"epoch": 75.53,
"learning_rate": 1.3957281553398059e-05,
"loss": 0.0515,
"step": 7780
},
{
"epoch": 75.63,
"learning_rate": 1.3949514563106796e-05,
"loss": 0.0705,
"step": 7790
},
{
"epoch": 75.73,
"learning_rate": 1.3941747572815535e-05,
"loss": 0.1237,
"step": 7800
},
{
"epoch": 75.83,
"learning_rate": 1.3933980582524273e-05,
"loss": 0.087,
"step": 7810
},
{
"epoch": 75.92,
"learning_rate": 1.392621359223301e-05,
"loss": 0.012,
"step": 7820
},
{
"epoch": 76.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.0362648107111454,
"eval_runtime": 0.9186,
"eval_samples_per_second": 158.943,
"eval_steps_per_second": 20.684,
"step": 7828
},
{
"epoch": 76.02,
"learning_rate": 1.391844660194175e-05,
"loss": 0.1469,
"step": 7830
},
{
"epoch": 76.12,
"learning_rate": 1.3910679611650487e-05,
"loss": 0.0019,
"step": 7840
},
{
"epoch": 76.21,
"learning_rate": 1.3902912621359224e-05,
"loss": 0.2058,
"step": 7850
},
{
"epoch": 76.31,
"learning_rate": 1.3895145631067964e-05,
"loss": 0.0152,
"step": 7860
},
{
"epoch": 76.41,
"learning_rate": 1.3887378640776701e-05,
"loss": 0.0499,
"step": 7870
},
{
"epoch": 76.5,
"learning_rate": 1.3879611650485439e-05,
"loss": 0.0561,
"step": 7880
},
{
"epoch": 76.6,
"learning_rate": 1.3871844660194176e-05,
"loss": 0.1247,
"step": 7890
},
{
"epoch": 76.7,
"learning_rate": 1.3864077669902914e-05,
"loss": 0.0401,
"step": 7900
},
{
"epoch": 76.8,
"learning_rate": 1.3856310679611651e-05,
"loss": 0.0112,
"step": 7910
},
{
"epoch": 76.89,
"learning_rate": 1.3848543689320389e-05,
"loss": 0.0092,
"step": 7920
},
{
"epoch": 76.99,
"learning_rate": 1.3840776699029128e-05,
"loss": 0.0215,
"step": 7930
},
{
"epoch": 77.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.030757028609514236,
"eval_runtime": 0.9146,
"eval_samples_per_second": 159.637,
"eval_steps_per_second": 20.775,
"step": 7931
},
{
"epoch": 77.09,
"learning_rate": 1.3833009708737865e-05,
"loss": 0.1653,
"step": 7940
},
{
"epoch": 77.18,
"learning_rate": 1.3825242718446603e-05,
"loss": 0.0577,
"step": 7950
},
{
"epoch": 77.28,
"learning_rate": 1.381747572815534e-05,
"loss": 0.0645,
"step": 7960
},
{
"epoch": 77.38,
"learning_rate": 1.3809708737864078e-05,
"loss": 0.0362,
"step": 7970
},
{
"epoch": 77.48,
"learning_rate": 1.3801941747572815e-05,
"loss": 0.0155,
"step": 7980
},
{
"epoch": 77.57,
"learning_rate": 1.3794174757281556e-05,
"loss": 0.0902,
"step": 7990
},
{
"epoch": 77.67,
"learning_rate": 1.3786407766990294e-05,
"loss": 0.059,
"step": 8000
},
{
"epoch": 77.77,
"learning_rate": 1.3778640776699031e-05,
"loss": 0.0036,
"step": 8010
},
{
"epoch": 77.86,
"learning_rate": 1.3770873786407769e-05,
"loss": 0.0471,
"step": 8020
},
{
"epoch": 77.96,
"learning_rate": 1.3763106796116506e-05,
"loss": 0.0319,
"step": 8030
},
{
"epoch": 78.0,
"eval_accuracy": 0.9726027397260274,
"eval_loss": 0.05245841667056084,
"eval_runtime": 0.9259,
"eval_samples_per_second": 157.685,
"eval_steps_per_second": 20.521,
"step": 8034
},
{
"epoch": 78.06,
"learning_rate": 1.3755339805825244e-05,
"loss": 0.0681,
"step": 8040
},
{
"epoch": 78.16,
"learning_rate": 1.3747572815533983e-05,
"loss": 0.0086,
"step": 8050
},
{
"epoch": 78.25,
"learning_rate": 1.373980582524272e-05,
"loss": 0.0723,
"step": 8060
},
{
"epoch": 78.35,
"learning_rate": 1.3732038834951458e-05,
"loss": 0.1384,
"step": 8070
},
{
"epoch": 78.45,
"learning_rate": 1.3724271844660195e-05,
"loss": 0.1407,
"step": 8080
},
{
"epoch": 78.54,
"learning_rate": 1.3716504854368933e-05,
"loss": 0.007,
"step": 8090
},
{
"epoch": 78.64,
"learning_rate": 1.370873786407767e-05,
"loss": 0.0528,
"step": 8100
},
{
"epoch": 78.74,
"learning_rate": 1.370097087378641e-05,
"loss": 0.0091,
"step": 8110
},
{
"epoch": 78.83,
"learning_rate": 1.3693203883495147e-05,
"loss": 0.033,
"step": 8120
},
{
"epoch": 78.93,
"learning_rate": 1.3685436893203884e-05,
"loss": 0.093,
"step": 8130
},
{
"epoch": 79.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.03234969452023506,
"eval_runtime": 0.8975,
"eval_samples_per_second": 162.673,
"eval_steps_per_second": 21.17,
"step": 8137
},
{
"epoch": 79.03,
"learning_rate": 1.3677669902912622e-05,
"loss": 0.0629,
"step": 8140
},
{
"epoch": 79.13,
"learning_rate": 1.366990291262136e-05,
"loss": 0.0159,
"step": 8150
},
{
"epoch": 79.22,
"learning_rate": 1.3662135922330097e-05,
"loss": 0.011,
"step": 8160
},
{
"epoch": 79.32,
"learning_rate": 1.3654368932038834e-05,
"loss": 0.1088,
"step": 8170
},
{
"epoch": 79.42,
"learning_rate": 1.3646601941747575e-05,
"loss": 0.1116,
"step": 8180
},
{
"epoch": 79.51,
"learning_rate": 1.3638834951456313e-05,
"loss": 0.0462,
"step": 8190
},
{
"epoch": 79.61,
"learning_rate": 1.363106796116505e-05,
"loss": 0.0378,
"step": 8200
},
{
"epoch": 79.71,
"learning_rate": 1.3623300970873788e-05,
"loss": 0.0016,
"step": 8210
},
{
"epoch": 79.81,
"learning_rate": 1.3615533980582525e-05,
"loss": 0.0658,
"step": 8220
},
{
"epoch": 79.9,
"learning_rate": 1.3607766990291263e-05,
"loss": 0.1224,
"step": 8230
},
{
"epoch": 80.0,
"learning_rate": 1.3600000000000002e-05,
"loss": 0.0813,
"step": 8240
},
{
"epoch": 80.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.04363853111863136,
"eval_runtime": 0.9212,
"eval_samples_per_second": 158.487,
"eval_steps_per_second": 20.625,
"step": 8240
},
{
"epoch": 80.1,
"learning_rate": 1.359223300970874e-05,
"loss": 0.1413,
"step": 8250
},
{
"epoch": 80.19,
"learning_rate": 1.3584466019417477e-05,
"loss": 0.0308,
"step": 8260
},
{
"epoch": 80.29,
"learning_rate": 1.3576699029126214e-05,
"loss": 0.0483,
"step": 8270
},
{
"epoch": 80.39,
"learning_rate": 1.3568932038834952e-05,
"loss": 0.0162,
"step": 8280
},
{
"epoch": 80.49,
"learning_rate": 1.356116504854369e-05,
"loss": 0.1146,
"step": 8290
},
{
"epoch": 80.58,
"learning_rate": 1.3553398058252429e-05,
"loss": 0.0095,
"step": 8300
},
{
"epoch": 80.68,
"learning_rate": 1.3545631067961166e-05,
"loss": 0.0334,
"step": 8310
},
{
"epoch": 80.78,
"learning_rate": 1.3537864077669904e-05,
"loss": 0.0178,
"step": 8320
},
{
"epoch": 80.87,
"learning_rate": 1.3530097087378641e-05,
"loss": 0.0963,
"step": 8330
},
{
"epoch": 80.97,
"learning_rate": 1.352233009708738e-05,
"loss": 0.0014,
"step": 8340
},
{
"epoch": 81.0,
"eval_accuracy": 1.0,
"eval_loss": 0.0034924051724374294,
"eval_runtime": 0.9228,
"eval_samples_per_second": 158.21,
"eval_steps_per_second": 20.589,
"step": 8343
},
{
"epoch": 81.07,
"learning_rate": 1.3514563106796118e-05,
"loss": 0.0019,
"step": 8350
},
{
"epoch": 81.17,
"learning_rate": 1.3506796116504855e-05,
"loss": 0.0013,
"step": 8360
},
{
"epoch": 81.26,
"learning_rate": 1.3499029126213594e-05,
"loss": 0.0016,
"step": 8370
},
{
"epoch": 81.36,
"learning_rate": 1.3491262135922332e-05,
"loss": 0.0363,
"step": 8380
},
{
"epoch": 81.46,
"learning_rate": 1.348349514563107e-05,
"loss": 0.0769,
"step": 8390
},
{
"epoch": 81.55,
"learning_rate": 1.3475728155339807e-05,
"loss": 0.0014,
"step": 8400
},
{
"epoch": 81.65,
"learning_rate": 1.3467961165048544e-05,
"loss": 0.0019,
"step": 8410
},
{
"epoch": 81.75,
"learning_rate": 1.3460194174757282e-05,
"loss": 0.0999,
"step": 8420
},
{
"epoch": 81.84,
"learning_rate": 1.3452427184466021e-05,
"loss": 0.0025,
"step": 8430
},
{
"epoch": 81.94,
"learning_rate": 1.3444660194174759e-05,
"loss": 0.0774,
"step": 8440
},
{
"epoch": 82.0,
"eval_accuracy": 0.9726027397260274,
"eval_loss": 0.10503670573234558,
"eval_runtime": 0.9067,
"eval_samples_per_second": 161.03,
"eval_steps_per_second": 20.956,
"step": 8446
},
{
"epoch": 82.04,
"learning_rate": 1.3436893203883496e-05,
"loss": 0.0666,
"step": 8450
},
{
"epoch": 82.14,
"learning_rate": 1.3429126213592234e-05,
"loss": 0.0457,
"step": 8460
},
{
"epoch": 82.23,
"learning_rate": 1.3421359223300971e-05,
"loss": 0.0802,
"step": 8470
},
{
"epoch": 82.33,
"learning_rate": 1.3413592233009709e-05,
"loss": 0.0304,
"step": 8480
},
{
"epoch": 82.43,
"learning_rate": 1.340582524271845e-05,
"loss": 0.0012,
"step": 8490
},
{
"epoch": 82.52,
"learning_rate": 1.3398058252427187e-05,
"loss": 0.0039,
"step": 8500
},
{
"epoch": 82.62,
"learning_rate": 1.3390291262135924e-05,
"loss": 0.0734,
"step": 8510
},
{
"epoch": 82.72,
"learning_rate": 1.3382524271844662e-05,
"loss": 0.0466,
"step": 8520
},
{
"epoch": 82.82,
"learning_rate": 1.33747572815534e-05,
"loss": 0.0051,
"step": 8530
},
{
"epoch": 82.91,
"learning_rate": 1.3366990291262137e-05,
"loss": 0.0393,
"step": 8540
},
{
"epoch": 83.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.08058138936758041,
"eval_runtime": 0.8979,
"eval_samples_per_second": 162.604,
"eval_steps_per_second": 21.161,
"step": 8549
},
{
"epoch": 83.01,
"learning_rate": 1.3359223300970874e-05,
"loss": 0.1685,
"step": 8550
},
{
"epoch": 83.11,
"learning_rate": 1.3351456310679614e-05,
"loss": 0.0016,
"step": 8560
},
{
"epoch": 83.2,
"learning_rate": 1.3343689320388351e-05,
"loss": 0.0839,
"step": 8570
},
{
"epoch": 83.3,
"learning_rate": 1.3335922330097089e-05,
"loss": 0.0922,
"step": 8580
},
{
"epoch": 83.4,
"learning_rate": 1.3328155339805826e-05,
"loss": 0.0146,
"step": 8590
},
{
"epoch": 83.5,
"learning_rate": 1.3320388349514564e-05,
"loss": 0.0014,
"step": 8600
},
{
"epoch": 83.59,
"learning_rate": 1.3312621359223301e-05,
"loss": 0.0269,
"step": 8610
},
{
"epoch": 83.69,
"learning_rate": 1.330485436893204e-05,
"loss": 0.0333,
"step": 8620
},
{
"epoch": 83.79,
"learning_rate": 1.3297087378640778e-05,
"loss": 0.0037,
"step": 8630
},
{
"epoch": 83.88,
"learning_rate": 1.3289320388349515e-05,
"loss": 0.0935,
"step": 8640
},
{
"epoch": 83.98,
"learning_rate": 1.3281553398058253e-05,
"loss": 0.0537,
"step": 8650
},
{
"epoch": 84.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.07363705337047577,
"eval_runtime": 0.8825,
"eval_samples_per_second": 165.435,
"eval_steps_per_second": 21.529,
"step": 8652
},
{
"epoch": 84.08,
"learning_rate": 1.327378640776699e-05,
"loss": 0.1431,
"step": 8660
},
{
"epoch": 84.17,
"learning_rate": 1.3266019417475728e-05,
"loss": 0.1869,
"step": 8670
},
{
"epoch": 84.27,
"learning_rate": 1.3258252427184469e-05,
"loss": 0.041,
"step": 8680
},
{
"epoch": 84.37,
"learning_rate": 1.3250485436893206e-05,
"loss": 0.0108,
"step": 8690
},
{
"epoch": 84.47,
"learning_rate": 1.3242718446601944e-05,
"loss": 0.0105,
"step": 8700
},
{
"epoch": 84.56,
"learning_rate": 1.3234951456310681e-05,
"loss": 0.0507,
"step": 8710
},
{
"epoch": 84.66,
"learning_rate": 1.3227184466019419e-05,
"loss": 0.0333,
"step": 8720
},
{
"epoch": 84.76,
"learning_rate": 1.3219417475728156e-05,
"loss": 0.0421,
"step": 8730
},
{
"epoch": 84.85,
"learning_rate": 1.3211650485436894e-05,
"loss": 0.0567,
"step": 8740
},
{
"epoch": 84.95,
"learning_rate": 1.3203883495145633e-05,
"loss": 0.016,
"step": 8750
},
{
"epoch": 85.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.06812305748462677,
"eval_runtime": 0.9225,
"eval_samples_per_second": 158.264,
"eval_steps_per_second": 20.596,
"step": 8755
},
{
"epoch": 85.05,
"learning_rate": 1.319611650485437e-05,
"loss": 0.0198,
"step": 8760
},
{
"epoch": 85.15,
"learning_rate": 1.3188349514563108e-05,
"loss": 0.1216,
"step": 8770
},
{
"epoch": 85.24,
"learning_rate": 1.3180582524271845e-05,
"loss": 0.0408,
"step": 8780
},
{
"epoch": 85.34,
"learning_rate": 1.3172815533980583e-05,
"loss": 0.0033,
"step": 8790
},
{
"epoch": 85.44,
"learning_rate": 1.316504854368932e-05,
"loss": 0.0379,
"step": 8800
},
{
"epoch": 85.53,
"learning_rate": 1.315728155339806e-05,
"loss": 0.0153,
"step": 8810
},
{
"epoch": 85.63,
"learning_rate": 1.3149514563106797e-05,
"loss": 0.0569,
"step": 8820
},
{
"epoch": 85.73,
"learning_rate": 1.3141747572815534e-05,
"loss": 0.0773,
"step": 8830
},
{
"epoch": 85.83,
"learning_rate": 1.3133980582524274e-05,
"loss": 0.0036,
"step": 8840
},
{
"epoch": 85.92,
"learning_rate": 1.3126213592233011e-05,
"loss": 0.0562,
"step": 8850
},
{
"epoch": 86.0,
"eval_accuracy": 0.9726027397260274,
"eval_loss": 0.13558551669120789,
"eval_runtime": 0.9401,
"eval_samples_per_second": 155.297,
"eval_steps_per_second": 20.21,
"step": 8858
},
{
"epoch": 86.02,
"learning_rate": 1.3118446601941749e-05,
"loss": 0.0014,
"step": 8860
},
{
"epoch": 86.12,
"learning_rate": 1.3110679611650488e-05,
"loss": 0.0025,
"step": 8870
},
{
"epoch": 86.21,
"learning_rate": 1.3102912621359225e-05,
"loss": 0.0016,
"step": 8880
},
{
"epoch": 86.31,
"learning_rate": 1.3095145631067963e-05,
"loss": 0.0182,
"step": 8890
},
{
"epoch": 86.41,
"learning_rate": 1.30873786407767e-05,
"loss": 0.0062,
"step": 8900
},
{
"epoch": 86.5,
"learning_rate": 1.3079611650485438e-05,
"loss": 0.0416,
"step": 8910
},
{
"epoch": 86.6,
"learning_rate": 1.3071844660194175e-05,
"loss": 0.0296,
"step": 8920
},
{
"epoch": 86.7,
"learning_rate": 1.3064077669902913e-05,
"loss": 0.1365,
"step": 8930
},
{
"epoch": 86.8,
"learning_rate": 1.3056310679611652e-05,
"loss": 0.0012,
"step": 8940
},
{
"epoch": 86.89,
"learning_rate": 1.304854368932039e-05,
"loss": 0.1132,
"step": 8950
},
{
"epoch": 86.99,
"learning_rate": 1.3040776699029127e-05,
"loss": 0.0133,
"step": 8960
},
{
"epoch": 87.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.09805306047201157,
"eval_runtime": 0.9311,
"eval_samples_per_second": 156.804,
"eval_steps_per_second": 20.406,
"step": 8961
},
{
"epoch": 87.09,
"learning_rate": 1.3033009708737864e-05,
"loss": 0.0237,
"step": 8970
},
{
"epoch": 87.18,
"learning_rate": 1.3025242718446602e-05,
"loss": 0.0895,
"step": 8980
},
{
"epoch": 87.28,
"learning_rate": 1.301747572815534e-05,
"loss": 0.043,
"step": 8990
},
{
"epoch": 87.38,
"learning_rate": 1.300970873786408e-05,
"loss": 0.0013,
"step": 9000
},
{
"epoch": 87.48,
"learning_rate": 1.3001941747572818e-05,
"loss": 0.1008,
"step": 9010
},
{
"epoch": 87.57,
"learning_rate": 1.2994174757281555e-05,
"loss": 0.1435,
"step": 9020
},
{
"epoch": 87.67,
"learning_rate": 1.2986407766990293e-05,
"loss": 0.0573,
"step": 9030
},
{
"epoch": 87.77,
"learning_rate": 1.297864077669903e-05,
"loss": 0.092,
"step": 9040
},
{
"epoch": 87.86,
"learning_rate": 1.2970873786407768e-05,
"loss": 0.1068,
"step": 9050
},
{
"epoch": 87.96,
"learning_rate": 1.2963106796116507e-05,
"loss": 0.0682,
"step": 9060
},
{
"epoch": 88.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.07209344208240509,
"eval_runtime": 0.9453,
"eval_samples_per_second": 154.455,
"eval_steps_per_second": 20.1,
"step": 9064
},
{
"epoch": 88.06,
"learning_rate": 1.2955339805825244e-05,
"loss": 0.0018,
"step": 9070
},
{
"epoch": 88.16,
"learning_rate": 1.2947572815533982e-05,
"loss": 0.1112,
"step": 9080
},
{
"epoch": 88.25,
"learning_rate": 1.293980582524272e-05,
"loss": 0.0071,
"step": 9090
},
{
"epoch": 88.35,
"learning_rate": 1.2932038834951457e-05,
"loss": 0.0061,
"step": 9100
},
{
"epoch": 88.45,
"learning_rate": 1.2924271844660194e-05,
"loss": 0.1292,
"step": 9110
},
{
"epoch": 88.54,
"learning_rate": 1.2916504854368934e-05,
"loss": 0.0175,
"step": 9120
},
{
"epoch": 88.64,
"learning_rate": 1.2908737864077671e-05,
"loss": 0.04,
"step": 9130
},
{
"epoch": 88.74,
"learning_rate": 1.2900970873786409e-05,
"loss": 0.0015,
"step": 9140
},
{
"epoch": 88.83,
"learning_rate": 1.2893203883495146e-05,
"loss": 0.0422,
"step": 9150
},
{
"epoch": 88.93,
"learning_rate": 1.2885436893203884e-05,
"loss": 0.0514,
"step": 9160
},
{
"epoch": 89.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.060059912502765656,
"eval_runtime": 0.9117,
"eval_samples_per_second": 160.14,
"eval_steps_per_second": 20.84,
"step": 9167
},
{
"epoch": 89.03,
"learning_rate": 1.2877669902912621e-05,
"loss": 0.0129,
"step": 9170
},
{
"epoch": 89.13,
"learning_rate": 1.2869902912621359e-05,
"loss": 0.0148,
"step": 9180
},
{
"epoch": 89.22,
"learning_rate": 1.28621359223301e-05,
"loss": 0.0098,
"step": 9190
},
{
"epoch": 89.32,
"learning_rate": 1.2854368932038837e-05,
"loss": 0.0552,
"step": 9200
},
{
"epoch": 89.42,
"learning_rate": 1.2846601941747574e-05,
"loss": 0.0969,
"step": 9210
},
{
"epoch": 89.51,
"learning_rate": 1.2838834951456312e-05,
"loss": 0.0018,
"step": 9220
},
{
"epoch": 89.61,
"learning_rate": 1.283106796116505e-05,
"loss": 0.0014,
"step": 9230
},
{
"epoch": 89.71,
"learning_rate": 1.2823300970873787e-05,
"loss": 0.0012,
"step": 9240
},
{
"epoch": 89.81,
"learning_rate": 1.2815533980582526e-05,
"loss": 0.0018,
"step": 9250
},
{
"epoch": 89.9,
"learning_rate": 1.2807766990291264e-05,
"loss": 0.0017,
"step": 9260
},
{
"epoch": 90.0,
"learning_rate": 1.2800000000000001e-05,
"loss": 0.0043,
"step": 9270
},
{
"epoch": 90.0,
"eval_accuracy": 0.9657534246575342,
"eval_loss": 0.16303785145282745,
"eval_runtime": 0.8933,
"eval_samples_per_second": 163.434,
"eval_steps_per_second": 21.269,
"step": 9270
},
{
"epoch": 90.1,
"learning_rate": 1.2792233009708739e-05,
"loss": 0.1021,
"step": 9280
},
{
"epoch": 90.19,
"learning_rate": 1.2784466019417476e-05,
"loss": 0.0296,
"step": 9290
},
{
"epoch": 90.29,
"learning_rate": 1.2776699029126214e-05,
"loss": 0.0811,
"step": 9300
},
{
"epoch": 90.39,
"learning_rate": 1.2768932038834953e-05,
"loss": 0.0438,
"step": 9310
},
{
"epoch": 90.49,
"learning_rate": 1.276116504854369e-05,
"loss": 0.0245,
"step": 9320
},
{
"epoch": 90.58,
"learning_rate": 1.2753398058252428e-05,
"loss": 0.0102,
"step": 9330
},
{
"epoch": 90.68,
"learning_rate": 1.2745631067961165e-05,
"loss": 0.0845,
"step": 9340
},
{
"epoch": 90.78,
"learning_rate": 1.2737864077669904e-05,
"loss": 0.0013,
"step": 9350
},
{
"epoch": 90.87,
"learning_rate": 1.2730097087378642e-05,
"loss": 0.0722,
"step": 9360
},
{
"epoch": 90.97,
"learning_rate": 1.272233009708738e-05,
"loss": 0.0011,
"step": 9370
},
{
"epoch": 91.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.029909975826740265,
"eval_runtime": 0.8937,
"eval_samples_per_second": 163.37,
"eval_steps_per_second": 21.26,
"step": 9373
},
{
"epoch": 91.07,
"learning_rate": 1.2714563106796119e-05,
"loss": 0.0259,
"step": 9380
},
{
"epoch": 91.17,
"learning_rate": 1.2706796116504856e-05,
"loss": 0.0221,
"step": 9390
},
{
"epoch": 91.26,
"learning_rate": 1.2699029126213594e-05,
"loss": 0.1292,
"step": 9400
},
{
"epoch": 91.36,
"learning_rate": 1.2691262135922331e-05,
"loss": 0.0398,
"step": 9410
},
{
"epoch": 91.46,
"learning_rate": 1.2683495145631069e-05,
"loss": 0.0971,
"step": 9420
},
{
"epoch": 91.55,
"learning_rate": 1.2675728155339806e-05,
"loss": 0.0973,
"step": 9430
},
{
"epoch": 91.65,
"learning_rate": 1.2667961165048545e-05,
"loss": 0.0636,
"step": 9440
},
{
"epoch": 91.75,
"learning_rate": 1.2660194174757283e-05,
"loss": 0.0216,
"step": 9450
},
{
"epoch": 91.84,
"learning_rate": 1.265242718446602e-05,
"loss": 0.1983,
"step": 9460
},
{
"epoch": 91.94,
"learning_rate": 1.2644660194174758e-05,
"loss": 0.0707,
"step": 9470
},
{
"epoch": 92.0,
"eval_accuracy": 0.9657534246575342,
"eval_loss": 0.100075863301754,
"eval_runtime": 0.8859,
"eval_samples_per_second": 164.8,
"eval_steps_per_second": 21.447,
"step": 9476
},
{
"epoch": 92.04,
"learning_rate": 1.2636893203883495e-05,
"loss": 0.0351,
"step": 9480
},
{
"epoch": 92.14,
"learning_rate": 1.2629126213592233e-05,
"loss": 0.0242,
"step": 9490
},
{
"epoch": 92.23,
"learning_rate": 1.2621359223300974e-05,
"loss": 0.0617,
"step": 9500
},
{
"epoch": 92.33,
"learning_rate": 1.2613592233009711e-05,
"loss": 0.1866,
"step": 9510
},
{
"epoch": 92.43,
"learning_rate": 1.2605825242718449e-05,
"loss": 0.0045,
"step": 9520
},
{
"epoch": 92.52,
"learning_rate": 1.2598058252427186e-05,
"loss": 0.1162,
"step": 9530
},
{
"epoch": 92.62,
"learning_rate": 1.2590291262135924e-05,
"loss": 0.034,
"step": 9540
},
{
"epoch": 92.72,
"learning_rate": 1.2582524271844661e-05,
"loss": 0.1544,
"step": 9550
},
{
"epoch": 92.82,
"learning_rate": 1.2574757281553399e-05,
"loss": 0.0101,
"step": 9560
},
{
"epoch": 92.91,
"learning_rate": 1.2566990291262138e-05,
"loss": 0.0026,
"step": 9570
},
{
"epoch": 93.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.014391203410923481,
"eval_runtime": 0.8967,
"eval_samples_per_second": 162.823,
"eval_steps_per_second": 21.189,
"step": 9579
},
{
"epoch": 93.01,
"learning_rate": 1.2559223300970875e-05,
"loss": 0.0848,
"step": 9580
},
{
"epoch": 93.11,
"learning_rate": 1.2551456310679613e-05,
"loss": 0.0198,
"step": 9590
},
{
"epoch": 93.2,
"learning_rate": 1.254368932038835e-05,
"loss": 0.033,
"step": 9600
},
{
"epoch": 93.3,
"learning_rate": 1.2535922330097088e-05,
"loss": 0.0032,
"step": 9610
},
{
"epoch": 93.4,
"learning_rate": 1.2528155339805825e-05,
"loss": 0.1336,
"step": 9620
},
{
"epoch": 93.5,
"learning_rate": 1.2520388349514564e-05,
"loss": 0.0281,
"step": 9630
},
{
"epoch": 93.59,
"learning_rate": 1.2512621359223302e-05,
"loss": 0.0436,
"step": 9640
},
{
"epoch": 93.69,
"learning_rate": 1.250485436893204e-05,
"loss": 0.193,
"step": 9650
},
{
"epoch": 93.79,
"learning_rate": 1.2497087378640777e-05,
"loss": 0.0018,
"step": 9660
},
{
"epoch": 93.88,
"learning_rate": 1.2489320388349514e-05,
"loss": 0.0509,
"step": 9670
},
{
"epoch": 93.98,
"learning_rate": 1.2481553398058252e-05,
"loss": 0.1578,
"step": 9680
},
{
"epoch": 94.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.012599923647940159,
"eval_runtime": 0.8826,
"eval_samples_per_second": 165.42,
"eval_steps_per_second": 21.527,
"step": 9682
},
{
"epoch": 94.08,
"learning_rate": 1.2473786407766993e-05,
"loss": 0.0837,
"step": 9690
},
{
"epoch": 94.17,
"learning_rate": 1.246601941747573e-05,
"loss": 0.0786,
"step": 9700
},
{
"epoch": 94.27,
"learning_rate": 1.2458252427184468e-05,
"loss": 0.0012,
"step": 9710
},
{
"epoch": 94.37,
"learning_rate": 1.2450485436893205e-05,
"loss": 0.0656,
"step": 9720
},
{
"epoch": 94.47,
"learning_rate": 1.2442718446601943e-05,
"loss": 0.022,
"step": 9730
},
{
"epoch": 94.56,
"learning_rate": 1.243495145631068e-05,
"loss": 0.1272,
"step": 9740
},
{
"epoch": 94.66,
"learning_rate": 1.2427184466019418e-05,
"loss": 0.0013,
"step": 9750
},
{
"epoch": 94.76,
"learning_rate": 1.2419417475728157e-05,
"loss": 0.0519,
"step": 9760
},
{
"epoch": 94.85,
"learning_rate": 1.2411650485436894e-05,
"loss": 0.084,
"step": 9770
},
{
"epoch": 94.95,
"learning_rate": 1.2403883495145632e-05,
"loss": 0.0431,
"step": 9780
},
{
"epoch": 95.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.04091314598917961,
"eval_runtime": 0.8882,
"eval_samples_per_second": 164.379,
"eval_steps_per_second": 21.392,
"step": 9785
},
{
"epoch": 95.05,
"learning_rate": 1.239611650485437e-05,
"loss": 0.1725,
"step": 9790
},
{
"epoch": 95.15,
"learning_rate": 1.2388349514563107e-05,
"loss": 0.0395,
"step": 9800
},
{
"epoch": 95.24,
"learning_rate": 1.2380582524271844e-05,
"loss": 0.1174,
"step": 9810
},
{
"epoch": 95.34,
"learning_rate": 1.2372815533980584e-05,
"loss": 0.0012,
"step": 9820
},
{
"epoch": 95.44,
"learning_rate": 1.2365048543689321e-05,
"loss": 0.0623,
"step": 9830
},
{
"epoch": 95.53,
"learning_rate": 1.2357281553398059e-05,
"loss": 0.0653,
"step": 9840
},
{
"epoch": 95.63,
"learning_rate": 1.2349514563106798e-05,
"loss": 0.0259,
"step": 9850
},
{
"epoch": 95.73,
"learning_rate": 1.2341747572815535e-05,
"loss": 0.0147,
"step": 9860
},
{
"epoch": 95.83,
"learning_rate": 1.2333980582524273e-05,
"loss": 0.1148,
"step": 9870
},
{
"epoch": 95.92,
"learning_rate": 1.2326213592233012e-05,
"loss": 0.1357,
"step": 9880
},
{
"epoch": 96.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.05305100977420807,
"eval_runtime": 0.8941,
"eval_samples_per_second": 163.297,
"eval_steps_per_second": 21.251,
"step": 9888
},
{
"epoch": 96.02,
"learning_rate": 1.231844660194175e-05,
"loss": 0.0041,
"step": 9890
},
{
"epoch": 96.12,
"learning_rate": 1.2310679611650487e-05,
"loss": 0.0897,
"step": 9900
},
{
"epoch": 96.21,
"learning_rate": 1.2302912621359224e-05,
"loss": 0.0901,
"step": 9910
},
{
"epoch": 96.31,
"learning_rate": 1.2295145631067962e-05,
"loss": 0.0013,
"step": 9920
},
{
"epoch": 96.41,
"learning_rate": 1.22873786407767e-05,
"loss": 0.1067,
"step": 9930
},
{
"epoch": 96.5,
"learning_rate": 1.2279611650485437e-05,
"loss": 0.0072,
"step": 9940
},
{
"epoch": 96.6,
"learning_rate": 1.2271844660194176e-05,
"loss": 0.0098,
"step": 9950
},
{
"epoch": 96.7,
"learning_rate": 1.2264077669902914e-05,
"loss": 0.0127,
"step": 9960
},
{
"epoch": 96.8,
"learning_rate": 1.2256310679611651e-05,
"loss": 0.0276,
"step": 9970
},
{
"epoch": 96.89,
"learning_rate": 1.2248543689320389e-05,
"loss": 0.093,
"step": 9980
},
{
"epoch": 96.99,
"learning_rate": 1.2240776699029126e-05,
"loss": 0.0476,
"step": 9990
},
{
"epoch": 97.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.008646626956760883,
"eval_runtime": 0.9314,
"eval_samples_per_second": 156.762,
"eval_steps_per_second": 20.4,
"step": 9991
},
{
"epoch": 97.09,
"learning_rate": 1.2233009708737864e-05,
"loss": 0.0041,
"step": 10000
},
{
"epoch": 97.18,
"learning_rate": 1.2225242718446604e-05,
"loss": 0.0017,
"step": 10010
},
{
"epoch": 97.28,
"learning_rate": 1.2217475728155342e-05,
"loss": 0.0313,
"step": 10020
},
{
"epoch": 97.38,
"learning_rate": 1.220970873786408e-05,
"loss": 0.0098,
"step": 10030
},
{
"epoch": 97.48,
"learning_rate": 1.2201941747572817e-05,
"loss": 0.0856,
"step": 10040
},
{
"epoch": 97.57,
"learning_rate": 1.2194174757281554e-05,
"loss": 0.1682,
"step": 10050
},
{
"epoch": 97.67,
"learning_rate": 1.2186407766990292e-05,
"loss": 0.1116,
"step": 10060
},
{
"epoch": 97.77,
"learning_rate": 1.2178640776699031e-05,
"loss": 0.0018,
"step": 10070
},
{
"epoch": 97.86,
"learning_rate": 1.2170873786407769e-05,
"loss": 0.002,
"step": 10080
},
{
"epoch": 97.96,
"learning_rate": 1.2163106796116506e-05,
"loss": 0.0315,
"step": 10090
},
{
"epoch": 98.0,
"eval_accuracy": 1.0,
"eval_loss": 0.0015310003655031323,
"eval_runtime": 0.9234,
"eval_samples_per_second": 158.106,
"eval_steps_per_second": 20.575,
"step": 10094
},
{
"epoch": 98.06,
"learning_rate": 1.2155339805825244e-05,
"loss": 0.0884,
"step": 10100
},
{
"epoch": 98.16,
"learning_rate": 1.2147572815533981e-05,
"loss": 0.0014,
"step": 10110
},
{
"epoch": 98.25,
"learning_rate": 1.2139805825242719e-05,
"loss": 0.0161,
"step": 10120
},
{
"epoch": 98.35,
"learning_rate": 1.2132038834951458e-05,
"loss": 0.0423,
"step": 10130
},
{
"epoch": 98.45,
"learning_rate": 1.2124271844660195e-05,
"loss": 0.0393,
"step": 10140
},
{
"epoch": 98.54,
"learning_rate": 1.2116504854368933e-05,
"loss": 0.2191,
"step": 10150
},
{
"epoch": 98.64,
"learning_rate": 1.210873786407767e-05,
"loss": 0.0192,
"step": 10160
},
{
"epoch": 98.74,
"learning_rate": 1.2100970873786408e-05,
"loss": 0.0264,
"step": 10170
},
{
"epoch": 98.83,
"learning_rate": 1.2093203883495145e-05,
"loss": 0.1027,
"step": 10180
},
{
"epoch": 98.93,
"learning_rate": 1.2085436893203883e-05,
"loss": 0.0171,
"step": 10190
},
{
"epoch": 99.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.0362049825489521,
"eval_runtime": 0.9372,
"eval_samples_per_second": 155.788,
"eval_steps_per_second": 20.274,
"step": 10197
},
{
"epoch": 99.03,
"learning_rate": 1.2077669902912624e-05,
"loss": 0.1313,
"step": 10200
},
{
"epoch": 99.13,
"learning_rate": 1.2069902912621361e-05,
"loss": 0.0012,
"step": 10210
},
{
"epoch": 99.22,
"learning_rate": 1.2062135922330099e-05,
"loss": 0.0546,
"step": 10220
},
{
"epoch": 99.32,
"learning_rate": 1.2054368932038836e-05,
"loss": 0.0046,
"step": 10230
},
{
"epoch": 99.42,
"learning_rate": 1.2046601941747574e-05,
"loss": 0.0537,
"step": 10240
},
{
"epoch": 99.51,
"learning_rate": 1.2038834951456311e-05,
"loss": 0.0711,
"step": 10250
},
{
"epoch": 99.61,
"learning_rate": 1.203106796116505e-05,
"loss": 0.012,
"step": 10260
},
{
"epoch": 99.71,
"learning_rate": 1.2023300970873788e-05,
"loss": 0.0375,
"step": 10270
},
{
"epoch": 99.81,
"learning_rate": 1.2015533980582525e-05,
"loss": 0.0139,
"step": 10280
},
{
"epoch": 99.9,
"learning_rate": 1.2007766990291263e-05,
"loss": 0.0962,
"step": 10290
},
{
"epoch": 100.0,
"learning_rate": 1.2e-05,
"loss": 0.0014,
"step": 10300
},
{
"epoch": 100.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.02322803996503353,
"eval_runtime": 0.9475,
"eval_samples_per_second": 154.09,
"eval_steps_per_second": 20.053,
"step": 10300
},
{
"epoch": 100.1,
"learning_rate": 1.1992233009708738e-05,
"loss": 0.0051,
"step": 10310
},
{
"epoch": 100.19,
"learning_rate": 1.1984466019417477e-05,
"loss": 0.042,
"step": 10320
},
{
"epoch": 100.29,
"learning_rate": 1.1976699029126214e-05,
"loss": 0.001,
"step": 10330
},
{
"epoch": 100.39,
"learning_rate": 1.1968932038834952e-05,
"loss": 0.0835,
"step": 10340
},
{
"epoch": 100.49,
"learning_rate": 1.196116504854369e-05,
"loss": 0.0298,
"step": 10350
},
{
"epoch": 100.58,
"learning_rate": 1.1953398058252429e-05,
"loss": 0.001,
"step": 10360
},
{
"epoch": 100.68,
"learning_rate": 1.1945631067961166e-05,
"loss": 0.2664,
"step": 10370
},
{
"epoch": 100.78,
"learning_rate": 1.1937864077669904e-05,
"loss": 0.0031,
"step": 10380
},
{
"epoch": 100.87,
"learning_rate": 1.1930097087378643e-05,
"loss": 0.0008,
"step": 10390
},
{
"epoch": 100.97,
"learning_rate": 1.192233009708738e-05,
"loss": 0.1161,
"step": 10400
},
{
"epoch": 101.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.043042704463005066,
"eval_runtime": 0.9222,
"eval_samples_per_second": 158.317,
"eval_steps_per_second": 20.603,
"step": 10403
},
{
"epoch": 101.07,
"learning_rate": 1.1914563106796118e-05,
"loss": 0.0009,
"step": 10410
},
{
"epoch": 101.17,
"learning_rate": 1.1906796116504855e-05,
"loss": 0.0432,
"step": 10420
},
{
"epoch": 101.26,
"learning_rate": 1.1899029126213593e-05,
"loss": 0.0441,
"step": 10430
},
{
"epoch": 101.36,
"learning_rate": 1.189126213592233e-05,
"loss": 0.0617,
"step": 10440
},
{
"epoch": 101.46,
"learning_rate": 1.188349514563107e-05,
"loss": 0.0993,
"step": 10450
},
{
"epoch": 101.55,
"learning_rate": 1.1875728155339807e-05,
"loss": 0.1322,
"step": 10460
},
{
"epoch": 101.65,
"learning_rate": 1.1867961165048544e-05,
"loss": 0.0435,
"step": 10470
},
{
"epoch": 101.75,
"learning_rate": 1.1860194174757282e-05,
"loss": 0.013,
"step": 10480
},
{
"epoch": 101.84,
"learning_rate": 1.185242718446602e-05,
"loss": 0.0214,
"step": 10490
},
{
"epoch": 101.94,
"learning_rate": 1.1844660194174757e-05,
"loss": 0.0839,
"step": 10500
},
{
"epoch": 102.0,
"eval_accuracy": 0.9794520547945206,
"eval_loss": 0.10051363706588745,
"eval_runtime": 0.9025,
"eval_samples_per_second": 161.775,
"eval_steps_per_second": 21.053,
"step": 10506
},
{
"epoch": 102.04,
"learning_rate": 1.1836893203883498e-05,
"loss": 0.0018,
"step": 10510
},
{
"epoch": 102.14,
"learning_rate": 1.1829126213592235e-05,
"loss": 0.0037,
"step": 10520
},
{
"epoch": 102.23,
"learning_rate": 1.1821359223300973e-05,
"loss": 0.0258,
"step": 10530
},
{
"epoch": 102.33,
"learning_rate": 1.181359223300971e-05,
"loss": 0.1846,
"step": 10540
},
{
"epoch": 102.43,
"learning_rate": 1.1805825242718448e-05,
"loss": 0.0964,
"step": 10550
},
{
"epoch": 102.52,
"learning_rate": 1.1798058252427185e-05,
"loss": 0.0063,
"step": 10560
},
{
"epoch": 102.62,
"learning_rate": 1.1790291262135923e-05,
"loss": 0.0415,
"step": 10570
},
{
"epoch": 102.72,
"learning_rate": 1.1782524271844662e-05,
"loss": 0.0045,
"step": 10580
},
{
"epoch": 102.82,
"learning_rate": 1.17747572815534e-05,
"loss": 0.075,
"step": 10590
},
{
"epoch": 102.91,
"learning_rate": 1.1766990291262137e-05,
"loss": 0.0428,
"step": 10600
},
{
"epoch": 103.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.08688335865736008,
"eval_runtime": 0.8958,
"eval_samples_per_second": 162.976,
"eval_steps_per_second": 21.209,
"step": 10609
},
{
"epoch": 103.01,
"learning_rate": 1.1759223300970874e-05,
"loss": 0.0012,
"step": 10610
},
{
"epoch": 103.11,
"learning_rate": 1.1751456310679612e-05,
"loss": 0.0385,
"step": 10620
},
{
"epoch": 103.2,
"learning_rate": 1.174368932038835e-05,
"loss": 0.0017,
"step": 10630
},
{
"epoch": 103.3,
"learning_rate": 1.1735922330097089e-05,
"loss": 0.001,
"step": 10640
},
{
"epoch": 103.4,
"learning_rate": 1.1728155339805826e-05,
"loss": 0.0486,
"step": 10650
},
{
"epoch": 103.5,
"learning_rate": 1.1720388349514564e-05,
"loss": 0.001,
"step": 10660
},
{
"epoch": 103.59,
"learning_rate": 1.1712621359223301e-05,
"loss": 0.0012,
"step": 10670
},
{
"epoch": 103.69,
"learning_rate": 1.1704854368932039e-05,
"loss": 0.0026,
"step": 10680
},
{
"epoch": 103.79,
"learning_rate": 1.1697087378640776e-05,
"loss": 0.1129,
"step": 10690
},
{
"epoch": 103.88,
"learning_rate": 1.1689320388349517e-05,
"loss": 0.0281,
"step": 10700
},
{
"epoch": 103.98,
"learning_rate": 1.1681553398058255e-05,
"loss": 0.0058,
"step": 10710
},
{
"epoch": 104.0,
"eval_accuracy": 0.9863013698630136,
"eval_loss": 0.02182828262448311,
"eval_runtime": 0.9139,
"eval_samples_per_second": 159.746,
"eval_steps_per_second": 20.789,
"step": 10712
},
{
"epoch": 104.08,
"learning_rate": 1.1673786407766992e-05,
"loss": 0.0081,
"step": 10720
},
{
"epoch": 104.17,
"learning_rate": 1.166601941747573e-05,
"loss": 0.0008,
"step": 10730
},
{
"epoch": 104.27,
"learning_rate": 1.1658252427184467e-05,
"loss": 0.0018,
"step": 10740
},
{
"epoch": 104.37,
"learning_rate": 1.1650485436893204e-05,
"loss": 0.003,
"step": 10750
},
{
"epoch": 104.47,
"learning_rate": 1.1642718446601942e-05,
"loss": 0.0578,
"step": 10760
},
{
"epoch": 104.56,
"learning_rate": 1.1634951456310681e-05,
"loss": 0.0679,
"step": 10770
},
{
"epoch": 104.66,
"learning_rate": 1.1627184466019419e-05,
"loss": 0.0371,
"step": 10780
},
{
"epoch": 104.76,
"learning_rate": 1.1619417475728156e-05,
"loss": 0.0815,
"step": 10790
},
{
"epoch": 104.85,
"learning_rate": 1.1611650485436894e-05,
"loss": 0.0277,
"step": 10800
},
{
"epoch": 104.95,
"learning_rate": 1.1603883495145631e-05,
"loss": 0.0657,
"step": 10810
},
{
"epoch": 105.0,
"eval_accuracy": 0.9931506849315068,
"eval_loss": 0.012804172933101654,
"eval_runtime": 0.8936,
"eval_samples_per_second": 163.381,
"eval_steps_per_second": 21.262,
"step": 10815
},
{
"epoch": 105.05,
"learning_rate": 1.1596116504854369e-05,
"loss": 0.0131,
"step": 10820
},
{
"epoch": 105.15,
"learning_rate": 1.1588349514563108e-05,
"loss": 0.0791,
"step": 10830
},
{
"epoch": 105.24,
"learning_rate": 1.1580582524271845e-05,
"loss": 0.1427,
"step": 10840
},
{
"epoch": 105.34,
"learning_rate": 1.1572815533980583e-05,
"loss": 0.0344,
"step": 10850
},
{
"epoch": 105.44,
"learning_rate": 1.1565048543689322e-05,
"loss": 0.0012,
"step": 10860
},
{
"epoch": 105.53,
"learning_rate": 1.155728155339806e-05,
"loss": 0.1199,
"step": 10870
},
{
"epoch": 105.63,
"learning_rate": 1.1549514563106797e-05,
"loss": 0.0055,
"step": 10880
},
{
"epoch": 105.73,
"learning_rate": 1.1541747572815536e-05,
"loss": 0.1326,
"step": 10890
},
{
"epoch": 105.83,
"learning_rate": 1.1533980582524274e-05,
"loss": 0.0034,
"step": 10900
},
{
"epoch": 105.92,
"learning_rate": 1.1526213592233011e-05,
"loss": 0.0032,
"step": 10910
},
{
"epoch": 106.0,
"eval_accuracy": 1.0,
"eval_loss": 0.0011723055504262447,
"eval_runtime": 0.8979,
"eval_samples_per_second": 162.605,
"eval_steps_per_second": 21.161,
"step": 10918
}
],
"logging_steps": 10,
"max_steps": 25750,
"num_input_tokens_seen": 0,
"num_train_epochs": 250,
"save_steps": 500,
"total_flos": 6.768824322311848e+18,
"trial_name": null,
"trial_params": null
}