{ "best_metric": 0.0011723055504262447, "best_model_checkpoint": "./ap_train_outputs/checkpoint-10918", "epoch": 106.0, "eval_steps": 500, "global_step": 10918, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1.999223300970874e-05, "loss": 2.0179, "step": 10 }, { "epoch": 0.19, "learning_rate": 1.9984466019417477e-05, "loss": 1.9319, "step": 20 }, { "epoch": 0.29, "learning_rate": 1.9976699029126216e-05, "loss": 1.7818, "step": 30 }, { "epoch": 0.39, "learning_rate": 1.9968932038834955e-05, "loss": 1.7205, "step": 40 }, { "epoch": 0.49, "learning_rate": 1.996116504854369e-05, "loss": 1.5862, "step": 50 }, { "epoch": 0.58, "learning_rate": 1.995339805825243e-05, "loss": 1.5051, "step": 60 }, { "epoch": 0.68, "learning_rate": 1.9945631067961166e-05, "loss": 1.3624, "step": 70 }, { "epoch": 0.78, "learning_rate": 1.9937864077669905e-05, "loss": 1.3245, "step": 80 }, { "epoch": 0.87, "learning_rate": 1.993009708737864e-05, "loss": 1.2896, "step": 90 }, { "epoch": 0.97, "learning_rate": 1.992233009708738e-05, "loss": 1.1133, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.8561643835616438, "eval_loss": 1.1050430536270142, "eval_runtime": 1.0213, "eval_samples_per_second": 142.949, "eval_steps_per_second": 18.603, "step": 103 }, { "epoch": 1.07, "learning_rate": 1.991456310679612e-05, "loss": 1.1258, "step": 110 }, { "epoch": 1.17, "learning_rate": 1.9906796116504855e-05, "loss": 1.0101, "step": 120 }, { "epoch": 1.26, "learning_rate": 1.9899029126213594e-05, "loss": 0.8295, "step": 130 }, { "epoch": 1.36, "learning_rate": 1.989126213592233e-05, "loss": 0.8112, "step": 140 }, { "epoch": 1.46, "learning_rate": 1.988349514563107e-05, "loss": 0.7511, "step": 150 }, { "epoch": 1.55, "learning_rate": 1.987572815533981e-05, "loss": 0.7514, "step": 160 }, { "epoch": 1.65, "learning_rate": 1.9867961165048548e-05, "loss": 0.7797, "step": 170 }, { "epoch": 1.75, "learning_rate": 1.9860194174757283e-05, "loss": 0.6666, "step": 180 }, { "epoch": 1.84, "learning_rate": 1.9852427184466022e-05, "loss": 0.7185, "step": 190 }, { "epoch": 1.94, "learning_rate": 1.9844660194174758e-05, "loss": 0.6564, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.9452054794520548, "eval_loss": 0.6278233528137207, "eval_runtime": 1.018, "eval_samples_per_second": 143.418, "eval_steps_per_second": 18.664, "step": 206 }, { "epoch": 2.04, "learning_rate": 1.9836893203883497e-05, "loss": 0.4882, "step": 210 }, { "epoch": 2.14, "learning_rate": 1.9829126213592233e-05, "loss": 0.5629, "step": 220 }, { "epoch": 2.23, "learning_rate": 1.9821359223300972e-05, "loss": 0.5384, "step": 230 }, { "epoch": 2.33, "learning_rate": 1.981359223300971e-05, "loss": 0.445, "step": 240 }, { "epoch": 2.43, "learning_rate": 1.9805825242718447e-05, "loss": 0.5185, "step": 250 }, { "epoch": 2.52, "learning_rate": 1.9798058252427187e-05, "loss": 0.4958, "step": 260 }, { "epoch": 2.62, "learning_rate": 1.9790291262135922e-05, "loss": 0.4132, "step": 270 }, { "epoch": 2.72, "learning_rate": 1.978252427184466e-05, "loss": 0.4397, "step": 280 }, { "epoch": 2.82, "learning_rate": 1.97747572815534e-05, "loss": 0.4415, "step": 290 }, { "epoch": 2.91, "learning_rate": 1.9766990291262137e-05, "loss": 0.4004, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.9657534246575342, "eval_loss": 0.3876227140426636, "eval_runtime": 0.9913, "eval_samples_per_second": 147.283, "eval_steps_per_second": 19.167, "step": 309 }, { "epoch": 3.01, "learning_rate": 1.9759223300970876e-05, "loss": 0.3367, "step": 310 }, { "epoch": 3.11, "learning_rate": 1.975145631067961e-05, "loss": 0.3328, "step": 320 }, { "epoch": 3.2, "learning_rate": 1.974368932038835e-05, "loss": 0.3767, "step": 330 }, { "epoch": 3.3, "learning_rate": 1.9735922330097087e-05, "loss": 0.3029, "step": 340 }, { "epoch": 3.4, "learning_rate": 1.972815533980583e-05, "loss": 0.3183, "step": 350 }, { "epoch": 3.5, "learning_rate": 1.9720388349514565e-05, "loss": 0.239, "step": 360 }, { "epoch": 3.59, "learning_rate": 1.9712621359223304e-05, "loss": 0.3627, "step": 370 }, { "epoch": 3.69, "learning_rate": 1.970485436893204e-05, "loss": 0.3516, "step": 380 }, { "epoch": 3.79, "learning_rate": 1.969708737864078e-05, "loss": 0.2904, "step": 390 }, { "epoch": 3.88, "learning_rate": 1.9689320388349515e-05, "loss": 0.2362, "step": 400 }, { "epoch": 3.98, "learning_rate": 1.9681553398058254e-05, "loss": 0.2624, "step": 410 }, { "epoch": 4.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.24633407592773438, "eval_runtime": 0.9705, "eval_samples_per_second": 150.434, "eval_steps_per_second": 19.577, "step": 412 }, { "epoch": 4.08, "learning_rate": 1.9673786407766993e-05, "loss": 0.2748, "step": 420 }, { "epoch": 4.17, "learning_rate": 1.966601941747573e-05, "loss": 0.2072, "step": 430 }, { "epoch": 4.27, "learning_rate": 1.965825242718447e-05, "loss": 0.209, "step": 440 }, { "epoch": 4.37, "learning_rate": 1.9650485436893204e-05, "loss": 0.2567, "step": 450 }, { "epoch": 4.47, "learning_rate": 1.9642718446601943e-05, "loss": 0.194, "step": 460 }, { "epoch": 4.56, "learning_rate": 1.963495145631068e-05, "loss": 0.2188, "step": 470 }, { "epoch": 4.66, "learning_rate": 1.962718446601942e-05, "loss": 0.2031, "step": 480 }, { "epoch": 4.76, "learning_rate": 1.9619417475728157e-05, "loss": 0.2943, "step": 490 }, { "epoch": 4.85, "learning_rate": 1.9611650485436893e-05, "loss": 0.1883, "step": 500 }, { "epoch": 4.95, "learning_rate": 1.9603883495145632e-05, "loss": 0.2074, "step": 510 }, { "epoch": 5.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.19891677796840668, "eval_runtime": 0.9232, "eval_samples_per_second": 158.14, "eval_steps_per_second": 20.58, "step": 515 }, { "epoch": 5.05, "learning_rate": 1.959611650485437e-05, "loss": 0.2058, "step": 520 }, { "epoch": 5.15, "learning_rate": 1.9588349514563107e-05, "loss": 0.1468, "step": 530 }, { "epoch": 5.24, "learning_rate": 1.9580582524271847e-05, "loss": 0.2338, "step": 540 }, { "epoch": 5.34, "learning_rate": 1.9572815533980586e-05, "loss": 0.2376, "step": 550 }, { "epoch": 5.44, "learning_rate": 1.956504854368932e-05, "loss": 0.2948, "step": 560 }, { "epoch": 5.53, "learning_rate": 1.955728155339806e-05, "loss": 0.191, "step": 570 }, { "epoch": 5.63, "learning_rate": 1.9549514563106797e-05, "loss": 0.1313, "step": 580 }, { "epoch": 5.73, "learning_rate": 1.9541747572815536e-05, "loss": 0.1462, "step": 590 }, { "epoch": 5.83, "learning_rate": 1.9533980582524275e-05, "loss": 0.239, "step": 600 }, { "epoch": 5.92, "learning_rate": 1.952621359223301e-05, "loss": 0.141, "step": 610 }, { "epoch": 6.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.17079336941242218, "eval_runtime": 0.9678, "eval_samples_per_second": 150.859, "eval_steps_per_second": 19.632, "step": 618 }, { "epoch": 6.02, "learning_rate": 1.951844660194175e-05, "loss": 0.1518, "step": 620 }, { "epoch": 6.12, "learning_rate": 1.9510679611650486e-05, "loss": 0.1385, "step": 630 }, { "epoch": 6.21, "learning_rate": 1.9502912621359225e-05, "loss": 0.1632, "step": 640 }, { "epoch": 6.31, "learning_rate": 1.949514563106796e-05, "loss": 0.216, "step": 650 }, { "epoch": 6.41, "learning_rate": 1.94873786407767e-05, "loss": 0.3242, "step": 660 }, { "epoch": 6.5, "learning_rate": 1.947961165048544e-05, "loss": 0.1218, "step": 670 }, { "epoch": 6.6, "learning_rate": 1.947184466019418e-05, "loss": 0.1637, "step": 680 }, { "epoch": 6.7, "learning_rate": 1.9464077669902914e-05, "loss": 0.1651, "step": 690 }, { "epoch": 6.8, "learning_rate": 1.9456310679611653e-05, "loss": 0.181, "step": 700 }, { "epoch": 6.89, "learning_rate": 1.944854368932039e-05, "loss": 0.186, "step": 710 }, { "epoch": 6.99, "learning_rate": 1.944077669902913e-05, "loss": 0.1338, "step": 720 }, { "epoch": 7.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.17499177157878876, "eval_runtime": 0.9605, "eval_samples_per_second": 152.011, "eval_steps_per_second": 19.782, "step": 721 }, { "epoch": 7.09, "learning_rate": 1.9433009708737868e-05, "loss": 0.1857, "step": 730 }, { "epoch": 7.18, "learning_rate": 1.9425242718446603e-05, "loss": 0.2048, "step": 740 }, { "epoch": 7.28, "learning_rate": 1.9417475728155343e-05, "loss": 0.1958, "step": 750 }, { "epoch": 7.38, "learning_rate": 1.940970873786408e-05, "loss": 0.2379, "step": 760 }, { "epoch": 7.48, "learning_rate": 1.9401941747572818e-05, "loss": 0.1159, "step": 770 }, { "epoch": 7.57, "learning_rate": 1.9394174757281553e-05, "loss": 0.1961, "step": 780 }, { "epoch": 7.67, "learning_rate": 1.9386407766990292e-05, "loss": 0.1297, "step": 790 }, { "epoch": 7.77, "learning_rate": 1.937864077669903e-05, "loss": 0.2555, "step": 800 }, { "epoch": 7.86, "learning_rate": 1.9370873786407767e-05, "loss": 0.134, "step": 810 }, { "epoch": 7.96, "learning_rate": 1.9363106796116507e-05, "loss": 0.1343, "step": 820 }, { "epoch": 8.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.12730906903743744, "eval_runtime": 0.993, "eval_samples_per_second": 147.023, "eval_steps_per_second": 19.133, "step": 824 }, { "epoch": 8.06, "learning_rate": 1.9355339805825242e-05, "loss": 0.2206, "step": 830 }, { "epoch": 8.16, "learning_rate": 1.934757281553398e-05, "loss": 0.1735, "step": 840 }, { "epoch": 8.25, "learning_rate": 1.9339805825242717e-05, "loss": 0.137, "step": 850 }, { "epoch": 8.35, "learning_rate": 1.933203883495146e-05, "loss": 0.1378, "step": 860 }, { "epoch": 8.45, "learning_rate": 1.9324271844660196e-05, "loss": 0.1671, "step": 870 }, { "epoch": 8.54, "learning_rate": 1.9316504854368935e-05, "loss": 0.212, "step": 880 }, { "epoch": 8.64, "learning_rate": 1.930873786407767e-05, "loss": 0.1598, "step": 890 }, { "epoch": 8.74, "learning_rate": 1.930097087378641e-05, "loss": 0.1813, "step": 900 }, { "epoch": 8.83, "learning_rate": 1.9293203883495146e-05, "loss": 0.0725, "step": 910 }, { "epoch": 8.93, "learning_rate": 1.9285436893203885e-05, "loss": 0.1558, "step": 920 }, { "epoch": 9.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.11657154560089111, "eval_runtime": 0.9702, "eval_samples_per_second": 150.486, "eval_steps_per_second": 19.584, "step": 927 }, { "epoch": 9.03, "learning_rate": 1.9277669902912624e-05, "loss": 0.094, "step": 930 }, { "epoch": 9.13, "learning_rate": 1.926990291262136e-05, "loss": 0.1341, "step": 940 }, { "epoch": 9.22, "learning_rate": 1.92621359223301e-05, "loss": 0.196, "step": 950 }, { "epoch": 9.32, "learning_rate": 1.9254368932038835e-05, "loss": 0.1028, "step": 960 }, { "epoch": 9.42, "learning_rate": 1.9246601941747574e-05, "loss": 0.178, "step": 970 }, { "epoch": 9.51, "learning_rate": 1.9238834951456313e-05, "loss": 0.2527, "step": 980 }, { "epoch": 9.61, "learning_rate": 1.923106796116505e-05, "loss": 0.2609, "step": 990 }, { "epoch": 9.71, "learning_rate": 1.922330097087379e-05, "loss": 0.1518, "step": 1000 }, { "epoch": 9.81, "learning_rate": 1.9215533980582528e-05, "loss": 0.1383, "step": 1010 }, { "epoch": 9.9, "learning_rate": 1.9207766990291263e-05, "loss": 0.075, "step": 1020 }, { "epoch": 10.0, "learning_rate": 1.9200000000000003e-05, "loss": 0.0799, "step": 1030 }, { "epoch": 10.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.1238846480846405, "eval_runtime": 0.9773, "eval_samples_per_second": 149.397, "eval_steps_per_second": 19.442, "step": 1030 }, { "epoch": 10.1, "learning_rate": 1.919223300970874e-05, "loss": 0.1866, "step": 1040 }, { "epoch": 10.19, "learning_rate": 1.9184466019417478e-05, "loss": 0.1154, "step": 1050 }, { "epoch": 10.29, "learning_rate": 1.9176699029126217e-05, "loss": 0.1123, "step": 1060 }, { "epoch": 10.39, "learning_rate": 1.9168932038834952e-05, "loss": 0.1085, "step": 1070 }, { "epoch": 10.49, "learning_rate": 1.916116504854369e-05, "loss": 0.2075, "step": 1080 }, { "epoch": 10.58, "learning_rate": 1.9153398058252427e-05, "loss": 0.0624, "step": 1090 }, { "epoch": 10.68, "learning_rate": 1.9145631067961167e-05, "loss": 0.1116, "step": 1100 }, { "epoch": 10.78, "learning_rate": 1.9137864077669906e-05, "loss": 0.0523, "step": 1110 }, { "epoch": 10.87, "learning_rate": 1.913009708737864e-05, "loss": 0.1655, "step": 1120 }, { "epoch": 10.97, "learning_rate": 1.912233009708738e-05, "loss": 0.1677, "step": 1130 }, { "epoch": 11.0, "eval_accuracy": 0.9726027397260274, "eval_loss": 0.15431082248687744, "eval_runtime": 0.948, "eval_samples_per_second": 154.005, "eval_steps_per_second": 20.042, "step": 1133 }, { "epoch": 11.07, "learning_rate": 1.9114563106796117e-05, "loss": 0.1516, "step": 1140 }, { "epoch": 11.17, "learning_rate": 1.9106796116504856e-05, "loss": 0.1927, "step": 1150 }, { "epoch": 11.26, "learning_rate": 1.909902912621359e-05, "loss": 0.0548, "step": 1160 }, { "epoch": 11.36, "learning_rate": 1.9091262135922334e-05, "loss": 0.085, "step": 1170 }, { "epoch": 11.46, "learning_rate": 1.908349514563107e-05, "loss": 0.202, "step": 1180 }, { "epoch": 11.55, "learning_rate": 1.907572815533981e-05, "loss": 0.0741, "step": 1190 }, { "epoch": 11.65, "learning_rate": 1.9067961165048545e-05, "loss": 0.1373, "step": 1200 }, { "epoch": 11.75, "learning_rate": 1.9060194174757284e-05, "loss": 0.219, "step": 1210 }, { "epoch": 11.84, "learning_rate": 1.905242718446602e-05, "loss": 0.169, "step": 1220 }, { "epoch": 11.94, "learning_rate": 1.904466019417476e-05, "loss": 0.1969, "step": 1230 }, { "epoch": 12.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.09193126857280731, "eval_runtime": 0.9434, "eval_samples_per_second": 154.754, "eval_steps_per_second": 20.139, "step": 1236 }, { "epoch": 12.04, "learning_rate": 1.90368932038835e-05, "loss": 0.0957, "step": 1240 }, { "epoch": 12.14, "learning_rate": 1.9029126213592234e-05, "loss": 0.1396, "step": 1250 }, { "epoch": 12.23, "learning_rate": 1.9021359223300973e-05, "loss": 0.1004, "step": 1260 }, { "epoch": 12.33, "learning_rate": 1.901359223300971e-05, "loss": 0.0796, "step": 1270 }, { "epoch": 12.43, "learning_rate": 1.900582524271845e-05, "loss": 0.225, "step": 1280 }, { "epoch": 12.52, "learning_rate": 1.8998058252427184e-05, "loss": 0.1395, "step": 1290 }, { "epoch": 12.62, "learning_rate": 1.8990291262135923e-05, "loss": 0.0571, "step": 1300 }, { "epoch": 12.72, "learning_rate": 1.8982524271844663e-05, "loss": 0.0472, "step": 1310 }, { "epoch": 12.82, "learning_rate": 1.89747572815534e-05, "loss": 0.1276, "step": 1320 }, { "epoch": 12.91, "learning_rate": 1.8966990291262138e-05, "loss": 0.0533, "step": 1330 }, { "epoch": 13.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.07145330309867859, "eval_runtime": 0.9485, "eval_samples_per_second": 153.935, "eval_steps_per_second": 20.033, "step": 1339 }, { "epoch": 13.01, "learning_rate": 1.8959223300970873e-05, "loss": 0.0895, "step": 1340 }, { "epoch": 13.11, "learning_rate": 1.8951456310679613e-05, "loss": 0.0545, "step": 1350 }, { "epoch": 13.2, "learning_rate": 1.894368932038835e-05, "loss": 0.0889, "step": 1360 }, { "epoch": 13.3, "learning_rate": 1.893592233009709e-05, "loss": 0.0444, "step": 1370 }, { "epoch": 13.4, "learning_rate": 1.8928155339805827e-05, "loss": 0.131, "step": 1380 }, { "epoch": 13.5, "learning_rate": 1.8920388349514566e-05, "loss": 0.1644, "step": 1390 }, { "epoch": 13.59, "learning_rate": 1.89126213592233e-05, "loss": 0.1254, "step": 1400 }, { "epoch": 13.69, "learning_rate": 1.890485436893204e-05, "loss": 0.1407, "step": 1410 }, { "epoch": 13.79, "learning_rate": 1.8897087378640777e-05, "loss": 0.1139, "step": 1420 }, { "epoch": 13.88, "learning_rate": 1.8889320388349516e-05, "loss": 0.0703, "step": 1430 }, { "epoch": 13.98, "learning_rate": 1.8881553398058255e-05, "loss": 0.1645, "step": 1440 }, { "epoch": 14.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.050846148282289505, "eval_runtime": 0.9651, "eval_samples_per_second": 151.283, "eval_steps_per_second": 19.688, "step": 1442 }, { "epoch": 14.08, "learning_rate": 1.887378640776699e-05, "loss": 0.1136, "step": 1450 }, { "epoch": 14.17, "learning_rate": 1.886601941747573e-05, "loss": 0.0542, "step": 1460 }, { "epoch": 14.27, "learning_rate": 1.8858252427184466e-05, "loss": 0.1973, "step": 1470 }, { "epoch": 14.37, "learning_rate": 1.8850485436893205e-05, "loss": 0.1312, "step": 1480 }, { "epoch": 14.47, "learning_rate": 1.8842718446601944e-05, "loss": 0.1627, "step": 1490 }, { "epoch": 14.56, "learning_rate": 1.883495145631068e-05, "loss": 0.1452, "step": 1500 }, { "epoch": 14.66, "learning_rate": 1.882718446601942e-05, "loss": 0.1142, "step": 1510 }, { "epoch": 14.76, "learning_rate": 1.881941747572816e-05, "loss": 0.0803, "step": 1520 }, { "epoch": 14.85, "learning_rate": 1.8811650485436894e-05, "loss": 0.1401, "step": 1530 }, { "epoch": 14.95, "learning_rate": 1.8803883495145633e-05, "loss": 0.1036, "step": 1540 }, { "epoch": 15.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.06801381707191467, "eval_runtime": 0.9536, "eval_samples_per_second": 153.104, "eval_steps_per_second": 19.924, "step": 1545 }, { "epoch": 15.05, "learning_rate": 1.8796116504854373e-05, "loss": 0.1774, "step": 1550 }, { "epoch": 15.15, "learning_rate": 1.878834951456311e-05, "loss": 0.0533, "step": 1560 }, { "epoch": 15.24, "learning_rate": 1.8780582524271848e-05, "loss": 0.0623, "step": 1570 }, { "epoch": 15.34, "learning_rate": 1.8772815533980583e-05, "loss": 0.1694, "step": 1580 }, { "epoch": 15.44, "learning_rate": 1.8765048543689323e-05, "loss": 0.1773, "step": 1590 }, { "epoch": 15.53, "learning_rate": 1.875728155339806e-05, "loss": 0.2231, "step": 1600 }, { "epoch": 15.63, "learning_rate": 1.8749514563106798e-05, "loss": 0.0794, "step": 1610 }, { "epoch": 15.73, "learning_rate": 1.8741747572815537e-05, "loss": 0.0464, "step": 1620 }, { "epoch": 15.83, "learning_rate": 1.8733980582524273e-05, "loss": 0.1643, "step": 1630 }, { "epoch": 15.92, "learning_rate": 1.8726213592233012e-05, "loss": 0.0442, "step": 1640 }, { "epoch": 16.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.08464141935110092, "eval_runtime": 0.9534, "eval_samples_per_second": 153.139, "eval_steps_per_second": 19.929, "step": 1648 }, { "epoch": 16.02, "learning_rate": 1.8718446601941747e-05, "loss": 0.0626, "step": 1650 }, { "epoch": 16.12, "learning_rate": 1.8710679611650487e-05, "loss": 0.0868, "step": 1660 }, { "epoch": 16.21, "learning_rate": 1.8702912621359222e-05, "loss": 0.2295, "step": 1670 }, { "epoch": 16.31, "learning_rate": 1.8695145631067965e-05, "loss": 0.089, "step": 1680 }, { "epoch": 16.41, "learning_rate": 1.86873786407767e-05, "loss": 0.0727, "step": 1690 }, { "epoch": 16.5, "learning_rate": 1.867961165048544e-05, "loss": 0.0831, "step": 1700 }, { "epoch": 16.6, "learning_rate": 1.8671844660194176e-05, "loss": 0.1162, "step": 1710 }, { "epoch": 16.7, "learning_rate": 1.8664077669902915e-05, "loss": 0.0484, "step": 1720 }, { "epoch": 16.8, "learning_rate": 1.865631067961165e-05, "loss": 0.0411, "step": 1730 }, { "epoch": 16.89, "learning_rate": 1.864854368932039e-05, "loss": 0.0726, "step": 1740 }, { "epoch": 16.99, "learning_rate": 1.864077669902913e-05, "loss": 0.065, "step": 1750 }, { "epoch": 17.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.056606147438287735, "eval_runtime": 0.9748, "eval_samples_per_second": 149.773, "eval_steps_per_second": 19.491, "step": 1751 }, { "epoch": 17.09, "learning_rate": 1.8633009708737865e-05, "loss": 0.2112, "step": 1760 }, { "epoch": 17.18, "learning_rate": 1.8625242718446604e-05, "loss": 0.0992, "step": 1770 }, { "epoch": 17.28, "learning_rate": 1.861747572815534e-05, "loss": 0.1154, "step": 1780 }, { "epoch": 17.38, "learning_rate": 1.860970873786408e-05, "loss": 0.0843, "step": 1790 }, { "epoch": 17.48, "learning_rate": 1.860194174757282e-05, "loss": 0.0969, "step": 1800 }, { "epoch": 17.57, "learning_rate": 1.8594174757281554e-05, "loss": 0.2324, "step": 1810 }, { "epoch": 17.67, "learning_rate": 1.8586407766990293e-05, "loss": 0.1239, "step": 1820 }, { "epoch": 17.77, "learning_rate": 1.857864077669903e-05, "loss": 0.1081, "step": 1830 }, { "epoch": 17.86, "learning_rate": 1.857087378640777e-05, "loss": 0.0354, "step": 1840 }, { "epoch": 17.96, "learning_rate": 1.8563106796116504e-05, "loss": 0.1437, "step": 1850 }, { "epoch": 18.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.04979800060391426, "eval_runtime": 0.9429, "eval_samples_per_second": 154.848, "eval_steps_per_second": 20.151, "step": 1854 }, { "epoch": 18.06, "learning_rate": 1.8555339805825243e-05, "loss": 0.1741, "step": 1860 }, { "epoch": 18.16, "learning_rate": 1.8547572815533983e-05, "loss": 0.0442, "step": 1870 }, { "epoch": 18.25, "learning_rate": 1.8539805825242722e-05, "loss": 0.0778, "step": 1880 }, { "epoch": 18.35, "learning_rate": 1.8532038834951458e-05, "loss": 0.132, "step": 1890 }, { "epoch": 18.45, "learning_rate": 1.8524271844660197e-05, "loss": 0.2408, "step": 1900 }, { "epoch": 18.54, "learning_rate": 1.8516504854368933e-05, "loss": 0.0852, "step": 1910 }, { "epoch": 18.64, "learning_rate": 1.8508737864077672e-05, "loss": 0.149, "step": 1920 }, { "epoch": 18.74, "learning_rate": 1.850097087378641e-05, "loss": 0.0426, "step": 1930 }, { "epoch": 18.83, "learning_rate": 1.8493203883495147e-05, "loss": 0.1248, "step": 1940 }, { "epoch": 18.93, "learning_rate": 1.8485436893203886e-05, "loss": 0.1527, "step": 1950 }, { "epoch": 19.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.0702158659696579, "eval_runtime": 0.9693, "eval_samples_per_second": 150.63, "eval_steps_per_second": 19.603, "step": 1957 }, { "epoch": 19.03, "learning_rate": 1.847766990291262e-05, "loss": 0.0963, "step": 1960 }, { "epoch": 19.13, "learning_rate": 1.846990291262136e-05, "loss": 0.1038, "step": 1970 }, { "epoch": 19.22, "learning_rate": 1.8462135922330097e-05, "loss": 0.0276, "step": 1980 }, { "epoch": 19.32, "learning_rate": 1.8454368932038836e-05, "loss": 0.2018, "step": 1990 }, { "epoch": 19.42, "learning_rate": 1.8446601941747575e-05, "loss": 0.1405, "step": 2000 }, { "epoch": 19.51, "learning_rate": 1.843883495145631e-05, "loss": 0.0337, "step": 2010 }, { "epoch": 19.61, "learning_rate": 1.843106796116505e-05, "loss": 0.1076, "step": 2020 }, { "epoch": 19.71, "learning_rate": 1.842330097087379e-05, "loss": 0.1037, "step": 2030 }, { "epoch": 19.81, "learning_rate": 1.8415533980582525e-05, "loss": 0.0665, "step": 2040 }, { "epoch": 19.9, "learning_rate": 1.8407766990291264e-05, "loss": 0.1567, "step": 2050 }, { "epoch": 20.0, "learning_rate": 1.8400000000000003e-05, "loss": 0.0682, "step": 2060 }, { "epoch": 20.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.06622537225484848, "eval_runtime": 0.9584, "eval_samples_per_second": 152.335, "eval_steps_per_second": 19.824, "step": 2060 }, { "epoch": 20.1, "learning_rate": 1.839223300970874e-05, "loss": 0.0995, "step": 2070 }, { "epoch": 20.19, "learning_rate": 1.838446601941748e-05, "loss": 0.0921, "step": 2080 }, { "epoch": 20.29, "learning_rate": 1.8376699029126214e-05, "loss": 0.2157, "step": 2090 }, { "epoch": 20.39, "learning_rate": 1.8368932038834953e-05, "loss": 0.1321, "step": 2100 }, { "epoch": 20.49, "learning_rate": 1.836116504854369e-05, "loss": 0.0957, "step": 2110 }, { "epoch": 20.58, "learning_rate": 1.835339805825243e-05, "loss": 0.2989, "step": 2120 }, { "epoch": 20.68, "learning_rate": 1.8345631067961168e-05, "loss": 0.2302, "step": 2130 }, { "epoch": 20.78, "learning_rate": 1.8337864077669903e-05, "loss": 0.1633, "step": 2140 }, { "epoch": 20.87, "learning_rate": 1.8330097087378643e-05, "loss": 0.0726, "step": 2150 }, { "epoch": 20.97, "learning_rate": 1.832233009708738e-05, "loss": 0.1013, "step": 2160 }, { "epoch": 21.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.07291552424430847, "eval_runtime": 0.958, "eval_samples_per_second": 152.399, "eval_steps_per_second": 19.833, "step": 2163 }, { "epoch": 21.07, "learning_rate": 1.8314563106796118e-05, "loss": 0.0254, "step": 2170 }, { "epoch": 21.17, "learning_rate": 1.8306796116504857e-05, "loss": 0.1409, "step": 2180 }, { "epoch": 21.26, "learning_rate": 1.8299029126213596e-05, "loss": 0.0853, "step": 2190 }, { "epoch": 21.36, "learning_rate": 1.8291262135922332e-05, "loss": 0.131, "step": 2200 }, { "epoch": 21.46, "learning_rate": 1.828349514563107e-05, "loss": 0.1024, "step": 2210 }, { "epoch": 21.55, "learning_rate": 1.8275728155339807e-05, "loss": 0.0212, "step": 2220 }, { "epoch": 21.65, "learning_rate": 1.8267961165048546e-05, "loss": 0.0578, "step": 2230 }, { "epoch": 21.75, "learning_rate": 1.826019417475728e-05, "loss": 0.1048, "step": 2240 }, { "epoch": 21.84, "learning_rate": 1.825242718446602e-05, "loss": 0.0612, "step": 2250 }, { "epoch": 21.94, "learning_rate": 1.824466019417476e-05, "loss": 0.0807, "step": 2260 }, { "epoch": 22.0, "eval_accuracy": 1.0, "eval_loss": 0.02733495458960533, "eval_runtime": 0.9501, "eval_samples_per_second": 153.673, "eval_steps_per_second": 19.999, "step": 2266 }, { "epoch": 22.04, "learning_rate": 1.8236893203883496e-05, "loss": 0.1788, "step": 2270 }, { "epoch": 22.14, "learning_rate": 1.8229126213592235e-05, "loss": 0.1037, "step": 2280 }, { "epoch": 22.23, "learning_rate": 1.822135922330097e-05, "loss": 0.0909, "step": 2290 }, { "epoch": 22.33, "learning_rate": 1.821359223300971e-05, "loss": 0.0597, "step": 2300 }, { "epoch": 22.43, "learning_rate": 1.820582524271845e-05, "loss": 0.0693, "step": 2310 }, { "epoch": 22.52, "learning_rate": 1.8198058252427185e-05, "loss": 0.0684, "step": 2320 }, { "epoch": 22.62, "learning_rate": 1.8190291262135924e-05, "loss": 0.1146, "step": 2330 }, { "epoch": 22.72, "learning_rate": 1.818252427184466e-05, "loss": 0.0753, "step": 2340 }, { "epoch": 22.82, "learning_rate": 1.81747572815534e-05, "loss": 0.0691, "step": 2350 }, { "epoch": 22.91, "learning_rate": 1.8166990291262135e-05, "loss": 0.0803, "step": 2360 }, { "epoch": 23.0, "eval_accuracy": 0.9657534246575342, "eval_loss": 0.13809683918952942, "eval_runtime": 0.9466, "eval_samples_per_second": 154.239, "eval_steps_per_second": 20.072, "step": 2369 }, { "epoch": 23.01, "learning_rate": 1.8159223300970878e-05, "loss": 0.1052, "step": 2370 }, { "epoch": 23.11, "learning_rate": 1.8151456310679613e-05, "loss": 0.1529, "step": 2380 }, { "epoch": 23.2, "learning_rate": 1.8143689320388353e-05, "loss": 0.0929, "step": 2390 }, { "epoch": 23.3, "learning_rate": 1.813592233009709e-05, "loss": 0.1057, "step": 2400 }, { "epoch": 23.4, "learning_rate": 1.8128155339805828e-05, "loss": 0.0545, "step": 2410 }, { "epoch": 23.5, "learning_rate": 1.8120388349514563e-05, "loss": 0.0632, "step": 2420 }, { "epoch": 23.59, "learning_rate": 1.8112621359223303e-05, "loss": 0.0276, "step": 2430 }, { "epoch": 23.69, "learning_rate": 1.8104854368932042e-05, "loss": 0.0976, "step": 2440 }, { "epoch": 23.79, "learning_rate": 1.8097087378640778e-05, "loss": 0.087, "step": 2450 }, { "epoch": 23.88, "learning_rate": 1.8089320388349517e-05, "loss": 0.248, "step": 2460 }, { "epoch": 23.98, "learning_rate": 1.8081553398058253e-05, "loss": 0.0972, "step": 2470 }, { "epoch": 24.0, "eval_accuracy": 1.0, "eval_loss": 0.025691555812954903, "eval_runtime": 0.9246, "eval_samples_per_second": 157.902, "eval_steps_per_second": 20.549, "step": 2472 }, { "epoch": 24.08, "learning_rate": 1.8073786407766992e-05, "loss": 0.1732, "step": 2480 }, { "epoch": 24.17, "learning_rate": 1.8066019417475728e-05, "loss": 0.0377, "step": 2490 }, { "epoch": 24.27, "learning_rate": 1.8058252427184467e-05, "loss": 0.0686, "step": 2500 }, { "epoch": 24.37, "learning_rate": 1.8050485436893206e-05, "loss": 0.0926, "step": 2510 }, { "epoch": 24.47, "learning_rate": 1.8042718446601942e-05, "loss": 0.079, "step": 2520 }, { "epoch": 24.56, "learning_rate": 1.803495145631068e-05, "loss": 0.0477, "step": 2530 }, { "epoch": 24.66, "learning_rate": 1.802718446601942e-05, "loss": 0.0554, "step": 2540 }, { "epoch": 24.76, "learning_rate": 1.8019417475728156e-05, "loss": 0.0732, "step": 2550 }, { "epoch": 24.85, "learning_rate": 1.8011650485436895e-05, "loss": 0.0215, "step": 2560 }, { "epoch": 24.95, "learning_rate": 1.8003883495145634e-05, "loss": 0.0173, "step": 2570 }, { "epoch": 25.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.05056421086192131, "eval_runtime": 0.9345, "eval_samples_per_second": 156.232, "eval_steps_per_second": 20.332, "step": 2575 }, { "epoch": 25.05, "learning_rate": 1.799611650485437e-05, "loss": 0.0749, "step": 2580 }, { "epoch": 25.15, "learning_rate": 1.798834951456311e-05, "loss": 0.0437, "step": 2590 }, { "epoch": 25.24, "learning_rate": 1.7980582524271845e-05, "loss": 0.0748, "step": 2600 }, { "epoch": 25.34, "learning_rate": 1.7972815533980584e-05, "loss": 0.04, "step": 2610 }, { "epoch": 25.44, "learning_rate": 1.7965048543689323e-05, "loss": 0.0721, "step": 2620 }, { "epoch": 25.53, "learning_rate": 1.795728155339806e-05, "loss": 0.143, "step": 2630 }, { "epoch": 25.63, "learning_rate": 1.79495145631068e-05, "loss": 0.185, "step": 2640 }, { "epoch": 25.73, "learning_rate": 1.7941747572815534e-05, "loss": 0.0379, "step": 2650 }, { "epoch": 25.83, "learning_rate": 1.7933980582524273e-05, "loss": 0.1209, "step": 2660 }, { "epoch": 25.92, "learning_rate": 1.792621359223301e-05, "loss": 0.075, "step": 2670 }, { "epoch": 26.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.09800746291875839, "eval_runtime": 0.9365, "eval_samples_per_second": 155.906, "eval_steps_per_second": 20.289, "step": 2678 }, { "epoch": 26.02, "learning_rate": 1.791844660194175e-05, "loss": 0.0662, "step": 2680 }, { "epoch": 26.12, "learning_rate": 1.7910679611650488e-05, "loss": 0.0206, "step": 2690 }, { "epoch": 26.21, "learning_rate": 1.7902912621359227e-05, "loss": 0.0576, "step": 2700 }, { "epoch": 26.31, "learning_rate": 1.7895145631067963e-05, "loss": 0.0479, "step": 2710 }, { "epoch": 26.41, "learning_rate": 1.7887378640776702e-05, "loss": 0.039, "step": 2720 }, { "epoch": 26.5, "learning_rate": 1.7879611650485438e-05, "loss": 0.0851, "step": 2730 }, { "epoch": 26.6, "learning_rate": 1.7871844660194177e-05, "loss": 0.1626, "step": 2740 }, { "epoch": 26.7, "learning_rate": 1.7864077669902916e-05, "loss": 0.079, "step": 2750 }, { "epoch": 26.8, "learning_rate": 1.7856310679611652e-05, "loss": 0.0268, "step": 2760 }, { "epoch": 26.89, "learning_rate": 1.784854368932039e-05, "loss": 0.0656, "step": 2770 }, { "epoch": 26.99, "learning_rate": 1.7840776699029127e-05, "loss": 0.1103, "step": 2780 }, { "epoch": 27.0, "eval_accuracy": 0.958904109589041, "eval_loss": 0.10835416615009308, "eval_runtime": 0.9504, "eval_samples_per_second": 153.614, "eval_steps_per_second": 19.991, "step": 2781 }, { "epoch": 27.09, "learning_rate": 1.7833009708737866e-05, "loss": 0.0675, "step": 2790 }, { "epoch": 27.18, "learning_rate": 1.7825242718446602e-05, "loss": 0.183, "step": 2800 }, { "epoch": 27.28, "learning_rate": 1.781747572815534e-05, "loss": 0.0763, "step": 2810 }, { "epoch": 27.38, "learning_rate": 1.780970873786408e-05, "loss": 0.0298, "step": 2820 }, { "epoch": 27.48, "learning_rate": 1.7801941747572816e-05, "loss": 0.1811, "step": 2830 }, { "epoch": 27.57, "learning_rate": 1.7794174757281555e-05, "loss": 0.0562, "step": 2840 }, { "epoch": 27.67, "learning_rate": 1.778640776699029e-05, "loss": 0.146, "step": 2850 }, { "epoch": 27.77, "learning_rate": 1.777864077669903e-05, "loss": 0.0538, "step": 2860 }, { "epoch": 27.86, "learning_rate": 1.7770873786407766e-05, "loss": 0.1454, "step": 2870 }, { "epoch": 27.96, "learning_rate": 1.776310679611651e-05, "loss": 0.0622, "step": 2880 }, { "epoch": 28.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.02398013137280941, "eval_runtime": 0.9144, "eval_samples_per_second": 159.669, "eval_steps_per_second": 20.779, "step": 2884 }, { "epoch": 28.06, "learning_rate": 1.7755339805825244e-05, "loss": 0.1488, "step": 2890 }, { "epoch": 28.16, "learning_rate": 1.7747572815533983e-05, "loss": 0.0624, "step": 2900 }, { "epoch": 28.25, "learning_rate": 1.773980582524272e-05, "loss": 0.0144, "step": 2910 }, { "epoch": 28.35, "learning_rate": 1.773203883495146e-05, "loss": 0.0935, "step": 2920 }, { "epoch": 28.45, "learning_rate": 1.7724271844660194e-05, "loss": 0.1088, "step": 2930 }, { "epoch": 28.54, "learning_rate": 1.7716504854368933e-05, "loss": 0.0121, "step": 2940 }, { "epoch": 28.64, "learning_rate": 1.7708737864077673e-05, "loss": 0.0328, "step": 2950 }, { "epoch": 28.74, "learning_rate": 1.770097087378641e-05, "loss": 0.0876, "step": 2960 }, { "epoch": 28.83, "learning_rate": 1.7693203883495148e-05, "loss": 0.0872, "step": 2970 }, { "epoch": 28.93, "learning_rate": 1.7685436893203883e-05, "loss": 0.0126, "step": 2980 }, { "epoch": 29.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.03914155438542366, "eval_runtime": 0.9262, "eval_samples_per_second": 157.634, "eval_steps_per_second": 20.514, "step": 2987 }, { "epoch": 29.03, "learning_rate": 1.7677669902912623e-05, "loss": 0.0274, "step": 2990 }, { "epoch": 29.13, "learning_rate": 1.7669902912621362e-05, "loss": 0.0635, "step": 3000 }, { "epoch": 29.22, "learning_rate": 1.7662135922330098e-05, "loss": 0.1097, "step": 3010 }, { "epoch": 29.32, "learning_rate": 1.7654368932038837e-05, "loss": 0.1493, "step": 3020 }, { "epoch": 29.42, "learning_rate": 1.7646601941747576e-05, "loss": 0.0423, "step": 3030 }, { "epoch": 29.51, "learning_rate": 1.7638834951456312e-05, "loss": 0.1211, "step": 3040 }, { "epoch": 29.61, "learning_rate": 1.763106796116505e-05, "loss": 0.0614, "step": 3050 }, { "epoch": 29.71, "learning_rate": 1.7623300970873787e-05, "loss": 0.0644, "step": 3060 }, { "epoch": 29.81, "learning_rate": 1.7615533980582526e-05, "loss": 0.0784, "step": 3070 }, { "epoch": 29.9, "learning_rate": 1.7607766990291265e-05, "loss": 0.156, "step": 3080 }, { "epoch": 30.0, "learning_rate": 1.76e-05, "loss": 0.082, "step": 3090 }, { "epoch": 30.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.08485659211874008, "eval_runtime": 0.9382, "eval_samples_per_second": 155.618, "eval_steps_per_second": 20.252, "step": 3090 }, { "epoch": 30.1, "learning_rate": 1.759223300970874e-05, "loss": 0.2005, "step": 3100 }, { "epoch": 30.19, "learning_rate": 1.7584466019417476e-05, "loss": 0.322, "step": 3110 }, { "epoch": 30.29, "learning_rate": 1.7576699029126215e-05, "loss": 0.0765, "step": 3120 }, { "epoch": 30.39, "learning_rate": 1.7568932038834954e-05, "loss": 0.0799, "step": 3130 }, { "epoch": 30.49, "learning_rate": 1.756116504854369e-05, "loss": 0.0892, "step": 3140 }, { "epoch": 30.58, "learning_rate": 1.755339805825243e-05, "loss": 0.0932, "step": 3150 }, { "epoch": 30.68, "learning_rate": 1.7545631067961165e-05, "loss": 0.0882, "step": 3160 }, { "epoch": 30.78, "learning_rate": 1.7537864077669904e-05, "loss": 0.0714, "step": 3170 }, { "epoch": 30.87, "learning_rate": 1.753009708737864e-05, "loss": 0.0537, "step": 3180 }, { "epoch": 30.97, "learning_rate": 1.7522330097087383e-05, "loss": 0.0203, "step": 3190 }, { "epoch": 31.0, "eval_accuracy": 1.0, "eval_loss": 0.019243279471993446, "eval_runtime": 0.9242, "eval_samples_per_second": 157.974, "eval_steps_per_second": 20.558, "step": 3193 }, { "epoch": 31.07, "learning_rate": 1.751456310679612e-05, "loss": 0.1567, "step": 3200 }, { "epoch": 31.17, "learning_rate": 1.7506796116504858e-05, "loss": 0.0829, "step": 3210 }, { "epoch": 31.26, "learning_rate": 1.7499029126213593e-05, "loss": 0.0259, "step": 3220 }, { "epoch": 31.36, "learning_rate": 1.7491262135922333e-05, "loss": 0.0337, "step": 3230 }, { "epoch": 31.46, "learning_rate": 1.748349514563107e-05, "loss": 0.0407, "step": 3240 }, { "epoch": 31.55, "learning_rate": 1.7475728155339808e-05, "loss": 0.1494, "step": 3250 }, { "epoch": 31.65, "learning_rate": 1.7467961165048547e-05, "loss": 0.1308, "step": 3260 }, { "epoch": 31.75, "learning_rate": 1.7460194174757283e-05, "loss": 0.0744, "step": 3270 }, { "epoch": 31.84, "learning_rate": 1.7452427184466022e-05, "loss": 0.059, "step": 3280 }, { "epoch": 31.94, "learning_rate": 1.7444660194174758e-05, "loss": 0.1044, "step": 3290 }, { "epoch": 32.0, "eval_accuracy": 0.9657534246575342, "eval_loss": 0.11390157043933868, "eval_runtime": 0.9599, "eval_samples_per_second": 152.1, "eval_steps_per_second": 19.794, "step": 3296 }, { "epoch": 32.04, "learning_rate": 1.7436893203883497e-05, "loss": 0.1431, "step": 3300 }, { "epoch": 32.14, "learning_rate": 1.7429126213592233e-05, "loss": 0.0207, "step": 3310 }, { "epoch": 32.23, "learning_rate": 1.7421359223300972e-05, "loss": 0.1726, "step": 3320 }, { "epoch": 32.33, "learning_rate": 1.741359223300971e-05, "loss": 0.0813, "step": 3330 }, { "epoch": 32.43, "learning_rate": 1.7405825242718447e-05, "loss": 0.1417, "step": 3340 }, { "epoch": 32.52, "learning_rate": 1.7398058252427186e-05, "loss": 0.036, "step": 3350 }, { "epoch": 32.62, "learning_rate": 1.7390291262135922e-05, "loss": 0.065, "step": 3360 }, { "epoch": 32.72, "learning_rate": 1.738252427184466e-05, "loss": 0.0654, "step": 3370 }, { "epoch": 32.82, "learning_rate": 1.73747572815534e-05, "loss": 0.0311, "step": 3380 }, { "epoch": 32.91, "learning_rate": 1.736699029126214e-05, "loss": 0.0134, "step": 3390 }, { "epoch": 33.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.057724058628082275, "eval_runtime": 0.9176, "eval_samples_per_second": 159.114, "eval_steps_per_second": 20.707, "step": 3399 }, { "epoch": 33.01, "learning_rate": 1.7359223300970875e-05, "loss": 0.0149, "step": 3400 }, { "epoch": 33.11, "learning_rate": 1.7351456310679614e-05, "loss": 0.1328, "step": 3410 }, { "epoch": 33.2, "learning_rate": 1.734368932038835e-05, "loss": 0.0303, "step": 3420 }, { "epoch": 33.3, "learning_rate": 1.733592233009709e-05, "loss": 0.0764, "step": 3430 }, { "epoch": 33.4, "learning_rate": 1.732815533980583e-05, "loss": 0.0176, "step": 3440 }, { "epoch": 33.5, "learning_rate": 1.7320388349514564e-05, "loss": 0.0133, "step": 3450 }, { "epoch": 33.59, "learning_rate": 1.7312621359223303e-05, "loss": 0.2035, "step": 3460 }, { "epoch": 33.69, "learning_rate": 1.730485436893204e-05, "loss": 0.0844, "step": 3470 }, { "epoch": 33.79, "learning_rate": 1.729708737864078e-05, "loss": 0.062, "step": 3480 }, { "epoch": 33.88, "learning_rate": 1.7289320388349514e-05, "loss": 0.0381, "step": 3490 }, { "epoch": 33.98, "learning_rate": 1.7281553398058253e-05, "loss": 0.0923, "step": 3500 }, { "epoch": 34.0, "eval_accuracy": 0.958904109589041, "eval_loss": 0.182390034198761, "eval_runtime": 0.9423, "eval_samples_per_second": 154.946, "eval_steps_per_second": 20.164, "step": 3502 }, { "epoch": 34.08, "learning_rate": 1.7273786407766993e-05, "loss": 0.044, "step": 3510 }, { "epoch": 34.17, "learning_rate": 1.726601941747573e-05, "loss": 0.0188, "step": 3520 }, { "epoch": 34.27, "learning_rate": 1.7258252427184468e-05, "loss": 0.0863, "step": 3530 }, { "epoch": 34.37, "learning_rate": 1.7250485436893207e-05, "loss": 0.1034, "step": 3540 }, { "epoch": 34.47, "learning_rate": 1.7242718446601943e-05, "loss": 0.0148, "step": 3550 }, { "epoch": 34.56, "learning_rate": 1.7234951456310682e-05, "loss": 0.0083, "step": 3560 }, { "epoch": 34.66, "learning_rate": 1.722718446601942e-05, "loss": 0.0313, "step": 3570 }, { "epoch": 34.76, "learning_rate": 1.7219417475728157e-05, "loss": 0.1949, "step": 3580 }, { "epoch": 34.85, "learning_rate": 1.7211650485436896e-05, "loss": 0.0643, "step": 3590 }, { "epoch": 34.95, "learning_rate": 1.7203883495145632e-05, "loss": 0.1156, "step": 3600 }, { "epoch": 35.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.022417498752474785, "eval_runtime": 0.9325, "eval_samples_per_second": 156.574, "eval_steps_per_second": 20.376, "step": 3605 }, { "epoch": 35.05, "learning_rate": 1.719611650485437e-05, "loss": 0.1949, "step": 3610 }, { "epoch": 35.15, "learning_rate": 1.7188349514563107e-05, "loss": 0.0527, "step": 3620 }, { "epoch": 35.24, "learning_rate": 1.7180582524271846e-05, "loss": 0.0105, "step": 3630 }, { "epoch": 35.34, "learning_rate": 1.7172815533980585e-05, "loss": 0.1182, "step": 3640 }, { "epoch": 35.44, "learning_rate": 1.716504854368932e-05, "loss": 0.0799, "step": 3650 }, { "epoch": 35.53, "learning_rate": 1.715728155339806e-05, "loss": 0.1506, "step": 3660 }, { "epoch": 35.63, "learning_rate": 1.7149514563106796e-05, "loss": 0.1022, "step": 3670 }, { "epoch": 35.73, "learning_rate": 1.7141747572815535e-05, "loss": 0.0591, "step": 3680 }, { "epoch": 35.83, "learning_rate": 1.713398058252427e-05, "loss": 0.0083, "step": 3690 }, { "epoch": 35.92, "learning_rate": 1.7126213592233013e-05, "loss": 0.0161, "step": 3700 }, { "epoch": 36.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.03223036974668503, "eval_runtime": 0.926, "eval_samples_per_second": 157.661, "eval_steps_per_second": 20.518, "step": 3708 }, { "epoch": 36.02, "learning_rate": 1.711844660194175e-05, "loss": 0.1174, "step": 3710 }, { "epoch": 36.12, "learning_rate": 1.711067961165049e-05, "loss": 0.0884, "step": 3720 }, { "epoch": 36.21, "learning_rate": 1.7102912621359224e-05, "loss": 0.0085, "step": 3730 }, { "epoch": 36.31, "learning_rate": 1.7095145631067963e-05, "loss": 0.1102, "step": 3740 }, { "epoch": 36.41, "learning_rate": 1.70873786407767e-05, "loss": 0.1087, "step": 3750 }, { "epoch": 36.5, "learning_rate": 1.707961165048544e-05, "loss": 0.0612, "step": 3760 }, { "epoch": 36.6, "learning_rate": 1.7071844660194178e-05, "loss": 0.0383, "step": 3770 }, { "epoch": 36.7, "learning_rate": 1.7064077669902913e-05, "loss": 0.021, "step": 3780 }, { "epoch": 36.8, "learning_rate": 1.7056310679611653e-05, "loss": 0.1188, "step": 3790 }, { "epoch": 36.89, "learning_rate": 1.704854368932039e-05, "loss": 0.0805, "step": 3800 }, { "epoch": 36.99, "learning_rate": 1.7040776699029128e-05, "loss": 0.0754, "step": 3810 }, { "epoch": 37.0, "eval_accuracy": 0.9726027397260274, "eval_loss": 0.10264816880226135, "eval_runtime": 0.9208, "eval_samples_per_second": 158.561, "eval_steps_per_second": 20.635, "step": 3811 }, { "epoch": 37.09, "learning_rate": 1.7033009708737867e-05, "loss": 0.012, "step": 3820 }, { "epoch": 37.18, "learning_rate": 1.7025242718446603e-05, "loss": 0.0646, "step": 3830 }, { "epoch": 37.28, "learning_rate": 1.7017475728155342e-05, "loss": 0.0136, "step": 3840 }, { "epoch": 37.38, "learning_rate": 1.7009708737864078e-05, "loss": 0.0409, "step": 3850 }, { "epoch": 37.48, "learning_rate": 1.7001941747572817e-05, "loss": 0.109, "step": 3860 }, { "epoch": 37.57, "learning_rate": 1.6994174757281553e-05, "loss": 0.0205, "step": 3870 }, { "epoch": 37.67, "learning_rate": 1.6986407766990292e-05, "loss": 0.1262, "step": 3880 }, { "epoch": 37.77, "learning_rate": 1.697864077669903e-05, "loss": 0.1535, "step": 3890 }, { "epoch": 37.86, "learning_rate": 1.697087378640777e-05, "loss": 0.0292, "step": 3900 }, { "epoch": 37.96, "learning_rate": 1.6963106796116506e-05, "loss": 0.0356, "step": 3910 }, { "epoch": 38.0, "eval_accuracy": 0.952054794520548, "eval_loss": 0.25439009070396423, "eval_runtime": 0.9243, "eval_samples_per_second": 157.958, "eval_steps_per_second": 20.556, "step": 3914 }, { "epoch": 38.06, "learning_rate": 1.6955339805825245e-05, "loss": 0.1842, "step": 3920 }, { "epoch": 38.16, "learning_rate": 1.694757281553398e-05, "loss": 0.1801, "step": 3930 }, { "epoch": 38.25, "learning_rate": 1.693980582524272e-05, "loss": 0.0089, "step": 3940 }, { "epoch": 38.35, "learning_rate": 1.693203883495146e-05, "loss": 0.0289, "step": 3950 }, { "epoch": 38.45, "learning_rate": 1.6924271844660195e-05, "loss": 0.0536, "step": 3960 }, { "epoch": 38.54, "learning_rate": 1.6916504854368934e-05, "loss": 0.1036, "step": 3970 }, { "epoch": 38.64, "learning_rate": 1.690873786407767e-05, "loss": 0.0112, "step": 3980 }, { "epoch": 38.74, "learning_rate": 1.690097087378641e-05, "loss": 0.008, "step": 3990 }, { "epoch": 38.83, "learning_rate": 1.6893203883495145e-05, "loss": 0.0183, "step": 4000 }, { "epoch": 38.93, "learning_rate": 1.6885436893203884e-05, "loss": 0.008, "step": 4010 }, { "epoch": 39.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.10272617638111115, "eval_runtime": 0.9261, "eval_samples_per_second": 157.656, "eval_steps_per_second": 20.517, "step": 4017 }, { "epoch": 39.03, "learning_rate": 1.6877669902912623e-05, "loss": 0.1074, "step": 4020 }, { "epoch": 39.13, "learning_rate": 1.686990291262136e-05, "loss": 0.1753, "step": 4030 }, { "epoch": 39.22, "learning_rate": 1.68621359223301e-05, "loss": 0.0181, "step": 4040 }, { "epoch": 39.32, "learning_rate": 1.6854368932038838e-05, "loss": 0.0541, "step": 4050 }, { "epoch": 39.42, "learning_rate": 1.6846601941747573e-05, "loss": 0.1461, "step": 4060 }, { "epoch": 39.51, "learning_rate": 1.6838834951456313e-05, "loss": 0.1556, "step": 4070 }, { "epoch": 39.61, "learning_rate": 1.6831067961165052e-05, "loss": 0.0072, "step": 4080 }, { "epoch": 39.71, "learning_rate": 1.6823300970873788e-05, "loss": 0.1443, "step": 4090 }, { "epoch": 39.81, "learning_rate": 1.6815533980582527e-05, "loss": 0.0272, "step": 4100 }, { "epoch": 39.9, "learning_rate": 1.6807766990291263e-05, "loss": 0.0404, "step": 4110 }, { "epoch": 40.0, "learning_rate": 1.6800000000000002e-05, "loss": 0.1293, "step": 4120 }, { "epoch": 40.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.06097627803683281, "eval_runtime": 0.961, "eval_samples_per_second": 151.929, "eval_steps_per_second": 19.772, "step": 4120 }, { "epoch": 40.1, "learning_rate": 1.6792233009708738e-05, "loss": 0.0466, "step": 4130 }, { "epoch": 40.19, "learning_rate": 1.6784466019417477e-05, "loss": 0.0499, "step": 4140 }, { "epoch": 40.29, "learning_rate": 1.6776699029126216e-05, "loss": 0.1867, "step": 4150 }, { "epoch": 40.39, "learning_rate": 1.6768932038834952e-05, "loss": 0.0198, "step": 4160 }, { "epoch": 40.49, "learning_rate": 1.676116504854369e-05, "loss": 0.0074, "step": 4170 }, { "epoch": 40.58, "learning_rate": 1.6753398058252427e-05, "loss": 0.0624, "step": 4180 }, { "epoch": 40.68, "learning_rate": 1.6745631067961166e-05, "loss": 0.0607, "step": 4190 }, { "epoch": 40.78, "learning_rate": 1.6737864077669905e-05, "loss": 0.0749, "step": 4200 }, { "epoch": 40.87, "learning_rate": 1.6730097087378644e-05, "loss": 0.0066, "step": 4210 }, { "epoch": 40.97, "learning_rate": 1.672233009708738e-05, "loss": 0.0578, "step": 4220 }, { "epoch": 41.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.08583226799964905, "eval_runtime": 0.9345, "eval_samples_per_second": 156.24, "eval_steps_per_second": 20.333, "step": 4223 }, { "epoch": 41.07, "learning_rate": 1.671456310679612e-05, "loss": 0.1032, "step": 4230 }, { "epoch": 41.17, "learning_rate": 1.6706796116504855e-05, "loss": 0.0544, "step": 4240 }, { "epoch": 41.26, "learning_rate": 1.6699029126213594e-05, "loss": 0.1737, "step": 4250 }, { "epoch": 41.36, "learning_rate": 1.669126213592233e-05, "loss": 0.0807, "step": 4260 }, { "epoch": 41.46, "learning_rate": 1.668349514563107e-05, "loss": 0.0775, "step": 4270 }, { "epoch": 41.55, "learning_rate": 1.667572815533981e-05, "loss": 0.0672, "step": 4280 }, { "epoch": 41.65, "learning_rate": 1.6667961165048544e-05, "loss": 0.049, "step": 4290 }, { "epoch": 41.75, "learning_rate": 1.6660194174757283e-05, "loss": 0.1505, "step": 4300 }, { "epoch": 41.84, "learning_rate": 1.665242718446602e-05, "loss": 0.1309, "step": 4310 }, { "epoch": 41.94, "learning_rate": 1.664466019417476e-05, "loss": 0.0528, "step": 4320 }, { "epoch": 42.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.09928789734840393, "eval_runtime": 0.9576, "eval_samples_per_second": 152.464, "eval_steps_per_second": 19.841, "step": 4326 }, { "epoch": 42.04, "learning_rate": 1.6636893203883498e-05, "loss": 0.1204, "step": 4330 }, { "epoch": 42.14, "learning_rate": 1.6629126213592233e-05, "loss": 0.0681, "step": 4340 }, { "epoch": 42.23, "learning_rate": 1.6621359223300973e-05, "loss": 0.1064, "step": 4350 }, { "epoch": 42.33, "learning_rate": 1.661359223300971e-05, "loss": 0.0672, "step": 4360 }, { "epoch": 42.43, "learning_rate": 1.6605825242718448e-05, "loss": 0.0494, "step": 4370 }, { "epoch": 42.52, "learning_rate": 1.6598058252427183e-05, "loss": 0.0866, "step": 4380 }, { "epoch": 42.62, "learning_rate": 1.6590291262135926e-05, "loss": 0.0385, "step": 4390 }, { "epoch": 42.72, "learning_rate": 1.6582524271844662e-05, "loss": 0.0143, "step": 4400 }, { "epoch": 42.82, "learning_rate": 1.65747572815534e-05, "loss": 0.014, "step": 4410 }, { "epoch": 42.91, "learning_rate": 1.6566990291262137e-05, "loss": 0.0886, "step": 4420 }, { "epoch": 43.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.03256027027964592, "eval_runtime": 0.9428, "eval_samples_per_second": 154.855, "eval_steps_per_second": 20.152, "step": 4429 }, { "epoch": 43.01, "learning_rate": 1.6559223300970876e-05, "loss": 0.0291, "step": 4430 }, { "epoch": 43.11, "learning_rate": 1.6551456310679612e-05, "loss": 0.0058, "step": 4440 }, { "epoch": 43.2, "learning_rate": 1.654368932038835e-05, "loss": 0.0331, "step": 4450 }, { "epoch": 43.3, "learning_rate": 1.653592233009709e-05, "loss": 0.2086, "step": 4460 }, { "epoch": 43.4, "learning_rate": 1.6528155339805826e-05, "loss": 0.0081, "step": 4470 }, { "epoch": 43.5, "learning_rate": 1.6520388349514565e-05, "loss": 0.0051, "step": 4480 }, { "epoch": 43.59, "learning_rate": 1.65126213592233e-05, "loss": 0.2006, "step": 4490 }, { "epoch": 43.69, "learning_rate": 1.650485436893204e-05, "loss": 0.0789, "step": 4500 }, { "epoch": 43.79, "learning_rate": 1.6497087378640776e-05, "loss": 0.0571, "step": 4510 }, { "epoch": 43.88, "learning_rate": 1.6489320388349515e-05, "loss": 0.0509, "step": 4520 }, { "epoch": 43.98, "learning_rate": 1.6481553398058254e-05, "loss": 0.0254, "step": 4530 }, { "epoch": 44.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.03951861709356308, "eval_runtime": 0.9204, "eval_samples_per_second": 158.634, "eval_steps_per_second": 20.644, "step": 4532 }, { "epoch": 44.08, "learning_rate": 1.647378640776699e-05, "loss": 0.0554, "step": 4540 }, { "epoch": 44.17, "learning_rate": 1.646601941747573e-05, "loss": 0.1147, "step": 4550 }, { "epoch": 44.27, "learning_rate": 1.645825242718447e-05, "loss": 0.0991, "step": 4560 }, { "epoch": 44.37, "learning_rate": 1.6450485436893204e-05, "loss": 0.0387, "step": 4570 }, { "epoch": 44.47, "learning_rate": 1.6442718446601943e-05, "loss": 0.0065, "step": 4580 }, { "epoch": 44.56, "learning_rate": 1.6434951456310683e-05, "loss": 0.0062, "step": 4590 }, { "epoch": 44.66, "learning_rate": 1.642718446601942e-05, "loss": 0.0337, "step": 4600 }, { "epoch": 44.76, "learning_rate": 1.6419417475728158e-05, "loss": 0.0792, "step": 4610 }, { "epoch": 44.85, "learning_rate": 1.6411650485436893e-05, "loss": 0.1339, "step": 4620 }, { "epoch": 44.95, "learning_rate": 1.6403883495145633e-05, "loss": 0.0087, "step": 4630 }, { "epoch": 45.0, "eval_accuracy": 0.958904109589041, "eval_loss": 0.1797976791858673, "eval_runtime": 0.9137, "eval_samples_per_second": 159.792, "eval_steps_per_second": 20.795, "step": 4635 }, { "epoch": 45.05, "learning_rate": 1.6396116504854372e-05, "loss": 0.0471, "step": 4640 }, { "epoch": 45.15, "learning_rate": 1.6388349514563108e-05, "loss": 0.2771, "step": 4650 }, { "epoch": 45.24, "learning_rate": 1.6380582524271847e-05, "loss": 0.0076, "step": 4660 }, { "epoch": 45.34, "learning_rate": 1.6372815533980583e-05, "loss": 0.0712, "step": 4670 }, { "epoch": 45.44, "learning_rate": 1.6365048543689322e-05, "loss": 0.0652, "step": 4680 }, { "epoch": 45.53, "learning_rate": 1.6357281553398058e-05, "loss": 0.0174, "step": 4690 }, { "epoch": 45.63, "learning_rate": 1.6349514563106797e-05, "loss": 0.0869, "step": 4700 }, { "epoch": 45.73, "learning_rate": 1.6341747572815536e-05, "loss": 0.0263, "step": 4710 }, { "epoch": 45.83, "learning_rate": 1.6333980582524275e-05, "loss": 0.1847, "step": 4720 }, { "epoch": 45.92, "learning_rate": 1.632621359223301e-05, "loss": 0.0736, "step": 4730 }, { "epoch": 46.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.0322740375995636, "eval_runtime": 0.9527, "eval_samples_per_second": 153.246, "eval_steps_per_second": 19.943, "step": 4738 }, { "epoch": 46.02, "learning_rate": 1.631844660194175e-05, "loss": 0.0529, "step": 4740 }, { "epoch": 46.12, "learning_rate": 1.6310679611650486e-05, "loss": 0.0063, "step": 4750 }, { "epoch": 46.21, "learning_rate": 1.6302912621359225e-05, "loss": 0.0516, "step": 4760 }, { "epoch": 46.31, "learning_rate": 1.6295145631067964e-05, "loss": 0.0302, "step": 4770 }, { "epoch": 46.41, "learning_rate": 1.62873786407767e-05, "loss": 0.0122, "step": 4780 }, { "epoch": 46.5, "learning_rate": 1.627961165048544e-05, "loss": 0.1734, "step": 4790 }, { "epoch": 46.6, "learning_rate": 1.6271844660194175e-05, "loss": 0.0983, "step": 4800 }, { "epoch": 46.7, "learning_rate": 1.6264077669902914e-05, "loss": 0.1147, "step": 4810 }, { "epoch": 46.8, "learning_rate": 1.625631067961165e-05, "loss": 0.0177, "step": 4820 }, { "epoch": 46.89, "learning_rate": 1.624854368932039e-05, "loss": 0.0644, "step": 4830 }, { "epoch": 46.99, "learning_rate": 1.624077669902913e-05, "loss": 0.0427, "step": 4840 }, { "epoch": 47.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.03598255664110184, "eval_runtime": 0.9233, "eval_samples_per_second": 158.128, "eval_steps_per_second": 20.578, "step": 4841 }, { "epoch": 47.09, "learning_rate": 1.6233009708737864e-05, "loss": 0.0146, "step": 4850 }, { "epoch": 47.18, "learning_rate": 1.6225242718446603e-05, "loss": 0.0297, "step": 4860 }, { "epoch": 47.28, "learning_rate": 1.621747572815534e-05, "loss": 0.0042, "step": 4870 }, { "epoch": 47.38, "learning_rate": 1.620970873786408e-05, "loss": 0.0157, "step": 4880 }, { "epoch": 47.48, "learning_rate": 1.6201941747572814e-05, "loss": 0.0753, "step": 4890 }, { "epoch": 47.57, "learning_rate": 1.6194174757281557e-05, "loss": 0.1388, "step": 4900 }, { "epoch": 47.67, "learning_rate": 1.6186407766990293e-05, "loss": 0.0326, "step": 4910 }, { "epoch": 47.77, "learning_rate": 1.6178640776699032e-05, "loss": 0.1892, "step": 4920 }, { "epoch": 47.86, "learning_rate": 1.6170873786407768e-05, "loss": 0.0573, "step": 4930 }, { "epoch": 47.96, "learning_rate": 1.6163106796116507e-05, "loss": 0.0322, "step": 4940 }, { "epoch": 48.0, "eval_accuracy": 0.9657534246575342, "eval_loss": 0.05355680733919144, "eval_runtime": 0.9463, "eval_samples_per_second": 154.291, "eval_steps_per_second": 20.079, "step": 4944 }, { "epoch": 48.06, "learning_rate": 1.6155339805825243e-05, "loss": 0.1167, "step": 4950 }, { "epoch": 48.16, "learning_rate": 1.6147572815533982e-05, "loss": 0.0333, "step": 4960 }, { "epoch": 48.25, "learning_rate": 1.613980582524272e-05, "loss": 0.2643, "step": 4970 }, { "epoch": 48.35, "learning_rate": 1.6132038834951457e-05, "loss": 0.0314, "step": 4980 }, { "epoch": 48.45, "learning_rate": 1.6124271844660196e-05, "loss": 0.0281, "step": 4990 }, { "epoch": 48.54, "learning_rate": 1.6116504854368932e-05, "loss": 0.0865, "step": 5000 }, { "epoch": 48.64, "learning_rate": 1.610873786407767e-05, "loss": 0.1104, "step": 5010 }, { "epoch": 48.74, "learning_rate": 1.610097087378641e-05, "loss": 0.0059, "step": 5020 }, { "epoch": 48.83, "learning_rate": 1.6093203883495146e-05, "loss": 0.0419, "step": 5030 }, { "epoch": 48.93, "learning_rate": 1.6085436893203885e-05, "loss": 0.0499, "step": 5040 }, { "epoch": 49.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.035887837409973145, "eval_runtime": 0.9776, "eval_samples_per_second": 149.343, "eval_steps_per_second": 19.435, "step": 5047 }, { "epoch": 49.03, "learning_rate": 1.6077669902912624e-05, "loss": 0.062, "step": 5050 }, { "epoch": 49.13, "learning_rate": 1.606990291262136e-05, "loss": 0.0114, "step": 5060 }, { "epoch": 49.22, "learning_rate": 1.60621359223301e-05, "loss": 0.1428, "step": 5070 }, { "epoch": 49.32, "learning_rate": 1.6054368932038835e-05, "loss": 0.0259, "step": 5080 }, { "epoch": 49.42, "learning_rate": 1.6046601941747574e-05, "loss": 0.0047, "step": 5090 }, { "epoch": 49.51, "learning_rate": 1.6038834951456313e-05, "loss": 0.0825, "step": 5100 }, { "epoch": 49.61, "learning_rate": 1.603106796116505e-05, "loss": 0.1709, "step": 5110 }, { "epoch": 49.71, "learning_rate": 1.602330097087379e-05, "loss": 0.0343, "step": 5120 }, { "epoch": 49.81, "learning_rate": 1.6015533980582524e-05, "loss": 0.0675, "step": 5130 }, { "epoch": 49.9, "learning_rate": 1.6007766990291263e-05, "loss": 0.0356, "step": 5140 }, { "epoch": 50.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.0234, "step": 5150 }, { "epoch": 50.0, "eval_accuracy": 1.0, "eval_loss": 0.01597742550075054, "eval_runtime": 0.9767, "eval_samples_per_second": 149.48, "eval_steps_per_second": 19.453, "step": 5150 }, { "epoch": 50.1, "learning_rate": 1.599223300970874e-05, "loss": 0.1435, "step": 5160 }, { "epoch": 50.19, "learning_rate": 1.5984466019417478e-05, "loss": 0.1082, "step": 5170 }, { "epoch": 50.29, "learning_rate": 1.5976699029126213e-05, "loss": 0.0888, "step": 5180 }, { "epoch": 50.39, "learning_rate": 1.5968932038834953e-05, "loss": 0.0825, "step": 5190 }, { "epoch": 50.49, "learning_rate": 1.596116504854369e-05, "loss": 0.1063, "step": 5200 }, { "epoch": 50.58, "learning_rate": 1.595339805825243e-05, "loss": 0.0379, "step": 5210 }, { "epoch": 50.68, "learning_rate": 1.5945631067961167e-05, "loss": 0.0053, "step": 5220 }, { "epoch": 50.78, "learning_rate": 1.5937864077669906e-05, "loss": 0.0691, "step": 5230 }, { "epoch": 50.87, "learning_rate": 1.5930097087378642e-05, "loss": 0.0763, "step": 5240 }, { "epoch": 50.97, "learning_rate": 1.592233009708738e-05, "loss": 0.0896, "step": 5250 }, { "epoch": 51.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.054553717374801636, "eval_runtime": 0.9405, "eval_samples_per_second": 155.229, "eval_steps_per_second": 20.201, "step": 5253 }, { "epoch": 51.07, "learning_rate": 1.5914563106796117e-05, "loss": 0.0095, "step": 5260 }, { "epoch": 51.17, "learning_rate": 1.5906796116504856e-05, "loss": 0.0832, "step": 5270 }, { "epoch": 51.26, "learning_rate": 1.5899029126213595e-05, "loss": 0.0357, "step": 5280 }, { "epoch": 51.36, "learning_rate": 1.589126213592233e-05, "loss": 0.0705, "step": 5290 }, { "epoch": 51.46, "learning_rate": 1.588349514563107e-05, "loss": 0.0749, "step": 5300 }, { "epoch": 51.55, "learning_rate": 1.5875728155339806e-05, "loss": 0.0785, "step": 5310 }, { "epoch": 51.65, "learning_rate": 1.5867961165048545e-05, "loss": 0.0107, "step": 5320 }, { "epoch": 51.75, "learning_rate": 1.586019417475728e-05, "loss": 0.0292, "step": 5330 }, { "epoch": 51.84, "learning_rate": 1.585242718446602e-05, "loss": 0.0349, "step": 5340 }, { "epoch": 51.94, "learning_rate": 1.584466019417476e-05, "loss": 0.1089, "step": 5350 }, { "epoch": 52.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.06735321879386902, "eval_runtime": 0.9363, "eval_samples_per_second": 155.931, "eval_steps_per_second": 20.292, "step": 5356 }, { "epoch": 52.04, "learning_rate": 1.5836893203883495e-05, "loss": 0.0729, "step": 5360 }, { "epoch": 52.14, "learning_rate": 1.5829126213592234e-05, "loss": 0.226, "step": 5370 }, { "epoch": 52.23, "learning_rate": 1.582135922330097e-05, "loss": 0.1495, "step": 5380 }, { "epoch": 52.33, "learning_rate": 1.581359223300971e-05, "loss": 0.1037, "step": 5390 }, { "epoch": 52.43, "learning_rate": 1.580582524271845e-05, "loss": 0.0258, "step": 5400 }, { "epoch": 52.52, "learning_rate": 1.5798058252427188e-05, "loss": 0.0822, "step": 5410 }, { "epoch": 52.62, "learning_rate": 1.5790291262135923e-05, "loss": 0.0039, "step": 5420 }, { "epoch": 52.72, "learning_rate": 1.5782524271844663e-05, "loss": 0.1656, "step": 5430 }, { "epoch": 52.82, "learning_rate": 1.57747572815534e-05, "loss": 0.0786, "step": 5440 }, { "epoch": 52.91, "learning_rate": 1.5766990291262138e-05, "loss": 0.1473, "step": 5450 }, { "epoch": 53.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.03693979233503342, "eval_runtime": 0.9352, "eval_samples_per_second": 156.119, "eval_steps_per_second": 20.317, "step": 5459 }, { "epoch": 53.01, "learning_rate": 1.5759223300970877e-05, "loss": 0.1319, "step": 5460 }, { "epoch": 53.11, "learning_rate": 1.5751456310679613e-05, "loss": 0.0548, "step": 5470 }, { "epoch": 53.2, "learning_rate": 1.5743689320388352e-05, "loss": 0.1589, "step": 5480 }, { "epoch": 53.3, "learning_rate": 1.5735922330097088e-05, "loss": 0.0182, "step": 5490 }, { "epoch": 53.4, "learning_rate": 1.5728155339805827e-05, "loss": 0.0687, "step": 5500 }, { "epoch": 53.5, "learning_rate": 1.5720388349514563e-05, "loss": 0.0477, "step": 5510 }, { "epoch": 53.59, "learning_rate": 1.5712621359223302e-05, "loss": 0.0341, "step": 5520 }, { "epoch": 53.69, "learning_rate": 1.570485436893204e-05, "loss": 0.0338, "step": 5530 }, { "epoch": 53.79, "learning_rate": 1.5697087378640777e-05, "loss": 0.0927, "step": 5540 }, { "epoch": 53.88, "learning_rate": 1.5689320388349516e-05, "loss": 0.119, "step": 5550 }, { "epoch": 53.98, "learning_rate": 1.5681553398058255e-05, "loss": 0.0346, "step": 5560 }, { "epoch": 54.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.03953592851758003, "eval_runtime": 0.9234, "eval_samples_per_second": 158.116, "eval_steps_per_second": 20.577, "step": 5562 }, { "epoch": 54.08, "learning_rate": 1.567378640776699e-05, "loss": 0.0527, "step": 5570 }, { "epoch": 54.17, "learning_rate": 1.566601941747573e-05, "loss": 0.17, "step": 5580 }, { "epoch": 54.27, "learning_rate": 1.565825242718447e-05, "loss": 0.0102, "step": 5590 }, { "epoch": 54.37, "learning_rate": 1.5650485436893205e-05, "loss": 0.0073, "step": 5600 }, { "epoch": 54.47, "learning_rate": 1.5642718446601944e-05, "loss": 0.0288, "step": 5610 }, { "epoch": 54.56, "learning_rate": 1.563495145631068e-05, "loss": 0.0455, "step": 5620 }, { "epoch": 54.66, "learning_rate": 1.562718446601942e-05, "loss": 0.0343, "step": 5630 }, { "epoch": 54.76, "learning_rate": 1.5619417475728155e-05, "loss": 0.0179, "step": 5640 }, { "epoch": 54.85, "learning_rate": 1.5611650485436894e-05, "loss": 0.0116, "step": 5650 }, { "epoch": 54.95, "learning_rate": 1.5603883495145634e-05, "loss": 0.1742, "step": 5660 }, { "epoch": 55.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.03901122882962227, "eval_runtime": 0.9237, "eval_samples_per_second": 158.058, "eval_steps_per_second": 20.569, "step": 5665 }, { "epoch": 55.05, "learning_rate": 1.559611650485437e-05, "loss": 0.0043, "step": 5670 }, { "epoch": 55.15, "learning_rate": 1.558834951456311e-05, "loss": 0.0422, "step": 5680 }, { "epoch": 55.24, "learning_rate": 1.5580582524271844e-05, "loss": 0.0032, "step": 5690 }, { "epoch": 55.34, "learning_rate": 1.5572815533980583e-05, "loss": 0.0043, "step": 5700 }, { "epoch": 55.44, "learning_rate": 1.556504854368932e-05, "loss": 0.099, "step": 5710 }, { "epoch": 55.53, "learning_rate": 1.5557281553398062e-05, "loss": 0.0195, "step": 5720 }, { "epoch": 55.63, "learning_rate": 1.5549514563106798e-05, "loss": 0.1299, "step": 5730 }, { "epoch": 55.73, "learning_rate": 1.5541747572815537e-05, "loss": 0.0038, "step": 5740 }, { "epoch": 55.83, "learning_rate": 1.5533980582524273e-05, "loss": 0.0479, "step": 5750 }, { "epoch": 55.92, "learning_rate": 1.5526213592233012e-05, "loss": 0.0788, "step": 5760 }, { "epoch": 56.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.055711355060338974, "eval_runtime": 0.9412, "eval_samples_per_second": 155.121, "eval_steps_per_second": 20.187, "step": 5768 }, { "epoch": 56.02, "learning_rate": 1.5518446601941748e-05, "loss": 0.1459, "step": 5770 }, { "epoch": 56.12, "learning_rate": 1.5510679611650487e-05, "loss": 0.067, "step": 5780 }, { "epoch": 56.21, "learning_rate": 1.5502912621359226e-05, "loss": 0.056, "step": 5790 }, { "epoch": 56.31, "learning_rate": 1.5495145631067962e-05, "loss": 0.0233, "step": 5800 }, { "epoch": 56.41, "learning_rate": 1.54873786407767e-05, "loss": 0.1362, "step": 5810 }, { "epoch": 56.5, "learning_rate": 1.5479611650485437e-05, "loss": 0.092, "step": 5820 }, { "epoch": 56.6, "learning_rate": 1.5471844660194176e-05, "loss": 0.0779, "step": 5830 }, { "epoch": 56.7, "learning_rate": 1.5464077669902915e-05, "loss": 0.0401, "step": 5840 }, { "epoch": 56.8, "learning_rate": 1.545631067961165e-05, "loss": 0.0743, "step": 5850 }, { "epoch": 56.89, "learning_rate": 1.544854368932039e-05, "loss": 0.0751, "step": 5860 }, { "epoch": 56.99, "learning_rate": 1.5440776699029126e-05, "loss": 0.12, "step": 5870 }, { "epoch": 57.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.04083726927638054, "eval_runtime": 0.9215, "eval_samples_per_second": 158.439, "eval_steps_per_second": 20.619, "step": 5871 }, { "epoch": 57.09, "learning_rate": 1.5433009708737865e-05, "loss": 0.0554, "step": 5880 }, { "epoch": 57.18, "learning_rate": 1.54252427184466e-05, "loss": 0.0277, "step": 5890 }, { "epoch": 57.28, "learning_rate": 1.541747572815534e-05, "loss": 0.0297, "step": 5900 }, { "epoch": 57.38, "learning_rate": 1.540970873786408e-05, "loss": 0.0304, "step": 5910 }, { "epoch": 57.48, "learning_rate": 1.540194174757282e-05, "loss": 0.1162, "step": 5920 }, { "epoch": 57.57, "learning_rate": 1.5394174757281554e-05, "loss": 0.0031, "step": 5930 }, { "epoch": 57.67, "learning_rate": 1.5386407766990294e-05, "loss": 0.0263, "step": 5940 }, { "epoch": 57.77, "learning_rate": 1.537864077669903e-05, "loss": 0.0255, "step": 5950 }, { "epoch": 57.86, "learning_rate": 1.537087378640777e-05, "loss": 0.1066, "step": 5960 }, { "epoch": 57.96, "learning_rate": 1.5363106796116508e-05, "loss": 0.0952, "step": 5970 }, { "epoch": 58.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.04665667563676834, "eval_runtime": 0.9277, "eval_samples_per_second": 157.377, "eval_steps_per_second": 20.481, "step": 5974 }, { "epoch": 58.06, "learning_rate": 1.5355339805825243e-05, "loss": 0.0915, "step": 5980 }, { "epoch": 58.16, "learning_rate": 1.5347572815533983e-05, "loss": 0.0818, "step": 5990 }, { "epoch": 58.25, "learning_rate": 1.533980582524272e-05, "loss": 0.1942, "step": 6000 }, { "epoch": 58.35, "learning_rate": 1.5332038834951458e-05, "loss": 0.0257, "step": 6010 }, { "epoch": 58.45, "learning_rate": 1.5324271844660193e-05, "loss": 0.2663, "step": 6020 }, { "epoch": 58.54, "learning_rate": 1.5316504854368933e-05, "loss": 0.1124, "step": 6030 }, { "epoch": 58.64, "learning_rate": 1.5308737864077672e-05, "loss": 0.1751, "step": 6040 }, { "epoch": 58.74, "learning_rate": 1.5300970873786408e-05, "loss": 0.06, "step": 6050 }, { "epoch": 58.83, "learning_rate": 1.5293203883495147e-05, "loss": 0.069, "step": 6060 }, { "epoch": 58.93, "learning_rate": 1.5285436893203886e-05, "loss": 0.0119, "step": 6070 }, { "epoch": 59.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.09355130046606064, "eval_runtime": 0.9076, "eval_samples_per_second": 160.86, "eval_steps_per_second": 20.934, "step": 6077 }, { "epoch": 59.03, "learning_rate": 1.5277669902912622e-05, "loss": 0.0037, "step": 6080 }, { "epoch": 59.13, "learning_rate": 1.526990291262136e-05, "loss": 0.0265, "step": 6090 }, { "epoch": 59.22, "learning_rate": 1.52621359223301e-05, "loss": 0.0376, "step": 6100 }, { "epoch": 59.32, "learning_rate": 1.5254368932038836e-05, "loss": 0.0088, "step": 6110 }, { "epoch": 59.42, "learning_rate": 1.5246601941747573e-05, "loss": 0.0406, "step": 6120 }, { "epoch": 59.51, "learning_rate": 1.5238834951456311e-05, "loss": 0.2535, "step": 6130 }, { "epoch": 59.61, "learning_rate": 1.5231067961165048e-05, "loss": 0.0168, "step": 6140 }, { "epoch": 59.71, "learning_rate": 1.5223300970873786e-05, "loss": 0.0041, "step": 6150 }, { "epoch": 59.81, "learning_rate": 1.5215533980582527e-05, "loss": 0.0073, "step": 6160 }, { "epoch": 59.9, "learning_rate": 1.5207766990291264e-05, "loss": 0.0921, "step": 6170 }, { "epoch": 60.0, "learning_rate": 1.5200000000000002e-05, "loss": 0.0381, "step": 6180 }, { "epoch": 60.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.058126527816057205, "eval_runtime": 0.9237, "eval_samples_per_second": 158.052, "eval_steps_per_second": 20.568, "step": 6180 }, { "epoch": 60.1, "learning_rate": 1.519223300970874e-05, "loss": 0.0028, "step": 6190 }, { "epoch": 60.19, "learning_rate": 1.5184466019417477e-05, "loss": 0.0349, "step": 6200 }, { "epoch": 60.29, "learning_rate": 1.5176699029126214e-05, "loss": 0.0037, "step": 6210 }, { "epoch": 60.39, "learning_rate": 1.5168932038834954e-05, "loss": 0.0028, "step": 6220 }, { "epoch": 60.49, "learning_rate": 1.5161165048543691e-05, "loss": 0.0764, "step": 6230 }, { "epoch": 60.58, "learning_rate": 1.5153398058252429e-05, "loss": 0.1693, "step": 6240 }, { "epoch": 60.68, "learning_rate": 1.5145631067961166e-05, "loss": 0.0965, "step": 6250 }, { "epoch": 60.78, "learning_rate": 1.5137864077669904e-05, "loss": 0.0026, "step": 6260 }, { "epoch": 60.87, "learning_rate": 1.5130097087378641e-05, "loss": 0.0157, "step": 6270 }, { "epoch": 60.97, "learning_rate": 1.512233009708738e-05, "loss": 0.0147, "step": 6280 }, { "epoch": 61.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.08475180715322495, "eval_runtime": 0.9312, "eval_samples_per_second": 156.787, "eval_steps_per_second": 20.404, "step": 6283 }, { "epoch": 61.07, "learning_rate": 1.5114563106796118e-05, "loss": 0.0033, "step": 6290 }, { "epoch": 61.17, "learning_rate": 1.5106796116504855e-05, "loss": 0.0149, "step": 6300 }, { "epoch": 61.26, "learning_rate": 1.5099029126213593e-05, "loss": 0.0366, "step": 6310 }, { "epoch": 61.36, "learning_rate": 1.5091262135922332e-05, "loss": 0.0854, "step": 6320 }, { "epoch": 61.46, "learning_rate": 1.508349514563107e-05, "loss": 0.0727, "step": 6330 }, { "epoch": 61.55, "learning_rate": 1.5075728155339807e-05, "loss": 0.0502, "step": 6340 }, { "epoch": 61.65, "learning_rate": 1.5067961165048546e-05, "loss": 0.0878, "step": 6350 }, { "epoch": 61.75, "learning_rate": 1.5060194174757284e-05, "loss": 0.0988, "step": 6360 }, { "epoch": 61.84, "learning_rate": 1.5052427184466021e-05, "loss": 0.0054, "step": 6370 }, { "epoch": 61.94, "learning_rate": 1.5044660194174759e-05, "loss": 0.028, "step": 6380 }, { "epoch": 62.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.0554812066257, "eval_runtime": 0.9252, "eval_samples_per_second": 157.804, "eval_steps_per_second": 20.536, "step": 6386 }, { "epoch": 62.04, "learning_rate": 1.5036893203883496e-05, "loss": 0.0038, "step": 6390 }, { "epoch": 62.14, "learning_rate": 1.5029126213592234e-05, "loss": 0.1136, "step": 6400 }, { "epoch": 62.23, "learning_rate": 1.5021359223300973e-05, "loss": 0.0329, "step": 6410 }, { "epoch": 62.33, "learning_rate": 1.501359223300971e-05, "loss": 0.2072, "step": 6420 }, { "epoch": 62.43, "learning_rate": 1.5005825242718448e-05, "loss": 0.028, "step": 6430 }, { "epoch": 62.52, "learning_rate": 1.4998058252427185e-05, "loss": 0.2947, "step": 6440 }, { "epoch": 62.62, "learning_rate": 1.4990291262135923e-05, "loss": 0.0525, "step": 6450 }, { "epoch": 62.72, "learning_rate": 1.498252427184466e-05, "loss": 0.0433, "step": 6460 }, { "epoch": 62.82, "learning_rate": 1.4974757281553401e-05, "loss": 0.0054, "step": 6470 }, { "epoch": 62.91, "learning_rate": 1.4966990291262139e-05, "loss": 0.0108, "step": 6480 }, { "epoch": 63.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.02103378064930439, "eval_runtime": 0.9293, "eval_samples_per_second": 157.104, "eval_steps_per_second": 20.445, "step": 6489 }, { "epoch": 63.01, "learning_rate": 1.4959223300970876e-05, "loss": 0.0574, "step": 6490 }, { "epoch": 63.11, "learning_rate": 1.4951456310679614e-05, "loss": 0.1288, "step": 6500 }, { "epoch": 63.2, "learning_rate": 1.4943689320388351e-05, "loss": 0.0043, "step": 6510 }, { "epoch": 63.3, "learning_rate": 1.4935922330097089e-05, "loss": 0.0602, "step": 6520 }, { "epoch": 63.4, "learning_rate": 1.4928155339805826e-05, "loss": 0.087, "step": 6530 }, { "epoch": 63.5, "learning_rate": 1.4920388349514565e-05, "loss": 0.0581, "step": 6540 }, { "epoch": 63.59, "learning_rate": 1.4912621359223303e-05, "loss": 0.0797, "step": 6550 }, { "epoch": 63.69, "learning_rate": 1.490485436893204e-05, "loss": 0.1079, "step": 6560 }, { "epoch": 63.79, "learning_rate": 1.4897087378640778e-05, "loss": 0.0026, "step": 6570 }, { "epoch": 63.88, "learning_rate": 1.4889320388349515e-05, "loss": 0.004, "step": 6580 }, { "epoch": 63.98, "learning_rate": 1.4881553398058253e-05, "loss": 0.0845, "step": 6590 }, { "epoch": 64.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.01815168187022209, "eval_runtime": 0.957, "eval_samples_per_second": 152.56, "eval_steps_per_second": 19.854, "step": 6592 }, { "epoch": 64.08, "learning_rate": 1.4873786407766992e-05, "loss": 0.0624, "step": 6600 }, { "epoch": 64.17, "learning_rate": 1.486601941747573e-05, "loss": 0.0056, "step": 6610 }, { "epoch": 64.27, "learning_rate": 1.4858252427184467e-05, "loss": 0.0028, "step": 6620 }, { "epoch": 64.37, "learning_rate": 1.4850485436893204e-05, "loss": 0.0151, "step": 6630 }, { "epoch": 64.47, "learning_rate": 1.4842718446601942e-05, "loss": 0.0057, "step": 6640 }, { "epoch": 64.56, "learning_rate": 1.483495145631068e-05, "loss": 0.0037, "step": 6650 }, { "epoch": 64.66, "learning_rate": 1.482718446601942e-05, "loss": 0.0025, "step": 6660 }, { "epoch": 64.76, "learning_rate": 1.4819417475728158e-05, "loss": 0.0439, "step": 6670 }, { "epoch": 64.85, "learning_rate": 1.4811650485436895e-05, "loss": 0.0049, "step": 6680 }, { "epoch": 64.95, "learning_rate": 1.4803883495145633e-05, "loss": 0.0027, "step": 6690 }, { "epoch": 65.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.021542595699429512, "eval_runtime": 0.9233, "eval_samples_per_second": 158.125, "eval_steps_per_second": 20.578, "step": 6695 }, { "epoch": 65.05, "learning_rate": 1.479611650485437e-05, "loss": 0.0547, "step": 6700 }, { "epoch": 65.15, "learning_rate": 1.4788349514563108e-05, "loss": 0.0319, "step": 6710 }, { "epoch": 65.24, "learning_rate": 1.4780582524271845e-05, "loss": 0.0903, "step": 6720 }, { "epoch": 65.34, "learning_rate": 1.4772815533980584e-05, "loss": 0.1014, "step": 6730 }, { "epoch": 65.44, "learning_rate": 1.4765048543689322e-05, "loss": 0.0021, "step": 6740 }, { "epoch": 65.53, "learning_rate": 1.475728155339806e-05, "loss": 0.0146, "step": 6750 }, { "epoch": 65.63, "learning_rate": 1.4749514563106797e-05, "loss": 0.0188, "step": 6760 }, { "epoch": 65.73, "learning_rate": 1.4741747572815534e-05, "loss": 0.1414, "step": 6770 }, { "epoch": 65.83, "learning_rate": 1.4733980582524272e-05, "loss": 0.0898, "step": 6780 }, { "epoch": 65.92, "learning_rate": 1.4726213592233011e-05, "loss": 0.0852, "step": 6790 }, { "epoch": 66.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.03675241023302078, "eval_runtime": 0.9838, "eval_samples_per_second": 148.411, "eval_steps_per_second": 19.314, "step": 6798 }, { "epoch": 66.02, "learning_rate": 1.4718446601941749e-05, "loss": 0.0604, "step": 6800 }, { "epoch": 66.12, "learning_rate": 1.4710679611650486e-05, "loss": 0.0943, "step": 6810 }, { "epoch": 66.21, "learning_rate": 1.4702912621359225e-05, "loss": 0.1165, "step": 6820 }, { "epoch": 66.31, "learning_rate": 1.4695145631067963e-05, "loss": 0.0357, "step": 6830 }, { "epoch": 66.41, "learning_rate": 1.46873786407767e-05, "loss": 0.0481, "step": 6840 }, { "epoch": 66.5, "learning_rate": 1.467961165048544e-05, "loss": 0.0041, "step": 6850 }, { "epoch": 66.6, "learning_rate": 1.4671844660194177e-05, "loss": 0.019, "step": 6860 }, { "epoch": 66.7, "learning_rate": 1.4664077669902914e-05, "loss": 0.0022, "step": 6870 }, { "epoch": 66.8, "learning_rate": 1.4656310679611652e-05, "loss": 0.0822, "step": 6880 }, { "epoch": 66.89, "learning_rate": 1.464854368932039e-05, "loss": 0.0585, "step": 6890 }, { "epoch": 66.99, "learning_rate": 1.4640776699029127e-05, "loss": 0.0022, "step": 6900 }, { "epoch": 67.0, "eval_accuracy": 0.958904109589041, "eval_loss": 0.16548927128314972, "eval_runtime": 0.927, "eval_samples_per_second": 157.494, "eval_steps_per_second": 20.496, "step": 6901 }, { "epoch": 67.09, "learning_rate": 1.4633009708737864e-05, "loss": 0.0252, "step": 6910 }, { "epoch": 67.18, "learning_rate": 1.4625242718446604e-05, "loss": 0.1552, "step": 6920 }, { "epoch": 67.28, "learning_rate": 1.4617475728155341e-05, "loss": 0.0023, "step": 6930 }, { "epoch": 67.38, "learning_rate": 1.4609708737864079e-05, "loss": 0.0022, "step": 6940 }, { "epoch": 67.48, "learning_rate": 1.4601941747572816e-05, "loss": 0.0952, "step": 6950 }, { "epoch": 67.57, "learning_rate": 1.4594174757281554e-05, "loss": 0.0205, "step": 6960 }, { "epoch": 67.67, "learning_rate": 1.4586407766990291e-05, "loss": 0.0487, "step": 6970 }, { "epoch": 67.77, "learning_rate": 1.4578640776699032e-05, "loss": 0.002, "step": 6980 }, { "epoch": 67.86, "learning_rate": 1.457087378640777e-05, "loss": 0.0832, "step": 6990 }, { "epoch": 67.96, "learning_rate": 1.4563106796116507e-05, "loss": 0.0757, "step": 7000 }, { "epoch": 68.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.034192681312561035, "eval_runtime": 0.8958, "eval_samples_per_second": 162.982, "eval_steps_per_second": 21.21, "step": 7004 }, { "epoch": 68.06, "learning_rate": 1.4555339805825244e-05, "loss": 0.0993, "step": 7010 }, { "epoch": 68.16, "learning_rate": 1.4547572815533982e-05, "loss": 0.0503, "step": 7020 }, { "epoch": 68.25, "learning_rate": 1.453980582524272e-05, "loss": 0.0605, "step": 7030 }, { "epoch": 68.35, "learning_rate": 1.4532038834951459e-05, "loss": 0.1539, "step": 7040 }, { "epoch": 68.45, "learning_rate": 1.4524271844660196e-05, "loss": 0.0486, "step": 7050 }, { "epoch": 68.54, "learning_rate": 1.4516504854368934e-05, "loss": 0.0159, "step": 7060 }, { "epoch": 68.64, "learning_rate": 1.4508737864077671e-05, "loss": 0.0608, "step": 7070 }, { "epoch": 68.74, "learning_rate": 1.4500970873786409e-05, "loss": 0.1876, "step": 7080 }, { "epoch": 68.83, "learning_rate": 1.4493203883495146e-05, "loss": 0.0214, "step": 7090 }, { "epoch": 68.93, "learning_rate": 1.4485436893203884e-05, "loss": 0.0823, "step": 7100 }, { "epoch": 69.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.02801397815346718, "eval_runtime": 0.9051, "eval_samples_per_second": 161.3, "eval_steps_per_second": 20.991, "step": 7107 }, { "epoch": 69.03, "learning_rate": 1.4477669902912623e-05, "loss": 0.0045, "step": 7110 }, { "epoch": 69.13, "learning_rate": 1.446990291262136e-05, "loss": 0.0027, "step": 7120 }, { "epoch": 69.22, "learning_rate": 1.4462135922330098e-05, "loss": 0.08, "step": 7130 }, { "epoch": 69.32, "learning_rate": 1.4454368932038835e-05, "loss": 0.1202, "step": 7140 }, { "epoch": 69.42, "learning_rate": 1.4446601941747573e-05, "loss": 0.0255, "step": 7150 }, { "epoch": 69.51, "learning_rate": 1.443883495145631e-05, "loss": 0.0071, "step": 7160 }, { "epoch": 69.61, "learning_rate": 1.4431067961165051e-05, "loss": 0.0873, "step": 7170 }, { "epoch": 69.71, "learning_rate": 1.4423300970873789e-05, "loss": 0.0846, "step": 7180 }, { "epoch": 69.81, "learning_rate": 1.4415533980582526e-05, "loss": 0.1357, "step": 7190 }, { "epoch": 69.9, "learning_rate": 1.4407766990291264e-05, "loss": 0.082, "step": 7200 }, { "epoch": 70.0, "learning_rate": 1.4400000000000001e-05, "loss": 0.1071, "step": 7210 }, { "epoch": 70.0, "eval_accuracy": 1.0, "eval_loss": 0.014005626551806927, "eval_runtime": 0.9022, "eval_samples_per_second": 161.82, "eval_steps_per_second": 21.059, "step": 7210 }, { "epoch": 70.1, "learning_rate": 1.4392233009708739e-05, "loss": 0.0503, "step": 7220 }, { "epoch": 70.19, "learning_rate": 1.4384466019417478e-05, "loss": 0.1342, "step": 7230 }, { "epoch": 70.29, "learning_rate": 1.4376699029126215e-05, "loss": 0.0553, "step": 7240 }, { "epoch": 70.39, "learning_rate": 1.4368932038834953e-05, "loss": 0.0535, "step": 7250 }, { "epoch": 70.49, "learning_rate": 1.436116504854369e-05, "loss": 0.0376, "step": 7260 }, { "epoch": 70.58, "learning_rate": 1.4353398058252428e-05, "loss": 0.0154, "step": 7270 }, { "epoch": 70.68, "learning_rate": 1.4345631067961165e-05, "loss": 0.0047, "step": 7280 }, { "epoch": 70.78, "learning_rate": 1.4337864077669904e-05, "loss": 0.0018, "step": 7290 }, { "epoch": 70.87, "learning_rate": 1.4330097087378642e-05, "loss": 0.0908, "step": 7300 }, { "epoch": 70.97, "learning_rate": 1.432233009708738e-05, "loss": 0.0832, "step": 7310 }, { "epoch": 71.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.03868861868977547, "eval_runtime": 0.9162, "eval_samples_per_second": 159.35, "eval_steps_per_second": 20.737, "step": 7313 }, { "epoch": 71.07, "learning_rate": 1.4314563106796117e-05, "loss": 0.054, "step": 7320 }, { "epoch": 71.17, "learning_rate": 1.4306796116504856e-05, "loss": 0.0967, "step": 7330 }, { "epoch": 71.26, "learning_rate": 1.4299029126213594e-05, "loss": 0.0186, "step": 7340 }, { "epoch": 71.36, "learning_rate": 1.4291262135922331e-05, "loss": 0.0503, "step": 7350 }, { "epoch": 71.46, "learning_rate": 1.428349514563107e-05, "loss": 0.1007, "step": 7360 }, { "epoch": 71.55, "learning_rate": 1.4275728155339808e-05, "loss": 0.0525, "step": 7370 }, { "epoch": 71.65, "learning_rate": 1.4267961165048545e-05, "loss": 0.0239, "step": 7380 }, { "epoch": 71.75, "learning_rate": 1.4260194174757283e-05, "loss": 0.0964, "step": 7390 }, { "epoch": 71.84, "learning_rate": 1.425242718446602e-05, "loss": 0.0359, "step": 7400 }, { "epoch": 71.94, "learning_rate": 1.4244660194174758e-05, "loss": 0.0417, "step": 7410 }, { "epoch": 72.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.0697227343916893, "eval_runtime": 0.9023, "eval_samples_per_second": 161.815, "eval_steps_per_second": 21.058, "step": 7416 }, { "epoch": 72.04, "learning_rate": 1.4236893203883497e-05, "loss": 0.0933, "step": 7420 }, { "epoch": 72.14, "learning_rate": 1.4229126213592234e-05, "loss": 0.1749, "step": 7430 }, { "epoch": 72.23, "learning_rate": 1.4221359223300972e-05, "loss": 0.0145, "step": 7440 }, { "epoch": 72.33, "learning_rate": 1.421359223300971e-05, "loss": 0.0321, "step": 7450 }, { "epoch": 72.43, "learning_rate": 1.4205825242718447e-05, "loss": 0.0879, "step": 7460 }, { "epoch": 72.52, "learning_rate": 1.4198058252427184e-05, "loss": 0.0227, "step": 7470 }, { "epoch": 72.62, "learning_rate": 1.4190291262135925e-05, "loss": 0.1572, "step": 7480 }, { "epoch": 72.72, "learning_rate": 1.4182524271844663e-05, "loss": 0.0728, "step": 7490 }, { "epoch": 72.82, "learning_rate": 1.41747572815534e-05, "loss": 0.0045, "step": 7500 }, { "epoch": 72.91, "learning_rate": 1.4166990291262138e-05, "loss": 0.1208, "step": 7510 }, { "epoch": 73.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.07754257321357727, "eval_runtime": 0.8991, "eval_samples_per_second": 162.384, "eval_steps_per_second": 21.132, "step": 7519 }, { "epoch": 73.01, "learning_rate": 1.4159223300970875e-05, "loss": 0.0482, "step": 7520 }, { "epoch": 73.11, "learning_rate": 1.4151456310679613e-05, "loss": 0.0655, "step": 7530 }, { "epoch": 73.2, "learning_rate": 1.414368932038835e-05, "loss": 0.0787, "step": 7540 }, { "epoch": 73.3, "learning_rate": 1.413592233009709e-05, "loss": 0.0483, "step": 7550 }, { "epoch": 73.4, "learning_rate": 1.4128155339805827e-05, "loss": 0.068, "step": 7560 }, { "epoch": 73.5, "learning_rate": 1.4120388349514564e-05, "loss": 0.0395, "step": 7570 }, { "epoch": 73.59, "learning_rate": 1.4112621359223302e-05, "loss": 0.0644, "step": 7580 }, { "epoch": 73.69, "learning_rate": 1.410485436893204e-05, "loss": 0.0126, "step": 7590 }, { "epoch": 73.79, "learning_rate": 1.4097087378640777e-05, "loss": 0.0531, "step": 7600 }, { "epoch": 73.88, "learning_rate": 1.4089320388349516e-05, "loss": 0.0767, "step": 7610 }, { "epoch": 73.98, "learning_rate": 1.4081553398058254e-05, "loss": 0.0083, "step": 7620 }, { "epoch": 74.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.052464041858911514, "eval_runtime": 0.9146, "eval_samples_per_second": 159.624, "eval_steps_per_second": 20.773, "step": 7622 }, { "epoch": 74.08, "learning_rate": 1.4073786407766991e-05, "loss": 0.0115, "step": 7630 }, { "epoch": 74.17, "learning_rate": 1.4066019417475729e-05, "loss": 0.0252, "step": 7640 }, { "epoch": 74.27, "learning_rate": 1.4058252427184466e-05, "loss": 0.0033, "step": 7650 }, { "epoch": 74.37, "learning_rate": 1.4050485436893204e-05, "loss": 0.0883, "step": 7660 }, { "epoch": 74.47, "learning_rate": 1.4042718446601944e-05, "loss": 0.0103, "step": 7670 }, { "epoch": 74.56, "learning_rate": 1.4034951456310682e-05, "loss": 0.1416, "step": 7680 }, { "epoch": 74.66, "learning_rate": 1.402718446601942e-05, "loss": 0.0526, "step": 7690 }, { "epoch": 74.76, "learning_rate": 1.4019417475728157e-05, "loss": 0.0104, "step": 7700 }, { "epoch": 74.85, "learning_rate": 1.4011650485436894e-05, "loss": 0.1323, "step": 7710 }, { "epoch": 74.95, "learning_rate": 1.4003883495145632e-05, "loss": 0.0017, "step": 7720 }, { "epoch": 75.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.04071873798966408, "eval_runtime": 0.9191, "eval_samples_per_second": 158.853, "eval_steps_per_second": 20.673, "step": 7725 }, { "epoch": 75.05, "learning_rate": 1.399611650485437e-05, "loss": 0.0016, "step": 7730 }, { "epoch": 75.15, "learning_rate": 1.3988349514563109e-05, "loss": 0.0279, "step": 7740 }, { "epoch": 75.24, "learning_rate": 1.3980582524271846e-05, "loss": 0.0353, "step": 7750 }, { "epoch": 75.34, "learning_rate": 1.3972815533980584e-05, "loss": 0.0031, "step": 7760 }, { "epoch": 75.44, "learning_rate": 1.3965048543689321e-05, "loss": 0.0166, "step": 7770 }, { "epoch": 75.53, "learning_rate": 1.3957281553398059e-05, "loss": 0.0515, "step": 7780 }, { "epoch": 75.63, "learning_rate": 1.3949514563106796e-05, "loss": 0.0705, "step": 7790 }, { "epoch": 75.73, "learning_rate": 1.3941747572815535e-05, "loss": 0.1237, "step": 7800 }, { "epoch": 75.83, "learning_rate": 1.3933980582524273e-05, "loss": 0.087, "step": 7810 }, { "epoch": 75.92, "learning_rate": 1.392621359223301e-05, "loss": 0.012, "step": 7820 }, { "epoch": 76.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.0362648107111454, "eval_runtime": 0.9186, "eval_samples_per_second": 158.943, "eval_steps_per_second": 20.684, "step": 7828 }, { "epoch": 76.02, "learning_rate": 1.391844660194175e-05, "loss": 0.1469, "step": 7830 }, { "epoch": 76.12, "learning_rate": 1.3910679611650487e-05, "loss": 0.0019, "step": 7840 }, { "epoch": 76.21, "learning_rate": 1.3902912621359224e-05, "loss": 0.2058, "step": 7850 }, { "epoch": 76.31, "learning_rate": 1.3895145631067964e-05, "loss": 0.0152, "step": 7860 }, { "epoch": 76.41, "learning_rate": 1.3887378640776701e-05, "loss": 0.0499, "step": 7870 }, { "epoch": 76.5, "learning_rate": 1.3879611650485439e-05, "loss": 0.0561, "step": 7880 }, { "epoch": 76.6, "learning_rate": 1.3871844660194176e-05, "loss": 0.1247, "step": 7890 }, { "epoch": 76.7, "learning_rate": 1.3864077669902914e-05, "loss": 0.0401, "step": 7900 }, { "epoch": 76.8, "learning_rate": 1.3856310679611651e-05, "loss": 0.0112, "step": 7910 }, { "epoch": 76.89, "learning_rate": 1.3848543689320389e-05, "loss": 0.0092, "step": 7920 }, { "epoch": 76.99, "learning_rate": 1.3840776699029128e-05, "loss": 0.0215, "step": 7930 }, { "epoch": 77.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.030757028609514236, "eval_runtime": 0.9146, "eval_samples_per_second": 159.637, "eval_steps_per_second": 20.775, "step": 7931 }, { "epoch": 77.09, "learning_rate": 1.3833009708737865e-05, "loss": 0.1653, "step": 7940 }, { "epoch": 77.18, "learning_rate": 1.3825242718446603e-05, "loss": 0.0577, "step": 7950 }, { "epoch": 77.28, "learning_rate": 1.381747572815534e-05, "loss": 0.0645, "step": 7960 }, { "epoch": 77.38, "learning_rate": 1.3809708737864078e-05, "loss": 0.0362, "step": 7970 }, { "epoch": 77.48, "learning_rate": 1.3801941747572815e-05, "loss": 0.0155, "step": 7980 }, { "epoch": 77.57, "learning_rate": 1.3794174757281556e-05, "loss": 0.0902, "step": 7990 }, { "epoch": 77.67, "learning_rate": 1.3786407766990294e-05, "loss": 0.059, "step": 8000 }, { "epoch": 77.77, "learning_rate": 1.3778640776699031e-05, "loss": 0.0036, "step": 8010 }, { "epoch": 77.86, "learning_rate": 1.3770873786407769e-05, "loss": 0.0471, "step": 8020 }, { "epoch": 77.96, "learning_rate": 1.3763106796116506e-05, "loss": 0.0319, "step": 8030 }, { "epoch": 78.0, "eval_accuracy": 0.9726027397260274, "eval_loss": 0.05245841667056084, "eval_runtime": 0.9259, "eval_samples_per_second": 157.685, "eval_steps_per_second": 20.521, "step": 8034 }, { "epoch": 78.06, "learning_rate": 1.3755339805825244e-05, "loss": 0.0681, "step": 8040 }, { "epoch": 78.16, "learning_rate": 1.3747572815533983e-05, "loss": 0.0086, "step": 8050 }, { "epoch": 78.25, "learning_rate": 1.373980582524272e-05, "loss": 0.0723, "step": 8060 }, { "epoch": 78.35, "learning_rate": 1.3732038834951458e-05, "loss": 0.1384, "step": 8070 }, { "epoch": 78.45, "learning_rate": 1.3724271844660195e-05, "loss": 0.1407, "step": 8080 }, { "epoch": 78.54, "learning_rate": 1.3716504854368933e-05, "loss": 0.007, "step": 8090 }, { "epoch": 78.64, "learning_rate": 1.370873786407767e-05, "loss": 0.0528, "step": 8100 }, { "epoch": 78.74, "learning_rate": 1.370097087378641e-05, "loss": 0.0091, "step": 8110 }, { "epoch": 78.83, "learning_rate": 1.3693203883495147e-05, "loss": 0.033, "step": 8120 }, { "epoch": 78.93, "learning_rate": 1.3685436893203884e-05, "loss": 0.093, "step": 8130 }, { "epoch": 79.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.03234969452023506, "eval_runtime": 0.8975, "eval_samples_per_second": 162.673, "eval_steps_per_second": 21.17, "step": 8137 }, { "epoch": 79.03, "learning_rate": 1.3677669902912622e-05, "loss": 0.0629, "step": 8140 }, { "epoch": 79.13, "learning_rate": 1.366990291262136e-05, "loss": 0.0159, "step": 8150 }, { "epoch": 79.22, "learning_rate": 1.3662135922330097e-05, "loss": 0.011, "step": 8160 }, { "epoch": 79.32, "learning_rate": 1.3654368932038834e-05, "loss": 0.1088, "step": 8170 }, { "epoch": 79.42, "learning_rate": 1.3646601941747575e-05, "loss": 0.1116, "step": 8180 }, { "epoch": 79.51, "learning_rate": 1.3638834951456313e-05, "loss": 0.0462, "step": 8190 }, { "epoch": 79.61, "learning_rate": 1.363106796116505e-05, "loss": 0.0378, "step": 8200 }, { "epoch": 79.71, "learning_rate": 1.3623300970873788e-05, "loss": 0.0016, "step": 8210 }, { "epoch": 79.81, "learning_rate": 1.3615533980582525e-05, "loss": 0.0658, "step": 8220 }, { "epoch": 79.9, "learning_rate": 1.3607766990291263e-05, "loss": 0.1224, "step": 8230 }, { "epoch": 80.0, "learning_rate": 1.3600000000000002e-05, "loss": 0.0813, "step": 8240 }, { "epoch": 80.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.04363853111863136, "eval_runtime": 0.9212, "eval_samples_per_second": 158.487, "eval_steps_per_second": 20.625, "step": 8240 }, { "epoch": 80.1, "learning_rate": 1.359223300970874e-05, "loss": 0.1413, "step": 8250 }, { "epoch": 80.19, "learning_rate": 1.3584466019417477e-05, "loss": 0.0308, "step": 8260 }, { "epoch": 80.29, "learning_rate": 1.3576699029126214e-05, "loss": 0.0483, "step": 8270 }, { "epoch": 80.39, "learning_rate": 1.3568932038834952e-05, "loss": 0.0162, "step": 8280 }, { "epoch": 80.49, "learning_rate": 1.356116504854369e-05, "loss": 0.1146, "step": 8290 }, { "epoch": 80.58, "learning_rate": 1.3553398058252429e-05, "loss": 0.0095, "step": 8300 }, { "epoch": 80.68, "learning_rate": 1.3545631067961166e-05, "loss": 0.0334, "step": 8310 }, { "epoch": 80.78, "learning_rate": 1.3537864077669904e-05, "loss": 0.0178, "step": 8320 }, { "epoch": 80.87, "learning_rate": 1.3530097087378641e-05, "loss": 0.0963, "step": 8330 }, { "epoch": 80.97, "learning_rate": 1.352233009708738e-05, "loss": 0.0014, "step": 8340 }, { "epoch": 81.0, "eval_accuracy": 1.0, "eval_loss": 0.0034924051724374294, "eval_runtime": 0.9228, "eval_samples_per_second": 158.21, "eval_steps_per_second": 20.589, "step": 8343 }, { "epoch": 81.07, "learning_rate": 1.3514563106796118e-05, "loss": 0.0019, "step": 8350 }, { "epoch": 81.17, "learning_rate": 1.3506796116504855e-05, "loss": 0.0013, "step": 8360 }, { "epoch": 81.26, "learning_rate": 1.3499029126213594e-05, "loss": 0.0016, "step": 8370 }, { "epoch": 81.36, "learning_rate": 1.3491262135922332e-05, "loss": 0.0363, "step": 8380 }, { "epoch": 81.46, "learning_rate": 1.348349514563107e-05, "loss": 0.0769, "step": 8390 }, { "epoch": 81.55, "learning_rate": 1.3475728155339807e-05, "loss": 0.0014, "step": 8400 }, { "epoch": 81.65, "learning_rate": 1.3467961165048544e-05, "loss": 0.0019, "step": 8410 }, { "epoch": 81.75, "learning_rate": 1.3460194174757282e-05, "loss": 0.0999, "step": 8420 }, { "epoch": 81.84, "learning_rate": 1.3452427184466021e-05, "loss": 0.0025, "step": 8430 }, { "epoch": 81.94, "learning_rate": 1.3444660194174759e-05, "loss": 0.0774, "step": 8440 }, { "epoch": 82.0, "eval_accuracy": 0.9726027397260274, "eval_loss": 0.10503670573234558, "eval_runtime": 0.9067, "eval_samples_per_second": 161.03, "eval_steps_per_second": 20.956, "step": 8446 }, { "epoch": 82.04, "learning_rate": 1.3436893203883496e-05, "loss": 0.0666, "step": 8450 }, { "epoch": 82.14, "learning_rate": 1.3429126213592234e-05, "loss": 0.0457, "step": 8460 }, { "epoch": 82.23, "learning_rate": 1.3421359223300971e-05, "loss": 0.0802, "step": 8470 }, { "epoch": 82.33, "learning_rate": 1.3413592233009709e-05, "loss": 0.0304, "step": 8480 }, { "epoch": 82.43, "learning_rate": 1.340582524271845e-05, "loss": 0.0012, "step": 8490 }, { "epoch": 82.52, "learning_rate": 1.3398058252427187e-05, "loss": 0.0039, "step": 8500 }, { "epoch": 82.62, "learning_rate": 1.3390291262135924e-05, "loss": 0.0734, "step": 8510 }, { "epoch": 82.72, "learning_rate": 1.3382524271844662e-05, "loss": 0.0466, "step": 8520 }, { "epoch": 82.82, "learning_rate": 1.33747572815534e-05, "loss": 0.0051, "step": 8530 }, { "epoch": 82.91, "learning_rate": 1.3366990291262137e-05, "loss": 0.0393, "step": 8540 }, { "epoch": 83.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.08058138936758041, "eval_runtime": 0.8979, "eval_samples_per_second": 162.604, "eval_steps_per_second": 21.161, "step": 8549 }, { "epoch": 83.01, "learning_rate": 1.3359223300970874e-05, "loss": 0.1685, "step": 8550 }, { "epoch": 83.11, "learning_rate": 1.3351456310679614e-05, "loss": 0.0016, "step": 8560 }, { "epoch": 83.2, "learning_rate": 1.3343689320388351e-05, "loss": 0.0839, "step": 8570 }, { "epoch": 83.3, "learning_rate": 1.3335922330097089e-05, "loss": 0.0922, "step": 8580 }, { "epoch": 83.4, "learning_rate": 1.3328155339805826e-05, "loss": 0.0146, "step": 8590 }, { "epoch": 83.5, "learning_rate": 1.3320388349514564e-05, "loss": 0.0014, "step": 8600 }, { "epoch": 83.59, "learning_rate": 1.3312621359223301e-05, "loss": 0.0269, "step": 8610 }, { "epoch": 83.69, "learning_rate": 1.330485436893204e-05, "loss": 0.0333, "step": 8620 }, { "epoch": 83.79, "learning_rate": 1.3297087378640778e-05, "loss": 0.0037, "step": 8630 }, { "epoch": 83.88, "learning_rate": 1.3289320388349515e-05, "loss": 0.0935, "step": 8640 }, { "epoch": 83.98, "learning_rate": 1.3281553398058253e-05, "loss": 0.0537, "step": 8650 }, { "epoch": 84.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.07363705337047577, "eval_runtime": 0.8825, "eval_samples_per_second": 165.435, "eval_steps_per_second": 21.529, "step": 8652 }, { "epoch": 84.08, "learning_rate": 1.327378640776699e-05, "loss": 0.1431, "step": 8660 }, { "epoch": 84.17, "learning_rate": 1.3266019417475728e-05, "loss": 0.1869, "step": 8670 }, { "epoch": 84.27, "learning_rate": 1.3258252427184469e-05, "loss": 0.041, "step": 8680 }, { "epoch": 84.37, "learning_rate": 1.3250485436893206e-05, "loss": 0.0108, "step": 8690 }, { "epoch": 84.47, "learning_rate": 1.3242718446601944e-05, "loss": 0.0105, "step": 8700 }, { "epoch": 84.56, "learning_rate": 1.3234951456310681e-05, "loss": 0.0507, "step": 8710 }, { "epoch": 84.66, "learning_rate": 1.3227184466019419e-05, "loss": 0.0333, "step": 8720 }, { "epoch": 84.76, "learning_rate": 1.3219417475728156e-05, "loss": 0.0421, "step": 8730 }, { "epoch": 84.85, "learning_rate": 1.3211650485436894e-05, "loss": 0.0567, "step": 8740 }, { "epoch": 84.95, "learning_rate": 1.3203883495145633e-05, "loss": 0.016, "step": 8750 }, { "epoch": 85.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.06812305748462677, "eval_runtime": 0.9225, "eval_samples_per_second": 158.264, "eval_steps_per_second": 20.596, "step": 8755 }, { "epoch": 85.05, "learning_rate": 1.319611650485437e-05, "loss": 0.0198, "step": 8760 }, { "epoch": 85.15, "learning_rate": 1.3188349514563108e-05, "loss": 0.1216, "step": 8770 }, { "epoch": 85.24, "learning_rate": 1.3180582524271845e-05, "loss": 0.0408, "step": 8780 }, { "epoch": 85.34, "learning_rate": 1.3172815533980583e-05, "loss": 0.0033, "step": 8790 }, { "epoch": 85.44, "learning_rate": 1.316504854368932e-05, "loss": 0.0379, "step": 8800 }, { "epoch": 85.53, "learning_rate": 1.315728155339806e-05, "loss": 0.0153, "step": 8810 }, { "epoch": 85.63, "learning_rate": 1.3149514563106797e-05, "loss": 0.0569, "step": 8820 }, { "epoch": 85.73, "learning_rate": 1.3141747572815534e-05, "loss": 0.0773, "step": 8830 }, { "epoch": 85.83, "learning_rate": 1.3133980582524274e-05, "loss": 0.0036, "step": 8840 }, { "epoch": 85.92, "learning_rate": 1.3126213592233011e-05, "loss": 0.0562, "step": 8850 }, { "epoch": 86.0, "eval_accuracy": 0.9726027397260274, "eval_loss": 0.13558551669120789, "eval_runtime": 0.9401, "eval_samples_per_second": 155.297, "eval_steps_per_second": 20.21, "step": 8858 }, { "epoch": 86.02, "learning_rate": 1.3118446601941749e-05, "loss": 0.0014, "step": 8860 }, { "epoch": 86.12, "learning_rate": 1.3110679611650488e-05, "loss": 0.0025, "step": 8870 }, { "epoch": 86.21, "learning_rate": 1.3102912621359225e-05, "loss": 0.0016, "step": 8880 }, { "epoch": 86.31, "learning_rate": 1.3095145631067963e-05, "loss": 0.0182, "step": 8890 }, { "epoch": 86.41, "learning_rate": 1.30873786407767e-05, "loss": 0.0062, "step": 8900 }, { "epoch": 86.5, "learning_rate": 1.3079611650485438e-05, "loss": 0.0416, "step": 8910 }, { "epoch": 86.6, "learning_rate": 1.3071844660194175e-05, "loss": 0.0296, "step": 8920 }, { "epoch": 86.7, "learning_rate": 1.3064077669902913e-05, "loss": 0.1365, "step": 8930 }, { "epoch": 86.8, "learning_rate": 1.3056310679611652e-05, "loss": 0.0012, "step": 8940 }, { "epoch": 86.89, "learning_rate": 1.304854368932039e-05, "loss": 0.1132, "step": 8950 }, { "epoch": 86.99, "learning_rate": 1.3040776699029127e-05, "loss": 0.0133, "step": 8960 }, { "epoch": 87.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.09805306047201157, "eval_runtime": 0.9311, "eval_samples_per_second": 156.804, "eval_steps_per_second": 20.406, "step": 8961 }, { "epoch": 87.09, "learning_rate": 1.3033009708737864e-05, "loss": 0.0237, "step": 8970 }, { "epoch": 87.18, "learning_rate": 1.3025242718446602e-05, "loss": 0.0895, "step": 8980 }, { "epoch": 87.28, "learning_rate": 1.301747572815534e-05, "loss": 0.043, "step": 8990 }, { "epoch": 87.38, "learning_rate": 1.300970873786408e-05, "loss": 0.0013, "step": 9000 }, { "epoch": 87.48, "learning_rate": 1.3001941747572818e-05, "loss": 0.1008, "step": 9010 }, { "epoch": 87.57, "learning_rate": 1.2994174757281555e-05, "loss": 0.1435, "step": 9020 }, { "epoch": 87.67, "learning_rate": 1.2986407766990293e-05, "loss": 0.0573, "step": 9030 }, { "epoch": 87.77, "learning_rate": 1.297864077669903e-05, "loss": 0.092, "step": 9040 }, { "epoch": 87.86, "learning_rate": 1.2970873786407768e-05, "loss": 0.1068, "step": 9050 }, { "epoch": 87.96, "learning_rate": 1.2963106796116507e-05, "loss": 0.0682, "step": 9060 }, { "epoch": 88.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.07209344208240509, "eval_runtime": 0.9453, "eval_samples_per_second": 154.455, "eval_steps_per_second": 20.1, "step": 9064 }, { "epoch": 88.06, "learning_rate": 1.2955339805825244e-05, "loss": 0.0018, "step": 9070 }, { "epoch": 88.16, "learning_rate": 1.2947572815533982e-05, "loss": 0.1112, "step": 9080 }, { "epoch": 88.25, "learning_rate": 1.293980582524272e-05, "loss": 0.0071, "step": 9090 }, { "epoch": 88.35, "learning_rate": 1.2932038834951457e-05, "loss": 0.0061, "step": 9100 }, { "epoch": 88.45, "learning_rate": 1.2924271844660194e-05, "loss": 0.1292, "step": 9110 }, { "epoch": 88.54, "learning_rate": 1.2916504854368934e-05, "loss": 0.0175, "step": 9120 }, { "epoch": 88.64, "learning_rate": 1.2908737864077671e-05, "loss": 0.04, "step": 9130 }, { "epoch": 88.74, "learning_rate": 1.2900970873786409e-05, "loss": 0.0015, "step": 9140 }, { "epoch": 88.83, "learning_rate": 1.2893203883495146e-05, "loss": 0.0422, "step": 9150 }, { "epoch": 88.93, "learning_rate": 1.2885436893203884e-05, "loss": 0.0514, "step": 9160 }, { "epoch": 89.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.060059912502765656, "eval_runtime": 0.9117, "eval_samples_per_second": 160.14, "eval_steps_per_second": 20.84, "step": 9167 }, { "epoch": 89.03, "learning_rate": 1.2877669902912621e-05, "loss": 0.0129, "step": 9170 }, { "epoch": 89.13, "learning_rate": 1.2869902912621359e-05, "loss": 0.0148, "step": 9180 }, { "epoch": 89.22, "learning_rate": 1.28621359223301e-05, "loss": 0.0098, "step": 9190 }, { "epoch": 89.32, "learning_rate": 1.2854368932038837e-05, "loss": 0.0552, "step": 9200 }, { "epoch": 89.42, "learning_rate": 1.2846601941747574e-05, "loss": 0.0969, "step": 9210 }, { "epoch": 89.51, "learning_rate": 1.2838834951456312e-05, "loss": 0.0018, "step": 9220 }, { "epoch": 89.61, "learning_rate": 1.283106796116505e-05, "loss": 0.0014, "step": 9230 }, { "epoch": 89.71, "learning_rate": 1.2823300970873787e-05, "loss": 0.0012, "step": 9240 }, { "epoch": 89.81, "learning_rate": 1.2815533980582526e-05, "loss": 0.0018, "step": 9250 }, { "epoch": 89.9, "learning_rate": 1.2807766990291264e-05, "loss": 0.0017, "step": 9260 }, { "epoch": 90.0, "learning_rate": 1.2800000000000001e-05, "loss": 0.0043, "step": 9270 }, { "epoch": 90.0, "eval_accuracy": 0.9657534246575342, "eval_loss": 0.16303785145282745, "eval_runtime": 0.8933, "eval_samples_per_second": 163.434, "eval_steps_per_second": 21.269, "step": 9270 }, { "epoch": 90.1, "learning_rate": 1.2792233009708739e-05, "loss": 0.1021, "step": 9280 }, { "epoch": 90.19, "learning_rate": 1.2784466019417476e-05, "loss": 0.0296, "step": 9290 }, { "epoch": 90.29, "learning_rate": 1.2776699029126214e-05, "loss": 0.0811, "step": 9300 }, { "epoch": 90.39, "learning_rate": 1.2768932038834953e-05, "loss": 0.0438, "step": 9310 }, { "epoch": 90.49, "learning_rate": 1.276116504854369e-05, "loss": 0.0245, "step": 9320 }, { "epoch": 90.58, "learning_rate": 1.2753398058252428e-05, "loss": 0.0102, "step": 9330 }, { "epoch": 90.68, "learning_rate": 1.2745631067961165e-05, "loss": 0.0845, "step": 9340 }, { "epoch": 90.78, "learning_rate": 1.2737864077669904e-05, "loss": 0.0013, "step": 9350 }, { "epoch": 90.87, "learning_rate": 1.2730097087378642e-05, "loss": 0.0722, "step": 9360 }, { "epoch": 90.97, "learning_rate": 1.272233009708738e-05, "loss": 0.0011, "step": 9370 }, { "epoch": 91.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.029909975826740265, "eval_runtime": 0.8937, "eval_samples_per_second": 163.37, "eval_steps_per_second": 21.26, "step": 9373 }, { "epoch": 91.07, "learning_rate": 1.2714563106796119e-05, "loss": 0.0259, "step": 9380 }, { "epoch": 91.17, "learning_rate": 1.2706796116504856e-05, "loss": 0.0221, "step": 9390 }, { "epoch": 91.26, "learning_rate": 1.2699029126213594e-05, "loss": 0.1292, "step": 9400 }, { "epoch": 91.36, "learning_rate": 1.2691262135922331e-05, "loss": 0.0398, "step": 9410 }, { "epoch": 91.46, "learning_rate": 1.2683495145631069e-05, "loss": 0.0971, "step": 9420 }, { "epoch": 91.55, "learning_rate": 1.2675728155339806e-05, "loss": 0.0973, "step": 9430 }, { "epoch": 91.65, "learning_rate": 1.2667961165048545e-05, "loss": 0.0636, "step": 9440 }, { "epoch": 91.75, "learning_rate": 1.2660194174757283e-05, "loss": 0.0216, "step": 9450 }, { "epoch": 91.84, "learning_rate": 1.265242718446602e-05, "loss": 0.1983, "step": 9460 }, { "epoch": 91.94, "learning_rate": 1.2644660194174758e-05, "loss": 0.0707, "step": 9470 }, { "epoch": 92.0, "eval_accuracy": 0.9657534246575342, "eval_loss": 0.100075863301754, "eval_runtime": 0.8859, "eval_samples_per_second": 164.8, "eval_steps_per_second": 21.447, "step": 9476 }, { "epoch": 92.04, "learning_rate": 1.2636893203883495e-05, "loss": 0.0351, "step": 9480 }, { "epoch": 92.14, "learning_rate": 1.2629126213592233e-05, "loss": 0.0242, "step": 9490 }, { "epoch": 92.23, "learning_rate": 1.2621359223300974e-05, "loss": 0.0617, "step": 9500 }, { "epoch": 92.33, "learning_rate": 1.2613592233009711e-05, "loss": 0.1866, "step": 9510 }, { "epoch": 92.43, "learning_rate": 1.2605825242718449e-05, "loss": 0.0045, "step": 9520 }, { "epoch": 92.52, "learning_rate": 1.2598058252427186e-05, "loss": 0.1162, "step": 9530 }, { "epoch": 92.62, "learning_rate": 1.2590291262135924e-05, "loss": 0.034, "step": 9540 }, { "epoch": 92.72, "learning_rate": 1.2582524271844661e-05, "loss": 0.1544, "step": 9550 }, { "epoch": 92.82, "learning_rate": 1.2574757281553399e-05, "loss": 0.0101, "step": 9560 }, { "epoch": 92.91, "learning_rate": 1.2566990291262138e-05, "loss": 0.0026, "step": 9570 }, { "epoch": 93.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.014391203410923481, "eval_runtime": 0.8967, "eval_samples_per_second": 162.823, "eval_steps_per_second": 21.189, "step": 9579 }, { "epoch": 93.01, "learning_rate": 1.2559223300970875e-05, "loss": 0.0848, "step": 9580 }, { "epoch": 93.11, "learning_rate": 1.2551456310679613e-05, "loss": 0.0198, "step": 9590 }, { "epoch": 93.2, "learning_rate": 1.254368932038835e-05, "loss": 0.033, "step": 9600 }, { "epoch": 93.3, "learning_rate": 1.2535922330097088e-05, "loss": 0.0032, "step": 9610 }, { "epoch": 93.4, "learning_rate": 1.2528155339805825e-05, "loss": 0.1336, "step": 9620 }, { "epoch": 93.5, "learning_rate": 1.2520388349514564e-05, "loss": 0.0281, "step": 9630 }, { "epoch": 93.59, "learning_rate": 1.2512621359223302e-05, "loss": 0.0436, "step": 9640 }, { "epoch": 93.69, "learning_rate": 1.250485436893204e-05, "loss": 0.193, "step": 9650 }, { "epoch": 93.79, "learning_rate": 1.2497087378640777e-05, "loss": 0.0018, "step": 9660 }, { "epoch": 93.88, "learning_rate": 1.2489320388349514e-05, "loss": 0.0509, "step": 9670 }, { "epoch": 93.98, "learning_rate": 1.2481553398058252e-05, "loss": 0.1578, "step": 9680 }, { "epoch": 94.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.012599923647940159, "eval_runtime": 0.8826, "eval_samples_per_second": 165.42, "eval_steps_per_second": 21.527, "step": 9682 }, { "epoch": 94.08, "learning_rate": 1.2473786407766993e-05, "loss": 0.0837, "step": 9690 }, { "epoch": 94.17, "learning_rate": 1.246601941747573e-05, "loss": 0.0786, "step": 9700 }, { "epoch": 94.27, "learning_rate": 1.2458252427184468e-05, "loss": 0.0012, "step": 9710 }, { "epoch": 94.37, "learning_rate": 1.2450485436893205e-05, "loss": 0.0656, "step": 9720 }, { "epoch": 94.47, "learning_rate": 1.2442718446601943e-05, "loss": 0.022, "step": 9730 }, { "epoch": 94.56, "learning_rate": 1.243495145631068e-05, "loss": 0.1272, "step": 9740 }, { "epoch": 94.66, "learning_rate": 1.2427184466019418e-05, "loss": 0.0013, "step": 9750 }, { "epoch": 94.76, "learning_rate": 1.2419417475728157e-05, "loss": 0.0519, "step": 9760 }, { "epoch": 94.85, "learning_rate": 1.2411650485436894e-05, "loss": 0.084, "step": 9770 }, { "epoch": 94.95, "learning_rate": 1.2403883495145632e-05, "loss": 0.0431, "step": 9780 }, { "epoch": 95.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.04091314598917961, "eval_runtime": 0.8882, "eval_samples_per_second": 164.379, "eval_steps_per_second": 21.392, "step": 9785 }, { "epoch": 95.05, "learning_rate": 1.239611650485437e-05, "loss": 0.1725, "step": 9790 }, { "epoch": 95.15, "learning_rate": 1.2388349514563107e-05, "loss": 0.0395, "step": 9800 }, { "epoch": 95.24, "learning_rate": 1.2380582524271844e-05, "loss": 0.1174, "step": 9810 }, { "epoch": 95.34, "learning_rate": 1.2372815533980584e-05, "loss": 0.0012, "step": 9820 }, { "epoch": 95.44, "learning_rate": 1.2365048543689321e-05, "loss": 0.0623, "step": 9830 }, { "epoch": 95.53, "learning_rate": 1.2357281553398059e-05, "loss": 0.0653, "step": 9840 }, { "epoch": 95.63, "learning_rate": 1.2349514563106798e-05, "loss": 0.0259, "step": 9850 }, { "epoch": 95.73, "learning_rate": 1.2341747572815535e-05, "loss": 0.0147, "step": 9860 }, { "epoch": 95.83, "learning_rate": 1.2333980582524273e-05, "loss": 0.1148, "step": 9870 }, { "epoch": 95.92, "learning_rate": 1.2326213592233012e-05, "loss": 0.1357, "step": 9880 }, { "epoch": 96.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.05305100977420807, "eval_runtime": 0.8941, "eval_samples_per_second": 163.297, "eval_steps_per_second": 21.251, "step": 9888 }, { "epoch": 96.02, "learning_rate": 1.231844660194175e-05, "loss": 0.0041, "step": 9890 }, { "epoch": 96.12, "learning_rate": 1.2310679611650487e-05, "loss": 0.0897, "step": 9900 }, { "epoch": 96.21, "learning_rate": 1.2302912621359224e-05, "loss": 0.0901, "step": 9910 }, { "epoch": 96.31, "learning_rate": 1.2295145631067962e-05, "loss": 0.0013, "step": 9920 }, { "epoch": 96.41, "learning_rate": 1.22873786407767e-05, "loss": 0.1067, "step": 9930 }, { "epoch": 96.5, "learning_rate": 1.2279611650485437e-05, "loss": 0.0072, "step": 9940 }, { "epoch": 96.6, "learning_rate": 1.2271844660194176e-05, "loss": 0.0098, "step": 9950 }, { "epoch": 96.7, "learning_rate": 1.2264077669902914e-05, "loss": 0.0127, "step": 9960 }, { "epoch": 96.8, "learning_rate": 1.2256310679611651e-05, "loss": 0.0276, "step": 9970 }, { "epoch": 96.89, "learning_rate": 1.2248543689320389e-05, "loss": 0.093, "step": 9980 }, { "epoch": 96.99, "learning_rate": 1.2240776699029126e-05, "loss": 0.0476, "step": 9990 }, { "epoch": 97.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.008646626956760883, "eval_runtime": 0.9314, "eval_samples_per_second": 156.762, "eval_steps_per_second": 20.4, "step": 9991 }, { "epoch": 97.09, "learning_rate": 1.2233009708737864e-05, "loss": 0.0041, "step": 10000 }, { "epoch": 97.18, "learning_rate": 1.2225242718446604e-05, "loss": 0.0017, "step": 10010 }, { "epoch": 97.28, "learning_rate": 1.2217475728155342e-05, "loss": 0.0313, "step": 10020 }, { "epoch": 97.38, "learning_rate": 1.220970873786408e-05, "loss": 0.0098, "step": 10030 }, { "epoch": 97.48, "learning_rate": 1.2201941747572817e-05, "loss": 0.0856, "step": 10040 }, { "epoch": 97.57, "learning_rate": 1.2194174757281554e-05, "loss": 0.1682, "step": 10050 }, { "epoch": 97.67, "learning_rate": 1.2186407766990292e-05, "loss": 0.1116, "step": 10060 }, { "epoch": 97.77, "learning_rate": 1.2178640776699031e-05, "loss": 0.0018, "step": 10070 }, { "epoch": 97.86, "learning_rate": 1.2170873786407769e-05, "loss": 0.002, "step": 10080 }, { "epoch": 97.96, "learning_rate": 1.2163106796116506e-05, "loss": 0.0315, "step": 10090 }, { "epoch": 98.0, "eval_accuracy": 1.0, "eval_loss": 0.0015310003655031323, "eval_runtime": 0.9234, "eval_samples_per_second": 158.106, "eval_steps_per_second": 20.575, "step": 10094 }, { "epoch": 98.06, "learning_rate": 1.2155339805825244e-05, "loss": 0.0884, "step": 10100 }, { "epoch": 98.16, "learning_rate": 1.2147572815533981e-05, "loss": 0.0014, "step": 10110 }, { "epoch": 98.25, "learning_rate": 1.2139805825242719e-05, "loss": 0.0161, "step": 10120 }, { "epoch": 98.35, "learning_rate": 1.2132038834951458e-05, "loss": 0.0423, "step": 10130 }, { "epoch": 98.45, "learning_rate": 1.2124271844660195e-05, "loss": 0.0393, "step": 10140 }, { "epoch": 98.54, "learning_rate": 1.2116504854368933e-05, "loss": 0.2191, "step": 10150 }, { "epoch": 98.64, "learning_rate": 1.210873786407767e-05, "loss": 0.0192, "step": 10160 }, { "epoch": 98.74, "learning_rate": 1.2100970873786408e-05, "loss": 0.0264, "step": 10170 }, { "epoch": 98.83, "learning_rate": 1.2093203883495145e-05, "loss": 0.1027, "step": 10180 }, { "epoch": 98.93, "learning_rate": 1.2085436893203883e-05, "loss": 0.0171, "step": 10190 }, { "epoch": 99.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.0362049825489521, "eval_runtime": 0.9372, "eval_samples_per_second": 155.788, "eval_steps_per_second": 20.274, "step": 10197 }, { "epoch": 99.03, "learning_rate": 1.2077669902912624e-05, "loss": 0.1313, "step": 10200 }, { "epoch": 99.13, "learning_rate": 1.2069902912621361e-05, "loss": 0.0012, "step": 10210 }, { "epoch": 99.22, "learning_rate": 1.2062135922330099e-05, "loss": 0.0546, "step": 10220 }, { "epoch": 99.32, "learning_rate": 1.2054368932038836e-05, "loss": 0.0046, "step": 10230 }, { "epoch": 99.42, "learning_rate": 1.2046601941747574e-05, "loss": 0.0537, "step": 10240 }, { "epoch": 99.51, "learning_rate": 1.2038834951456311e-05, "loss": 0.0711, "step": 10250 }, { "epoch": 99.61, "learning_rate": 1.203106796116505e-05, "loss": 0.012, "step": 10260 }, { "epoch": 99.71, "learning_rate": 1.2023300970873788e-05, "loss": 0.0375, "step": 10270 }, { "epoch": 99.81, "learning_rate": 1.2015533980582525e-05, "loss": 0.0139, "step": 10280 }, { "epoch": 99.9, "learning_rate": 1.2007766990291263e-05, "loss": 0.0962, "step": 10290 }, { "epoch": 100.0, "learning_rate": 1.2e-05, "loss": 0.0014, "step": 10300 }, { "epoch": 100.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.02322803996503353, "eval_runtime": 0.9475, "eval_samples_per_second": 154.09, "eval_steps_per_second": 20.053, "step": 10300 }, { "epoch": 100.1, "learning_rate": 1.1992233009708738e-05, "loss": 0.0051, "step": 10310 }, { "epoch": 100.19, "learning_rate": 1.1984466019417477e-05, "loss": 0.042, "step": 10320 }, { "epoch": 100.29, "learning_rate": 1.1976699029126214e-05, "loss": 0.001, "step": 10330 }, { "epoch": 100.39, "learning_rate": 1.1968932038834952e-05, "loss": 0.0835, "step": 10340 }, { "epoch": 100.49, "learning_rate": 1.196116504854369e-05, "loss": 0.0298, "step": 10350 }, { "epoch": 100.58, "learning_rate": 1.1953398058252429e-05, "loss": 0.001, "step": 10360 }, { "epoch": 100.68, "learning_rate": 1.1945631067961166e-05, "loss": 0.2664, "step": 10370 }, { "epoch": 100.78, "learning_rate": 1.1937864077669904e-05, "loss": 0.0031, "step": 10380 }, { "epoch": 100.87, "learning_rate": 1.1930097087378643e-05, "loss": 0.0008, "step": 10390 }, { "epoch": 100.97, "learning_rate": 1.192233009708738e-05, "loss": 0.1161, "step": 10400 }, { "epoch": 101.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.043042704463005066, "eval_runtime": 0.9222, "eval_samples_per_second": 158.317, "eval_steps_per_second": 20.603, "step": 10403 }, { "epoch": 101.07, "learning_rate": 1.1914563106796118e-05, "loss": 0.0009, "step": 10410 }, { "epoch": 101.17, "learning_rate": 1.1906796116504855e-05, "loss": 0.0432, "step": 10420 }, { "epoch": 101.26, "learning_rate": 1.1899029126213593e-05, "loss": 0.0441, "step": 10430 }, { "epoch": 101.36, "learning_rate": 1.189126213592233e-05, "loss": 0.0617, "step": 10440 }, { "epoch": 101.46, "learning_rate": 1.188349514563107e-05, "loss": 0.0993, "step": 10450 }, { "epoch": 101.55, "learning_rate": 1.1875728155339807e-05, "loss": 0.1322, "step": 10460 }, { "epoch": 101.65, "learning_rate": 1.1867961165048544e-05, "loss": 0.0435, "step": 10470 }, { "epoch": 101.75, "learning_rate": 1.1860194174757282e-05, "loss": 0.013, "step": 10480 }, { "epoch": 101.84, "learning_rate": 1.185242718446602e-05, "loss": 0.0214, "step": 10490 }, { "epoch": 101.94, "learning_rate": 1.1844660194174757e-05, "loss": 0.0839, "step": 10500 }, { "epoch": 102.0, "eval_accuracy": 0.9794520547945206, "eval_loss": 0.10051363706588745, "eval_runtime": 0.9025, "eval_samples_per_second": 161.775, "eval_steps_per_second": 21.053, "step": 10506 }, { "epoch": 102.04, "learning_rate": 1.1836893203883498e-05, "loss": 0.0018, "step": 10510 }, { "epoch": 102.14, "learning_rate": 1.1829126213592235e-05, "loss": 0.0037, "step": 10520 }, { "epoch": 102.23, "learning_rate": 1.1821359223300973e-05, "loss": 0.0258, "step": 10530 }, { "epoch": 102.33, "learning_rate": 1.181359223300971e-05, "loss": 0.1846, "step": 10540 }, { "epoch": 102.43, "learning_rate": 1.1805825242718448e-05, "loss": 0.0964, "step": 10550 }, { "epoch": 102.52, "learning_rate": 1.1798058252427185e-05, "loss": 0.0063, "step": 10560 }, { "epoch": 102.62, "learning_rate": 1.1790291262135923e-05, "loss": 0.0415, "step": 10570 }, { "epoch": 102.72, "learning_rate": 1.1782524271844662e-05, "loss": 0.0045, "step": 10580 }, { "epoch": 102.82, "learning_rate": 1.17747572815534e-05, "loss": 0.075, "step": 10590 }, { "epoch": 102.91, "learning_rate": 1.1766990291262137e-05, "loss": 0.0428, "step": 10600 }, { "epoch": 103.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.08688335865736008, "eval_runtime": 0.8958, "eval_samples_per_second": 162.976, "eval_steps_per_second": 21.209, "step": 10609 }, { "epoch": 103.01, "learning_rate": 1.1759223300970874e-05, "loss": 0.0012, "step": 10610 }, { "epoch": 103.11, "learning_rate": 1.1751456310679612e-05, "loss": 0.0385, "step": 10620 }, { "epoch": 103.2, "learning_rate": 1.174368932038835e-05, "loss": 0.0017, "step": 10630 }, { "epoch": 103.3, "learning_rate": 1.1735922330097089e-05, "loss": 0.001, "step": 10640 }, { "epoch": 103.4, "learning_rate": 1.1728155339805826e-05, "loss": 0.0486, "step": 10650 }, { "epoch": 103.5, "learning_rate": 1.1720388349514564e-05, "loss": 0.001, "step": 10660 }, { "epoch": 103.59, "learning_rate": 1.1712621359223301e-05, "loss": 0.0012, "step": 10670 }, { "epoch": 103.69, "learning_rate": 1.1704854368932039e-05, "loss": 0.0026, "step": 10680 }, { "epoch": 103.79, "learning_rate": 1.1697087378640776e-05, "loss": 0.1129, "step": 10690 }, { "epoch": 103.88, "learning_rate": 1.1689320388349517e-05, "loss": 0.0281, "step": 10700 }, { "epoch": 103.98, "learning_rate": 1.1681553398058255e-05, "loss": 0.0058, "step": 10710 }, { "epoch": 104.0, "eval_accuracy": 0.9863013698630136, "eval_loss": 0.02182828262448311, "eval_runtime": 0.9139, "eval_samples_per_second": 159.746, "eval_steps_per_second": 20.789, "step": 10712 }, { "epoch": 104.08, "learning_rate": 1.1673786407766992e-05, "loss": 0.0081, "step": 10720 }, { "epoch": 104.17, "learning_rate": 1.166601941747573e-05, "loss": 0.0008, "step": 10730 }, { "epoch": 104.27, "learning_rate": 1.1658252427184467e-05, "loss": 0.0018, "step": 10740 }, { "epoch": 104.37, "learning_rate": 1.1650485436893204e-05, "loss": 0.003, "step": 10750 }, { "epoch": 104.47, "learning_rate": 1.1642718446601942e-05, "loss": 0.0578, "step": 10760 }, { "epoch": 104.56, "learning_rate": 1.1634951456310681e-05, "loss": 0.0679, "step": 10770 }, { "epoch": 104.66, "learning_rate": 1.1627184466019419e-05, "loss": 0.0371, "step": 10780 }, { "epoch": 104.76, "learning_rate": 1.1619417475728156e-05, "loss": 0.0815, "step": 10790 }, { "epoch": 104.85, "learning_rate": 1.1611650485436894e-05, "loss": 0.0277, "step": 10800 }, { "epoch": 104.95, "learning_rate": 1.1603883495145631e-05, "loss": 0.0657, "step": 10810 }, { "epoch": 105.0, "eval_accuracy": 0.9931506849315068, "eval_loss": 0.012804172933101654, "eval_runtime": 0.8936, "eval_samples_per_second": 163.381, "eval_steps_per_second": 21.262, "step": 10815 }, { "epoch": 105.05, "learning_rate": 1.1596116504854369e-05, "loss": 0.0131, "step": 10820 }, { "epoch": 105.15, "learning_rate": 1.1588349514563108e-05, "loss": 0.0791, "step": 10830 }, { "epoch": 105.24, "learning_rate": 1.1580582524271845e-05, "loss": 0.1427, "step": 10840 }, { "epoch": 105.34, "learning_rate": 1.1572815533980583e-05, "loss": 0.0344, "step": 10850 }, { "epoch": 105.44, "learning_rate": 1.1565048543689322e-05, "loss": 0.0012, "step": 10860 }, { "epoch": 105.53, "learning_rate": 1.155728155339806e-05, "loss": 0.1199, "step": 10870 }, { "epoch": 105.63, "learning_rate": 1.1549514563106797e-05, "loss": 0.0055, "step": 10880 }, { "epoch": 105.73, "learning_rate": 1.1541747572815536e-05, "loss": 0.1326, "step": 10890 }, { "epoch": 105.83, "learning_rate": 1.1533980582524274e-05, "loss": 0.0034, "step": 10900 }, { "epoch": 105.92, "learning_rate": 1.1526213592233011e-05, "loss": 0.0032, "step": 10910 }, { "epoch": 106.0, "eval_accuracy": 1.0, "eval_loss": 0.0011723055504262447, "eval_runtime": 0.8979, "eval_samples_per_second": 162.605, "eval_steps_per_second": 21.161, "step": 10918 } ], "logging_steps": 10, "max_steps": 25750, "num_input_tokens_seen": 0, "num_train_epochs": 250, "save_steps": 500, "total_flos": 6.768824322311848e+18, "trial_name": null, "trial_params": null }