gokuls's picture
End of training
4b38e33
{
"best_metric": 2.7920210361480713,
"best_model_checkpoint": "model_v1_complete_training_wt_init_48_mini/checkpoint-1830000",
"epoch": 15.088621802749513,
"global_step": 1841687,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.000000000000001e-07,
"loss": 10.2892,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.0000000000000002e-06,
"loss": 8.8433,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 1.5e-06,
"loss": 8.1625,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 2.0000000000000003e-06,
"loss": 7.85,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 2.5e-06,
"loss": 7.6776,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 3e-06,
"loss": 7.5065,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 3.5e-06,
"loss": 7.3363,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 4.000000000000001e-06,
"loss": 7.2137,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 4.5e-06,
"loss": 7.1039,
"step": 4500
},
{
"epoch": 0.04,
"learning_rate": 5e-06,
"loss": 7.0197,
"step": 5000
},
{
"epoch": 0.05,
"learning_rate": 5.500000000000001e-06,
"loss": 6.9294,
"step": 5500
},
{
"epoch": 0.05,
"learning_rate": 6e-06,
"loss": 6.854,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 6.5000000000000004e-06,
"loss": 6.7942,
"step": 6500
},
{
"epoch": 0.06,
"learning_rate": 7e-06,
"loss": 6.7328,
"step": 7000
},
{
"epoch": 0.06,
"learning_rate": 7.500000000000001e-06,
"loss": 6.685,
"step": 7500
},
{
"epoch": 0.07,
"learning_rate": 8.000000000000001e-06,
"loss": 6.6331,
"step": 8000
},
{
"epoch": 0.07,
"learning_rate": 8.5e-06,
"loss": 6.5921,
"step": 8500
},
{
"epoch": 0.07,
"learning_rate": 9e-06,
"loss": 6.5616,
"step": 9000
},
{
"epoch": 0.08,
"learning_rate": 9.5e-06,
"loss": 6.5268,
"step": 9500
},
{
"epoch": 0.08,
"learning_rate": 1e-05,
"loss": 6.5012,
"step": 10000
},
{
"epoch": 0.09,
"learning_rate": 9.998356047280082e-06,
"loss": 6.4694,
"step": 10500
},
{
"epoch": 0.09,
"learning_rate": 9.996712094560161e-06,
"loss": 6.4482,
"step": 11000
},
{
"epoch": 0.09,
"learning_rate": 9.995068141840242e-06,
"loss": 6.4289,
"step": 11500
},
{
"epoch": 0.1,
"learning_rate": 9.993424189120321e-06,
"loss": 6.4084,
"step": 12000
},
{
"epoch": 0.1,
"learning_rate": 9.991780236400402e-06,
"loss": 6.3823,
"step": 12500
},
{
"epoch": 0.11,
"learning_rate": 9.990136283680483e-06,
"loss": 6.3696,
"step": 13000
},
{
"epoch": 0.11,
"learning_rate": 9.988492330960563e-06,
"loss": 6.359,
"step": 13500
},
{
"epoch": 0.11,
"learning_rate": 9.986848378240642e-06,
"loss": 6.3447,
"step": 14000
},
{
"epoch": 0.12,
"learning_rate": 9.985204425520723e-06,
"loss": 6.327,
"step": 14500
},
{
"epoch": 0.12,
"learning_rate": 9.983560472800802e-06,
"loss": 6.3197,
"step": 15000
},
{
"epoch": 0.13,
"learning_rate": 9.981916520080883e-06,
"loss": 6.3012,
"step": 15500
},
{
"epoch": 0.13,
"learning_rate": 9.980272567360964e-06,
"loss": 6.2978,
"step": 16000
},
{
"epoch": 0.14,
"learning_rate": 9.978628614641043e-06,
"loss": 6.2945,
"step": 16500
},
{
"epoch": 0.14,
"learning_rate": 9.976984661921124e-06,
"loss": 6.2789,
"step": 17000
},
{
"epoch": 0.14,
"learning_rate": 9.975340709201205e-06,
"loss": 6.2788,
"step": 17500
},
{
"epoch": 0.15,
"learning_rate": 9.973696756481285e-06,
"loss": 6.2633,
"step": 18000
},
{
"epoch": 0.15,
"learning_rate": 9.972052803761366e-06,
"loss": 6.2471,
"step": 18500
},
{
"epoch": 0.16,
"learning_rate": 9.970408851041445e-06,
"loss": 6.1923,
"step": 19000
},
{
"epoch": 0.16,
"learning_rate": 9.968764898321524e-06,
"loss": 6.1695,
"step": 19500
},
{
"epoch": 0.16,
"learning_rate": 9.967120945601605e-06,
"loss": 6.1645,
"step": 20000
},
{
"epoch": 0.17,
"learning_rate": 9.965476992881686e-06,
"loss": 6.1372,
"step": 20500
},
{
"epoch": 0.17,
"learning_rate": 9.963833040161765e-06,
"loss": 6.1184,
"step": 21000
},
{
"epoch": 0.18,
"learning_rate": 9.962189087441846e-06,
"loss": 6.1077,
"step": 21500
},
{
"epoch": 0.18,
"learning_rate": 9.960545134721926e-06,
"loss": 6.0858,
"step": 22000
},
{
"epoch": 0.18,
"learning_rate": 9.958901182002007e-06,
"loss": 6.0807,
"step": 22500
},
{
"epoch": 0.19,
"learning_rate": 9.957257229282087e-06,
"loss": 6.0578,
"step": 23000
},
{
"epoch": 0.19,
"learning_rate": 9.955613276562167e-06,
"loss": 6.0571,
"step": 23500
},
{
"epoch": 0.2,
"learning_rate": 9.953969323842248e-06,
"loss": 6.0397,
"step": 24000
},
{
"epoch": 0.2,
"learning_rate": 9.952325371122327e-06,
"loss": 6.0305,
"step": 24500
},
{
"epoch": 0.2,
"learning_rate": 9.950681418402408e-06,
"loss": 6.0192,
"step": 25000
},
{
"epoch": 0.21,
"learning_rate": 9.949037465682487e-06,
"loss": 6.003,
"step": 25500
},
{
"epoch": 0.21,
"learning_rate": 9.947393512962568e-06,
"loss": 5.9987,
"step": 26000
},
{
"epoch": 0.22,
"learning_rate": 9.945749560242648e-06,
"loss": 5.9946,
"step": 26500
},
{
"epoch": 0.22,
"learning_rate": 9.944105607522729e-06,
"loss": 5.9855,
"step": 27000
},
{
"epoch": 0.23,
"learning_rate": 9.942461654802808e-06,
"loss": 5.9723,
"step": 27500
},
{
"epoch": 0.23,
"learning_rate": 9.940817702082889e-06,
"loss": 5.9701,
"step": 28000
},
{
"epoch": 0.23,
"learning_rate": 9.93917374936297e-06,
"loss": 5.9636,
"step": 28500
},
{
"epoch": 0.24,
"learning_rate": 9.937529796643049e-06,
"loss": 5.9503,
"step": 29000
},
{
"epoch": 0.24,
"learning_rate": 9.93588584392313e-06,
"loss": 5.9404,
"step": 29500
},
{
"epoch": 0.25,
"learning_rate": 9.934241891203211e-06,
"loss": 5.9411,
"step": 30000
},
{
"epoch": 0.25,
"eval_accuracy": 0.1518340698223809,
"eval_loss": 5.883250713348389,
"eval_runtime": 404.4584,
"eval_samples_per_second": 762.392,
"eval_steps_per_second": 15.885,
"step": 30000
},
{
"epoch": 0.25,
"learning_rate": 9.93259793848329e-06,
"loss": 5.9333,
"step": 30500
},
{
"epoch": 0.25,
"learning_rate": 9.930953985763371e-06,
"loss": 5.9235,
"step": 31000
},
{
"epoch": 0.26,
"learning_rate": 9.92931003304345e-06,
"loss": 5.9169,
"step": 31500
},
{
"epoch": 0.26,
"learning_rate": 9.92766608032353e-06,
"loss": 5.9136,
"step": 32000
},
{
"epoch": 0.27,
"learning_rate": 9.92602212760361e-06,
"loss": 5.9025,
"step": 32500
},
{
"epoch": 0.27,
"learning_rate": 9.924378174883692e-06,
"loss": 5.9008,
"step": 33000
},
{
"epoch": 0.27,
"learning_rate": 9.922734222163771e-06,
"loss": 5.8908,
"step": 33500
},
{
"epoch": 0.28,
"learning_rate": 9.921090269443852e-06,
"loss": 5.8871,
"step": 34000
},
{
"epoch": 0.28,
"learning_rate": 9.919446316723931e-06,
"loss": 5.8785,
"step": 34500
},
{
"epoch": 0.29,
"learning_rate": 9.917802364004012e-06,
"loss": 5.877,
"step": 35000
},
{
"epoch": 0.29,
"learning_rate": 9.916158411284093e-06,
"loss": 5.8796,
"step": 35500
},
{
"epoch": 0.29,
"learning_rate": 9.914514458564172e-06,
"loss": 5.866,
"step": 36000
},
{
"epoch": 0.3,
"learning_rate": 9.912870505844253e-06,
"loss": 5.8613,
"step": 36500
},
{
"epoch": 0.3,
"learning_rate": 9.911226553124333e-06,
"loss": 5.8509,
"step": 37000
},
{
"epoch": 0.31,
"learning_rate": 9.909582600404412e-06,
"loss": 5.8453,
"step": 37500
},
{
"epoch": 0.31,
"learning_rate": 9.907938647684493e-06,
"loss": 5.843,
"step": 38000
},
{
"epoch": 0.32,
"learning_rate": 9.906294694964574e-06,
"loss": 5.8326,
"step": 38500
},
{
"epoch": 0.32,
"learning_rate": 9.904650742244653e-06,
"loss": 5.8313,
"step": 39000
},
{
"epoch": 0.32,
"learning_rate": 9.903006789524734e-06,
"loss": 5.8303,
"step": 39500
},
{
"epoch": 0.33,
"learning_rate": 9.901362836804813e-06,
"loss": 5.825,
"step": 40000
},
{
"epoch": 0.33,
"learning_rate": 9.899718884084894e-06,
"loss": 5.809,
"step": 40500
},
{
"epoch": 0.34,
"learning_rate": 9.898074931364975e-06,
"loss": 5.8117,
"step": 41000
},
{
"epoch": 0.34,
"learning_rate": 9.896430978645055e-06,
"loss": 5.8035,
"step": 41500
},
{
"epoch": 0.34,
"learning_rate": 9.894787025925136e-06,
"loss": 5.7973,
"step": 42000
},
{
"epoch": 0.35,
"learning_rate": 9.893143073205217e-06,
"loss": 5.7966,
"step": 42500
},
{
"epoch": 0.35,
"learning_rate": 9.891499120485296e-06,
"loss": 5.789,
"step": 43000
},
{
"epoch": 0.36,
"learning_rate": 9.889855167765375e-06,
"loss": 5.7827,
"step": 43500
},
{
"epoch": 0.36,
"learning_rate": 9.888211215045456e-06,
"loss": 5.7801,
"step": 44000
},
{
"epoch": 0.36,
"learning_rate": 9.886567262325535e-06,
"loss": 5.7761,
"step": 44500
},
{
"epoch": 0.37,
"learning_rate": 9.884923309605616e-06,
"loss": 5.7702,
"step": 45000
},
{
"epoch": 0.37,
"learning_rate": 9.883279356885697e-06,
"loss": 5.7638,
"step": 45500
},
{
"epoch": 0.38,
"learning_rate": 9.881635404165777e-06,
"loss": 5.7549,
"step": 46000
},
{
"epoch": 0.38,
"learning_rate": 9.879991451445858e-06,
"loss": 5.7496,
"step": 46500
},
{
"epoch": 0.39,
"learning_rate": 9.878347498725937e-06,
"loss": 5.7451,
"step": 47000
},
{
"epoch": 0.39,
"learning_rate": 9.876703546006018e-06,
"loss": 5.7447,
"step": 47500
},
{
"epoch": 0.39,
"learning_rate": 9.875059593286099e-06,
"loss": 5.7451,
"step": 48000
},
{
"epoch": 0.4,
"learning_rate": 9.873415640566178e-06,
"loss": 5.7393,
"step": 48500
},
{
"epoch": 0.4,
"learning_rate": 9.87177168784626e-06,
"loss": 5.7319,
"step": 49000
},
{
"epoch": 0.41,
"learning_rate": 9.870127735126338e-06,
"loss": 5.73,
"step": 49500
},
{
"epoch": 0.41,
"learning_rate": 9.868483782406418e-06,
"loss": 5.7289,
"step": 50000
},
{
"epoch": 0.41,
"learning_rate": 9.866839829686499e-06,
"loss": 5.7158,
"step": 50500
},
{
"epoch": 0.42,
"learning_rate": 9.86519587696658e-06,
"loss": 5.713,
"step": 51000
},
{
"epoch": 0.42,
"learning_rate": 9.863551924246659e-06,
"loss": 5.7082,
"step": 51500
},
{
"epoch": 0.43,
"learning_rate": 9.86190797152674e-06,
"loss": 5.704,
"step": 52000
},
{
"epoch": 0.43,
"learning_rate": 9.86026401880682e-06,
"loss": 5.7021,
"step": 52500
},
{
"epoch": 0.43,
"learning_rate": 9.8586200660869e-06,
"loss": 5.7041,
"step": 53000
},
{
"epoch": 0.44,
"learning_rate": 9.856976113366981e-06,
"loss": 5.6927,
"step": 53500
},
{
"epoch": 0.44,
"learning_rate": 9.85533216064706e-06,
"loss": 5.6896,
"step": 54000
},
{
"epoch": 0.45,
"learning_rate": 9.853688207927141e-06,
"loss": 5.6803,
"step": 54500
},
{
"epoch": 0.45,
"learning_rate": 9.85204425520722e-06,
"loss": 5.6817,
"step": 55000
},
{
"epoch": 0.45,
"learning_rate": 9.8504003024873e-06,
"loss": 5.6833,
"step": 55500
},
{
"epoch": 0.46,
"learning_rate": 9.848756349767381e-06,
"loss": 5.6681,
"step": 56000
},
{
"epoch": 0.46,
"learning_rate": 9.847112397047462e-06,
"loss": 5.6702,
"step": 56500
},
{
"epoch": 0.47,
"learning_rate": 9.845468444327541e-06,
"loss": 5.6569,
"step": 57000
},
{
"epoch": 0.47,
"learning_rate": 9.843824491607622e-06,
"loss": 5.6653,
"step": 57500
},
{
"epoch": 0.48,
"learning_rate": 9.842180538887703e-06,
"loss": 5.6525,
"step": 58000
},
{
"epoch": 0.48,
"learning_rate": 9.840536586167782e-06,
"loss": 5.6518,
"step": 58500
},
{
"epoch": 0.48,
"learning_rate": 9.838892633447863e-06,
"loss": 5.6497,
"step": 59000
},
{
"epoch": 0.49,
"learning_rate": 9.837248680727943e-06,
"loss": 5.6454,
"step": 59500
},
{
"epoch": 0.49,
"learning_rate": 9.835604728008024e-06,
"loss": 5.6408,
"step": 60000
},
{
"epoch": 0.49,
"eval_accuracy": 0.19079809901308453,
"eval_loss": 5.526524543762207,
"eval_runtime": 411.2166,
"eval_samples_per_second": 749.863,
"eval_steps_per_second": 15.624,
"step": 60000
},
{
"epoch": 0.5,
"learning_rate": 9.833960775288105e-06,
"loss": 5.6208,
"step": 60500
},
{
"epoch": 0.5,
"learning_rate": 9.832316822568184e-06,
"loss": 5.5933,
"step": 61000
},
{
"epoch": 0.5,
"learning_rate": 9.830672869848265e-06,
"loss": 5.5664,
"step": 61500
},
{
"epoch": 0.51,
"learning_rate": 9.829028917128344e-06,
"loss": 5.5416,
"step": 62000
},
{
"epoch": 0.51,
"learning_rate": 9.827384964408423e-06,
"loss": 5.5315,
"step": 62500
},
{
"epoch": 0.52,
"learning_rate": 9.825741011688504e-06,
"loss": 5.5227,
"step": 63000
},
{
"epoch": 0.52,
"learning_rate": 9.824097058968585e-06,
"loss": 5.4887,
"step": 63500
},
{
"epoch": 0.52,
"learning_rate": 9.822453106248665e-06,
"loss": 5.4846,
"step": 64000
},
{
"epoch": 0.53,
"learning_rate": 9.820809153528746e-06,
"loss": 5.4683,
"step": 64500
},
{
"epoch": 0.53,
"learning_rate": 9.819165200808825e-06,
"loss": 5.4631,
"step": 65000
},
{
"epoch": 0.54,
"learning_rate": 9.817521248088906e-06,
"loss": 5.4465,
"step": 65500
},
{
"epoch": 0.54,
"learning_rate": 9.815877295368987e-06,
"loss": 5.4323,
"step": 66000
},
{
"epoch": 0.54,
"learning_rate": 9.814233342649066e-06,
"loss": 5.415,
"step": 66500
},
{
"epoch": 0.55,
"learning_rate": 9.812589389929147e-06,
"loss": 5.4046,
"step": 67000
},
{
"epoch": 0.55,
"learning_rate": 9.810945437209226e-06,
"loss": 5.3932,
"step": 67500
},
{
"epoch": 0.56,
"learning_rate": 9.809301484489306e-06,
"loss": 5.3858,
"step": 68000
},
{
"epoch": 0.56,
"learning_rate": 9.807657531769387e-06,
"loss": 5.3667,
"step": 68500
},
{
"epoch": 0.57,
"learning_rate": 9.806013579049468e-06,
"loss": 5.3536,
"step": 69000
},
{
"epoch": 0.57,
"learning_rate": 9.804369626329547e-06,
"loss": 5.3413,
"step": 69500
},
{
"epoch": 0.57,
"learning_rate": 9.802725673609628e-06,
"loss": 5.327,
"step": 70000
},
{
"epoch": 0.58,
"learning_rate": 9.801081720889709e-06,
"loss": 5.3028,
"step": 70500
},
{
"epoch": 0.58,
"learning_rate": 9.799437768169788e-06,
"loss": 5.2809,
"step": 71000
},
{
"epoch": 0.59,
"learning_rate": 9.797793815449869e-06,
"loss": 5.2594,
"step": 71500
},
{
"epoch": 0.59,
"learning_rate": 9.796149862729948e-06,
"loss": 5.2433,
"step": 72000
},
{
"epoch": 0.59,
"learning_rate": 9.79450591001003e-06,
"loss": 5.2326,
"step": 72500
},
{
"epoch": 0.6,
"learning_rate": 9.79286195729011e-06,
"loss": 5.2018,
"step": 73000
},
{
"epoch": 0.6,
"learning_rate": 9.79121800457019e-06,
"loss": 5.1855,
"step": 73500
},
{
"epoch": 0.61,
"learning_rate": 9.789574051850269e-06,
"loss": 5.1629,
"step": 74000
},
{
"epoch": 0.61,
"learning_rate": 9.78793009913035e-06,
"loss": 5.1439,
"step": 74500
},
{
"epoch": 0.61,
"learning_rate": 9.786286146410429e-06,
"loss": 5.1354,
"step": 75000
},
{
"epoch": 0.62,
"learning_rate": 9.78464219369051e-06,
"loss": 5.1111,
"step": 75500
},
{
"epoch": 0.62,
"learning_rate": 9.782998240970591e-06,
"loss": 5.0945,
"step": 76000
},
{
"epoch": 0.63,
"learning_rate": 9.78135428825067e-06,
"loss": 5.0785,
"step": 76500
},
{
"epoch": 0.63,
"learning_rate": 9.779710335530751e-06,
"loss": 5.0583,
"step": 77000
},
{
"epoch": 0.63,
"learning_rate": 9.77806638281083e-06,
"loss": 5.0422,
"step": 77500
},
{
"epoch": 0.64,
"learning_rate": 9.776422430090912e-06,
"loss": 5.0259,
"step": 78000
},
{
"epoch": 0.64,
"learning_rate": 9.774778477370993e-06,
"loss": 5.0104,
"step": 78500
},
{
"epoch": 0.65,
"learning_rate": 9.773134524651072e-06,
"loss": 4.9894,
"step": 79000
},
{
"epoch": 0.65,
"learning_rate": 9.771490571931153e-06,
"loss": 4.9678,
"step": 79500
},
{
"epoch": 0.66,
"learning_rate": 9.769846619211232e-06,
"loss": 4.9547,
"step": 80000
},
{
"epoch": 0.66,
"learning_rate": 9.768202666491311e-06,
"loss": 4.9365,
"step": 80500
},
{
"epoch": 0.66,
"learning_rate": 9.766558713771392e-06,
"loss": 4.9222,
"step": 81000
},
{
"epoch": 0.67,
"learning_rate": 9.764914761051473e-06,
"loss": 4.9002,
"step": 81500
},
{
"epoch": 0.67,
"learning_rate": 9.763270808331553e-06,
"loss": 4.8928,
"step": 82000
},
{
"epoch": 0.68,
"learning_rate": 9.761626855611634e-06,
"loss": 4.866,
"step": 82500
},
{
"epoch": 0.68,
"learning_rate": 9.759982902891713e-06,
"loss": 4.8529,
"step": 83000
},
{
"epoch": 0.68,
"learning_rate": 9.758338950171794e-06,
"loss": 4.841,
"step": 83500
},
{
"epoch": 0.69,
"learning_rate": 9.756694997451875e-06,
"loss": 4.8105,
"step": 84000
},
{
"epoch": 0.69,
"learning_rate": 9.755051044731954e-06,
"loss": 4.7817,
"step": 84500
},
{
"epoch": 0.7,
"learning_rate": 9.753407092012035e-06,
"loss": 4.7561,
"step": 85000
},
{
"epoch": 0.7,
"learning_rate": 9.751763139292116e-06,
"loss": 4.7291,
"step": 85500
},
{
"epoch": 0.7,
"learning_rate": 9.750119186572195e-06,
"loss": 4.6946,
"step": 86000
},
{
"epoch": 0.71,
"learning_rate": 9.748475233852275e-06,
"loss": 4.6735,
"step": 86500
},
{
"epoch": 0.71,
"learning_rate": 9.746831281132356e-06,
"loss": 4.6509,
"step": 87000
},
{
"epoch": 0.72,
"learning_rate": 9.745187328412435e-06,
"loss": 4.6292,
"step": 87500
},
{
"epoch": 0.72,
"learning_rate": 9.743543375692516e-06,
"loss": 4.6109,
"step": 88000
},
{
"epoch": 0.73,
"learning_rate": 9.741899422972597e-06,
"loss": 4.5882,
"step": 88500
},
{
"epoch": 0.73,
"learning_rate": 9.740255470252676e-06,
"loss": 4.573,
"step": 89000
},
{
"epoch": 0.73,
"learning_rate": 9.738611517532757e-06,
"loss": 4.5576,
"step": 89500
},
{
"epoch": 0.74,
"learning_rate": 9.736967564812836e-06,
"loss": 4.5385,
"step": 90000
},
{
"epoch": 0.74,
"eval_accuracy": 0.31378937363065973,
"eval_loss": 4.313334941864014,
"eval_runtime": 406.1908,
"eval_samples_per_second": 759.141,
"eval_steps_per_second": 15.818,
"step": 90000
},
{
"epoch": 0.74,
"learning_rate": 9.735323612092917e-06,
"loss": 4.5261,
"step": 90500
},
{
"epoch": 0.75,
"learning_rate": 9.733679659372998e-06,
"loss": 4.5152,
"step": 91000
},
{
"epoch": 0.75,
"learning_rate": 9.732035706653078e-06,
"loss": 4.5018,
"step": 91500
},
{
"epoch": 0.75,
"learning_rate": 9.730391753933157e-06,
"loss": 4.4877,
"step": 92000
},
{
"epoch": 0.76,
"learning_rate": 9.728747801213238e-06,
"loss": 4.4749,
"step": 92500
},
{
"epoch": 0.76,
"learning_rate": 9.727103848493317e-06,
"loss": 4.4638,
"step": 93000
},
{
"epoch": 0.77,
"learning_rate": 9.725459895773398e-06,
"loss": 4.4451,
"step": 93500
},
{
"epoch": 0.77,
"learning_rate": 9.723815943053479e-06,
"loss": 4.437,
"step": 94000
},
{
"epoch": 0.77,
"learning_rate": 9.722171990333558e-06,
"loss": 4.433,
"step": 94500
},
{
"epoch": 0.78,
"learning_rate": 9.72052803761364e-06,
"loss": 4.4104,
"step": 95000
},
{
"epoch": 0.78,
"learning_rate": 9.718884084893719e-06,
"loss": 4.4126,
"step": 95500
},
{
"epoch": 0.79,
"learning_rate": 9.7172401321738e-06,
"loss": 4.4033,
"step": 96000
},
{
"epoch": 0.79,
"learning_rate": 9.71559617945388e-06,
"loss": 4.3898,
"step": 96500
},
{
"epoch": 0.79,
"learning_rate": 9.71395222673396e-06,
"loss": 4.3815,
"step": 97000
},
{
"epoch": 0.8,
"learning_rate": 9.71230827401404e-06,
"loss": 4.3669,
"step": 97500
},
{
"epoch": 0.8,
"learning_rate": 9.71066432129412e-06,
"loss": 4.359,
"step": 98000
},
{
"epoch": 0.81,
"learning_rate": 9.709020368574201e-06,
"loss": 4.3495,
"step": 98500
},
{
"epoch": 0.81,
"learning_rate": 9.70737641585428e-06,
"loss": 4.3459,
"step": 99000
},
{
"epoch": 0.82,
"learning_rate": 9.705732463134361e-06,
"loss": 4.3349,
"step": 99500
},
{
"epoch": 0.82,
"learning_rate": 9.70408851041444e-06,
"loss": 4.3202,
"step": 100000
},
{
"epoch": 0.82,
"learning_rate": 9.702444557694522e-06,
"loss": 4.32,
"step": 100500
},
{
"epoch": 0.83,
"learning_rate": 9.700800604974602e-06,
"loss": 4.3064,
"step": 101000
},
{
"epoch": 0.83,
"learning_rate": 9.699156652254682e-06,
"loss": 4.3046,
"step": 101500
},
{
"epoch": 0.84,
"learning_rate": 9.697512699534763e-06,
"loss": 4.2962,
"step": 102000
},
{
"epoch": 0.84,
"learning_rate": 9.695868746814842e-06,
"loss": 4.2827,
"step": 102500
},
{
"epoch": 0.84,
"learning_rate": 9.694224794094923e-06,
"loss": 4.2798,
"step": 103000
},
{
"epoch": 0.85,
"learning_rate": 9.692580841375004e-06,
"loss": 4.2678,
"step": 103500
},
{
"epoch": 0.85,
"learning_rate": 9.690936888655083e-06,
"loss": 4.2663,
"step": 104000
},
{
"epoch": 0.86,
"learning_rate": 9.689292935935163e-06,
"loss": 4.2641,
"step": 104500
},
{
"epoch": 0.86,
"learning_rate": 9.687648983215243e-06,
"loss": 4.2555,
"step": 105000
},
{
"epoch": 0.86,
"learning_rate": 9.686005030495323e-06,
"loss": 4.2571,
"step": 105500
},
{
"epoch": 0.87,
"learning_rate": 9.684361077775404e-06,
"loss": 4.2354,
"step": 106000
},
{
"epoch": 0.87,
"learning_rate": 9.682717125055485e-06,
"loss": 4.2328,
"step": 106500
},
{
"epoch": 0.88,
"learning_rate": 9.681073172335564e-06,
"loss": 4.2232,
"step": 107000
},
{
"epoch": 0.88,
"learning_rate": 9.679429219615645e-06,
"loss": 4.2218,
"step": 107500
},
{
"epoch": 0.88,
"learning_rate": 9.677785266895724e-06,
"loss": 4.2115,
"step": 108000
},
{
"epoch": 0.89,
"learning_rate": 9.676141314175805e-06,
"loss": 4.2085,
"step": 108500
},
{
"epoch": 0.89,
"learning_rate": 9.674497361455886e-06,
"loss": 4.1947,
"step": 109000
},
{
"epoch": 0.9,
"learning_rate": 9.672853408735965e-06,
"loss": 4.1982,
"step": 109500
},
{
"epoch": 0.9,
"learning_rate": 9.671209456016045e-06,
"loss": 4.1942,
"step": 110000
},
{
"epoch": 0.91,
"learning_rate": 9.669565503296126e-06,
"loss": 4.1869,
"step": 110500
},
{
"epoch": 0.91,
"learning_rate": 9.667921550576205e-06,
"loss": 4.1827,
"step": 111000
},
{
"epoch": 0.91,
"learning_rate": 9.666277597856286e-06,
"loss": 4.174,
"step": 111500
},
{
"epoch": 0.92,
"learning_rate": 9.664633645136367e-06,
"loss": 4.1719,
"step": 112000
},
{
"epoch": 0.92,
"learning_rate": 9.662989692416446e-06,
"loss": 4.1692,
"step": 112500
},
{
"epoch": 0.93,
"learning_rate": 9.661345739696527e-06,
"loss": 4.1603,
"step": 113000
},
{
"epoch": 0.93,
"learning_rate": 9.659701786976608e-06,
"loss": 4.1583,
"step": 113500
},
{
"epoch": 0.93,
"learning_rate": 9.658057834256687e-06,
"loss": 4.1563,
"step": 114000
},
{
"epoch": 0.94,
"learning_rate": 9.656413881536768e-06,
"loss": 4.1527,
"step": 114500
},
{
"epoch": 0.94,
"learning_rate": 9.654769928816848e-06,
"loss": 4.1418,
"step": 115000
},
{
"epoch": 0.95,
"learning_rate": 9.653125976096929e-06,
"loss": 4.1333,
"step": 115500
},
{
"epoch": 0.95,
"learning_rate": 9.65148202337701e-06,
"loss": 4.1356,
"step": 116000
},
{
"epoch": 0.95,
"learning_rate": 9.649838070657089e-06,
"loss": 4.1236,
"step": 116500
},
{
"epoch": 0.96,
"learning_rate": 9.648194117937168e-06,
"loss": 4.126,
"step": 117000
},
{
"epoch": 0.96,
"learning_rate": 9.64655016521725e-06,
"loss": 4.1232,
"step": 117500
},
{
"epoch": 0.97,
"learning_rate": 9.644906212497328e-06,
"loss": 4.1148,
"step": 118000
},
{
"epoch": 0.97,
"learning_rate": 9.64326225977741e-06,
"loss": 4.1131,
"step": 118500
},
{
"epoch": 0.97,
"learning_rate": 9.64161830705749e-06,
"loss": 4.1212,
"step": 119000
},
{
"epoch": 0.98,
"learning_rate": 9.63997435433757e-06,
"loss": 4.1011,
"step": 119500
},
{
"epoch": 0.98,
"learning_rate": 9.63833040161765e-06,
"loss": 4.1015,
"step": 120000
},
{
"epoch": 0.98,
"eval_accuracy": 0.35834025167851113,
"eval_loss": 3.899629831314087,
"eval_runtime": 406.9878,
"eval_samples_per_second": 757.654,
"eval_steps_per_second": 15.787,
"step": 120000
},
{
"epoch": 0.99,
"learning_rate": 9.63668644889773e-06,
"loss": 4.0954,
"step": 120500
},
{
"epoch": 0.99,
"learning_rate": 9.635042496177811e-06,
"loss": 4.0893,
"step": 121000
},
{
"epoch": 1.0,
"learning_rate": 9.633398543457892e-06,
"loss": 4.0861,
"step": 121500
},
{
"epoch": 1.0,
"learning_rate": 9.631754590737971e-06,
"loss": 4.0811,
"step": 122000
},
{
"epoch": 1.0,
"learning_rate": 9.63011063801805e-06,
"loss": 4.0771,
"step": 122500
},
{
"epoch": 1.01,
"learning_rate": 9.628466685298131e-06,
"loss": 4.0736,
"step": 123000
},
{
"epoch": 1.01,
"learning_rate": 9.62682273257821e-06,
"loss": 4.0716,
"step": 123500
},
{
"epoch": 1.02,
"learning_rate": 9.625178779858292e-06,
"loss": 4.064,
"step": 124000
},
{
"epoch": 1.02,
"learning_rate": 9.623534827138373e-06,
"loss": 4.0587,
"step": 124500
},
{
"epoch": 1.02,
"learning_rate": 9.621890874418452e-06,
"loss": 4.056,
"step": 125000
},
{
"epoch": 1.03,
"learning_rate": 9.620246921698533e-06,
"loss": 4.0621,
"step": 125500
},
{
"epoch": 1.03,
"learning_rate": 9.618602968978614e-06,
"loss": 4.0473,
"step": 126000
},
{
"epoch": 1.04,
"learning_rate": 9.616959016258693e-06,
"loss": 4.0481,
"step": 126500
},
{
"epoch": 1.04,
"learning_rate": 9.615315063538774e-06,
"loss": 4.0496,
"step": 127000
},
{
"epoch": 1.04,
"learning_rate": 9.613671110818853e-06,
"loss": 4.041,
"step": 127500
},
{
"epoch": 1.05,
"learning_rate": 9.612027158098934e-06,
"loss": 4.0395,
"step": 128000
},
{
"epoch": 1.05,
"learning_rate": 9.610383205379014e-06,
"loss": 4.0357,
"step": 128500
},
{
"epoch": 1.06,
"learning_rate": 9.608739252659095e-06,
"loss": 4.0339,
"step": 129000
},
{
"epoch": 1.06,
"learning_rate": 9.607095299939174e-06,
"loss": 4.0269,
"step": 129500
},
{
"epoch": 1.07,
"learning_rate": 9.605451347219255e-06,
"loss": 4.0204,
"step": 130000
},
{
"epoch": 1.07,
"learning_rate": 9.603807394499334e-06,
"loss": 4.0235,
"step": 130500
},
{
"epoch": 1.07,
"learning_rate": 9.602163441779415e-06,
"loss": 4.0276,
"step": 131000
},
{
"epoch": 1.08,
"learning_rate": 9.600519489059496e-06,
"loss": 4.0172,
"step": 131500
},
{
"epoch": 1.08,
"learning_rate": 9.598875536339575e-06,
"loss": 4.0097,
"step": 132000
},
{
"epoch": 1.09,
"learning_rate": 9.597231583619656e-06,
"loss": 4.0083,
"step": 132500
},
{
"epoch": 1.09,
"learning_rate": 9.595587630899736e-06,
"loss": 4.0063,
"step": 133000
},
{
"epoch": 1.09,
"learning_rate": 9.593943678179817e-06,
"loss": 4.0038,
"step": 133500
},
{
"epoch": 1.1,
"learning_rate": 9.592299725459898e-06,
"loss": 3.995,
"step": 134000
},
{
"epoch": 1.1,
"learning_rate": 9.590655772739977e-06,
"loss": 3.9927,
"step": 134500
},
{
"epoch": 1.11,
"learning_rate": 9.589011820020056e-06,
"loss": 3.9941,
"step": 135000
},
{
"epoch": 1.11,
"learning_rate": 9.587367867300137e-06,
"loss": 3.997,
"step": 135500
},
{
"epoch": 1.11,
"learning_rate": 9.585723914580216e-06,
"loss": 3.986,
"step": 136000
},
{
"epoch": 1.12,
"learning_rate": 9.584079961860297e-06,
"loss": 3.9807,
"step": 136500
},
{
"epoch": 1.12,
"learning_rate": 9.582436009140378e-06,
"loss": 3.9846,
"step": 137000
},
{
"epoch": 1.13,
"learning_rate": 9.580792056420458e-06,
"loss": 3.9712,
"step": 137500
},
{
"epoch": 1.13,
"learning_rate": 9.579148103700539e-06,
"loss": 3.9697,
"step": 138000
},
{
"epoch": 1.13,
"learning_rate": 9.57750415098062e-06,
"loss": 3.9665,
"step": 138500
},
{
"epoch": 1.14,
"learning_rate": 9.575860198260699e-06,
"loss": 3.9676,
"step": 139000
},
{
"epoch": 1.14,
"learning_rate": 9.57421624554078e-06,
"loss": 3.9659,
"step": 139500
},
{
"epoch": 1.15,
"learning_rate": 9.572572292820859e-06,
"loss": 3.9657,
"step": 140000
},
{
"epoch": 1.15,
"learning_rate": 9.570928340100938e-06,
"loss": 3.9616,
"step": 140500
},
{
"epoch": 1.16,
"learning_rate": 9.56928438738102e-06,
"loss": 3.95,
"step": 141000
},
{
"epoch": 1.16,
"learning_rate": 9.5676404346611e-06,
"loss": 3.9508,
"step": 141500
},
{
"epoch": 1.16,
"learning_rate": 9.56599648194118e-06,
"loss": 3.9479,
"step": 142000
},
{
"epoch": 1.17,
"learning_rate": 9.56435252922126e-06,
"loss": 3.9495,
"step": 142500
},
{
"epoch": 1.17,
"learning_rate": 9.56270857650134e-06,
"loss": 3.9443,
"step": 143000
},
{
"epoch": 1.18,
"learning_rate": 9.561064623781421e-06,
"loss": 3.9392,
"step": 143500
},
{
"epoch": 1.18,
"learning_rate": 9.559420671061502e-06,
"loss": 3.943,
"step": 144000
},
{
"epoch": 1.18,
"learning_rate": 9.557776718341581e-06,
"loss": 3.9397,
"step": 144500
},
{
"epoch": 1.19,
"learning_rate": 9.556132765621662e-06,
"loss": 3.9338,
"step": 145000
},
{
"epoch": 1.19,
"learning_rate": 9.554488812901741e-06,
"loss": 3.9284,
"step": 145500
},
{
"epoch": 1.2,
"learning_rate": 9.552844860181822e-06,
"loss": 3.9276,
"step": 146000
},
{
"epoch": 1.2,
"learning_rate": 9.551200907461902e-06,
"loss": 3.9332,
"step": 146500
},
{
"epoch": 1.2,
"learning_rate": 9.549556954741983e-06,
"loss": 3.9263,
"step": 147000
},
{
"epoch": 1.21,
"learning_rate": 9.547913002022062e-06,
"loss": 3.9198,
"step": 147500
},
{
"epoch": 1.21,
"learning_rate": 9.546269049302143e-06,
"loss": 3.9173,
"step": 148000
},
{
"epoch": 1.22,
"learning_rate": 9.544625096582222e-06,
"loss": 3.919,
"step": 148500
},
{
"epoch": 1.22,
"learning_rate": 9.542981143862303e-06,
"loss": 3.917,
"step": 149000
},
{
"epoch": 1.22,
"learning_rate": 9.541337191142384e-06,
"loss": 3.9189,
"step": 149500
},
{
"epoch": 1.23,
"learning_rate": 9.539693238422463e-06,
"loss": 3.9119,
"step": 150000
},
{
"epoch": 1.23,
"eval_accuracy": 0.3782837597418556,
"eval_loss": 3.7199175357818604,
"eval_runtime": 410.4253,
"eval_samples_per_second": 751.308,
"eval_steps_per_second": 15.654,
"step": 150000
},
{
"epoch": 1.23,
"learning_rate": 9.538049285702544e-06,
"loss": 3.9076,
"step": 150500
},
{
"epoch": 1.24,
"learning_rate": 9.536405332982625e-06,
"loss": 3.8992,
"step": 151000
},
{
"epoch": 1.24,
"learning_rate": 9.534761380262705e-06,
"loss": 3.8987,
"step": 151500
},
{
"epoch": 1.25,
"learning_rate": 9.533117427542786e-06,
"loss": 3.8942,
"step": 152000
},
{
"epoch": 1.25,
"learning_rate": 9.531473474822865e-06,
"loss": 3.9044,
"step": 152500
},
{
"epoch": 1.25,
"learning_rate": 9.529829522102944e-06,
"loss": 3.8889,
"step": 153000
},
{
"epoch": 1.26,
"learning_rate": 9.528185569383025e-06,
"loss": 3.8895,
"step": 153500
},
{
"epoch": 1.26,
"learning_rate": 9.526541616663106e-06,
"loss": 3.8905,
"step": 154000
},
{
"epoch": 1.27,
"learning_rate": 9.524897663943185e-06,
"loss": 3.8842,
"step": 154500
},
{
"epoch": 1.27,
"learning_rate": 9.523253711223266e-06,
"loss": 3.8843,
"step": 155000
},
{
"epoch": 1.27,
"learning_rate": 9.521609758503346e-06,
"loss": 3.8713,
"step": 155500
},
{
"epoch": 1.28,
"learning_rate": 9.519965805783427e-06,
"loss": 3.8828,
"step": 156000
},
{
"epoch": 1.28,
"learning_rate": 9.518321853063508e-06,
"loss": 3.8808,
"step": 156500
},
{
"epoch": 1.29,
"learning_rate": 9.516677900343587e-06,
"loss": 3.8747,
"step": 157000
},
{
"epoch": 1.29,
"learning_rate": 9.515033947623668e-06,
"loss": 3.8737,
"step": 157500
},
{
"epoch": 1.29,
"learning_rate": 9.513389994903747e-06,
"loss": 3.8709,
"step": 158000
},
{
"epoch": 1.3,
"learning_rate": 9.511746042183826e-06,
"loss": 3.8574,
"step": 158500
},
{
"epoch": 1.3,
"learning_rate": 9.510102089463907e-06,
"loss": 3.8695,
"step": 159000
},
{
"epoch": 1.31,
"learning_rate": 9.508458136743988e-06,
"loss": 3.8644,
"step": 159500
},
{
"epoch": 1.31,
"learning_rate": 9.506814184024068e-06,
"loss": 3.8627,
"step": 160000
},
{
"epoch": 1.31,
"learning_rate": 9.505170231304149e-06,
"loss": 3.8545,
"step": 160500
},
{
"epoch": 1.32,
"learning_rate": 9.503526278584228e-06,
"loss": 3.852,
"step": 161000
},
{
"epoch": 1.32,
"learning_rate": 9.501882325864309e-06,
"loss": 3.8451,
"step": 161500
},
{
"epoch": 1.33,
"learning_rate": 9.50023837314439e-06,
"loss": 3.8514,
"step": 162000
},
{
"epoch": 1.33,
"learning_rate": 9.498594420424469e-06,
"loss": 3.8516,
"step": 162500
},
{
"epoch": 1.34,
"learning_rate": 9.49695046770455e-06,
"loss": 3.8459,
"step": 163000
},
{
"epoch": 1.34,
"learning_rate": 9.495306514984631e-06,
"loss": 3.8456,
"step": 163500
},
{
"epoch": 1.34,
"learning_rate": 9.49366256226471e-06,
"loss": 3.8393,
"step": 164000
},
{
"epoch": 1.35,
"learning_rate": 9.492018609544791e-06,
"loss": 3.8407,
"step": 164500
},
{
"epoch": 1.35,
"learning_rate": 9.49037465682487e-06,
"loss": 3.8337,
"step": 165000
},
{
"epoch": 1.36,
"learning_rate": 9.48873070410495e-06,
"loss": 3.8352,
"step": 165500
},
{
"epoch": 1.36,
"learning_rate": 9.48708675138503e-06,
"loss": 3.8305,
"step": 166000
},
{
"epoch": 1.36,
"learning_rate": 9.485442798665112e-06,
"loss": 3.8327,
"step": 166500
},
{
"epoch": 1.37,
"learning_rate": 9.483798845945191e-06,
"loss": 3.8242,
"step": 167000
},
{
"epoch": 1.37,
"learning_rate": 9.482154893225272e-06,
"loss": 3.8248,
"step": 167500
},
{
"epoch": 1.38,
"learning_rate": 9.480510940505351e-06,
"loss": 3.8208,
"step": 168000
},
{
"epoch": 1.38,
"learning_rate": 9.478866987785432e-06,
"loss": 3.8195,
"step": 168500
},
{
"epoch": 1.38,
"learning_rate": 9.477223035065513e-06,
"loss": 3.8262,
"step": 169000
},
{
"epoch": 1.39,
"learning_rate": 9.475579082345593e-06,
"loss": 3.8205,
"step": 169500
},
{
"epoch": 1.39,
"learning_rate": 9.473935129625673e-06,
"loss": 3.823,
"step": 170000
},
{
"epoch": 1.4,
"learning_rate": 9.472291176905753e-06,
"loss": 3.8127,
"step": 170500
},
{
"epoch": 1.4,
"learning_rate": 9.470647224185832e-06,
"loss": 3.813,
"step": 171000
},
{
"epoch": 1.41,
"learning_rate": 9.469003271465913e-06,
"loss": 3.8126,
"step": 171500
},
{
"epoch": 1.41,
"learning_rate": 9.467359318745994e-06,
"loss": 3.808,
"step": 172000
},
{
"epoch": 1.41,
"learning_rate": 9.465715366026073e-06,
"loss": 3.8051,
"step": 172500
},
{
"epoch": 1.42,
"learning_rate": 9.464071413306154e-06,
"loss": 3.8038,
"step": 173000
},
{
"epoch": 1.42,
"learning_rate": 9.462427460586234e-06,
"loss": 3.7961,
"step": 173500
},
{
"epoch": 1.43,
"learning_rate": 9.460783507866314e-06,
"loss": 3.7979,
"step": 174000
},
{
"epoch": 1.43,
"learning_rate": 9.459139555146395e-06,
"loss": 3.795,
"step": 174500
},
{
"epoch": 1.43,
"learning_rate": 9.457495602426475e-06,
"loss": 3.8004,
"step": 175000
},
{
"epoch": 1.44,
"learning_rate": 9.455851649706556e-06,
"loss": 3.7958,
"step": 175500
},
{
"epoch": 1.44,
"learning_rate": 9.454207696986637e-06,
"loss": 3.7909,
"step": 176000
},
{
"epoch": 1.45,
"learning_rate": 9.452563744266716e-06,
"loss": 3.8,
"step": 176500
},
{
"epoch": 1.45,
"learning_rate": 9.450919791546795e-06,
"loss": 3.7826,
"step": 177000
},
{
"epoch": 1.45,
"learning_rate": 9.449275838826876e-06,
"loss": 3.7857,
"step": 177500
},
{
"epoch": 1.46,
"learning_rate": 9.447631886106956e-06,
"loss": 3.7867,
"step": 178000
},
{
"epoch": 1.46,
"learning_rate": 9.445987933387036e-06,
"loss": 3.7935,
"step": 178500
},
{
"epoch": 1.47,
"learning_rate": 9.444343980667117e-06,
"loss": 3.7803,
"step": 179000
},
{
"epoch": 1.47,
"learning_rate": 9.442700027947197e-06,
"loss": 3.782,
"step": 179500
},
{
"epoch": 1.47,
"learning_rate": 9.441056075227278e-06,
"loss": 3.7832,
"step": 180000
},
{
"epoch": 1.47,
"eval_accuracy": 0.39196367058721415,
"eval_loss": 3.6038873195648193,
"eval_runtime": 407.692,
"eval_samples_per_second": 756.345,
"eval_steps_per_second": 15.759,
"step": 180000
},
{
"epoch": 1.48,
"learning_rate": 9.439412122507357e-06,
"loss": 3.775,
"step": 180500
},
{
"epoch": 1.48,
"learning_rate": 9.437768169787438e-06,
"loss": 3.7777,
"step": 181000
},
{
"epoch": 1.49,
"learning_rate": 9.436124217067519e-06,
"loss": 3.769,
"step": 181500
},
{
"epoch": 1.49,
"learning_rate": 9.434480264347598e-06,
"loss": 3.7712,
"step": 182000
},
{
"epoch": 1.5,
"learning_rate": 9.43283631162768e-06,
"loss": 3.7638,
"step": 182500
},
{
"epoch": 1.5,
"learning_rate": 9.431192358907758e-06,
"loss": 3.7686,
"step": 183000
},
{
"epoch": 1.5,
"learning_rate": 9.429548406187838e-06,
"loss": 3.7652,
"step": 183500
},
{
"epoch": 1.51,
"learning_rate": 9.427904453467919e-06,
"loss": 3.7649,
"step": 184000
},
{
"epoch": 1.51,
"learning_rate": 9.426260500748e-06,
"loss": 3.7608,
"step": 184500
},
{
"epoch": 1.52,
"learning_rate": 9.424616548028079e-06,
"loss": 3.762,
"step": 185000
},
{
"epoch": 1.52,
"learning_rate": 9.42297259530816e-06,
"loss": 3.7551,
"step": 185500
},
{
"epoch": 1.52,
"learning_rate": 9.42132864258824e-06,
"loss": 3.7545,
"step": 186000
},
{
"epoch": 1.53,
"learning_rate": 9.41968468986832e-06,
"loss": 3.7556,
"step": 186500
},
{
"epoch": 1.53,
"learning_rate": 9.418040737148401e-06,
"loss": 3.7507,
"step": 187000
},
{
"epoch": 1.54,
"learning_rate": 9.41639678442848e-06,
"loss": 3.7467,
"step": 187500
},
{
"epoch": 1.54,
"learning_rate": 9.414752831708561e-06,
"loss": 3.7507,
"step": 188000
},
{
"epoch": 1.54,
"learning_rate": 9.413108878988642e-06,
"loss": 3.7384,
"step": 188500
},
{
"epoch": 1.55,
"learning_rate": 9.41146492626872e-06,
"loss": 3.7473,
"step": 189000
},
{
"epoch": 1.55,
"learning_rate": 9.409820973548801e-06,
"loss": 3.7429,
"step": 189500
},
{
"epoch": 1.56,
"learning_rate": 9.408177020828882e-06,
"loss": 3.7427,
"step": 190000
},
{
"epoch": 1.56,
"learning_rate": 9.406533068108961e-06,
"loss": 3.74,
"step": 190500
},
{
"epoch": 1.56,
"learning_rate": 9.404889115389042e-06,
"loss": 3.7361,
"step": 191000
},
{
"epoch": 1.57,
"learning_rate": 9.403245162669123e-06,
"loss": 3.7427,
"step": 191500
},
{
"epoch": 1.57,
"learning_rate": 9.401601209949202e-06,
"loss": 3.7388,
"step": 192000
},
{
"epoch": 1.58,
"learning_rate": 9.399957257229283e-06,
"loss": 3.7349,
"step": 192500
},
{
"epoch": 1.58,
"learning_rate": 9.398313304509363e-06,
"loss": 3.7302,
"step": 193000
},
{
"epoch": 1.59,
"learning_rate": 9.396669351789444e-06,
"loss": 3.7408,
"step": 193500
},
{
"epoch": 1.59,
"learning_rate": 9.395025399069525e-06,
"loss": 3.7239,
"step": 194000
},
{
"epoch": 1.59,
"learning_rate": 9.393381446349604e-06,
"loss": 3.7258,
"step": 194500
},
{
"epoch": 1.6,
"learning_rate": 9.391737493629683e-06,
"loss": 3.7149,
"step": 195000
},
{
"epoch": 1.6,
"learning_rate": 9.390093540909764e-06,
"loss": 3.7213,
"step": 195500
},
{
"epoch": 1.61,
"learning_rate": 9.388449588189843e-06,
"loss": 3.7218,
"step": 196000
},
{
"epoch": 1.61,
"learning_rate": 9.386805635469924e-06,
"loss": 3.7175,
"step": 196500
},
{
"epoch": 1.61,
"learning_rate": 9.385161682750005e-06,
"loss": 3.7088,
"step": 197000
},
{
"epoch": 1.62,
"learning_rate": 9.383517730030085e-06,
"loss": 3.7147,
"step": 197500
},
{
"epoch": 1.62,
"learning_rate": 9.381873777310166e-06,
"loss": 3.7194,
"step": 198000
},
{
"epoch": 1.63,
"learning_rate": 9.380229824590245e-06,
"loss": 3.7048,
"step": 198500
},
{
"epoch": 1.63,
"learning_rate": 9.378585871870326e-06,
"loss": 3.7022,
"step": 199000
},
{
"epoch": 1.63,
"learning_rate": 9.376941919150407e-06,
"loss": 3.6975,
"step": 199500
},
{
"epoch": 1.64,
"learning_rate": 9.375297966430486e-06,
"loss": 3.6994,
"step": 200000
},
{
"epoch": 1.64,
"learning_rate": 9.373654013710567e-06,
"loss": 3.6963,
"step": 200500
},
{
"epoch": 1.65,
"learning_rate": 9.372010060990646e-06,
"loss": 3.6973,
"step": 201000
},
{
"epoch": 1.65,
"learning_rate": 9.370366108270726e-06,
"loss": 3.6949,
"step": 201500
},
{
"epoch": 1.65,
"learning_rate": 9.368722155550807e-06,
"loss": 3.6867,
"step": 202000
},
{
"epoch": 1.66,
"learning_rate": 9.367078202830888e-06,
"loss": 3.6865,
"step": 202500
},
{
"epoch": 1.66,
"learning_rate": 9.365434250110967e-06,
"loss": 3.6862,
"step": 203000
},
{
"epoch": 1.67,
"learning_rate": 9.363790297391048e-06,
"loss": 3.682,
"step": 203500
},
{
"epoch": 1.67,
"learning_rate": 9.362146344671129e-06,
"loss": 3.6823,
"step": 204000
},
{
"epoch": 1.68,
"learning_rate": 9.360502391951208e-06,
"loss": 3.6839,
"step": 204500
},
{
"epoch": 1.68,
"learning_rate": 9.358858439231289e-06,
"loss": 3.6768,
"step": 205000
},
{
"epoch": 1.68,
"learning_rate": 9.357214486511368e-06,
"loss": 3.6763,
"step": 205500
},
{
"epoch": 1.69,
"learning_rate": 9.35557053379145e-06,
"loss": 3.6763,
"step": 206000
},
{
"epoch": 1.69,
"learning_rate": 9.35392658107153e-06,
"loss": 3.6788,
"step": 206500
},
{
"epoch": 1.7,
"learning_rate": 9.35228262835161e-06,
"loss": 3.6731,
"step": 207000
},
{
"epoch": 1.7,
"learning_rate": 9.350638675631689e-06,
"loss": 3.6703,
"step": 207500
},
{
"epoch": 1.7,
"learning_rate": 9.34899472291177e-06,
"loss": 3.6735,
"step": 208000
},
{
"epoch": 1.71,
"learning_rate": 9.347350770191849e-06,
"loss": 3.6659,
"step": 208500
},
{
"epoch": 1.71,
"learning_rate": 9.34570681747193e-06,
"loss": 3.6618,
"step": 209000
},
{
"epoch": 1.72,
"learning_rate": 9.344062864752011e-06,
"loss": 3.66,
"step": 209500
},
{
"epoch": 1.72,
"learning_rate": 9.34241891203209e-06,
"loss": 3.6686,
"step": 210000
},
{
"epoch": 1.72,
"eval_accuracy": 0.4032999985682802,
"eval_loss": 3.505682945251465,
"eval_runtime": 407.8472,
"eval_samples_per_second": 756.058,
"eval_steps_per_second": 15.753,
"step": 210000
},
{
"epoch": 1.72,
"learning_rate": 9.340774959312171e-06,
"loss": 3.6608,
"step": 210500
},
{
"epoch": 1.73,
"learning_rate": 9.33913100659225e-06,
"loss": 3.6528,
"step": 211000
},
{
"epoch": 1.73,
"learning_rate": 9.337487053872332e-06,
"loss": 3.6607,
"step": 211500
},
{
"epoch": 1.74,
"learning_rate": 9.335843101152413e-06,
"loss": 3.6561,
"step": 212000
},
{
"epoch": 1.74,
"learning_rate": 9.334199148432492e-06,
"loss": 3.6495,
"step": 212500
},
{
"epoch": 1.75,
"learning_rate": 9.332555195712573e-06,
"loss": 3.6438,
"step": 213000
},
{
"epoch": 1.75,
"learning_rate": 9.330911242992652e-06,
"loss": 3.6483,
"step": 213500
},
{
"epoch": 1.75,
"learning_rate": 9.329267290272731e-06,
"loss": 3.6504,
"step": 214000
},
{
"epoch": 1.76,
"learning_rate": 9.327623337552812e-06,
"loss": 3.6394,
"step": 214500
},
{
"epoch": 1.76,
"learning_rate": 9.325979384832893e-06,
"loss": 3.6413,
"step": 215000
},
{
"epoch": 1.77,
"learning_rate": 9.324335432112973e-06,
"loss": 3.6425,
"step": 215500
},
{
"epoch": 1.77,
"learning_rate": 9.322691479393054e-06,
"loss": 3.6369,
"step": 216000
},
{
"epoch": 1.77,
"learning_rate": 9.321047526673135e-06,
"loss": 3.6366,
"step": 216500
},
{
"epoch": 1.78,
"learning_rate": 9.319403573953214e-06,
"loss": 3.637,
"step": 217000
},
{
"epoch": 1.78,
"learning_rate": 9.317759621233295e-06,
"loss": 3.638,
"step": 217500
},
{
"epoch": 1.79,
"learning_rate": 9.316115668513374e-06,
"loss": 3.6389,
"step": 218000
},
{
"epoch": 1.79,
"learning_rate": 9.314471715793455e-06,
"loss": 3.6397,
"step": 218500
},
{
"epoch": 1.79,
"learning_rate": 9.312827763073536e-06,
"loss": 3.6262,
"step": 219000
},
{
"epoch": 1.8,
"learning_rate": 9.311183810353615e-06,
"loss": 3.6296,
"step": 219500
},
{
"epoch": 1.8,
"learning_rate": 9.309539857633695e-06,
"loss": 3.6228,
"step": 220000
},
{
"epoch": 1.81,
"learning_rate": 9.307895904913776e-06,
"loss": 3.6257,
"step": 220500
},
{
"epoch": 1.81,
"learning_rate": 9.306251952193855e-06,
"loss": 3.6242,
"step": 221000
},
{
"epoch": 1.81,
"learning_rate": 9.304607999473936e-06,
"loss": 3.6263,
"step": 221500
},
{
"epoch": 1.82,
"learning_rate": 9.302964046754017e-06,
"loss": 3.6204,
"step": 222000
},
{
"epoch": 1.82,
"learning_rate": 9.301320094034096e-06,
"loss": 3.6092,
"step": 222500
},
{
"epoch": 1.83,
"learning_rate": 9.299676141314177e-06,
"loss": 3.6146,
"step": 223000
},
{
"epoch": 1.83,
"learning_rate": 9.298032188594256e-06,
"loss": 3.6233,
"step": 223500
},
{
"epoch": 1.84,
"learning_rate": 9.296388235874337e-06,
"loss": 3.62,
"step": 224000
},
{
"epoch": 1.84,
"learning_rate": 9.294744283154418e-06,
"loss": 3.6191,
"step": 224500
},
{
"epoch": 1.84,
"learning_rate": 9.293100330434498e-06,
"loss": 3.616,
"step": 225000
},
{
"epoch": 1.85,
"learning_rate": 9.291456377714577e-06,
"loss": 3.6147,
"step": 225500
},
{
"epoch": 1.85,
"learning_rate": 9.289812424994658e-06,
"loss": 3.6147,
"step": 226000
},
{
"epoch": 1.86,
"learning_rate": 9.288168472274737e-06,
"loss": 3.6144,
"step": 226500
},
{
"epoch": 1.86,
"learning_rate": 9.286524519554818e-06,
"loss": 3.6047,
"step": 227000
},
{
"epoch": 1.86,
"learning_rate": 9.284880566834899e-06,
"loss": 3.6106,
"step": 227500
},
{
"epoch": 1.87,
"learning_rate": 9.283236614114978e-06,
"loss": 3.6059,
"step": 228000
},
{
"epoch": 1.87,
"learning_rate": 9.28159266139506e-06,
"loss": 3.6122,
"step": 228500
},
{
"epoch": 1.88,
"learning_rate": 9.279948708675139e-06,
"loss": 3.6052,
"step": 229000
},
{
"epoch": 1.88,
"learning_rate": 9.27830475595522e-06,
"loss": 3.6047,
"step": 229500
},
{
"epoch": 1.88,
"learning_rate": 9.2766608032353e-06,
"loss": 3.6021,
"step": 230000
},
{
"epoch": 1.89,
"learning_rate": 9.27501685051538e-06,
"loss": 3.6023,
"step": 230500
},
{
"epoch": 1.89,
"learning_rate": 9.27337289779546e-06,
"loss": 3.6019,
"step": 231000
},
{
"epoch": 1.9,
"learning_rate": 9.27172894507554e-06,
"loss": 3.595,
"step": 231500
},
{
"epoch": 1.9,
"learning_rate": 9.270084992355621e-06,
"loss": 3.5902,
"step": 232000
},
{
"epoch": 1.9,
"learning_rate": 9.2684410396357e-06,
"loss": 3.5964,
"step": 232500
},
{
"epoch": 1.91,
"learning_rate": 9.266797086915781e-06,
"loss": 3.5953,
"step": 233000
},
{
"epoch": 1.91,
"learning_rate": 9.26515313419586e-06,
"loss": 3.5934,
"step": 233500
},
{
"epoch": 1.92,
"learning_rate": 9.263509181475942e-06,
"loss": 3.5851,
"step": 234000
},
{
"epoch": 1.92,
"learning_rate": 9.261865228756023e-06,
"loss": 3.5861,
"step": 234500
},
{
"epoch": 1.93,
"learning_rate": 9.260221276036102e-06,
"loss": 3.5849,
"step": 235000
},
{
"epoch": 1.93,
"learning_rate": 9.258577323316183e-06,
"loss": 3.5883,
"step": 235500
},
{
"epoch": 1.93,
"learning_rate": 9.256933370596262e-06,
"loss": 3.5822,
"step": 236000
},
{
"epoch": 1.94,
"learning_rate": 9.255289417876343e-06,
"loss": 3.5742,
"step": 236500
},
{
"epoch": 1.94,
"learning_rate": 9.253645465156424e-06,
"loss": 3.5809,
"step": 237000
},
{
"epoch": 1.95,
"learning_rate": 9.252001512436503e-06,
"loss": 3.5805,
"step": 237500
},
{
"epoch": 1.95,
"learning_rate": 9.250357559716583e-06,
"loss": 3.5813,
"step": 238000
},
{
"epoch": 1.95,
"learning_rate": 9.248713606996664e-06,
"loss": 3.5793,
"step": 238500
},
{
"epoch": 1.96,
"learning_rate": 9.247069654276743e-06,
"loss": 3.5819,
"step": 239000
},
{
"epoch": 1.96,
"learning_rate": 9.245425701556824e-06,
"loss": 3.5771,
"step": 239500
},
{
"epoch": 1.97,
"learning_rate": 9.243781748836905e-06,
"loss": 3.5793,
"step": 240000
},
{
"epoch": 1.97,
"eval_accuracy": 0.4137328604632164,
"eval_loss": 3.4226527214050293,
"eval_runtime": 405.6879,
"eval_samples_per_second": 760.082,
"eval_steps_per_second": 15.837,
"step": 240000
},
{
"epoch": 1.97,
"learning_rate": 9.242137796116984e-06,
"loss": 3.5746,
"step": 240500
},
{
"epoch": 1.97,
"learning_rate": 9.240493843397065e-06,
"loss": 3.571,
"step": 241000
},
{
"epoch": 1.98,
"learning_rate": 9.238849890677144e-06,
"loss": 3.5759,
"step": 241500
},
{
"epoch": 1.98,
"learning_rate": 9.237205937957225e-06,
"loss": 3.5681,
"step": 242000
},
{
"epoch": 1.99,
"learning_rate": 9.235561985237306e-06,
"loss": 3.5718,
"step": 242500
},
{
"epoch": 1.99,
"learning_rate": 9.233918032517386e-06,
"loss": 3.5656,
"step": 243000
},
{
"epoch": 1.99,
"learning_rate": 9.232274079797465e-06,
"loss": 3.5635,
"step": 243500
},
{
"epoch": 2.0,
"learning_rate": 9.230630127077546e-06,
"loss": 3.5619,
"step": 244000
},
{
"epoch": 2.0,
"learning_rate": 9.228986174357627e-06,
"loss": 3.5596,
"step": 244500
},
{
"epoch": 2.01,
"learning_rate": 9.227342221637706e-06,
"loss": 3.5599,
"step": 245000
},
{
"epoch": 2.01,
"learning_rate": 9.225698268917787e-06,
"loss": 3.57,
"step": 245500
},
{
"epoch": 2.02,
"learning_rate": 9.224054316197866e-06,
"loss": 3.5608,
"step": 246000
},
{
"epoch": 2.02,
"learning_rate": 9.222410363477947e-06,
"loss": 3.5601,
"step": 246500
},
{
"epoch": 2.02,
"learning_rate": 9.220766410758028e-06,
"loss": 3.562,
"step": 247000
},
{
"epoch": 2.03,
"learning_rate": 9.219122458038107e-06,
"loss": 3.5569,
"step": 247500
},
{
"epoch": 2.03,
"learning_rate": 9.217478505318188e-06,
"loss": 3.5563,
"step": 248000
},
{
"epoch": 2.04,
"learning_rate": 9.215834552598268e-06,
"loss": 3.5634,
"step": 248500
},
{
"epoch": 2.04,
"learning_rate": 9.214190599878349e-06,
"loss": 3.55,
"step": 249000
},
{
"epoch": 2.04,
"learning_rate": 9.212546647158428e-06,
"loss": 3.5459,
"step": 249500
},
{
"epoch": 2.05,
"learning_rate": 9.210902694438509e-06,
"loss": 3.5478,
"step": 250000
},
{
"epoch": 2.05,
"learning_rate": 9.209258741718588e-06,
"loss": 3.5526,
"step": 250500
},
{
"epoch": 2.06,
"learning_rate": 9.20761478899867e-06,
"loss": 3.5476,
"step": 251000
},
{
"epoch": 2.06,
"learning_rate": 9.205970836278748e-06,
"loss": 3.5567,
"step": 251500
},
{
"epoch": 2.06,
"learning_rate": 9.20432688355883e-06,
"loss": 3.5423,
"step": 252000
},
{
"epoch": 2.07,
"learning_rate": 9.20268293083891e-06,
"loss": 3.5487,
"step": 252500
},
{
"epoch": 2.07,
"learning_rate": 9.20103897811899e-06,
"loss": 3.5401,
"step": 253000
},
{
"epoch": 2.08,
"learning_rate": 9.19939502539907e-06,
"loss": 3.5503,
"step": 253500
},
{
"epoch": 2.08,
"learning_rate": 9.19775107267915e-06,
"loss": 3.5503,
"step": 254000
},
{
"epoch": 2.09,
"learning_rate": 9.196107119959231e-06,
"loss": 3.5316,
"step": 254500
},
{
"epoch": 2.09,
"learning_rate": 9.194463167239312e-06,
"loss": 3.541,
"step": 255000
},
{
"epoch": 2.09,
"learning_rate": 9.192819214519391e-06,
"loss": 3.5368,
"step": 255500
},
{
"epoch": 2.1,
"learning_rate": 9.19117526179947e-06,
"loss": 3.5351,
"step": 256000
},
{
"epoch": 2.1,
"learning_rate": 9.189531309079551e-06,
"loss": 3.537,
"step": 256500
},
{
"epoch": 2.11,
"learning_rate": 9.18788735635963e-06,
"loss": 3.536,
"step": 257000
},
{
"epoch": 2.11,
"learning_rate": 9.186243403639712e-06,
"loss": 3.5331,
"step": 257500
},
{
"epoch": 2.11,
"learning_rate": 9.184599450919793e-06,
"loss": 3.5408,
"step": 258000
},
{
"epoch": 2.12,
"learning_rate": 9.182955498199872e-06,
"loss": 3.5391,
"step": 258500
},
{
"epoch": 2.12,
"learning_rate": 9.181311545479953e-06,
"loss": 3.5339,
"step": 259000
},
{
"epoch": 2.13,
"learning_rate": 9.179667592760034e-06,
"loss": 3.535,
"step": 259500
},
{
"epoch": 2.13,
"learning_rate": 9.178023640040113e-06,
"loss": 3.5261,
"step": 260000
},
{
"epoch": 2.13,
"learning_rate": 9.176379687320194e-06,
"loss": 3.5266,
"step": 260500
},
{
"epoch": 2.14,
"learning_rate": 9.174735734600273e-06,
"loss": 3.5283,
"step": 261000
},
{
"epoch": 2.14,
"learning_rate": 9.173091781880353e-06,
"loss": 3.5348,
"step": 261500
},
{
"epoch": 2.15,
"learning_rate": 9.171447829160434e-06,
"loss": 3.5231,
"step": 262000
},
{
"epoch": 2.15,
"learning_rate": 9.169803876440515e-06,
"loss": 3.5138,
"step": 262500
},
{
"epoch": 2.15,
"learning_rate": 9.168159923720594e-06,
"loss": 3.5306,
"step": 263000
},
{
"epoch": 2.16,
"learning_rate": 9.166515971000675e-06,
"loss": 3.5224,
"step": 263500
},
{
"epoch": 2.16,
"learning_rate": 9.164872018280754e-06,
"loss": 3.5279,
"step": 264000
},
{
"epoch": 2.17,
"learning_rate": 9.163228065560835e-06,
"loss": 3.5207,
"step": 264500
},
{
"epoch": 2.17,
"learning_rate": 9.161584112840916e-06,
"loss": 3.5213,
"step": 265000
},
{
"epoch": 2.18,
"learning_rate": 9.159940160120995e-06,
"loss": 3.5152,
"step": 265500
},
{
"epoch": 2.18,
"learning_rate": 9.158296207401076e-06,
"loss": 3.5148,
"step": 266000
},
{
"epoch": 2.18,
"learning_rate": 9.156652254681156e-06,
"loss": 3.5185,
"step": 266500
},
{
"epoch": 2.19,
"learning_rate": 9.155008301961237e-06,
"loss": 3.5155,
"step": 267000
},
{
"epoch": 2.19,
"learning_rate": 9.153364349241318e-06,
"loss": 3.5192,
"step": 267500
},
{
"epoch": 2.2,
"learning_rate": 9.151720396521397e-06,
"loss": 3.5185,
"step": 268000
},
{
"epoch": 2.2,
"learning_rate": 9.150076443801476e-06,
"loss": 3.518,
"step": 268500
},
{
"epoch": 2.2,
"learning_rate": 9.148432491081557e-06,
"loss": 3.5159,
"step": 269000
},
{
"epoch": 2.21,
"learning_rate": 9.146788538361636e-06,
"loss": 3.5211,
"step": 269500
},
{
"epoch": 2.21,
"learning_rate": 9.145144585641717e-06,
"loss": 3.5128,
"step": 270000
},
{
"epoch": 2.21,
"eval_accuracy": 0.42085813479275763,
"eval_loss": 3.364504814147949,
"eval_runtime": 411.9164,
"eval_samples_per_second": 748.589,
"eval_steps_per_second": 15.598,
"step": 270000
},
{
"epoch": 2.22,
"learning_rate": 9.143500632921798e-06,
"loss": 3.5162,
"step": 270500
},
{
"epoch": 2.22,
"learning_rate": 9.141856680201878e-06,
"loss": 3.5144,
"step": 271000
},
{
"epoch": 2.22,
"learning_rate": 9.140212727481959e-06,
"loss": 3.5095,
"step": 271500
},
{
"epoch": 2.23,
"learning_rate": 9.13856877476204e-06,
"loss": 3.502,
"step": 272000
},
{
"epoch": 2.23,
"learning_rate": 9.136924822042119e-06,
"loss": 3.503,
"step": 272500
},
{
"epoch": 2.24,
"learning_rate": 9.1352808693222e-06,
"loss": 3.5011,
"step": 273000
},
{
"epoch": 2.24,
"learning_rate": 9.133636916602279e-06,
"loss": 3.505,
"step": 273500
},
{
"epoch": 2.24,
"learning_rate": 9.131992963882358e-06,
"loss": 3.5071,
"step": 274000
},
{
"epoch": 2.25,
"learning_rate": 9.13034901116244e-06,
"loss": 3.5018,
"step": 274500
},
{
"epoch": 2.25,
"learning_rate": 9.12870505844252e-06,
"loss": 3.5058,
"step": 275000
},
{
"epoch": 2.26,
"learning_rate": 9.1270611057226e-06,
"loss": 3.5056,
"step": 275500
},
{
"epoch": 2.26,
"learning_rate": 9.12541715300268e-06,
"loss": 3.5085,
"step": 276000
},
{
"epoch": 2.27,
"learning_rate": 9.12377320028276e-06,
"loss": 3.5111,
"step": 276500
},
{
"epoch": 2.27,
"learning_rate": 9.122129247562841e-06,
"loss": 3.4963,
"step": 277000
},
{
"epoch": 2.27,
"learning_rate": 9.120485294842922e-06,
"loss": 3.5001,
"step": 277500
},
{
"epoch": 2.28,
"learning_rate": 9.118841342123001e-06,
"loss": 3.5004,
"step": 278000
},
{
"epoch": 2.28,
"learning_rate": 9.117197389403082e-06,
"loss": 3.5027,
"step": 278500
},
{
"epoch": 2.29,
"learning_rate": 9.115553436683161e-06,
"loss": 3.4984,
"step": 279000
},
{
"epoch": 2.29,
"learning_rate": 9.113909483963242e-06,
"loss": 3.5006,
"step": 279500
},
{
"epoch": 2.29,
"learning_rate": 9.112265531243322e-06,
"loss": 3.4952,
"step": 280000
},
{
"epoch": 2.3,
"learning_rate": 9.110621578523403e-06,
"loss": 3.4953,
"step": 280500
},
{
"epoch": 2.3,
"learning_rate": 9.108977625803482e-06,
"loss": 3.4951,
"step": 281000
},
{
"epoch": 2.31,
"learning_rate": 9.107333673083563e-06,
"loss": 3.4984,
"step": 281500
},
{
"epoch": 2.31,
"learning_rate": 9.105689720363642e-06,
"loss": 3.4849,
"step": 282000
},
{
"epoch": 2.31,
"learning_rate": 9.104045767643723e-06,
"loss": 3.4901,
"step": 282500
},
{
"epoch": 2.32,
"learning_rate": 9.102401814923804e-06,
"loss": 3.4884,
"step": 283000
},
{
"epoch": 2.32,
"learning_rate": 9.100757862203883e-06,
"loss": 3.4937,
"step": 283500
},
{
"epoch": 2.33,
"learning_rate": 9.099113909483964e-06,
"loss": 3.4915,
"step": 284000
},
{
"epoch": 2.33,
"learning_rate": 9.097469956764045e-06,
"loss": 3.4922,
"step": 284500
},
{
"epoch": 2.33,
"learning_rate": 9.095826004044125e-06,
"loss": 3.4892,
"step": 285000
},
{
"epoch": 2.34,
"learning_rate": 9.094182051324206e-06,
"loss": 3.4837,
"step": 285500
},
{
"epoch": 2.34,
"learning_rate": 9.092538098604285e-06,
"loss": 3.4821,
"step": 286000
},
{
"epoch": 2.35,
"learning_rate": 9.090894145884364e-06,
"loss": 3.4836,
"step": 286500
},
{
"epoch": 2.35,
"learning_rate": 9.089250193164445e-06,
"loss": 3.4826,
"step": 287000
},
{
"epoch": 2.36,
"learning_rate": 9.087606240444526e-06,
"loss": 3.477,
"step": 287500
},
{
"epoch": 2.36,
"learning_rate": 9.085962287724605e-06,
"loss": 3.485,
"step": 288000
},
{
"epoch": 2.36,
"learning_rate": 9.084318335004686e-06,
"loss": 3.4778,
"step": 288500
},
{
"epoch": 2.37,
"learning_rate": 9.082674382284766e-06,
"loss": 3.49,
"step": 289000
},
{
"epoch": 2.37,
"learning_rate": 9.081030429564847e-06,
"loss": 3.4814,
"step": 289500
},
{
"epoch": 2.38,
"learning_rate": 9.079386476844928e-06,
"loss": 3.4752,
"step": 290000
},
{
"epoch": 2.38,
"learning_rate": 9.077742524125007e-06,
"loss": 3.4825,
"step": 290500
},
{
"epoch": 2.38,
"learning_rate": 9.076098571405088e-06,
"loss": 3.4784,
"step": 291000
},
{
"epoch": 2.39,
"learning_rate": 9.074454618685167e-06,
"loss": 3.4804,
"step": 291500
},
{
"epoch": 2.39,
"learning_rate": 9.072810665965246e-06,
"loss": 3.4815,
"step": 292000
},
{
"epoch": 2.4,
"learning_rate": 9.071166713245327e-06,
"loss": 3.4706,
"step": 292500
},
{
"epoch": 2.4,
"learning_rate": 9.069522760525408e-06,
"loss": 3.4753,
"step": 293000
},
{
"epoch": 2.4,
"learning_rate": 9.067878807805488e-06,
"loss": 3.4782,
"step": 293500
},
{
"epoch": 2.41,
"learning_rate": 9.066234855085569e-06,
"loss": 3.4705,
"step": 294000
},
{
"epoch": 2.41,
"learning_rate": 9.064590902365648e-06,
"loss": 3.4739,
"step": 294500
},
{
"epoch": 2.42,
"learning_rate": 9.062946949645729e-06,
"loss": 3.4786,
"step": 295000
},
{
"epoch": 2.42,
"learning_rate": 9.06130299692581e-06,
"loss": 3.4724,
"step": 295500
},
{
"epoch": 2.43,
"learning_rate": 9.059659044205889e-06,
"loss": 3.4684,
"step": 296000
},
{
"epoch": 2.43,
"learning_rate": 9.05801509148597e-06,
"loss": 3.4717,
"step": 296500
},
{
"epoch": 2.43,
"learning_rate": 9.056371138766051e-06,
"loss": 3.4704,
"step": 297000
},
{
"epoch": 2.44,
"learning_rate": 9.05472718604613e-06,
"loss": 3.4656,
"step": 297500
},
{
"epoch": 2.44,
"learning_rate": 9.05308323332621e-06,
"loss": 3.4666,
"step": 298000
},
{
"epoch": 2.45,
"learning_rate": 9.05143928060629e-06,
"loss": 3.4695,
"step": 298500
},
{
"epoch": 2.45,
"learning_rate": 9.04979532788637e-06,
"loss": 3.4686,
"step": 299000
},
{
"epoch": 2.45,
"learning_rate": 9.04815137516645e-06,
"loss": 3.4652,
"step": 299500
},
{
"epoch": 2.46,
"learning_rate": 9.046507422446532e-06,
"loss": 3.4597,
"step": 300000
},
{
"epoch": 2.46,
"eval_accuracy": 0.42611833634349283,
"eval_loss": 3.321903944015503,
"eval_runtime": 409.6321,
"eval_samples_per_second": 752.763,
"eval_steps_per_second": 15.685,
"step": 300000
},
{
"epoch": 2.46,
"learning_rate": 9.044863469726611e-06,
"loss": 3.4601,
"step": 300500
},
{
"epoch": 2.47,
"learning_rate": 9.043219517006692e-06,
"loss": 3.4717,
"step": 301000
},
{
"epoch": 2.47,
"learning_rate": 9.041575564286771e-06,
"loss": 3.463,
"step": 301500
},
{
"epoch": 2.47,
"learning_rate": 9.039931611566852e-06,
"loss": 3.463,
"step": 302000
},
{
"epoch": 2.48,
"learning_rate": 9.038287658846933e-06,
"loss": 3.4575,
"step": 302500
},
{
"epoch": 2.48,
"learning_rate": 9.036643706127013e-06,
"loss": 3.4648,
"step": 303000
},
{
"epoch": 2.49,
"learning_rate": 9.034999753407094e-06,
"loss": 3.4625,
"step": 303500
},
{
"epoch": 2.49,
"learning_rate": 9.033355800687173e-06,
"loss": 3.4562,
"step": 304000
},
{
"epoch": 2.49,
"learning_rate": 9.031711847967252e-06,
"loss": 3.4587,
"step": 304500
},
{
"epoch": 2.5,
"learning_rate": 9.030067895247333e-06,
"loss": 3.4486,
"step": 305000
},
{
"epoch": 2.5,
"learning_rate": 9.028423942527414e-06,
"loss": 3.4609,
"step": 305500
},
{
"epoch": 2.51,
"learning_rate": 9.026779989807493e-06,
"loss": 3.4502,
"step": 306000
},
{
"epoch": 2.51,
"learning_rate": 9.025136037087574e-06,
"loss": 3.4515,
"step": 306500
},
{
"epoch": 2.52,
"learning_rate": 9.023492084367654e-06,
"loss": 3.4536,
"step": 307000
},
{
"epoch": 2.52,
"learning_rate": 9.021848131647735e-06,
"loss": 3.4527,
"step": 307500
},
{
"epoch": 2.52,
"learning_rate": 9.020204178927815e-06,
"loss": 3.4506,
"step": 308000
},
{
"epoch": 2.53,
"learning_rate": 9.018560226207895e-06,
"loss": 3.4553,
"step": 308500
},
{
"epoch": 2.53,
"learning_rate": 9.016916273487976e-06,
"loss": 3.457,
"step": 309000
},
{
"epoch": 2.54,
"learning_rate": 9.015272320768057e-06,
"loss": 3.4517,
"step": 309500
},
{
"epoch": 2.54,
"learning_rate": 9.013628368048134e-06,
"loss": 3.4471,
"step": 310000
},
{
"epoch": 2.54,
"learning_rate": 9.011984415328215e-06,
"loss": 3.4469,
"step": 310500
},
{
"epoch": 2.55,
"learning_rate": 9.010340462608296e-06,
"loss": 3.4488,
"step": 311000
},
{
"epoch": 2.55,
"learning_rate": 9.008696509888376e-06,
"loss": 3.4416,
"step": 311500
},
{
"epoch": 2.56,
"learning_rate": 9.007052557168457e-06,
"loss": 3.4526,
"step": 312000
},
{
"epoch": 2.56,
"learning_rate": 9.005408604448537e-06,
"loss": 3.4508,
"step": 312500
},
{
"epoch": 2.56,
"learning_rate": 9.003764651728617e-06,
"loss": 3.4583,
"step": 313000
},
{
"epoch": 2.57,
"learning_rate": 9.002120699008698e-06,
"loss": 3.4571,
"step": 313500
},
{
"epoch": 2.57,
"learning_rate": 9.000476746288777e-06,
"loss": 3.454,
"step": 314000
},
{
"epoch": 2.58,
"learning_rate": 8.998832793568858e-06,
"loss": 3.4457,
"step": 314500
},
{
"epoch": 2.58,
"learning_rate": 8.997188840848939e-06,
"loss": 3.4385,
"step": 315000
},
{
"epoch": 2.58,
"learning_rate": 8.995544888129018e-06,
"loss": 3.4465,
"step": 315500
},
{
"epoch": 2.59,
"learning_rate": 8.9939009354091e-06,
"loss": 3.4435,
"step": 316000
},
{
"epoch": 2.59,
"learning_rate": 8.992256982689178e-06,
"loss": 3.4489,
"step": 316500
},
{
"epoch": 2.6,
"learning_rate": 8.990613029969258e-06,
"loss": 3.438,
"step": 317000
},
{
"epoch": 2.6,
"learning_rate": 8.988969077249339e-06,
"loss": 3.4412,
"step": 317500
},
{
"epoch": 2.61,
"learning_rate": 8.98732512452942e-06,
"loss": 3.4394,
"step": 318000
},
{
"epoch": 2.61,
"learning_rate": 8.985681171809499e-06,
"loss": 3.4455,
"step": 318500
},
{
"epoch": 2.61,
"learning_rate": 8.98403721908958e-06,
"loss": 3.4398,
"step": 319000
},
{
"epoch": 2.62,
"learning_rate": 8.98239326636966e-06,
"loss": 3.4448,
"step": 319500
},
{
"epoch": 2.62,
"learning_rate": 8.98074931364974e-06,
"loss": 3.4345,
"step": 320000
},
{
"epoch": 2.63,
"learning_rate": 8.979105360929821e-06,
"loss": 3.4359,
"step": 320500
},
{
"epoch": 2.63,
"learning_rate": 8.9774614082099e-06,
"loss": 3.4498,
"step": 321000
},
{
"epoch": 2.63,
"learning_rate": 8.975817455489981e-06,
"loss": 3.4363,
"step": 321500
},
{
"epoch": 2.64,
"learning_rate": 8.974173502770062e-06,
"loss": 3.4381,
"step": 322000
},
{
"epoch": 2.64,
"learning_rate": 8.97252955005014e-06,
"loss": 3.4334,
"step": 322500
},
{
"epoch": 2.65,
"learning_rate": 8.970885597330221e-06,
"loss": 3.4354,
"step": 323000
},
{
"epoch": 2.65,
"learning_rate": 8.969241644610302e-06,
"loss": 3.4338,
"step": 323500
},
{
"epoch": 2.65,
"learning_rate": 8.967597691890381e-06,
"loss": 3.4305,
"step": 324000
},
{
"epoch": 2.66,
"learning_rate": 8.965953739170462e-06,
"loss": 3.4322,
"step": 324500
},
{
"epoch": 2.66,
"learning_rate": 8.964309786450543e-06,
"loss": 3.4363,
"step": 325000
},
{
"epoch": 2.67,
"learning_rate": 8.962665833730622e-06,
"loss": 3.4398,
"step": 325500
},
{
"epoch": 2.67,
"learning_rate": 8.961021881010703e-06,
"loss": 3.4376,
"step": 326000
},
{
"epoch": 2.67,
"learning_rate": 8.959377928290783e-06,
"loss": 3.4268,
"step": 326500
},
{
"epoch": 2.68,
"learning_rate": 8.957733975570864e-06,
"loss": 3.4415,
"step": 327000
},
{
"epoch": 2.68,
"learning_rate": 8.956090022850945e-06,
"loss": 3.4253,
"step": 327500
},
{
"epoch": 2.69,
"learning_rate": 8.954446070131024e-06,
"loss": 3.4335,
"step": 328000
},
{
"epoch": 2.69,
"learning_rate": 8.952802117411103e-06,
"loss": 3.4273,
"step": 328500
},
{
"epoch": 2.7,
"learning_rate": 8.951158164691184e-06,
"loss": 3.4334,
"step": 329000
},
{
"epoch": 2.7,
"learning_rate": 8.949514211971263e-06,
"loss": 3.4288,
"step": 329500
},
{
"epoch": 2.7,
"learning_rate": 8.947870259251344e-06,
"loss": 3.4263,
"step": 330000
},
{
"epoch": 2.7,
"eval_accuracy": 0.43119620464233216,
"eval_loss": 3.284120559692383,
"eval_runtime": 409.6426,
"eval_samples_per_second": 752.744,
"eval_steps_per_second": 15.684,
"step": 330000
},
{
"epoch": 2.71,
"learning_rate": 8.946226306531425e-06,
"loss": 3.4205,
"step": 330500
},
{
"epoch": 2.71,
"learning_rate": 8.944582353811505e-06,
"loss": 3.4233,
"step": 331000
},
{
"epoch": 2.72,
"learning_rate": 8.942938401091586e-06,
"loss": 3.4275,
"step": 331500
},
{
"epoch": 2.72,
"learning_rate": 8.941294448371665e-06,
"loss": 3.4317,
"step": 332000
},
{
"epoch": 2.72,
"learning_rate": 8.939650495651746e-06,
"loss": 3.4239,
"step": 332500
},
{
"epoch": 2.73,
"learning_rate": 8.938006542931827e-06,
"loss": 3.4237,
"step": 333000
},
{
"epoch": 2.73,
"learning_rate": 8.936362590211906e-06,
"loss": 3.4192,
"step": 333500
},
{
"epoch": 2.74,
"learning_rate": 8.934718637491987e-06,
"loss": 3.4193,
"step": 334000
},
{
"epoch": 2.74,
"learning_rate": 8.933074684772066e-06,
"loss": 3.4232,
"step": 334500
},
{
"epoch": 2.74,
"learning_rate": 8.931430732052146e-06,
"loss": 3.422,
"step": 335000
},
{
"epoch": 2.75,
"learning_rate": 8.929786779332227e-06,
"loss": 3.4168,
"step": 335500
},
{
"epoch": 2.75,
"learning_rate": 8.928142826612308e-06,
"loss": 3.4203,
"step": 336000
},
{
"epoch": 2.76,
"learning_rate": 8.926498873892387e-06,
"loss": 3.4188,
"step": 336500
},
{
"epoch": 2.76,
"learning_rate": 8.924854921172468e-06,
"loss": 3.4202,
"step": 337000
},
{
"epoch": 2.77,
"learning_rate": 8.923210968452549e-06,
"loss": 3.4217,
"step": 337500
},
{
"epoch": 2.77,
"learning_rate": 8.921567015732628e-06,
"loss": 3.4185,
"step": 338000
},
{
"epoch": 2.77,
"learning_rate": 8.919923063012709e-06,
"loss": 3.4202,
"step": 338500
},
{
"epoch": 2.78,
"learning_rate": 8.918279110292788e-06,
"loss": 3.4172,
"step": 339000
},
{
"epoch": 2.78,
"learning_rate": 8.91663515757287e-06,
"loss": 3.4221,
"step": 339500
},
{
"epoch": 2.79,
"learning_rate": 8.91499120485295e-06,
"loss": 3.4136,
"step": 340000
},
{
"epoch": 2.79,
"learning_rate": 8.91334725213303e-06,
"loss": 3.4209,
"step": 340500
},
{
"epoch": 2.79,
"learning_rate": 8.911703299413109e-06,
"loss": 3.4182,
"step": 341000
},
{
"epoch": 2.8,
"learning_rate": 8.91005934669319e-06,
"loss": 3.4152,
"step": 341500
},
{
"epoch": 2.8,
"learning_rate": 8.90841539397327e-06,
"loss": 3.4124,
"step": 342000
},
{
"epoch": 2.81,
"learning_rate": 8.90677144125335e-06,
"loss": 3.408,
"step": 342500
},
{
"epoch": 2.81,
"learning_rate": 8.905127488533431e-06,
"loss": 3.4112,
"step": 343000
},
{
"epoch": 2.81,
"learning_rate": 8.90348353581351e-06,
"loss": 3.4144,
"step": 343500
},
{
"epoch": 2.82,
"learning_rate": 8.901839583093591e-06,
"loss": 3.4173,
"step": 344000
},
{
"epoch": 2.82,
"learning_rate": 8.90019563037367e-06,
"loss": 3.4107,
"step": 344500
},
{
"epoch": 2.83,
"learning_rate": 8.898551677653752e-06,
"loss": 3.4094,
"step": 345000
},
{
"epoch": 2.83,
"learning_rate": 8.896907724933833e-06,
"loss": 3.4165,
"step": 345500
},
{
"epoch": 2.83,
"learning_rate": 8.895263772213912e-06,
"loss": 3.4163,
"step": 346000
},
{
"epoch": 2.84,
"learning_rate": 8.893619819493991e-06,
"loss": 3.4158,
"step": 346500
},
{
"epoch": 2.84,
"learning_rate": 8.891975866774072e-06,
"loss": 3.4166,
"step": 347000
},
{
"epoch": 2.85,
"learning_rate": 8.890331914054151e-06,
"loss": 3.4059,
"step": 347500
},
{
"epoch": 2.85,
"learning_rate": 8.888687961334232e-06,
"loss": 3.4079,
"step": 348000
},
{
"epoch": 2.86,
"learning_rate": 8.887044008614313e-06,
"loss": 3.4116,
"step": 348500
},
{
"epoch": 2.86,
"learning_rate": 8.885400055894393e-06,
"loss": 3.4078,
"step": 349000
},
{
"epoch": 2.86,
"learning_rate": 8.883756103174474e-06,
"loss": 3.4109,
"step": 349500
},
{
"epoch": 2.87,
"learning_rate": 8.882112150454555e-06,
"loss": 3.4022,
"step": 350000
},
{
"epoch": 2.87,
"learning_rate": 8.880468197734634e-06,
"loss": 3.4004,
"step": 350500
},
{
"epoch": 2.88,
"learning_rate": 8.878824245014715e-06,
"loss": 3.4024,
"step": 351000
},
{
"epoch": 2.88,
"learning_rate": 8.877180292294794e-06,
"loss": 3.4056,
"step": 351500
},
{
"epoch": 2.88,
"learning_rate": 8.875536339574875e-06,
"loss": 3.3985,
"step": 352000
},
{
"epoch": 2.89,
"learning_rate": 8.873892386854954e-06,
"loss": 3.3945,
"step": 352500
},
{
"epoch": 2.89,
"learning_rate": 8.872248434135035e-06,
"loss": 3.3978,
"step": 353000
},
{
"epoch": 2.9,
"learning_rate": 8.870604481415115e-06,
"loss": 3.4052,
"step": 353500
},
{
"epoch": 2.9,
"learning_rate": 8.868960528695196e-06,
"loss": 3.4048,
"step": 354000
},
{
"epoch": 2.9,
"learning_rate": 8.867316575975275e-06,
"loss": 3.4055,
"step": 354500
},
{
"epoch": 2.91,
"learning_rate": 8.865672623255356e-06,
"loss": 3.4025,
"step": 355000
},
{
"epoch": 2.91,
"learning_rate": 8.864028670535437e-06,
"loss": 3.3976,
"step": 355500
},
{
"epoch": 2.92,
"learning_rate": 8.862384717815516e-06,
"loss": 3.3962,
"step": 356000
},
{
"epoch": 2.92,
"learning_rate": 8.860740765095597e-06,
"loss": 3.3927,
"step": 356500
},
{
"epoch": 2.92,
"learning_rate": 8.859096812375676e-06,
"loss": 3.4024,
"step": 357000
},
{
"epoch": 2.93,
"learning_rate": 8.857452859655757e-06,
"loss": 3.3929,
"step": 357500
},
{
"epoch": 2.93,
"learning_rate": 8.855808906935838e-06,
"loss": 3.3911,
"step": 358000
},
{
"epoch": 2.94,
"learning_rate": 8.854164954215918e-06,
"loss": 3.3976,
"step": 358500
},
{
"epoch": 2.94,
"learning_rate": 8.852521001495997e-06,
"loss": 3.3944,
"step": 359000
},
{
"epoch": 2.95,
"learning_rate": 8.850877048776078e-06,
"loss": 3.3953,
"step": 359500
},
{
"epoch": 2.95,
"learning_rate": 8.849233096056157e-06,
"loss": 3.3909,
"step": 360000
},
{
"epoch": 2.95,
"eval_accuracy": 0.43477661532857337,
"eval_loss": 3.254718542098999,
"eval_runtime": 406.1401,
"eval_samples_per_second": 759.236,
"eval_steps_per_second": 15.82,
"step": 360000
},
{
"epoch": 2.95,
"learning_rate": 8.847589143336238e-06,
"loss": 3.403,
"step": 360500
},
{
"epoch": 2.96,
"learning_rate": 8.845945190616319e-06,
"loss": 3.3966,
"step": 361000
},
{
"epoch": 2.96,
"learning_rate": 8.844301237896398e-06,
"loss": 3.3835,
"step": 361500
},
{
"epoch": 2.97,
"learning_rate": 8.84265728517648e-06,
"loss": 3.3896,
"step": 362000
},
{
"epoch": 2.97,
"learning_rate": 8.84101333245656e-06,
"loss": 3.3855,
"step": 362500
},
{
"epoch": 2.97,
"learning_rate": 8.83936937973664e-06,
"loss": 3.3851,
"step": 363000
},
{
"epoch": 2.98,
"learning_rate": 8.83772542701672e-06,
"loss": 3.3927,
"step": 363500
},
{
"epoch": 2.98,
"learning_rate": 8.8360814742968e-06,
"loss": 3.389,
"step": 364000
},
{
"epoch": 2.99,
"learning_rate": 8.83443752157688e-06,
"loss": 3.3861,
"step": 364500
},
{
"epoch": 2.99,
"learning_rate": 8.83279356885696e-06,
"loss": 3.3955,
"step": 365000
},
{
"epoch": 2.99,
"learning_rate": 8.831149616137041e-06,
"loss": 3.395,
"step": 365500
},
{
"epoch": 3.0,
"learning_rate": 8.82950566341712e-06,
"loss": 3.3881,
"step": 366000
},
{
"epoch": 3.0,
"learning_rate": 8.827861710697201e-06,
"loss": 3.3844,
"step": 366500
},
{
"epoch": 3.01,
"learning_rate": 8.82621775797728e-06,
"loss": 3.3886,
"step": 367000
},
{
"epoch": 3.01,
"learning_rate": 8.824573805257362e-06,
"loss": 3.3897,
"step": 367500
},
{
"epoch": 3.01,
"learning_rate": 8.822929852537443e-06,
"loss": 3.3882,
"step": 368000
},
{
"epoch": 3.02,
"learning_rate": 8.821285899817522e-06,
"loss": 3.3919,
"step": 368500
},
{
"epoch": 3.02,
"learning_rate": 8.819641947097603e-06,
"loss": 3.3815,
"step": 369000
},
{
"epoch": 3.03,
"learning_rate": 8.817997994377682e-06,
"loss": 3.3802,
"step": 369500
},
{
"epoch": 3.03,
"learning_rate": 8.816354041657763e-06,
"loss": 3.3817,
"step": 370000
},
{
"epoch": 3.04,
"learning_rate": 8.814710088937844e-06,
"loss": 3.3874,
"step": 370500
},
{
"epoch": 3.04,
"learning_rate": 8.813066136217923e-06,
"loss": 3.3875,
"step": 371000
},
{
"epoch": 3.04,
"learning_rate": 8.811422183498003e-06,
"loss": 3.3885,
"step": 371500
},
{
"epoch": 3.05,
"learning_rate": 8.809778230778084e-06,
"loss": 3.3884,
"step": 372000
},
{
"epoch": 3.05,
"learning_rate": 8.808134278058163e-06,
"loss": 3.3905,
"step": 372500
},
{
"epoch": 3.06,
"learning_rate": 8.806490325338244e-06,
"loss": 3.3829,
"step": 373000
},
{
"epoch": 3.06,
"learning_rate": 8.804846372618325e-06,
"loss": 3.3858,
"step": 373500
},
{
"epoch": 3.06,
"learning_rate": 8.803202419898404e-06,
"loss": 3.3852,
"step": 374000
},
{
"epoch": 3.07,
"learning_rate": 8.801558467178485e-06,
"loss": 3.3814,
"step": 374500
},
{
"epoch": 3.07,
"learning_rate": 8.799914514458564e-06,
"loss": 3.3849,
"step": 375000
},
{
"epoch": 3.08,
"learning_rate": 8.798270561738645e-06,
"loss": 3.3812,
"step": 375500
},
{
"epoch": 3.08,
"learning_rate": 8.796626609018726e-06,
"loss": 3.3815,
"step": 376000
},
{
"epoch": 3.08,
"learning_rate": 8.794982656298806e-06,
"loss": 3.3738,
"step": 376500
},
{
"epoch": 3.09,
"learning_rate": 8.793338703578885e-06,
"loss": 3.381,
"step": 377000
},
{
"epoch": 3.09,
"learning_rate": 8.791694750858966e-06,
"loss": 3.3762,
"step": 377500
},
{
"epoch": 3.1,
"learning_rate": 8.790050798139047e-06,
"loss": 3.3709,
"step": 378000
},
{
"epoch": 3.1,
"learning_rate": 8.788406845419126e-06,
"loss": 3.3779,
"step": 378500
},
{
"epoch": 3.11,
"learning_rate": 8.786762892699207e-06,
"loss": 3.3774,
"step": 379000
},
{
"epoch": 3.11,
"learning_rate": 8.785118939979286e-06,
"loss": 3.3692,
"step": 379500
},
{
"epoch": 3.11,
"learning_rate": 8.783474987259367e-06,
"loss": 3.3737,
"step": 380000
},
{
"epoch": 3.12,
"learning_rate": 8.781831034539448e-06,
"loss": 3.3717,
"step": 380500
},
{
"epoch": 3.12,
"learning_rate": 8.780187081819528e-06,
"loss": 3.3763,
"step": 381000
},
{
"epoch": 3.13,
"learning_rate": 8.778543129099608e-06,
"loss": 3.3748,
"step": 381500
},
{
"epoch": 3.13,
"learning_rate": 8.776899176379688e-06,
"loss": 3.3824,
"step": 382000
},
{
"epoch": 3.13,
"learning_rate": 8.775255223659769e-06,
"loss": 3.3739,
"step": 382500
},
{
"epoch": 3.14,
"learning_rate": 8.773611270939848e-06,
"loss": 3.3745,
"step": 383000
},
{
"epoch": 3.14,
"learning_rate": 8.771967318219929e-06,
"loss": 3.3775,
"step": 383500
},
{
"epoch": 3.15,
"learning_rate": 8.770323365500008e-06,
"loss": 3.3675,
"step": 384000
},
{
"epoch": 3.15,
"learning_rate": 8.76867941278009e-06,
"loss": 3.3676,
"step": 384500
},
{
"epoch": 3.15,
"learning_rate": 8.767035460060169e-06,
"loss": 3.3667,
"step": 385000
},
{
"epoch": 3.16,
"learning_rate": 8.76539150734025e-06,
"loss": 3.3638,
"step": 385500
},
{
"epoch": 3.16,
"learning_rate": 8.76374755462033e-06,
"loss": 3.3682,
"step": 386000
},
{
"epoch": 3.17,
"learning_rate": 8.76210360190041e-06,
"loss": 3.3698,
"step": 386500
},
{
"epoch": 3.17,
"learning_rate": 8.76045964918049e-06,
"loss": 3.371,
"step": 387000
},
{
"epoch": 3.17,
"learning_rate": 8.75881569646057e-06,
"loss": 3.3797,
"step": 387500
},
{
"epoch": 3.18,
"learning_rate": 8.757171743740651e-06,
"loss": 3.3701,
"step": 388000
},
{
"epoch": 3.18,
"learning_rate": 8.755527791020732e-06,
"loss": 3.3725,
"step": 388500
},
{
"epoch": 3.19,
"learning_rate": 8.753883838300811e-06,
"loss": 3.3784,
"step": 389000
},
{
"epoch": 3.19,
"learning_rate": 8.75223988558089e-06,
"loss": 3.3677,
"step": 389500
},
{
"epoch": 3.2,
"learning_rate": 8.750595932860971e-06,
"loss": 3.3635,
"step": 390000
},
{
"epoch": 3.2,
"eval_accuracy": 0.43791422082834985,
"eval_loss": 3.228388547897339,
"eval_runtime": 413.1728,
"eval_samples_per_second": 746.312,
"eval_steps_per_second": 15.55,
"step": 390000
},
{
"epoch": 3.2,
"learning_rate": 8.748951980141052e-06,
"loss": 3.364,
"step": 390500
},
{
"epoch": 3.2,
"learning_rate": 8.747308027421132e-06,
"loss": 3.3708,
"step": 391000
},
{
"epoch": 3.21,
"learning_rate": 8.745664074701213e-06,
"loss": 3.3714,
"step": 391500
},
{
"epoch": 3.21,
"learning_rate": 8.744020121981292e-06,
"loss": 3.3658,
"step": 392000
},
{
"epoch": 3.22,
"learning_rate": 8.742376169261373e-06,
"loss": 3.3653,
"step": 392500
},
{
"epoch": 3.22,
"learning_rate": 8.740732216541454e-06,
"loss": 3.3617,
"step": 393000
},
{
"epoch": 3.22,
"learning_rate": 8.739088263821533e-06,
"loss": 3.367,
"step": 393500
},
{
"epoch": 3.23,
"learning_rate": 8.737444311101614e-06,
"loss": 3.3608,
"step": 394000
},
{
"epoch": 3.23,
"learning_rate": 8.735800358381693e-06,
"loss": 3.3643,
"step": 394500
},
{
"epoch": 3.24,
"learning_rate": 8.734156405661773e-06,
"loss": 3.3607,
"step": 395000
},
{
"epoch": 3.24,
"learning_rate": 8.732512452941854e-06,
"loss": 3.3638,
"step": 395500
},
{
"epoch": 3.24,
"learning_rate": 8.730868500221935e-06,
"loss": 3.3687,
"step": 396000
},
{
"epoch": 3.25,
"learning_rate": 8.729224547502014e-06,
"loss": 3.3616,
"step": 396500
},
{
"epoch": 3.25,
"learning_rate": 8.727580594782095e-06,
"loss": 3.3678,
"step": 397000
},
{
"epoch": 3.26,
"learning_rate": 8.725936642062174e-06,
"loss": 3.3616,
"step": 397500
},
{
"epoch": 3.26,
"learning_rate": 8.724292689342255e-06,
"loss": 3.3573,
"step": 398000
},
{
"epoch": 3.26,
"learning_rate": 8.722648736622336e-06,
"loss": 3.3622,
"step": 398500
},
{
"epoch": 3.27,
"learning_rate": 8.721004783902415e-06,
"loss": 3.3588,
"step": 399000
},
{
"epoch": 3.27,
"learning_rate": 8.719360831182496e-06,
"loss": 3.3604,
"step": 399500
},
{
"epoch": 3.28,
"learning_rate": 8.717716878462576e-06,
"loss": 3.361,
"step": 400000
},
{
"epoch": 3.28,
"learning_rate": 8.716072925742657e-06,
"loss": 3.3546,
"step": 400500
},
{
"epoch": 3.29,
"learning_rate": 8.714428973022736e-06,
"loss": 3.3613,
"step": 401000
},
{
"epoch": 3.29,
"learning_rate": 8.712785020302817e-06,
"loss": 3.3619,
"step": 401500
},
{
"epoch": 3.29,
"learning_rate": 8.711141067582896e-06,
"loss": 3.3623,
"step": 402000
},
{
"epoch": 3.3,
"learning_rate": 8.709497114862977e-06,
"loss": 3.3552,
"step": 402500
},
{
"epoch": 3.3,
"learning_rate": 8.707853162143056e-06,
"loss": 3.3554,
"step": 403000
},
{
"epoch": 3.31,
"learning_rate": 8.706209209423137e-06,
"loss": 3.3587,
"step": 403500
},
{
"epoch": 3.31,
"learning_rate": 8.704565256703218e-06,
"loss": 3.3558,
"step": 404000
},
{
"epoch": 3.31,
"learning_rate": 8.702921303983298e-06,
"loss": 3.3582,
"step": 404500
},
{
"epoch": 3.32,
"learning_rate": 8.701277351263379e-06,
"loss": 3.3627,
"step": 405000
},
{
"epoch": 3.32,
"learning_rate": 8.69963339854346e-06,
"loss": 3.3572,
"step": 405500
},
{
"epoch": 3.33,
"learning_rate": 8.697989445823539e-06,
"loss": 3.3658,
"step": 406000
},
{
"epoch": 3.33,
"learning_rate": 8.69634549310362e-06,
"loss": 3.358,
"step": 406500
},
{
"epoch": 3.33,
"learning_rate": 8.6947015403837e-06,
"loss": 3.349,
"step": 407000
},
{
"epoch": 3.34,
"learning_rate": 8.693057587663778e-06,
"loss": 3.3534,
"step": 407500
},
{
"epoch": 3.34,
"learning_rate": 8.69141363494386e-06,
"loss": 3.3595,
"step": 408000
},
{
"epoch": 3.35,
"learning_rate": 8.68976968222394e-06,
"loss": 3.3551,
"step": 408500
},
{
"epoch": 3.35,
"learning_rate": 8.68812572950402e-06,
"loss": 3.3574,
"step": 409000
},
{
"epoch": 3.35,
"learning_rate": 8.6864817767841e-06,
"loss": 3.3534,
"step": 409500
},
{
"epoch": 3.36,
"learning_rate": 8.68483782406418e-06,
"loss": 3.3541,
"step": 410000
},
{
"epoch": 3.36,
"learning_rate": 8.683193871344261e-06,
"loss": 3.3545,
"step": 410500
},
{
"epoch": 3.37,
"learning_rate": 8.681549918624342e-06,
"loss": 3.3506,
"step": 411000
},
{
"epoch": 3.37,
"learning_rate": 8.679905965904421e-06,
"loss": 3.3454,
"step": 411500
},
{
"epoch": 3.38,
"learning_rate": 8.678262013184502e-06,
"loss": 3.3557,
"step": 412000
},
{
"epoch": 3.38,
"learning_rate": 8.676618060464581e-06,
"loss": 3.3449,
"step": 412500
},
{
"epoch": 3.38,
"learning_rate": 8.67497410774466e-06,
"loss": 3.3565,
"step": 413000
},
{
"epoch": 3.39,
"learning_rate": 8.673330155024742e-06,
"loss": 3.3423,
"step": 413500
},
{
"epoch": 3.39,
"learning_rate": 8.671686202304823e-06,
"loss": 3.3515,
"step": 414000
},
{
"epoch": 3.4,
"learning_rate": 8.670042249584902e-06,
"loss": 3.3462,
"step": 414500
},
{
"epoch": 3.4,
"learning_rate": 8.668398296864983e-06,
"loss": 3.3515,
"step": 415000
},
{
"epoch": 3.4,
"learning_rate": 8.666754344145062e-06,
"loss": 3.3514,
"step": 415500
},
{
"epoch": 3.41,
"learning_rate": 8.665110391425143e-06,
"loss": 3.3498,
"step": 416000
},
{
"epoch": 3.41,
"learning_rate": 8.663466438705224e-06,
"loss": 3.3514,
"step": 416500
},
{
"epoch": 3.42,
"learning_rate": 8.661822485985303e-06,
"loss": 3.3517,
"step": 417000
},
{
"epoch": 3.42,
"learning_rate": 8.660178533265384e-06,
"loss": 3.3476,
"step": 417500
},
{
"epoch": 3.42,
"learning_rate": 8.658534580545465e-06,
"loss": 3.3381,
"step": 418000
},
{
"epoch": 3.43,
"learning_rate": 8.656890627825545e-06,
"loss": 3.3461,
"step": 418500
},
{
"epoch": 3.43,
"learning_rate": 8.655246675105626e-06,
"loss": 3.3492,
"step": 419000
},
{
"epoch": 3.44,
"learning_rate": 8.653602722385705e-06,
"loss": 3.3478,
"step": 419500
},
{
"epoch": 3.44,
"learning_rate": 8.651958769665784e-06,
"loss": 3.3488,
"step": 420000
},
{
"epoch": 3.44,
"eval_accuracy": 0.4409229499038142,
"eval_loss": 3.20596981048584,
"eval_runtime": 409.4361,
"eval_samples_per_second": 753.124,
"eval_steps_per_second": 15.692,
"step": 420000
},
{
"epoch": 3.45,
"learning_rate": 8.650314816945865e-06,
"loss": 3.3408,
"step": 420500
},
{
"epoch": 3.45,
"learning_rate": 8.648670864225946e-06,
"loss": 3.3418,
"step": 421000
},
{
"epoch": 3.45,
"learning_rate": 8.647026911506025e-06,
"loss": 3.3481,
"step": 421500
},
{
"epoch": 3.46,
"learning_rate": 8.645382958786106e-06,
"loss": 3.3451,
"step": 422000
},
{
"epoch": 3.46,
"learning_rate": 8.643739006066186e-06,
"loss": 3.3391,
"step": 422500
},
{
"epoch": 3.47,
"learning_rate": 8.642095053346267e-06,
"loss": 3.3423,
"step": 423000
},
{
"epoch": 3.47,
"learning_rate": 8.640451100626348e-06,
"loss": 3.3375,
"step": 423500
},
{
"epoch": 3.47,
"learning_rate": 8.638807147906427e-06,
"loss": 3.3379,
"step": 424000
},
{
"epoch": 3.48,
"learning_rate": 8.637163195186508e-06,
"loss": 3.3446,
"step": 424500
},
{
"epoch": 3.48,
"learning_rate": 8.635519242466587e-06,
"loss": 3.3365,
"step": 425000
},
{
"epoch": 3.49,
"learning_rate": 8.633875289746666e-06,
"loss": 3.3405,
"step": 425500
},
{
"epoch": 3.49,
"learning_rate": 8.632231337026747e-06,
"loss": 3.3409,
"step": 426000
},
{
"epoch": 3.49,
"learning_rate": 8.630587384306828e-06,
"loss": 3.3369,
"step": 426500
},
{
"epoch": 3.5,
"learning_rate": 8.628943431586908e-06,
"loss": 3.3391,
"step": 427000
},
{
"epoch": 3.5,
"learning_rate": 8.627299478866989e-06,
"loss": 3.3426,
"step": 427500
},
{
"epoch": 3.51,
"learning_rate": 8.625655526147068e-06,
"loss": 3.3418,
"step": 428000
},
{
"epoch": 3.51,
"learning_rate": 8.624011573427149e-06,
"loss": 3.3402,
"step": 428500
},
{
"epoch": 3.51,
"learning_rate": 8.62236762070723e-06,
"loss": 3.3424,
"step": 429000
},
{
"epoch": 3.52,
"learning_rate": 8.620723667987309e-06,
"loss": 3.338,
"step": 429500
},
{
"epoch": 3.52,
"learning_rate": 8.61907971526739e-06,
"loss": 3.3335,
"step": 430000
},
{
"epoch": 3.53,
"learning_rate": 8.617435762547471e-06,
"loss": 3.3276,
"step": 430500
},
{
"epoch": 3.53,
"learning_rate": 8.61579180982755e-06,
"loss": 3.3285,
"step": 431000
},
{
"epoch": 3.54,
"learning_rate": 8.61414785710763e-06,
"loss": 3.3357,
"step": 431500
},
{
"epoch": 3.54,
"learning_rate": 8.61250390438771e-06,
"loss": 3.337,
"step": 432000
},
{
"epoch": 3.54,
"learning_rate": 8.61085995166779e-06,
"loss": 3.3378,
"step": 432500
},
{
"epoch": 3.55,
"learning_rate": 8.60921599894787e-06,
"loss": 3.3323,
"step": 433000
},
{
"epoch": 3.55,
"learning_rate": 8.607572046227952e-06,
"loss": 3.3337,
"step": 433500
},
{
"epoch": 3.56,
"learning_rate": 8.605928093508031e-06,
"loss": 3.3325,
"step": 434000
},
{
"epoch": 3.56,
"learning_rate": 8.604284140788112e-06,
"loss": 3.3287,
"step": 434500
},
{
"epoch": 3.56,
"learning_rate": 8.602640188068191e-06,
"loss": 3.3334,
"step": 435000
},
{
"epoch": 3.57,
"learning_rate": 8.600996235348272e-06,
"loss": 3.3308,
"step": 435500
},
{
"epoch": 3.57,
"learning_rate": 8.599352282628353e-06,
"loss": 3.3421,
"step": 436000
},
{
"epoch": 3.58,
"learning_rate": 8.597708329908433e-06,
"loss": 3.3361,
"step": 436500
},
{
"epoch": 3.58,
"learning_rate": 8.596064377188514e-06,
"loss": 3.3349,
"step": 437000
},
{
"epoch": 3.58,
"learning_rate": 8.594420424468593e-06,
"loss": 3.3278,
"step": 437500
},
{
"epoch": 3.59,
"learning_rate": 8.592776471748672e-06,
"loss": 3.3309,
"step": 438000
},
{
"epoch": 3.59,
"learning_rate": 8.591132519028753e-06,
"loss": 3.3285,
"step": 438500
},
{
"epoch": 3.6,
"learning_rate": 8.589488566308834e-06,
"loss": 3.322,
"step": 439000
},
{
"epoch": 3.6,
"learning_rate": 8.587844613588913e-06,
"loss": 3.3353,
"step": 439500
},
{
"epoch": 3.6,
"learning_rate": 8.586200660868994e-06,
"loss": 3.3251,
"step": 440000
},
{
"epoch": 3.61,
"learning_rate": 8.584556708149074e-06,
"loss": 3.3303,
"step": 440500
},
{
"epoch": 3.61,
"learning_rate": 8.582912755429155e-06,
"loss": 3.3266,
"step": 441000
},
{
"epoch": 3.62,
"learning_rate": 8.581268802709236e-06,
"loss": 3.3359,
"step": 441500
},
{
"epoch": 3.62,
"learning_rate": 8.579624849989315e-06,
"loss": 3.3264,
"step": 442000
},
{
"epoch": 3.63,
"learning_rate": 8.577980897269396e-06,
"loss": 3.3244,
"step": 442500
},
{
"epoch": 3.63,
"learning_rate": 8.576336944549477e-06,
"loss": 3.3266,
"step": 443000
},
{
"epoch": 3.63,
"learning_rate": 8.574692991829554e-06,
"loss": 3.3246,
"step": 443500
},
{
"epoch": 3.64,
"learning_rate": 8.573049039109635e-06,
"loss": 3.3264,
"step": 444000
},
{
"epoch": 3.64,
"learning_rate": 8.571405086389716e-06,
"loss": 3.3309,
"step": 444500
},
{
"epoch": 3.65,
"learning_rate": 8.569761133669796e-06,
"loss": 3.329,
"step": 445000
},
{
"epoch": 3.65,
"learning_rate": 8.568117180949877e-06,
"loss": 3.3208,
"step": 445500
},
{
"epoch": 3.65,
"learning_rate": 8.566473228229958e-06,
"loss": 3.3259,
"step": 446000
},
{
"epoch": 3.66,
"learning_rate": 8.564829275510037e-06,
"loss": 3.3272,
"step": 446500
},
{
"epoch": 3.66,
"learning_rate": 8.563185322790118e-06,
"loss": 3.3228,
"step": 447000
},
{
"epoch": 3.67,
"learning_rate": 8.561541370070197e-06,
"loss": 3.3259,
"step": 447500
},
{
"epoch": 3.67,
"learning_rate": 8.559897417350278e-06,
"loss": 3.3244,
"step": 448000
},
{
"epoch": 3.67,
"learning_rate": 8.558253464630359e-06,
"loss": 3.3245,
"step": 448500
},
{
"epoch": 3.68,
"learning_rate": 8.556609511910438e-06,
"loss": 3.3324,
"step": 449000
},
{
"epoch": 3.68,
"learning_rate": 8.554965559190518e-06,
"loss": 3.3236,
"step": 449500
},
{
"epoch": 3.69,
"learning_rate": 8.553321606470599e-06,
"loss": 3.3239,
"step": 450000
},
{
"epoch": 3.69,
"eval_accuracy": 0.443556698280814,
"eval_loss": 3.187194585800171,
"eval_runtime": 414.6673,
"eval_samples_per_second": 743.623,
"eval_steps_per_second": 15.494,
"step": 450000
},
{
"epoch": 3.69,
"learning_rate": 8.551677653750678e-06,
"loss": 3.3201,
"step": 450500
},
{
"epoch": 3.69,
"learning_rate": 8.550033701030759e-06,
"loss": 3.3214,
"step": 451000
},
{
"epoch": 3.7,
"learning_rate": 8.54838974831084e-06,
"loss": 3.3252,
"step": 451500
},
{
"epoch": 3.7,
"learning_rate": 8.546745795590919e-06,
"loss": 3.3185,
"step": 452000
},
{
"epoch": 3.71,
"learning_rate": 8.545101842871e-06,
"loss": 3.324,
"step": 452500
},
{
"epoch": 3.71,
"learning_rate": 8.54345789015108e-06,
"loss": 3.3222,
"step": 453000
},
{
"epoch": 3.72,
"learning_rate": 8.54181393743116e-06,
"loss": 3.3129,
"step": 453500
},
{
"epoch": 3.72,
"learning_rate": 8.540169984711241e-06,
"loss": 3.3245,
"step": 454000
},
{
"epoch": 3.72,
"learning_rate": 8.53852603199132e-06,
"loss": 3.3226,
"step": 454500
},
{
"epoch": 3.73,
"learning_rate": 8.536882079271401e-06,
"loss": 3.3162,
"step": 455000
},
{
"epoch": 3.73,
"learning_rate": 8.53523812655148e-06,
"loss": 3.3124,
"step": 455500
},
{
"epoch": 3.74,
"learning_rate": 8.53359417383156e-06,
"loss": 3.3155,
"step": 456000
},
{
"epoch": 3.74,
"learning_rate": 8.531950221111641e-06,
"loss": 3.3179,
"step": 456500
},
{
"epoch": 3.74,
"learning_rate": 8.530306268391722e-06,
"loss": 3.3139,
"step": 457000
},
{
"epoch": 3.75,
"learning_rate": 8.528662315671801e-06,
"loss": 3.3116,
"step": 457500
},
{
"epoch": 3.75,
"learning_rate": 8.527018362951882e-06,
"loss": 3.3204,
"step": 458000
},
{
"epoch": 3.76,
"learning_rate": 8.525374410231963e-06,
"loss": 3.3166,
"step": 458500
},
{
"epoch": 3.76,
"learning_rate": 8.523730457512042e-06,
"loss": 3.3175,
"step": 459000
},
{
"epoch": 3.76,
"learning_rate": 8.522086504792123e-06,
"loss": 3.3107,
"step": 459500
},
{
"epoch": 3.77,
"learning_rate": 8.520442552072203e-06,
"loss": 3.3189,
"step": 460000
},
{
"epoch": 3.77,
"learning_rate": 8.518798599352284e-06,
"loss": 3.3178,
"step": 460500
},
{
"epoch": 3.78,
"learning_rate": 8.517154646632365e-06,
"loss": 3.3001,
"step": 461000
},
{
"epoch": 3.78,
"learning_rate": 8.515510693912444e-06,
"loss": 3.3178,
"step": 461500
},
{
"epoch": 3.79,
"learning_rate": 8.513866741192523e-06,
"loss": 3.3168,
"step": 462000
},
{
"epoch": 3.79,
"learning_rate": 8.512222788472604e-06,
"loss": 3.3178,
"step": 462500
},
{
"epoch": 3.79,
"learning_rate": 8.510578835752684e-06,
"loss": 3.3187,
"step": 463000
},
{
"epoch": 3.8,
"learning_rate": 8.508934883032764e-06,
"loss": 3.3136,
"step": 463500
},
{
"epoch": 3.8,
"learning_rate": 8.507290930312845e-06,
"loss": 3.3138,
"step": 464000
},
{
"epoch": 3.81,
"learning_rate": 8.505646977592925e-06,
"loss": 3.3117,
"step": 464500
},
{
"epoch": 3.81,
"learning_rate": 8.504003024873006e-06,
"loss": 3.3167,
"step": 465000
},
{
"epoch": 3.81,
"learning_rate": 8.502359072153085e-06,
"loss": 3.3172,
"step": 465500
},
{
"epoch": 3.82,
"learning_rate": 8.500715119433166e-06,
"loss": 3.3083,
"step": 466000
},
{
"epoch": 3.82,
"learning_rate": 8.499071166713247e-06,
"loss": 3.31,
"step": 466500
},
{
"epoch": 3.83,
"learning_rate": 8.497427213993326e-06,
"loss": 3.3198,
"step": 467000
},
{
"epoch": 3.83,
"learning_rate": 8.495783261273407e-06,
"loss": 3.3153,
"step": 467500
},
{
"epoch": 3.83,
"learning_rate": 8.494139308553486e-06,
"loss": 3.3139,
"step": 468000
},
{
"epoch": 3.84,
"learning_rate": 8.492495355833566e-06,
"loss": 3.3044,
"step": 468500
},
{
"epoch": 3.84,
"learning_rate": 8.490851403113647e-06,
"loss": 3.306,
"step": 469000
},
{
"epoch": 3.85,
"learning_rate": 8.489207450393728e-06,
"loss": 3.3104,
"step": 469500
},
{
"epoch": 3.85,
"learning_rate": 8.487563497673807e-06,
"loss": 3.3154,
"step": 470000
},
{
"epoch": 3.85,
"learning_rate": 8.485919544953888e-06,
"loss": 3.3111,
"step": 470500
},
{
"epoch": 3.86,
"learning_rate": 8.484275592233969e-06,
"loss": 3.3065,
"step": 471000
},
{
"epoch": 3.86,
"learning_rate": 8.482631639514048e-06,
"loss": 3.3094,
"step": 471500
},
{
"epoch": 3.87,
"learning_rate": 8.48098768679413e-06,
"loss": 3.307,
"step": 472000
},
{
"epoch": 3.87,
"learning_rate": 8.479343734074208e-06,
"loss": 3.3099,
"step": 472500
},
{
"epoch": 3.88,
"learning_rate": 8.47769978135429e-06,
"loss": 3.3043,
"step": 473000
},
{
"epoch": 3.88,
"learning_rate": 8.47605582863437e-06,
"loss": 3.3068,
"step": 473500
},
{
"epoch": 3.88,
"learning_rate": 8.47441187591445e-06,
"loss": 3.3114,
"step": 474000
},
{
"epoch": 3.89,
"learning_rate": 8.472767923194529e-06,
"loss": 3.3066,
"step": 474500
},
{
"epoch": 3.89,
"learning_rate": 8.47112397047461e-06,
"loss": 3.3085,
"step": 475000
},
{
"epoch": 3.9,
"learning_rate": 8.46948001775469e-06,
"loss": 3.3108,
"step": 475500
},
{
"epoch": 3.9,
"learning_rate": 8.46783606503477e-06,
"loss": 3.3058,
"step": 476000
},
{
"epoch": 3.9,
"learning_rate": 8.466192112314851e-06,
"loss": 3.3152,
"step": 476500
},
{
"epoch": 3.91,
"learning_rate": 8.46454815959493e-06,
"loss": 3.2999,
"step": 477000
},
{
"epoch": 3.91,
"learning_rate": 8.462904206875011e-06,
"loss": 3.3049,
"step": 477500
},
{
"epoch": 3.92,
"learning_rate": 8.46126025415509e-06,
"loss": 3.3029,
"step": 478000
},
{
"epoch": 3.92,
"learning_rate": 8.459616301435172e-06,
"loss": 3.3006,
"step": 478500
},
{
"epoch": 3.92,
"learning_rate": 8.457972348715253e-06,
"loss": 3.3031,
"step": 479000
},
{
"epoch": 3.93,
"learning_rate": 8.456328395995332e-06,
"loss": 3.3033,
"step": 479500
},
{
"epoch": 3.93,
"learning_rate": 8.454684443275411e-06,
"loss": 3.3062,
"step": 480000
},
{
"epoch": 3.93,
"eval_accuracy": 0.4462195104271718,
"eval_loss": 3.166045904159546,
"eval_runtime": 409.7348,
"eval_samples_per_second": 752.575,
"eval_steps_per_second": 15.681,
"step": 480000
},
{
"epoch": 3.94,
"learning_rate": 8.453040490555492e-06,
"loss": 3.3111,
"step": 480500
},
{
"epoch": 3.94,
"learning_rate": 8.451396537835571e-06,
"loss": 3.2931,
"step": 481000
},
{
"epoch": 3.94,
"learning_rate": 8.449752585115652e-06,
"loss": 3.2991,
"step": 481500
},
{
"epoch": 3.95,
"learning_rate": 8.448108632395733e-06,
"loss": 3.3012,
"step": 482000
},
{
"epoch": 3.95,
"learning_rate": 8.446464679675813e-06,
"loss": 3.3012,
"step": 482500
},
{
"epoch": 3.96,
"learning_rate": 8.444820726955894e-06,
"loss": 3.3084,
"step": 483000
},
{
"epoch": 3.96,
"learning_rate": 8.443176774235975e-06,
"loss": 3.3038,
"step": 483500
},
{
"epoch": 3.97,
"learning_rate": 8.441532821516054e-06,
"loss": 3.2989,
"step": 484000
},
{
"epoch": 3.97,
"learning_rate": 8.439888868796135e-06,
"loss": 3.3014,
"step": 484500
},
{
"epoch": 3.97,
"learning_rate": 8.438244916076214e-06,
"loss": 3.3043,
"step": 485000
},
{
"epoch": 3.98,
"learning_rate": 8.436600963356295e-06,
"loss": 3.305,
"step": 485500
},
{
"epoch": 3.98,
"learning_rate": 8.434957010636374e-06,
"loss": 3.3027,
"step": 486000
},
{
"epoch": 3.99,
"learning_rate": 8.433313057916455e-06,
"loss": 3.3018,
"step": 486500
},
{
"epoch": 3.99,
"learning_rate": 8.431669105196535e-06,
"loss": 3.3057,
"step": 487000
},
{
"epoch": 3.99,
"learning_rate": 8.430025152476616e-06,
"loss": 3.2995,
"step": 487500
},
{
"epoch": 4.0,
"learning_rate": 8.428381199756695e-06,
"loss": 3.3044,
"step": 488000
},
{
"epoch": 4.0,
"learning_rate": 8.426737247036776e-06,
"loss": 3.2979,
"step": 488500
},
{
"epoch": 4.01,
"learning_rate": 8.425093294316857e-06,
"loss": 3.2995,
"step": 489000
},
{
"epoch": 4.01,
"learning_rate": 8.423449341596936e-06,
"loss": 3.2958,
"step": 489500
},
{
"epoch": 4.01,
"learning_rate": 8.421805388877017e-06,
"loss": 3.3006,
"step": 490000
},
{
"epoch": 4.02,
"learning_rate": 8.420161436157096e-06,
"loss": 3.3015,
"step": 490500
},
{
"epoch": 4.02,
"learning_rate": 8.418517483437177e-06,
"loss": 3.2939,
"step": 491000
},
{
"epoch": 4.03,
"learning_rate": 8.416873530717258e-06,
"loss": 3.2984,
"step": 491500
},
{
"epoch": 4.03,
"learning_rate": 8.415229577997338e-06,
"loss": 3.294,
"step": 492000
},
{
"epoch": 4.03,
"learning_rate": 8.413585625277417e-06,
"loss": 3.2979,
"step": 492500
},
{
"epoch": 4.04,
"learning_rate": 8.411941672557498e-06,
"loss": 3.2938,
"step": 493000
},
{
"epoch": 4.04,
"learning_rate": 8.410297719837577e-06,
"loss": 3.2961,
"step": 493500
},
{
"epoch": 4.05,
"learning_rate": 8.408653767117658e-06,
"loss": 3.286,
"step": 494000
},
{
"epoch": 4.05,
"learning_rate": 8.407009814397739e-06,
"loss": 3.2893,
"step": 494500
},
{
"epoch": 4.06,
"learning_rate": 8.405365861677818e-06,
"loss": 3.289,
"step": 495000
},
{
"epoch": 4.06,
"learning_rate": 8.4037219089579e-06,
"loss": 3.2949,
"step": 495500
},
{
"epoch": 4.06,
"learning_rate": 8.40207795623798e-06,
"loss": 3.2887,
"step": 496000
},
{
"epoch": 4.07,
"learning_rate": 8.40043400351806e-06,
"loss": 3.2963,
"step": 496500
},
{
"epoch": 4.07,
"learning_rate": 8.39879005079814e-06,
"loss": 3.2874,
"step": 497000
},
{
"epoch": 4.08,
"learning_rate": 8.39714609807822e-06,
"loss": 3.2941,
"step": 497500
},
{
"epoch": 4.08,
"learning_rate": 8.395502145358299e-06,
"loss": 3.2907,
"step": 498000
},
{
"epoch": 4.08,
"learning_rate": 8.39385819263838e-06,
"loss": 3.2968,
"step": 498500
},
{
"epoch": 4.09,
"learning_rate": 8.392214239918461e-06,
"loss": 3.2853,
"step": 499000
},
{
"epoch": 4.09,
"learning_rate": 8.39057028719854e-06,
"loss": 3.2883,
"step": 499500
},
{
"epoch": 4.1,
"learning_rate": 8.388926334478621e-06,
"loss": 3.2939,
"step": 500000
},
{
"epoch": 4.1,
"learning_rate": 8.3872823817587e-06,
"loss": 3.2897,
"step": 500500
},
{
"epoch": 4.1,
"learning_rate": 8.385638429038782e-06,
"loss": 3.299,
"step": 501000
},
{
"epoch": 4.11,
"learning_rate": 8.383994476318863e-06,
"loss": 3.2911,
"step": 501500
},
{
"epoch": 4.11,
"learning_rate": 8.382350523598942e-06,
"loss": 3.2926,
"step": 502000
},
{
"epoch": 4.12,
"learning_rate": 8.380706570879023e-06,
"loss": 3.2913,
"step": 502500
},
{
"epoch": 4.12,
"learning_rate": 8.379062618159102e-06,
"loss": 3.3006,
"step": 503000
},
{
"epoch": 4.13,
"learning_rate": 8.377418665439183e-06,
"loss": 3.2884,
"step": 503500
},
{
"epoch": 4.13,
"learning_rate": 8.375774712719262e-06,
"loss": 3.2926,
"step": 504000
},
{
"epoch": 4.13,
"learning_rate": 8.374130759999343e-06,
"loss": 3.2898,
"step": 504500
},
{
"epoch": 4.14,
"learning_rate": 8.372486807279423e-06,
"loss": 3.2941,
"step": 505000
},
{
"epoch": 4.14,
"learning_rate": 8.370842854559504e-06,
"loss": 3.2886,
"step": 505500
},
{
"epoch": 4.15,
"learning_rate": 8.369198901839583e-06,
"loss": 3.2888,
"step": 506000
},
{
"epoch": 4.15,
"learning_rate": 8.367554949119664e-06,
"loss": 3.2876,
"step": 506500
},
{
"epoch": 4.15,
"learning_rate": 8.365910996399745e-06,
"loss": 3.2868,
"step": 507000
},
{
"epoch": 4.16,
"learning_rate": 8.364267043679824e-06,
"loss": 3.2854,
"step": 507500
},
{
"epoch": 4.16,
"learning_rate": 8.362623090959905e-06,
"loss": 3.2951,
"step": 508000
},
{
"epoch": 4.17,
"learning_rate": 8.360979138239986e-06,
"loss": 3.2928,
"step": 508500
},
{
"epoch": 4.17,
"learning_rate": 8.359335185520065e-06,
"loss": 3.286,
"step": 509000
},
{
"epoch": 4.17,
"learning_rate": 8.357691232800146e-06,
"loss": 3.2894,
"step": 509500
},
{
"epoch": 4.18,
"learning_rate": 8.356047280080226e-06,
"loss": 3.2841,
"step": 510000
},
{
"epoch": 4.18,
"eval_accuracy": 0.4485220748826658,
"eval_loss": 3.1493306159973145,
"eval_runtime": 411.9758,
"eval_samples_per_second": 748.481,
"eval_steps_per_second": 15.596,
"step": 510000
},
{
"epoch": 4.18,
"learning_rate": 8.354403327360305e-06,
"loss": 3.2809,
"step": 510500
},
{
"epoch": 4.19,
"learning_rate": 8.352759374640386e-06,
"loss": 3.2861,
"step": 511000
},
{
"epoch": 4.19,
"learning_rate": 8.351115421920467e-06,
"loss": 3.2792,
"step": 511500
},
{
"epoch": 4.19,
"learning_rate": 8.349471469200546e-06,
"loss": 3.2864,
"step": 512000
},
{
"epoch": 4.2,
"learning_rate": 8.347827516480627e-06,
"loss": 3.2761,
"step": 512500
},
{
"epoch": 4.2,
"learning_rate": 8.346183563760706e-06,
"loss": 3.2821,
"step": 513000
},
{
"epoch": 4.21,
"learning_rate": 8.344539611040787e-06,
"loss": 3.2797,
"step": 513500
},
{
"epoch": 4.21,
"learning_rate": 8.342895658320868e-06,
"loss": 3.2854,
"step": 514000
},
{
"epoch": 4.22,
"learning_rate": 8.341251705600948e-06,
"loss": 3.2826,
"step": 514500
},
{
"epoch": 4.22,
"learning_rate": 8.339607752881029e-06,
"loss": 3.2923,
"step": 515000
},
{
"epoch": 4.22,
"learning_rate": 8.337963800161108e-06,
"loss": 3.2826,
"step": 515500
},
{
"epoch": 4.23,
"learning_rate": 8.336319847441187e-06,
"loss": 3.2886,
"step": 516000
},
{
"epoch": 4.23,
"learning_rate": 8.334675894721268e-06,
"loss": 3.2822,
"step": 516500
},
{
"epoch": 4.24,
"learning_rate": 8.333031942001349e-06,
"loss": 3.2828,
"step": 517000
},
{
"epoch": 4.24,
"learning_rate": 8.331387989281428e-06,
"loss": 3.2848,
"step": 517500
},
{
"epoch": 4.24,
"learning_rate": 8.32974403656151e-06,
"loss": 3.2878,
"step": 518000
},
{
"epoch": 4.25,
"learning_rate": 8.328100083841589e-06,
"loss": 3.2879,
"step": 518500
},
{
"epoch": 4.25,
"learning_rate": 8.32645613112167e-06,
"loss": 3.2825,
"step": 519000
},
{
"epoch": 4.26,
"learning_rate": 8.32481217840175e-06,
"loss": 3.2823,
"step": 519500
},
{
"epoch": 4.26,
"learning_rate": 8.32316822568183e-06,
"loss": 3.2841,
"step": 520000
},
{
"epoch": 4.26,
"learning_rate": 8.32152427296191e-06,
"loss": 3.2854,
"step": 520500
},
{
"epoch": 4.27,
"learning_rate": 8.31988032024199e-06,
"loss": 3.2817,
"step": 521000
},
{
"epoch": 4.27,
"learning_rate": 8.318236367522071e-06,
"loss": 3.277,
"step": 521500
},
{
"epoch": 4.28,
"learning_rate": 8.316592414802152e-06,
"loss": 3.2717,
"step": 522000
},
{
"epoch": 4.28,
"learning_rate": 8.314948462082231e-06,
"loss": 3.2838,
"step": 522500
},
{
"epoch": 4.28,
"learning_rate": 8.31330450936231e-06,
"loss": 3.2764,
"step": 523000
},
{
"epoch": 4.29,
"learning_rate": 8.311660556642392e-06,
"loss": 3.2824,
"step": 523500
},
{
"epoch": 4.29,
"learning_rate": 8.310016603922472e-06,
"loss": 3.2748,
"step": 524000
},
{
"epoch": 4.3,
"learning_rate": 8.308372651202552e-06,
"loss": 3.275,
"step": 524500
},
{
"epoch": 4.3,
"learning_rate": 8.306728698482633e-06,
"loss": 3.2802,
"step": 525000
},
{
"epoch": 4.31,
"learning_rate": 8.305084745762712e-06,
"loss": 3.2764,
"step": 525500
},
{
"epoch": 4.31,
"learning_rate": 8.303440793042793e-06,
"loss": 3.2746,
"step": 526000
},
{
"epoch": 4.31,
"learning_rate": 8.301796840322874e-06,
"loss": 3.2793,
"step": 526500
},
{
"epoch": 4.32,
"learning_rate": 8.300152887602953e-06,
"loss": 3.2819,
"step": 527000
},
{
"epoch": 4.32,
"learning_rate": 8.298508934883034e-06,
"loss": 3.2755,
"step": 527500
},
{
"epoch": 4.33,
"learning_rate": 8.296864982163114e-06,
"loss": 3.2831,
"step": 528000
},
{
"epoch": 4.33,
"learning_rate": 8.295221029443193e-06,
"loss": 3.2733,
"step": 528500
},
{
"epoch": 4.33,
"learning_rate": 8.293577076723274e-06,
"loss": 3.277,
"step": 529000
},
{
"epoch": 4.34,
"learning_rate": 8.291933124003355e-06,
"loss": 3.2783,
"step": 529500
},
{
"epoch": 4.34,
"learning_rate": 8.290289171283434e-06,
"loss": 3.2877,
"step": 530000
},
{
"epoch": 4.35,
"learning_rate": 8.288645218563515e-06,
"loss": 3.2762,
"step": 530500
},
{
"epoch": 4.35,
"learning_rate": 8.287001265843594e-06,
"loss": 3.2744,
"step": 531000
},
{
"epoch": 4.35,
"learning_rate": 8.285357313123675e-06,
"loss": 3.275,
"step": 531500
},
{
"epoch": 4.36,
"learning_rate": 8.283713360403756e-06,
"loss": 3.2724,
"step": 532000
},
{
"epoch": 4.36,
"learning_rate": 8.282069407683835e-06,
"loss": 3.2723,
"step": 532500
},
{
"epoch": 4.37,
"learning_rate": 8.280425454963916e-06,
"loss": 3.2662,
"step": 533000
},
{
"epoch": 4.37,
"learning_rate": 8.278781502243996e-06,
"loss": 3.2702,
"step": 533500
},
{
"epoch": 4.37,
"learning_rate": 8.277137549524077e-06,
"loss": 3.2736,
"step": 534000
},
{
"epoch": 4.38,
"learning_rate": 8.275493596804156e-06,
"loss": 3.2694,
"step": 534500
},
{
"epoch": 4.38,
"learning_rate": 8.273849644084237e-06,
"loss": 3.2722,
"step": 535000
},
{
"epoch": 4.39,
"learning_rate": 8.272205691364316e-06,
"loss": 3.2708,
"step": 535500
},
{
"epoch": 4.39,
"learning_rate": 8.270561738644397e-06,
"loss": 3.274,
"step": 536000
},
{
"epoch": 4.4,
"learning_rate": 8.268917785924478e-06,
"loss": 3.2798,
"step": 536500
},
{
"epoch": 4.4,
"learning_rate": 8.267273833204557e-06,
"loss": 3.2723,
"step": 537000
},
{
"epoch": 4.4,
"learning_rate": 8.265629880484638e-06,
"loss": 3.2728,
"step": 537500
},
{
"epoch": 4.41,
"learning_rate": 8.263985927764718e-06,
"loss": 3.2721,
"step": 538000
},
{
"epoch": 4.41,
"learning_rate": 8.262341975044799e-06,
"loss": 3.2774,
"step": 538500
},
{
"epoch": 4.42,
"learning_rate": 8.26069802232488e-06,
"loss": 3.2741,
"step": 539000
},
{
"epoch": 4.42,
"learning_rate": 8.259054069604959e-06,
"loss": 3.2655,
"step": 539500
},
{
"epoch": 4.42,
"learning_rate": 8.25741011688504e-06,
"loss": 3.2663,
"step": 540000
},
{
"epoch": 4.42,
"eval_accuracy": 0.4502872659892971,
"eval_loss": 3.1354901790618896,
"eval_runtime": 409.1172,
"eval_samples_per_second": 753.711,
"eval_steps_per_second": 15.705,
"step": 540000
},
{
"epoch": 4.43,
"learning_rate": 8.25576616416512e-06,
"loss": 3.2669,
"step": 540500
},
{
"epoch": 4.43,
"learning_rate": 8.254122211445198e-06,
"loss": 3.2673,
"step": 541000
},
{
"epoch": 4.44,
"learning_rate": 8.25247825872528e-06,
"loss": 3.2689,
"step": 541500
},
{
"epoch": 4.44,
"learning_rate": 8.25083430600536e-06,
"loss": 3.2659,
"step": 542000
},
{
"epoch": 4.44,
"learning_rate": 8.24919035328544e-06,
"loss": 3.2657,
"step": 542500
},
{
"epoch": 4.45,
"learning_rate": 8.24754640056552e-06,
"loss": 3.2651,
"step": 543000
},
{
"epoch": 4.45,
"learning_rate": 8.2459024478456e-06,
"loss": 3.2673,
"step": 543500
},
{
"epoch": 4.46,
"learning_rate": 8.244258495125681e-06,
"loss": 3.2719,
"step": 544000
},
{
"epoch": 4.46,
"learning_rate": 8.242614542405762e-06,
"loss": 3.2714,
"step": 544500
},
{
"epoch": 4.47,
"learning_rate": 8.240970589685841e-06,
"loss": 3.2702,
"step": 545000
},
{
"epoch": 4.47,
"learning_rate": 8.239326636965922e-06,
"loss": 3.2649,
"step": 545500
},
{
"epoch": 4.47,
"learning_rate": 8.237682684246001e-06,
"loss": 3.2791,
"step": 546000
},
{
"epoch": 4.48,
"learning_rate": 8.23603873152608e-06,
"loss": 3.2627,
"step": 546500
},
{
"epoch": 4.48,
"learning_rate": 8.234394778806162e-06,
"loss": 3.2707,
"step": 547000
},
{
"epoch": 4.49,
"learning_rate": 8.232750826086243e-06,
"loss": 3.2704,
"step": 547500
},
{
"epoch": 4.49,
"learning_rate": 8.231106873366322e-06,
"loss": 3.2624,
"step": 548000
},
{
"epoch": 4.49,
"learning_rate": 8.229462920646403e-06,
"loss": 3.2661,
"step": 548500
},
{
"epoch": 4.5,
"learning_rate": 8.227818967926482e-06,
"loss": 3.2724,
"step": 549000
},
{
"epoch": 4.5,
"learning_rate": 8.226175015206563e-06,
"loss": 3.2653,
"step": 549500
},
{
"epoch": 4.51,
"learning_rate": 8.224531062486644e-06,
"loss": 3.2676,
"step": 550000
},
{
"epoch": 4.51,
"learning_rate": 8.222887109766723e-06,
"loss": 3.2621,
"step": 550500
},
{
"epoch": 4.51,
"learning_rate": 8.221243157046804e-06,
"loss": 3.2705,
"step": 551000
},
{
"epoch": 4.52,
"learning_rate": 8.219599204326885e-06,
"loss": 3.2677,
"step": 551500
},
{
"epoch": 4.52,
"learning_rate": 8.217955251606965e-06,
"loss": 3.2643,
"step": 552000
},
{
"epoch": 4.53,
"learning_rate": 8.216311298887044e-06,
"loss": 3.2621,
"step": 552500
},
{
"epoch": 4.53,
"learning_rate": 8.214667346167125e-06,
"loss": 3.262,
"step": 553000
},
{
"epoch": 4.53,
"learning_rate": 8.213023393447204e-06,
"loss": 3.2641,
"step": 553500
},
{
"epoch": 4.54,
"learning_rate": 8.211379440727285e-06,
"loss": 3.2677,
"step": 554000
},
{
"epoch": 4.54,
"learning_rate": 8.209735488007366e-06,
"loss": 3.2587,
"step": 554500
},
{
"epoch": 4.55,
"learning_rate": 8.208091535287445e-06,
"loss": 3.2668,
"step": 555000
},
{
"epoch": 4.55,
"learning_rate": 8.206447582567526e-06,
"loss": 3.2543,
"step": 555500
},
{
"epoch": 4.56,
"learning_rate": 8.204803629847606e-06,
"loss": 3.2612,
"step": 556000
},
{
"epoch": 4.56,
"learning_rate": 8.203159677127687e-06,
"loss": 3.2631,
"step": 556500
},
{
"epoch": 4.56,
"learning_rate": 8.201515724407768e-06,
"loss": 3.2678,
"step": 557000
},
{
"epoch": 4.57,
"learning_rate": 8.199871771687847e-06,
"loss": 3.2658,
"step": 557500
},
{
"epoch": 4.57,
"learning_rate": 8.198227818967928e-06,
"loss": 3.2576,
"step": 558000
},
{
"epoch": 4.58,
"learning_rate": 8.196583866248007e-06,
"loss": 3.2639,
"step": 558500
},
{
"epoch": 4.58,
"learning_rate": 8.194939913528086e-06,
"loss": 3.2549,
"step": 559000
},
{
"epoch": 4.58,
"learning_rate": 8.193295960808167e-06,
"loss": 3.2605,
"step": 559500
},
{
"epoch": 4.59,
"learning_rate": 8.191652008088248e-06,
"loss": 3.2623,
"step": 560000
},
{
"epoch": 4.59,
"learning_rate": 8.190008055368328e-06,
"loss": 3.2635,
"step": 560500
},
{
"epoch": 4.6,
"learning_rate": 8.188364102648409e-06,
"loss": 3.2587,
"step": 561000
},
{
"epoch": 4.6,
"learning_rate": 8.186720149928488e-06,
"loss": 3.266,
"step": 561500
},
{
"epoch": 4.6,
"learning_rate": 8.185076197208569e-06,
"loss": 3.2584,
"step": 562000
},
{
"epoch": 4.61,
"learning_rate": 8.18343224448865e-06,
"loss": 3.2625,
"step": 562500
},
{
"epoch": 4.61,
"learning_rate": 8.181788291768729e-06,
"loss": 3.2598,
"step": 563000
},
{
"epoch": 4.62,
"learning_rate": 8.18014433904881e-06,
"loss": 3.2545,
"step": 563500
},
{
"epoch": 4.62,
"learning_rate": 8.178500386328891e-06,
"loss": 3.2591,
"step": 564000
},
{
"epoch": 4.62,
"learning_rate": 8.17685643360897e-06,
"loss": 3.2561,
"step": 564500
},
{
"epoch": 4.63,
"learning_rate": 8.17521248088905e-06,
"loss": 3.2604,
"step": 565000
},
{
"epoch": 4.63,
"learning_rate": 8.17356852816913e-06,
"loss": 3.2529,
"step": 565500
},
{
"epoch": 4.64,
"learning_rate": 8.17192457544921e-06,
"loss": 3.2568,
"step": 566000
},
{
"epoch": 4.64,
"learning_rate": 8.170280622729291e-06,
"loss": 3.2554,
"step": 566500
},
{
"epoch": 4.65,
"learning_rate": 8.168636670009372e-06,
"loss": 3.2528,
"step": 567000
},
{
"epoch": 4.65,
"learning_rate": 8.166992717289451e-06,
"loss": 3.2633,
"step": 567500
},
{
"epoch": 4.65,
"learning_rate": 8.165348764569532e-06,
"loss": 3.2545,
"step": 568000
},
{
"epoch": 4.66,
"learning_rate": 8.163704811849611e-06,
"loss": 3.257,
"step": 568500
},
{
"epoch": 4.66,
"learning_rate": 8.162060859129692e-06,
"loss": 3.2531,
"step": 569000
},
{
"epoch": 4.67,
"learning_rate": 8.160416906409773e-06,
"loss": 3.2573,
"step": 569500
},
{
"epoch": 4.67,
"learning_rate": 8.158772953689853e-06,
"loss": 3.259,
"step": 570000
},
{
"epoch": 4.67,
"eval_accuracy": 0.45185906617575416,
"eval_loss": 3.122938632965088,
"eval_runtime": 406.5956,
"eval_samples_per_second": 758.385,
"eval_steps_per_second": 15.802,
"step": 570000
},
{
"epoch": 4.67,
"learning_rate": 8.157129000969934e-06,
"loss": 3.2607,
"step": 570500
},
{
"epoch": 4.68,
"learning_rate": 8.155485048250013e-06,
"loss": 3.2596,
"step": 571000
},
{
"epoch": 4.68,
"learning_rate": 8.153841095530092e-06,
"loss": 3.2551,
"step": 571500
},
{
"epoch": 4.69,
"learning_rate": 8.152197142810173e-06,
"loss": 3.26,
"step": 572000
},
{
"epoch": 4.69,
"learning_rate": 8.150553190090254e-06,
"loss": 3.2566,
"step": 572500
},
{
"epoch": 4.69,
"learning_rate": 8.148909237370333e-06,
"loss": 3.2509,
"step": 573000
},
{
"epoch": 4.7,
"learning_rate": 8.147265284650414e-06,
"loss": 3.2506,
"step": 573500
},
{
"epoch": 4.7,
"learning_rate": 8.145621331930494e-06,
"loss": 3.2583,
"step": 574000
},
{
"epoch": 4.71,
"learning_rate": 8.143977379210575e-06,
"loss": 3.2572,
"step": 574500
},
{
"epoch": 4.71,
"learning_rate": 8.142333426490656e-06,
"loss": 3.2544,
"step": 575000
},
{
"epoch": 4.71,
"learning_rate": 8.140689473770735e-06,
"loss": 3.2544,
"step": 575500
},
{
"epoch": 4.72,
"learning_rate": 8.139045521050816e-06,
"loss": 3.2497,
"step": 576000
},
{
"epoch": 4.72,
"learning_rate": 8.137401568330897e-06,
"loss": 3.2496,
"step": 576500
},
{
"epoch": 4.73,
"learning_rate": 8.135757615610974e-06,
"loss": 3.2468,
"step": 577000
},
{
"epoch": 4.73,
"learning_rate": 8.134113662891055e-06,
"loss": 3.2497,
"step": 577500
},
{
"epoch": 4.74,
"learning_rate": 8.132469710171136e-06,
"loss": 3.249,
"step": 578000
},
{
"epoch": 4.74,
"learning_rate": 8.130825757451216e-06,
"loss": 3.2553,
"step": 578500
},
{
"epoch": 4.74,
"learning_rate": 8.129181804731297e-06,
"loss": 3.2604,
"step": 579000
},
{
"epoch": 4.75,
"learning_rate": 8.127537852011378e-06,
"loss": 3.2497,
"step": 579500
},
{
"epoch": 4.75,
"learning_rate": 8.125893899291457e-06,
"loss": 3.2503,
"step": 580000
},
{
"epoch": 4.76,
"learning_rate": 8.124249946571538e-06,
"loss": 3.2484,
"step": 580500
},
{
"epoch": 4.76,
"learning_rate": 8.122605993851617e-06,
"loss": 3.2596,
"step": 581000
},
{
"epoch": 4.76,
"learning_rate": 8.120962041131698e-06,
"loss": 3.2464,
"step": 581500
},
{
"epoch": 4.77,
"learning_rate": 8.119318088411779e-06,
"loss": 3.2508,
"step": 582000
},
{
"epoch": 4.77,
"learning_rate": 8.117674135691858e-06,
"loss": 3.249,
"step": 582500
},
{
"epoch": 4.78,
"learning_rate": 8.116030182971938e-06,
"loss": 3.2492,
"step": 583000
},
{
"epoch": 4.78,
"learning_rate": 8.114386230252019e-06,
"loss": 3.2457,
"step": 583500
},
{
"epoch": 4.78,
"learning_rate": 8.112742277532098e-06,
"loss": 3.2488,
"step": 584000
},
{
"epoch": 4.79,
"learning_rate": 8.111098324812179e-06,
"loss": 3.2475,
"step": 584500
},
{
"epoch": 4.79,
"learning_rate": 8.10945437209226e-06,
"loss": 3.2466,
"step": 585000
},
{
"epoch": 4.8,
"learning_rate": 8.107810419372339e-06,
"loss": 3.2452,
"step": 585500
},
{
"epoch": 4.8,
"learning_rate": 8.10616646665242e-06,
"loss": 3.2506,
"step": 586000
},
{
"epoch": 4.81,
"learning_rate": 8.1045225139325e-06,
"loss": 3.243,
"step": 586500
},
{
"epoch": 4.81,
"learning_rate": 8.10287856121258e-06,
"loss": 3.2476,
"step": 587000
},
{
"epoch": 4.81,
"learning_rate": 8.101234608492661e-06,
"loss": 3.2427,
"step": 587500
},
{
"epoch": 4.82,
"learning_rate": 8.09959065577274e-06,
"loss": 3.2548,
"step": 588000
},
{
"epoch": 4.82,
"learning_rate": 8.097946703052822e-06,
"loss": 3.2503,
"step": 588500
},
{
"epoch": 4.83,
"learning_rate": 8.0963027503329e-06,
"loss": 3.2469,
"step": 589000
},
{
"epoch": 4.83,
"learning_rate": 8.09465879761298e-06,
"loss": 3.239,
"step": 589500
},
{
"epoch": 4.83,
"learning_rate": 8.093014844893061e-06,
"loss": 3.2494,
"step": 590000
},
{
"epoch": 4.84,
"learning_rate": 8.091370892173142e-06,
"loss": 3.2423,
"step": 590500
},
{
"epoch": 4.84,
"learning_rate": 8.089726939453221e-06,
"loss": 3.2455,
"step": 591000
},
{
"epoch": 4.85,
"learning_rate": 8.088082986733302e-06,
"loss": 3.2395,
"step": 591500
},
{
"epoch": 4.85,
"learning_rate": 8.086439034013383e-06,
"loss": 3.244,
"step": 592000
},
{
"epoch": 4.85,
"learning_rate": 8.084795081293463e-06,
"loss": 3.2469,
"step": 592500
},
{
"epoch": 4.86,
"learning_rate": 8.083151128573544e-06,
"loss": 3.244,
"step": 593000
},
{
"epoch": 4.86,
"learning_rate": 8.081507175853623e-06,
"loss": 3.2457,
"step": 593500
},
{
"epoch": 4.87,
"learning_rate": 8.079863223133704e-06,
"loss": 3.2449,
"step": 594000
},
{
"epoch": 4.87,
"learning_rate": 8.078219270413785e-06,
"loss": 3.2407,
"step": 594500
},
{
"epoch": 4.87,
"learning_rate": 8.076575317693864e-06,
"loss": 3.2462,
"step": 595000
},
{
"epoch": 4.88,
"learning_rate": 8.074931364973943e-06,
"loss": 3.2402,
"step": 595500
},
{
"epoch": 4.88,
"learning_rate": 8.073287412254024e-06,
"loss": 3.2431,
"step": 596000
},
{
"epoch": 4.89,
"learning_rate": 8.071643459534104e-06,
"loss": 3.2429,
"step": 596500
},
{
"epoch": 4.89,
"learning_rate": 8.069999506814185e-06,
"loss": 3.2418,
"step": 597000
},
{
"epoch": 4.9,
"learning_rate": 8.068355554094265e-06,
"loss": 3.2403,
"step": 597500
},
{
"epoch": 4.9,
"learning_rate": 8.066711601374345e-06,
"loss": 3.2438,
"step": 598000
},
{
"epoch": 4.9,
"learning_rate": 8.065067648654426e-06,
"loss": 3.2456,
"step": 598500
},
{
"epoch": 4.91,
"learning_rate": 8.063423695934505e-06,
"loss": 3.2345,
"step": 599000
},
{
"epoch": 4.91,
"learning_rate": 8.061779743214586e-06,
"loss": 3.2393,
"step": 599500
},
{
"epoch": 4.92,
"learning_rate": 8.060135790494667e-06,
"loss": 3.2429,
"step": 600000
},
{
"epoch": 4.92,
"eval_accuracy": 0.45349973762719414,
"eval_loss": 3.1096389293670654,
"eval_runtime": 406.7173,
"eval_samples_per_second": 758.158,
"eval_steps_per_second": 15.797,
"step": 600000
},
{
"epoch": 4.92,
"learning_rate": 8.058491837774746e-06,
"loss": 3.2356,
"step": 600500
},
{
"epoch": 4.92,
"learning_rate": 8.056847885054826e-06,
"loss": 3.2468,
"step": 601000
},
{
"epoch": 4.93,
"learning_rate": 8.055203932334907e-06,
"loss": 3.2398,
"step": 601500
},
{
"epoch": 4.93,
"learning_rate": 8.053559979614986e-06,
"loss": 3.2352,
"step": 602000
},
{
"epoch": 4.94,
"learning_rate": 8.051916026895067e-06,
"loss": 3.2442,
"step": 602500
},
{
"epoch": 4.94,
"learning_rate": 8.050272074175148e-06,
"loss": 3.2401,
"step": 603000
},
{
"epoch": 4.94,
"learning_rate": 8.048628121455227e-06,
"loss": 3.2406,
"step": 603500
},
{
"epoch": 4.95,
"learning_rate": 8.046984168735308e-06,
"loss": 3.2348,
"step": 604000
},
{
"epoch": 4.95,
"learning_rate": 8.045340216015389e-06,
"loss": 3.2435,
"step": 604500
},
{
"epoch": 4.96,
"learning_rate": 8.043696263295468e-06,
"loss": 3.2372,
"step": 605000
},
{
"epoch": 4.96,
"learning_rate": 8.04205231057555e-06,
"loss": 3.2415,
"step": 605500
},
{
"epoch": 4.96,
"learning_rate": 8.040408357855628e-06,
"loss": 3.234,
"step": 606000
},
{
"epoch": 4.97,
"learning_rate": 8.03876440513571e-06,
"loss": 3.2412,
"step": 606500
},
{
"epoch": 4.97,
"learning_rate": 8.037120452415789e-06,
"loss": 3.2354,
"step": 607000
},
{
"epoch": 4.98,
"learning_rate": 8.03547649969587e-06,
"loss": 3.2394,
"step": 607500
},
{
"epoch": 4.98,
"learning_rate": 8.033832546975949e-06,
"loss": 3.2317,
"step": 608000
},
{
"epoch": 4.99,
"learning_rate": 8.03218859425603e-06,
"loss": 3.2447,
"step": 608500
},
{
"epoch": 4.99,
"learning_rate": 8.03054464153611e-06,
"loss": 3.2438,
"step": 609000
},
{
"epoch": 4.99,
"learning_rate": 8.02890068881619e-06,
"loss": 3.2373,
"step": 609500
},
{
"epoch": 5.0,
"learning_rate": 8.027256736096271e-06,
"loss": 3.2411,
"step": 610000
},
{
"epoch": 5.0,
"learning_rate": 8.02561278337635e-06,
"loss": 3.2358,
"step": 610500
},
{
"epoch": 5.01,
"learning_rate": 8.023968830656431e-06,
"loss": 3.2351,
"step": 611000
},
{
"epoch": 5.01,
"learning_rate": 8.02232487793651e-06,
"loss": 3.2341,
"step": 611500
},
{
"epoch": 5.01,
"learning_rate": 8.020680925216592e-06,
"loss": 3.2321,
"step": 612000
},
{
"epoch": 5.02,
"learning_rate": 8.019036972496673e-06,
"loss": 3.2408,
"step": 612500
},
{
"epoch": 5.02,
"learning_rate": 8.017393019776752e-06,
"loss": 3.2406,
"step": 613000
},
{
"epoch": 5.03,
"learning_rate": 8.015749067056831e-06,
"loss": 3.2375,
"step": 613500
},
{
"epoch": 5.03,
"learning_rate": 8.014105114336912e-06,
"loss": 3.2416,
"step": 614000
},
{
"epoch": 5.03,
"learning_rate": 8.012461161616991e-06,
"loss": 3.2331,
"step": 614500
},
{
"epoch": 5.04,
"learning_rate": 8.010817208897072e-06,
"loss": 3.2473,
"step": 615000
},
{
"epoch": 5.04,
"learning_rate": 8.009173256177153e-06,
"loss": 3.2345,
"step": 615500
},
{
"epoch": 5.05,
"learning_rate": 8.007529303457233e-06,
"loss": 3.2292,
"step": 616000
},
{
"epoch": 5.05,
"learning_rate": 8.005885350737314e-06,
"loss": 3.2327,
"step": 616500
},
{
"epoch": 5.05,
"learning_rate": 8.004241398017395e-06,
"loss": 3.2251,
"step": 617000
},
{
"epoch": 5.06,
"learning_rate": 8.002597445297474e-06,
"loss": 3.2326,
"step": 617500
},
{
"epoch": 5.06,
"learning_rate": 8.000953492577555e-06,
"loss": 3.2348,
"step": 618000
},
{
"epoch": 5.07,
"learning_rate": 7.999309539857634e-06,
"loss": 3.231,
"step": 618500
},
{
"epoch": 5.07,
"learning_rate": 7.997665587137715e-06,
"loss": 3.236,
"step": 619000
},
{
"epoch": 5.08,
"learning_rate": 7.996021634417794e-06,
"loss": 3.2324,
"step": 619500
},
{
"epoch": 5.08,
"learning_rate": 7.994377681697875e-06,
"loss": 3.2321,
"step": 620000
},
{
"epoch": 5.08,
"learning_rate": 7.992733728977955e-06,
"loss": 3.2326,
"step": 620500
},
{
"epoch": 5.09,
"learning_rate": 7.991089776258036e-06,
"loss": 3.2318,
"step": 621000
},
{
"epoch": 5.09,
"learning_rate": 7.989445823538115e-06,
"loss": 3.2291,
"step": 621500
},
{
"epoch": 5.1,
"learning_rate": 7.987801870818196e-06,
"loss": 3.2291,
"step": 622000
},
{
"epoch": 5.1,
"learning_rate": 7.986157918098277e-06,
"loss": 3.2264,
"step": 622500
},
{
"epoch": 5.1,
"learning_rate": 7.984513965378356e-06,
"loss": 3.2319,
"step": 623000
},
{
"epoch": 5.11,
"learning_rate": 7.982870012658437e-06,
"loss": 3.231,
"step": 623500
},
{
"epoch": 5.11,
"learning_rate": 7.981226059938516e-06,
"loss": 3.2294,
"step": 624000
},
{
"epoch": 5.12,
"learning_rate": 7.979582107218597e-06,
"loss": 3.2212,
"step": 624500
},
{
"epoch": 5.12,
"learning_rate": 7.977938154498678e-06,
"loss": 3.2315,
"step": 625000
},
{
"epoch": 5.12,
"learning_rate": 7.976294201778758e-06,
"loss": 3.2388,
"step": 625500
},
{
"epoch": 5.13,
"learning_rate": 7.974650249058837e-06,
"loss": 3.2373,
"step": 626000
},
{
"epoch": 5.13,
"learning_rate": 7.973006296338918e-06,
"loss": 3.2268,
"step": 626500
},
{
"epoch": 5.14,
"learning_rate": 7.971362343618997e-06,
"loss": 3.2271,
"step": 627000
},
{
"epoch": 5.14,
"learning_rate": 7.969718390899078e-06,
"loss": 3.23,
"step": 627500
},
{
"epoch": 5.15,
"learning_rate": 7.968074438179159e-06,
"loss": 3.2272,
"step": 628000
},
{
"epoch": 5.15,
"learning_rate": 7.966430485459238e-06,
"loss": 3.2258,
"step": 628500
},
{
"epoch": 5.15,
"learning_rate": 7.96478653273932e-06,
"loss": 3.2267,
"step": 629000
},
{
"epoch": 5.16,
"learning_rate": 7.9631425800194e-06,
"loss": 3.2273,
"step": 629500
},
{
"epoch": 5.16,
"learning_rate": 7.96149862729948e-06,
"loss": 3.2234,
"step": 630000
},
{
"epoch": 5.16,
"eval_accuracy": 0.4554354208134474,
"eval_loss": 3.0947325229644775,
"eval_runtime": 407.3576,
"eval_samples_per_second": 756.966,
"eval_steps_per_second": 15.772,
"step": 630000
},
{
"epoch": 5.17,
"learning_rate": 7.95985467457956e-06,
"loss": 3.2299,
"step": 630500
},
{
"epoch": 5.17,
"learning_rate": 7.95821072185964e-06,
"loss": 3.2245,
"step": 631000
},
{
"epoch": 5.17,
"learning_rate": 7.95656676913972e-06,
"loss": 3.2264,
"step": 631500
},
{
"epoch": 5.18,
"learning_rate": 7.9549228164198e-06,
"loss": 3.2198,
"step": 632000
},
{
"epoch": 5.18,
"learning_rate": 7.953278863699881e-06,
"loss": 3.2207,
"step": 632500
},
{
"epoch": 5.19,
"learning_rate": 7.95163491097996e-06,
"loss": 3.225,
"step": 633000
},
{
"epoch": 5.19,
"learning_rate": 7.949990958260041e-06,
"loss": 3.2346,
"step": 633500
},
{
"epoch": 5.19,
"learning_rate": 7.94834700554012e-06,
"loss": 3.228,
"step": 634000
},
{
"epoch": 5.2,
"learning_rate": 7.946703052820202e-06,
"loss": 3.2245,
"step": 634500
},
{
"epoch": 5.2,
"learning_rate": 7.945059100100283e-06,
"loss": 3.2281,
"step": 635000
},
{
"epoch": 5.21,
"learning_rate": 7.943415147380362e-06,
"loss": 3.2263,
"step": 635500
},
{
"epoch": 5.21,
"learning_rate": 7.941771194660443e-06,
"loss": 3.2242,
"step": 636000
},
{
"epoch": 5.21,
"learning_rate": 7.940127241940522e-06,
"loss": 3.2237,
"step": 636500
},
{
"epoch": 5.22,
"learning_rate": 7.938483289220603e-06,
"loss": 3.2275,
"step": 637000
},
{
"epoch": 5.22,
"learning_rate": 7.936839336500682e-06,
"loss": 3.223,
"step": 637500
},
{
"epoch": 5.23,
"learning_rate": 7.935195383780763e-06,
"loss": 3.2262,
"step": 638000
},
{
"epoch": 5.23,
"learning_rate": 7.933551431060843e-06,
"loss": 3.2343,
"step": 638500
},
{
"epoch": 5.24,
"learning_rate": 7.931907478340924e-06,
"loss": 3.225,
"step": 639000
},
{
"epoch": 5.24,
"learning_rate": 7.930263525621003e-06,
"loss": 3.2203,
"step": 639500
},
{
"epoch": 5.24,
"learning_rate": 7.928619572901084e-06,
"loss": 3.2276,
"step": 640000
},
{
"epoch": 5.25,
"learning_rate": 7.926975620181165e-06,
"loss": 3.2216,
"step": 640500
},
{
"epoch": 5.25,
"learning_rate": 7.925331667461244e-06,
"loss": 3.2199,
"step": 641000
},
{
"epoch": 5.26,
"learning_rate": 7.923687714741325e-06,
"loss": 3.226,
"step": 641500
},
{
"epoch": 5.26,
"learning_rate": 7.922043762021406e-06,
"loss": 3.2282,
"step": 642000
},
{
"epoch": 5.26,
"learning_rate": 7.920399809301485e-06,
"loss": 3.2232,
"step": 642500
},
{
"epoch": 5.27,
"learning_rate": 7.918755856581566e-06,
"loss": 3.2266,
"step": 643000
},
{
"epoch": 5.27,
"learning_rate": 7.917111903861646e-06,
"loss": 3.2136,
"step": 643500
},
{
"epoch": 5.28,
"learning_rate": 7.915467951141725e-06,
"loss": 3.2274,
"step": 644000
},
{
"epoch": 5.28,
"learning_rate": 7.913823998421806e-06,
"loss": 3.2235,
"step": 644500
},
{
"epoch": 5.28,
"learning_rate": 7.912180045701887e-06,
"loss": 3.2288,
"step": 645000
},
{
"epoch": 5.29,
"learning_rate": 7.910536092981966e-06,
"loss": 3.2166,
"step": 645500
},
{
"epoch": 5.29,
"learning_rate": 7.908892140262047e-06,
"loss": 3.2206,
"step": 646000
},
{
"epoch": 5.3,
"learning_rate": 7.907248187542126e-06,
"loss": 3.2212,
"step": 646500
},
{
"epoch": 5.3,
"learning_rate": 7.905604234822207e-06,
"loss": 3.2169,
"step": 647000
},
{
"epoch": 5.3,
"learning_rate": 7.903960282102288e-06,
"loss": 3.2286,
"step": 647500
},
{
"epoch": 5.31,
"learning_rate": 7.902316329382368e-06,
"loss": 3.2228,
"step": 648000
},
{
"epoch": 5.31,
"learning_rate": 7.900672376662449e-06,
"loss": 3.2177,
"step": 648500
},
{
"epoch": 5.32,
"learning_rate": 7.899028423942528e-06,
"loss": 3.2192,
"step": 649000
},
{
"epoch": 5.32,
"learning_rate": 7.897384471222607e-06,
"loss": 3.2179,
"step": 649500
},
{
"epoch": 5.33,
"learning_rate": 7.895740518502688e-06,
"loss": 3.227,
"step": 650000
},
{
"epoch": 5.33,
"learning_rate": 7.894096565782769e-06,
"loss": 3.2206,
"step": 650500
},
{
"epoch": 5.33,
"learning_rate": 7.892452613062848e-06,
"loss": 3.2183,
"step": 651000
},
{
"epoch": 5.34,
"learning_rate": 7.89080866034293e-06,
"loss": 3.2277,
"step": 651500
},
{
"epoch": 5.34,
"learning_rate": 7.889164707623009e-06,
"loss": 3.2174,
"step": 652000
},
{
"epoch": 5.35,
"learning_rate": 7.88752075490309e-06,
"loss": 3.2233,
"step": 652500
},
{
"epoch": 5.35,
"learning_rate": 7.88587680218317e-06,
"loss": 3.2165,
"step": 653000
},
{
"epoch": 5.35,
"learning_rate": 7.88423284946325e-06,
"loss": 3.2164,
"step": 653500
},
{
"epoch": 5.36,
"learning_rate": 7.88258889674333e-06,
"loss": 3.2157,
"step": 654000
},
{
"epoch": 5.36,
"learning_rate": 7.880944944023412e-06,
"loss": 3.2215,
"step": 654500
},
{
"epoch": 5.37,
"learning_rate": 7.879300991303491e-06,
"loss": 3.2217,
"step": 655000
},
{
"epoch": 5.37,
"learning_rate": 7.87765703858357e-06,
"loss": 3.2213,
"step": 655500
},
{
"epoch": 5.37,
"learning_rate": 7.876013085863651e-06,
"loss": 3.2122,
"step": 656000
},
{
"epoch": 5.38,
"learning_rate": 7.87436913314373e-06,
"loss": 3.2176,
"step": 656500
},
{
"epoch": 5.38,
"learning_rate": 7.872725180423812e-06,
"loss": 3.2147,
"step": 657000
},
{
"epoch": 5.39,
"learning_rate": 7.871081227703893e-06,
"loss": 3.217,
"step": 657500
},
{
"epoch": 5.39,
"learning_rate": 7.869437274983972e-06,
"loss": 3.219,
"step": 658000
},
{
"epoch": 5.39,
"learning_rate": 7.867793322264053e-06,
"loss": 3.2219,
"step": 658500
},
{
"epoch": 5.4,
"learning_rate": 7.866149369544132e-06,
"loss": 3.215,
"step": 659000
},
{
"epoch": 5.4,
"learning_rate": 7.864505416824213e-06,
"loss": 3.2098,
"step": 659500
},
{
"epoch": 5.41,
"learning_rate": 7.862861464104294e-06,
"loss": 3.2115,
"step": 660000
},
{
"epoch": 5.41,
"eval_accuracy": 0.4573150918223206,
"eval_loss": 3.0818052291870117,
"eval_runtime": 409.6703,
"eval_samples_per_second": 752.693,
"eval_steps_per_second": 15.683,
"step": 660000
},
{
"epoch": 5.41,
"learning_rate": 7.861217511384373e-06,
"loss": 3.214,
"step": 660500
},
{
"epoch": 5.42,
"learning_rate": 7.859573558664454e-06,
"loss": 3.2186,
"step": 661000
},
{
"epoch": 5.42,
"learning_rate": 7.857929605944534e-06,
"loss": 3.2234,
"step": 661500
},
{
"epoch": 5.42,
"learning_rate": 7.856285653224613e-06,
"loss": 3.22,
"step": 662000
},
{
"epoch": 5.43,
"learning_rate": 7.854641700504694e-06,
"loss": 3.2187,
"step": 662500
},
{
"epoch": 5.43,
"learning_rate": 7.852997747784775e-06,
"loss": 3.2166,
"step": 663000
},
{
"epoch": 5.44,
"learning_rate": 7.851353795064854e-06,
"loss": 3.218,
"step": 663500
},
{
"epoch": 5.44,
"learning_rate": 7.849709842344935e-06,
"loss": 3.2183,
"step": 664000
},
{
"epoch": 5.44,
"learning_rate": 7.848065889625014e-06,
"loss": 3.2065,
"step": 664500
},
{
"epoch": 5.45,
"learning_rate": 7.846421936905095e-06,
"loss": 3.2123,
"step": 665000
},
{
"epoch": 5.45,
"learning_rate": 7.844777984185176e-06,
"loss": 3.2178,
"step": 665500
},
{
"epoch": 5.46,
"learning_rate": 7.843134031465256e-06,
"loss": 3.215,
"step": 666000
},
{
"epoch": 5.46,
"learning_rate": 7.841490078745336e-06,
"loss": 3.212,
"step": 666500
},
{
"epoch": 5.46,
"learning_rate": 7.839846126025416e-06,
"loss": 3.2089,
"step": 667000
},
{
"epoch": 5.47,
"learning_rate": 7.838202173305495e-06,
"loss": 3.2153,
"step": 667500
},
{
"epoch": 5.47,
"learning_rate": 7.836558220585576e-06,
"loss": 3.2125,
"step": 668000
},
{
"epoch": 5.48,
"learning_rate": 7.834914267865657e-06,
"loss": 3.2162,
"step": 668500
},
{
"epoch": 5.48,
"learning_rate": 7.833270315145736e-06,
"loss": 3.2106,
"step": 669000
},
{
"epoch": 5.49,
"learning_rate": 7.831626362425817e-06,
"loss": 3.2122,
"step": 669500
},
{
"epoch": 5.49,
"learning_rate": 7.829982409705898e-06,
"loss": 3.2046,
"step": 670000
},
{
"epoch": 5.49,
"learning_rate": 7.828338456985978e-06,
"loss": 3.202,
"step": 670500
},
{
"epoch": 5.5,
"learning_rate": 7.826694504266058e-06,
"loss": 3.2076,
"step": 671000
},
{
"epoch": 5.5,
"learning_rate": 7.825050551546138e-06,
"loss": 3.2012,
"step": 671500
},
{
"epoch": 5.51,
"learning_rate": 7.823406598826219e-06,
"loss": 3.2134,
"step": 672000
},
{
"epoch": 5.51,
"learning_rate": 7.8217626461063e-06,
"loss": 3.2088,
"step": 672500
},
{
"epoch": 5.51,
"learning_rate": 7.820118693386379e-06,
"loss": 3.2135,
"step": 673000
},
{
"epoch": 5.52,
"learning_rate": 7.81847474066646e-06,
"loss": 3.206,
"step": 673500
},
{
"epoch": 5.52,
"learning_rate": 7.81683078794654e-06,
"loss": 3.2101,
"step": 674000
},
{
"epoch": 5.53,
"learning_rate": 7.815186835226619e-06,
"loss": 3.2208,
"step": 674500
},
{
"epoch": 5.53,
"learning_rate": 7.8135428825067e-06,
"loss": 3.2144,
"step": 675000
},
{
"epoch": 5.53,
"learning_rate": 7.81189892978678e-06,
"loss": 3.2144,
"step": 675500
},
{
"epoch": 5.54,
"learning_rate": 7.81025497706686e-06,
"loss": 3.2076,
"step": 676000
},
{
"epoch": 5.54,
"learning_rate": 7.80861102434694e-06,
"loss": 3.2067,
"step": 676500
},
{
"epoch": 5.55,
"learning_rate": 7.80696707162702e-06,
"loss": 3.2107,
"step": 677000
},
{
"epoch": 5.55,
"learning_rate": 7.805323118907101e-06,
"loss": 3.2109,
"step": 677500
},
{
"epoch": 5.55,
"learning_rate": 7.803679166187182e-06,
"loss": 3.2116,
"step": 678000
},
{
"epoch": 5.56,
"learning_rate": 7.802035213467261e-06,
"loss": 3.205,
"step": 678500
},
{
"epoch": 5.56,
"learning_rate": 7.800391260747342e-06,
"loss": 3.2019,
"step": 679000
},
{
"epoch": 5.57,
"learning_rate": 7.798747308027421e-06,
"loss": 3.2089,
"step": 679500
},
{
"epoch": 5.57,
"learning_rate": 7.7971033553075e-06,
"loss": 3.2096,
"step": 680000
},
{
"epoch": 5.58,
"learning_rate": 7.795459402587582e-06,
"loss": 3.2131,
"step": 680500
},
{
"epoch": 5.58,
"learning_rate": 7.793815449867663e-06,
"loss": 3.2079,
"step": 681000
},
{
"epoch": 5.58,
"learning_rate": 7.792171497147742e-06,
"loss": 3.2019,
"step": 681500
},
{
"epoch": 5.59,
"learning_rate": 7.790527544427823e-06,
"loss": 3.2092,
"step": 682000
},
{
"epoch": 5.59,
"learning_rate": 7.788883591707904e-06,
"loss": 3.2066,
"step": 682500
},
{
"epoch": 5.6,
"learning_rate": 7.787239638987983e-06,
"loss": 3.1992,
"step": 683000
},
{
"epoch": 5.6,
"learning_rate": 7.785595686268064e-06,
"loss": 3.2042,
"step": 683500
},
{
"epoch": 5.6,
"learning_rate": 7.783951733548143e-06,
"loss": 3.1997,
"step": 684000
},
{
"epoch": 5.61,
"learning_rate": 7.782307780828224e-06,
"loss": 3.2064,
"step": 684500
},
{
"epoch": 5.61,
"learning_rate": 7.780663828108305e-06,
"loss": 3.2102,
"step": 685000
},
{
"epoch": 5.62,
"learning_rate": 7.779019875388385e-06,
"loss": 3.202,
"step": 685500
},
{
"epoch": 5.62,
"learning_rate": 7.777375922668464e-06,
"loss": 3.2049,
"step": 686000
},
{
"epoch": 5.62,
"learning_rate": 7.775731969948545e-06,
"loss": 3.212,
"step": 686500
},
{
"epoch": 5.63,
"learning_rate": 7.774088017228624e-06,
"loss": 3.2008,
"step": 687000
},
{
"epoch": 5.63,
"learning_rate": 7.772444064508705e-06,
"loss": 3.2015,
"step": 687500
},
{
"epoch": 5.64,
"learning_rate": 7.770800111788786e-06,
"loss": 3.1986,
"step": 688000
},
{
"epoch": 5.64,
"learning_rate": 7.769156159068865e-06,
"loss": 3.2028,
"step": 688500
},
{
"epoch": 5.64,
"learning_rate": 7.767512206348946e-06,
"loss": 3.21,
"step": 689000
},
{
"epoch": 5.65,
"learning_rate": 7.765868253629026e-06,
"loss": 3.2104,
"step": 689500
},
{
"epoch": 5.65,
"learning_rate": 7.764224300909107e-06,
"loss": 3.2011,
"step": 690000
},
{
"epoch": 5.65,
"eval_accuracy": 0.4590029752907897,
"eval_loss": 3.068504810333252,
"eval_runtime": 404.8644,
"eval_samples_per_second": 761.628,
"eval_steps_per_second": 15.87,
"step": 690000
},
{
"epoch": 5.66,
"learning_rate": 7.762580348189188e-06,
"loss": 3.2015,
"step": 690500
},
{
"epoch": 5.66,
"learning_rate": 7.760936395469267e-06,
"loss": 3.2047,
"step": 691000
},
{
"epoch": 5.67,
"learning_rate": 7.759292442749348e-06,
"loss": 3.2034,
"step": 691500
},
{
"epoch": 5.67,
"learning_rate": 7.757648490029427e-06,
"loss": 3.1976,
"step": 692000
},
{
"epoch": 5.67,
"learning_rate": 7.756004537309506e-06,
"loss": 3.2029,
"step": 692500
},
{
"epoch": 5.68,
"learning_rate": 7.754360584589587e-06,
"loss": 3.1993,
"step": 693000
},
{
"epoch": 5.68,
"learning_rate": 7.752716631869668e-06,
"loss": 3.2092,
"step": 693500
},
{
"epoch": 5.69,
"learning_rate": 7.751072679149748e-06,
"loss": 3.2087,
"step": 694000
},
{
"epoch": 5.69,
"learning_rate": 7.749428726429829e-06,
"loss": 3.1983,
"step": 694500
},
{
"epoch": 5.69,
"learning_rate": 7.747784773709908e-06,
"loss": 3.209,
"step": 695000
},
{
"epoch": 5.7,
"learning_rate": 7.746140820989989e-06,
"loss": 3.1986,
"step": 695500
},
{
"epoch": 5.7,
"learning_rate": 7.74449686827007e-06,
"loss": 3.2066,
"step": 696000
},
{
"epoch": 5.71,
"learning_rate": 7.74285291555015e-06,
"loss": 3.2,
"step": 696500
},
{
"epoch": 5.71,
"learning_rate": 7.74120896283023e-06,
"loss": 3.1934,
"step": 697000
},
{
"epoch": 5.71,
"learning_rate": 7.739565010110311e-06,
"loss": 3.1966,
"step": 697500
},
{
"epoch": 5.72,
"learning_rate": 7.73792105739039e-06,
"loss": 3.1951,
"step": 698000
},
{
"epoch": 5.72,
"learning_rate": 7.73627710467047e-06,
"loss": 3.2011,
"step": 698500
},
{
"epoch": 5.73,
"learning_rate": 7.73463315195055e-06,
"loss": 3.2043,
"step": 699000
},
{
"epoch": 5.73,
"learning_rate": 7.73298919923063e-06,
"loss": 3.1936,
"step": 699500
},
{
"epoch": 5.73,
"learning_rate": 7.731345246510711e-06,
"loss": 3.1944,
"step": 700000
},
{
"epoch": 5.74,
"learning_rate": 7.729701293790792e-06,
"loss": 3.1976,
"step": 700500
},
{
"epoch": 5.74,
"learning_rate": 7.728057341070871e-06,
"loss": 3.1919,
"step": 701000
},
{
"epoch": 5.75,
"learning_rate": 7.726413388350952e-06,
"loss": 3.2013,
"step": 701500
},
{
"epoch": 5.75,
"learning_rate": 7.724769435631031e-06,
"loss": 3.1973,
"step": 702000
},
{
"epoch": 5.76,
"learning_rate": 7.723125482911112e-06,
"loss": 3.1948,
"step": 702500
},
{
"epoch": 5.76,
"learning_rate": 7.721481530191193e-06,
"loss": 3.1967,
"step": 703000
},
{
"epoch": 5.76,
"learning_rate": 7.719837577471273e-06,
"loss": 3.1862,
"step": 703500
},
{
"epoch": 5.77,
"learning_rate": 7.718193624751352e-06,
"loss": 3.1962,
"step": 704000
},
{
"epoch": 5.77,
"learning_rate": 7.716549672031433e-06,
"loss": 3.1963,
"step": 704500
},
{
"epoch": 5.78,
"learning_rate": 7.714905719311512e-06,
"loss": 3.1839,
"step": 705000
},
{
"epoch": 5.78,
"learning_rate": 7.713261766591593e-06,
"loss": 3.1969,
"step": 705500
},
{
"epoch": 5.78,
"learning_rate": 7.711617813871674e-06,
"loss": 3.1906,
"step": 706000
},
{
"epoch": 5.79,
"learning_rate": 7.709973861151753e-06,
"loss": 3.1996,
"step": 706500
},
{
"epoch": 5.79,
"learning_rate": 7.708329908431834e-06,
"loss": 3.1966,
"step": 707000
},
{
"epoch": 5.8,
"learning_rate": 7.706685955711914e-06,
"loss": 3.1923,
"step": 707500
},
{
"epoch": 5.8,
"learning_rate": 7.705042002991995e-06,
"loss": 3.1893,
"step": 708000
},
{
"epoch": 5.8,
"learning_rate": 7.703398050272076e-06,
"loss": 3.1887,
"step": 708500
},
{
"epoch": 5.81,
"learning_rate": 7.701754097552155e-06,
"loss": 3.1983,
"step": 709000
},
{
"epoch": 5.81,
"learning_rate": 7.700110144832236e-06,
"loss": 3.1923,
"step": 709500
},
{
"epoch": 5.82,
"learning_rate": 7.698466192112315e-06,
"loss": 3.1964,
"step": 710000
},
{
"epoch": 5.82,
"learning_rate": 7.696822239392396e-06,
"loss": 3.1965,
"step": 710500
},
{
"epoch": 5.83,
"learning_rate": 7.695178286672475e-06,
"loss": 3.1941,
"step": 711000
},
{
"epoch": 5.83,
"learning_rate": 7.693534333952556e-06,
"loss": 3.1893,
"step": 711500
},
{
"epoch": 5.83,
"learning_rate": 7.691890381232636e-06,
"loss": 3.1945,
"step": 712000
},
{
"epoch": 5.84,
"learning_rate": 7.690246428512717e-06,
"loss": 3.1931,
"step": 712500
},
{
"epoch": 5.84,
"learning_rate": 7.688602475792798e-06,
"loss": 3.1853,
"step": 713000
},
{
"epoch": 5.85,
"learning_rate": 7.686958523072877e-06,
"loss": 3.1902,
"step": 713500
},
{
"epoch": 5.85,
"learning_rate": 7.685314570352958e-06,
"loss": 3.2006,
"step": 714000
},
{
"epoch": 5.85,
"learning_rate": 7.683670617633037e-06,
"loss": 3.195,
"step": 714500
},
{
"epoch": 5.86,
"learning_rate": 7.682026664913118e-06,
"loss": 3.1975,
"step": 715000
},
{
"epoch": 5.86,
"learning_rate": 7.680382712193199e-06,
"loss": 3.1905,
"step": 715500
},
{
"epoch": 5.87,
"learning_rate": 7.678738759473278e-06,
"loss": 3.1872,
"step": 716000
},
{
"epoch": 5.87,
"learning_rate": 7.677094806753358e-06,
"loss": 3.1891,
"step": 716500
},
{
"epoch": 5.87,
"learning_rate": 7.675450854033439e-06,
"loss": 3.1857,
"step": 717000
},
{
"epoch": 5.88,
"learning_rate": 7.673806901313518e-06,
"loss": 3.1894,
"step": 717500
},
{
"epoch": 5.88,
"learning_rate": 7.672162948593599e-06,
"loss": 3.1856,
"step": 718000
},
{
"epoch": 5.89,
"learning_rate": 7.67051899587368e-06,
"loss": 3.1828,
"step": 718500
},
{
"epoch": 5.89,
"learning_rate": 7.668875043153759e-06,
"loss": 3.1812,
"step": 719000
},
{
"epoch": 5.89,
"learning_rate": 7.66723109043384e-06,
"loss": 3.1908,
"step": 719500
},
{
"epoch": 5.9,
"learning_rate": 7.66558713771392e-06,
"loss": 3.1898,
"step": 720000
},
{
"epoch": 5.9,
"eval_accuracy": 0.46191997629259224,
"eval_loss": 3.0464377403259277,
"eval_runtime": 413.3901,
"eval_samples_per_second": 745.92,
"eval_steps_per_second": 15.542,
"step": 720000
},
{
"epoch": 5.9,
"learning_rate": 7.663943184994e-06,
"loss": 3.1823,
"step": 720500
},
{
"epoch": 5.91,
"learning_rate": 7.662299232274081e-06,
"loss": 3.1837,
"step": 721000
},
{
"epoch": 5.91,
"learning_rate": 7.66065527955416e-06,
"loss": 3.1796,
"step": 721500
},
{
"epoch": 5.92,
"learning_rate": 7.659011326834242e-06,
"loss": 3.188,
"step": 722000
},
{
"epoch": 5.92,
"learning_rate": 7.65736737411432e-06,
"loss": 3.1883,
"step": 722500
},
{
"epoch": 5.92,
"learning_rate": 7.6557234213944e-06,
"loss": 3.1769,
"step": 723000
},
{
"epoch": 5.93,
"learning_rate": 7.654079468674481e-06,
"loss": 3.1887,
"step": 723500
},
{
"epoch": 5.93,
"learning_rate": 7.652435515954562e-06,
"loss": 3.1806,
"step": 724000
},
{
"epoch": 5.94,
"learning_rate": 7.650791563234641e-06,
"loss": 3.1816,
"step": 724500
},
{
"epoch": 5.94,
"learning_rate": 7.649147610514722e-06,
"loss": 3.1748,
"step": 725000
},
{
"epoch": 5.94,
"learning_rate": 7.647503657794803e-06,
"loss": 3.1888,
"step": 725500
},
{
"epoch": 5.95,
"learning_rate": 7.645859705074883e-06,
"loss": 3.1823,
"step": 726000
},
{
"epoch": 5.95,
"learning_rate": 7.644215752354964e-06,
"loss": 3.1763,
"step": 726500
},
{
"epoch": 5.96,
"learning_rate": 7.642571799635043e-06,
"loss": 3.1843,
"step": 727000
},
{
"epoch": 5.96,
"learning_rate": 7.640927846915124e-06,
"loss": 3.1856,
"step": 727500
},
{
"epoch": 5.96,
"learning_rate": 7.639283894195205e-06,
"loss": 3.1812,
"step": 728000
},
{
"epoch": 5.97,
"learning_rate": 7.637639941475284e-06,
"loss": 3.176,
"step": 728500
},
{
"epoch": 5.97,
"learning_rate": 7.635995988755363e-06,
"loss": 3.1842,
"step": 729000
},
{
"epoch": 5.98,
"learning_rate": 7.634352036035444e-06,
"loss": 3.1775,
"step": 729500
},
{
"epoch": 5.98,
"learning_rate": 7.632708083315524e-06,
"loss": 3.1803,
"step": 730000
},
{
"epoch": 5.98,
"learning_rate": 7.631064130595605e-06,
"loss": 3.1818,
"step": 730500
},
{
"epoch": 5.99,
"learning_rate": 7.629420177875686e-06,
"loss": 3.1754,
"step": 731000
},
{
"epoch": 5.99,
"learning_rate": 7.627776225155765e-06,
"loss": 3.1827,
"step": 731500
},
{
"epoch": 6.0,
"learning_rate": 7.626132272435846e-06,
"loss": 3.1745,
"step": 732000
},
{
"epoch": 6.0,
"learning_rate": 7.624488319715926e-06,
"loss": 3.1769,
"step": 732500
},
{
"epoch": 6.01,
"learning_rate": 7.622844366996006e-06,
"loss": 3.1717,
"step": 733000
},
{
"epoch": 6.01,
"learning_rate": 7.621200414276086e-06,
"loss": 3.1804,
"step": 733500
},
{
"epoch": 6.01,
"learning_rate": 7.619556461556167e-06,
"loss": 3.1726,
"step": 734000
},
{
"epoch": 6.02,
"learning_rate": 7.617912508836246e-06,
"loss": 3.1754,
"step": 734500
},
{
"epoch": 6.02,
"learning_rate": 7.6162685561163265e-06,
"loss": 3.177,
"step": 735000
},
{
"epoch": 6.03,
"learning_rate": 7.614624603396407e-06,
"loss": 3.1697,
"step": 735500
},
{
"epoch": 6.03,
"learning_rate": 7.612980650676487e-06,
"loss": 3.1712,
"step": 736000
},
{
"epoch": 6.03,
"learning_rate": 7.611336697956567e-06,
"loss": 3.1734,
"step": 736500
},
{
"epoch": 6.04,
"learning_rate": 7.609692745236648e-06,
"loss": 3.1702,
"step": 737000
},
{
"epoch": 6.04,
"learning_rate": 7.608048792516728e-06,
"loss": 3.1723,
"step": 737500
},
{
"epoch": 6.05,
"learning_rate": 7.606404839796808e-06,
"loss": 3.1815,
"step": 738000
},
{
"epoch": 6.05,
"learning_rate": 7.604760887076888e-06,
"loss": 3.1775,
"step": 738500
},
{
"epoch": 6.05,
"learning_rate": 7.603116934356969e-06,
"loss": 3.1654,
"step": 739000
},
{
"epoch": 6.06,
"learning_rate": 7.601472981637049e-06,
"loss": 3.1639,
"step": 739500
},
{
"epoch": 6.06,
"learning_rate": 7.5998290289171295e-06,
"loss": 3.1757,
"step": 740000
},
{
"epoch": 6.07,
"learning_rate": 7.598185076197209e-06,
"loss": 3.176,
"step": 740500
},
{
"epoch": 6.07,
"learning_rate": 7.596541123477289e-06,
"loss": 3.1781,
"step": 741000
},
{
"epoch": 6.07,
"learning_rate": 7.594897170757369e-06,
"loss": 3.1671,
"step": 741500
},
{
"epoch": 6.08,
"learning_rate": 7.59325321803745e-06,
"loss": 3.1691,
"step": 742000
},
{
"epoch": 6.08,
"learning_rate": 7.59160926531753e-06,
"loss": 3.1701,
"step": 742500
},
{
"epoch": 6.09,
"learning_rate": 7.58996531259761e-06,
"loss": 3.1767,
"step": 743000
},
{
"epoch": 6.09,
"learning_rate": 7.58832135987769e-06,
"loss": 3.1719,
"step": 743500
},
{
"epoch": 6.1,
"learning_rate": 7.5866774071577705e-06,
"loss": 3.1576,
"step": 744000
},
{
"epoch": 6.1,
"learning_rate": 7.5850334544378515e-06,
"loss": 3.167,
"step": 744500
},
{
"epoch": 6.1,
"learning_rate": 7.583389501717932e-06,
"loss": 3.172,
"step": 745000
},
{
"epoch": 6.11,
"learning_rate": 7.581745548998012e-06,
"loss": 3.17,
"step": 745500
},
{
"epoch": 6.11,
"learning_rate": 7.580101596278092e-06,
"loss": 3.1622,
"step": 746000
},
{
"epoch": 6.12,
"learning_rate": 7.578457643558171e-06,
"loss": 3.1708,
"step": 746500
},
{
"epoch": 6.12,
"learning_rate": 7.576813690838252e-06,
"loss": 3.1658,
"step": 747000
},
{
"epoch": 6.12,
"learning_rate": 7.575169738118332e-06,
"loss": 3.1665,
"step": 747500
},
{
"epoch": 6.13,
"learning_rate": 7.573525785398412e-06,
"loss": 3.157,
"step": 748000
},
{
"epoch": 6.13,
"learning_rate": 7.5718818326784925e-06,
"loss": 3.164,
"step": 748500
},
{
"epoch": 6.14,
"learning_rate": 7.570237879958573e-06,
"loss": 3.1626,
"step": 749000
},
{
"epoch": 6.14,
"learning_rate": 7.568593927238654e-06,
"loss": 3.162,
"step": 749500
},
{
"epoch": 6.14,
"learning_rate": 7.566949974518734e-06,
"loss": 3.1651,
"step": 750000
},
{
"epoch": 6.14,
"eval_accuracy": 0.4657812440113609,
"eval_loss": 3.022564172744751,
"eval_runtime": 408.1772,
"eval_samples_per_second": 755.446,
"eval_steps_per_second": 15.741,
"step": 750000
},
{
"epoch": 6.15,
"learning_rate": 7.565306021798814e-06,
"loss": 3.1641,
"step": 750500
},
{
"epoch": 6.15,
"learning_rate": 7.563662069078894e-06,
"loss": 3.153,
"step": 751000
},
{
"epoch": 6.16,
"learning_rate": 7.562018116358975e-06,
"loss": 3.1609,
"step": 751500
},
{
"epoch": 6.16,
"learning_rate": 7.560374163639055e-06,
"loss": 3.1574,
"step": 752000
},
{
"epoch": 6.17,
"learning_rate": 7.558730210919134e-06,
"loss": 3.1566,
"step": 752500
},
{
"epoch": 6.17,
"learning_rate": 7.5570862581992145e-06,
"loss": 3.162,
"step": 753000
},
{
"epoch": 6.17,
"learning_rate": 7.555442305479295e-06,
"loss": 3.1615,
"step": 753500
},
{
"epoch": 6.18,
"learning_rate": 7.553798352759375e-06,
"loss": 3.1636,
"step": 754000
},
{
"epoch": 6.18,
"learning_rate": 7.552154400039456e-06,
"loss": 3.1591,
"step": 754500
},
{
"epoch": 6.19,
"learning_rate": 7.550510447319536e-06,
"loss": 3.1607,
"step": 755000
},
{
"epoch": 6.19,
"learning_rate": 7.548866494599616e-06,
"loss": 3.1572,
"step": 755500
},
{
"epoch": 6.19,
"learning_rate": 7.547222541879696e-06,
"loss": 3.1628,
"step": 756000
},
{
"epoch": 6.2,
"learning_rate": 7.545578589159776e-06,
"loss": 3.1555,
"step": 756500
},
{
"epoch": 6.2,
"learning_rate": 7.543934636439857e-06,
"loss": 3.1581,
"step": 757000
},
{
"epoch": 6.21,
"learning_rate": 7.542290683719937e-06,
"loss": 3.1646,
"step": 757500
},
{
"epoch": 6.21,
"learning_rate": 7.5406467310000174e-06,
"loss": 3.1626,
"step": 758000
},
{
"epoch": 6.21,
"learning_rate": 7.539002778280097e-06,
"loss": 3.1525,
"step": 758500
},
{
"epoch": 6.22,
"learning_rate": 7.537358825560177e-06,
"loss": 3.1567,
"step": 759000
},
{
"epoch": 6.22,
"learning_rate": 7.535714872840257e-06,
"loss": 3.1599,
"step": 759500
},
{
"epoch": 6.23,
"learning_rate": 7.534070920120338e-06,
"loss": 3.1513,
"step": 760000
},
{
"epoch": 6.23,
"learning_rate": 7.532426967400418e-06,
"loss": 3.1621,
"step": 760500
},
{
"epoch": 6.23,
"learning_rate": 7.530783014680498e-06,
"loss": 3.1586,
"step": 761000
},
{
"epoch": 6.24,
"learning_rate": 7.529139061960578e-06,
"loss": 3.1493,
"step": 761500
},
{
"epoch": 6.24,
"learning_rate": 7.527495109240659e-06,
"loss": 3.1613,
"step": 762000
},
{
"epoch": 6.25,
"learning_rate": 7.525851156520739e-06,
"loss": 3.1542,
"step": 762500
},
{
"epoch": 6.25,
"learning_rate": 7.5242072038008195e-06,
"loss": 3.1486,
"step": 763000
},
{
"epoch": 6.26,
"learning_rate": 7.5225632510809e-06,
"loss": 3.1542,
"step": 763500
},
{
"epoch": 6.26,
"learning_rate": 7.520919298360981e-06,
"loss": 3.1495,
"step": 764000
},
{
"epoch": 6.26,
"learning_rate": 7.519275345641059e-06,
"loss": 3.1548,
"step": 764500
},
{
"epoch": 6.27,
"learning_rate": 7.51763139292114e-06,
"loss": 3.1525,
"step": 765000
},
{
"epoch": 6.27,
"learning_rate": 7.51598744020122e-06,
"loss": 3.1562,
"step": 765500
},
{
"epoch": 6.28,
"learning_rate": 7.5143434874813e-06,
"loss": 3.163,
"step": 766000
},
{
"epoch": 6.28,
"learning_rate": 7.5126995347613804e-06,
"loss": 3.1509,
"step": 766500
},
{
"epoch": 6.28,
"learning_rate": 7.511055582041461e-06,
"loss": 3.1502,
"step": 767000
},
{
"epoch": 6.29,
"learning_rate": 7.5094116293215415e-06,
"loss": 3.1499,
"step": 767500
},
{
"epoch": 6.29,
"learning_rate": 7.507767676601622e-06,
"loss": 3.1461,
"step": 768000
},
{
"epoch": 6.3,
"learning_rate": 7.506123723881702e-06,
"loss": 3.1437,
"step": 768500
},
{
"epoch": 6.3,
"learning_rate": 7.504479771161782e-06,
"loss": 3.1509,
"step": 769000
},
{
"epoch": 6.3,
"learning_rate": 7.502835818441863e-06,
"loss": 3.1525,
"step": 769500
},
{
"epoch": 6.31,
"learning_rate": 7.501191865721943e-06,
"loss": 3.1488,
"step": 770000
},
{
"epoch": 6.31,
"learning_rate": 7.499547913002023e-06,
"loss": 3.1541,
"step": 770500
},
{
"epoch": 6.32,
"learning_rate": 7.497903960282102e-06,
"loss": 3.147,
"step": 771000
},
{
"epoch": 6.32,
"learning_rate": 7.4962600075621825e-06,
"loss": 3.1472,
"step": 771500
},
{
"epoch": 6.32,
"learning_rate": 7.494616054842263e-06,
"loss": 3.15,
"step": 772000
},
{
"epoch": 6.33,
"learning_rate": 7.492972102122344e-06,
"loss": 3.1486,
"step": 772500
},
{
"epoch": 6.33,
"learning_rate": 7.491328149402424e-06,
"loss": 3.1611,
"step": 773000
},
{
"epoch": 6.34,
"learning_rate": 7.489684196682504e-06,
"loss": 3.1517,
"step": 773500
},
{
"epoch": 6.34,
"learning_rate": 7.488040243962584e-06,
"loss": 3.1473,
"step": 774000
},
{
"epoch": 6.35,
"learning_rate": 7.486396291242665e-06,
"loss": 3.1438,
"step": 774500
},
{
"epoch": 6.35,
"learning_rate": 7.484752338522745e-06,
"loss": 3.137,
"step": 775000
},
{
"epoch": 6.35,
"learning_rate": 7.483108385802825e-06,
"loss": 3.15,
"step": 775500
},
{
"epoch": 6.36,
"learning_rate": 7.481464433082905e-06,
"loss": 3.1464,
"step": 776000
},
{
"epoch": 6.36,
"learning_rate": 7.4798204803629855e-06,
"loss": 3.1438,
"step": 776500
},
{
"epoch": 6.37,
"learning_rate": 7.478176527643065e-06,
"loss": 3.1437,
"step": 777000
},
{
"epoch": 6.37,
"learning_rate": 7.476532574923146e-06,
"loss": 3.1401,
"step": 777500
},
{
"epoch": 6.37,
"learning_rate": 7.474888622203226e-06,
"loss": 3.144,
"step": 778000
},
{
"epoch": 6.38,
"learning_rate": 7.473244669483306e-06,
"loss": 3.1513,
"step": 778500
},
{
"epoch": 6.38,
"learning_rate": 7.471600716763386e-06,
"loss": 3.1559,
"step": 779000
},
{
"epoch": 6.39,
"learning_rate": 7.469956764043467e-06,
"loss": 3.1461,
"step": 779500
},
{
"epoch": 6.39,
"learning_rate": 7.468312811323547e-06,
"loss": 3.1477,
"step": 780000
},
{
"epoch": 6.39,
"eval_accuracy": 0.4689297446428191,
"eval_loss": 3.002525568008423,
"eval_runtime": 411.5642,
"eval_samples_per_second": 749.229,
"eval_steps_per_second": 15.611,
"step": 780000
},
{
"epoch": 6.39,
"learning_rate": 7.466668858603627e-06,
"loss": 3.1411,
"step": 780500
},
{
"epoch": 6.4,
"learning_rate": 7.4650249058837075e-06,
"loss": 3.1422,
"step": 781000
},
{
"epoch": 6.4,
"learning_rate": 7.463380953163788e-06,
"loss": 3.1464,
"step": 781500
},
{
"epoch": 6.41,
"learning_rate": 7.461737000443869e-06,
"loss": 3.1429,
"step": 782000
},
{
"epoch": 6.41,
"learning_rate": 7.460093047723949e-06,
"loss": 3.1451,
"step": 782500
},
{
"epoch": 6.41,
"learning_rate": 7.458449095004028e-06,
"loss": 3.1436,
"step": 783000
},
{
"epoch": 6.42,
"learning_rate": 7.456805142284108e-06,
"loss": 3.1398,
"step": 783500
},
{
"epoch": 6.42,
"learning_rate": 7.455161189564188e-06,
"loss": 3.1447,
"step": 784000
},
{
"epoch": 6.43,
"learning_rate": 7.453517236844268e-06,
"loss": 3.1414,
"step": 784500
},
{
"epoch": 6.43,
"learning_rate": 7.451873284124349e-06,
"loss": 3.1408,
"step": 785000
},
{
"epoch": 6.44,
"learning_rate": 7.4502293314044295e-06,
"loss": 3.1384,
"step": 785500
},
{
"epoch": 6.44,
"learning_rate": 7.44858537868451e-06,
"loss": 3.1389,
"step": 786000
},
{
"epoch": 6.44,
"learning_rate": 7.44694142596459e-06,
"loss": 3.149,
"step": 786500
},
{
"epoch": 6.45,
"learning_rate": 7.445297473244671e-06,
"loss": 3.1404,
"step": 787000
},
{
"epoch": 6.45,
"learning_rate": 7.443653520524751e-06,
"loss": 3.1353,
"step": 787500
},
{
"epoch": 6.46,
"learning_rate": 7.442009567804831e-06,
"loss": 3.134,
"step": 788000
},
{
"epoch": 6.46,
"learning_rate": 7.440365615084911e-06,
"loss": 3.1423,
"step": 788500
},
{
"epoch": 6.46,
"learning_rate": 7.43872166236499e-06,
"loss": 3.14,
"step": 789000
},
{
"epoch": 6.47,
"learning_rate": 7.4370777096450705e-06,
"loss": 3.1309,
"step": 789500
},
{
"epoch": 6.47,
"learning_rate": 7.4354337569251515e-06,
"loss": 3.1384,
"step": 790000
},
{
"epoch": 6.48,
"learning_rate": 7.433789804205232e-06,
"loss": 3.1371,
"step": 790500
},
{
"epoch": 6.48,
"learning_rate": 7.432145851485312e-06,
"loss": 3.136,
"step": 791000
},
{
"epoch": 6.48,
"learning_rate": 7.430501898765392e-06,
"loss": 3.1433,
"step": 791500
},
{
"epoch": 6.49,
"learning_rate": 7.428857946045473e-06,
"loss": 3.1398,
"step": 792000
},
{
"epoch": 6.49,
"learning_rate": 7.427213993325553e-06,
"loss": 3.1428,
"step": 792500
},
{
"epoch": 6.5,
"learning_rate": 7.425570040605633e-06,
"loss": 3.1316,
"step": 793000
},
{
"epoch": 6.5,
"learning_rate": 7.423926087885713e-06,
"loss": 3.1392,
"step": 793500
},
{
"epoch": 6.51,
"learning_rate": 7.422282135165793e-06,
"loss": 3.1288,
"step": 794000
},
{
"epoch": 6.51,
"learning_rate": 7.420638182445874e-06,
"loss": 3.1354,
"step": 794500
},
{
"epoch": 6.51,
"learning_rate": 7.4189942297259536e-06,
"loss": 3.1339,
"step": 795000
},
{
"epoch": 6.52,
"learning_rate": 7.417350277006034e-06,
"loss": 3.1419,
"step": 795500
},
{
"epoch": 6.52,
"learning_rate": 7.415706324286114e-06,
"loss": 3.1363,
"step": 796000
},
{
"epoch": 6.53,
"learning_rate": 7.414062371566194e-06,
"loss": 3.134,
"step": 796500
},
{
"epoch": 6.53,
"learning_rate": 7.412418418846274e-06,
"loss": 3.13,
"step": 797000
},
{
"epoch": 6.53,
"learning_rate": 7.410774466126355e-06,
"loss": 3.1382,
"step": 797500
},
{
"epoch": 6.54,
"learning_rate": 7.409130513406435e-06,
"loss": 3.1341,
"step": 798000
},
{
"epoch": 6.54,
"learning_rate": 7.407486560686515e-06,
"loss": 3.1306,
"step": 798500
},
{
"epoch": 6.55,
"learning_rate": 7.4058426079665954e-06,
"loss": 3.134,
"step": 799000
},
{
"epoch": 6.55,
"learning_rate": 7.404198655246676e-06,
"loss": 3.1329,
"step": 799500
},
{
"epoch": 6.55,
"learning_rate": 7.4025547025267565e-06,
"loss": 3.1382,
"step": 800000
},
{
"epoch": 6.56,
"learning_rate": 7.400910749806837e-06,
"loss": 3.1293,
"step": 800500
},
{
"epoch": 6.56,
"learning_rate": 7.399266797086916e-06,
"loss": 3.135,
"step": 801000
},
{
"epoch": 6.57,
"learning_rate": 7.397622844366996e-06,
"loss": 3.1407,
"step": 801500
},
{
"epoch": 6.57,
"learning_rate": 7.395978891647076e-06,
"loss": 3.1354,
"step": 802000
},
{
"epoch": 6.57,
"learning_rate": 7.394334938927157e-06,
"loss": 3.1371,
"step": 802500
},
{
"epoch": 6.58,
"learning_rate": 7.392690986207237e-06,
"loss": 3.1362,
"step": 803000
},
{
"epoch": 6.58,
"learning_rate": 7.391047033487317e-06,
"loss": 3.129,
"step": 803500
},
{
"epoch": 6.59,
"learning_rate": 7.3894030807673975e-06,
"loss": 3.1378,
"step": 804000
},
{
"epoch": 6.59,
"learning_rate": 7.387759128047478e-06,
"loss": 3.125,
"step": 804500
},
{
"epoch": 6.6,
"learning_rate": 7.386115175327559e-06,
"loss": 3.1285,
"step": 805000
},
{
"epoch": 6.6,
"learning_rate": 7.384471222607639e-06,
"loss": 3.1344,
"step": 805500
},
{
"epoch": 6.6,
"learning_rate": 7.382827269887719e-06,
"loss": 3.1241,
"step": 806000
},
{
"epoch": 6.61,
"learning_rate": 7.381183317167799e-06,
"loss": 3.1264,
"step": 806500
},
{
"epoch": 6.61,
"learning_rate": 7.379539364447878e-06,
"loss": 3.1278,
"step": 807000
},
{
"epoch": 6.62,
"learning_rate": 7.377895411727959e-06,
"loss": 3.1363,
"step": 807500
},
{
"epoch": 6.62,
"learning_rate": 7.376251459008039e-06,
"loss": 3.124,
"step": 808000
},
{
"epoch": 6.62,
"learning_rate": 7.3746075062881195e-06,
"loss": 3.1246,
"step": 808500
},
{
"epoch": 6.63,
"learning_rate": 7.3729635535682e-06,
"loss": 3.1281,
"step": 809000
},
{
"epoch": 6.63,
"learning_rate": 7.37131960084828e-06,
"loss": 3.1198,
"step": 809500
},
{
"epoch": 6.64,
"learning_rate": 7.369675648128361e-06,
"loss": 3.1276,
"step": 810000
},
{
"epoch": 6.64,
"eval_accuracy": 0.47175398712230254,
"eval_loss": 2.9837634563446045,
"eval_runtime": 427.1705,
"eval_samples_per_second": 721.857,
"eval_steps_per_second": 15.041,
"step": 810000
},
{
"epoch": 6.64,
"learning_rate": 7.368031695408441e-06,
"loss": 3.1275,
"step": 810500
},
{
"epoch": 6.64,
"learning_rate": 7.366387742688521e-06,
"loss": 3.1262,
"step": 811000
},
{
"epoch": 6.65,
"learning_rate": 7.364743789968601e-06,
"loss": 3.1216,
"step": 811500
},
{
"epoch": 6.65,
"learning_rate": 7.363099837248682e-06,
"loss": 3.1212,
"step": 812000
},
{
"epoch": 6.66,
"learning_rate": 7.361455884528762e-06,
"loss": 3.1169,
"step": 812500
},
{
"epoch": 6.66,
"learning_rate": 7.3598119318088415e-06,
"loss": 3.1221,
"step": 813000
},
{
"epoch": 6.66,
"learning_rate": 7.358167979088922e-06,
"loss": 3.128,
"step": 813500
},
{
"epoch": 6.67,
"learning_rate": 7.356524026369002e-06,
"loss": 3.1245,
"step": 814000
},
{
"epoch": 6.67,
"learning_rate": 7.354880073649082e-06,
"loss": 3.1248,
"step": 814500
},
{
"epoch": 6.68,
"learning_rate": 7.353236120929163e-06,
"loss": 3.1219,
"step": 815000
},
{
"epoch": 6.68,
"learning_rate": 7.351592168209243e-06,
"loss": 3.1285,
"step": 815500
},
{
"epoch": 6.69,
"learning_rate": 7.349948215489323e-06,
"loss": 3.1275,
"step": 816000
},
{
"epoch": 6.69,
"learning_rate": 7.348304262769403e-06,
"loss": 3.1213,
"step": 816500
},
{
"epoch": 6.69,
"learning_rate": 7.346660310049483e-06,
"loss": 3.1211,
"step": 817000
},
{
"epoch": 6.7,
"learning_rate": 7.345016357329564e-06,
"loss": 3.1231,
"step": 817500
},
{
"epoch": 6.7,
"learning_rate": 7.3433724046096445e-06,
"loss": 3.1306,
"step": 818000
},
{
"epoch": 6.71,
"learning_rate": 7.341728451889725e-06,
"loss": 3.1169,
"step": 818500
},
{
"epoch": 6.71,
"learning_rate": 7.340084499169804e-06,
"loss": 3.1254,
"step": 819000
},
{
"epoch": 6.71,
"learning_rate": 7.338440546449884e-06,
"loss": 3.1182,
"step": 819500
},
{
"epoch": 6.72,
"learning_rate": 7.336796593729965e-06,
"loss": 3.1185,
"step": 820000
},
{
"epoch": 6.72,
"learning_rate": 7.335152641010045e-06,
"loss": 3.1197,
"step": 820500
},
{
"epoch": 6.73,
"learning_rate": 7.333508688290125e-06,
"loss": 3.1243,
"step": 821000
},
{
"epoch": 6.73,
"learning_rate": 7.331864735570205e-06,
"loss": 3.1223,
"step": 821500
},
{
"epoch": 6.73,
"learning_rate": 7.3302207828502855e-06,
"loss": 3.1259,
"step": 822000
},
{
"epoch": 6.74,
"learning_rate": 7.3285768301303665e-06,
"loss": 3.1265,
"step": 822500
},
{
"epoch": 6.74,
"learning_rate": 7.326932877410447e-06,
"loss": 3.1141,
"step": 823000
},
{
"epoch": 6.75,
"learning_rate": 7.325288924690527e-06,
"loss": 3.1206,
"step": 823500
},
{
"epoch": 6.75,
"learning_rate": 7.323644971970607e-06,
"loss": 3.1178,
"step": 824000
},
{
"epoch": 6.75,
"learning_rate": 7.322001019250688e-06,
"loss": 3.1122,
"step": 824500
},
{
"epoch": 6.76,
"learning_rate": 7.320357066530768e-06,
"loss": 3.1187,
"step": 825000
},
{
"epoch": 6.76,
"learning_rate": 7.318713113810847e-06,
"loss": 3.118,
"step": 825500
},
{
"epoch": 6.77,
"learning_rate": 7.317069161090927e-06,
"loss": 3.1143,
"step": 826000
},
{
"epoch": 6.77,
"learning_rate": 7.3154252083710075e-06,
"loss": 3.1176,
"step": 826500
},
{
"epoch": 6.78,
"learning_rate": 7.313781255651088e-06,
"loss": 3.1117,
"step": 827000
},
{
"epoch": 6.78,
"learning_rate": 7.3121373029311686e-06,
"loss": 3.1185,
"step": 827500
},
{
"epoch": 6.78,
"learning_rate": 7.310493350211249e-06,
"loss": 3.1208,
"step": 828000
},
{
"epoch": 6.79,
"learning_rate": 7.308849397491329e-06,
"loss": 3.1171,
"step": 828500
},
{
"epoch": 6.79,
"learning_rate": 7.307205444771409e-06,
"loss": 3.1127,
"step": 829000
},
{
"epoch": 6.8,
"learning_rate": 7.305561492051489e-06,
"loss": 3.1146,
"step": 829500
},
{
"epoch": 6.8,
"learning_rate": 7.30391753933157e-06,
"loss": 3.1141,
"step": 830000
},
{
"epoch": 6.8,
"learning_rate": 7.30227358661165e-06,
"loss": 3.1191,
"step": 830500
},
{
"epoch": 6.81,
"learning_rate": 7.30062963389173e-06,
"loss": 3.123,
"step": 831000
},
{
"epoch": 6.81,
"learning_rate": 7.29898568117181e-06,
"loss": 3.1178,
"step": 831500
},
{
"epoch": 6.82,
"learning_rate": 7.29734172845189e-06,
"loss": 3.1133,
"step": 832000
},
{
"epoch": 6.82,
"learning_rate": 7.29569777573197e-06,
"loss": 3.1174,
"step": 832500
},
{
"epoch": 6.82,
"learning_rate": 7.294053823012051e-06,
"loss": 3.1201,
"step": 833000
},
{
"epoch": 6.83,
"learning_rate": 7.292409870292131e-06,
"loss": 3.1199,
"step": 833500
},
{
"epoch": 6.83,
"learning_rate": 7.290765917572211e-06,
"loss": 3.1157,
"step": 834000
},
{
"epoch": 6.84,
"learning_rate": 7.289121964852291e-06,
"loss": 3.1183,
"step": 834500
},
{
"epoch": 6.84,
"learning_rate": 7.287478012132372e-06,
"loss": 3.1141,
"step": 835000
},
{
"epoch": 6.85,
"learning_rate": 7.285834059412452e-06,
"loss": 3.1165,
"step": 835500
},
{
"epoch": 6.85,
"learning_rate": 7.284190106692532e-06,
"loss": 3.1112,
"step": 836000
},
{
"epoch": 6.85,
"learning_rate": 7.2825461539726125e-06,
"loss": 3.1217,
"step": 836500
},
{
"epoch": 6.86,
"learning_rate": 7.2809022012526935e-06,
"loss": 3.1089,
"step": 837000
},
{
"epoch": 6.86,
"learning_rate": 7.279258248532772e-06,
"loss": 3.118,
"step": 837500
},
{
"epoch": 6.87,
"learning_rate": 7.277614295812853e-06,
"loss": 3.1127,
"step": 838000
},
{
"epoch": 6.87,
"learning_rate": 7.275970343092933e-06,
"loss": 3.1117,
"step": 838500
},
{
"epoch": 6.87,
"learning_rate": 7.274326390373013e-06,
"loss": 3.1112,
"step": 839000
},
{
"epoch": 6.88,
"learning_rate": 7.272682437653093e-06,
"loss": 3.1084,
"step": 839500
},
{
"epoch": 6.88,
"learning_rate": 7.271038484933174e-06,
"loss": 3.1102,
"step": 840000
},
{
"epoch": 6.88,
"eval_accuracy": 0.47402583913951796,
"eval_loss": 2.969046115875244,
"eval_runtime": 428.0247,
"eval_samples_per_second": 720.416,
"eval_steps_per_second": 15.011,
"step": 840000
},
{
"epoch": 6.89,
"learning_rate": 7.269394532213254e-06,
"loss": 3.1116,
"step": 840500
},
{
"epoch": 6.89,
"learning_rate": 7.2677505794933345e-06,
"loss": 3.1169,
"step": 841000
},
{
"epoch": 6.89,
"learning_rate": 7.266106626773415e-06,
"loss": 3.1087,
"step": 841500
},
{
"epoch": 6.9,
"learning_rate": 7.264462674053495e-06,
"loss": 3.1085,
"step": 842000
},
{
"epoch": 6.9,
"learning_rate": 7.262818721333576e-06,
"loss": 3.1109,
"step": 842500
},
{
"epoch": 6.91,
"learning_rate": 7.261174768613656e-06,
"loss": 3.112,
"step": 843000
},
{
"epoch": 6.91,
"learning_rate": 7.259530815893735e-06,
"loss": 3.1072,
"step": 843500
},
{
"epoch": 6.91,
"learning_rate": 7.257886863173815e-06,
"loss": 3.1142,
"step": 844000
},
{
"epoch": 6.92,
"learning_rate": 7.256242910453895e-06,
"loss": 3.1146,
"step": 844500
},
{
"epoch": 6.92,
"learning_rate": 7.2545989577339755e-06,
"loss": 3.1098,
"step": 845000
},
{
"epoch": 6.93,
"learning_rate": 7.2529550050140565e-06,
"loss": 3.1138,
"step": 845500
},
{
"epoch": 6.93,
"learning_rate": 7.251311052294137e-06,
"loss": 3.1111,
"step": 846000
},
{
"epoch": 6.94,
"learning_rate": 7.249667099574217e-06,
"loss": 3.1045,
"step": 846500
},
{
"epoch": 6.94,
"learning_rate": 7.248023146854297e-06,
"loss": 3.1108,
"step": 847000
},
{
"epoch": 6.94,
"learning_rate": 7.246379194134378e-06,
"loss": 3.109,
"step": 847500
},
{
"epoch": 6.95,
"learning_rate": 7.244735241414458e-06,
"loss": 3.118,
"step": 848000
},
{
"epoch": 6.95,
"learning_rate": 7.243091288694538e-06,
"loss": 3.1119,
"step": 848500
},
{
"epoch": 6.96,
"learning_rate": 7.241447335974618e-06,
"loss": 3.1125,
"step": 849000
},
{
"epoch": 6.96,
"learning_rate": 7.2398033832546975e-06,
"loss": 3.1131,
"step": 849500
},
{
"epoch": 6.96,
"learning_rate": 7.238159430534778e-06,
"loss": 3.1072,
"step": 850000
},
{
"epoch": 6.97,
"learning_rate": 7.236515477814859e-06,
"loss": 3.1102,
"step": 850500
},
{
"epoch": 6.97,
"learning_rate": 7.234871525094939e-06,
"loss": 3.1095,
"step": 851000
},
{
"epoch": 6.98,
"learning_rate": 7.233227572375019e-06,
"loss": 3.1029,
"step": 851500
},
{
"epoch": 6.98,
"learning_rate": 7.231583619655099e-06,
"loss": 3.1064,
"step": 852000
},
{
"epoch": 6.98,
"learning_rate": 7.22993966693518e-06,
"loss": 3.1105,
"step": 852500
},
{
"epoch": 6.99,
"learning_rate": 7.22829571421526e-06,
"loss": 3.1084,
"step": 853000
},
{
"epoch": 6.99,
"learning_rate": 7.22665176149534e-06,
"loss": 3.107,
"step": 853500
},
{
"epoch": 7.0,
"learning_rate": 7.22500780877542e-06,
"loss": 3.1091,
"step": 854000
},
{
"epoch": 7.0,
"learning_rate": 7.2233638560555005e-06,
"loss": 3.1021,
"step": 854500
},
{
"epoch": 7.0,
"learning_rate": 7.2217199033355815e-06,
"loss": 3.0996,
"step": 855000
},
{
"epoch": 7.01,
"learning_rate": 7.220075950615661e-06,
"loss": 3.0913,
"step": 855500
},
{
"epoch": 7.01,
"learning_rate": 7.218431997895741e-06,
"loss": 3.1022,
"step": 856000
},
{
"epoch": 7.02,
"learning_rate": 7.216788045175821e-06,
"loss": 3.1077,
"step": 856500
},
{
"epoch": 7.02,
"learning_rate": 7.215144092455901e-06,
"loss": 3.1003,
"step": 857000
},
{
"epoch": 7.03,
"learning_rate": 7.213500139735981e-06,
"loss": 3.1036,
"step": 857500
},
{
"epoch": 7.03,
"learning_rate": 7.211856187016062e-06,
"loss": 3.1037,
"step": 858000
},
{
"epoch": 7.03,
"learning_rate": 7.210212234296142e-06,
"loss": 3.1082,
"step": 858500
},
{
"epoch": 7.04,
"learning_rate": 7.2085682815762225e-06,
"loss": 3.1052,
"step": 859000
},
{
"epoch": 7.04,
"learning_rate": 7.206924328856303e-06,
"loss": 3.1078,
"step": 859500
},
{
"epoch": 7.05,
"learning_rate": 7.2052803761363836e-06,
"loss": 3.1049,
"step": 860000
},
{
"epoch": 7.05,
"learning_rate": 7.203636423416464e-06,
"loss": 3.0981,
"step": 860500
},
{
"epoch": 7.05,
"learning_rate": 7.201992470696544e-06,
"loss": 3.1124,
"step": 861000
},
{
"epoch": 7.06,
"learning_rate": 7.200348517976623e-06,
"loss": 3.1003,
"step": 861500
},
{
"epoch": 7.06,
"learning_rate": 7.198704565256703e-06,
"loss": 3.1041,
"step": 862000
},
{
"epoch": 7.07,
"learning_rate": 7.197060612536783e-06,
"loss": 3.0975,
"step": 862500
},
{
"epoch": 7.07,
"learning_rate": 7.195416659816864e-06,
"loss": 3.1068,
"step": 863000
},
{
"epoch": 7.07,
"learning_rate": 7.1937727070969445e-06,
"loss": 3.1015,
"step": 863500
},
{
"epoch": 7.08,
"learning_rate": 7.192128754377025e-06,
"loss": 3.1072,
"step": 864000
},
{
"epoch": 7.08,
"learning_rate": 7.190484801657105e-06,
"loss": 3.0992,
"step": 864500
},
{
"epoch": 7.09,
"learning_rate": 7.188840848937186e-06,
"loss": 3.1031,
"step": 865000
},
{
"epoch": 7.09,
"learning_rate": 7.187196896217266e-06,
"loss": 3.0964,
"step": 865500
},
{
"epoch": 7.09,
"learning_rate": 7.185552943497346e-06,
"loss": 3.1047,
"step": 866000
},
{
"epoch": 7.1,
"learning_rate": 7.183908990777426e-06,
"loss": 3.103,
"step": 866500
},
{
"epoch": 7.1,
"learning_rate": 7.182265038057506e-06,
"loss": 3.0944,
"step": 867000
},
{
"epoch": 7.11,
"learning_rate": 7.1806210853375855e-06,
"loss": 3.0977,
"step": 867500
},
{
"epoch": 7.11,
"learning_rate": 7.1789771326176664e-06,
"loss": 3.1003,
"step": 868000
},
{
"epoch": 7.12,
"learning_rate": 7.1773331798977466e-06,
"loss": 3.1025,
"step": 868500
},
{
"epoch": 7.12,
"learning_rate": 7.175689227177827e-06,
"loss": 3.097,
"step": 869000
},
{
"epoch": 7.12,
"learning_rate": 7.174045274457907e-06,
"loss": 3.0947,
"step": 869500
},
{
"epoch": 7.13,
"learning_rate": 7.172401321737987e-06,
"loss": 3.1046,
"step": 870000
},
{
"epoch": 7.13,
"eval_accuracy": 0.4757021230011519,
"eval_loss": 2.956329584121704,
"eval_runtime": 428.2951,
"eval_samples_per_second": 719.962,
"eval_steps_per_second": 15.001,
"step": 870000
},
{
"epoch": 7.13,
"learning_rate": 7.170757369018068e-06,
"loss": 3.1,
"step": 870500
},
{
"epoch": 7.14,
"learning_rate": 7.169113416298148e-06,
"loss": 3.0954,
"step": 871000
},
{
"epoch": 7.14,
"learning_rate": 7.167469463578228e-06,
"loss": 3.0978,
"step": 871500
},
{
"epoch": 7.14,
"learning_rate": 7.165825510858308e-06,
"loss": 3.097,
"step": 872000
},
{
"epoch": 7.15,
"learning_rate": 7.164181558138389e-06,
"loss": 3.1062,
"step": 872500
},
{
"epoch": 7.15,
"learning_rate": 7.162537605418469e-06,
"loss": 3.0963,
"step": 873000
},
{
"epoch": 7.16,
"learning_rate": 7.1608936526985495e-06,
"loss": 3.0909,
"step": 873500
},
{
"epoch": 7.16,
"learning_rate": 7.159249699978629e-06,
"loss": 3.0979,
"step": 874000
},
{
"epoch": 7.16,
"learning_rate": 7.157605747258709e-06,
"loss": 3.0919,
"step": 874500
},
{
"epoch": 7.17,
"learning_rate": 7.155961794538789e-06,
"loss": 3.0945,
"step": 875000
},
{
"epoch": 7.17,
"learning_rate": 7.15431784181887e-06,
"loss": 3.0981,
"step": 875500
},
{
"epoch": 7.18,
"learning_rate": 7.15267388909895e-06,
"loss": 3.0912,
"step": 876000
},
{
"epoch": 7.18,
"learning_rate": 7.15102993637903e-06,
"loss": 3.0948,
"step": 876500
},
{
"epoch": 7.19,
"learning_rate": 7.14938598365911e-06,
"loss": 3.1008,
"step": 877000
},
{
"epoch": 7.19,
"learning_rate": 7.1477420309391905e-06,
"loss": 3.103,
"step": 877500
},
{
"epoch": 7.19,
"learning_rate": 7.1460980782192715e-06,
"loss": 3.0908,
"step": 878000
},
{
"epoch": 7.2,
"learning_rate": 7.144454125499352e-06,
"loss": 3.0975,
"step": 878500
},
{
"epoch": 7.2,
"learning_rate": 7.142810172779432e-06,
"loss": 3.0972,
"step": 879000
},
{
"epoch": 7.21,
"learning_rate": 7.141166220059512e-06,
"loss": 3.0939,
"step": 879500
},
{
"epoch": 7.21,
"learning_rate": 7.139522267339591e-06,
"loss": 3.099,
"step": 880000
},
{
"epoch": 7.21,
"learning_rate": 7.137878314619672e-06,
"loss": 3.0973,
"step": 880500
},
{
"epoch": 7.22,
"learning_rate": 7.136234361899752e-06,
"loss": 3.0922,
"step": 881000
},
{
"epoch": 7.22,
"learning_rate": 7.134590409179832e-06,
"loss": 3.0964,
"step": 881500
},
{
"epoch": 7.23,
"learning_rate": 7.1329464564599125e-06,
"loss": 3.0954,
"step": 882000
},
{
"epoch": 7.23,
"learning_rate": 7.131302503739993e-06,
"loss": 3.1022,
"step": 882500
},
{
"epoch": 7.23,
"learning_rate": 7.129658551020074e-06,
"loss": 3.099,
"step": 883000
},
{
"epoch": 7.24,
"learning_rate": 7.128014598300154e-06,
"loss": 3.0887,
"step": 883500
},
{
"epoch": 7.24,
"learning_rate": 7.126370645580234e-06,
"loss": 3.0924,
"step": 884000
},
{
"epoch": 7.25,
"learning_rate": 7.124726692860314e-06,
"loss": 3.0895,
"step": 884500
},
{
"epoch": 7.25,
"learning_rate": 7.123082740140395e-06,
"loss": 3.0968,
"step": 885000
},
{
"epoch": 7.25,
"learning_rate": 7.121438787420475e-06,
"loss": 3.0948,
"step": 885500
},
{
"epoch": 7.26,
"learning_rate": 7.119794834700554e-06,
"loss": 3.0969,
"step": 886000
},
{
"epoch": 7.26,
"learning_rate": 7.1181508819806345e-06,
"loss": 3.0952,
"step": 886500
},
{
"epoch": 7.27,
"learning_rate": 7.116506929260715e-06,
"loss": 3.0957,
"step": 887000
},
{
"epoch": 7.27,
"learning_rate": 7.114862976540795e-06,
"loss": 3.0874,
"step": 887500
},
{
"epoch": 7.28,
"learning_rate": 7.113219023820876e-06,
"loss": 3.0911,
"step": 888000
},
{
"epoch": 7.28,
"learning_rate": 7.111575071100956e-06,
"loss": 3.0898,
"step": 888500
},
{
"epoch": 7.28,
"learning_rate": 7.109931118381036e-06,
"loss": 3.0941,
"step": 889000
},
{
"epoch": 7.29,
"learning_rate": 7.108287165661116e-06,
"loss": 3.097,
"step": 889500
},
{
"epoch": 7.29,
"learning_rate": 7.106643212941196e-06,
"loss": 3.0977,
"step": 890000
},
{
"epoch": 7.3,
"learning_rate": 7.104999260221277e-06,
"loss": 3.0923,
"step": 890500
},
{
"epoch": 7.3,
"learning_rate": 7.103355307501357e-06,
"loss": 3.0844,
"step": 891000
},
{
"epoch": 7.3,
"learning_rate": 7.1017113547814375e-06,
"loss": 3.0889,
"step": 891500
},
{
"epoch": 7.31,
"learning_rate": 7.100067402061517e-06,
"loss": 3.0952,
"step": 892000
},
{
"epoch": 7.31,
"learning_rate": 7.098423449341597e-06,
"loss": 3.0884,
"step": 892500
},
{
"epoch": 7.32,
"learning_rate": 7.096779496621678e-06,
"loss": 3.0956,
"step": 893000
},
{
"epoch": 7.32,
"learning_rate": 7.095135543901758e-06,
"loss": 3.0888,
"step": 893500
},
{
"epoch": 7.32,
"learning_rate": 7.093491591181838e-06,
"loss": 3.092,
"step": 894000
},
{
"epoch": 7.33,
"learning_rate": 7.091847638461918e-06,
"loss": 3.0842,
"step": 894500
},
{
"epoch": 7.33,
"learning_rate": 7.090203685741998e-06,
"loss": 3.092,
"step": 895000
},
{
"epoch": 7.34,
"learning_rate": 7.088559733022079e-06,
"loss": 3.0886,
"step": 895500
},
{
"epoch": 7.34,
"learning_rate": 7.0869157803021594e-06,
"loss": 3.0845,
"step": 896000
},
{
"epoch": 7.34,
"learning_rate": 7.0852718275822396e-06,
"loss": 3.0871,
"step": 896500
},
{
"epoch": 7.35,
"learning_rate": 7.08362787486232e-06,
"loss": 3.0804,
"step": 897000
},
{
"epoch": 7.35,
"learning_rate": 7.081983922142401e-06,
"loss": 3.0817,
"step": 897500
},
{
"epoch": 7.36,
"learning_rate": 7.080339969422479e-06,
"loss": 3.0935,
"step": 898000
},
{
"epoch": 7.36,
"learning_rate": 7.07869601670256e-06,
"loss": 3.0893,
"step": 898500
},
{
"epoch": 7.37,
"learning_rate": 7.07705206398264e-06,
"loss": 3.0876,
"step": 899000
},
{
"epoch": 7.37,
"learning_rate": 7.07540811126272e-06,
"loss": 3.0946,
"step": 899500
},
{
"epoch": 7.37,
"learning_rate": 7.0737641585428005e-06,
"loss": 3.0817,
"step": 900000
},
{
"epoch": 7.37,
"eval_accuracy": 0.47711464421124683,
"eval_loss": 2.9477081298828125,
"eval_runtime": 415.7627,
"eval_samples_per_second": 741.663,
"eval_steps_per_second": 15.454,
"step": 900000
},
{
"epoch": 7.38,
"learning_rate": 7.0721202058228814e-06,
"loss": 3.0775,
"step": 900500
},
{
"epoch": 7.38,
"learning_rate": 7.0704762531029616e-06,
"loss": 3.0839,
"step": 901000
},
{
"epoch": 7.39,
"learning_rate": 7.068832300383042e-06,
"loss": 3.086,
"step": 901500
},
{
"epoch": 7.39,
"learning_rate": 7.067188347663122e-06,
"loss": 3.0927,
"step": 902000
},
{
"epoch": 7.39,
"learning_rate": 7.065544394943202e-06,
"loss": 3.0884,
"step": 902500
},
{
"epoch": 7.4,
"learning_rate": 7.063900442223283e-06,
"loss": 3.0848,
"step": 903000
},
{
"epoch": 7.4,
"learning_rate": 7.062256489503363e-06,
"loss": 3.0805,
"step": 903500
},
{
"epoch": 7.41,
"learning_rate": 7.060612536783442e-06,
"loss": 3.0891,
"step": 904000
},
{
"epoch": 7.41,
"learning_rate": 7.0589685840635224e-06,
"loss": 3.0887,
"step": 904500
},
{
"epoch": 7.41,
"learning_rate": 7.0573246313436026e-06,
"loss": 3.0889,
"step": 905000
},
{
"epoch": 7.42,
"learning_rate": 7.055680678623683e-06,
"loss": 3.0918,
"step": 905500
},
{
"epoch": 7.42,
"learning_rate": 7.054036725903764e-06,
"loss": 3.0735,
"step": 906000
},
{
"epoch": 7.43,
"learning_rate": 7.052392773183844e-06,
"loss": 3.0821,
"step": 906500
},
{
"epoch": 7.43,
"learning_rate": 7.050748820463924e-06,
"loss": 3.0843,
"step": 907000
},
{
"epoch": 7.43,
"learning_rate": 7.049104867744004e-06,
"loss": 3.088,
"step": 907500
},
{
"epoch": 7.44,
"learning_rate": 7.047460915024085e-06,
"loss": 3.0885,
"step": 908000
},
{
"epoch": 7.44,
"learning_rate": 7.045816962304165e-06,
"loss": 3.0836,
"step": 908500
},
{
"epoch": 7.45,
"learning_rate": 7.044173009584245e-06,
"loss": 3.083,
"step": 909000
},
{
"epoch": 7.45,
"learning_rate": 7.042529056864325e-06,
"loss": 3.0838,
"step": 909500
},
{
"epoch": 7.46,
"learning_rate": 7.040885104144405e-06,
"loss": 3.0789,
"step": 910000
},
{
"epoch": 7.46,
"learning_rate": 7.039241151424485e-06,
"loss": 3.082,
"step": 910500
},
{
"epoch": 7.46,
"learning_rate": 7.037597198704566e-06,
"loss": 3.08,
"step": 911000
},
{
"epoch": 7.47,
"learning_rate": 7.035953245984646e-06,
"loss": 3.0853,
"step": 911500
},
{
"epoch": 7.47,
"learning_rate": 7.034309293264726e-06,
"loss": 3.0808,
"step": 912000
},
{
"epoch": 7.48,
"learning_rate": 7.032665340544806e-06,
"loss": 3.0855,
"step": 912500
},
{
"epoch": 7.48,
"learning_rate": 7.031021387824887e-06,
"loss": 3.0833,
"step": 913000
},
{
"epoch": 7.48,
"learning_rate": 7.029377435104967e-06,
"loss": 3.0754,
"step": 913500
},
{
"epoch": 7.49,
"learning_rate": 7.027733482385047e-06,
"loss": 3.0819,
"step": 914000
},
{
"epoch": 7.49,
"learning_rate": 7.0260895296651275e-06,
"loss": 3.083,
"step": 914500
},
{
"epoch": 7.5,
"learning_rate": 7.024445576945208e-06,
"loss": 3.0783,
"step": 915000
},
{
"epoch": 7.5,
"learning_rate": 7.022801624225289e-06,
"loss": 3.0812,
"step": 915500
},
{
"epoch": 7.5,
"learning_rate": 7.021157671505368e-06,
"loss": 3.0782,
"step": 916000
},
{
"epoch": 7.51,
"learning_rate": 7.019513718785448e-06,
"loss": 3.0797,
"step": 916500
},
{
"epoch": 7.51,
"learning_rate": 7.017869766065528e-06,
"loss": 3.0876,
"step": 917000
},
{
"epoch": 7.52,
"learning_rate": 7.016225813345608e-06,
"loss": 3.0857,
"step": 917500
},
{
"epoch": 7.52,
"learning_rate": 7.014581860625688e-06,
"loss": 3.0776,
"step": 918000
},
{
"epoch": 7.53,
"learning_rate": 7.012937907905769e-06,
"loss": 3.085,
"step": 918500
},
{
"epoch": 7.53,
"learning_rate": 7.0112939551858495e-06,
"loss": 3.0817,
"step": 919000
},
{
"epoch": 7.53,
"learning_rate": 7.00965000246593e-06,
"loss": 3.0807,
"step": 919500
},
{
"epoch": 7.54,
"learning_rate": 7.00800604974601e-06,
"loss": 3.0892,
"step": 920000
},
{
"epoch": 7.54,
"learning_rate": 7.006362097026091e-06,
"loss": 3.0781,
"step": 920500
},
{
"epoch": 7.55,
"learning_rate": 7.004718144306171e-06,
"loss": 3.0726,
"step": 921000
},
{
"epoch": 7.55,
"learning_rate": 7.003074191586251e-06,
"loss": 3.0824,
"step": 921500
},
{
"epoch": 7.55,
"learning_rate": 7.001430238866331e-06,
"loss": 3.0816,
"step": 922000
},
{
"epoch": 7.56,
"learning_rate": 6.99978628614641e-06,
"loss": 3.0747,
"step": 922500
},
{
"epoch": 7.56,
"learning_rate": 6.9981423334264905e-06,
"loss": 3.0819,
"step": 923000
},
{
"epoch": 7.57,
"learning_rate": 6.9964983807065715e-06,
"loss": 3.0806,
"step": 923500
},
{
"epoch": 7.57,
"learning_rate": 6.994854427986652e-06,
"loss": 3.0842,
"step": 924000
},
{
"epoch": 7.57,
"learning_rate": 6.993210475266732e-06,
"loss": 3.0732,
"step": 924500
},
{
"epoch": 7.58,
"learning_rate": 6.991566522546812e-06,
"loss": 3.0746,
"step": 925000
},
{
"epoch": 7.58,
"learning_rate": 6.989922569826893e-06,
"loss": 3.0789,
"step": 925500
},
{
"epoch": 7.59,
"learning_rate": 6.988278617106973e-06,
"loss": 3.0817,
"step": 926000
},
{
"epoch": 7.59,
"learning_rate": 6.986634664387053e-06,
"loss": 3.0718,
"step": 926500
},
{
"epoch": 7.59,
"learning_rate": 6.984990711667133e-06,
"loss": 3.0824,
"step": 927000
},
{
"epoch": 7.6,
"learning_rate": 6.983346758947213e-06,
"loss": 3.0776,
"step": 927500
},
{
"epoch": 7.6,
"learning_rate": 6.981702806227294e-06,
"loss": 3.075,
"step": 928000
},
{
"epoch": 7.61,
"learning_rate": 6.980058853507374e-06,
"loss": 3.0787,
"step": 928500
},
{
"epoch": 7.61,
"learning_rate": 6.978414900787454e-06,
"loss": 3.0708,
"step": 929000
},
{
"epoch": 7.62,
"learning_rate": 6.976770948067534e-06,
"loss": 3.086,
"step": 929500
},
{
"epoch": 7.62,
"learning_rate": 6.975126995347614e-06,
"loss": 3.0813,
"step": 930000
},
{
"epoch": 7.62,
"eval_accuracy": 0.478479420881626,
"eval_loss": 2.939741611480713,
"eval_runtime": 412.5363,
"eval_samples_per_second": 747.464,
"eval_steps_per_second": 15.574,
"step": 930000
},
{
"epoch": 7.62,
"learning_rate": 6.973483042627694e-06,
"loss": 3.0788,
"step": 930500
},
{
"epoch": 7.63,
"learning_rate": 6.971839089907775e-06,
"loss": 3.0747,
"step": 931000
},
{
"epoch": 7.63,
"learning_rate": 6.970195137187855e-06,
"loss": 3.0767,
"step": 931500
},
{
"epoch": 7.64,
"learning_rate": 6.968551184467935e-06,
"loss": 3.0797,
"step": 932000
},
{
"epoch": 7.64,
"learning_rate": 6.9669072317480155e-06,
"loss": 3.0759,
"step": 932500
},
{
"epoch": 7.64,
"learning_rate": 6.9652632790280964e-06,
"loss": 3.0809,
"step": 933000
},
{
"epoch": 7.65,
"learning_rate": 6.9636193263081766e-06,
"loss": 3.0756,
"step": 933500
},
{
"epoch": 7.65,
"learning_rate": 6.961975373588257e-06,
"loss": 3.0814,
"step": 934000
},
{
"epoch": 7.66,
"learning_rate": 6.960331420868336e-06,
"loss": 3.0757,
"step": 934500
},
{
"epoch": 7.66,
"learning_rate": 6.958687468148416e-06,
"loss": 3.0666,
"step": 935000
},
{
"epoch": 7.66,
"learning_rate": 6.957043515428496e-06,
"loss": 3.0683,
"step": 935500
},
{
"epoch": 7.67,
"learning_rate": 6.955399562708577e-06,
"loss": 3.0742,
"step": 936000
},
{
"epoch": 7.67,
"learning_rate": 6.953755609988657e-06,
"loss": 3.0645,
"step": 936500
},
{
"epoch": 7.68,
"learning_rate": 6.9521116572687374e-06,
"loss": 3.0785,
"step": 937000
},
{
"epoch": 7.68,
"learning_rate": 6.9504677045488176e-06,
"loss": 3.0819,
"step": 937500
},
{
"epoch": 7.68,
"learning_rate": 6.9488237518288985e-06,
"loss": 3.0803,
"step": 938000
},
{
"epoch": 7.69,
"learning_rate": 6.947179799108979e-06,
"loss": 3.0805,
"step": 938500
},
{
"epoch": 7.69,
"learning_rate": 6.945535846389059e-06,
"loss": 3.0775,
"step": 939000
},
{
"epoch": 7.7,
"learning_rate": 6.943891893669139e-06,
"loss": 3.0725,
"step": 939500
},
{
"epoch": 7.7,
"learning_rate": 6.942247940949219e-06,
"loss": 3.0744,
"step": 940000
},
{
"epoch": 7.71,
"learning_rate": 6.940603988229298e-06,
"loss": 3.0714,
"step": 940500
},
{
"epoch": 7.71,
"learning_rate": 6.938960035509379e-06,
"loss": 3.0715,
"step": 941000
},
{
"epoch": 7.71,
"learning_rate": 6.9373160827894594e-06,
"loss": 3.0773,
"step": 941500
},
{
"epoch": 7.72,
"learning_rate": 6.9356721300695396e-06,
"loss": 3.0694,
"step": 942000
},
{
"epoch": 7.72,
"learning_rate": 6.93402817734962e-06,
"loss": 3.0711,
"step": 942500
},
{
"epoch": 7.73,
"learning_rate": 6.9323842246297e-06,
"loss": 3.0744,
"step": 943000
},
{
"epoch": 7.73,
"learning_rate": 6.930740271909781e-06,
"loss": 3.0734,
"step": 943500
},
{
"epoch": 7.73,
"learning_rate": 6.929096319189861e-06,
"loss": 3.0716,
"step": 944000
},
{
"epoch": 7.74,
"learning_rate": 6.927452366469941e-06,
"loss": 3.0772,
"step": 944500
},
{
"epoch": 7.74,
"learning_rate": 6.925808413750021e-06,
"loss": 3.0681,
"step": 945000
},
{
"epoch": 7.75,
"learning_rate": 6.924164461030102e-06,
"loss": 3.0786,
"step": 945500
},
{
"epoch": 7.75,
"learning_rate": 6.922520508310182e-06,
"loss": 3.075,
"step": 946000
},
{
"epoch": 7.75,
"learning_rate": 6.9208765555902615e-06,
"loss": 3.0774,
"step": 946500
},
{
"epoch": 7.76,
"learning_rate": 6.919232602870342e-06,
"loss": 3.0651,
"step": 947000
},
{
"epoch": 7.76,
"learning_rate": 6.917588650150422e-06,
"loss": 3.0708,
"step": 947500
},
{
"epoch": 7.77,
"learning_rate": 6.915944697430502e-06,
"loss": 3.0813,
"step": 948000
},
{
"epoch": 7.77,
"learning_rate": 6.914300744710583e-06,
"loss": 3.0719,
"step": 948500
},
{
"epoch": 7.77,
"learning_rate": 6.912656791990663e-06,
"loss": 3.0739,
"step": 949000
},
{
"epoch": 7.78,
"learning_rate": 6.911012839270743e-06,
"loss": 3.07,
"step": 949500
},
{
"epoch": 7.78,
"learning_rate": 6.909368886550823e-06,
"loss": 3.0715,
"step": 950000
},
{
"epoch": 7.79,
"learning_rate": 6.907724933830903e-06,
"loss": 3.0675,
"step": 950500
},
{
"epoch": 7.79,
"learning_rate": 6.906080981110984e-06,
"loss": 3.0693,
"step": 951000
},
{
"epoch": 7.8,
"learning_rate": 6.9044370283910645e-06,
"loss": 3.0755,
"step": 951500
},
{
"epoch": 7.8,
"learning_rate": 6.902793075671145e-06,
"loss": 3.0741,
"step": 952000
},
{
"epoch": 7.8,
"learning_rate": 6.901149122951224e-06,
"loss": 3.068,
"step": 952500
},
{
"epoch": 7.81,
"learning_rate": 6.899505170231304e-06,
"loss": 3.0707,
"step": 953000
},
{
"epoch": 7.81,
"learning_rate": 6.897861217511385e-06,
"loss": 3.0731,
"step": 953500
},
{
"epoch": 7.82,
"learning_rate": 6.896217264791465e-06,
"loss": 3.0658,
"step": 954000
},
{
"epoch": 7.82,
"learning_rate": 6.894573312071545e-06,
"loss": 3.064,
"step": 954500
},
{
"epoch": 7.82,
"learning_rate": 6.892929359351625e-06,
"loss": 3.0747,
"step": 955000
},
{
"epoch": 7.83,
"learning_rate": 6.8912854066317055e-06,
"loss": 3.0731,
"step": 955500
},
{
"epoch": 7.83,
"learning_rate": 6.8896414539117865e-06,
"loss": 3.0695,
"step": 956000
},
{
"epoch": 7.84,
"learning_rate": 6.887997501191867e-06,
"loss": 3.0668,
"step": 956500
},
{
"epoch": 7.84,
"learning_rate": 6.886353548471947e-06,
"loss": 3.0691,
"step": 957000
},
{
"epoch": 7.84,
"learning_rate": 6.884709595752027e-06,
"loss": 3.068,
"step": 957500
},
{
"epoch": 7.85,
"learning_rate": 6.883065643032108e-06,
"loss": 3.0675,
"step": 958000
},
{
"epoch": 7.85,
"learning_rate": 6.881421690312186e-06,
"loss": 3.0692,
"step": 958500
},
{
"epoch": 7.86,
"learning_rate": 6.879777737592267e-06,
"loss": 3.0684,
"step": 959000
},
{
"epoch": 7.86,
"learning_rate": 6.878133784872347e-06,
"loss": 3.0657,
"step": 959500
},
{
"epoch": 7.87,
"learning_rate": 6.8764898321524275e-06,
"loss": 3.0709,
"step": 960000
},
{
"epoch": 7.87,
"eval_accuracy": 0.4803523362943657,
"eval_loss": 2.925929307937622,
"eval_runtime": 423.4884,
"eval_samples_per_second": 728.133,
"eval_steps_per_second": 15.172,
"step": 960000
},
{
"epoch": 7.87,
"learning_rate": 6.874845879432508e-06,
"loss": 3.0705,
"step": 960500
},
{
"epoch": 7.87,
"learning_rate": 6.873201926712589e-06,
"loss": 3.0672,
"step": 961000
},
{
"epoch": 7.88,
"learning_rate": 6.871557973992669e-06,
"loss": 3.064,
"step": 961500
},
{
"epoch": 7.88,
"learning_rate": 6.869914021272749e-06,
"loss": 3.0644,
"step": 962000
},
{
"epoch": 7.89,
"learning_rate": 6.868270068552829e-06,
"loss": 3.0636,
"step": 962500
},
{
"epoch": 7.89,
"learning_rate": 6.866626115832909e-06,
"loss": 3.0693,
"step": 963000
},
{
"epoch": 7.89,
"learning_rate": 6.86498216311299e-06,
"loss": 3.0597,
"step": 963500
},
{
"epoch": 7.9,
"learning_rate": 6.86333821039307e-06,
"loss": 3.0646,
"step": 964000
},
{
"epoch": 7.9,
"learning_rate": 6.8616942576731495e-06,
"loss": 3.0692,
"step": 964500
},
{
"epoch": 7.91,
"learning_rate": 6.86005030495323e-06,
"loss": 3.0657,
"step": 965000
},
{
"epoch": 7.91,
"learning_rate": 6.85840635223331e-06,
"loss": 3.061,
"step": 965500
},
{
"epoch": 7.91,
"learning_rate": 6.856762399513391e-06,
"loss": 3.0604,
"step": 966000
},
{
"epoch": 7.92,
"learning_rate": 6.855118446793471e-06,
"loss": 3.062,
"step": 966500
},
{
"epoch": 7.92,
"learning_rate": 6.853474494073551e-06,
"loss": 3.0679,
"step": 967000
},
{
"epoch": 7.93,
"learning_rate": 6.851830541353631e-06,
"loss": 3.0556,
"step": 967500
},
{
"epoch": 7.93,
"learning_rate": 6.850186588633711e-06,
"loss": 3.0666,
"step": 968000
},
{
"epoch": 7.93,
"learning_rate": 6.848542635913792e-06,
"loss": 3.0748,
"step": 968500
},
{
"epoch": 7.94,
"learning_rate": 6.846898683193872e-06,
"loss": 3.0643,
"step": 969000
},
{
"epoch": 7.94,
"learning_rate": 6.8452547304739524e-06,
"loss": 3.0599,
"step": 969500
},
{
"epoch": 7.95,
"learning_rate": 6.8436107777540326e-06,
"loss": 3.0642,
"step": 970000
},
{
"epoch": 7.95,
"learning_rate": 6.841966825034112e-06,
"loss": 3.0588,
"step": 970500
},
{
"epoch": 7.96,
"learning_rate": 6.840322872314192e-06,
"loss": 3.058,
"step": 971000
},
{
"epoch": 7.96,
"learning_rate": 6.838678919594273e-06,
"loss": 3.0621,
"step": 971500
},
{
"epoch": 7.96,
"learning_rate": 6.837034966874353e-06,
"loss": 3.0656,
"step": 972000
},
{
"epoch": 7.97,
"learning_rate": 6.835391014154433e-06,
"loss": 3.0681,
"step": 972500
},
{
"epoch": 7.97,
"learning_rate": 6.833747061434513e-06,
"loss": 3.0672,
"step": 973000
},
{
"epoch": 7.98,
"learning_rate": 6.832103108714594e-06,
"loss": 3.068,
"step": 973500
},
{
"epoch": 7.98,
"learning_rate": 6.8304591559946744e-06,
"loss": 3.0616,
"step": 974000
},
{
"epoch": 7.98,
"learning_rate": 6.8288152032747546e-06,
"loss": 3.0651,
"step": 974500
},
{
"epoch": 7.99,
"learning_rate": 6.827171250554835e-06,
"loss": 3.0669,
"step": 975000
},
{
"epoch": 7.99,
"learning_rate": 6.825527297834915e-06,
"loss": 3.0668,
"step": 975500
},
{
"epoch": 8.0,
"learning_rate": 6.823883345114996e-06,
"loss": 3.0692,
"step": 976000
},
{
"epoch": 8.0,
"learning_rate": 6.822239392395076e-06,
"loss": 3.0622,
"step": 976500
},
{
"epoch": 8.0,
"learning_rate": 6.820595439675155e-06,
"loss": 3.0618,
"step": 977000
},
{
"epoch": 8.01,
"learning_rate": 6.818951486955235e-06,
"loss": 3.0672,
"step": 977500
},
{
"epoch": 8.01,
"learning_rate": 6.8173075342353154e-06,
"loss": 3.0686,
"step": 978000
},
{
"epoch": 8.02,
"learning_rate": 6.8156635815153956e-06,
"loss": 3.065,
"step": 978500
},
{
"epoch": 8.02,
"learning_rate": 6.8140196287954765e-06,
"loss": 3.0568,
"step": 979000
},
{
"epoch": 8.02,
"learning_rate": 6.812375676075557e-06,
"loss": 3.0605,
"step": 979500
},
{
"epoch": 8.03,
"learning_rate": 6.810731723355637e-06,
"loss": 3.0586,
"step": 980000
},
{
"epoch": 8.03,
"learning_rate": 6.809087770635717e-06,
"loss": 3.061,
"step": 980500
},
{
"epoch": 8.04,
"learning_rate": 6.807443817915798e-06,
"loss": 3.0603,
"step": 981000
},
{
"epoch": 8.04,
"learning_rate": 6.805799865195878e-06,
"loss": 3.0591,
"step": 981500
},
{
"epoch": 8.05,
"learning_rate": 6.804155912475958e-06,
"loss": 3.0645,
"step": 982000
},
{
"epoch": 8.05,
"learning_rate": 6.802511959756038e-06,
"loss": 3.0609,
"step": 982500
},
{
"epoch": 8.05,
"learning_rate": 6.8008680070361176e-06,
"loss": 3.0596,
"step": 983000
},
{
"epoch": 8.06,
"learning_rate": 6.799224054316198e-06,
"loss": 3.0609,
"step": 983500
},
{
"epoch": 8.06,
"learning_rate": 6.797580101596279e-06,
"loss": 3.0547,
"step": 984000
},
{
"epoch": 8.07,
"learning_rate": 6.795936148876359e-06,
"loss": 3.0566,
"step": 984500
},
{
"epoch": 8.07,
"learning_rate": 6.794292196156439e-06,
"loss": 3.0511,
"step": 985000
},
{
"epoch": 8.07,
"learning_rate": 6.792648243436519e-06,
"loss": 3.0621,
"step": 985500
},
{
"epoch": 8.08,
"learning_rate": 6.7910042907166e-06,
"loss": 3.0607,
"step": 986000
},
{
"epoch": 8.08,
"learning_rate": 6.78936033799668e-06,
"loss": 3.0596,
"step": 986500
},
{
"epoch": 8.09,
"learning_rate": 6.78771638527676e-06,
"loss": 3.0615,
"step": 987000
},
{
"epoch": 8.09,
"learning_rate": 6.78607243255684e-06,
"loss": 3.0581,
"step": 987500
},
{
"epoch": 8.09,
"learning_rate": 6.7844284798369205e-06,
"loss": 3.0586,
"step": 988000
},
{
"epoch": 8.1,
"learning_rate": 6.7827845271170015e-06,
"loss": 3.0569,
"step": 988500
},
{
"epoch": 8.1,
"learning_rate": 6.781140574397081e-06,
"loss": 3.0522,
"step": 989000
},
{
"epoch": 8.11,
"learning_rate": 6.779496621677161e-06,
"loss": 3.0566,
"step": 989500
},
{
"epoch": 8.11,
"learning_rate": 6.777852668957241e-06,
"loss": 3.0528,
"step": 990000
},
{
"epoch": 8.11,
"eval_accuracy": 0.4812049123013876,
"eval_loss": 2.920849323272705,
"eval_runtime": 415.0385,
"eval_samples_per_second": 742.958,
"eval_steps_per_second": 15.48,
"step": 990000
},
{
"epoch": 8.11,
"learning_rate": 6.776208716237321e-06,
"loss": 3.0549,
"step": 990500
},
{
"epoch": 8.12,
"learning_rate": 6.774564763517401e-06,
"loss": 3.0566,
"step": 991000
},
{
"epoch": 8.12,
"learning_rate": 6.772920810797482e-06,
"loss": 3.0576,
"step": 991500
},
{
"epoch": 8.13,
"learning_rate": 6.771276858077562e-06,
"loss": 3.0539,
"step": 992000
},
{
"epoch": 8.13,
"learning_rate": 6.7696329053576425e-06,
"loss": 3.0525,
"step": 992500
},
{
"epoch": 8.14,
"learning_rate": 6.767988952637723e-06,
"loss": 3.0545,
"step": 993000
},
{
"epoch": 8.14,
"learning_rate": 6.766344999917804e-06,
"loss": 3.0543,
"step": 993500
},
{
"epoch": 8.14,
"learning_rate": 6.764701047197884e-06,
"loss": 3.0492,
"step": 994000
},
{
"epoch": 8.15,
"learning_rate": 6.763057094477964e-06,
"loss": 3.0608,
"step": 994500
},
{
"epoch": 8.15,
"learning_rate": 6.761413141758043e-06,
"loss": 3.0615,
"step": 995000
},
{
"epoch": 8.16,
"learning_rate": 6.759769189038123e-06,
"loss": 3.0605,
"step": 995500
},
{
"epoch": 8.16,
"learning_rate": 6.758125236318203e-06,
"loss": 3.0584,
"step": 996000
},
{
"epoch": 8.16,
"learning_rate": 6.756481283598284e-06,
"loss": 3.0554,
"step": 996500
},
{
"epoch": 8.17,
"learning_rate": 6.7548373308783645e-06,
"loss": 3.0558,
"step": 997000
},
{
"epoch": 8.17,
"learning_rate": 6.753193378158445e-06,
"loss": 3.056,
"step": 997500
},
{
"epoch": 8.18,
"learning_rate": 6.751549425438525e-06,
"loss": 3.0517,
"step": 998000
},
{
"epoch": 8.18,
"learning_rate": 6.749905472718606e-06,
"loss": 3.0549,
"step": 998500
},
{
"epoch": 8.18,
"learning_rate": 6.748261519998686e-06,
"loss": 3.0613,
"step": 999000
},
{
"epoch": 8.19,
"learning_rate": 6.746617567278766e-06,
"loss": 3.0511,
"step": 999500
},
{
"epoch": 8.19,
"learning_rate": 6.744973614558846e-06,
"loss": 3.0535,
"step": 1000000
},
{
"epoch": 8.2,
"learning_rate": 6.743329661838926e-06,
"loss": 3.0498,
"step": 1000500
},
{
"epoch": 8.2,
"learning_rate": 6.7416857091190055e-06,
"loss": 3.0545,
"step": 1001000
},
{
"epoch": 8.21,
"learning_rate": 6.7400417563990865e-06,
"loss": 3.0522,
"step": 1001500
},
{
"epoch": 8.21,
"learning_rate": 6.738397803679167e-06,
"loss": 3.0562,
"step": 1002000
},
{
"epoch": 8.21,
"learning_rate": 6.736753850959247e-06,
"loss": 3.0551,
"step": 1002500
},
{
"epoch": 8.22,
"learning_rate": 6.735109898239327e-06,
"loss": 3.0594,
"step": 1003000
},
{
"epoch": 8.22,
"learning_rate": 6.733465945519407e-06,
"loss": 3.0588,
"step": 1003500
},
{
"epoch": 8.23,
"learning_rate": 6.731821992799488e-06,
"loss": 3.0536,
"step": 1004000
},
{
"epoch": 8.23,
"learning_rate": 6.730178040079568e-06,
"loss": 3.0492,
"step": 1004500
},
{
"epoch": 8.23,
"learning_rate": 6.728534087359648e-06,
"loss": 3.049,
"step": 1005000
},
{
"epoch": 8.24,
"learning_rate": 6.726890134639728e-06,
"loss": 3.0574,
"step": 1005500
},
{
"epoch": 8.24,
"learning_rate": 6.725246181919809e-06,
"loss": 3.0467,
"step": 1006000
},
{
"epoch": 8.25,
"learning_rate": 6.7236022291998894e-06,
"loss": 3.0529,
"step": 1006500
},
{
"epoch": 8.25,
"learning_rate": 6.721958276479969e-06,
"loss": 3.0568,
"step": 1007000
},
{
"epoch": 8.25,
"learning_rate": 6.720314323760049e-06,
"loss": 3.0447,
"step": 1007500
},
{
"epoch": 8.26,
"learning_rate": 6.718670371040129e-06,
"loss": 3.0549,
"step": 1008000
},
{
"epoch": 8.26,
"learning_rate": 6.717026418320209e-06,
"loss": 3.0504,
"step": 1008500
},
{
"epoch": 8.27,
"learning_rate": 6.71538246560029e-06,
"loss": 3.0499,
"step": 1009000
},
{
"epoch": 8.27,
"learning_rate": 6.71373851288037e-06,
"loss": 3.0537,
"step": 1009500
},
{
"epoch": 8.27,
"learning_rate": 6.71209456016045e-06,
"loss": 3.0496,
"step": 1010000
},
{
"epoch": 8.28,
"learning_rate": 6.7104506074405304e-06,
"loss": 3.0458,
"step": 1010500
},
{
"epoch": 8.28,
"learning_rate": 6.708806654720611e-06,
"loss": 3.047,
"step": 1011000
},
{
"epoch": 8.29,
"learning_rate": 6.7071627020006915e-06,
"loss": 3.0525,
"step": 1011500
},
{
"epoch": 8.29,
"learning_rate": 6.705518749280772e-06,
"loss": 3.0566,
"step": 1012000
},
{
"epoch": 8.3,
"learning_rate": 6.703874796560852e-06,
"loss": 3.0503,
"step": 1012500
},
{
"epoch": 8.3,
"learning_rate": 6.702230843840931e-06,
"loss": 3.0531,
"step": 1013000
},
{
"epoch": 8.3,
"learning_rate": 6.700586891121011e-06,
"loss": 3.0495,
"step": 1013500
},
{
"epoch": 8.31,
"learning_rate": 6.698942938401092e-06,
"loss": 3.052,
"step": 1014000
},
{
"epoch": 8.31,
"learning_rate": 6.697298985681172e-06,
"loss": 3.0462,
"step": 1014500
},
{
"epoch": 8.32,
"learning_rate": 6.695655032961252e-06,
"loss": 3.0589,
"step": 1015000
},
{
"epoch": 8.32,
"learning_rate": 6.6940110802413325e-06,
"loss": 3.0518,
"step": 1015500
},
{
"epoch": 8.32,
"learning_rate": 6.692367127521413e-06,
"loss": 3.0572,
"step": 1016000
},
{
"epoch": 8.33,
"learning_rate": 6.690723174801494e-06,
"loss": 3.0515,
"step": 1016500
},
{
"epoch": 8.33,
"learning_rate": 6.689079222081574e-06,
"loss": 3.0498,
"step": 1017000
},
{
"epoch": 8.34,
"learning_rate": 6.687435269361654e-06,
"loss": 3.0501,
"step": 1017500
},
{
"epoch": 8.34,
"learning_rate": 6.685791316641734e-06,
"loss": 3.0552,
"step": 1018000
},
{
"epoch": 8.34,
"learning_rate": 6.684147363921815e-06,
"loss": 3.049,
"step": 1018500
},
{
"epoch": 8.35,
"learning_rate": 6.6825034112018934e-06,
"loss": 3.0508,
"step": 1019000
},
{
"epoch": 8.35,
"learning_rate": 6.680859458481974e-06,
"loss": 3.0454,
"step": 1019500
},
{
"epoch": 8.36,
"learning_rate": 6.6792155057620545e-06,
"loss": 3.0541,
"step": 1020000
},
{
"epoch": 8.36,
"eval_accuracy": 0.4829413695036341,
"eval_loss": 2.90887713432312,
"eval_runtime": 413.3015,
"eval_samples_per_second": 746.08,
"eval_steps_per_second": 15.546,
"step": 1020000
},
{
"epoch": 8.36,
"learning_rate": 6.677571553042135e-06,
"loss": 3.0473,
"step": 1020500
},
{
"epoch": 8.36,
"learning_rate": 6.675927600322215e-06,
"loss": 3.046,
"step": 1021000
},
{
"epoch": 8.37,
"learning_rate": 6.674283647602296e-06,
"loss": 3.0498,
"step": 1021500
},
{
"epoch": 8.37,
"learning_rate": 6.672639694882376e-06,
"loss": 3.0504,
"step": 1022000
},
{
"epoch": 8.38,
"learning_rate": 6.670995742162456e-06,
"loss": 3.0417,
"step": 1022500
},
{
"epoch": 8.38,
"learning_rate": 6.669351789442536e-06,
"loss": 3.0592,
"step": 1023000
},
{
"epoch": 8.39,
"learning_rate": 6.667707836722616e-06,
"loss": 3.0561,
"step": 1023500
},
{
"epoch": 8.39,
"learning_rate": 6.666063884002697e-06,
"loss": 3.0394,
"step": 1024000
},
{
"epoch": 8.39,
"learning_rate": 6.664419931282777e-06,
"loss": 3.0459,
"step": 1024500
},
{
"epoch": 8.4,
"learning_rate": 6.6627759785628575e-06,
"loss": 3.0518,
"step": 1025000
},
{
"epoch": 8.4,
"learning_rate": 6.661132025842937e-06,
"loss": 3.0447,
"step": 1025500
},
{
"epoch": 8.41,
"learning_rate": 6.659488073123017e-06,
"loss": 3.0462,
"step": 1026000
},
{
"epoch": 8.41,
"learning_rate": 6.657844120403098e-06,
"loss": 3.052,
"step": 1026500
},
{
"epoch": 8.41,
"learning_rate": 6.656200167683178e-06,
"loss": 3.0463,
"step": 1027000
},
{
"epoch": 8.42,
"learning_rate": 6.654556214963258e-06,
"loss": 3.0511,
"step": 1027500
},
{
"epoch": 8.42,
"learning_rate": 6.652912262243338e-06,
"loss": 3.0508,
"step": 1028000
},
{
"epoch": 8.43,
"learning_rate": 6.651268309523418e-06,
"loss": 3.0476,
"step": 1028500
},
{
"epoch": 8.43,
"learning_rate": 6.649624356803499e-06,
"loss": 3.0537,
"step": 1029000
},
{
"epoch": 8.43,
"learning_rate": 6.6479804040835795e-06,
"loss": 3.046,
"step": 1029500
},
{
"epoch": 8.44,
"learning_rate": 6.64633645136366e-06,
"loss": 3.0525,
"step": 1030000
},
{
"epoch": 8.44,
"learning_rate": 6.64469249864374e-06,
"loss": 3.0457,
"step": 1030500
},
{
"epoch": 8.45,
"learning_rate": 6.643048545923821e-06,
"loss": 3.0466,
"step": 1031000
},
{
"epoch": 8.45,
"learning_rate": 6.641404593203899e-06,
"loss": 3.0431,
"step": 1031500
},
{
"epoch": 8.45,
"learning_rate": 6.63976064048398e-06,
"loss": 3.0475,
"step": 1032000
},
{
"epoch": 8.46,
"learning_rate": 6.63811668776406e-06,
"loss": 3.0473,
"step": 1032500
},
{
"epoch": 8.46,
"learning_rate": 6.63647273504414e-06,
"loss": 3.0418,
"step": 1033000
},
{
"epoch": 8.47,
"learning_rate": 6.6348287823242205e-06,
"loss": 3.0426,
"step": 1033500
},
{
"epoch": 8.47,
"learning_rate": 6.6331848296043015e-06,
"loss": 3.0471,
"step": 1034000
},
{
"epoch": 8.48,
"learning_rate": 6.631540876884382e-06,
"loss": 3.0357,
"step": 1034500
},
{
"epoch": 8.48,
"learning_rate": 6.629896924164462e-06,
"loss": 3.0484,
"step": 1035000
},
{
"epoch": 8.48,
"learning_rate": 6.628252971444542e-06,
"loss": 3.051,
"step": 1035500
},
{
"epoch": 8.49,
"learning_rate": 6.626609018724622e-06,
"loss": 3.0439,
"step": 1036000
},
{
"epoch": 8.49,
"learning_rate": 6.624965066004703e-06,
"loss": 3.0461,
"step": 1036500
},
{
"epoch": 8.5,
"learning_rate": 6.623321113284783e-06,
"loss": 3.0467,
"step": 1037000
},
{
"epoch": 8.5,
"learning_rate": 6.621677160564862e-06,
"loss": 3.047,
"step": 1037500
},
{
"epoch": 8.5,
"learning_rate": 6.6200332078449425e-06,
"loss": 3.0444,
"step": 1038000
},
{
"epoch": 8.51,
"learning_rate": 6.618389255125023e-06,
"loss": 3.0433,
"step": 1038500
},
{
"epoch": 8.51,
"learning_rate": 6.616745302405104e-06,
"loss": 3.0416,
"step": 1039000
},
{
"epoch": 8.52,
"learning_rate": 6.615101349685184e-06,
"loss": 3.042,
"step": 1039500
},
{
"epoch": 8.52,
"learning_rate": 6.613457396965264e-06,
"loss": 3.042,
"step": 1040000
},
{
"epoch": 8.52,
"learning_rate": 6.611813444245344e-06,
"loss": 3.0503,
"step": 1040500
},
{
"epoch": 8.53,
"learning_rate": 6.610169491525424e-06,
"loss": 3.0503,
"step": 1041000
},
{
"epoch": 8.53,
"learning_rate": 6.608525538805505e-06,
"loss": 3.0391,
"step": 1041500
},
{
"epoch": 8.54,
"learning_rate": 6.606881586085585e-06,
"loss": 3.0456,
"step": 1042000
},
{
"epoch": 8.54,
"learning_rate": 6.605237633365665e-06,
"loss": 3.0408,
"step": 1042500
},
{
"epoch": 8.55,
"learning_rate": 6.6035936806457454e-06,
"loss": 3.0393,
"step": 1043000
},
{
"epoch": 8.55,
"learning_rate": 6.601949727925825e-06,
"loss": 3.048,
"step": 1043500
},
{
"epoch": 8.55,
"learning_rate": 6.600305775205905e-06,
"loss": 3.0361,
"step": 1044000
},
{
"epoch": 8.56,
"learning_rate": 6.598661822485986e-06,
"loss": 3.0415,
"step": 1044500
},
{
"epoch": 8.56,
"learning_rate": 6.597017869766066e-06,
"loss": 3.0402,
"step": 1045000
},
{
"epoch": 8.57,
"learning_rate": 6.595373917046146e-06,
"loss": 3.0475,
"step": 1045500
},
{
"epoch": 8.57,
"learning_rate": 6.593729964326226e-06,
"loss": 3.044,
"step": 1046000
},
{
"epoch": 8.57,
"learning_rate": 6.592086011606307e-06,
"loss": 3.0376,
"step": 1046500
},
{
"epoch": 8.58,
"learning_rate": 6.590442058886387e-06,
"loss": 3.0411,
"step": 1047000
},
{
"epoch": 8.58,
"learning_rate": 6.588798106166467e-06,
"loss": 3.0327,
"step": 1047500
},
{
"epoch": 8.59,
"learning_rate": 6.5871541534465475e-06,
"loss": 3.038,
"step": 1048000
},
{
"epoch": 8.59,
"learning_rate": 6.585510200726628e-06,
"loss": 3.0352,
"step": 1048500
},
{
"epoch": 8.59,
"learning_rate": 6.583866248006709e-06,
"loss": 3.0367,
"step": 1049000
},
{
"epoch": 8.6,
"learning_rate": 6.582222295286788e-06,
"loss": 3.0446,
"step": 1049500
},
{
"epoch": 8.6,
"learning_rate": 6.580578342566868e-06,
"loss": 3.0469,
"step": 1050000
},
{
"epoch": 8.6,
"eval_accuracy": 0.4838990642400208,
"eval_loss": 2.9014947414398193,
"eval_runtime": 404.752,
"eval_samples_per_second": 761.839,
"eval_steps_per_second": 15.874,
"step": 1050000
},
{
"epoch": 8.61,
"learning_rate": 6.578934389846948e-06,
"loss": 3.0436,
"step": 1050500
},
{
"epoch": 8.61,
"learning_rate": 6.577290437127028e-06,
"loss": 3.0516,
"step": 1051000
},
{
"epoch": 8.61,
"learning_rate": 6.5756464844071084e-06,
"loss": 3.046,
"step": 1051500
},
{
"epoch": 8.62,
"learning_rate": 6.574002531687189e-06,
"loss": 3.0435,
"step": 1052000
},
{
"epoch": 8.62,
"learning_rate": 6.5723585789672695e-06,
"loss": 3.0383,
"step": 1052500
},
{
"epoch": 8.63,
"learning_rate": 6.57071462624735e-06,
"loss": 3.0484,
"step": 1053000
},
{
"epoch": 8.63,
"learning_rate": 6.56907067352743e-06,
"loss": 3.0375,
"step": 1053500
},
{
"epoch": 8.64,
"learning_rate": 6.567426720807511e-06,
"loss": 3.0459,
"step": 1054000
},
{
"epoch": 8.64,
"learning_rate": 6.565782768087591e-06,
"loss": 3.0424,
"step": 1054500
},
{
"epoch": 8.64,
"learning_rate": 6.564138815367671e-06,
"loss": 3.0355,
"step": 1055000
},
{
"epoch": 8.65,
"learning_rate": 6.56249486264775e-06,
"loss": 3.0374,
"step": 1055500
},
{
"epoch": 8.65,
"learning_rate": 6.56085090992783e-06,
"loss": 3.0422,
"step": 1056000
},
{
"epoch": 8.66,
"learning_rate": 6.5592069572079105e-06,
"loss": 3.0372,
"step": 1056500
},
{
"epoch": 8.66,
"learning_rate": 6.5575630044879915e-06,
"loss": 3.0446,
"step": 1057000
},
{
"epoch": 8.66,
"learning_rate": 6.555919051768072e-06,
"loss": 3.0456,
"step": 1057500
},
{
"epoch": 8.67,
"learning_rate": 6.554275099048152e-06,
"loss": 3.0372,
"step": 1058000
},
{
"epoch": 8.67,
"learning_rate": 6.552631146328232e-06,
"loss": 3.0409,
"step": 1058500
},
{
"epoch": 8.68,
"learning_rate": 6.550987193608313e-06,
"loss": 3.0376,
"step": 1059000
},
{
"epoch": 8.68,
"learning_rate": 6.549343240888393e-06,
"loss": 3.0434,
"step": 1059500
},
{
"epoch": 8.68,
"learning_rate": 6.547699288168473e-06,
"loss": 3.0451,
"step": 1060000
},
{
"epoch": 8.69,
"learning_rate": 6.546055335448553e-06,
"loss": 3.0341,
"step": 1060500
},
{
"epoch": 8.69,
"learning_rate": 6.544411382728633e-06,
"loss": 3.0415,
"step": 1061000
},
{
"epoch": 8.7,
"learning_rate": 6.542767430008713e-06,
"loss": 3.0404,
"step": 1061500
},
{
"epoch": 8.7,
"learning_rate": 6.541123477288794e-06,
"loss": 3.0347,
"step": 1062000
},
{
"epoch": 8.7,
"learning_rate": 6.539479524568874e-06,
"loss": 3.0389,
"step": 1062500
},
{
"epoch": 8.71,
"learning_rate": 6.537835571848954e-06,
"loss": 3.044,
"step": 1063000
},
{
"epoch": 8.71,
"learning_rate": 6.536191619129034e-06,
"loss": 3.0347,
"step": 1063500
},
{
"epoch": 8.72,
"learning_rate": 6.534547666409114e-06,
"loss": 3.0444,
"step": 1064000
},
{
"epoch": 8.72,
"learning_rate": 6.532903713689195e-06,
"loss": 3.0326,
"step": 1064500
},
{
"epoch": 8.73,
"learning_rate": 6.531259760969275e-06,
"loss": 3.0371,
"step": 1065000
},
{
"epoch": 8.73,
"learning_rate": 6.529615808249355e-06,
"loss": 3.0326,
"step": 1065500
},
{
"epoch": 8.73,
"learning_rate": 6.5279718555294355e-06,
"loss": 3.0373,
"step": 1066000
},
{
"epoch": 8.74,
"learning_rate": 6.5263279028095165e-06,
"loss": 3.0276,
"step": 1066500
},
{
"epoch": 8.74,
"learning_rate": 6.524683950089597e-06,
"loss": 3.0328,
"step": 1067000
},
{
"epoch": 8.75,
"learning_rate": 6.523039997369676e-06,
"loss": 3.0332,
"step": 1067500
},
{
"epoch": 8.75,
"learning_rate": 6.521396044649756e-06,
"loss": 3.0334,
"step": 1068000
},
{
"epoch": 8.75,
"learning_rate": 6.519752091929836e-06,
"loss": 3.0387,
"step": 1068500
},
{
"epoch": 8.76,
"learning_rate": 6.518108139209916e-06,
"loss": 3.0343,
"step": 1069000
},
{
"epoch": 8.76,
"learning_rate": 6.516464186489997e-06,
"loss": 3.0377,
"step": 1069500
},
{
"epoch": 8.77,
"learning_rate": 6.514820233770077e-06,
"loss": 3.0383,
"step": 1070000
},
{
"epoch": 8.77,
"learning_rate": 6.5131762810501575e-06,
"loss": 3.0366,
"step": 1070500
},
{
"epoch": 8.77,
"learning_rate": 6.511532328330238e-06,
"loss": 3.0359,
"step": 1071000
},
{
"epoch": 8.78,
"learning_rate": 6.5098883756103186e-06,
"loss": 3.0398,
"step": 1071500
},
{
"epoch": 8.78,
"learning_rate": 6.508244422890399e-06,
"loss": 3.037,
"step": 1072000
},
{
"epoch": 8.79,
"learning_rate": 6.506600470170479e-06,
"loss": 3.0314,
"step": 1072500
},
{
"epoch": 8.79,
"learning_rate": 6.504956517450559e-06,
"loss": 3.0387,
"step": 1073000
},
{
"epoch": 8.79,
"learning_rate": 6.503312564730639e-06,
"loss": 3.0297,
"step": 1073500
},
{
"epoch": 8.8,
"learning_rate": 6.501668612010718e-06,
"loss": 3.0405,
"step": 1074000
},
{
"epoch": 8.8,
"learning_rate": 6.500024659290799e-06,
"loss": 3.0421,
"step": 1074500
},
{
"epoch": 8.81,
"learning_rate": 6.4983807065708795e-06,
"loss": 3.0287,
"step": 1075000
},
{
"epoch": 8.81,
"learning_rate": 6.49673675385096e-06,
"loss": 3.0283,
"step": 1075500
},
{
"epoch": 8.82,
"learning_rate": 6.49509280113104e-06,
"loss": 3.0377,
"step": 1076000
},
{
"epoch": 8.82,
"learning_rate": 6.49344884841112e-06,
"loss": 3.0367,
"step": 1076500
},
{
"epoch": 8.82,
"learning_rate": 6.491804895691201e-06,
"loss": 3.0373,
"step": 1077000
},
{
"epoch": 8.83,
"learning_rate": 6.490160942971281e-06,
"loss": 3.0327,
"step": 1077500
},
{
"epoch": 8.83,
"learning_rate": 6.488516990251361e-06,
"loss": 3.0261,
"step": 1078000
},
{
"epoch": 8.84,
"learning_rate": 6.486873037531441e-06,
"loss": 3.0347,
"step": 1078500
},
{
"epoch": 8.84,
"learning_rate": 6.485229084811522e-06,
"loss": 3.0382,
"step": 1079000
},
{
"epoch": 8.84,
"learning_rate": 6.483585132091602e-06,
"loss": 3.0369,
"step": 1079500
},
{
"epoch": 8.85,
"learning_rate": 6.4819411793716816e-06,
"loss": 3.0377,
"step": 1080000
},
{
"epoch": 8.85,
"eval_accuracy": 0.4847903545689289,
"eval_loss": 2.895965814590454,
"eval_runtime": 407.6957,
"eval_samples_per_second": 756.339,
"eval_steps_per_second": 15.759,
"step": 1080000
},
{
"epoch": 8.85,
"learning_rate": 6.480297226651762e-06,
"loss": 3.0378,
"step": 1080500
},
{
"epoch": 8.86,
"learning_rate": 6.478653273931842e-06,
"loss": 3.0374,
"step": 1081000
},
{
"epoch": 8.86,
"learning_rate": 6.477009321211922e-06,
"loss": 3.0315,
"step": 1081500
},
{
"epoch": 8.86,
"learning_rate": 6.475365368492003e-06,
"loss": 3.032,
"step": 1082000
},
{
"epoch": 8.87,
"learning_rate": 6.473721415772083e-06,
"loss": 3.0362,
"step": 1082500
},
{
"epoch": 8.87,
"learning_rate": 6.472077463052163e-06,
"loss": 3.0368,
"step": 1083000
},
{
"epoch": 8.88,
"learning_rate": 6.470433510332243e-06,
"loss": 3.0297,
"step": 1083500
},
{
"epoch": 8.88,
"learning_rate": 6.468789557612324e-06,
"loss": 3.031,
"step": 1084000
},
{
"epoch": 8.89,
"learning_rate": 6.467145604892404e-06,
"loss": 3.0321,
"step": 1084500
},
{
"epoch": 8.89,
"learning_rate": 6.4655016521724845e-06,
"loss": 3.0335,
"step": 1085000
},
{
"epoch": 8.89,
"learning_rate": 6.463857699452565e-06,
"loss": 3.0344,
"step": 1085500
},
{
"epoch": 8.9,
"learning_rate": 6.462213746732644e-06,
"loss": 3.0378,
"step": 1086000
},
{
"epoch": 8.9,
"learning_rate": 6.460569794012724e-06,
"loss": 3.0306,
"step": 1086500
},
{
"epoch": 8.91,
"learning_rate": 6.458925841292805e-06,
"loss": 3.0363,
"step": 1087000
},
{
"epoch": 8.91,
"learning_rate": 6.457281888572885e-06,
"loss": 3.0303,
"step": 1087500
},
{
"epoch": 8.91,
"learning_rate": 6.455637935852965e-06,
"loss": 3.0305,
"step": 1088000
},
{
"epoch": 8.92,
"learning_rate": 6.453993983133045e-06,
"loss": 3.0352,
"step": 1088500
},
{
"epoch": 8.92,
"learning_rate": 6.4523500304131255e-06,
"loss": 3.0338,
"step": 1089000
},
{
"epoch": 8.93,
"learning_rate": 6.4507060776932065e-06,
"loss": 3.036,
"step": 1089500
},
{
"epoch": 8.93,
"learning_rate": 6.449062124973287e-06,
"loss": 3.0388,
"step": 1090000
},
{
"epoch": 8.93,
"learning_rate": 6.447418172253367e-06,
"loss": 3.0292,
"step": 1090500
},
{
"epoch": 8.94,
"learning_rate": 6.445774219533447e-06,
"loss": 3.0351,
"step": 1091000
},
{
"epoch": 8.94,
"learning_rate": 6.444130266813528e-06,
"loss": 3.0325,
"step": 1091500
},
{
"epoch": 8.95,
"learning_rate": 6.442486314093606e-06,
"loss": 3.0258,
"step": 1092000
},
{
"epoch": 8.95,
"learning_rate": 6.440842361373687e-06,
"loss": 3.0293,
"step": 1092500
},
{
"epoch": 8.95,
"learning_rate": 6.439198408653767e-06,
"loss": 3.0273,
"step": 1093000
},
{
"epoch": 8.96,
"learning_rate": 6.4375544559338475e-06,
"loss": 3.0372,
"step": 1093500
},
{
"epoch": 8.96,
"learning_rate": 6.435910503213928e-06,
"loss": 3.0329,
"step": 1094000
},
{
"epoch": 8.97,
"learning_rate": 6.434266550494009e-06,
"loss": 3.0329,
"step": 1094500
},
{
"epoch": 8.97,
"learning_rate": 6.432622597774089e-06,
"loss": 3.0295,
"step": 1095000
},
{
"epoch": 8.98,
"learning_rate": 6.430978645054169e-06,
"loss": 3.0264,
"step": 1095500
},
{
"epoch": 8.98,
"learning_rate": 6.429334692334249e-06,
"loss": 3.028,
"step": 1096000
},
{
"epoch": 8.98,
"learning_rate": 6.427690739614329e-06,
"loss": 3.0408,
"step": 1096500
},
{
"epoch": 8.99,
"learning_rate": 6.42604678689441e-06,
"loss": 3.0269,
"step": 1097000
},
{
"epoch": 8.99,
"learning_rate": 6.42440283417449e-06,
"loss": 3.0304,
"step": 1097500
},
{
"epoch": 9.0,
"learning_rate": 6.4227588814545695e-06,
"loss": 3.0328,
"step": 1098000
},
{
"epoch": 9.0,
"learning_rate": 6.42111492873465e-06,
"loss": 3.0254,
"step": 1098500
},
{
"epoch": 9.0,
"learning_rate": 6.41947097601473e-06,
"loss": 3.0365,
"step": 1099000
},
{
"epoch": 9.01,
"learning_rate": 6.417827023294811e-06,
"loss": 3.0218,
"step": 1099500
},
{
"epoch": 9.01,
"learning_rate": 6.416183070574891e-06,
"loss": 3.0276,
"step": 1100000
},
{
"epoch": 9.02,
"learning_rate": 6.414539117854971e-06,
"loss": 3.0299,
"step": 1100500
},
{
"epoch": 9.02,
"learning_rate": 6.412895165135051e-06,
"loss": 3.0222,
"step": 1101000
},
{
"epoch": 9.02,
"learning_rate": 6.411251212415131e-06,
"loss": 3.0279,
"step": 1101500
},
{
"epoch": 9.03,
"learning_rate": 6.409607259695212e-06,
"loss": 3.0317,
"step": 1102000
},
{
"epoch": 9.03,
"learning_rate": 6.407963306975292e-06,
"loss": 3.0244,
"step": 1102500
},
{
"epoch": 9.04,
"learning_rate": 6.4063193542553725e-06,
"loss": 3.0241,
"step": 1103000
},
{
"epoch": 9.04,
"learning_rate": 6.404675401535453e-06,
"loss": 3.0223,
"step": 1103500
},
{
"epoch": 9.04,
"learning_rate": 6.403031448815532e-06,
"loss": 3.0291,
"step": 1104000
},
{
"epoch": 9.05,
"learning_rate": 6.401387496095612e-06,
"loss": 3.0236,
"step": 1104500
},
{
"epoch": 9.05,
"learning_rate": 6.399743543375693e-06,
"loss": 3.0248,
"step": 1105000
},
{
"epoch": 9.06,
"learning_rate": 6.398099590655773e-06,
"loss": 3.0271,
"step": 1105500
},
{
"epoch": 9.06,
"learning_rate": 6.396455637935853e-06,
"loss": 3.0282,
"step": 1106000
},
{
"epoch": 9.07,
"learning_rate": 6.394811685215933e-06,
"loss": 3.0316,
"step": 1106500
},
{
"epoch": 9.07,
"learning_rate": 6.393167732496014e-06,
"loss": 3.0257,
"step": 1107000
},
{
"epoch": 9.07,
"learning_rate": 6.3915237797760945e-06,
"loss": 3.0205,
"step": 1107500
},
{
"epoch": 9.08,
"learning_rate": 6.389879827056175e-06,
"loss": 3.0294,
"step": 1108000
},
{
"epoch": 9.08,
"learning_rate": 6.388235874336255e-06,
"loss": 3.0295,
"step": 1108500
},
{
"epoch": 9.09,
"learning_rate": 6.386591921616335e-06,
"loss": 3.0257,
"step": 1109000
},
{
"epoch": 9.09,
"learning_rate": 6.384947968896416e-06,
"loss": 3.0275,
"step": 1109500
},
{
"epoch": 9.09,
"learning_rate": 6.383304016176495e-06,
"loss": 3.0284,
"step": 1110000
},
{
"epoch": 9.09,
"eval_accuracy": 0.48608889348818574,
"eval_loss": 2.885928153991699,
"eval_runtime": 407.7381,
"eval_samples_per_second": 756.26,
"eval_steps_per_second": 15.758,
"step": 1110000
},
{
"epoch": 9.1,
"learning_rate": 6.381660063456575e-06,
"loss": 3.0216,
"step": 1110500
},
{
"epoch": 9.1,
"learning_rate": 6.380016110736655e-06,
"loss": 3.0199,
"step": 1111000
},
{
"epoch": 9.11,
"learning_rate": 6.3783721580167355e-06,
"loss": 3.0232,
"step": 1111500
},
{
"epoch": 9.11,
"learning_rate": 6.3767282052968164e-06,
"loss": 3.0248,
"step": 1112000
},
{
"epoch": 9.11,
"learning_rate": 6.3750842525768966e-06,
"loss": 3.0276,
"step": 1112500
},
{
"epoch": 9.12,
"learning_rate": 6.373440299856977e-06,
"loss": 3.0284,
"step": 1113000
},
{
"epoch": 9.12,
"learning_rate": 6.371796347137057e-06,
"loss": 3.0302,
"step": 1113500
},
{
"epoch": 9.13,
"learning_rate": 6.370152394417137e-06,
"loss": 3.0274,
"step": 1114000
},
{
"epoch": 9.13,
"learning_rate": 6.368508441697218e-06,
"loss": 3.0335,
"step": 1114500
},
{
"epoch": 9.14,
"learning_rate": 6.366864488977298e-06,
"loss": 3.0209,
"step": 1115000
},
{
"epoch": 9.14,
"learning_rate": 6.365220536257378e-06,
"loss": 3.0297,
"step": 1115500
},
{
"epoch": 9.14,
"learning_rate": 6.3635765835374575e-06,
"loss": 3.0267,
"step": 1116000
},
{
"epoch": 9.15,
"learning_rate": 6.361932630817538e-06,
"loss": 3.0261,
"step": 1116500
},
{
"epoch": 9.15,
"learning_rate": 6.360288678097618e-06,
"loss": 3.0273,
"step": 1117000
},
{
"epoch": 9.16,
"learning_rate": 6.358644725377699e-06,
"loss": 3.0163,
"step": 1117500
},
{
"epoch": 9.16,
"learning_rate": 6.357000772657779e-06,
"loss": 3.0258,
"step": 1118000
},
{
"epoch": 9.16,
"learning_rate": 6.355356819937859e-06,
"loss": 3.0321,
"step": 1118500
},
{
"epoch": 9.17,
"learning_rate": 6.353712867217939e-06,
"loss": 3.0181,
"step": 1119000
},
{
"epoch": 9.17,
"learning_rate": 6.35206891449802e-06,
"loss": 3.0194,
"step": 1119500
},
{
"epoch": 9.18,
"learning_rate": 6.3504249617781e-06,
"loss": 3.0279,
"step": 1120000
},
{
"epoch": 9.18,
"learning_rate": 6.34878100905818e-06,
"loss": 3.0236,
"step": 1120500
},
{
"epoch": 9.18,
"learning_rate": 6.34713705633826e-06,
"loss": 3.0279,
"step": 1121000
},
{
"epoch": 9.19,
"learning_rate": 6.3454931036183405e-06,
"loss": 3.026,
"step": 1121500
},
{
"epoch": 9.19,
"learning_rate": 6.34384915089842e-06,
"loss": 3.0217,
"step": 1122000
},
{
"epoch": 9.2,
"learning_rate": 6.342205198178501e-06,
"loss": 3.0299,
"step": 1122500
},
{
"epoch": 9.2,
"learning_rate": 6.340561245458581e-06,
"loss": 3.0203,
"step": 1123000
},
{
"epoch": 9.2,
"learning_rate": 6.338917292738661e-06,
"loss": 3.0224,
"step": 1123500
},
{
"epoch": 9.21,
"learning_rate": 6.337273340018741e-06,
"loss": 3.0184,
"step": 1124000
},
{
"epoch": 9.21,
"learning_rate": 6.335629387298821e-06,
"loss": 3.0215,
"step": 1124500
},
{
"epoch": 9.22,
"learning_rate": 6.333985434578902e-06,
"loss": 3.0261,
"step": 1125000
},
{
"epoch": 9.22,
"learning_rate": 6.332341481858982e-06,
"loss": 3.0242,
"step": 1125500
},
{
"epoch": 9.23,
"learning_rate": 6.3306975291390625e-06,
"loss": 3.0246,
"step": 1126000
},
{
"epoch": 9.23,
"learning_rate": 6.329053576419143e-06,
"loss": 3.0327,
"step": 1126500
},
{
"epoch": 9.23,
"learning_rate": 6.327409623699224e-06,
"loss": 3.0222,
"step": 1127000
},
{
"epoch": 9.24,
"learning_rate": 6.325765670979304e-06,
"loss": 3.0257,
"step": 1127500
},
{
"epoch": 9.24,
"learning_rate": 6.324121718259384e-06,
"loss": 3.0228,
"step": 1128000
},
{
"epoch": 9.25,
"learning_rate": 6.322477765539463e-06,
"loss": 3.0246,
"step": 1128500
},
{
"epoch": 9.25,
"learning_rate": 6.320833812819543e-06,
"loss": 3.0205,
"step": 1129000
},
{
"epoch": 9.25,
"learning_rate": 6.319189860099623e-06,
"loss": 3.0198,
"step": 1129500
},
{
"epoch": 9.26,
"learning_rate": 6.317545907379704e-06,
"loss": 3.0227,
"step": 1130000
},
{
"epoch": 9.26,
"learning_rate": 6.3159019546597845e-06,
"loss": 3.0205,
"step": 1130500
},
{
"epoch": 9.27,
"learning_rate": 6.314258001939865e-06,
"loss": 3.0218,
"step": 1131000
},
{
"epoch": 9.27,
"learning_rate": 6.312614049219945e-06,
"loss": 3.0221,
"step": 1131500
},
{
"epoch": 9.27,
"learning_rate": 6.310970096500026e-06,
"loss": 3.017,
"step": 1132000
},
{
"epoch": 9.28,
"learning_rate": 6.309326143780106e-06,
"loss": 3.0187,
"step": 1132500
},
{
"epoch": 9.28,
"learning_rate": 6.307682191060186e-06,
"loss": 3.0173,
"step": 1133000
},
{
"epoch": 9.29,
"learning_rate": 6.306038238340266e-06,
"loss": 3.0229,
"step": 1133500
},
{
"epoch": 9.29,
"learning_rate": 6.304394285620346e-06,
"loss": 3.0254,
"step": 1134000
},
{
"epoch": 9.29,
"learning_rate": 6.3027503329004255e-06,
"loss": 3.0292,
"step": 1134500
},
{
"epoch": 9.3,
"learning_rate": 6.3011063801805065e-06,
"loss": 3.0172,
"step": 1135000
},
{
"epoch": 9.3,
"learning_rate": 6.299462427460587e-06,
"loss": 3.0233,
"step": 1135500
},
{
"epoch": 9.31,
"learning_rate": 6.297818474740667e-06,
"loss": 3.0245,
"step": 1136000
},
{
"epoch": 9.31,
"learning_rate": 6.296174522020747e-06,
"loss": 3.0165,
"step": 1136500
},
{
"epoch": 9.32,
"learning_rate": 6.294530569300827e-06,
"loss": 3.0161,
"step": 1137000
},
{
"epoch": 9.32,
"learning_rate": 6.292886616580908e-06,
"loss": 3.017,
"step": 1137500
},
{
"epoch": 9.32,
"learning_rate": 6.291242663860988e-06,
"loss": 3.0244,
"step": 1138000
},
{
"epoch": 9.33,
"learning_rate": 6.289598711141068e-06,
"loss": 3.0189,
"step": 1138500
},
{
"epoch": 9.33,
"learning_rate": 6.287954758421148e-06,
"loss": 3.0196,
"step": 1139000
},
{
"epoch": 9.34,
"learning_rate": 6.286310805701229e-06,
"loss": 3.0169,
"step": 1139500
},
{
"epoch": 9.34,
"learning_rate": 6.2846668529813095e-06,
"loss": 3.0224,
"step": 1140000
},
{
"epoch": 9.34,
"eval_accuracy": 0.4867489885373119,
"eval_loss": 2.881913185119629,
"eval_runtime": 406.5724,
"eval_samples_per_second": 758.428,
"eval_steps_per_second": 15.803,
"step": 1140000
},
{
"epoch": 9.34,
"learning_rate": 6.283022900261389e-06,
"loss": 3.0198,
"step": 1140500
},
{
"epoch": 9.35,
"learning_rate": 6.281378947541469e-06,
"loss": 3.0248,
"step": 1141000
},
{
"epoch": 9.35,
"learning_rate": 6.279734994821549e-06,
"loss": 3.0256,
"step": 1141500
},
{
"epoch": 9.36,
"learning_rate": 6.278091042101629e-06,
"loss": 3.0153,
"step": 1142000
},
{
"epoch": 9.36,
"learning_rate": 6.27644708938171e-06,
"loss": 3.0159,
"step": 1142500
},
{
"epoch": 9.36,
"learning_rate": 6.27480313666179e-06,
"loss": 3.0145,
"step": 1143000
},
{
"epoch": 9.37,
"learning_rate": 6.27315918394187e-06,
"loss": 3.0236,
"step": 1143500
},
{
"epoch": 9.37,
"learning_rate": 6.2715152312219505e-06,
"loss": 3.0184,
"step": 1144000
},
{
"epoch": 9.38,
"learning_rate": 6.2698712785020314e-06,
"loss": 3.0168,
"step": 1144500
},
{
"epoch": 9.38,
"learning_rate": 6.2682273257821116e-06,
"loss": 3.0201,
"step": 1145000
},
{
"epoch": 9.38,
"learning_rate": 6.266583373062192e-06,
"loss": 3.0186,
"step": 1145500
},
{
"epoch": 9.39,
"learning_rate": 6.264939420342272e-06,
"loss": 3.0217,
"step": 1146000
},
{
"epoch": 9.39,
"learning_rate": 6.263295467622351e-06,
"loss": 3.0154,
"step": 1146500
},
{
"epoch": 9.4,
"learning_rate": 6.261651514902431e-06,
"loss": 3.0197,
"step": 1147000
},
{
"epoch": 9.4,
"learning_rate": 6.260007562182512e-06,
"loss": 3.0149,
"step": 1147500
},
{
"epoch": 9.41,
"learning_rate": 6.258363609462592e-06,
"loss": 3.0203,
"step": 1148000
},
{
"epoch": 9.41,
"learning_rate": 6.2567196567426725e-06,
"loss": 3.0206,
"step": 1148500
},
{
"epoch": 9.41,
"learning_rate": 6.255075704022753e-06,
"loss": 3.0151,
"step": 1149000
},
{
"epoch": 9.42,
"learning_rate": 6.253431751302833e-06,
"loss": 3.0192,
"step": 1149500
},
{
"epoch": 9.42,
"learning_rate": 6.251787798582914e-06,
"loss": 3.0161,
"step": 1150000
},
{
"epoch": 9.43,
"learning_rate": 6.250143845862994e-06,
"loss": 3.0224,
"step": 1150500
},
{
"epoch": 9.43,
"learning_rate": 6.248499893143074e-06,
"loss": 3.02,
"step": 1151000
},
{
"epoch": 9.43,
"learning_rate": 6.246855940423154e-06,
"loss": 3.0231,
"step": 1151500
},
{
"epoch": 9.44,
"learning_rate": 6.245211987703235e-06,
"loss": 3.0199,
"step": 1152000
},
{
"epoch": 9.44,
"learning_rate": 6.2435680349833135e-06,
"loss": 3.0159,
"step": 1152500
},
{
"epoch": 9.45,
"learning_rate": 6.2419240822633944e-06,
"loss": 3.0192,
"step": 1153000
},
{
"epoch": 9.45,
"learning_rate": 6.2402801295434746e-06,
"loss": 3.0151,
"step": 1153500
},
{
"epoch": 9.45,
"learning_rate": 6.238636176823555e-06,
"loss": 3.0198,
"step": 1154000
},
{
"epoch": 9.46,
"learning_rate": 6.236992224103635e-06,
"loss": 3.0142,
"step": 1154500
},
{
"epoch": 9.46,
"learning_rate": 6.235348271383716e-06,
"loss": 3.0177,
"step": 1155000
},
{
"epoch": 9.47,
"learning_rate": 6.233704318663796e-06,
"loss": 3.0148,
"step": 1155500
},
{
"epoch": 9.47,
"learning_rate": 6.232060365943876e-06,
"loss": 3.0101,
"step": 1156000
},
{
"epoch": 9.48,
"learning_rate": 6.230416413223956e-06,
"loss": 3.0113,
"step": 1156500
},
{
"epoch": 9.48,
"learning_rate": 6.228772460504036e-06,
"loss": 3.0084,
"step": 1157000
},
{
"epoch": 9.48,
"learning_rate": 6.227128507784117e-06,
"loss": 3.0133,
"step": 1157500
},
{
"epoch": 9.49,
"learning_rate": 6.225484555064197e-06,
"loss": 3.0143,
"step": 1158000
},
{
"epoch": 9.49,
"learning_rate": 6.223840602344277e-06,
"loss": 3.0167,
"step": 1158500
},
{
"epoch": 9.5,
"learning_rate": 6.222196649624357e-06,
"loss": 3.0136,
"step": 1159000
},
{
"epoch": 9.5,
"learning_rate": 6.220552696904437e-06,
"loss": 3.012,
"step": 1159500
},
{
"epoch": 9.5,
"learning_rate": 6.218908744184518e-06,
"loss": 3.012,
"step": 1160000
},
{
"epoch": 9.51,
"learning_rate": 6.217264791464598e-06,
"loss": 3.0111,
"step": 1160500
},
{
"epoch": 9.51,
"learning_rate": 6.215620838744678e-06,
"loss": 3.0147,
"step": 1161000
},
{
"epoch": 9.52,
"learning_rate": 6.213976886024758e-06,
"loss": 3.015,
"step": 1161500
},
{
"epoch": 9.52,
"learning_rate": 6.212332933304838e-06,
"loss": 3.017,
"step": 1162000
},
{
"epoch": 9.52,
"learning_rate": 6.210688980584919e-06,
"loss": 3.0108,
"step": 1162500
},
{
"epoch": 9.53,
"learning_rate": 6.2090450278649995e-06,
"loss": 3.0125,
"step": 1163000
},
{
"epoch": 9.53,
"learning_rate": 6.20740107514508e-06,
"loss": 3.0126,
"step": 1163500
},
{
"epoch": 9.54,
"learning_rate": 6.20575712242516e-06,
"loss": 3.0209,
"step": 1164000
},
{
"epoch": 9.54,
"learning_rate": 6.204113169705239e-06,
"loss": 3.0089,
"step": 1164500
},
{
"epoch": 9.54,
"learning_rate": 6.202469216985319e-06,
"loss": 3.0135,
"step": 1165000
},
{
"epoch": 9.55,
"learning_rate": 6.2008252642654e-06,
"loss": 3.011,
"step": 1165500
},
{
"epoch": 9.55,
"learning_rate": 6.19918131154548e-06,
"loss": 3.0125,
"step": 1166000
},
{
"epoch": 9.56,
"learning_rate": 6.19753735882556e-06,
"loss": 3.0127,
"step": 1166500
},
{
"epoch": 9.56,
"learning_rate": 6.1958934061056405e-06,
"loss": 3.0144,
"step": 1167000
},
{
"epoch": 9.57,
"learning_rate": 6.1942494533857215e-06,
"loss": 3.0174,
"step": 1167500
},
{
"epoch": 9.57,
"learning_rate": 6.192605500665802e-06,
"loss": 3.0088,
"step": 1168000
},
{
"epoch": 9.57,
"learning_rate": 6.190961547945882e-06,
"loss": 3.0135,
"step": 1168500
},
{
"epoch": 9.58,
"learning_rate": 6.189317595225962e-06,
"loss": 3.0141,
"step": 1169000
},
{
"epoch": 9.58,
"learning_rate": 6.187673642506042e-06,
"loss": 3.0075,
"step": 1169500
},
{
"epoch": 9.59,
"learning_rate": 6.186029689786123e-06,
"loss": 3.019,
"step": 1170000
},
{
"epoch": 9.59,
"eval_accuracy": 0.4878472453037176,
"eval_loss": 2.873086929321289,
"eval_runtime": 405.404,
"eval_samples_per_second": 760.614,
"eval_steps_per_second": 15.848,
"step": 1170000
},
{
"epoch": 9.59,
"learning_rate": 6.184385737066202e-06,
"loss": 3.0057,
"step": 1170500
},
{
"epoch": 9.59,
"learning_rate": 6.182741784346282e-06,
"loss": 3.0136,
"step": 1171000
},
{
"epoch": 9.6,
"learning_rate": 6.1810978316263625e-06,
"loss": 3.0157,
"step": 1171500
},
{
"epoch": 9.6,
"learning_rate": 6.179453878906443e-06,
"loss": 3.0087,
"step": 1172000
},
{
"epoch": 9.61,
"learning_rate": 6.177809926186524e-06,
"loss": 3.0223,
"step": 1172500
},
{
"epoch": 9.61,
"learning_rate": 6.176165973466604e-06,
"loss": 3.0064,
"step": 1173000
},
{
"epoch": 9.61,
"learning_rate": 6.174522020746684e-06,
"loss": 3.0084,
"step": 1173500
},
{
"epoch": 9.62,
"learning_rate": 6.172878068026764e-06,
"loss": 3.0118,
"step": 1174000
},
{
"epoch": 9.62,
"learning_rate": 6.171234115306844e-06,
"loss": 3.0115,
"step": 1174500
},
{
"epoch": 9.63,
"learning_rate": 6.169590162586925e-06,
"loss": 3.0137,
"step": 1175000
},
{
"epoch": 9.63,
"learning_rate": 6.167946209867005e-06,
"loss": 3.0106,
"step": 1175500
},
{
"epoch": 9.63,
"learning_rate": 6.166302257147085e-06,
"loss": 3.0104,
"step": 1176000
},
{
"epoch": 9.64,
"learning_rate": 6.1646583044271655e-06,
"loss": 3.0126,
"step": 1176500
},
{
"epoch": 9.64,
"learning_rate": 6.163014351707245e-06,
"loss": 3.0086,
"step": 1177000
},
{
"epoch": 9.65,
"learning_rate": 6.161370398987325e-06,
"loss": 3.0148,
"step": 1177500
},
{
"epoch": 9.65,
"learning_rate": 6.159726446267406e-06,
"loss": 3.0146,
"step": 1178000
},
{
"epoch": 9.66,
"learning_rate": 6.158082493547486e-06,
"loss": 3.0155,
"step": 1178500
},
{
"epoch": 9.66,
"learning_rate": 6.156438540827566e-06,
"loss": 3.012,
"step": 1179000
},
{
"epoch": 9.66,
"learning_rate": 6.154794588107646e-06,
"loss": 3.0118,
"step": 1179500
},
{
"epoch": 9.67,
"learning_rate": 6.153150635387727e-06,
"loss": 3.0086,
"step": 1180000
},
{
"epoch": 9.67,
"learning_rate": 6.151506682667807e-06,
"loss": 3.0139,
"step": 1180500
},
{
"epoch": 9.68,
"learning_rate": 6.1498627299478875e-06,
"loss": 3.0105,
"step": 1181000
},
{
"epoch": 9.68,
"learning_rate": 6.148218777227968e-06,
"loss": 3.012,
"step": 1181500
},
{
"epoch": 9.68,
"learning_rate": 6.146574824508048e-06,
"loss": 3.0096,
"step": 1182000
},
{
"epoch": 9.69,
"learning_rate": 6.144930871788129e-06,
"loss": 3.0109,
"step": 1182500
},
{
"epoch": 9.69,
"learning_rate": 6.143286919068208e-06,
"loss": 3.0119,
"step": 1183000
},
{
"epoch": 9.7,
"learning_rate": 6.141642966348288e-06,
"loss": 3.0086,
"step": 1183500
},
{
"epoch": 9.7,
"learning_rate": 6.139999013628368e-06,
"loss": 3.014,
"step": 1184000
},
{
"epoch": 9.7,
"learning_rate": 6.138355060908448e-06,
"loss": 3.0107,
"step": 1184500
},
{
"epoch": 9.71,
"learning_rate": 6.1367111081885285e-06,
"loss": 3.0039,
"step": 1185000
},
{
"epoch": 9.71,
"learning_rate": 6.1350671554686094e-06,
"loss": 3.0066,
"step": 1185500
},
{
"epoch": 9.72,
"learning_rate": 6.1334232027486896e-06,
"loss": 3.0103,
"step": 1186000
},
{
"epoch": 9.72,
"learning_rate": 6.13177925002877e-06,
"loss": 3.0094,
"step": 1186500
},
{
"epoch": 9.72,
"learning_rate": 6.13013529730885e-06,
"loss": 3.011,
"step": 1187000
},
{
"epoch": 9.73,
"learning_rate": 6.128491344588931e-06,
"loss": 3.0094,
"step": 1187500
},
{
"epoch": 9.73,
"learning_rate": 6.126847391869011e-06,
"loss": 3.0036,
"step": 1188000
},
{
"epoch": 9.74,
"learning_rate": 6.125203439149091e-06,
"loss": 3.0096,
"step": 1188500
},
{
"epoch": 9.74,
"learning_rate": 6.12355948642917e-06,
"loss": 3.0105,
"step": 1189000
},
{
"epoch": 9.75,
"learning_rate": 6.1219155337092504e-06,
"loss": 3.008,
"step": 1189500
},
{
"epoch": 9.75,
"learning_rate": 6.1202715809893306e-06,
"loss": 3.0032,
"step": 1190000
},
{
"epoch": 9.75,
"learning_rate": 6.1186276282694115e-06,
"loss": 3.0091,
"step": 1190500
},
{
"epoch": 9.76,
"learning_rate": 6.116983675549492e-06,
"loss": 3.0035,
"step": 1191000
},
{
"epoch": 9.76,
"learning_rate": 6.115339722829572e-06,
"loss": 3.0091,
"step": 1191500
},
{
"epoch": 9.77,
"learning_rate": 6.113695770109652e-06,
"loss": 3.0013,
"step": 1192000
},
{
"epoch": 9.77,
"learning_rate": 6.112051817389733e-06,
"loss": 3.0043,
"step": 1192500
},
{
"epoch": 9.77,
"learning_rate": 6.110407864669813e-06,
"loss": 3.0081,
"step": 1193000
},
{
"epoch": 9.78,
"learning_rate": 6.108763911949893e-06,
"loss": 3.0055,
"step": 1193500
},
{
"epoch": 9.78,
"learning_rate": 6.107119959229973e-06,
"loss": 3.011,
"step": 1194000
},
{
"epoch": 9.79,
"learning_rate": 6.105476006510053e-06,
"loss": 3.0094,
"step": 1194500
},
{
"epoch": 9.79,
"learning_rate": 6.103832053790133e-06,
"loss": 3.0133,
"step": 1195000
},
{
"epoch": 9.79,
"learning_rate": 6.102188101070214e-06,
"loss": 3.0032,
"step": 1195500
},
{
"epoch": 9.8,
"learning_rate": 6.100544148350294e-06,
"loss": 3.0075,
"step": 1196000
},
{
"epoch": 9.8,
"learning_rate": 6.098900195630374e-06,
"loss": 3.0091,
"step": 1196500
},
{
"epoch": 9.81,
"learning_rate": 6.097256242910454e-06,
"loss": 3.011,
"step": 1197000
},
{
"epoch": 9.81,
"learning_rate": 6.095612290190534e-06,
"loss": 3.0039,
"step": 1197500
},
{
"epoch": 9.82,
"learning_rate": 6.093968337470615e-06,
"loss": 3.0036,
"step": 1198000
},
{
"epoch": 9.82,
"learning_rate": 6.092324384750695e-06,
"loss": 3.0069,
"step": 1198500
},
{
"epoch": 9.82,
"learning_rate": 6.090680432030775e-06,
"loss": 3.0114,
"step": 1199000
},
{
"epoch": 9.83,
"learning_rate": 6.0890364793108555e-06,
"loss": 3.006,
"step": 1199500
},
{
"epoch": 9.83,
"learning_rate": 6.0873925265909365e-06,
"loss": 3.0094,
"step": 1200000
},
{
"epoch": 9.83,
"eval_accuracy": 0.48851002828245305,
"eval_loss": 2.8687474727630615,
"eval_runtime": 404.2357,
"eval_samples_per_second": 762.812,
"eval_steps_per_second": 15.894,
"step": 1200000
},
{
"epoch": 9.84,
"learning_rate": 6.085748573871017e-06,
"loss": 2.9995,
"step": 1200500
},
{
"epoch": 9.84,
"learning_rate": 6.084104621151096e-06,
"loss": 3.0002,
"step": 1201000
},
{
"epoch": 9.84,
"learning_rate": 6.082460668431176e-06,
"loss": 3.0047,
"step": 1201500
},
{
"epoch": 9.85,
"learning_rate": 6.080816715711256e-06,
"loss": 3.0093,
"step": 1202000
},
{
"epoch": 9.85,
"learning_rate": 6.079172762991336e-06,
"loss": 3.0018,
"step": 1202500
},
{
"epoch": 9.86,
"learning_rate": 6.077528810271417e-06,
"loss": 3.0067,
"step": 1203000
},
{
"epoch": 9.86,
"learning_rate": 6.075884857551497e-06,
"loss": 3.01,
"step": 1203500
},
{
"epoch": 9.86,
"learning_rate": 6.0742409048315775e-06,
"loss": 3.0108,
"step": 1204000
},
{
"epoch": 9.87,
"learning_rate": 6.072596952111658e-06,
"loss": 3.0089,
"step": 1204500
},
{
"epoch": 9.87,
"learning_rate": 6.070952999391739e-06,
"loss": 3.0047,
"step": 1205000
},
{
"epoch": 9.88,
"learning_rate": 6.069309046671819e-06,
"loss": 3.0057,
"step": 1205500
},
{
"epoch": 9.88,
"learning_rate": 6.067665093951899e-06,
"loss": 3.006,
"step": 1206000
},
{
"epoch": 9.88,
"learning_rate": 6.066021141231979e-06,
"loss": 3.0104,
"step": 1206500
},
{
"epoch": 9.89,
"learning_rate": 6.064377188512058e-06,
"loss": 3.0047,
"step": 1207000
},
{
"epoch": 9.89,
"learning_rate": 6.062733235792138e-06,
"loss": 3.0062,
"step": 1207500
},
{
"epoch": 9.9,
"learning_rate": 6.061089283072219e-06,
"loss": 3.0086,
"step": 1208000
},
{
"epoch": 9.9,
"learning_rate": 6.0594453303522995e-06,
"loss": 3.0032,
"step": 1208500
},
{
"epoch": 9.91,
"learning_rate": 6.05780137763238e-06,
"loss": 3.0042,
"step": 1209000
},
{
"epoch": 9.91,
"learning_rate": 6.05615742491246e-06,
"loss": 3.0064,
"step": 1209500
},
{
"epoch": 9.91,
"learning_rate": 6.05451347219254e-06,
"loss": 3.0005,
"step": 1210000
},
{
"epoch": 9.92,
"learning_rate": 6.052869519472621e-06,
"loss": 3.0043,
"step": 1210500
},
{
"epoch": 9.92,
"learning_rate": 6.051225566752701e-06,
"loss": 3.0002,
"step": 1211000
},
{
"epoch": 9.93,
"learning_rate": 6.049581614032781e-06,
"loss": 3.011,
"step": 1211500
},
{
"epoch": 9.93,
"learning_rate": 6.047937661312861e-06,
"loss": 3.0027,
"step": 1212000
},
{
"epoch": 9.93,
"learning_rate": 6.046293708592942e-06,
"loss": 3.0036,
"step": 1212500
},
{
"epoch": 9.94,
"learning_rate": 6.044649755873021e-06,
"loss": 3.0021,
"step": 1213000
},
{
"epoch": 9.94,
"learning_rate": 6.043005803153102e-06,
"loss": 2.9972,
"step": 1213500
},
{
"epoch": 9.95,
"learning_rate": 6.041361850433182e-06,
"loss": 3.0094,
"step": 1214000
},
{
"epoch": 9.95,
"learning_rate": 6.039717897713262e-06,
"loss": 3.0087,
"step": 1214500
},
{
"epoch": 9.95,
"learning_rate": 6.038073944993342e-06,
"loss": 3.0098,
"step": 1215000
},
{
"epoch": 9.96,
"learning_rate": 6.036429992273423e-06,
"loss": 3.0037,
"step": 1215500
},
{
"epoch": 9.96,
"learning_rate": 6.034786039553503e-06,
"loss": 3.0057,
"step": 1216000
},
{
"epoch": 9.97,
"learning_rate": 6.033142086833583e-06,
"loss": 3.0026,
"step": 1216500
},
{
"epoch": 9.97,
"learning_rate": 6.031498134113663e-06,
"loss": 3.0057,
"step": 1217000
},
{
"epoch": 9.97,
"learning_rate": 6.029854181393744e-06,
"loss": 3.0037,
"step": 1217500
},
{
"epoch": 9.98,
"learning_rate": 6.0282102286738244e-06,
"loss": 3.0066,
"step": 1218000
},
{
"epoch": 9.98,
"learning_rate": 6.0265662759539046e-06,
"loss": 3.0039,
"step": 1218500
},
{
"epoch": 9.99,
"learning_rate": 6.024922323233984e-06,
"loss": 3.0073,
"step": 1219000
},
{
"epoch": 9.99,
"learning_rate": 6.023278370514064e-06,
"loss": 2.9989,
"step": 1219500
},
{
"epoch": 10.0,
"learning_rate": 6.021634417794144e-06,
"loss": 3.0029,
"step": 1220000
},
{
"epoch": 10.0,
"learning_rate": 6.019990465074225e-06,
"loss": 2.9987,
"step": 1220500
},
{
"epoch": 10.0,
"learning_rate": 6.018346512354305e-06,
"loss": 3.0004,
"step": 1221000
},
{
"epoch": 10.01,
"learning_rate": 6.016702559634385e-06,
"loss": 3.006,
"step": 1221500
},
{
"epoch": 10.01,
"learning_rate": 6.0150586069144654e-06,
"loss": 3.0002,
"step": 1222000
},
{
"epoch": 10.02,
"learning_rate": 6.0134146541945456e-06,
"loss": 3.0044,
"step": 1222500
},
{
"epoch": 10.02,
"learning_rate": 6.0117707014746265e-06,
"loss": 2.9957,
"step": 1223000
},
{
"epoch": 10.02,
"learning_rate": 6.010126748754707e-06,
"loss": 3.0001,
"step": 1223500
},
{
"epoch": 10.03,
"learning_rate": 6.008482796034787e-06,
"loss": 3.0058,
"step": 1224000
},
{
"epoch": 10.03,
"learning_rate": 6.006838843314867e-06,
"loss": 2.9946,
"step": 1224500
},
{
"epoch": 10.04,
"learning_rate": 6.005194890594948e-06,
"loss": 3.0,
"step": 1225000
},
{
"epoch": 10.04,
"learning_rate": 6.003550937875026e-06,
"loss": 2.9992,
"step": 1225500
},
{
"epoch": 10.04,
"learning_rate": 6.001906985155107e-06,
"loss": 2.9963,
"step": 1226000
},
{
"epoch": 10.05,
"learning_rate": 6.0002630324351874e-06,
"loss": 2.9928,
"step": 1226500
},
{
"epoch": 10.05,
"learning_rate": 5.9986190797152676e-06,
"loss": 3.0001,
"step": 1227000
},
{
"epoch": 10.06,
"learning_rate": 5.996975126995348e-06,
"loss": 3.0003,
"step": 1227500
},
{
"epoch": 10.06,
"learning_rate": 5.995331174275429e-06,
"loss": 2.9989,
"step": 1228000
},
{
"epoch": 10.06,
"learning_rate": 5.993687221555509e-06,
"loss": 2.9957,
"step": 1228500
},
{
"epoch": 10.07,
"learning_rate": 5.992043268835589e-06,
"loss": 3.0049,
"step": 1229000
},
{
"epoch": 10.07,
"learning_rate": 5.990399316115669e-06,
"loss": 2.9962,
"step": 1229500
},
{
"epoch": 10.08,
"learning_rate": 5.988755363395749e-06,
"loss": 3.0065,
"step": 1230000
},
{
"epoch": 10.08,
"eval_accuracy": 0.48933464137984234,
"eval_loss": 2.8635189533233643,
"eval_runtime": 406.4876,
"eval_samples_per_second": 758.587,
"eval_steps_per_second": 15.806,
"step": 1230000
},
{
"epoch": 10.08,
"learning_rate": 5.98711141067583e-06,
"loss": 2.998,
"step": 1230500
},
{
"epoch": 10.09,
"learning_rate": 5.98546745795591e-06,
"loss": 3.0041,
"step": 1231000
},
{
"epoch": 10.09,
"learning_rate": 5.9838235052359895e-06,
"loss": 3.0004,
"step": 1231500
},
{
"epoch": 10.09,
"learning_rate": 5.98217955251607e-06,
"loss": 3.0011,
"step": 1232000
},
{
"epoch": 10.1,
"learning_rate": 5.98053559979615e-06,
"loss": 3.0099,
"step": 1232500
},
{
"epoch": 10.1,
"learning_rate": 5.978891647076231e-06,
"loss": 3.0018,
"step": 1233000
},
{
"epoch": 10.11,
"learning_rate": 5.977247694356311e-06,
"loss": 2.9946,
"step": 1233500
},
{
"epoch": 10.11,
"learning_rate": 5.975603741636391e-06,
"loss": 3.0006,
"step": 1234000
},
{
"epoch": 10.11,
"learning_rate": 5.973959788916471e-06,
"loss": 2.9976,
"step": 1234500
},
{
"epoch": 10.12,
"learning_rate": 5.972315836196551e-06,
"loss": 2.9933,
"step": 1235000
},
{
"epoch": 10.12,
"learning_rate": 5.970671883476632e-06,
"loss": 2.997,
"step": 1235500
},
{
"epoch": 10.13,
"learning_rate": 5.969027930756712e-06,
"loss": 3.0016,
"step": 1236000
},
{
"epoch": 10.13,
"learning_rate": 5.9673839780367925e-06,
"loss": 2.9995,
"step": 1236500
},
{
"epoch": 10.13,
"learning_rate": 5.965740025316873e-06,
"loss": 3.0035,
"step": 1237000
},
{
"epoch": 10.14,
"learning_rate": 5.964096072596952e-06,
"loss": 3.002,
"step": 1237500
},
{
"epoch": 10.14,
"learning_rate": 5.962452119877032e-06,
"loss": 2.9986,
"step": 1238000
},
{
"epoch": 10.15,
"learning_rate": 5.960808167157113e-06,
"loss": 3.001,
"step": 1238500
},
{
"epoch": 10.15,
"learning_rate": 5.959164214437193e-06,
"loss": 2.9956,
"step": 1239000
},
{
"epoch": 10.16,
"learning_rate": 5.957520261717273e-06,
"loss": 3.0005,
"step": 1239500
},
{
"epoch": 10.16,
"learning_rate": 5.955876308997353e-06,
"loss": 2.9997,
"step": 1240000
},
{
"epoch": 10.16,
"learning_rate": 5.954232356277434e-06,
"loss": 2.9939,
"step": 1240500
},
{
"epoch": 10.17,
"learning_rate": 5.9525884035575145e-06,
"loss": 2.9964,
"step": 1241000
},
{
"epoch": 10.17,
"learning_rate": 5.950944450837595e-06,
"loss": 2.9955,
"step": 1241500
},
{
"epoch": 10.18,
"learning_rate": 5.949300498117675e-06,
"loss": 3.0,
"step": 1242000
},
{
"epoch": 10.18,
"learning_rate": 5.947656545397755e-06,
"loss": 3.0017,
"step": 1242500
},
{
"epoch": 10.18,
"learning_rate": 5.946012592677836e-06,
"loss": 3.0005,
"step": 1243000
},
{
"epoch": 10.19,
"learning_rate": 5.944368639957915e-06,
"loss": 2.9954,
"step": 1243500
},
{
"epoch": 10.19,
"learning_rate": 5.942724687237995e-06,
"loss": 2.9982,
"step": 1244000
},
{
"epoch": 10.2,
"learning_rate": 5.941080734518075e-06,
"loss": 3.0029,
"step": 1244500
},
{
"epoch": 10.2,
"learning_rate": 5.9394367817981555e-06,
"loss": 2.9995,
"step": 1245000
},
{
"epoch": 10.2,
"learning_rate": 5.9377928290782365e-06,
"loss": 3.0004,
"step": 1245500
},
{
"epoch": 10.21,
"learning_rate": 5.936148876358317e-06,
"loss": 3.0033,
"step": 1246000
},
{
"epoch": 10.21,
"learning_rate": 5.934504923638397e-06,
"loss": 2.9923,
"step": 1246500
},
{
"epoch": 10.22,
"learning_rate": 5.932860970918477e-06,
"loss": 2.9955,
"step": 1247000
},
{
"epoch": 10.22,
"learning_rate": 5.931217018198557e-06,
"loss": 2.9977,
"step": 1247500
},
{
"epoch": 10.22,
"learning_rate": 5.929573065478638e-06,
"loss": 2.9962,
"step": 1248000
},
{
"epoch": 10.23,
"learning_rate": 5.927929112758718e-06,
"loss": 2.9916,
"step": 1248500
},
{
"epoch": 10.23,
"learning_rate": 5.926285160038798e-06,
"loss": 2.9935,
"step": 1249000
},
{
"epoch": 10.24,
"learning_rate": 5.9246412073188775e-06,
"loss": 2.9927,
"step": 1249500
},
{
"epoch": 10.24,
"learning_rate": 5.922997254598958e-06,
"loss": 2.9976,
"step": 1250000
},
{
"epoch": 10.25,
"learning_rate": 5.921353301879038e-06,
"loss": 2.9944,
"step": 1250500
},
{
"epoch": 10.25,
"learning_rate": 5.919709349159119e-06,
"loss": 3.0006,
"step": 1251000
},
{
"epoch": 10.25,
"learning_rate": 5.918065396439199e-06,
"loss": 2.9983,
"step": 1251500
},
{
"epoch": 10.26,
"learning_rate": 5.916421443719279e-06,
"loss": 2.9885,
"step": 1252000
},
{
"epoch": 10.26,
"learning_rate": 5.914777490999359e-06,
"loss": 2.9992,
"step": 1252500
},
{
"epoch": 10.27,
"learning_rate": 5.91313353827944e-06,
"loss": 3.0021,
"step": 1253000
},
{
"epoch": 10.27,
"learning_rate": 5.91148958555952e-06,
"loss": 2.9901,
"step": 1253500
},
{
"epoch": 10.27,
"learning_rate": 5.9098456328396e-06,
"loss": 2.9947,
"step": 1254000
},
{
"epoch": 10.28,
"learning_rate": 5.9082016801196804e-06,
"loss": 2.9951,
"step": 1254500
},
{
"epoch": 10.28,
"learning_rate": 5.9065577273997606e-06,
"loss": 2.9973,
"step": 1255000
},
{
"epoch": 10.29,
"learning_rate": 5.90491377467984e-06,
"loss": 2.9943,
"step": 1255500
},
{
"epoch": 10.29,
"learning_rate": 5.903269821959921e-06,
"loss": 2.9956,
"step": 1256000
},
{
"epoch": 10.29,
"learning_rate": 5.901625869240001e-06,
"loss": 2.9963,
"step": 1256500
},
{
"epoch": 10.3,
"learning_rate": 5.899981916520081e-06,
"loss": 2.9933,
"step": 1257000
},
{
"epoch": 10.3,
"learning_rate": 5.898337963800161e-06,
"loss": 2.9907,
"step": 1257500
},
{
"epoch": 10.31,
"learning_rate": 5.896694011080241e-06,
"loss": 2.9903,
"step": 1258000
},
{
"epoch": 10.31,
"learning_rate": 5.895050058360322e-06,
"loss": 2.9945,
"step": 1258500
},
{
"epoch": 10.31,
"learning_rate": 5.8934061056404024e-06,
"loss": 2.9894,
"step": 1259000
},
{
"epoch": 10.32,
"learning_rate": 5.8917621529204826e-06,
"loss": 2.9936,
"step": 1259500
},
{
"epoch": 10.32,
"learning_rate": 5.890118200200563e-06,
"loss": 2.9983,
"step": 1260000
},
{
"epoch": 10.32,
"eval_accuracy": 0.4899989186098096,
"eval_loss": 2.856139659881592,
"eval_runtime": 405.6967,
"eval_samples_per_second": 760.065,
"eval_steps_per_second": 15.837,
"step": 1260000
},
{
"epoch": 10.33,
"learning_rate": 5.888474247480644e-06,
"loss": 2.9992,
"step": 1260500
},
{
"epoch": 10.33,
"learning_rate": 5.886830294760724e-06,
"loss": 2.9973,
"step": 1261000
},
{
"epoch": 10.34,
"learning_rate": 5.885186342040803e-06,
"loss": 2.996,
"step": 1261500
},
{
"epoch": 10.34,
"learning_rate": 5.883542389320883e-06,
"loss": 2.9911,
"step": 1262000
},
{
"epoch": 10.34,
"learning_rate": 5.881898436600963e-06,
"loss": 2.9913,
"step": 1262500
},
{
"epoch": 10.35,
"learning_rate": 5.8802544838810434e-06,
"loss": 2.9906,
"step": 1263000
},
{
"epoch": 10.35,
"learning_rate": 5.878610531161124e-06,
"loss": 3.0007,
"step": 1263500
},
{
"epoch": 10.36,
"learning_rate": 5.8769665784412045e-06,
"loss": 2.9983,
"step": 1264000
},
{
"epoch": 10.36,
"learning_rate": 5.875322625721285e-06,
"loss": 2.9929,
"step": 1264500
},
{
"epoch": 10.36,
"learning_rate": 5.873678673001365e-06,
"loss": 2.999,
"step": 1265000
},
{
"epoch": 10.37,
"learning_rate": 5.872034720281446e-06,
"loss": 2.9903,
"step": 1265500
},
{
"epoch": 10.37,
"learning_rate": 5.870390767561526e-06,
"loss": 2.9906,
"step": 1266000
},
{
"epoch": 10.38,
"learning_rate": 5.868746814841606e-06,
"loss": 2.99,
"step": 1266500
},
{
"epoch": 10.38,
"learning_rate": 5.867102862121686e-06,
"loss": 2.9936,
"step": 1267000
},
{
"epoch": 10.38,
"learning_rate": 5.8654589094017654e-06,
"loss": 2.9955,
"step": 1267500
},
{
"epoch": 10.39,
"learning_rate": 5.8638149566818456e-06,
"loss": 2.9949,
"step": 1268000
},
{
"epoch": 10.39,
"learning_rate": 5.8621710039619265e-06,
"loss": 2.9959,
"step": 1268500
},
{
"epoch": 10.4,
"learning_rate": 5.860527051242007e-06,
"loss": 2.9974,
"step": 1269000
},
{
"epoch": 10.4,
"learning_rate": 5.858883098522087e-06,
"loss": 2.9972,
"step": 1269500
},
{
"epoch": 10.4,
"learning_rate": 5.857239145802167e-06,
"loss": 2.9952,
"step": 1270000
},
{
"epoch": 10.41,
"learning_rate": 5.855595193082247e-06,
"loss": 2.9968,
"step": 1270500
},
{
"epoch": 10.41,
"learning_rate": 5.853951240362328e-06,
"loss": 2.9893,
"step": 1271000
},
{
"epoch": 10.42,
"learning_rate": 5.852307287642408e-06,
"loss": 2.9974,
"step": 1271500
},
{
"epoch": 10.42,
"learning_rate": 5.850663334922488e-06,
"loss": 2.9862,
"step": 1272000
},
{
"epoch": 10.43,
"learning_rate": 5.849019382202568e-06,
"loss": 2.9944,
"step": 1272500
},
{
"epoch": 10.43,
"learning_rate": 5.847375429482649e-06,
"loss": 2.9933,
"step": 1273000
},
{
"epoch": 10.43,
"learning_rate": 5.845731476762729e-06,
"loss": 2.993,
"step": 1273500
},
{
"epoch": 10.44,
"learning_rate": 5.844087524042809e-06,
"loss": 2.9942,
"step": 1274000
},
{
"epoch": 10.44,
"learning_rate": 5.842443571322889e-06,
"loss": 2.9952,
"step": 1274500
},
{
"epoch": 10.45,
"learning_rate": 5.840799618602969e-06,
"loss": 2.9978,
"step": 1275000
},
{
"epoch": 10.45,
"learning_rate": 5.839155665883049e-06,
"loss": 2.9936,
"step": 1275500
},
{
"epoch": 10.45,
"learning_rate": 5.83751171316313e-06,
"loss": 2.9934,
"step": 1276000
},
{
"epoch": 10.46,
"learning_rate": 5.83586776044321e-06,
"loss": 2.9946,
"step": 1276500
},
{
"epoch": 10.46,
"learning_rate": 5.83422380772329e-06,
"loss": 2.9954,
"step": 1277000
},
{
"epoch": 10.47,
"learning_rate": 5.8325798550033705e-06,
"loss": 2.9865,
"step": 1277500
},
{
"epoch": 10.47,
"learning_rate": 5.8309359022834515e-06,
"loss": 2.9931,
"step": 1278000
},
{
"epoch": 10.47,
"learning_rate": 5.829291949563532e-06,
"loss": 2.9937,
"step": 1278500
},
{
"epoch": 10.48,
"learning_rate": 5.827647996843612e-06,
"loss": 2.9888,
"step": 1279000
},
{
"epoch": 10.48,
"learning_rate": 5.826004044123692e-06,
"loss": 2.9879,
"step": 1279500
},
{
"epoch": 10.49,
"learning_rate": 5.824360091403771e-06,
"loss": 2.9912,
"step": 1280000
},
{
"epoch": 10.49,
"learning_rate": 5.822716138683851e-06,
"loss": 3.0024,
"step": 1280500
},
{
"epoch": 10.5,
"learning_rate": 5.821072185963932e-06,
"loss": 2.9928,
"step": 1281000
},
{
"epoch": 10.5,
"learning_rate": 5.819428233244012e-06,
"loss": 2.9852,
"step": 1281500
},
{
"epoch": 10.5,
"learning_rate": 5.8177842805240925e-06,
"loss": 2.9819,
"step": 1282000
},
{
"epoch": 10.51,
"learning_rate": 5.816140327804173e-06,
"loss": 2.9923,
"step": 1282500
},
{
"epoch": 10.51,
"learning_rate": 5.814496375084253e-06,
"loss": 2.9913,
"step": 1283000
},
{
"epoch": 10.52,
"learning_rate": 5.812852422364334e-06,
"loss": 2.985,
"step": 1283500
},
{
"epoch": 10.52,
"learning_rate": 5.811208469644414e-06,
"loss": 2.9962,
"step": 1284000
},
{
"epoch": 10.52,
"learning_rate": 5.809564516924494e-06,
"loss": 2.9926,
"step": 1284500
},
{
"epoch": 10.53,
"learning_rate": 5.807920564204574e-06,
"loss": 2.9897,
"step": 1285000
},
{
"epoch": 10.53,
"learning_rate": 5.806276611484655e-06,
"loss": 2.9893,
"step": 1285500
},
{
"epoch": 10.54,
"learning_rate": 5.8046326587647335e-06,
"loss": 2.9912,
"step": 1286000
},
{
"epoch": 10.54,
"learning_rate": 5.8029887060448145e-06,
"loss": 2.9872,
"step": 1286500
},
{
"epoch": 10.54,
"learning_rate": 5.801344753324895e-06,
"loss": 2.9919,
"step": 1287000
},
{
"epoch": 10.55,
"learning_rate": 5.799700800604975e-06,
"loss": 2.991,
"step": 1287500
},
{
"epoch": 10.55,
"learning_rate": 5.798056847885055e-06,
"loss": 2.9917,
"step": 1288000
},
{
"epoch": 10.56,
"learning_rate": 5.796412895165136e-06,
"loss": 2.9909,
"step": 1288500
},
{
"epoch": 10.56,
"learning_rate": 5.794768942445216e-06,
"loss": 2.988,
"step": 1289000
},
{
"epoch": 10.56,
"learning_rate": 5.793124989725296e-06,
"loss": 2.9947,
"step": 1289500
},
{
"epoch": 10.57,
"learning_rate": 5.791481037005376e-06,
"loss": 2.9834,
"step": 1290000
},
{
"epoch": 10.57,
"eval_accuracy": 0.490716405708734,
"eval_loss": 2.852367401123047,
"eval_runtime": 409.9329,
"eval_samples_per_second": 752.211,
"eval_steps_per_second": 15.673,
"step": 1290000
},
{
"epoch": 10.57,
"learning_rate": 5.789837084285457e-06,
"loss": 2.9875,
"step": 1290500
},
{
"epoch": 10.58,
"learning_rate": 5.788193131565537e-06,
"loss": 2.99,
"step": 1291000
},
{
"epoch": 10.58,
"learning_rate": 5.7865491788456174e-06,
"loss": 2.9915,
"step": 1291500
},
{
"epoch": 10.59,
"learning_rate": 5.784905226125697e-06,
"loss": 2.989,
"step": 1292000
},
{
"epoch": 10.59,
"learning_rate": 5.783261273405777e-06,
"loss": 2.9908,
"step": 1292500
},
{
"epoch": 10.59,
"learning_rate": 5.781617320685857e-06,
"loss": 2.9896,
"step": 1293000
},
{
"epoch": 10.6,
"learning_rate": 5.779973367965938e-06,
"loss": 2.9904,
"step": 1293500
},
{
"epoch": 10.6,
"learning_rate": 5.778329415246018e-06,
"loss": 2.9917,
"step": 1294000
},
{
"epoch": 10.61,
"learning_rate": 5.776685462526098e-06,
"loss": 2.9959,
"step": 1294500
},
{
"epoch": 10.61,
"learning_rate": 5.775041509806178e-06,
"loss": 2.9892,
"step": 1295000
},
{
"epoch": 10.61,
"learning_rate": 5.7733975570862584e-06,
"loss": 2.9959,
"step": 1295500
},
{
"epoch": 10.62,
"learning_rate": 5.771753604366339e-06,
"loss": 2.9883,
"step": 1296000
},
{
"epoch": 10.62,
"learning_rate": 5.7701096516464195e-06,
"loss": 2.9849,
"step": 1296500
},
{
"epoch": 10.63,
"learning_rate": 5.7684656989265e-06,
"loss": 2.99,
"step": 1297000
},
{
"epoch": 10.63,
"learning_rate": 5.76682174620658e-06,
"loss": 2.9891,
"step": 1297500
},
{
"epoch": 10.63,
"learning_rate": 5.765177793486659e-06,
"loss": 2.9913,
"step": 1298000
},
{
"epoch": 10.64,
"learning_rate": 5.763533840766739e-06,
"loss": 2.9844,
"step": 1298500
},
{
"epoch": 10.64,
"learning_rate": 5.76188988804682e-06,
"loss": 2.988,
"step": 1299000
},
{
"epoch": 10.65,
"learning_rate": 5.7602459353269e-06,
"loss": 2.9861,
"step": 1299500
},
{
"epoch": 10.65,
"learning_rate": 5.7586019826069804e-06,
"loss": 2.9871,
"step": 1300000
},
{
"epoch": 10.65,
"learning_rate": 5.7569580298870606e-06,
"loss": 2.9841,
"step": 1300500
},
{
"epoch": 10.66,
"learning_rate": 5.7553140771671415e-06,
"loss": 2.9844,
"step": 1301000
},
{
"epoch": 10.66,
"learning_rate": 5.753670124447222e-06,
"loss": 2.9895,
"step": 1301500
},
{
"epoch": 10.67,
"learning_rate": 5.752026171727302e-06,
"loss": 2.9843,
"step": 1302000
},
{
"epoch": 10.67,
"learning_rate": 5.750382219007382e-06,
"loss": 2.9913,
"step": 1302500
},
{
"epoch": 10.68,
"learning_rate": 5.748738266287462e-06,
"loss": 2.9902,
"step": 1303000
},
{
"epoch": 10.68,
"learning_rate": 5.747094313567543e-06,
"loss": 2.9856,
"step": 1303500
},
{
"epoch": 10.68,
"learning_rate": 5.745450360847622e-06,
"loss": 2.9884,
"step": 1304000
},
{
"epoch": 10.69,
"learning_rate": 5.743806408127702e-06,
"loss": 2.9866,
"step": 1304500
},
{
"epoch": 10.69,
"learning_rate": 5.7421624554077825e-06,
"loss": 2.9871,
"step": 1305000
},
{
"epoch": 10.7,
"learning_rate": 5.740518502687863e-06,
"loss": 2.9865,
"step": 1305500
},
{
"epoch": 10.7,
"learning_rate": 5.738874549967944e-06,
"loss": 2.9871,
"step": 1306000
},
{
"epoch": 10.7,
"learning_rate": 5.737230597248024e-06,
"loss": 2.9857,
"step": 1306500
},
{
"epoch": 10.71,
"learning_rate": 5.735586644528104e-06,
"loss": 2.9791,
"step": 1307000
},
{
"epoch": 10.71,
"learning_rate": 5.733942691808184e-06,
"loss": 2.9903,
"step": 1307500
},
{
"epoch": 10.72,
"learning_rate": 5.732298739088264e-06,
"loss": 2.9838,
"step": 1308000
},
{
"epoch": 10.72,
"learning_rate": 5.730654786368345e-06,
"loss": 2.9891,
"step": 1308500
},
{
"epoch": 10.72,
"learning_rate": 5.729010833648425e-06,
"loss": 2.9863,
"step": 1309000
},
{
"epoch": 10.73,
"learning_rate": 5.727366880928505e-06,
"loss": 2.9899,
"step": 1309500
},
{
"epoch": 10.73,
"learning_rate": 5.725722928208585e-06,
"loss": 2.9845,
"step": 1310000
},
{
"epoch": 10.74,
"learning_rate": 5.724078975488665e-06,
"loss": 2.9834,
"step": 1310500
},
{
"epoch": 10.74,
"learning_rate": 5.722435022768745e-06,
"loss": 2.9827,
"step": 1311000
},
{
"epoch": 10.74,
"learning_rate": 5.720791070048826e-06,
"loss": 2.9816,
"step": 1311500
},
{
"epoch": 10.75,
"learning_rate": 5.719147117328906e-06,
"loss": 2.9773,
"step": 1312000
},
{
"epoch": 10.75,
"learning_rate": 5.717503164608986e-06,
"loss": 2.9882,
"step": 1312500
},
{
"epoch": 10.76,
"learning_rate": 5.715859211889066e-06,
"loss": 2.9822,
"step": 1313000
},
{
"epoch": 10.76,
"learning_rate": 5.714215259169147e-06,
"loss": 2.9863,
"step": 1313500
},
{
"epoch": 10.77,
"learning_rate": 5.712571306449227e-06,
"loss": 2.9802,
"step": 1314000
},
{
"epoch": 10.77,
"learning_rate": 5.7109273537293075e-06,
"loss": 2.9899,
"step": 1314500
},
{
"epoch": 10.77,
"learning_rate": 5.709283401009388e-06,
"loss": 2.9936,
"step": 1315000
},
{
"epoch": 10.78,
"learning_rate": 5.707639448289468e-06,
"loss": 2.9852,
"step": 1315500
},
{
"epoch": 10.78,
"learning_rate": 5.705995495569547e-06,
"loss": 2.9875,
"step": 1316000
},
{
"epoch": 10.79,
"learning_rate": 5.704351542849628e-06,
"loss": 2.9867,
"step": 1316500
},
{
"epoch": 10.79,
"learning_rate": 5.702707590129708e-06,
"loss": 2.982,
"step": 1317000
},
{
"epoch": 10.79,
"learning_rate": 5.701063637409788e-06,
"loss": 2.9805,
"step": 1317500
},
{
"epoch": 10.8,
"learning_rate": 5.699419684689868e-06,
"loss": 2.9825,
"step": 1318000
},
{
"epoch": 10.8,
"learning_rate": 5.697775731969949e-06,
"loss": 2.9785,
"step": 1318500
},
{
"epoch": 10.81,
"learning_rate": 5.6961317792500295e-06,
"loss": 2.9884,
"step": 1319000
},
{
"epoch": 10.81,
"learning_rate": 5.69448782653011e-06,
"loss": 2.9854,
"step": 1319500
},
{
"epoch": 10.81,
"learning_rate": 5.69284387381019e-06,
"loss": 2.9873,
"step": 1320000
},
{
"epoch": 10.81,
"eval_accuracy": 0.4910966306213036,
"eval_loss": 2.8484363555908203,
"eval_runtime": 403.7174,
"eval_samples_per_second": 763.792,
"eval_steps_per_second": 15.915,
"step": 1320000
},
{
"epoch": 10.82,
"learning_rate": 5.69119992109027e-06,
"loss": 2.9893,
"step": 1320500
},
{
"epoch": 10.82,
"learning_rate": 5.689555968370351e-06,
"loss": 2.9849,
"step": 1321000
},
{
"epoch": 10.83,
"learning_rate": 5.687912015650431e-06,
"loss": 2.9878,
"step": 1321500
},
{
"epoch": 10.83,
"learning_rate": 5.68626806293051e-06,
"loss": 2.9807,
"step": 1322000
},
{
"epoch": 10.84,
"learning_rate": 5.68462411021059e-06,
"loss": 2.9778,
"step": 1322500
},
{
"epoch": 10.84,
"learning_rate": 5.6829801574906705e-06,
"loss": 2.9867,
"step": 1323000
},
{
"epoch": 10.84,
"learning_rate": 5.681336204770751e-06,
"loss": 2.9818,
"step": 1323500
},
{
"epoch": 10.85,
"learning_rate": 5.679692252050832e-06,
"loss": 2.9833,
"step": 1324000
},
{
"epoch": 10.85,
"learning_rate": 5.678048299330912e-06,
"loss": 2.982,
"step": 1324500
},
{
"epoch": 10.86,
"learning_rate": 5.676404346610992e-06,
"loss": 2.9867,
"step": 1325000
},
{
"epoch": 10.86,
"learning_rate": 5.674760393891072e-06,
"loss": 2.9878,
"step": 1325500
},
{
"epoch": 10.86,
"learning_rate": 5.673116441171153e-06,
"loss": 2.9814,
"step": 1326000
},
{
"epoch": 10.87,
"learning_rate": 5.671472488451233e-06,
"loss": 2.9849,
"step": 1326500
},
{
"epoch": 10.87,
"learning_rate": 5.669828535731313e-06,
"loss": 2.9777,
"step": 1327000
},
{
"epoch": 10.88,
"learning_rate": 5.668184583011393e-06,
"loss": 2.9794,
"step": 1327500
},
{
"epoch": 10.88,
"learning_rate": 5.6665406302914734e-06,
"loss": 2.983,
"step": 1328000
},
{
"epoch": 10.88,
"learning_rate": 5.664896677571553e-06,
"loss": 2.9901,
"step": 1328500
},
{
"epoch": 10.89,
"learning_rate": 5.663252724851634e-06,
"loss": 2.9791,
"step": 1329000
},
{
"epoch": 10.89,
"learning_rate": 5.661608772131714e-06,
"loss": 2.9856,
"step": 1329500
},
{
"epoch": 10.9,
"learning_rate": 5.659964819411794e-06,
"loss": 2.9838,
"step": 1330000
},
{
"epoch": 10.9,
"learning_rate": 5.658320866691874e-06,
"loss": 2.9832,
"step": 1330500
},
{
"epoch": 10.9,
"learning_rate": 5.656676913971954e-06,
"loss": 2.985,
"step": 1331000
},
{
"epoch": 10.91,
"learning_rate": 5.655032961252035e-06,
"loss": 2.9829,
"step": 1331500
},
{
"epoch": 10.91,
"learning_rate": 5.653389008532115e-06,
"loss": 2.9868,
"step": 1332000
},
{
"epoch": 10.92,
"learning_rate": 5.6517450558121954e-06,
"loss": 2.9833,
"step": 1332500
},
{
"epoch": 10.92,
"learning_rate": 5.6501011030922756e-06,
"loss": 2.9865,
"step": 1333000
},
{
"epoch": 10.93,
"learning_rate": 5.6484571503723565e-06,
"loss": 2.9829,
"step": 1333500
},
{
"epoch": 10.93,
"learning_rate": 5.646813197652437e-06,
"loss": 2.9747,
"step": 1334000
},
{
"epoch": 10.93,
"learning_rate": 5.645169244932516e-06,
"loss": 2.9909,
"step": 1334500
},
{
"epoch": 10.94,
"learning_rate": 5.643525292212596e-06,
"loss": 2.9836,
"step": 1335000
},
{
"epoch": 10.94,
"learning_rate": 5.641881339492676e-06,
"loss": 2.9859,
"step": 1335500
},
{
"epoch": 10.95,
"learning_rate": 5.640237386772756e-06,
"loss": 2.9849,
"step": 1336000
},
{
"epoch": 10.95,
"learning_rate": 5.638593434052837e-06,
"loss": 2.9826,
"step": 1336500
},
{
"epoch": 10.95,
"learning_rate": 5.636949481332917e-06,
"loss": 2.9834,
"step": 1337000
},
{
"epoch": 10.96,
"learning_rate": 5.6353055286129975e-06,
"loss": 2.9836,
"step": 1337500
},
{
"epoch": 10.96,
"learning_rate": 5.633661575893078e-06,
"loss": 2.9822,
"step": 1338000
},
{
"epoch": 10.97,
"learning_rate": 5.632017623173159e-06,
"loss": 2.9908,
"step": 1338500
},
{
"epoch": 10.97,
"learning_rate": 5.630373670453239e-06,
"loss": 2.9815,
"step": 1339000
},
{
"epoch": 10.97,
"learning_rate": 5.628729717733319e-06,
"loss": 2.9858,
"step": 1339500
},
{
"epoch": 10.98,
"learning_rate": 5.627085765013399e-06,
"loss": 2.9802,
"step": 1340000
},
{
"epoch": 10.98,
"learning_rate": 5.625441812293478e-06,
"loss": 2.9778,
"step": 1340500
},
{
"epoch": 10.99,
"learning_rate": 5.623797859573558e-06,
"loss": 2.9915,
"step": 1341000
},
{
"epoch": 10.99,
"learning_rate": 5.622153906853639e-06,
"loss": 2.9819,
"step": 1341500
},
{
"epoch": 10.99,
"learning_rate": 5.6205099541337195e-06,
"loss": 2.9803,
"step": 1342000
},
{
"epoch": 11.0,
"learning_rate": 5.6188660014138e-06,
"loss": 2.9769,
"step": 1342500
},
{
"epoch": 11.0,
"learning_rate": 5.61722204869388e-06,
"loss": 2.9809,
"step": 1343000
},
{
"epoch": 11.01,
"learning_rate": 5.61557809597396e-06,
"loss": 2.981,
"step": 1343500
},
{
"epoch": 11.01,
"learning_rate": 5.613934143254041e-06,
"loss": 2.9829,
"step": 1344000
},
{
"epoch": 11.02,
"learning_rate": 5.612290190534121e-06,
"loss": 2.9789,
"step": 1344500
},
{
"epoch": 11.02,
"learning_rate": 5.610646237814201e-06,
"loss": 2.9819,
"step": 1345000
},
{
"epoch": 11.02,
"learning_rate": 5.609002285094281e-06,
"loss": 2.978,
"step": 1345500
},
{
"epoch": 11.03,
"learning_rate": 5.607358332374362e-06,
"loss": 2.987,
"step": 1346000
},
{
"epoch": 11.03,
"learning_rate": 5.6057143796544415e-06,
"loss": 2.9808,
"step": 1346500
},
{
"epoch": 11.04,
"learning_rate": 5.604070426934522e-06,
"loss": 2.9786,
"step": 1347000
},
{
"epoch": 11.04,
"learning_rate": 5.602426474214602e-06,
"loss": 2.9847,
"step": 1347500
},
{
"epoch": 11.04,
"learning_rate": 5.600782521494682e-06,
"loss": 2.9829,
"step": 1348000
},
{
"epoch": 11.05,
"learning_rate": 5.599138568774762e-06,
"loss": 2.9851,
"step": 1348500
},
{
"epoch": 11.05,
"learning_rate": 5.597494616054843e-06,
"loss": 2.9856,
"step": 1349000
},
{
"epoch": 11.06,
"learning_rate": 5.595850663334923e-06,
"loss": 2.9804,
"step": 1349500
},
{
"epoch": 11.06,
"learning_rate": 5.594206710615003e-06,
"loss": 2.978,
"step": 1350000
},
{
"epoch": 11.06,
"eval_accuracy": 0.4923695335880713,
"eval_loss": 2.841365337371826,
"eval_runtime": 412.3409,
"eval_samples_per_second": 747.818,
"eval_steps_per_second": 15.582,
"step": 1350000
},
{
"epoch": 11.06,
"learning_rate": 5.592562757895083e-06,
"loss": 2.9795,
"step": 1350500
},
{
"epoch": 11.07,
"learning_rate": 5.590918805175164e-06,
"loss": 2.979,
"step": 1351000
},
{
"epoch": 11.07,
"learning_rate": 5.5892748524552445e-06,
"loss": 2.9805,
"step": 1351500
},
{
"epoch": 11.08,
"learning_rate": 5.587630899735325e-06,
"loss": 2.9765,
"step": 1352000
},
{
"epoch": 11.08,
"learning_rate": 5.585986947015404e-06,
"loss": 2.984,
"step": 1352500
},
{
"epoch": 11.08,
"learning_rate": 5.584342994295484e-06,
"loss": 2.9842,
"step": 1353000
},
{
"epoch": 11.09,
"learning_rate": 5.582699041575564e-06,
"loss": 2.9697,
"step": 1353500
},
{
"epoch": 11.09,
"learning_rate": 5.581055088855645e-06,
"loss": 2.9756,
"step": 1354000
},
{
"epoch": 11.1,
"learning_rate": 5.579411136135725e-06,
"loss": 2.974,
"step": 1354500
},
{
"epoch": 11.1,
"learning_rate": 5.577767183415805e-06,
"loss": 2.9776,
"step": 1355000
},
{
"epoch": 11.11,
"learning_rate": 5.5761232306958855e-06,
"loss": 2.9763,
"step": 1355500
},
{
"epoch": 11.11,
"learning_rate": 5.574479277975966e-06,
"loss": 2.9764,
"step": 1356000
},
{
"epoch": 11.11,
"learning_rate": 5.572835325256047e-06,
"loss": 2.9828,
"step": 1356500
},
{
"epoch": 11.12,
"learning_rate": 5.571191372536127e-06,
"loss": 2.9758,
"step": 1357000
},
{
"epoch": 11.12,
"learning_rate": 5.569547419816207e-06,
"loss": 2.979,
"step": 1357500
},
{
"epoch": 11.13,
"learning_rate": 5.567903467096287e-06,
"loss": 2.9827,
"step": 1358000
},
{
"epoch": 11.13,
"learning_rate": 5.566259514376366e-06,
"loss": 2.9817,
"step": 1358500
},
{
"epoch": 11.13,
"learning_rate": 5.564615561656446e-06,
"loss": 2.9774,
"step": 1359000
},
{
"epoch": 11.14,
"learning_rate": 5.562971608936527e-06,
"loss": 2.9756,
"step": 1359500
},
{
"epoch": 11.14,
"learning_rate": 5.5613276562166075e-06,
"loss": 2.9787,
"step": 1360000
},
{
"epoch": 11.15,
"learning_rate": 5.559683703496688e-06,
"loss": 2.9761,
"step": 1360500
},
{
"epoch": 11.15,
"learning_rate": 5.558039750776768e-06,
"loss": 2.9789,
"step": 1361000
},
{
"epoch": 11.15,
"learning_rate": 5.556395798056849e-06,
"loss": 2.9803,
"step": 1361500
},
{
"epoch": 11.16,
"learning_rate": 5.554751845336929e-06,
"loss": 2.9784,
"step": 1362000
},
{
"epoch": 11.16,
"learning_rate": 5.553107892617009e-06,
"loss": 2.9759,
"step": 1362500
},
{
"epoch": 11.17,
"learning_rate": 5.551463939897089e-06,
"loss": 2.9823,
"step": 1363000
},
{
"epoch": 11.17,
"learning_rate": 5.54981998717717e-06,
"loss": 2.9767,
"step": 1363500
},
{
"epoch": 11.18,
"learning_rate": 5.54817603445725e-06,
"loss": 2.978,
"step": 1364000
},
{
"epoch": 11.18,
"learning_rate": 5.5465320817373294e-06,
"loss": 2.9843,
"step": 1364500
},
{
"epoch": 11.18,
"learning_rate": 5.5448881290174096e-06,
"loss": 2.9708,
"step": 1365000
},
{
"epoch": 11.19,
"learning_rate": 5.54324417629749e-06,
"loss": 2.9756,
"step": 1365500
},
{
"epoch": 11.19,
"learning_rate": 5.54160022357757e-06,
"loss": 2.973,
"step": 1366000
},
{
"epoch": 11.2,
"learning_rate": 5.539956270857651e-06,
"loss": 2.981,
"step": 1366500
},
{
"epoch": 11.2,
"learning_rate": 5.538312318137731e-06,
"loss": 2.971,
"step": 1367000
},
{
"epoch": 11.2,
"learning_rate": 5.536668365417811e-06,
"loss": 2.9819,
"step": 1367500
},
{
"epoch": 11.21,
"learning_rate": 5.535024412697891e-06,
"loss": 2.975,
"step": 1368000
},
{
"epoch": 11.21,
"learning_rate": 5.533380459977971e-06,
"loss": 2.9697,
"step": 1368500
},
{
"epoch": 11.22,
"learning_rate": 5.531736507258052e-06,
"loss": 2.9745,
"step": 1369000
},
{
"epoch": 11.22,
"learning_rate": 5.530092554538132e-06,
"loss": 2.9812,
"step": 1369500
},
{
"epoch": 11.22,
"learning_rate": 5.5284486018182125e-06,
"loss": 2.9816,
"step": 1370000
},
{
"epoch": 11.23,
"learning_rate": 5.526804649098292e-06,
"loss": 2.9712,
"step": 1370500
},
{
"epoch": 11.23,
"learning_rate": 5.525160696378372e-06,
"loss": 2.9771,
"step": 1371000
},
{
"epoch": 11.24,
"learning_rate": 5.523516743658452e-06,
"loss": 2.9747,
"step": 1371500
},
{
"epoch": 11.24,
"learning_rate": 5.521872790938533e-06,
"loss": 2.9702,
"step": 1372000
},
{
"epoch": 11.24,
"learning_rate": 5.520228838218613e-06,
"loss": 2.9757,
"step": 1372500
},
{
"epoch": 11.25,
"learning_rate": 5.518584885498693e-06,
"loss": 2.9752,
"step": 1373000
},
{
"epoch": 11.25,
"learning_rate": 5.516940932778773e-06,
"loss": 2.9699,
"step": 1373500
},
{
"epoch": 11.26,
"learning_rate": 5.515296980058854e-06,
"loss": 2.9753,
"step": 1374000
},
{
"epoch": 11.26,
"learning_rate": 5.5136530273389345e-06,
"loss": 2.9799,
"step": 1374500
},
{
"epoch": 11.27,
"learning_rate": 5.512009074619015e-06,
"loss": 2.9813,
"step": 1375000
},
{
"epoch": 11.27,
"learning_rate": 5.510365121899095e-06,
"loss": 2.9736,
"step": 1375500
},
{
"epoch": 11.27,
"learning_rate": 5.508721169179175e-06,
"loss": 2.9802,
"step": 1376000
},
{
"epoch": 11.28,
"learning_rate": 5.507077216459256e-06,
"loss": 2.9686,
"step": 1376500
},
{
"epoch": 11.28,
"learning_rate": 5.505433263739335e-06,
"loss": 2.9786,
"step": 1377000
},
{
"epoch": 11.29,
"learning_rate": 5.503789311019415e-06,
"loss": 2.9808,
"step": 1377500
},
{
"epoch": 11.29,
"learning_rate": 5.502145358299495e-06,
"loss": 2.9748,
"step": 1378000
},
{
"epoch": 11.29,
"learning_rate": 5.5005014055795755e-06,
"loss": 2.9771,
"step": 1378500
},
{
"epoch": 11.3,
"learning_rate": 5.4988574528596565e-06,
"loss": 2.9765,
"step": 1379000
},
{
"epoch": 11.3,
"learning_rate": 5.497213500139737e-06,
"loss": 2.9745,
"step": 1379500
},
{
"epoch": 11.31,
"learning_rate": 5.495569547419817e-06,
"loss": 2.9709,
"step": 1380000
},
{
"epoch": 11.31,
"eval_accuracy": 0.4926664563281789,
"eval_loss": 2.8374874591827393,
"eval_runtime": 405.084,
"eval_samples_per_second": 761.215,
"eval_steps_per_second": 15.861,
"step": 1380000
},
{
"epoch": 11.31,
"learning_rate": 5.493925594699897e-06,
"loss": 2.9771,
"step": 1380500
},
{
"epoch": 11.31,
"learning_rate": 5.492281641979977e-06,
"loss": 2.9832,
"step": 1381000
},
{
"epoch": 11.32,
"learning_rate": 5.490637689260058e-06,
"loss": 2.9741,
"step": 1381500
},
{
"epoch": 11.32,
"learning_rate": 5.488993736540138e-06,
"loss": 2.9815,
"step": 1382000
},
{
"epoch": 11.33,
"learning_rate": 5.487349783820218e-06,
"loss": 2.9758,
"step": 1382500
},
{
"epoch": 11.33,
"learning_rate": 5.4857058311002975e-06,
"loss": 2.9791,
"step": 1383000
},
{
"epoch": 11.33,
"learning_rate": 5.484061878380378e-06,
"loss": 2.9798,
"step": 1383500
},
{
"epoch": 11.34,
"learning_rate": 5.482417925660458e-06,
"loss": 2.9725,
"step": 1384000
},
{
"epoch": 11.34,
"learning_rate": 5.480773972940539e-06,
"loss": 2.975,
"step": 1384500
},
{
"epoch": 11.35,
"learning_rate": 5.479130020220619e-06,
"loss": 2.9717,
"step": 1385000
},
{
"epoch": 11.35,
"learning_rate": 5.477486067500699e-06,
"loss": 2.9701,
"step": 1385500
},
{
"epoch": 11.36,
"learning_rate": 5.475842114780779e-06,
"loss": 2.9725,
"step": 1386000
},
{
"epoch": 11.36,
"learning_rate": 5.47419816206086e-06,
"loss": 2.9766,
"step": 1386500
},
{
"epoch": 11.36,
"learning_rate": 5.47255420934094e-06,
"loss": 2.9762,
"step": 1387000
},
{
"epoch": 11.37,
"learning_rate": 5.47091025662102e-06,
"loss": 2.968,
"step": 1387500
},
{
"epoch": 11.37,
"learning_rate": 5.4692663039011005e-06,
"loss": 2.9754,
"step": 1388000
},
{
"epoch": 11.38,
"learning_rate": 5.467622351181181e-06,
"loss": 2.9663,
"step": 1388500
},
{
"epoch": 11.38,
"learning_rate": 5.46597839846126e-06,
"loss": 2.973,
"step": 1389000
},
{
"epoch": 11.38,
"learning_rate": 5.464334445741341e-06,
"loss": 2.9739,
"step": 1389500
},
{
"epoch": 11.39,
"learning_rate": 5.462690493021421e-06,
"loss": 2.9662,
"step": 1390000
},
{
"epoch": 11.39,
"learning_rate": 5.461046540301501e-06,
"loss": 2.9719,
"step": 1390500
},
{
"epoch": 11.4,
"learning_rate": 5.459402587581581e-06,
"loss": 2.9817,
"step": 1391000
},
{
"epoch": 11.4,
"learning_rate": 5.457758634861662e-06,
"loss": 2.9718,
"step": 1391500
},
{
"epoch": 11.4,
"learning_rate": 5.456114682141742e-06,
"loss": 2.9796,
"step": 1392000
},
{
"epoch": 11.41,
"learning_rate": 5.4544707294218225e-06,
"loss": 2.972,
"step": 1392500
},
{
"epoch": 11.41,
"learning_rate": 5.452826776701903e-06,
"loss": 2.9755,
"step": 1393000
},
{
"epoch": 11.42,
"learning_rate": 5.451182823981983e-06,
"loss": 2.9731,
"step": 1393500
},
{
"epoch": 11.42,
"learning_rate": 5.449538871262064e-06,
"loss": 2.9719,
"step": 1394000
},
{
"epoch": 11.42,
"learning_rate": 5.447894918542144e-06,
"loss": 2.9669,
"step": 1394500
},
{
"epoch": 11.43,
"learning_rate": 5.446250965822223e-06,
"loss": 2.974,
"step": 1395000
},
{
"epoch": 11.43,
"learning_rate": 5.444607013102303e-06,
"loss": 2.9758,
"step": 1395500
},
{
"epoch": 11.44,
"learning_rate": 5.442963060382383e-06,
"loss": 2.9695,
"step": 1396000
},
{
"epoch": 11.44,
"learning_rate": 5.4413191076624635e-06,
"loss": 2.9685,
"step": 1396500
},
{
"epoch": 11.45,
"learning_rate": 5.4396751549425444e-06,
"loss": 2.9747,
"step": 1397000
},
{
"epoch": 11.45,
"learning_rate": 5.4380312022226246e-06,
"loss": 2.973,
"step": 1397500
},
{
"epoch": 11.45,
"learning_rate": 5.436387249502705e-06,
"loss": 2.9753,
"step": 1398000
},
{
"epoch": 11.46,
"learning_rate": 5.434743296782785e-06,
"loss": 2.9692,
"step": 1398500
},
{
"epoch": 11.46,
"learning_rate": 5.433099344062866e-06,
"loss": 2.9772,
"step": 1399000
},
{
"epoch": 11.47,
"learning_rate": 5.431455391342946e-06,
"loss": 2.9683,
"step": 1399500
},
{
"epoch": 11.47,
"learning_rate": 5.429811438623026e-06,
"loss": 2.9707,
"step": 1400000
},
{
"epoch": 11.47,
"learning_rate": 5.428167485903106e-06,
"loss": 2.9687,
"step": 1400500
},
{
"epoch": 11.48,
"learning_rate": 5.4265235331831855e-06,
"loss": 2.9696,
"step": 1401000
},
{
"epoch": 11.48,
"learning_rate": 5.424879580463266e-06,
"loss": 2.9797,
"step": 1401500
},
{
"epoch": 11.49,
"learning_rate": 5.4232356277433466e-06,
"loss": 2.9733,
"step": 1402000
},
{
"epoch": 11.49,
"learning_rate": 5.421591675023427e-06,
"loss": 2.9671,
"step": 1402500
},
{
"epoch": 11.49,
"learning_rate": 5.419947722303507e-06,
"loss": 2.978,
"step": 1403000
},
{
"epoch": 11.5,
"learning_rate": 5.418303769583587e-06,
"loss": 2.9727,
"step": 1403500
},
{
"epoch": 11.5,
"learning_rate": 5.416659816863667e-06,
"loss": 2.9745,
"step": 1404000
},
{
"epoch": 11.51,
"learning_rate": 5.415015864143748e-06,
"loss": 2.9711,
"step": 1404500
},
{
"epoch": 11.51,
"learning_rate": 5.413371911423828e-06,
"loss": 2.9749,
"step": 1405000
},
{
"epoch": 11.52,
"learning_rate": 5.411727958703908e-06,
"loss": 2.972,
"step": 1405500
},
{
"epoch": 11.52,
"learning_rate": 5.410084005983988e-06,
"loss": 2.9717,
"step": 1406000
},
{
"epoch": 11.52,
"learning_rate": 5.408440053264069e-06,
"loss": 2.9745,
"step": 1406500
},
{
"epoch": 11.53,
"learning_rate": 5.406796100544149e-06,
"loss": 2.9727,
"step": 1407000
},
{
"epoch": 11.53,
"learning_rate": 5.405152147824229e-06,
"loss": 2.9724,
"step": 1407500
},
{
"epoch": 11.54,
"learning_rate": 5.403508195104309e-06,
"loss": 2.9654,
"step": 1408000
},
{
"epoch": 11.54,
"learning_rate": 5.401864242384389e-06,
"loss": 2.9749,
"step": 1408500
},
{
"epoch": 11.54,
"learning_rate": 5.400220289664469e-06,
"loss": 2.971,
"step": 1409000
},
{
"epoch": 11.55,
"learning_rate": 5.39857633694455e-06,
"loss": 2.9723,
"step": 1409500
},
{
"epoch": 11.55,
"learning_rate": 5.39693238422463e-06,
"loss": 2.9695,
"step": 1410000
},
{
"epoch": 11.55,
"eval_accuracy": 0.49317043725147325,
"eval_loss": 2.835310459136963,
"eval_runtime": 406.1684,
"eval_samples_per_second": 759.183,
"eval_steps_per_second": 15.819,
"step": 1410000
},
{
"epoch": 11.56,
"learning_rate": 5.39528843150471e-06,
"loss": 2.9687,
"step": 1410500
},
{
"epoch": 11.56,
"learning_rate": 5.3936444787847905e-06,
"loss": 2.9696,
"step": 1411000
},
{
"epoch": 11.56,
"learning_rate": 5.3920005260648715e-06,
"loss": 2.9737,
"step": 1411500
},
{
"epoch": 11.57,
"learning_rate": 5.390356573344952e-06,
"loss": 2.9715,
"step": 1412000
},
{
"epoch": 11.57,
"learning_rate": 5.388712620625032e-06,
"loss": 2.971,
"step": 1412500
},
{
"epoch": 11.58,
"learning_rate": 5.387068667905111e-06,
"loss": 2.9643,
"step": 1413000
},
{
"epoch": 11.58,
"learning_rate": 5.385424715185191e-06,
"loss": 2.9683,
"step": 1413500
},
{
"epoch": 11.58,
"learning_rate": 5.383780762465271e-06,
"loss": 2.9697,
"step": 1414000
},
{
"epoch": 11.59,
"learning_rate": 5.382136809745352e-06,
"loss": 2.9724,
"step": 1414500
},
{
"epoch": 11.59,
"learning_rate": 5.380492857025432e-06,
"loss": 2.9723,
"step": 1415000
},
{
"epoch": 11.6,
"learning_rate": 5.3788489043055125e-06,
"loss": 2.9665,
"step": 1415500
},
{
"epoch": 11.6,
"learning_rate": 5.377204951585593e-06,
"loss": 2.9792,
"step": 1416000
},
{
"epoch": 11.61,
"learning_rate": 5.375560998865673e-06,
"loss": 2.9728,
"step": 1416500
},
{
"epoch": 11.61,
"learning_rate": 5.373917046145754e-06,
"loss": 2.9705,
"step": 1417000
},
{
"epoch": 11.61,
"learning_rate": 5.372273093425834e-06,
"loss": 2.9694,
"step": 1417500
},
{
"epoch": 11.62,
"learning_rate": 5.370629140705914e-06,
"loss": 2.9662,
"step": 1418000
},
{
"epoch": 11.62,
"learning_rate": 5.368985187985994e-06,
"loss": 2.9696,
"step": 1418500
},
{
"epoch": 11.63,
"learning_rate": 5.367341235266073e-06,
"loss": 2.9668,
"step": 1419000
},
{
"epoch": 11.63,
"learning_rate": 5.365697282546154e-06,
"loss": 2.9754,
"step": 1419500
},
{
"epoch": 11.63,
"learning_rate": 5.3640533298262345e-06,
"loss": 2.9687,
"step": 1420000
},
{
"epoch": 11.64,
"learning_rate": 5.362409377106315e-06,
"loss": 2.9657,
"step": 1420500
},
{
"epoch": 11.64,
"learning_rate": 5.360765424386395e-06,
"loss": 2.9687,
"step": 1421000
},
{
"epoch": 11.65,
"learning_rate": 5.359121471666475e-06,
"loss": 2.963,
"step": 1421500
},
{
"epoch": 11.65,
"learning_rate": 5.357477518946556e-06,
"loss": 2.9688,
"step": 1422000
},
{
"epoch": 11.65,
"learning_rate": 5.355833566226636e-06,
"loss": 2.9654,
"step": 1422500
},
{
"epoch": 11.66,
"learning_rate": 5.354189613506716e-06,
"loss": 2.973,
"step": 1423000
},
{
"epoch": 11.66,
"learning_rate": 5.352545660786796e-06,
"loss": 2.9717,
"step": 1423500
},
{
"epoch": 11.67,
"learning_rate": 5.350901708066877e-06,
"loss": 2.9654,
"step": 1424000
},
{
"epoch": 11.67,
"learning_rate": 5.349257755346957e-06,
"loss": 2.96,
"step": 1424500
},
{
"epoch": 11.67,
"learning_rate": 5.347613802627037e-06,
"loss": 2.9653,
"step": 1425000
},
{
"epoch": 11.68,
"learning_rate": 5.345969849907117e-06,
"loss": 2.9667,
"step": 1425500
},
{
"epoch": 11.68,
"learning_rate": 5.344325897187197e-06,
"loss": 2.9697,
"step": 1426000
},
{
"epoch": 11.69,
"learning_rate": 5.342681944467277e-06,
"loss": 2.9725,
"step": 1426500
},
{
"epoch": 11.69,
"learning_rate": 5.341037991747358e-06,
"loss": 2.9673,
"step": 1427000
},
{
"epoch": 11.7,
"learning_rate": 5.339394039027438e-06,
"loss": 2.9679,
"step": 1427500
},
{
"epoch": 11.7,
"learning_rate": 5.337750086307518e-06,
"loss": 2.9735,
"step": 1428000
},
{
"epoch": 11.7,
"learning_rate": 5.336106133587598e-06,
"loss": 2.9692,
"step": 1428500
},
{
"epoch": 11.71,
"learning_rate": 5.3344621808676785e-06,
"loss": 2.9707,
"step": 1429000
},
{
"epoch": 11.71,
"learning_rate": 5.3328182281477594e-06,
"loss": 2.9742,
"step": 1429500
},
{
"epoch": 11.72,
"learning_rate": 5.3311742754278396e-06,
"loss": 2.9593,
"step": 1430000
},
{
"epoch": 11.72,
"learning_rate": 5.32953032270792e-06,
"loss": 2.9698,
"step": 1430500
},
{
"epoch": 11.72,
"learning_rate": 5.327886369988e-06,
"loss": 2.9685,
"step": 1431000
},
{
"epoch": 11.73,
"learning_rate": 5.326242417268079e-06,
"loss": 2.9763,
"step": 1431500
},
{
"epoch": 11.73,
"learning_rate": 5.324598464548159e-06,
"loss": 2.9721,
"step": 1432000
},
{
"epoch": 11.74,
"learning_rate": 5.32295451182824e-06,
"loss": 2.9659,
"step": 1432500
},
{
"epoch": 11.74,
"learning_rate": 5.32131055910832e-06,
"loss": 2.9669,
"step": 1433000
},
{
"epoch": 11.74,
"learning_rate": 5.3196666063884005e-06,
"loss": 2.9627,
"step": 1433500
},
{
"epoch": 11.75,
"learning_rate": 5.318022653668481e-06,
"loss": 2.9696,
"step": 1434000
},
{
"epoch": 11.75,
"learning_rate": 5.3163787009485616e-06,
"loss": 2.972,
"step": 1434500
},
{
"epoch": 11.76,
"learning_rate": 5.314734748228642e-06,
"loss": 2.9714,
"step": 1435000
},
{
"epoch": 11.76,
"learning_rate": 5.313090795508722e-06,
"loss": 2.9683,
"step": 1435500
},
{
"epoch": 11.76,
"learning_rate": 5.311446842788802e-06,
"loss": 2.9655,
"step": 1436000
},
{
"epoch": 11.77,
"learning_rate": 5.309802890068883e-06,
"loss": 2.9673,
"step": 1436500
},
{
"epoch": 11.77,
"learning_rate": 5.308158937348963e-06,
"loss": 2.9678,
"step": 1437000
},
{
"epoch": 11.78,
"learning_rate": 5.306514984629042e-06,
"loss": 2.969,
"step": 1437500
},
{
"epoch": 11.78,
"learning_rate": 5.3048710319091224e-06,
"loss": 2.9631,
"step": 1438000
},
{
"epoch": 11.79,
"learning_rate": 5.3032270791892026e-06,
"loss": 2.9681,
"step": 1438500
},
{
"epoch": 11.79,
"learning_rate": 5.301583126469283e-06,
"loss": 2.9658,
"step": 1439000
},
{
"epoch": 11.79,
"learning_rate": 5.299939173749364e-06,
"loss": 2.9621,
"step": 1439500
},
{
"epoch": 11.8,
"learning_rate": 5.298295221029444e-06,
"loss": 2.9607,
"step": 1440000
},
{
"epoch": 11.8,
"eval_accuracy": 0.4940959315942787,
"eval_loss": 2.829009771347046,
"eval_runtime": 405.0153,
"eval_samples_per_second": 761.344,
"eval_steps_per_second": 15.864,
"step": 1440000
},
{
"epoch": 11.8,
"learning_rate": 5.296651268309524e-06,
"loss": 2.9657,
"step": 1440500
},
{
"epoch": 11.81,
"learning_rate": 5.295007315589604e-06,
"loss": 2.9663,
"step": 1441000
},
{
"epoch": 11.81,
"learning_rate": 5.293363362869684e-06,
"loss": 2.9679,
"step": 1441500
},
{
"epoch": 11.81,
"learning_rate": 5.291719410149765e-06,
"loss": 2.9643,
"step": 1442000
},
{
"epoch": 11.82,
"learning_rate": 5.290075457429845e-06,
"loss": 2.975,
"step": 1442500
},
{
"epoch": 11.82,
"learning_rate": 5.288431504709925e-06,
"loss": 2.963,
"step": 1443000
},
{
"epoch": 11.83,
"learning_rate": 5.286787551990005e-06,
"loss": 2.9608,
"step": 1443500
},
{
"epoch": 11.83,
"learning_rate": 5.285143599270085e-06,
"loss": 2.962,
"step": 1444000
},
{
"epoch": 11.83,
"learning_rate": 5.283499646550165e-06,
"loss": 2.9687,
"step": 1444500
},
{
"epoch": 11.84,
"learning_rate": 5.281855693830246e-06,
"loss": 2.9767,
"step": 1445000
},
{
"epoch": 11.84,
"learning_rate": 5.280211741110326e-06,
"loss": 2.9664,
"step": 1445500
},
{
"epoch": 11.85,
"learning_rate": 5.278567788390406e-06,
"loss": 2.9695,
"step": 1446000
},
{
"epoch": 11.85,
"learning_rate": 5.276923835670486e-06,
"loss": 2.9689,
"step": 1446500
},
{
"epoch": 11.86,
"learning_rate": 5.275279882950567e-06,
"loss": 2.968,
"step": 1447000
},
{
"epoch": 11.86,
"learning_rate": 5.273635930230647e-06,
"loss": 2.9666,
"step": 1447500
},
{
"epoch": 11.86,
"learning_rate": 5.2719919775107275e-06,
"loss": 2.9656,
"step": 1448000
},
{
"epoch": 11.87,
"learning_rate": 5.270348024790808e-06,
"loss": 2.9618,
"step": 1448500
},
{
"epoch": 11.87,
"learning_rate": 5.268704072070888e-06,
"loss": 2.9627,
"step": 1449000
},
{
"epoch": 11.88,
"learning_rate": 5.267060119350967e-06,
"loss": 2.9654,
"step": 1449500
},
{
"epoch": 11.88,
"learning_rate": 5.265416166631048e-06,
"loss": 2.974,
"step": 1450000
},
{
"epoch": 11.88,
"learning_rate": 5.263772213911128e-06,
"loss": 2.9723,
"step": 1450500
},
{
"epoch": 11.89,
"learning_rate": 5.262128261191208e-06,
"loss": 2.9629,
"step": 1451000
},
{
"epoch": 11.89,
"learning_rate": 5.260484308471288e-06,
"loss": 2.9679,
"step": 1451500
},
{
"epoch": 11.9,
"learning_rate": 5.258840355751369e-06,
"loss": 2.9637,
"step": 1452000
},
{
"epoch": 11.9,
"learning_rate": 5.2571964030314495e-06,
"loss": 2.9677,
"step": 1452500
},
{
"epoch": 11.9,
"learning_rate": 5.25555245031153e-06,
"loss": 2.9707,
"step": 1453000
},
{
"epoch": 11.91,
"learning_rate": 5.25390849759161e-06,
"loss": 2.9706,
"step": 1453500
},
{
"epoch": 11.91,
"learning_rate": 5.25226454487169e-06,
"loss": 2.9627,
"step": 1454000
},
{
"epoch": 11.92,
"learning_rate": 5.250620592151771e-06,
"loss": 2.9631,
"step": 1454500
},
{
"epoch": 11.92,
"learning_rate": 5.248976639431851e-06,
"loss": 2.9652,
"step": 1455000
},
{
"epoch": 11.92,
"learning_rate": 5.24733268671193e-06,
"loss": 2.9613,
"step": 1455500
},
{
"epoch": 11.93,
"learning_rate": 5.24568873399201e-06,
"loss": 2.9683,
"step": 1456000
},
{
"epoch": 11.93,
"learning_rate": 5.2440447812720905e-06,
"loss": 2.954,
"step": 1456500
},
{
"epoch": 11.94,
"learning_rate": 5.242400828552171e-06,
"loss": 2.9646,
"step": 1457000
},
{
"epoch": 11.94,
"learning_rate": 5.240756875832252e-06,
"loss": 2.9694,
"step": 1457500
},
{
"epoch": 11.95,
"learning_rate": 5.239112923112332e-06,
"loss": 2.9651,
"step": 1458000
},
{
"epoch": 11.95,
"learning_rate": 5.237468970392412e-06,
"loss": 2.9675,
"step": 1458500
},
{
"epoch": 11.95,
"learning_rate": 5.235825017672492e-06,
"loss": 2.967,
"step": 1459000
},
{
"epoch": 11.96,
"learning_rate": 5.234181064952573e-06,
"loss": 2.9687,
"step": 1459500
},
{
"epoch": 11.96,
"learning_rate": 5.232537112232653e-06,
"loss": 2.9624,
"step": 1460000
},
{
"epoch": 11.97,
"learning_rate": 5.230893159512733e-06,
"loss": 2.9649,
"step": 1460500
},
{
"epoch": 11.97,
"learning_rate": 5.229249206792813e-06,
"loss": 2.9656,
"step": 1461000
},
{
"epoch": 11.97,
"learning_rate": 5.227605254072893e-06,
"loss": 2.9581,
"step": 1461500
},
{
"epoch": 11.98,
"learning_rate": 5.225961301352973e-06,
"loss": 2.964,
"step": 1462000
},
{
"epoch": 11.98,
"learning_rate": 5.224317348633054e-06,
"loss": 2.958,
"step": 1462500
},
{
"epoch": 11.99,
"learning_rate": 5.222673395913134e-06,
"loss": 2.9638,
"step": 1463000
},
{
"epoch": 11.99,
"learning_rate": 5.221029443193214e-06,
"loss": 2.9679,
"step": 1463500
},
{
"epoch": 11.99,
"learning_rate": 5.219385490473294e-06,
"loss": 2.9585,
"step": 1464000
},
{
"epoch": 12.0,
"learning_rate": 5.217741537753375e-06,
"loss": 2.9664,
"step": 1464500
},
{
"epoch": 12.0,
"learning_rate": 5.216097585033455e-06,
"loss": 2.9604,
"step": 1465000
},
{
"epoch": 12.01,
"learning_rate": 5.214453632313535e-06,
"loss": 2.9489,
"step": 1465500
},
{
"epoch": 12.01,
"learning_rate": 5.2128096795936155e-06,
"loss": 2.9678,
"step": 1466000
},
{
"epoch": 12.01,
"learning_rate": 5.211165726873696e-06,
"loss": 2.9626,
"step": 1466500
},
{
"epoch": 12.02,
"learning_rate": 5.2095217741537766e-06,
"loss": 2.9567,
"step": 1467000
},
{
"epoch": 12.02,
"learning_rate": 5.207877821433856e-06,
"loss": 2.9636,
"step": 1467500
},
{
"epoch": 12.03,
"learning_rate": 5.206233868713936e-06,
"loss": 2.9654,
"step": 1468000
},
{
"epoch": 12.03,
"learning_rate": 5.204589915994016e-06,
"loss": 2.9617,
"step": 1468500
},
{
"epoch": 12.04,
"learning_rate": 5.202945963274096e-06,
"loss": 2.959,
"step": 1469000
},
{
"epoch": 12.04,
"learning_rate": 5.201302010554176e-06,
"loss": 2.9599,
"step": 1469500
},
{
"epoch": 12.04,
"learning_rate": 5.199658057834257e-06,
"loss": 2.9636,
"step": 1470000
},
{
"epoch": 12.04,
"eval_accuracy": 0.494423367210693,
"eval_loss": 2.8266873359680176,
"eval_runtime": 407.8041,
"eval_samples_per_second": 756.138,
"eval_steps_per_second": 15.755,
"step": 1470000
},
{
"epoch": 12.05,
"learning_rate": 5.1980141051143374e-06,
"loss": 2.9612,
"step": 1470500
},
{
"epoch": 12.05,
"learning_rate": 5.1963701523944176e-06,
"loss": 2.9556,
"step": 1471000
},
{
"epoch": 12.06,
"learning_rate": 5.194726199674498e-06,
"loss": 2.9642,
"step": 1471500
},
{
"epoch": 12.06,
"learning_rate": 5.193082246954579e-06,
"loss": 2.9593,
"step": 1472000
},
{
"epoch": 12.06,
"learning_rate": 5.191438294234659e-06,
"loss": 2.9578,
"step": 1472500
},
{
"epoch": 12.07,
"learning_rate": 5.189794341514739e-06,
"loss": 2.962,
"step": 1473000
},
{
"epoch": 12.07,
"learning_rate": 5.188150388794818e-06,
"loss": 2.9613,
"step": 1473500
},
{
"epoch": 12.08,
"learning_rate": 5.186506436074898e-06,
"loss": 2.96,
"step": 1474000
},
{
"epoch": 12.08,
"learning_rate": 5.1848624833549785e-06,
"loss": 2.9622,
"step": 1474500
},
{
"epoch": 12.08,
"learning_rate": 5.1832185306350594e-06,
"loss": 2.9623,
"step": 1475000
},
{
"epoch": 12.09,
"learning_rate": 5.1815745779151396e-06,
"loss": 2.9627,
"step": 1475500
},
{
"epoch": 12.09,
"learning_rate": 5.17993062519522e-06,
"loss": 2.9649,
"step": 1476000
},
{
"epoch": 12.1,
"learning_rate": 5.1782866724753e-06,
"loss": 2.9676,
"step": 1476500
},
{
"epoch": 12.1,
"learning_rate": 5.17664271975538e-06,
"loss": 2.9661,
"step": 1477000
},
{
"epoch": 12.1,
"learning_rate": 5.174998767035461e-06,
"loss": 2.9663,
"step": 1477500
},
{
"epoch": 12.11,
"learning_rate": 5.173354814315541e-06,
"loss": 2.9647,
"step": 1478000
},
{
"epoch": 12.11,
"learning_rate": 5.171710861595621e-06,
"loss": 2.9625,
"step": 1478500
},
{
"epoch": 12.12,
"learning_rate": 5.170066908875701e-06,
"loss": 2.9613,
"step": 1479000
},
{
"epoch": 12.12,
"learning_rate": 5.168422956155782e-06,
"loss": 2.9616,
"step": 1479500
},
{
"epoch": 12.13,
"learning_rate": 5.1667790034358615e-06,
"loss": 2.9624,
"step": 1480000
},
{
"epoch": 12.13,
"learning_rate": 5.165135050715942e-06,
"loss": 2.9611,
"step": 1480500
},
{
"epoch": 12.13,
"learning_rate": 5.163491097996022e-06,
"loss": 2.9657,
"step": 1481000
},
{
"epoch": 12.14,
"learning_rate": 5.161847145276102e-06,
"loss": 2.9649,
"step": 1481500
},
{
"epoch": 12.14,
"learning_rate": 5.160203192556182e-06,
"loss": 2.9588,
"step": 1482000
},
{
"epoch": 12.15,
"learning_rate": 5.158559239836263e-06,
"loss": 2.9646,
"step": 1482500
},
{
"epoch": 12.15,
"learning_rate": 5.156915287116343e-06,
"loss": 2.968,
"step": 1483000
},
{
"epoch": 12.15,
"learning_rate": 5.155271334396423e-06,
"loss": 2.9627,
"step": 1483500
},
{
"epoch": 12.16,
"learning_rate": 5.153627381676503e-06,
"loss": 2.9611,
"step": 1484000
},
{
"epoch": 12.16,
"learning_rate": 5.151983428956584e-06,
"loss": 2.9668,
"step": 1484500
},
{
"epoch": 12.17,
"learning_rate": 5.1503394762366645e-06,
"loss": 2.954,
"step": 1485000
},
{
"epoch": 12.17,
"learning_rate": 5.148695523516745e-06,
"loss": 2.963,
"step": 1485500
},
{
"epoch": 12.17,
"learning_rate": 5.147051570796824e-06,
"loss": 2.9599,
"step": 1486000
},
{
"epoch": 12.18,
"learning_rate": 5.145407618076904e-06,
"loss": 2.9564,
"step": 1486500
},
{
"epoch": 12.18,
"learning_rate": 5.143763665356984e-06,
"loss": 2.9578,
"step": 1487000
},
{
"epoch": 12.19,
"learning_rate": 5.142119712637065e-06,
"loss": 2.9657,
"step": 1487500
},
{
"epoch": 12.19,
"learning_rate": 5.140475759917145e-06,
"loss": 2.9635,
"step": 1488000
},
{
"epoch": 12.2,
"learning_rate": 5.138831807197225e-06,
"loss": 2.966,
"step": 1488500
},
{
"epoch": 12.2,
"learning_rate": 5.1371878544773055e-06,
"loss": 2.96,
"step": 1489000
},
{
"epoch": 12.2,
"learning_rate": 5.135543901757386e-06,
"loss": 2.9557,
"step": 1489500
},
{
"epoch": 12.21,
"learning_rate": 5.133899949037467e-06,
"loss": 2.96,
"step": 1490000
},
{
"epoch": 12.21,
"learning_rate": 5.132255996317547e-06,
"loss": 2.967,
"step": 1490500
},
{
"epoch": 12.22,
"learning_rate": 5.130612043597627e-06,
"loss": 2.9576,
"step": 1491000
},
{
"epoch": 12.22,
"learning_rate": 5.128968090877707e-06,
"loss": 2.9589,
"step": 1491500
},
{
"epoch": 12.22,
"learning_rate": 5.127324138157786e-06,
"loss": 2.9561,
"step": 1492000
},
{
"epoch": 12.23,
"learning_rate": 5.125680185437867e-06,
"loss": 2.9592,
"step": 1492500
},
{
"epoch": 12.23,
"learning_rate": 5.124036232717947e-06,
"loss": 2.9611,
"step": 1493000
},
{
"epoch": 12.24,
"learning_rate": 5.1223922799980275e-06,
"loss": 2.9568,
"step": 1493500
},
{
"epoch": 12.24,
"learning_rate": 5.120748327278108e-06,
"loss": 2.96,
"step": 1494000
},
{
"epoch": 12.24,
"learning_rate": 5.119104374558188e-06,
"loss": 2.961,
"step": 1494500
},
{
"epoch": 12.25,
"learning_rate": 5.117460421838269e-06,
"loss": 2.9604,
"step": 1495000
},
{
"epoch": 12.25,
"learning_rate": 5.115816469118349e-06,
"loss": 2.9559,
"step": 1495500
},
{
"epoch": 12.26,
"learning_rate": 5.114172516398429e-06,
"loss": 2.9636,
"step": 1496000
},
{
"epoch": 12.26,
"learning_rate": 5.112528563678509e-06,
"loss": 2.9589,
"step": 1496500
},
{
"epoch": 12.26,
"learning_rate": 5.11088461095859e-06,
"loss": 2.9592,
"step": 1497000
},
{
"epoch": 12.27,
"learning_rate": 5.10924065823867e-06,
"loss": 2.9643,
"step": 1497500
},
{
"epoch": 12.27,
"learning_rate": 5.1075967055187495e-06,
"loss": 2.9587,
"step": 1498000
},
{
"epoch": 12.28,
"learning_rate": 5.10595275279883e-06,
"loss": 2.9601,
"step": 1498500
},
{
"epoch": 12.28,
"learning_rate": 5.10430880007891e-06,
"loss": 2.9574,
"step": 1499000
},
{
"epoch": 12.29,
"learning_rate": 5.10266484735899e-06,
"loss": 2.9626,
"step": 1499500
},
{
"epoch": 12.29,
"learning_rate": 5.101020894639071e-06,
"loss": 2.9584,
"step": 1500000
},
{
"epoch": 12.29,
"eval_accuracy": 0.49460218852402815,
"eval_loss": 2.8247358798980713,
"eval_runtime": 404.5149,
"eval_samples_per_second": 762.286,
"eval_steps_per_second": 15.883,
"step": 1500000
},
{
"epoch": 12.29,
"learning_rate": 5.099376941919151e-06,
"loss": 2.9567,
"step": 1500500
},
{
"epoch": 12.3,
"learning_rate": 5.097732989199231e-06,
"loss": 2.9534,
"step": 1501000
},
{
"epoch": 12.3,
"learning_rate": 5.096089036479311e-06,
"loss": 2.9594,
"step": 1501500
},
{
"epoch": 12.31,
"learning_rate": 5.094445083759391e-06,
"loss": 2.9505,
"step": 1502000
},
{
"epoch": 12.31,
"learning_rate": 5.092801131039472e-06,
"loss": 2.9665,
"step": 1502500
},
{
"epoch": 12.31,
"learning_rate": 5.0911571783195524e-06,
"loss": 2.9554,
"step": 1503000
},
{
"epoch": 12.32,
"learning_rate": 5.0895132255996326e-06,
"loss": 2.9596,
"step": 1503500
},
{
"epoch": 12.32,
"learning_rate": 5.087869272879712e-06,
"loss": 2.9574,
"step": 1504000
},
{
"epoch": 12.33,
"learning_rate": 5.086225320159792e-06,
"loss": 2.9596,
"step": 1504500
},
{
"epoch": 12.33,
"learning_rate": 5.084581367439872e-06,
"loss": 2.9548,
"step": 1505000
},
{
"epoch": 12.33,
"learning_rate": 5.082937414719953e-06,
"loss": 2.9619,
"step": 1505500
},
{
"epoch": 12.34,
"learning_rate": 5.081293462000033e-06,
"loss": 2.9556,
"step": 1506000
},
{
"epoch": 12.34,
"learning_rate": 5.079649509280113e-06,
"loss": 2.9447,
"step": 1506500
},
{
"epoch": 12.35,
"learning_rate": 5.0780055565601935e-06,
"loss": 2.951,
"step": 1507000
},
{
"epoch": 12.35,
"learning_rate": 5.0763616038402744e-06,
"loss": 2.9613,
"step": 1507500
},
{
"epoch": 12.35,
"learning_rate": 5.0747176511203546e-06,
"loss": 2.9594,
"step": 1508000
},
{
"epoch": 12.36,
"learning_rate": 5.073073698400435e-06,
"loss": 2.9587,
"step": 1508500
},
{
"epoch": 12.36,
"learning_rate": 5.071429745680515e-06,
"loss": 2.9508,
"step": 1509000
},
{
"epoch": 12.37,
"learning_rate": 5.069785792960596e-06,
"loss": 2.9505,
"step": 1509500
},
{
"epoch": 12.37,
"learning_rate": 5.068141840240674e-06,
"loss": 2.9547,
"step": 1510000
},
{
"epoch": 12.38,
"learning_rate": 5.066497887520755e-06,
"loss": 2.9588,
"step": 1510500
},
{
"epoch": 12.38,
"learning_rate": 5.064853934800835e-06,
"loss": 2.9564,
"step": 1511000
},
{
"epoch": 12.38,
"learning_rate": 5.0632099820809154e-06,
"loss": 2.953,
"step": 1511500
},
{
"epoch": 12.39,
"learning_rate": 5.0615660293609956e-06,
"loss": 2.964,
"step": 1512000
},
{
"epoch": 12.39,
"learning_rate": 5.0599220766410765e-06,
"loss": 2.9576,
"step": 1512500
},
{
"epoch": 12.4,
"learning_rate": 5.058278123921157e-06,
"loss": 2.9596,
"step": 1513000
},
{
"epoch": 12.4,
"learning_rate": 5.056634171201237e-06,
"loss": 2.9625,
"step": 1513500
},
{
"epoch": 12.4,
"learning_rate": 5.054990218481317e-06,
"loss": 2.9526,
"step": 1514000
},
{
"epoch": 12.41,
"learning_rate": 5.053346265761397e-06,
"loss": 2.957,
"step": 1514500
},
{
"epoch": 12.41,
"learning_rate": 5.051702313041478e-06,
"loss": 2.9643,
"step": 1515000
},
{
"epoch": 12.42,
"learning_rate": 5.050058360321558e-06,
"loss": 2.9526,
"step": 1515500
},
{
"epoch": 12.42,
"learning_rate": 5.048414407601637e-06,
"loss": 2.9535,
"step": 1516000
},
{
"epoch": 12.42,
"learning_rate": 5.0467704548817175e-06,
"loss": 2.9598,
"step": 1516500
},
{
"epoch": 12.43,
"learning_rate": 5.045126502161798e-06,
"loss": 2.953,
"step": 1517000
},
{
"epoch": 12.43,
"learning_rate": 5.043482549441878e-06,
"loss": 2.9571,
"step": 1517500
},
{
"epoch": 12.44,
"learning_rate": 5.041838596721959e-06,
"loss": 2.9583,
"step": 1518000
},
{
"epoch": 12.44,
"learning_rate": 5.040194644002039e-06,
"loss": 2.9615,
"step": 1518500
},
{
"epoch": 12.44,
"learning_rate": 5.038550691282119e-06,
"loss": 2.9541,
"step": 1519000
},
{
"epoch": 12.45,
"learning_rate": 5.036906738562199e-06,
"loss": 2.9595,
"step": 1519500
},
{
"epoch": 12.45,
"learning_rate": 5.03526278584228e-06,
"loss": 2.9549,
"step": 1520000
},
{
"epoch": 12.46,
"learning_rate": 5.03361883312236e-06,
"loss": 2.958,
"step": 1520500
},
{
"epoch": 12.46,
"learning_rate": 5.03197488040244e-06,
"loss": 2.9547,
"step": 1521000
},
{
"epoch": 12.47,
"learning_rate": 5.0303309276825205e-06,
"loss": 2.9544,
"step": 1521500
},
{
"epoch": 12.47,
"learning_rate": 5.0286869749626e-06,
"loss": 2.9553,
"step": 1522000
},
{
"epoch": 12.47,
"learning_rate": 5.02704302224268e-06,
"loss": 2.9533,
"step": 1522500
},
{
"epoch": 12.48,
"learning_rate": 5.025399069522761e-06,
"loss": 2.9578,
"step": 1523000
},
{
"epoch": 12.48,
"learning_rate": 5.023755116802841e-06,
"loss": 2.9628,
"step": 1523500
},
{
"epoch": 12.49,
"learning_rate": 5.022111164082921e-06,
"loss": 2.9515,
"step": 1524000
},
{
"epoch": 12.49,
"learning_rate": 5.020467211363001e-06,
"loss": 2.9511,
"step": 1524500
},
{
"epoch": 12.49,
"learning_rate": 5.018823258643082e-06,
"loss": 2.9623,
"step": 1525000
},
{
"epoch": 12.5,
"learning_rate": 5.017179305923162e-06,
"loss": 2.9648,
"step": 1525500
},
{
"epoch": 12.5,
"learning_rate": 5.0155353532032425e-06,
"loss": 2.9571,
"step": 1526000
},
{
"epoch": 12.51,
"learning_rate": 5.013891400483323e-06,
"loss": 2.9543,
"step": 1526500
},
{
"epoch": 12.51,
"learning_rate": 5.012247447763403e-06,
"loss": 2.9567,
"step": 1527000
},
{
"epoch": 12.51,
"learning_rate": 5.010603495043484e-06,
"loss": 2.9549,
"step": 1527500
},
{
"epoch": 12.52,
"learning_rate": 5.008959542323563e-06,
"loss": 2.9604,
"step": 1528000
},
{
"epoch": 12.52,
"learning_rate": 5.007315589603643e-06,
"loss": 2.9591,
"step": 1528500
},
{
"epoch": 12.53,
"learning_rate": 5.005671636883723e-06,
"loss": 2.9592,
"step": 1529000
},
{
"epoch": 12.53,
"learning_rate": 5.004027684163803e-06,
"loss": 2.9529,
"step": 1529500
},
{
"epoch": 12.54,
"learning_rate": 5.0023837314438835e-06,
"loss": 2.9546,
"step": 1530000
},
{
"epoch": 12.54,
"eval_accuracy": 0.4951152598295225,
"eval_loss": 2.8195910453796387,
"eval_runtime": 406.5071,
"eval_samples_per_second": 758.55,
"eval_steps_per_second": 15.805,
"step": 1530000
},
{
"epoch": 12.54,
"learning_rate": 5.0007397787239645e-06,
"loss": 2.9556,
"step": 1530500
},
{
"epoch": 12.54,
"learning_rate": 4.999095826004045e-06,
"loss": 2.9566,
"step": 1531000
},
{
"epoch": 12.55,
"learning_rate": 4.997451873284125e-06,
"loss": 2.9579,
"step": 1531500
},
{
"epoch": 12.55,
"learning_rate": 4.995807920564205e-06,
"loss": 2.9552,
"step": 1532000
},
{
"epoch": 12.56,
"learning_rate": 4.994163967844286e-06,
"loss": 2.9573,
"step": 1532500
},
{
"epoch": 12.56,
"learning_rate": 4.992520015124365e-06,
"loss": 2.9501,
"step": 1533000
},
{
"epoch": 12.56,
"learning_rate": 4.990876062404445e-06,
"loss": 2.9542,
"step": 1533500
},
{
"epoch": 12.57,
"learning_rate": 4.989232109684526e-06,
"loss": 2.9543,
"step": 1534000
},
{
"epoch": 12.57,
"learning_rate": 4.987588156964606e-06,
"loss": 2.9581,
"step": 1534500
},
{
"epoch": 12.58,
"learning_rate": 4.9859442042446865e-06,
"loss": 2.9519,
"step": 1535000
},
{
"epoch": 12.58,
"learning_rate": 4.984300251524767e-06,
"loss": 2.9525,
"step": 1535500
},
{
"epoch": 12.58,
"learning_rate": 4.982656298804847e-06,
"loss": 2.9575,
"step": 1536000
},
{
"epoch": 12.59,
"learning_rate": 4.981012346084927e-06,
"loss": 2.951,
"step": 1536500
},
{
"epoch": 12.59,
"learning_rate": 4.979368393365007e-06,
"loss": 2.9554,
"step": 1537000
},
{
"epoch": 12.6,
"learning_rate": 4.977724440645088e-06,
"loss": 2.948,
"step": 1537500
},
{
"epoch": 12.6,
"learning_rate": 4.976080487925168e-06,
"loss": 2.9571,
"step": 1538000
},
{
"epoch": 12.6,
"learning_rate": 4.974436535205248e-06,
"loss": 2.9573,
"step": 1538500
},
{
"epoch": 12.61,
"learning_rate": 4.972792582485328e-06,
"loss": 2.9505,
"step": 1539000
},
{
"epoch": 12.61,
"learning_rate": 4.9711486297654085e-06,
"loss": 2.9538,
"step": 1539500
},
{
"epoch": 12.62,
"learning_rate": 4.969504677045489e-06,
"loss": 2.953,
"step": 1540000
},
{
"epoch": 12.62,
"learning_rate": 4.967860724325569e-06,
"loss": 2.9508,
"step": 1540500
},
{
"epoch": 12.63,
"learning_rate": 4.966216771605649e-06,
"loss": 2.9517,
"step": 1541000
},
{
"epoch": 12.63,
"learning_rate": 4.96457281888573e-06,
"loss": 2.9515,
"step": 1541500
},
{
"epoch": 12.63,
"learning_rate": 4.962928866165809e-06,
"loss": 2.9529,
"step": 1542000
},
{
"epoch": 12.64,
"learning_rate": 4.961284913445889e-06,
"loss": 2.9492,
"step": 1542500
},
{
"epoch": 12.64,
"learning_rate": 4.95964096072597e-06,
"loss": 2.9554,
"step": 1543000
},
{
"epoch": 12.65,
"learning_rate": 4.95799700800605e-06,
"loss": 2.9546,
"step": 1543500
},
{
"epoch": 12.65,
"learning_rate": 4.9563530552861304e-06,
"loss": 2.9517,
"step": 1544000
},
{
"epoch": 12.65,
"learning_rate": 4.9547091025662106e-06,
"loss": 2.9527,
"step": 1544500
},
{
"epoch": 12.66,
"learning_rate": 4.953065149846291e-06,
"loss": 2.9507,
"step": 1545000
},
{
"epoch": 12.66,
"learning_rate": 4.951421197126371e-06,
"loss": 2.9389,
"step": 1545500
},
{
"epoch": 12.67,
"learning_rate": 4.949777244406451e-06,
"loss": 2.9514,
"step": 1546000
},
{
"epoch": 12.67,
"learning_rate": 4.948133291686532e-06,
"loss": 2.9557,
"step": 1546500
},
{
"epoch": 12.67,
"learning_rate": 4.946489338966612e-06,
"loss": 2.9524,
"step": 1547000
},
{
"epoch": 12.68,
"learning_rate": 4.944845386246692e-06,
"loss": 2.9492,
"step": 1547500
},
{
"epoch": 12.68,
"learning_rate": 4.943201433526772e-06,
"loss": 2.9557,
"step": 1548000
},
{
"epoch": 12.69,
"learning_rate": 4.941557480806852e-06,
"loss": 2.9515,
"step": 1548500
},
{
"epoch": 12.69,
"learning_rate": 4.9399135280869325e-06,
"loss": 2.9559,
"step": 1549000
},
{
"epoch": 12.69,
"learning_rate": 4.938269575367013e-06,
"loss": 2.9511,
"step": 1549500
},
{
"epoch": 12.7,
"learning_rate": 4.936625622647093e-06,
"loss": 2.9542,
"step": 1550000
},
{
"epoch": 12.7,
"learning_rate": 4.934981669927174e-06,
"loss": 2.9592,
"step": 1550500
},
{
"epoch": 12.71,
"learning_rate": 4.933337717207253e-06,
"loss": 2.95,
"step": 1551000
},
{
"epoch": 12.71,
"learning_rate": 4.931693764487334e-06,
"loss": 2.9537,
"step": 1551500
},
{
"epoch": 12.72,
"learning_rate": 4.930049811767414e-06,
"loss": 2.9526,
"step": 1552000
},
{
"epoch": 12.72,
"learning_rate": 4.928405859047494e-06,
"loss": 2.9549,
"step": 1552500
},
{
"epoch": 12.72,
"learning_rate": 4.926761906327574e-06,
"loss": 2.9513,
"step": 1553000
},
{
"epoch": 12.73,
"learning_rate": 4.9251179536076545e-06,
"loss": 2.9561,
"step": 1553500
},
{
"epoch": 12.73,
"learning_rate": 4.923474000887735e-06,
"loss": 2.9589,
"step": 1554000
},
{
"epoch": 12.74,
"learning_rate": 4.921830048167815e-06,
"loss": 2.9527,
"step": 1554500
},
{
"epoch": 12.74,
"learning_rate": 4.920186095447895e-06,
"loss": 2.9527,
"step": 1555000
},
{
"epoch": 12.74,
"learning_rate": 4.918542142727976e-06,
"loss": 2.954,
"step": 1555500
},
{
"epoch": 12.75,
"learning_rate": 4.916898190008056e-06,
"loss": 2.943,
"step": 1556000
},
{
"epoch": 12.75,
"learning_rate": 4.915254237288136e-06,
"loss": 2.9428,
"step": 1556500
},
{
"epoch": 12.76,
"learning_rate": 4.913610284568216e-06,
"loss": 2.9548,
"step": 1557000
},
{
"epoch": 12.76,
"learning_rate": 4.911966331848296e-06,
"loss": 2.9565,
"step": 1557500
},
{
"epoch": 12.76,
"learning_rate": 4.9103223791283765e-06,
"loss": 2.9475,
"step": 1558000
},
{
"epoch": 12.77,
"learning_rate": 4.908678426408457e-06,
"loss": 2.9534,
"step": 1558500
},
{
"epoch": 12.77,
"learning_rate": 4.907034473688538e-06,
"loss": 2.9567,
"step": 1559000
},
{
"epoch": 12.78,
"learning_rate": 4.905390520968618e-06,
"loss": 2.9496,
"step": 1559500
},
{
"epoch": 12.78,
"learning_rate": 4.903746568248697e-06,
"loss": 2.9544,
"step": 1560000
},
{
"epoch": 12.78,
"eval_accuracy": 0.4959430255891887,
"eval_loss": 2.8146307468414307,
"eval_runtime": 404.6908,
"eval_samples_per_second": 761.955,
"eval_steps_per_second": 15.876,
"step": 1560000
},
{
"epoch": 12.78,
"learning_rate": 4.902102615528778e-06,
"loss": 2.9512,
"step": 1560500
},
{
"epoch": 12.79,
"learning_rate": 4.900458662808858e-06,
"loss": 2.9506,
"step": 1561000
},
{
"epoch": 12.79,
"learning_rate": 4.898814710088938e-06,
"loss": 2.9507,
"step": 1561500
},
{
"epoch": 12.8,
"learning_rate": 4.897170757369018e-06,
"loss": 2.9485,
"step": 1562000
},
{
"epoch": 12.8,
"learning_rate": 4.8955268046490985e-06,
"loss": 2.9472,
"step": 1562500
},
{
"epoch": 12.81,
"learning_rate": 4.893882851929179e-06,
"loss": 2.9466,
"step": 1563000
},
{
"epoch": 12.81,
"learning_rate": 4.892238899209259e-06,
"loss": 2.9461,
"step": 1563500
},
{
"epoch": 12.81,
"learning_rate": 4.890594946489339e-06,
"loss": 2.9519,
"step": 1564000
},
{
"epoch": 12.82,
"learning_rate": 4.88895099376942e-06,
"loss": 2.9548,
"step": 1564500
},
{
"epoch": 12.82,
"learning_rate": 4.8873070410495e-06,
"loss": 2.9514,
"step": 1565000
},
{
"epoch": 12.83,
"learning_rate": 4.88566308832958e-06,
"loss": 2.9515,
"step": 1565500
},
{
"epoch": 12.83,
"learning_rate": 4.88401913560966e-06,
"loss": 2.9448,
"step": 1566000
},
{
"epoch": 12.83,
"learning_rate": 4.88237518288974e-06,
"loss": 2.9534,
"step": 1566500
},
{
"epoch": 12.84,
"learning_rate": 4.8807312301698205e-06,
"loss": 2.9453,
"step": 1567000
},
{
"epoch": 12.84,
"learning_rate": 4.879087277449901e-06,
"loss": 2.95,
"step": 1567500
},
{
"epoch": 12.85,
"learning_rate": 4.877443324729982e-06,
"loss": 2.9518,
"step": 1568000
},
{
"epoch": 12.85,
"learning_rate": 4.875799372010062e-06,
"loss": 2.9582,
"step": 1568500
},
{
"epoch": 12.85,
"learning_rate": 4.874155419290141e-06,
"loss": 2.9535,
"step": 1569000
},
{
"epoch": 12.86,
"learning_rate": 4.872511466570222e-06,
"loss": 2.9523,
"step": 1569500
},
{
"epoch": 12.86,
"learning_rate": 4.870867513850302e-06,
"loss": 2.9578,
"step": 1570000
},
{
"epoch": 12.87,
"learning_rate": 4.869223561130382e-06,
"loss": 2.95,
"step": 1570500
},
{
"epoch": 12.87,
"learning_rate": 4.867579608410462e-06,
"loss": 2.9472,
"step": 1571000
},
{
"epoch": 12.88,
"learning_rate": 4.865935655690543e-06,
"loss": 2.9505,
"step": 1571500
},
{
"epoch": 12.88,
"learning_rate": 4.864291702970623e-06,
"loss": 2.9519,
"step": 1572000
},
{
"epoch": 12.88,
"learning_rate": 4.862647750250703e-06,
"loss": 2.939,
"step": 1572500
},
{
"epoch": 12.89,
"learning_rate": 4.861003797530784e-06,
"loss": 2.9494,
"step": 1573000
},
{
"epoch": 12.89,
"learning_rate": 4.859359844810864e-06,
"loss": 2.9491,
"step": 1573500
},
{
"epoch": 12.9,
"learning_rate": 4.857715892090944e-06,
"loss": 2.9524,
"step": 1574000
},
{
"epoch": 12.9,
"learning_rate": 4.856071939371024e-06,
"loss": 2.9501,
"step": 1574500
},
{
"epoch": 12.9,
"learning_rate": 4.854427986651104e-06,
"loss": 2.9494,
"step": 1575000
},
{
"epoch": 12.91,
"learning_rate": 4.852784033931184e-06,
"loss": 2.949,
"step": 1575500
},
{
"epoch": 12.91,
"learning_rate": 4.8511400812112645e-06,
"loss": 2.9445,
"step": 1576000
},
{
"epoch": 12.92,
"learning_rate": 4.849496128491345e-06,
"loss": 2.9457,
"step": 1576500
},
{
"epoch": 12.92,
"learning_rate": 4.8478521757714256e-06,
"loss": 2.9505,
"step": 1577000
},
{
"epoch": 12.92,
"learning_rate": 4.846208223051506e-06,
"loss": 2.9521,
"step": 1577500
},
{
"epoch": 12.93,
"learning_rate": 4.844564270331585e-06,
"loss": 2.9494,
"step": 1578000
},
{
"epoch": 12.93,
"learning_rate": 4.842920317611666e-06,
"loss": 2.9524,
"step": 1578500
},
{
"epoch": 12.94,
"learning_rate": 4.841276364891746e-06,
"loss": 2.9477,
"step": 1579000
},
{
"epoch": 12.94,
"learning_rate": 4.839632412171826e-06,
"loss": 2.947,
"step": 1579500
},
{
"epoch": 12.94,
"learning_rate": 4.837988459451906e-06,
"loss": 2.9519,
"step": 1580000
},
{
"epoch": 12.95,
"learning_rate": 4.836344506731987e-06,
"loss": 2.95,
"step": 1580500
},
{
"epoch": 12.95,
"learning_rate": 4.834700554012067e-06,
"loss": 2.9561,
"step": 1581000
},
{
"epoch": 12.96,
"learning_rate": 4.833056601292147e-06,
"loss": 2.9502,
"step": 1581500
},
{
"epoch": 12.96,
"learning_rate": 4.831412648572228e-06,
"loss": 2.9511,
"step": 1582000
},
{
"epoch": 12.97,
"learning_rate": 4.829768695852308e-06,
"loss": 2.9545,
"step": 1582500
},
{
"epoch": 12.97,
"learning_rate": 4.828124743132388e-06,
"loss": 2.9486,
"step": 1583000
},
{
"epoch": 12.97,
"learning_rate": 4.826480790412468e-06,
"loss": 2.9494,
"step": 1583500
},
{
"epoch": 12.98,
"learning_rate": 4.824836837692549e-06,
"loss": 2.9427,
"step": 1584000
},
{
"epoch": 12.98,
"learning_rate": 4.823192884972628e-06,
"loss": 2.9539,
"step": 1584500
},
{
"epoch": 12.99,
"learning_rate": 4.8215489322527084e-06,
"loss": 2.9488,
"step": 1585000
},
{
"epoch": 12.99,
"learning_rate": 4.819904979532789e-06,
"loss": 2.9474,
"step": 1585500
},
{
"epoch": 12.99,
"learning_rate": 4.8182610268128695e-06,
"loss": 2.9436,
"step": 1586000
},
{
"epoch": 13.0,
"learning_rate": 4.81661707409295e-06,
"loss": 2.9426,
"step": 1586500
},
{
"epoch": 13.0,
"learning_rate": 4.81497312137303e-06,
"loss": 2.9475,
"step": 1587000
},
{
"epoch": 13.01,
"learning_rate": 4.81332916865311e-06,
"loss": 2.9566,
"step": 1587500
},
{
"epoch": 13.01,
"learning_rate": 4.81168521593319e-06,
"loss": 2.9528,
"step": 1588000
},
{
"epoch": 13.01,
"learning_rate": 4.81004126321327e-06,
"loss": 2.9459,
"step": 1588500
},
{
"epoch": 13.02,
"learning_rate": 4.80839731049335e-06,
"loss": 2.9478,
"step": 1589000
},
{
"epoch": 13.02,
"learning_rate": 4.806753357773431e-06,
"loss": 2.9457,
"step": 1589500
},
{
"epoch": 13.03,
"learning_rate": 4.805109405053511e-06,
"loss": 2.9486,
"step": 1590000
},
{
"epoch": 13.03,
"eval_accuracy": 0.4963652221640762,
"eval_loss": 2.8131563663482666,
"eval_runtime": 407.4078,
"eval_samples_per_second": 756.873,
"eval_steps_per_second": 15.77,
"step": 1590000
},
{
"epoch": 13.03,
"learning_rate": 4.803465452333591e-06,
"loss": 2.9509,
"step": 1590500
},
{
"epoch": 13.03,
"learning_rate": 4.801821499613672e-06,
"loss": 2.9485,
"step": 1591000
},
{
"epoch": 13.04,
"learning_rate": 4.800177546893752e-06,
"loss": 2.9573,
"step": 1591500
},
{
"epoch": 13.04,
"learning_rate": 4.798533594173832e-06,
"loss": 2.9446,
"step": 1592000
},
{
"epoch": 13.05,
"learning_rate": 4.796889641453912e-06,
"loss": 2.9451,
"step": 1592500
},
{
"epoch": 13.05,
"learning_rate": 4.795245688733993e-06,
"loss": 2.9395,
"step": 1593000
},
{
"epoch": 13.06,
"learning_rate": 4.793601736014072e-06,
"loss": 2.9443,
"step": 1593500
},
{
"epoch": 13.06,
"learning_rate": 4.791957783294152e-06,
"loss": 2.9481,
"step": 1594000
},
{
"epoch": 13.06,
"learning_rate": 4.790313830574233e-06,
"loss": 2.9436,
"step": 1594500
},
{
"epoch": 13.07,
"learning_rate": 4.7886698778543135e-06,
"loss": 2.95,
"step": 1595000
},
{
"epoch": 13.07,
"learning_rate": 4.787025925134394e-06,
"loss": 2.9522,
"step": 1595500
},
{
"epoch": 13.08,
"learning_rate": 4.785381972414474e-06,
"loss": 2.9437,
"step": 1596000
},
{
"epoch": 13.08,
"learning_rate": 4.783738019694554e-06,
"loss": 2.9467,
"step": 1596500
},
{
"epoch": 13.08,
"learning_rate": 4.782094066974634e-06,
"loss": 2.9474,
"step": 1597000
},
{
"epoch": 13.09,
"learning_rate": 4.780450114254714e-06,
"loss": 2.945,
"step": 1597500
},
{
"epoch": 13.09,
"learning_rate": 4.778806161534795e-06,
"loss": 2.9562,
"step": 1598000
},
{
"epoch": 13.1,
"learning_rate": 4.777162208814875e-06,
"loss": 2.9497,
"step": 1598500
},
{
"epoch": 13.1,
"learning_rate": 4.775518256094955e-06,
"loss": 2.9503,
"step": 1599000
},
{
"epoch": 13.1,
"learning_rate": 4.7738743033750355e-06,
"loss": 2.9489,
"step": 1599500
},
{
"epoch": 13.11,
"learning_rate": 4.772230350655116e-06,
"loss": 2.9467,
"step": 1600000
},
{
"epoch": 13.11,
"learning_rate": 4.770586397935196e-06,
"loss": 2.9436,
"step": 1600500
},
{
"epoch": 13.12,
"learning_rate": 4.768942445215276e-06,
"loss": 2.9546,
"step": 1601000
},
{
"epoch": 13.12,
"learning_rate": 4.767298492495356e-06,
"loss": 2.9411,
"step": 1601500
},
{
"epoch": 13.12,
"learning_rate": 4.765654539775437e-06,
"loss": 2.9413,
"step": 1602000
},
{
"epoch": 13.13,
"learning_rate": 4.764010587055516e-06,
"loss": 2.9436,
"step": 1602500
},
{
"epoch": 13.13,
"learning_rate": 4.762366634335596e-06,
"loss": 2.9467,
"step": 1603000
},
{
"epoch": 13.14,
"learning_rate": 4.760722681615677e-06,
"loss": 2.9432,
"step": 1603500
},
{
"epoch": 13.14,
"learning_rate": 4.7590787288957575e-06,
"loss": 2.9429,
"step": 1604000
},
{
"epoch": 13.15,
"learning_rate": 4.757434776175838e-06,
"loss": 2.9474,
"step": 1604500
},
{
"epoch": 13.15,
"learning_rate": 4.755790823455918e-06,
"loss": 2.9524,
"step": 1605000
},
{
"epoch": 13.15,
"learning_rate": 4.754146870735998e-06,
"loss": 2.9471,
"step": 1605500
},
{
"epoch": 13.16,
"learning_rate": 4.752502918016078e-06,
"loss": 2.9486,
"step": 1606000
},
{
"epoch": 13.16,
"learning_rate": 4.750858965296158e-06,
"loss": 2.9405,
"step": 1606500
},
{
"epoch": 13.17,
"learning_rate": 4.749215012576239e-06,
"loss": 2.9477,
"step": 1607000
},
{
"epoch": 13.17,
"learning_rate": 4.747571059856319e-06,
"loss": 2.9465,
"step": 1607500
},
{
"epoch": 13.17,
"learning_rate": 4.745927107136399e-06,
"loss": 2.9527,
"step": 1608000
},
{
"epoch": 13.18,
"learning_rate": 4.7442831544164795e-06,
"loss": 2.947,
"step": 1608500
},
{
"epoch": 13.18,
"learning_rate": 4.74263920169656e-06,
"loss": 2.9366,
"step": 1609000
},
{
"epoch": 13.19,
"learning_rate": 4.74099524897664e-06,
"loss": 2.9444,
"step": 1609500
},
{
"epoch": 13.19,
"learning_rate": 4.73935129625672e-06,
"loss": 2.9487,
"step": 1610000
},
{
"epoch": 13.19,
"learning_rate": 4.737707343536801e-06,
"loss": 2.9506,
"step": 1610500
},
{
"epoch": 13.2,
"learning_rate": 4.736063390816881e-06,
"loss": 2.9445,
"step": 1611000
},
{
"epoch": 13.2,
"learning_rate": 4.73441943809696e-06,
"loss": 2.9486,
"step": 1611500
},
{
"epoch": 13.21,
"learning_rate": 4.732775485377041e-06,
"loss": 2.9435,
"step": 1612000
},
{
"epoch": 13.21,
"learning_rate": 4.731131532657121e-06,
"loss": 2.9416,
"step": 1612500
},
{
"epoch": 13.22,
"learning_rate": 4.7294875799372014e-06,
"loss": 2.9449,
"step": 1613000
},
{
"epoch": 13.22,
"learning_rate": 4.7278436272172816e-06,
"loss": 2.95,
"step": 1613500
},
{
"epoch": 13.22,
"learning_rate": 4.726199674497362e-06,
"loss": 2.9506,
"step": 1614000
},
{
"epoch": 13.23,
"learning_rate": 4.724555721777442e-06,
"loss": 2.9452,
"step": 1614500
},
{
"epoch": 13.23,
"learning_rate": 4.722911769057522e-06,
"loss": 2.9432,
"step": 1615000
},
{
"epoch": 13.24,
"learning_rate": 4.721267816337602e-06,
"loss": 2.9425,
"step": 1615500
},
{
"epoch": 13.24,
"learning_rate": 4.719623863617683e-06,
"loss": 2.9451,
"step": 1616000
},
{
"epoch": 13.24,
"learning_rate": 4.717979910897763e-06,
"loss": 2.9468,
"step": 1616500
},
{
"epoch": 13.25,
"learning_rate": 4.716335958177843e-06,
"loss": 2.9504,
"step": 1617000
},
{
"epoch": 13.25,
"learning_rate": 4.7146920054579234e-06,
"loss": 2.9464,
"step": 1617500
},
{
"epoch": 13.26,
"learning_rate": 4.7130480527380036e-06,
"loss": 2.9482,
"step": 1618000
},
{
"epoch": 13.26,
"learning_rate": 4.711404100018084e-06,
"loss": 2.9461,
"step": 1618500
},
{
"epoch": 13.26,
"learning_rate": 4.709760147298164e-06,
"loss": 2.9471,
"step": 1619000
},
{
"epoch": 13.27,
"learning_rate": 4.708116194578245e-06,
"loss": 2.9439,
"step": 1619500
},
{
"epoch": 13.27,
"learning_rate": 4.706472241858325e-06,
"loss": 2.9413,
"step": 1620000
},
{
"epoch": 13.27,
"eval_accuracy": 0.49669974826700375,
"eval_loss": 2.809884786605835,
"eval_runtime": 410.3222,
"eval_samples_per_second": 751.497,
"eval_steps_per_second": 15.658,
"step": 1620000
},
{
"epoch": 13.28,
"learning_rate": 4.704828289138404e-06,
"loss": 2.9414,
"step": 1620500
},
{
"epoch": 13.28,
"learning_rate": 4.703184336418485e-06,
"loss": 2.9411,
"step": 1621000
},
{
"epoch": 13.28,
"learning_rate": 4.701540383698565e-06,
"loss": 2.9508,
"step": 1621500
},
{
"epoch": 13.29,
"learning_rate": 4.699896430978645e-06,
"loss": 2.9504,
"step": 1622000
},
{
"epoch": 13.29,
"learning_rate": 4.6982524782587255e-06,
"loss": 2.9523,
"step": 1622500
},
{
"epoch": 13.3,
"learning_rate": 4.696608525538806e-06,
"loss": 2.9378,
"step": 1623000
},
{
"epoch": 13.3,
"learning_rate": 4.694964572818886e-06,
"loss": 2.9436,
"step": 1623500
},
{
"epoch": 13.31,
"learning_rate": 4.693320620098966e-06,
"loss": 2.9432,
"step": 1624000
},
{
"epoch": 13.31,
"learning_rate": 4.691676667379047e-06,
"loss": 2.9492,
"step": 1624500
},
{
"epoch": 13.31,
"learning_rate": 4.690032714659127e-06,
"loss": 2.9463,
"step": 1625000
},
{
"epoch": 13.32,
"learning_rate": 4.688388761939207e-06,
"loss": 2.9491,
"step": 1625500
},
{
"epoch": 13.32,
"learning_rate": 4.686744809219287e-06,
"loss": 2.9386,
"step": 1626000
},
{
"epoch": 13.33,
"learning_rate": 4.685100856499367e-06,
"loss": 2.9441,
"step": 1626500
},
{
"epoch": 13.33,
"learning_rate": 4.6834569037794475e-06,
"loss": 2.9448,
"step": 1627000
},
{
"epoch": 13.33,
"learning_rate": 4.681812951059528e-06,
"loss": 2.9372,
"step": 1627500
},
{
"epoch": 13.34,
"learning_rate": 4.680168998339608e-06,
"loss": 2.9463,
"step": 1628000
},
{
"epoch": 13.34,
"learning_rate": 4.678525045619689e-06,
"loss": 2.9438,
"step": 1628500
},
{
"epoch": 13.35,
"learning_rate": 4.676881092899769e-06,
"loss": 2.9344,
"step": 1629000
},
{
"epoch": 13.35,
"learning_rate": 4.675237140179849e-06,
"loss": 2.9421,
"step": 1629500
},
{
"epoch": 13.35,
"learning_rate": 4.673593187459929e-06,
"loss": 2.9386,
"step": 1630000
},
{
"epoch": 13.36,
"learning_rate": 4.671949234740009e-06,
"loss": 2.9458,
"step": 1630500
},
{
"epoch": 13.36,
"learning_rate": 4.670305282020089e-06,
"loss": 2.9415,
"step": 1631000
},
{
"epoch": 13.37,
"learning_rate": 4.6686613293001695e-06,
"loss": 2.9457,
"step": 1631500
},
{
"epoch": 13.37,
"learning_rate": 4.6670173765802505e-06,
"loss": 2.9435,
"step": 1632000
},
{
"epoch": 13.37,
"learning_rate": 4.665373423860331e-06,
"loss": 2.9474,
"step": 1632500
},
{
"epoch": 13.38,
"learning_rate": 4.66372947114041e-06,
"loss": 2.9461,
"step": 1633000
},
{
"epoch": 13.38,
"learning_rate": 4.662085518420491e-06,
"loss": 2.9435,
"step": 1633500
},
{
"epoch": 13.39,
"learning_rate": 4.660441565700571e-06,
"loss": 2.9385,
"step": 1634000
},
{
"epoch": 13.39,
"learning_rate": 4.658797612980651e-06,
"loss": 2.9458,
"step": 1634500
},
{
"epoch": 13.4,
"learning_rate": 4.657153660260731e-06,
"loss": 2.9402,
"step": 1635000
},
{
"epoch": 13.4,
"learning_rate": 4.655509707540811e-06,
"loss": 2.9385,
"step": 1635500
},
{
"epoch": 13.4,
"learning_rate": 4.6538657548208915e-06,
"loss": 2.9452,
"step": 1636000
},
{
"epoch": 13.41,
"learning_rate": 4.652221802100972e-06,
"loss": 2.9495,
"step": 1636500
},
{
"epoch": 13.41,
"learning_rate": 4.650577849381052e-06,
"loss": 2.9466,
"step": 1637000
},
{
"epoch": 13.42,
"learning_rate": 4.648933896661133e-06,
"loss": 2.9438,
"step": 1637500
},
{
"epoch": 13.42,
"learning_rate": 4.647289943941213e-06,
"loss": 2.9452,
"step": 1638000
},
{
"epoch": 13.42,
"learning_rate": 4.645645991221293e-06,
"loss": 2.9386,
"step": 1638500
},
{
"epoch": 13.43,
"learning_rate": 4.644002038501373e-06,
"loss": 2.9393,
"step": 1639000
},
{
"epoch": 13.43,
"learning_rate": 4.642358085781453e-06,
"loss": 2.9346,
"step": 1639500
},
{
"epoch": 13.44,
"learning_rate": 4.640714133061533e-06,
"loss": 2.9463,
"step": 1640000
},
{
"epoch": 13.44,
"learning_rate": 4.6390701803416135e-06,
"loss": 2.9433,
"step": 1640500
},
{
"epoch": 13.44,
"learning_rate": 4.6374262276216945e-06,
"loss": 2.9408,
"step": 1641000
},
{
"epoch": 13.45,
"learning_rate": 4.635782274901775e-06,
"loss": 2.9425,
"step": 1641500
},
{
"epoch": 13.45,
"learning_rate": 4.634138322181854e-06,
"loss": 2.9493,
"step": 1642000
},
{
"epoch": 13.46,
"learning_rate": 4.632494369461935e-06,
"loss": 2.9481,
"step": 1642500
},
{
"epoch": 13.46,
"learning_rate": 4.630850416742015e-06,
"loss": 2.9438,
"step": 1643000
},
{
"epoch": 13.46,
"learning_rate": 4.629206464022095e-06,
"loss": 2.9436,
"step": 1643500
},
{
"epoch": 13.47,
"learning_rate": 4.627562511302175e-06,
"loss": 2.9404,
"step": 1644000
},
{
"epoch": 13.47,
"learning_rate": 4.625918558582256e-06,
"loss": 2.9467,
"step": 1644500
},
{
"epoch": 13.48,
"learning_rate": 4.6242746058623355e-06,
"loss": 2.9504,
"step": 1645000
},
{
"epoch": 13.48,
"learning_rate": 4.622630653142416e-06,
"loss": 2.947,
"step": 1645500
},
{
"epoch": 13.49,
"learning_rate": 4.6209867004224966e-06,
"loss": 2.9411,
"step": 1646000
},
{
"epoch": 13.49,
"learning_rate": 4.619342747702577e-06,
"loss": 2.9402,
"step": 1646500
},
{
"epoch": 13.49,
"learning_rate": 4.617698794982657e-06,
"loss": 2.9397,
"step": 1647000
},
{
"epoch": 13.5,
"learning_rate": 4.616054842262737e-06,
"loss": 2.9406,
"step": 1647500
},
{
"epoch": 13.5,
"learning_rate": 4.614410889542817e-06,
"loss": 2.9432,
"step": 1648000
},
{
"epoch": 13.51,
"learning_rate": 4.612766936822897e-06,
"loss": 2.9428,
"step": 1648500
},
{
"epoch": 13.51,
"learning_rate": 4.611122984102977e-06,
"loss": 2.9471,
"step": 1649000
},
{
"epoch": 13.51,
"learning_rate": 4.6094790313830575e-06,
"loss": 2.944,
"step": 1649500
},
{
"epoch": 13.52,
"learning_rate": 4.6078350786631384e-06,
"loss": 2.9381,
"step": 1650000
},
{
"epoch": 13.52,
"eval_accuracy": 0.49680267143539775,
"eval_loss": 2.8081107139587402,
"eval_runtime": 403.5601,
"eval_samples_per_second": 764.089,
"eval_steps_per_second": 15.921,
"step": 1650000
},
{
"epoch": 13.52,
"learning_rate": 4.6061911259432186e-06,
"loss": 2.9407,
"step": 1650500
},
{
"epoch": 13.53,
"learning_rate": 4.604547173223298e-06,
"loss": 2.9429,
"step": 1651000
},
{
"epoch": 13.53,
"learning_rate": 4.602903220503379e-06,
"loss": 2.9429,
"step": 1651500
},
{
"epoch": 13.53,
"learning_rate": 4.601259267783459e-06,
"loss": 2.9433,
"step": 1652000
},
{
"epoch": 13.54,
"learning_rate": 4.599615315063539e-06,
"loss": 2.9313,
"step": 1652500
},
{
"epoch": 13.54,
"learning_rate": 4.597971362343619e-06,
"loss": 2.942,
"step": 1653000
},
{
"epoch": 13.55,
"learning_rate": 4.5963274096237e-06,
"loss": 2.9498,
"step": 1653500
},
{
"epoch": 13.55,
"learning_rate": 4.5946834569037794e-06,
"loss": 2.9326,
"step": 1654000
},
{
"epoch": 13.56,
"learning_rate": 4.5930395041838596e-06,
"loss": 2.9415,
"step": 1654500
},
{
"epoch": 13.56,
"learning_rate": 4.5913955514639405e-06,
"loss": 2.9337,
"step": 1655000
},
{
"epoch": 13.56,
"learning_rate": 4.589751598744021e-06,
"loss": 2.9363,
"step": 1655500
},
{
"epoch": 13.57,
"learning_rate": 4.588107646024101e-06,
"loss": 2.943,
"step": 1656000
},
{
"epoch": 13.57,
"learning_rate": 4.586463693304181e-06,
"loss": 2.9443,
"step": 1656500
},
{
"epoch": 13.58,
"learning_rate": 4.584819740584261e-06,
"loss": 2.9424,
"step": 1657000
},
{
"epoch": 13.58,
"learning_rate": 4.583175787864341e-06,
"loss": 2.9449,
"step": 1657500
},
{
"epoch": 13.58,
"learning_rate": 4.581531835144421e-06,
"loss": 2.9351,
"step": 1658000
},
{
"epoch": 13.59,
"learning_rate": 4.579887882424502e-06,
"loss": 2.9457,
"step": 1658500
},
{
"epoch": 13.59,
"learning_rate": 4.578243929704582e-06,
"loss": 2.9411,
"step": 1659000
},
{
"epoch": 13.6,
"learning_rate": 4.5765999769846625e-06,
"loss": 2.9469,
"step": 1659500
},
{
"epoch": 13.6,
"learning_rate": 4.574956024264743e-06,
"loss": 2.9427,
"step": 1660000
},
{
"epoch": 13.6,
"learning_rate": 4.573312071544823e-06,
"loss": 2.9381,
"step": 1660500
},
{
"epoch": 13.61,
"learning_rate": 4.571668118824903e-06,
"loss": 2.9367,
"step": 1661000
},
{
"epoch": 13.61,
"learning_rate": 4.570024166104983e-06,
"loss": 2.9354,
"step": 1661500
},
{
"epoch": 13.62,
"learning_rate": 4.568380213385063e-06,
"loss": 2.9368,
"step": 1662000
},
{
"epoch": 13.62,
"learning_rate": 4.566736260665144e-06,
"loss": 2.948,
"step": 1662500
},
{
"epoch": 13.62,
"learning_rate": 4.565092307945223e-06,
"loss": 2.9395,
"step": 1663000
},
{
"epoch": 13.63,
"learning_rate": 4.5634483552253035e-06,
"loss": 2.942,
"step": 1663500
},
{
"epoch": 13.63,
"learning_rate": 4.5618044025053845e-06,
"loss": 2.933,
"step": 1664000
},
{
"epoch": 13.64,
"learning_rate": 4.560160449785465e-06,
"loss": 2.9397,
"step": 1664500
},
{
"epoch": 13.64,
"learning_rate": 4.558516497065545e-06,
"loss": 2.9435,
"step": 1665000
},
{
"epoch": 13.65,
"learning_rate": 4.556872544345625e-06,
"loss": 2.9398,
"step": 1665500
},
{
"epoch": 13.65,
"learning_rate": 4.555228591625705e-06,
"loss": 2.9433,
"step": 1666000
},
{
"epoch": 13.65,
"learning_rate": 4.553584638905785e-06,
"loss": 2.9367,
"step": 1666500
},
{
"epoch": 13.66,
"learning_rate": 4.551940686185865e-06,
"loss": 2.9471,
"step": 1667000
},
{
"epoch": 13.66,
"learning_rate": 4.550296733465946e-06,
"loss": 2.9446,
"step": 1667500
},
{
"epoch": 13.67,
"learning_rate": 4.548652780746026e-06,
"loss": 2.9373,
"step": 1668000
},
{
"epoch": 13.67,
"learning_rate": 4.5470088280261065e-06,
"loss": 2.9354,
"step": 1668500
},
{
"epoch": 13.67,
"learning_rate": 4.545364875306187e-06,
"loss": 2.9431,
"step": 1669000
},
{
"epoch": 13.68,
"learning_rate": 4.543720922586267e-06,
"loss": 2.9444,
"step": 1669500
},
{
"epoch": 13.68,
"learning_rate": 4.542076969866347e-06,
"loss": 2.9413,
"step": 1670000
},
{
"epoch": 13.69,
"learning_rate": 4.540433017146427e-06,
"loss": 2.9407,
"step": 1670500
},
{
"epoch": 13.69,
"learning_rate": 4.538789064426508e-06,
"loss": 2.9372,
"step": 1671000
},
{
"epoch": 13.69,
"learning_rate": 4.537145111706588e-06,
"loss": 2.9418,
"step": 1671500
},
{
"epoch": 13.7,
"learning_rate": 4.535501158986667e-06,
"loss": 2.9401,
"step": 1672000
},
{
"epoch": 13.7,
"learning_rate": 4.533857206266748e-06,
"loss": 2.9391,
"step": 1672500
},
{
"epoch": 13.71,
"learning_rate": 4.5322132535468285e-06,
"loss": 2.9417,
"step": 1673000
},
{
"epoch": 13.71,
"learning_rate": 4.530569300826909e-06,
"loss": 2.9413,
"step": 1673500
},
{
"epoch": 13.71,
"learning_rate": 4.528925348106989e-06,
"loss": 2.9382,
"step": 1674000
},
{
"epoch": 13.72,
"learning_rate": 4.527281395387069e-06,
"loss": 2.942,
"step": 1674500
},
{
"epoch": 13.72,
"learning_rate": 4.525637442667149e-06,
"loss": 2.9443,
"step": 1675000
},
{
"epoch": 13.73,
"learning_rate": 4.523993489947229e-06,
"loss": 2.9417,
"step": 1675500
},
{
"epoch": 13.73,
"learning_rate": 4.522349537227309e-06,
"loss": 2.9398,
"step": 1676000
},
{
"epoch": 13.74,
"learning_rate": 4.52070558450739e-06,
"loss": 2.943,
"step": 1676500
},
{
"epoch": 13.74,
"learning_rate": 4.51906163178747e-06,
"loss": 2.9367,
"step": 1677000
},
{
"epoch": 13.74,
"learning_rate": 4.5174176790675505e-06,
"loss": 2.9447,
"step": 1677500
},
{
"epoch": 13.75,
"learning_rate": 4.515773726347631e-06,
"loss": 2.9327,
"step": 1678000
},
{
"epoch": 13.75,
"learning_rate": 4.514129773627711e-06,
"loss": 2.9379,
"step": 1678500
},
{
"epoch": 13.76,
"learning_rate": 4.512485820907791e-06,
"loss": 2.9412,
"step": 1679000
},
{
"epoch": 13.76,
"learning_rate": 4.510841868187871e-06,
"loss": 2.9383,
"step": 1679500
},
{
"epoch": 13.76,
"learning_rate": 4.509197915467952e-06,
"loss": 2.9389,
"step": 1680000
},
{
"epoch": 13.76,
"eval_accuracy": 0.4972946717627968,
"eval_loss": 2.80572772026062,
"eval_runtime": 409.9534,
"eval_samples_per_second": 752.173,
"eval_steps_per_second": 15.673,
"step": 1680000
},
{
"epoch": 13.77,
"learning_rate": 4.507553962748032e-06,
"loss": 2.9444,
"step": 1680500
},
{
"epoch": 13.77,
"learning_rate": 4.505910010028112e-06,
"loss": 2.9413,
"step": 1681000
},
{
"epoch": 13.78,
"learning_rate": 4.504266057308192e-06,
"loss": 2.949,
"step": 1681500
},
{
"epoch": 13.78,
"learning_rate": 4.5026221045882725e-06,
"loss": 2.9386,
"step": 1682000
},
{
"epoch": 13.78,
"learning_rate": 4.500978151868353e-06,
"loss": 2.9448,
"step": 1682500
},
{
"epoch": 13.79,
"learning_rate": 4.499334199148433e-06,
"loss": 2.934,
"step": 1683000
},
{
"epoch": 13.79,
"learning_rate": 4.497690246428514e-06,
"loss": 2.9403,
"step": 1683500
},
{
"epoch": 13.8,
"learning_rate": 4.496046293708594e-06,
"loss": 2.9464,
"step": 1684000
},
{
"epoch": 13.8,
"learning_rate": 4.494402340988673e-06,
"loss": 2.9362,
"step": 1684500
},
{
"epoch": 13.8,
"learning_rate": 4.492758388268754e-06,
"loss": 2.9427,
"step": 1685000
},
{
"epoch": 13.81,
"learning_rate": 4.491114435548834e-06,
"loss": 2.939,
"step": 1685500
},
{
"epoch": 13.81,
"learning_rate": 4.489470482828914e-06,
"loss": 2.9373,
"step": 1686000
},
{
"epoch": 13.82,
"learning_rate": 4.4878265301089944e-06,
"loss": 2.941,
"step": 1686500
},
{
"epoch": 13.82,
"learning_rate": 4.4861825773890746e-06,
"loss": 2.9358,
"step": 1687000
},
{
"epoch": 13.83,
"learning_rate": 4.484538624669155e-06,
"loss": 2.9371,
"step": 1687500
},
{
"epoch": 13.83,
"learning_rate": 4.482894671949235e-06,
"loss": 2.9382,
"step": 1688000
},
{
"epoch": 13.83,
"learning_rate": 4.481250719229315e-06,
"loss": 2.931,
"step": 1688500
},
{
"epoch": 13.84,
"learning_rate": 4.479606766509396e-06,
"loss": 2.9357,
"step": 1689000
},
{
"epoch": 13.84,
"learning_rate": 4.477962813789476e-06,
"loss": 2.9389,
"step": 1689500
},
{
"epoch": 13.85,
"learning_rate": 4.476318861069556e-06,
"loss": 2.9362,
"step": 1690000
},
{
"epoch": 13.85,
"learning_rate": 4.474674908349636e-06,
"loss": 2.9395,
"step": 1690500
},
{
"epoch": 13.85,
"learning_rate": 4.473030955629716e-06,
"loss": 2.9368,
"step": 1691000
},
{
"epoch": 13.86,
"learning_rate": 4.4713870029097965e-06,
"loss": 2.9447,
"step": 1691500
},
{
"epoch": 13.86,
"learning_rate": 4.469743050189877e-06,
"loss": 2.9401,
"step": 1692000
},
{
"epoch": 13.87,
"learning_rate": 4.468099097469958e-06,
"loss": 2.9357,
"step": 1692500
},
{
"epoch": 13.87,
"learning_rate": 4.466455144750038e-06,
"loss": 2.9433,
"step": 1693000
},
{
"epoch": 13.87,
"learning_rate": 4.464811192030117e-06,
"loss": 2.9427,
"step": 1693500
},
{
"epoch": 13.88,
"learning_rate": 4.463167239310198e-06,
"loss": 2.9336,
"step": 1694000
},
{
"epoch": 13.88,
"learning_rate": 4.461523286590278e-06,
"loss": 2.9388,
"step": 1694500
},
{
"epoch": 13.89,
"learning_rate": 4.459879333870358e-06,
"loss": 2.9386,
"step": 1695000
},
{
"epoch": 13.89,
"learning_rate": 4.458235381150438e-06,
"loss": 2.9384,
"step": 1695500
},
{
"epoch": 13.9,
"learning_rate": 4.4565914284305185e-06,
"loss": 2.9361,
"step": 1696000
},
{
"epoch": 13.9,
"learning_rate": 4.454947475710599e-06,
"loss": 2.9363,
"step": 1696500
},
{
"epoch": 13.9,
"learning_rate": 4.453303522990679e-06,
"loss": 2.9361,
"step": 1697000
},
{
"epoch": 13.91,
"learning_rate": 4.45165957027076e-06,
"loss": 2.9355,
"step": 1697500
},
{
"epoch": 13.91,
"learning_rate": 4.45001561755084e-06,
"loss": 2.9356,
"step": 1698000
},
{
"epoch": 13.92,
"learning_rate": 4.44837166483092e-06,
"loss": 2.9391,
"step": 1698500
},
{
"epoch": 13.92,
"learning_rate": 4.446727712111e-06,
"loss": 2.9384,
"step": 1699000
},
{
"epoch": 13.92,
"learning_rate": 4.44508375939108e-06,
"loss": 2.9367,
"step": 1699500
},
{
"epoch": 13.93,
"learning_rate": 4.44343980667116e-06,
"loss": 2.9446,
"step": 1700000
},
{
"epoch": 13.93,
"learning_rate": 4.4417958539512405e-06,
"loss": 2.9439,
"step": 1700500
},
{
"epoch": 13.94,
"learning_rate": 4.440151901231321e-06,
"loss": 2.9381,
"step": 1701000
},
{
"epoch": 13.94,
"learning_rate": 4.438507948511402e-06,
"loss": 2.9345,
"step": 1701500
},
{
"epoch": 13.94,
"learning_rate": 4.436863995791482e-06,
"loss": 2.9391,
"step": 1702000
},
{
"epoch": 13.95,
"learning_rate": 4.435220043071561e-06,
"loss": 2.9316,
"step": 1702500
},
{
"epoch": 13.95,
"learning_rate": 4.433576090351642e-06,
"loss": 2.9351,
"step": 1703000
},
{
"epoch": 13.96,
"learning_rate": 4.431932137631722e-06,
"loss": 2.932,
"step": 1703500
},
{
"epoch": 13.96,
"learning_rate": 4.430288184911802e-06,
"loss": 2.938,
"step": 1704000
},
{
"epoch": 13.96,
"learning_rate": 4.428644232191882e-06,
"loss": 2.9325,
"step": 1704500
},
{
"epoch": 13.97,
"learning_rate": 4.427000279471963e-06,
"loss": 2.9388,
"step": 1705000
},
{
"epoch": 13.97,
"learning_rate": 4.425356326752043e-06,
"loss": 2.9338,
"step": 1705500
},
{
"epoch": 13.98,
"learning_rate": 4.423712374032123e-06,
"loss": 2.9372,
"step": 1706000
},
{
"epoch": 13.98,
"learning_rate": 4.422068421312204e-06,
"loss": 2.9421,
"step": 1706500
},
{
"epoch": 13.99,
"learning_rate": 4.420424468592284e-06,
"loss": 2.9381,
"step": 1707000
},
{
"epoch": 13.99,
"learning_rate": 4.418780515872364e-06,
"loss": 2.9352,
"step": 1707500
},
{
"epoch": 13.99,
"learning_rate": 4.417136563152444e-06,
"loss": 2.9376,
"step": 1708000
},
{
"epoch": 14.0,
"learning_rate": 4.415492610432524e-06,
"loss": 2.9373,
"step": 1708500
},
{
"epoch": 14.0,
"learning_rate": 4.413848657712604e-06,
"loss": 2.9386,
"step": 1709000
},
{
"epoch": 14.01,
"learning_rate": 4.4122047049926845e-06,
"loss": 2.9335,
"step": 1709500
},
{
"epoch": 14.01,
"learning_rate": 4.410560752272765e-06,
"loss": 2.9374,
"step": 1710000
},
{
"epoch": 14.01,
"eval_accuracy": 0.4976944853269889,
"eval_loss": 2.8028008937835693,
"eval_runtime": 411.9878,
"eval_samples_per_second": 748.459,
"eval_steps_per_second": 15.595,
"step": 1710000
},
{
"epoch": 14.01,
"learning_rate": 4.408916799552846e-06,
"loss": 2.9313,
"step": 1710500
},
{
"epoch": 14.02,
"learning_rate": 4.407272846832926e-06,
"loss": 2.9383,
"step": 1711000
},
{
"epoch": 14.02,
"learning_rate": 4.405628894113006e-06,
"loss": 2.939,
"step": 1711500
},
{
"epoch": 14.03,
"learning_rate": 4.403984941393086e-06,
"loss": 2.9374,
"step": 1712000
},
{
"epoch": 14.03,
"learning_rate": 4.402340988673166e-06,
"loss": 2.9351,
"step": 1712500
},
{
"epoch": 14.03,
"learning_rate": 4.400697035953246e-06,
"loss": 2.9335,
"step": 1713000
},
{
"epoch": 14.04,
"learning_rate": 4.399053083233326e-06,
"loss": 2.9393,
"step": 1713500
},
{
"epoch": 14.04,
"learning_rate": 4.397409130513407e-06,
"loss": 2.9374,
"step": 1714000
},
{
"epoch": 14.05,
"learning_rate": 4.395765177793487e-06,
"loss": 2.9355,
"step": 1714500
},
{
"epoch": 14.05,
"learning_rate": 4.394121225073567e-06,
"loss": 2.9253,
"step": 1715000
},
{
"epoch": 14.05,
"learning_rate": 4.392477272353648e-06,
"loss": 2.9334,
"step": 1715500
},
{
"epoch": 14.06,
"learning_rate": 4.390833319633728e-06,
"loss": 2.9287,
"step": 1716000
},
{
"epoch": 14.06,
"learning_rate": 4.389189366913808e-06,
"loss": 2.9363,
"step": 1716500
},
{
"epoch": 14.07,
"learning_rate": 4.387545414193888e-06,
"loss": 2.9334,
"step": 1717000
},
{
"epoch": 14.07,
"learning_rate": 4.385901461473968e-06,
"loss": 2.9396,
"step": 1717500
},
{
"epoch": 14.08,
"learning_rate": 4.384257508754048e-06,
"loss": 2.9314,
"step": 1718000
},
{
"epoch": 14.08,
"learning_rate": 4.3826135560341285e-06,
"loss": 2.9284,
"step": 1718500
},
{
"epoch": 14.08,
"learning_rate": 4.3809696033142094e-06,
"loss": 2.9397,
"step": 1719000
},
{
"epoch": 14.09,
"learning_rate": 4.3793256505942896e-06,
"loss": 2.9268,
"step": 1719500
},
{
"epoch": 14.09,
"learning_rate": 4.37768169787437e-06,
"loss": 2.9344,
"step": 1720000
},
{
"epoch": 14.1,
"learning_rate": 4.37603774515445e-06,
"loss": 2.9336,
"step": 1720500
},
{
"epoch": 14.1,
"learning_rate": 4.37439379243453e-06,
"loss": 2.9399,
"step": 1721000
},
{
"epoch": 14.1,
"learning_rate": 4.37274983971461e-06,
"loss": 2.9335,
"step": 1721500
},
{
"epoch": 14.11,
"learning_rate": 4.37110588699469e-06,
"loss": 2.9299,
"step": 1722000
},
{
"epoch": 14.11,
"learning_rate": 4.36946193427477e-06,
"loss": 2.9353,
"step": 1722500
},
{
"epoch": 14.12,
"learning_rate": 4.367817981554851e-06,
"loss": 2.9282,
"step": 1723000
},
{
"epoch": 14.12,
"learning_rate": 4.3661740288349306e-06,
"loss": 2.9343,
"step": 1723500
},
{
"epoch": 14.12,
"learning_rate": 4.364530076115011e-06,
"loss": 2.9299,
"step": 1724000
},
{
"epoch": 14.13,
"learning_rate": 4.362886123395092e-06,
"loss": 2.9317,
"step": 1724500
},
{
"epoch": 14.13,
"learning_rate": 4.361242170675172e-06,
"loss": 2.9287,
"step": 1725000
},
{
"epoch": 14.14,
"learning_rate": 4.359598217955252e-06,
"loss": 2.9372,
"step": 1725500
},
{
"epoch": 14.14,
"learning_rate": 4.357954265235332e-06,
"loss": 2.9415,
"step": 1726000
},
{
"epoch": 14.14,
"learning_rate": 4.356310312515412e-06,
"loss": 2.9302,
"step": 1726500
},
{
"epoch": 14.15,
"learning_rate": 4.354666359795492e-06,
"loss": 2.9365,
"step": 1727000
},
{
"epoch": 14.15,
"learning_rate": 4.3530224070755724e-06,
"loss": 2.9399,
"step": 1727500
},
{
"epoch": 14.16,
"learning_rate": 4.351378454355653e-06,
"loss": 2.934,
"step": 1728000
},
{
"epoch": 14.16,
"learning_rate": 4.3497345016357335e-06,
"loss": 2.9384,
"step": 1728500
},
{
"epoch": 14.17,
"learning_rate": 4.348090548915814e-06,
"loss": 2.9384,
"step": 1729000
},
{
"epoch": 14.17,
"learning_rate": 4.346446596195894e-06,
"loss": 2.9356,
"step": 1729500
},
{
"epoch": 14.17,
"learning_rate": 4.344802643475974e-06,
"loss": 2.9365,
"step": 1730000
},
{
"epoch": 14.18,
"learning_rate": 4.343158690756054e-06,
"loss": 2.941,
"step": 1730500
},
{
"epoch": 14.18,
"learning_rate": 4.341514738036134e-06,
"loss": 2.9355,
"step": 1731000
},
{
"epoch": 14.19,
"learning_rate": 4.339870785316215e-06,
"loss": 2.9348,
"step": 1731500
},
{
"epoch": 14.19,
"learning_rate": 4.338226832596295e-06,
"loss": 2.9331,
"step": 1732000
},
{
"epoch": 14.19,
"learning_rate": 4.336582879876375e-06,
"loss": 2.9319,
"step": 1732500
},
{
"epoch": 14.2,
"learning_rate": 4.3349389271564555e-06,
"loss": 2.9369,
"step": 1733000
},
{
"epoch": 14.2,
"learning_rate": 4.333294974436536e-06,
"loss": 2.9361,
"step": 1733500
},
{
"epoch": 14.21,
"learning_rate": 4.331651021716616e-06,
"loss": 2.9339,
"step": 1734000
},
{
"epoch": 14.21,
"learning_rate": 4.330007068996696e-06,
"loss": 2.9307,
"step": 1734500
},
{
"epoch": 14.21,
"learning_rate": 4.328363116276776e-06,
"loss": 2.9329,
"step": 1735000
},
{
"epoch": 14.22,
"learning_rate": 4.326719163556857e-06,
"loss": 2.9387,
"step": 1735500
},
{
"epoch": 14.22,
"learning_rate": 4.325075210836936e-06,
"loss": 2.9352,
"step": 1736000
},
{
"epoch": 14.23,
"learning_rate": 4.323431258117016e-06,
"loss": 2.9308,
"step": 1736500
},
{
"epoch": 14.23,
"learning_rate": 4.321787305397097e-06,
"loss": 2.9319,
"step": 1737000
},
{
"epoch": 14.24,
"learning_rate": 4.3201433526771775e-06,
"loss": 2.9321,
"step": 1737500
},
{
"epoch": 14.24,
"learning_rate": 4.318499399957258e-06,
"loss": 2.9276,
"step": 1738000
},
{
"epoch": 14.24,
"learning_rate": 4.316855447237338e-06,
"loss": 2.933,
"step": 1738500
},
{
"epoch": 14.25,
"learning_rate": 4.315211494517418e-06,
"loss": 2.936,
"step": 1739000
},
{
"epoch": 14.25,
"learning_rate": 4.313567541797498e-06,
"loss": 2.9336,
"step": 1739500
},
{
"epoch": 14.26,
"learning_rate": 4.311923589077578e-06,
"loss": 2.9341,
"step": 1740000
},
{
"epoch": 14.26,
"eval_accuracy": 0.4978347321882909,
"eval_loss": 2.8000051975250244,
"eval_runtime": 409.1394,
"eval_samples_per_second": 753.67,
"eval_steps_per_second": 15.704,
"step": 1740000
},
{
"epoch": 14.26,
"learning_rate": 4.310279636357659e-06,
"loss": 2.9279,
"step": 1740500
},
{
"epoch": 14.26,
"learning_rate": 4.308635683637739e-06,
"loss": 2.9396,
"step": 1741000
},
{
"epoch": 14.27,
"learning_rate": 4.306991730917819e-06,
"loss": 2.9339,
"step": 1741500
},
{
"epoch": 14.27,
"learning_rate": 4.3053477781978995e-06,
"loss": 2.9332,
"step": 1742000
},
{
"epoch": 14.28,
"learning_rate": 4.30370382547798e-06,
"loss": 2.9357,
"step": 1742500
},
{
"epoch": 14.28,
"learning_rate": 4.30205987275806e-06,
"loss": 2.9298,
"step": 1743000
},
{
"epoch": 14.28,
"learning_rate": 4.30041592003814e-06,
"loss": 2.936,
"step": 1743500
},
{
"epoch": 14.29,
"learning_rate": 4.298771967318221e-06,
"loss": 2.9274,
"step": 1744000
},
{
"epoch": 14.29,
"learning_rate": 4.297128014598301e-06,
"loss": 2.9341,
"step": 1744500
},
{
"epoch": 14.3,
"learning_rate": 4.29548406187838e-06,
"loss": 2.9329,
"step": 1745000
},
{
"epoch": 14.3,
"learning_rate": 4.293840109158461e-06,
"loss": 2.9282,
"step": 1745500
},
{
"epoch": 14.3,
"learning_rate": 4.292196156438541e-06,
"loss": 2.9359,
"step": 1746000
},
{
"epoch": 14.31,
"learning_rate": 4.2905522037186215e-06,
"loss": 2.9311,
"step": 1746500
},
{
"epoch": 14.31,
"learning_rate": 4.288908250998702e-06,
"loss": 2.9301,
"step": 1747000
},
{
"epoch": 14.32,
"learning_rate": 4.287264298278782e-06,
"loss": 2.9277,
"step": 1747500
},
{
"epoch": 14.32,
"learning_rate": 4.285620345558862e-06,
"loss": 2.9333,
"step": 1748000
},
{
"epoch": 14.33,
"learning_rate": 4.283976392838942e-06,
"loss": 2.9305,
"step": 1748500
},
{
"epoch": 14.33,
"learning_rate": 4.282332440119022e-06,
"loss": 2.9342,
"step": 1749000
},
{
"epoch": 14.33,
"learning_rate": 4.280688487399103e-06,
"loss": 2.9321,
"step": 1749500
},
{
"epoch": 14.34,
"learning_rate": 4.279044534679183e-06,
"loss": 2.9341,
"step": 1750000
},
{
"epoch": 14.34,
"learning_rate": 4.277400581959263e-06,
"loss": 2.9312,
"step": 1750500
},
{
"epoch": 14.35,
"learning_rate": 4.2757566292393435e-06,
"loss": 2.9252,
"step": 1751000
},
{
"epoch": 14.35,
"learning_rate": 4.274112676519424e-06,
"loss": 2.9335,
"step": 1751500
},
{
"epoch": 14.35,
"learning_rate": 4.272468723799504e-06,
"loss": 2.9248,
"step": 1752000
},
{
"epoch": 14.36,
"learning_rate": 4.270824771079584e-06,
"loss": 2.9322,
"step": 1752500
},
{
"epoch": 14.36,
"learning_rate": 4.269180818359665e-06,
"loss": 2.9343,
"step": 1753000
},
{
"epoch": 14.37,
"learning_rate": 4.267536865639745e-06,
"loss": 2.9346,
"step": 1753500
},
{
"epoch": 14.37,
"learning_rate": 4.265892912919824e-06,
"loss": 2.9312,
"step": 1754000
},
{
"epoch": 14.37,
"learning_rate": 4.264248960199905e-06,
"loss": 2.9274,
"step": 1754500
},
{
"epoch": 14.38,
"learning_rate": 4.262605007479985e-06,
"loss": 2.9308,
"step": 1755000
},
{
"epoch": 14.38,
"learning_rate": 4.2609610547600654e-06,
"loss": 2.9315,
"step": 1755500
},
{
"epoch": 14.39,
"learning_rate": 4.2593171020401456e-06,
"loss": 2.929,
"step": 1756000
},
{
"epoch": 14.39,
"learning_rate": 4.2576731493202265e-06,
"loss": 2.9362,
"step": 1756500
},
{
"epoch": 14.39,
"learning_rate": 4.256029196600306e-06,
"loss": 2.9238,
"step": 1757000
},
{
"epoch": 14.4,
"learning_rate": 4.254385243880386e-06,
"loss": 2.9318,
"step": 1757500
},
{
"epoch": 14.4,
"learning_rate": 4.252741291160467e-06,
"loss": 2.935,
"step": 1758000
},
{
"epoch": 14.41,
"learning_rate": 4.251097338440547e-06,
"loss": 2.9325,
"step": 1758500
},
{
"epoch": 14.41,
"learning_rate": 4.249453385720627e-06,
"loss": 2.9388,
"step": 1759000
},
{
"epoch": 14.42,
"learning_rate": 4.247809433000707e-06,
"loss": 2.9329,
"step": 1759500
},
{
"epoch": 14.42,
"learning_rate": 4.2461654802807874e-06,
"loss": 2.9317,
"step": 1760000
},
{
"epoch": 14.42,
"learning_rate": 4.2445215275608676e-06,
"loss": 2.9316,
"step": 1760500
},
{
"epoch": 14.43,
"learning_rate": 4.242877574840948e-06,
"loss": 2.9311,
"step": 1761000
},
{
"epoch": 14.43,
"learning_rate": 4.241233622121028e-06,
"loss": 2.932,
"step": 1761500
},
{
"epoch": 14.44,
"learning_rate": 4.239589669401109e-06,
"loss": 2.9319,
"step": 1762000
},
{
"epoch": 14.44,
"learning_rate": 4.237945716681189e-06,
"loss": 2.9314,
"step": 1762500
},
{
"epoch": 14.44,
"learning_rate": 4.236301763961268e-06,
"loss": 2.934,
"step": 1763000
},
{
"epoch": 14.45,
"learning_rate": 4.234657811241349e-06,
"loss": 2.9321,
"step": 1763500
},
{
"epoch": 14.45,
"learning_rate": 4.233013858521429e-06,
"loss": 2.936,
"step": 1764000
},
{
"epoch": 14.46,
"learning_rate": 4.231369905801509e-06,
"loss": 2.9388,
"step": 1764500
},
{
"epoch": 14.46,
"learning_rate": 4.2297259530815895e-06,
"loss": 2.925,
"step": 1765000
},
{
"epoch": 14.46,
"learning_rate": 4.2280820003616705e-06,
"loss": 2.9331,
"step": 1765500
},
{
"epoch": 14.47,
"learning_rate": 4.22643804764175e-06,
"loss": 2.9265,
"step": 1766000
},
{
"epoch": 14.47,
"learning_rate": 4.22479409492183e-06,
"loss": 2.9322,
"step": 1766500
},
{
"epoch": 14.48,
"learning_rate": 4.223150142201911e-06,
"loss": 2.9266,
"step": 1767000
},
{
"epoch": 14.48,
"learning_rate": 4.221506189481991e-06,
"loss": 2.9338,
"step": 1767500
},
{
"epoch": 14.48,
"learning_rate": 4.219862236762071e-06,
"loss": 2.9362,
"step": 1768000
},
{
"epoch": 14.49,
"learning_rate": 4.218218284042151e-06,
"loss": 2.9338,
"step": 1768500
},
{
"epoch": 14.49,
"learning_rate": 4.216574331322231e-06,
"loss": 2.9352,
"step": 1769000
},
{
"epoch": 14.5,
"learning_rate": 4.2149303786023115e-06,
"loss": 2.9298,
"step": 1769500
},
{
"epoch": 14.5,
"learning_rate": 4.213286425882392e-06,
"loss": 2.9275,
"step": 1770000
},
{
"epoch": 14.5,
"eval_accuracy": 0.49835039139690634,
"eval_loss": 2.7977969646453857,
"eval_runtime": 403.9508,
"eval_samples_per_second": 763.35,
"eval_steps_per_second": 15.905,
"step": 1770000
},
{
"epoch": 14.51,
"learning_rate": 4.211642473162473e-06,
"loss": 2.9349,
"step": 1770500
},
{
"epoch": 14.51,
"learning_rate": 4.209998520442553e-06,
"loss": 2.9281,
"step": 1771000
},
{
"epoch": 14.51,
"learning_rate": 4.208354567722633e-06,
"loss": 2.9367,
"step": 1771500
},
{
"epoch": 14.52,
"learning_rate": 4.206710615002713e-06,
"loss": 2.9272,
"step": 1772000
},
{
"epoch": 14.52,
"learning_rate": 4.205066662282793e-06,
"loss": 2.9336,
"step": 1772500
},
{
"epoch": 14.53,
"learning_rate": 4.203422709562873e-06,
"loss": 2.9265,
"step": 1773000
},
{
"epoch": 14.53,
"learning_rate": 4.201778756842953e-06,
"loss": 2.9294,
"step": 1773500
},
{
"epoch": 14.53,
"learning_rate": 4.2001348041230335e-06,
"loss": 2.9236,
"step": 1774000
},
{
"epoch": 14.54,
"learning_rate": 4.1984908514031145e-06,
"loss": 2.9314,
"step": 1774500
},
{
"epoch": 14.54,
"learning_rate": 4.196846898683194e-06,
"loss": 2.9274,
"step": 1775000
},
{
"epoch": 14.55,
"learning_rate": 4.195202945963274e-06,
"loss": 2.9199,
"step": 1775500
},
{
"epoch": 14.55,
"learning_rate": 4.193558993243355e-06,
"loss": 2.9313,
"step": 1776000
},
{
"epoch": 14.55,
"learning_rate": 4.191915040523435e-06,
"loss": 2.9253,
"step": 1776500
},
{
"epoch": 14.56,
"learning_rate": 4.190271087803515e-06,
"loss": 2.9304,
"step": 1777000
},
{
"epoch": 14.56,
"learning_rate": 4.188627135083595e-06,
"loss": 2.9333,
"step": 1777500
},
{
"epoch": 14.57,
"learning_rate": 4.186983182363675e-06,
"loss": 2.9317,
"step": 1778000
},
{
"epoch": 14.57,
"learning_rate": 4.1853392296437555e-06,
"loss": 2.9276,
"step": 1778500
},
{
"epoch": 14.58,
"learning_rate": 4.183695276923836e-06,
"loss": 2.9319,
"step": 1779000
},
{
"epoch": 14.58,
"learning_rate": 4.182051324203917e-06,
"loss": 2.933,
"step": 1779500
},
{
"epoch": 14.58,
"learning_rate": 4.180407371483997e-06,
"loss": 2.9379,
"step": 1780000
},
{
"epoch": 14.59,
"learning_rate": 4.178763418764077e-06,
"loss": 2.9306,
"step": 1780500
},
{
"epoch": 14.59,
"learning_rate": 4.177119466044157e-06,
"loss": 2.9328,
"step": 1781000
},
{
"epoch": 14.6,
"learning_rate": 4.175475513324237e-06,
"loss": 2.9255,
"step": 1781500
},
{
"epoch": 14.6,
"learning_rate": 4.173831560604317e-06,
"loss": 2.9313,
"step": 1782000
},
{
"epoch": 14.6,
"learning_rate": 4.172187607884397e-06,
"loss": 2.9252,
"step": 1782500
},
{
"epoch": 14.61,
"learning_rate": 4.1705436551644775e-06,
"loss": 2.9281,
"step": 1783000
},
{
"epoch": 14.61,
"learning_rate": 4.1688997024445585e-06,
"loss": 2.9268,
"step": 1783500
},
{
"epoch": 14.62,
"learning_rate": 4.167255749724639e-06,
"loss": 2.9269,
"step": 1784000
},
{
"epoch": 14.62,
"learning_rate": 4.165611797004719e-06,
"loss": 2.9347,
"step": 1784500
},
{
"epoch": 14.62,
"learning_rate": 4.163967844284799e-06,
"loss": 2.9234,
"step": 1785000
},
{
"epoch": 14.63,
"learning_rate": 4.162323891564879e-06,
"loss": 2.9271,
"step": 1785500
},
{
"epoch": 14.63,
"learning_rate": 4.160679938844959e-06,
"loss": 2.9335,
"step": 1786000
},
{
"epoch": 14.64,
"learning_rate": 4.159035986125039e-06,
"loss": 2.9314,
"step": 1786500
},
{
"epoch": 14.64,
"learning_rate": 4.15739203340512e-06,
"loss": 2.9327,
"step": 1787000
},
{
"epoch": 14.64,
"learning_rate": 4.1557480806851995e-06,
"loss": 2.9259,
"step": 1787500
},
{
"epoch": 14.65,
"learning_rate": 4.15410412796528e-06,
"loss": 2.9288,
"step": 1788000
},
{
"epoch": 14.65,
"learning_rate": 4.1524601752453606e-06,
"loss": 2.9282,
"step": 1788500
},
{
"epoch": 14.66,
"learning_rate": 4.150816222525441e-06,
"loss": 2.9276,
"step": 1789000
},
{
"epoch": 14.66,
"learning_rate": 4.149172269805521e-06,
"loss": 2.9337,
"step": 1789500
},
{
"epoch": 14.67,
"learning_rate": 4.147528317085601e-06,
"loss": 2.9284,
"step": 1790000
},
{
"epoch": 14.67,
"learning_rate": 4.145884364365681e-06,
"loss": 2.9208,
"step": 1790500
},
{
"epoch": 14.67,
"learning_rate": 4.144240411645761e-06,
"loss": 2.9408,
"step": 1791000
},
{
"epoch": 14.68,
"learning_rate": 4.142596458925841e-06,
"loss": 2.9293,
"step": 1791500
},
{
"epoch": 14.68,
"learning_rate": 4.140952506205922e-06,
"loss": 2.93,
"step": 1792000
},
{
"epoch": 14.69,
"learning_rate": 4.1393085534860024e-06,
"loss": 2.9313,
"step": 1792500
},
{
"epoch": 14.69,
"learning_rate": 4.1376646007660826e-06,
"loss": 2.9321,
"step": 1793000
},
{
"epoch": 14.69,
"learning_rate": 4.136020648046163e-06,
"loss": 2.9249,
"step": 1793500
},
{
"epoch": 14.7,
"learning_rate": 4.134376695326243e-06,
"loss": 2.9286,
"step": 1794000
},
{
"epoch": 14.7,
"learning_rate": 4.132732742606323e-06,
"loss": 2.9354,
"step": 1794500
},
{
"epoch": 14.71,
"learning_rate": 4.131088789886403e-06,
"loss": 2.9287,
"step": 1795000
},
{
"epoch": 14.71,
"learning_rate": 4.129444837166483e-06,
"loss": 2.9271,
"step": 1795500
},
{
"epoch": 14.71,
"learning_rate": 4.127800884446564e-06,
"loss": 2.9335,
"step": 1796000
},
{
"epoch": 14.72,
"learning_rate": 4.1261569317266434e-06,
"loss": 2.9235,
"step": 1796500
},
{
"epoch": 14.72,
"learning_rate": 4.1245129790067236e-06,
"loss": 2.9245,
"step": 1797000
},
{
"epoch": 14.73,
"learning_rate": 4.1228690262868045e-06,
"loss": 2.925,
"step": 1797500
},
{
"epoch": 14.73,
"learning_rate": 4.121225073566885e-06,
"loss": 2.9244,
"step": 1798000
},
{
"epoch": 14.73,
"learning_rate": 4.119581120846965e-06,
"loss": 2.9306,
"step": 1798500
},
{
"epoch": 14.74,
"learning_rate": 4.117937168127045e-06,
"loss": 2.9243,
"step": 1799000
},
{
"epoch": 14.74,
"learning_rate": 4.116293215407125e-06,
"loss": 2.9272,
"step": 1799500
},
{
"epoch": 14.75,
"learning_rate": 4.114649262687205e-06,
"loss": 2.9319,
"step": 1800000
},
{
"epoch": 14.75,
"eval_accuracy": 0.4989067207393835,
"eval_loss": 2.794658899307251,
"eval_runtime": 404.1884,
"eval_samples_per_second": 762.902,
"eval_steps_per_second": 15.896,
"step": 1800000
},
{
"epoch": 14.75,
"learning_rate": 4.113005309967285e-06,
"loss": 2.9225,
"step": 1800500
},
{
"epoch": 14.76,
"learning_rate": 4.111361357247366e-06,
"loss": 2.9304,
"step": 1801000
},
{
"epoch": 14.76,
"learning_rate": 4.109717404527446e-06,
"loss": 2.9258,
"step": 1801500
},
{
"epoch": 14.76,
"learning_rate": 4.1080734518075265e-06,
"loss": 2.9214,
"step": 1802000
},
{
"epoch": 14.77,
"learning_rate": 4.106429499087607e-06,
"loss": 2.9272,
"step": 1802500
},
{
"epoch": 14.77,
"learning_rate": 4.104785546367687e-06,
"loss": 2.9293,
"step": 1803000
},
{
"epoch": 14.78,
"learning_rate": 4.103141593647767e-06,
"loss": 2.9307,
"step": 1803500
},
{
"epoch": 14.78,
"learning_rate": 4.101497640927847e-06,
"loss": 2.9316,
"step": 1804000
},
{
"epoch": 14.78,
"learning_rate": 4.099853688207928e-06,
"loss": 2.9265,
"step": 1804500
},
{
"epoch": 14.79,
"learning_rate": 4.098209735488008e-06,
"loss": 2.9225,
"step": 1805000
},
{
"epoch": 14.79,
"learning_rate": 4.096565782768087e-06,
"loss": 2.9219,
"step": 1805500
},
{
"epoch": 14.8,
"learning_rate": 4.094921830048168e-06,
"loss": 2.9265,
"step": 1806000
},
{
"epoch": 14.8,
"learning_rate": 4.0932778773282485e-06,
"loss": 2.9258,
"step": 1806500
},
{
"epoch": 14.8,
"learning_rate": 4.091633924608329e-06,
"loss": 2.9324,
"step": 1807000
},
{
"epoch": 14.81,
"learning_rate": 4.089989971888409e-06,
"loss": 2.9335,
"step": 1807500
},
{
"epoch": 14.81,
"learning_rate": 4.088346019168489e-06,
"loss": 2.9219,
"step": 1808000
},
{
"epoch": 14.82,
"learning_rate": 4.086702066448569e-06,
"loss": 2.9355,
"step": 1808500
},
{
"epoch": 14.82,
"learning_rate": 4.085058113728649e-06,
"loss": 2.9275,
"step": 1809000
},
{
"epoch": 14.82,
"learning_rate": 4.083414161008729e-06,
"loss": 2.9191,
"step": 1809500
},
{
"epoch": 14.83,
"learning_rate": 4.08177020828881e-06,
"loss": 2.9248,
"step": 1810000
},
{
"epoch": 14.83,
"learning_rate": 4.08012625556889e-06,
"loss": 2.9263,
"step": 1810500
},
{
"epoch": 14.84,
"learning_rate": 4.0784823028489705e-06,
"loss": 2.9324,
"step": 1811000
},
{
"epoch": 14.84,
"learning_rate": 4.076838350129051e-06,
"loss": 2.9256,
"step": 1811500
},
{
"epoch": 14.85,
"learning_rate": 4.075194397409131e-06,
"loss": 2.9276,
"step": 1812000
},
{
"epoch": 14.85,
"learning_rate": 4.073550444689211e-06,
"loss": 2.9319,
"step": 1812500
},
{
"epoch": 14.85,
"learning_rate": 4.071906491969291e-06,
"loss": 2.9265,
"step": 1813000
},
{
"epoch": 14.86,
"learning_rate": 4.070262539249372e-06,
"loss": 2.9284,
"step": 1813500
},
{
"epoch": 14.86,
"learning_rate": 4.068618586529452e-06,
"loss": 2.9313,
"step": 1814000
},
{
"epoch": 14.87,
"learning_rate": 4.066974633809531e-06,
"loss": 2.9201,
"step": 1814500
},
{
"epoch": 14.87,
"learning_rate": 4.065330681089612e-06,
"loss": 2.9272,
"step": 1815000
},
{
"epoch": 14.87,
"learning_rate": 4.0636867283696925e-06,
"loss": 2.929,
"step": 1815500
},
{
"epoch": 14.88,
"learning_rate": 4.062042775649773e-06,
"loss": 2.9221,
"step": 1816000
},
{
"epoch": 14.88,
"learning_rate": 4.060398822929853e-06,
"loss": 2.9246,
"step": 1816500
},
{
"epoch": 14.89,
"learning_rate": 4.058754870209934e-06,
"loss": 2.9316,
"step": 1817000
},
{
"epoch": 14.89,
"learning_rate": 4.057110917490013e-06,
"loss": 2.9285,
"step": 1817500
},
{
"epoch": 14.89,
"learning_rate": 4.055466964770093e-06,
"loss": 2.9272,
"step": 1818000
},
{
"epoch": 14.9,
"learning_rate": 4.053823012050174e-06,
"loss": 2.928,
"step": 1818500
},
{
"epoch": 14.9,
"learning_rate": 4.052179059330254e-06,
"loss": 2.9296,
"step": 1819000
},
{
"epoch": 14.91,
"learning_rate": 4.050535106610334e-06,
"loss": 2.9252,
"step": 1819500
},
{
"epoch": 14.91,
"learning_rate": 4.0488911538904145e-06,
"loss": 2.9287,
"step": 1820000
},
{
"epoch": 14.92,
"learning_rate": 4.047247201170495e-06,
"loss": 2.9318,
"step": 1820500
},
{
"epoch": 14.92,
"learning_rate": 4.045603248450575e-06,
"loss": 2.927,
"step": 1821000
},
{
"epoch": 14.92,
"learning_rate": 4.043959295730655e-06,
"loss": 2.9241,
"step": 1821500
},
{
"epoch": 14.93,
"learning_rate": 4.042315343010735e-06,
"loss": 2.9234,
"step": 1822000
},
{
"epoch": 14.93,
"learning_rate": 4.040671390290816e-06,
"loss": 2.9218,
"step": 1822500
},
{
"epoch": 14.94,
"learning_rate": 4.039027437570896e-06,
"loss": 2.9306,
"step": 1823000
},
{
"epoch": 14.94,
"learning_rate": 4.037383484850975e-06,
"loss": 2.9271,
"step": 1823500
},
{
"epoch": 14.94,
"learning_rate": 4.035739532131056e-06,
"loss": 2.9231,
"step": 1824000
},
{
"epoch": 14.95,
"learning_rate": 4.0340955794111365e-06,
"loss": 2.9276,
"step": 1824500
},
{
"epoch": 14.95,
"learning_rate": 4.032451626691217e-06,
"loss": 2.9269,
"step": 1825000
},
{
"epoch": 14.96,
"learning_rate": 4.030807673971297e-06,
"loss": 2.9248,
"step": 1825500
},
{
"epoch": 14.96,
"learning_rate": 4.029163721251378e-06,
"loss": 2.927,
"step": 1826000
},
{
"epoch": 14.96,
"learning_rate": 4.027519768531457e-06,
"loss": 2.932,
"step": 1826500
},
{
"epoch": 14.97,
"learning_rate": 4.025875815811537e-06,
"loss": 2.9253,
"step": 1827000
},
{
"epoch": 14.97,
"learning_rate": 4.024231863091618e-06,
"loss": 2.9283,
"step": 1827500
},
{
"epoch": 14.98,
"learning_rate": 4.022587910371698e-06,
"loss": 2.9251,
"step": 1828000
},
{
"epoch": 14.98,
"learning_rate": 4.020943957651778e-06,
"loss": 2.9207,
"step": 1828500
},
{
"epoch": 14.98,
"learning_rate": 4.0193000049318584e-06,
"loss": 2.931,
"step": 1829000
},
{
"epoch": 14.99,
"learning_rate": 4.0176560522119386e-06,
"loss": 2.922,
"step": 1829500
},
{
"epoch": 14.99,
"learning_rate": 4.016012099492019e-06,
"loss": 2.9304,
"step": 1830000
},
{
"epoch": 14.99,
"eval_accuracy": 0.49915539316259305,
"eval_loss": 2.7920210361480713,
"eval_runtime": 408.9888,
"eval_samples_per_second": 753.947,
"eval_steps_per_second": 15.709,
"step": 1830000
},
{
"epoch": 15.0,
"learning_rate": 4.014368146772099e-06,
"loss": 2.924,
"step": 1830500
},
{
"epoch": 15.0,
"learning_rate": 4.01272419405218e-06,
"loss": 2.9216,
"step": 1831000
},
{
"epoch": 15.01,
"learning_rate": 4.01108024133226e-06,
"loss": 2.9301,
"step": 1831500
},
{
"epoch": 15.01,
"learning_rate": 4.00943628861234e-06,
"loss": 2.9275,
"step": 1832000
},
{
"epoch": 15.01,
"learning_rate": 4.00779233589242e-06,
"loss": 2.9231,
"step": 1832500
},
{
"epoch": 15.02,
"learning_rate": 4.0061483831725e-06,
"loss": 2.9206,
"step": 1833000
},
{
"epoch": 15.02,
"learning_rate": 4.0045044304525804e-06,
"loss": 2.9231,
"step": 1833500
},
{
"epoch": 15.03,
"learning_rate": 4.0028604777326606e-06,
"loss": 2.9246,
"step": 1834000
},
{
"epoch": 15.03,
"learning_rate": 4.001216525012741e-06,
"loss": 2.9253,
"step": 1834500
},
{
"epoch": 15.03,
"learning_rate": 3.999572572292822e-06,
"loss": 2.9233,
"step": 1835000
},
{
"epoch": 15.04,
"learning_rate": 3.997928619572902e-06,
"loss": 2.9254,
"step": 1835500
},
{
"epoch": 15.04,
"learning_rate": 3.996284666852981e-06,
"loss": 2.9231,
"step": 1836000
},
{
"epoch": 15.05,
"learning_rate": 3.994640714133062e-06,
"loss": 2.9289,
"step": 1836500
},
{
"epoch": 15.05,
"learning_rate": 3.992996761413142e-06,
"loss": 2.9245,
"step": 1837000
},
{
"epoch": 15.05,
"learning_rate": 3.991352808693222e-06,
"loss": 2.9221,
"step": 1837500
},
{
"epoch": 15.06,
"learning_rate": 3.989708855973302e-06,
"loss": 2.9205,
"step": 1838000
},
{
"epoch": 15.06,
"learning_rate": 3.988064903253383e-06,
"loss": 2.9306,
"step": 1838500
},
{
"epoch": 15.07,
"learning_rate": 3.986420950533463e-06,
"loss": 2.9248,
"step": 1839000
},
{
"epoch": 15.07,
"learning_rate": 3.984776997813543e-06,
"loss": 2.9229,
"step": 1839500
},
{
"epoch": 15.07,
"learning_rate": 3.983133045093624e-06,
"loss": 2.9208,
"step": 1840000
},
{
"epoch": 15.08,
"learning_rate": 3.981489092373704e-06,
"loss": 2.9246,
"step": 1840500
},
{
"epoch": 15.08,
"learning_rate": 3.979845139653784e-06,
"loss": 2.9267,
"step": 1841000
},
{
"epoch": 15.09,
"learning_rate": 3.978201186933864e-06,
"loss": 2.9254,
"step": 1841500
},
{
"epoch": 15.09,
"step": 1841687,
"total_flos": 9.934758834172068e+17,
"train_loss": 3.300056499910879,
"train_runtime": 215976.5267,
"train_samples_per_second": 678.171,
"train_steps_per_second": 14.129
}
],
"max_steps": 3051450,
"num_train_epochs": 25,
"total_flos": 9.934758834172068e+17,
"trial_name": null,
"trial_params": null
}