vit-base-game-icons / trainer_state.json
chromefan's picture
games-ad
75d56c1
{
"best_metric": 2.623465061187744,
"best_model_checkpoint": "./game-ad-0306_outputs/checkpoint-2266",
"epoch": 1000.0,
"global_step": 103000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.97,
"learning_rate": 1.9980582524271846e-05,
"loss": 3.2891,
"step": 100
},
{
"epoch": 1.0,
"eval_accuracy": 0.21649484536082475,
"eval_loss": 3.026599884033203,
"eval_runtime": 4.3891,
"eval_samples_per_second": 66.301,
"eval_steps_per_second": 4.329,
"step": 103
},
{
"epoch": 1.94,
"learning_rate": 1.996116504854369e-05,
"loss": 2.9971,
"step": 200
},
{
"epoch": 2.0,
"eval_accuracy": 0.23024054982817868,
"eval_loss": 2.9193508625030518,
"eval_runtime": 4.4108,
"eval_samples_per_second": 65.974,
"eval_steps_per_second": 4.308,
"step": 206
},
{
"epoch": 2.91,
"learning_rate": 1.9941747572815535e-05,
"loss": 2.9151,
"step": 300
},
{
"epoch": 3.0,
"eval_accuracy": 0.24742268041237114,
"eval_loss": 2.873065948486328,
"eval_runtime": 4.3961,
"eval_samples_per_second": 66.196,
"eval_steps_per_second": 4.322,
"step": 309
},
{
"epoch": 3.88,
"learning_rate": 1.992233009708738e-05,
"loss": 2.8579,
"step": 400
},
{
"epoch": 4.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 2.8072028160095215,
"eval_runtime": 4.4479,
"eval_samples_per_second": 65.424,
"eval_steps_per_second": 4.272,
"step": 412
},
{
"epoch": 4.85,
"learning_rate": 1.9902912621359225e-05,
"loss": 2.7768,
"step": 500
},
{
"epoch": 5.0,
"eval_accuracy": 0.25773195876288657,
"eval_loss": 2.7917871475219727,
"eval_runtime": 4.4494,
"eval_samples_per_second": 65.402,
"eval_steps_per_second": 4.27,
"step": 515
},
{
"epoch": 5.83,
"learning_rate": 1.988349514563107e-05,
"loss": 2.7184,
"step": 600
},
{
"epoch": 6.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 2.7295849323272705,
"eval_runtime": 4.356,
"eval_samples_per_second": 66.805,
"eval_steps_per_second": 4.362,
"step": 618
},
{
"epoch": 6.8,
"learning_rate": 1.9864077669902914e-05,
"loss": 2.648,
"step": 700
},
{
"epoch": 7.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 2.7044482231140137,
"eval_runtime": 4.3613,
"eval_samples_per_second": 66.723,
"eval_steps_per_second": 4.356,
"step": 721
},
{
"epoch": 7.77,
"learning_rate": 1.9844660194174758e-05,
"loss": 2.5884,
"step": 800
},
{
"epoch": 8.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 2.7190284729003906,
"eval_runtime": 4.4514,
"eval_samples_per_second": 65.372,
"eval_steps_per_second": 4.268,
"step": 824
},
{
"epoch": 8.74,
"learning_rate": 1.9825242718446603e-05,
"loss": 2.5146,
"step": 900
},
{
"epoch": 9.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 2.694195508956909,
"eval_runtime": 4.3642,
"eval_samples_per_second": 66.679,
"eval_steps_per_second": 4.354,
"step": 927
},
{
"epoch": 9.71,
"learning_rate": 1.9805825242718447e-05,
"loss": 2.4384,
"step": 1000
},
{
"epoch": 10.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 2.687737464904785,
"eval_runtime": 4.3751,
"eval_samples_per_second": 66.513,
"eval_steps_per_second": 4.343,
"step": 1030
},
{
"epoch": 10.68,
"learning_rate": 1.9786407766990292e-05,
"loss": 2.442,
"step": 1100
},
{
"epoch": 11.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 2.6412112712860107,
"eval_runtime": 4.4431,
"eval_samples_per_second": 65.495,
"eval_steps_per_second": 4.276,
"step": 1133
},
{
"epoch": 11.65,
"learning_rate": 1.9766990291262137e-05,
"loss": 2.3099,
"step": 1200
},
{
"epoch": 12.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 2.6331019401550293,
"eval_runtime": 4.3972,
"eval_samples_per_second": 66.178,
"eval_steps_per_second": 4.321,
"step": 1236
},
{
"epoch": 12.62,
"learning_rate": 1.974757281553398e-05,
"loss": 2.2685,
"step": 1300
},
{
"epoch": 13.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 2.64509916305542,
"eval_runtime": 4.38,
"eval_samples_per_second": 66.438,
"eval_steps_per_second": 4.338,
"step": 1339
},
{
"epoch": 13.59,
"learning_rate": 1.972815533980583e-05,
"loss": 2.182,
"step": 1400
},
{
"epoch": 14.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 2.692749500274658,
"eval_runtime": 4.4055,
"eval_samples_per_second": 66.053,
"eval_steps_per_second": 4.313,
"step": 1442
},
{
"epoch": 14.56,
"learning_rate": 1.970873786407767e-05,
"loss": 2.1421,
"step": 1500
},
{
"epoch": 15.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 2.661494016647339,
"eval_runtime": 4.3695,
"eval_samples_per_second": 66.599,
"eval_steps_per_second": 4.348,
"step": 1545
},
{
"epoch": 15.53,
"learning_rate": 1.9689320388349515e-05,
"loss": 2.0483,
"step": 1600
},
{
"epoch": 16.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 2.6499741077423096,
"eval_runtime": 4.3729,
"eval_samples_per_second": 66.546,
"eval_steps_per_second": 4.345,
"step": 1648
},
{
"epoch": 16.5,
"learning_rate": 1.9669902912621363e-05,
"loss": 1.9884,
"step": 1700
},
{
"epoch": 17.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 2.6526920795440674,
"eval_runtime": 4.4576,
"eval_samples_per_second": 65.282,
"eval_steps_per_second": 4.262,
"step": 1751
},
{
"epoch": 17.48,
"learning_rate": 1.9650485436893204e-05,
"loss": 1.9316,
"step": 1800
},
{
"epoch": 18.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 2.673600435256958,
"eval_runtime": 4.3873,
"eval_samples_per_second": 66.328,
"eval_steps_per_second": 4.331,
"step": 1854
},
{
"epoch": 18.45,
"learning_rate": 1.9631067961165052e-05,
"loss": 1.8785,
"step": 1900
},
{
"epoch": 19.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 2.639138698577881,
"eval_runtime": 4.4001,
"eval_samples_per_second": 66.135,
"eval_steps_per_second": 4.318,
"step": 1957
},
{
"epoch": 19.42,
"learning_rate": 1.9611650485436893e-05,
"loss": 1.788,
"step": 2000
},
{
"epoch": 20.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 2.7002217769622803,
"eval_runtime": 4.3688,
"eval_samples_per_second": 66.609,
"eval_steps_per_second": 4.349,
"step": 2060
},
{
"epoch": 20.39,
"learning_rate": 1.9592233009708738e-05,
"loss": 1.7115,
"step": 2100
},
{
"epoch": 21.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 2.832120180130005,
"eval_runtime": 4.3608,
"eval_samples_per_second": 66.731,
"eval_steps_per_second": 4.357,
"step": 2163
},
{
"epoch": 21.36,
"learning_rate": 1.9572815533980586e-05,
"loss": 1.6929,
"step": 2200
},
{
"epoch": 22.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 2.623465061187744,
"eval_runtime": 4.3818,
"eval_samples_per_second": 66.411,
"eval_steps_per_second": 4.336,
"step": 2266
},
{
"epoch": 22.33,
"learning_rate": 1.9553398058252427e-05,
"loss": 1.6239,
"step": 2300
},
{
"epoch": 23.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 2.6378397941589355,
"eval_runtime": 4.3841,
"eval_samples_per_second": 66.377,
"eval_steps_per_second": 4.334,
"step": 2369
},
{
"epoch": 23.3,
"learning_rate": 1.9533980582524275e-05,
"loss": 1.5387,
"step": 2400
},
{
"epoch": 24.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 2.688793659210205,
"eval_runtime": 4.3688,
"eval_samples_per_second": 66.609,
"eval_steps_per_second": 4.349,
"step": 2472
},
{
"epoch": 24.27,
"learning_rate": 1.951456310679612e-05,
"loss": 1.5095,
"step": 2500
},
{
"epoch": 25.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 2.688781261444092,
"eval_runtime": 4.374,
"eval_samples_per_second": 66.53,
"eval_steps_per_second": 4.344,
"step": 2575
},
{
"epoch": 25.24,
"learning_rate": 1.949514563106796e-05,
"loss": 1.4153,
"step": 2600
},
{
"epoch": 26.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 2.677133560180664,
"eval_runtime": 4.3662,
"eval_samples_per_second": 66.648,
"eval_steps_per_second": 4.352,
"step": 2678
},
{
"epoch": 26.21,
"learning_rate": 1.947572815533981e-05,
"loss": 1.4254,
"step": 2700
},
{
"epoch": 27.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 2.7354466915130615,
"eval_runtime": 4.4258,
"eval_samples_per_second": 65.751,
"eval_steps_per_second": 4.293,
"step": 2781
},
{
"epoch": 27.18,
"learning_rate": 1.9456310679611653e-05,
"loss": 1.3351,
"step": 2800
},
{
"epoch": 28.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 2.7175216674804688,
"eval_runtime": 4.4547,
"eval_samples_per_second": 65.325,
"eval_steps_per_second": 4.265,
"step": 2884
},
{
"epoch": 28.16,
"learning_rate": 1.9436893203883495e-05,
"loss": 1.2955,
"step": 2900
},
{
"epoch": 29.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 2.767915725708008,
"eval_runtime": 4.4025,
"eval_samples_per_second": 66.099,
"eval_steps_per_second": 4.316,
"step": 2987
},
{
"epoch": 29.13,
"learning_rate": 1.9417475728155343e-05,
"loss": 1.2232,
"step": 3000
},
{
"epoch": 30.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 2.7784156799316406,
"eval_runtime": 4.4307,
"eval_samples_per_second": 65.678,
"eval_steps_per_second": 4.288,
"step": 3090
},
{
"epoch": 30.1,
"learning_rate": 1.9398058252427187e-05,
"loss": 1.2115,
"step": 3100
},
{
"epoch": 31.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 2.8495712280273438,
"eval_runtime": 4.3998,
"eval_samples_per_second": 66.139,
"eval_steps_per_second": 4.318,
"step": 3193
},
{
"epoch": 31.07,
"learning_rate": 1.937864077669903e-05,
"loss": 1.1656,
"step": 3200
},
{
"epoch": 32.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 2.7899186611175537,
"eval_runtime": 4.3536,
"eval_samples_per_second": 66.842,
"eval_steps_per_second": 4.364,
"step": 3296
},
{
"epoch": 32.04,
"learning_rate": 1.9359223300970876e-05,
"loss": 1.1419,
"step": 3300
},
{
"epoch": 33.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 2.7646260261535645,
"eval_runtime": 4.387,
"eval_samples_per_second": 66.332,
"eval_steps_per_second": 4.331,
"step": 3399
},
{
"epoch": 33.01,
"learning_rate": 1.9339805825242717e-05,
"loss": 1.0743,
"step": 3400
},
{
"epoch": 33.98,
"learning_rate": 1.9320388349514565e-05,
"loss": 1.0481,
"step": 3500
},
{
"epoch": 34.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 2.8416407108306885,
"eval_runtime": 4.4404,
"eval_samples_per_second": 65.535,
"eval_steps_per_second": 4.279,
"step": 3502
},
{
"epoch": 34.95,
"learning_rate": 1.930097087378641e-05,
"loss": 0.9763,
"step": 3600
},
{
"epoch": 35.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 2.8369979858398438,
"eval_runtime": 4.3776,
"eval_samples_per_second": 66.474,
"eval_steps_per_second": 4.34,
"step": 3605
},
{
"epoch": 35.92,
"learning_rate": 1.9281553398058255e-05,
"loss": 0.9452,
"step": 3700
},
{
"epoch": 36.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 2.7903831005096436,
"eval_runtime": 4.4202,
"eval_samples_per_second": 65.833,
"eval_steps_per_second": 4.298,
"step": 3708
},
{
"epoch": 36.89,
"learning_rate": 1.92621359223301e-05,
"loss": 0.9178,
"step": 3800
},
{
"epoch": 37.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 2.830864191055298,
"eval_runtime": 4.3724,
"eval_samples_per_second": 66.554,
"eval_steps_per_second": 4.345,
"step": 3811
},
{
"epoch": 37.86,
"learning_rate": 1.9242718446601944e-05,
"loss": 0.9115,
"step": 3900
},
{
"epoch": 38.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 2.858407735824585,
"eval_runtime": 4.4507,
"eval_samples_per_second": 65.382,
"eval_steps_per_second": 4.269,
"step": 3914
},
{
"epoch": 38.83,
"learning_rate": 1.922330097087379e-05,
"loss": 0.8472,
"step": 4000
},
{
"epoch": 39.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 2.906602382659912,
"eval_runtime": 4.3684,
"eval_samples_per_second": 66.615,
"eval_steps_per_second": 4.349,
"step": 4017
},
{
"epoch": 39.81,
"learning_rate": 1.9203883495145633e-05,
"loss": 0.8323,
"step": 4100
},
{
"epoch": 40.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 2.862963914871216,
"eval_runtime": 4.3894,
"eval_samples_per_second": 66.295,
"eval_steps_per_second": 4.329,
"step": 4120
},
{
"epoch": 40.78,
"learning_rate": 1.9184466019417478e-05,
"loss": 0.7622,
"step": 4200
},
{
"epoch": 41.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 3.0019726753234863,
"eval_runtime": 4.3977,
"eval_samples_per_second": 66.17,
"eval_steps_per_second": 4.32,
"step": 4223
},
{
"epoch": 41.75,
"learning_rate": 1.9165048543689322e-05,
"loss": 0.7531,
"step": 4300
},
{
"epoch": 42.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 2.88852596282959,
"eval_runtime": 4.3979,
"eval_samples_per_second": 66.168,
"eval_steps_per_second": 4.32,
"step": 4326
},
{
"epoch": 42.72,
"learning_rate": 1.9145631067961167e-05,
"loss": 0.7054,
"step": 4400
},
{
"epoch": 43.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 2.882045269012451,
"eval_runtime": 4.4212,
"eval_samples_per_second": 65.82,
"eval_steps_per_second": 4.298,
"step": 4429
},
{
"epoch": 43.69,
"learning_rate": 1.912621359223301e-05,
"loss": 0.685,
"step": 4500
},
{
"epoch": 44.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 2.8763577938079834,
"eval_runtime": 4.4513,
"eval_samples_per_second": 65.374,
"eval_steps_per_second": 4.268,
"step": 4532
},
{
"epoch": 44.66,
"learning_rate": 1.9106796116504856e-05,
"loss": 0.7206,
"step": 4600
},
{
"epoch": 45.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 2.8658735752105713,
"eval_runtime": 4.3654,
"eval_samples_per_second": 66.66,
"eval_steps_per_second": 4.352,
"step": 4635
},
{
"epoch": 45.63,
"learning_rate": 1.90873786407767e-05,
"loss": 0.6304,
"step": 4700
},
{
"epoch": 46.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 2.953686237335205,
"eval_runtime": 4.5626,
"eval_samples_per_second": 63.78,
"eval_steps_per_second": 4.164,
"step": 4738
},
{
"epoch": 46.6,
"learning_rate": 1.9067961165048545e-05,
"loss": 0.6369,
"step": 4800
},
{
"epoch": 47.0,
"eval_accuracy": 0.2508591065292096,
"eval_loss": 2.9659738540649414,
"eval_runtime": 4.4308,
"eval_samples_per_second": 65.677,
"eval_steps_per_second": 4.288,
"step": 4841
},
{
"epoch": 47.57,
"learning_rate": 1.904854368932039e-05,
"loss": 0.6161,
"step": 4900
},
{
"epoch": 48.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 3.1111767292022705,
"eval_runtime": 4.3981,
"eval_samples_per_second": 66.165,
"eval_steps_per_second": 4.32,
"step": 4944
},
{
"epoch": 48.54,
"learning_rate": 1.9029126213592234e-05,
"loss": 0.618,
"step": 5000
},
{
"epoch": 49.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 2.9729325771331787,
"eval_runtime": 4.4073,
"eval_samples_per_second": 66.027,
"eval_steps_per_second": 4.311,
"step": 5047
},
{
"epoch": 49.51,
"learning_rate": 1.900970873786408e-05,
"loss": 0.556,
"step": 5100
},
{
"epoch": 50.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 2.986999273300171,
"eval_runtime": 4.3872,
"eval_samples_per_second": 66.33,
"eval_steps_per_second": 4.331,
"step": 5150
},
{
"epoch": 50.49,
"learning_rate": 1.8990291262135923e-05,
"loss": 0.5314,
"step": 5200
},
{
"epoch": 51.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 2.993405342102051,
"eval_runtime": 4.3709,
"eval_samples_per_second": 66.577,
"eval_steps_per_second": 4.347,
"step": 5253
},
{
"epoch": 51.46,
"learning_rate": 1.8970873786407768e-05,
"loss": 0.5502,
"step": 5300
},
{
"epoch": 52.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 2.937934160232544,
"eval_runtime": 4.3519,
"eval_samples_per_second": 66.868,
"eval_steps_per_second": 4.366,
"step": 5356
},
{
"epoch": 52.43,
"learning_rate": 1.8951456310679613e-05,
"loss": 0.4958,
"step": 5400
},
{
"epoch": 53.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 3.0344159603118896,
"eval_runtime": 4.3703,
"eval_samples_per_second": 66.585,
"eval_steps_per_second": 4.347,
"step": 5459
},
{
"epoch": 53.4,
"learning_rate": 1.8932038834951457e-05,
"loss": 0.4896,
"step": 5500
},
{
"epoch": 54.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 2.9924163818359375,
"eval_runtime": 4.5316,
"eval_samples_per_second": 64.215,
"eval_steps_per_second": 4.193,
"step": 5562
},
{
"epoch": 54.37,
"learning_rate": 1.89126213592233e-05,
"loss": 0.4803,
"step": 5600
},
{
"epoch": 55.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 3.0161025524139404,
"eval_runtime": 4.5126,
"eval_samples_per_second": 64.486,
"eval_steps_per_second": 4.21,
"step": 5665
},
{
"epoch": 55.34,
"learning_rate": 1.889320388349515e-05,
"loss": 0.4554,
"step": 5700
},
{
"epoch": 56.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.0220870971679688,
"eval_runtime": 4.3658,
"eval_samples_per_second": 66.654,
"eval_steps_per_second": 4.352,
"step": 5768
},
{
"epoch": 56.31,
"learning_rate": 1.887378640776699e-05,
"loss": 0.4591,
"step": 5800
},
{
"epoch": 57.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 3.0460500717163086,
"eval_runtime": 4.3815,
"eval_samples_per_second": 66.415,
"eval_steps_per_second": 4.336,
"step": 5871
},
{
"epoch": 57.28,
"learning_rate": 1.8854368932038835e-05,
"loss": 0.4349,
"step": 5900
},
{
"epoch": 58.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 3.137669801712036,
"eval_runtime": 4.3638,
"eval_samples_per_second": 66.685,
"eval_steps_per_second": 4.354,
"step": 5974
},
{
"epoch": 58.25,
"learning_rate": 1.883495145631068e-05,
"loss": 0.4127,
"step": 6000
},
{
"epoch": 59.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.0168519020080566,
"eval_runtime": 4.3582,
"eval_samples_per_second": 66.771,
"eval_steps_per_second": 4.36,
"step": 6077
},
{
"epoch": 59.22,
"learning_rate": 1.8815533980582525e-05,
"loss": 0.3973,
"step": 6100
},
{
"epoch": 60.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.0337910652160645,
"eval_runtime": 4.4091,
"eval_samples_per_second": 66.001,
"eval_steps_per_second": 4.309,
"step": 6180
},
{
"epoch": 60.19,
"learning_rate": 1.8796116504854373e-05,
"loss": 0.4109,
"step": 6200
},
{
"epoch": 61.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.063812255859375,
"eval_runtime": 4.5298,
"eval_samples_per_second": 64.241,
"eval_steps_per_second": 4.194,
"step": 6283
},
{
"epoch": 61.17,
"learning_rate": 1.8776699029126214e-05,
"loss": 0.3872,
"step": 6300
},
{
"epoch": 62.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.0810182094573975,
"eval_runtime": 4.5282,
"eval_samples_per_second": 64.264,
"eval_steps_per_second": 4.196,
"step": 6386
},
{
"epoch": 62.14,
"learning_rate": 1.875728155339806e-05,
"loss": 0.3693,
"step": 6400
},
{
"epoch": 63.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 3.2002737522125244,
"eval_runtime": 4.4521,
"eval_samples_per_second": 65.362,
"eval_steps_per_second": 4.268,
"step": 6489
},
{
"epoch": 63.11,
"learning_rate": 1.8737864077669906e-05,
"loss": 0.3457,
"step": 6500
},
{
"epoch": 64.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 3.0842859745025635,
"eval_runtime": 4.4307,
"eval_samples_per_second": 65.677,
"eval_steps_per_second": 4.288,
"step": 6592
},
{
"epoch": 64.08,
"learning_rate": 1.8718446601941747e-05,
"loss": 0.3521,
"step": 6600
},
{
"epoch": 65.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 3.1622819900512695,
"eval_runtime": 4.433,
"eval_samples_per_second": 65.644,
"eval_steps_per_second": 4.286,
"step": 6695
},
{
"epoch": 65.05,
"learning_rate": 1.8699029126213595e-05,
"loss": 0.3625,
"step": 6700
},
{
"epoch": 66.0,
"eval_accuracy": 0.32989690721649484,
"eval_loss": 3.003610372543335,
"eval_runtime": 4.5052,
"eval_samples_per_second": 64.592,
"eval_steps_per_second": 4.217,
"step": 6798
},
{
"epoch": 66.02,
"learning_rate": 1.867961165048544e-05,
"loss": 0.3746,
"step": 6800
},
{
"epoch": 66.99,
"learning_rate": 1.866019417475728e-05,
"loss": 0.3339,
"step": 6900
},
{
"epoch": 67.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 3.2389235496520996,
"eval_runtime": 4.4738,
"eval_samples_per_second": 65.046,
"eval_steps_per_second": 4.247,
"step": 6901
},
{
"epoch": 67.96,
"learning_rate": 1.864077669902913e-05,
"loss": 0.3378,
"step": 7000
},
{
"epoch": 68.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 3.249319076538086,
"eval_runtime": 4.4083,
"eval_samples_per_second": 66.011,
"eval_steps_per_second": 4.31,
"step": 7004
},
{
"epoch": 68.93,
"learning_rate": 1.8621359223300974e-05,
"loss": 0.2981,
"step": 7100
},
{
"epoch": 69.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.130829334259033,
"eval_runtime": 4.4429,
"eval_samples_per_second": 65.498,
"eval_steps_per_second": 4.277,
"step": 7107
},
{
"epoch": 69.9,
"learning_rate": 1.860194174757282e-05,
"loss": 0.3023,
"step": 7200
},
{
"epoch": 70.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 3.2455456256866455,
"eval_runtime": 4.4181,
"eval_samples_per_second": 65.866,
"eval_steps_per_second": 4.301,
"step": 7210
},
{
"epoch": 70.87,
"learning_rate": 1.8582524271844663e-05,
"loss": 0.3076,
"step": 7300
},
{
"epoch": 71.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 3.27248477935791,
"eval_runtime": 4.418,
"eval_samples_per_second": 65.867,
"eval_steps_per_second": 4.301,
"step": 7313
},
{
"epoch": 71.84,
"learning_rate": 1.8563106796116504e-05,
"loss": 0.3201,
"step": 7400
},
{
"epoch": 72.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 3.2563161849975586,
"eval_runtime": 4.3553,
"eval_samples_per_second": 66.814,
"eval_steps_per_second": 4.362,
"step": 7416
},
{
"epoch": 72.82,
"learning_rate": 1.8543689320388352e-05,
"loss": 0.3083,
"step": 7500
},
{
"epoch": 73.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 3.252042055130005,
"eval_runtime": 4.4514,
"eval_samples_per_second": 65.372,
"eval_steps_per_second": 4.268,
"step": 7519
},
{
"epoch": 73.79,
"learning_rate": 1.8524271844660197e-05,
"loss": 0.2906,
"step": 7600
},
{
"epoch": 74.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 3.3343799114227295,
"eval_runtime": 4.3733,
"eval_samples_per_second": 66.54,
"eval_steps_per_second": 4.345,
"step": 7622
},
{
"epoch": 74.76,
"learning_rate": 1.850485436893204e-05,
"loss": 0.2721,
"step": 7700
},
{
"epoch": 75.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 3.1951873302459717,
"eval_runtime": 4.376,
"eval_samples_per_second": 66.499,
"eval_steps_per_second": 4.342,
"step": 7725
},
{
"epoch": 75.73,
"learning_rate": 1.8485436893203886e-05,
"loss": 0.2873,
"step": 7800
},
{
"epoch": 76.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 3.2528605461120605,
"eval_runtime": 4.3937,
"eval_samples_per_second": 66.231,
"eval_steps_per_second": 4.324,
"step": 7828
},
{
"epoch": 76.7,
"learning_rate": 1.846601941747573e-05,
"loss": 0.278,
"step": 7900
},
{
"epoch": 77.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.3427820205688477,
"eval_runtime": 4.393,
"eval_samples_per_second": 66.241,
"eval_steps_per_second": 4.325,
"step": 7931
},
{
"epoch": 77.67,
"learning_rate": 1.8446601941747575e-05,
"loss": 0.2573,
"step": 8000
},
{
"epoch": 78.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 3.3216073513031006,
"eval_runtime": 4.3997,
"eval_samples_per_second": 66.141,
"eval_steps_per_second": 4.318,
"step": 8034
},
{
"epoch": 78.64,
"learning_rate": 1.842718446601942e-05,
"loss": 0.2578,
"step": 8100
},
{
"epoch": 79.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.4177794456481934,
"eval_runtime": 4.3657,
"eval_samples_per_second": 66.656,
"eval_steps_per_second": 4.352,
"step": 8137
},
{
"epoch": 79.61,
"learning_rate": 1.8407766990291264e-05,
"loss": 0.2774,
"step": 8200
},
{
"epoch": 80.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.344855785369873,
"eval_runtime": 4.4508,
"eval_samples_per_second": 65.382,
"eval_steps_per_second": 4.269,
"step": 8240
},
{
"epoch": 80.58,
"learning_rate": 1.838834951456311e-05,
"loss": 0.2762,
"step": 8300
},
{
"epoch": 81.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 3.3451921939849854,
"eval_runtime": 4.3629,
"eval_samples_per_second": 66.699,
"eval_steps_per_second": 4.355,
"step": 8343
},
{
"epoch": 81.55,
"learning_rate": 1.8368932038834953e-05,
"loss": 0.2504,
"step": 8400
},
{
"epoch": 82.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.579151153564453,
"eval_runtime": 4.3815,
"eval_samples_per_second": 66.416,
"eval_steps_per_second": 4.336,
"step": 8446
},
{
"epoch": 82.52,
"learning_rate": 1.8349514563106798e-05,
"loss": 0.2552,
"step": 8500
},
{
"epoch": 83.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.3477821350097656,
"eval_runtime": 4.3706,
"eval_samples_per_second": 66.582,
"eval_steps_per_second": 4.347,
"step": 8549
},
{
"epoch": 83.5,
"learning_rate": 1.8330097087378643e-05,
"loss": 0.2541,
"step": 8600
},
{
"epoch": 84.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 3.4901580810546875,
"eval_runtime": 4.4137,
"eval_samples_per_second": 65.932,
"eval_steps_per_second": 4.305,
"step": 8652
},
{
"epoch": 84.47,
"learning_rate": 1.8310679611650487e-05,
"loss": 0.2616,
"step": 8700
},
{
"epoch": 85.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 3.282921552658081,
"eval_runtime": 4.4452,
"eval_samples_per_second": 65.464,
"eval_steps_per_second": 4.274,
"step": 8755
},
{
"epoch": 85.44,
"learning_rate": 1.8291262135922332e-05,
"loss": 0.2079,
"step": 8800
},
{
"epoch": 86.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 3.528667688369751,
"eval_runtime": 4.496,
"eval_samples_per_second": 64.725,
"eval_steps_per_second": 4.226,
"step": 8858
},
{
"epoch": 86.41,
"learning_rate": 1.8271844660194176e-05,
"loss": 0.2538,
"step": 8900
},
{
"epoch": 87.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 3.4730610847473145,
"eval_runtime": 4.4242,
"eval_samples_per_second": 65.774,
"eval_steps_per_second": 4.295,
"step": 8961
},
{
"epoch": 87.38,
"learning_rate": 1.825242718446602e-05,
"loss": 0.2485,
"step": 9000
},
{
"epoch": 88.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 3.5997567176818848,
"eval_runtime": 4.3773,
"eval_samples_per_second": 66.479,
"eval_steps_per_second": 4.341,
"step": 9064
},
{
"epoch": 88.35,
"learning_rate": 1.8233009708737865e-05,
"loss": 0.2714,
"step": 9100
},
{
"epoch": 89.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 3.4566540718078613,
"eval_runtime": 4.4582,
"eval_samples_per_second": 65.274,
"eval_steps_per_second": 4.262,
"step": 9167
},
{
"epoch": 89.32,
"learning_rate": 1.821359223300971e-05,
"loss": 0.232,
"step": 9200
},
{
"epoch": 90.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.5061261653900146,
"eval_runtime": 4.3957,
"eval_samples_per_second": 66.2,
"eval_steps_per_second": 4.322,
"step": 9270
},
{
"epoch": 90.29,
"learning_rate": 1.8194174757281555e-05,
"loss": 0.2577,
"step": 9300
},
{
"epoch": 91.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 3.536961317062378,
"eval_runtime": 4.4048,
"eval_samples_per_second": 66.065,
"eval_steps_per_second": 4.313,
"step": 9373
},
{
"epoch": 91.26,
"learning_rate": 1.81747572815534e-05,
"loss": 0.2232,
"step": 9400
},
{
"epoch": 92.0,
"eval_accuracy": 0.2508591065292096,
"eval_loss": 3.5062103271484375,
"eval_runtime": 4.3856,
"eval_samples_per_second": 66.353,
"eval_steps_per_second": 4.332,
"step": 9476
},
{
"epoch": 92.23,
"learning_rate": 1.8155339805825244e-05,
"loss": 0.2351,
"step": 9500
},
{
"epoch": 93.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 3.559199094772339,
"eval_runtime": 4.3998,
"eval_samples_per_second": 66.14,
"eval_steps_per_second": 4.318,
"step": 9579
},
{
"epoch": 93.2,
"learning_rate": 1.813592233009709e-05,
"loss": 0.2299,
"step": 9600
},
{
"epoch": 94.0,
"eval_accuracy": 0.3333333333333333,
"eval_loss": 3.516669988632202,
"eval_runtime": 4.4819,
"eval_samples_per_second": 64.928,
"eval_steps_per_second": 4.239,
"step": 9682
},
{
"epoch": 94.17,
"learning_rate": 1.8116504854368933e-05,
"loss": 0.2415,
"step": 9700
},
{
"epoch": 95.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 3.6282804012298584,
"eval_runtime": 4.3633,
"eval_samples_per_second": 66.692,
"eval_steps_per_second": 4.354,
"step": 9785
},
{
"epoch": 95.15,
"learning_rate": 1.8097087378640778e-05,
"loss": 0.2265,
"step": 9800
},
{
"epoch": 96.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 3.4819419384002686,
"eval_runtime": 4.3709,
"eval_samples_per_second": 66.577,
"eval_steps_per_second": 4.347,
"step": 9888
},
{
"epoch": 96.12,
"learning_rate": 1.8077669902912622e-05,
"loss": 0.2448,
"step": 9900
},
{
"epoch": 97.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 3.5793020725250244,
"eval_runtime": 4.3824,
"eval_samples_per_second": 66.402,
"eval_steps_per_second": 4.336,
"step": 9991
},
{
"epoch": 97.09,
"learning_rate": 1.8058252427184467e-05,
"loss": 0.2141,
"step": 10000
},
{
"epoch": 98.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 3.5728020668029785,
"eval_runtime": 4.3625,
"eval_samples_per_second": 66.704,
"eval_steps_per_second": 4.355,
"step": 10094
},
{
"epoch": 98.06,
"learning_rate": 1.803883495145631e-05,
"loss": 0.1979,
"step": 10100
},
{
"epoch": 99.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 3.4685394763946533,
"eval_runtime": 4.4238,
"eval_samples_per_second": 65.781,
"eval_steps_per_second": 4.295,
"step": 10197
},
{
"epoch": 99.03,
"learning_rate": 1.8019417475728156e-05,
"loss": 0.2188,
"step": 10200
},
{
"epoch": 100.0,
"learning_rate": 1.8e-05,
"loss": 0.2077,
"step": 10300
},
{
"epoch": 100.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 3.558551788330078,
"eval_runtime": 4.4141,
"eval_samples_per_second": 65.924,
"eval_steps_per_second": 4.304,
"step": 10300
},
{
"epoch": 100.97,
"learning_rate": 1.7980582524271845e-05,
"loss": 0.1854,
"step": 10400
},
{
"epoch": 101.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 3.5650315284729004,
"eval_runtime": 4.4256,
"eval_samples_per_second": 65.754,
"eval_steps_per_second": 4.293,
"step": 10403
},
{
"epoch": 101.94,
"learning_rate": 1.7961165048543693e-05,
"loss": 0.2017,
"step": 10500
},
{
"epoch": 102.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 3.4760477542877197,
"eval_runtime": 4.4327,
"eval_samples_per_second": 65.649,
"eval_steps_per_second": 4.286,
"step": 10506
},
{
"epoch": 102.91,
"learning_rate": 1.7941747572815534e-05,
"loss": 0.2119,
"step": 10600
},
{
"epoch": 103.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 3.5530800819396973,
"eval_runtime": 4.4147,
"eval_samples_per_second": 65.916,
"eval_steps_per_second": 4.304,
"step": 10609
},
{
"epoch": 103.88,
"learning_rate": 1.792233009708738e-05,
"loss": 0.2314,
"step": 10700
},
{
"epoch": 104.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 3.5117688179016113,
"eval_runtime": 4.4182,
"eval_samples_per_second": 65.863,
"eval_steps_per_second": 4.3,
"step": 10712
},
{
"epoch": 104.85,
"learning_rate": 1.7902912621359227e-05,
"loss": 0.212,
"step": 10800
},
{
"epoch": 105.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 3.54956316947937,
"eval_runtime": 4.4027,
"eval_samples_per_second": 66.095,
"eval_steps_per_second": 4.315,
"step": 10815
},
{
"epoch": 105.83,
"learning_rate": 1.7883495145631068e-05,
"loss": 0.197,
"step": 10900
},
{
"epoch": 106.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 3.607961654663086,
"eval_runtime": 4.4253,
"eval_samples_per_second": 65.758,
"eval_steps_per_second": 4.294,
"step": 10918
},
{
"epoch": 106.8,
"learning_rate": 1.7864077669902916e-05,
"loss": 0.2067,
"step": 11000
},
{
"epoch": 107.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 3.621704578399658,
"eval_runtime": 4.4372,
"eval_samples_per_second": 65.582,
"eval_steps_per_second": 4.282,
"step": 11021
},
{
"epoch": 107.77,
"learning_rate": 1.7844660194174757e-05,
"loss": 0.1896,
"step": 11100
},
{
"epoch": 108.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 3.6445584297180176,
"eval_runtime": 4.4137,
"eval_samples_per_second": 65.931,
"eval_steps_per_second": 4.305,
"step": 11124
},
{
"epoch": 108.74,
"learning_rate": 1.7825242718446602e-05,
"loss": 0.198,
"step": 11200
},
{
"epoch": 109.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 3.769904136657715,
"eval_runtime": 4.4154,
"eval_samples_per_second": 65.905,
"eval_steps_per_second": 4.303,
"step": 11227
},
{
"epoch": 109.71,
"learning_rate": 1.780582524271845e-05,
"loss": 0.2152,
"step": 11300
},
{
"epoch": 110.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 3.6709232330322266,
"eval_runtime": 4.4235,
"eval_samples_per_second": 65.784,
"eval_steps_per_second": 4.295,
"step": 11330
},
{
"epoch": 110.68,
"learning_rate": 1.778640776699029e-05,
"loss": 0.2121,
"step": 11400
},
{
"epoch": 111.0,
"eval_accuracy": 0.33676975945017185,
"eval_loss": 3.6265642642974854,
"eval_runtime": 4.4106,
"eval_samples_per_second": 65.977,
"eval_steps_per_second": 4.308,
"step": 11433
},
{
"epoch": 111.65,
"learning_rate": 1.776699029126214e-05,
"loss": 0.1869,
"step": 11500
},
{
"epoch": 112.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.668063163757324,
"eval_runtime": 4.4048,
"eval_samples_per_second": 66.064,
"eval_steps_per_second": 4.313,
"step": 11536
},
{
"epoch": 112.62,
"learning_rate": 1.7747572815533983e-05,
"loss": 0.1927,
"step": 11600
},
{
"epoch": 113.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 3.7304697036743164,
"eval_runtime": 4.4143,
"eval_samples_per_second": 65.922,
"eval_steps_per_second": 4.304,
"step": 11639
},
{
"epoch": 113.59,
"learning_rate": 1.7728155339805825e-05,
"loss": 0.2259,
"step": 11700
},
{
"epoch": 114.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 3.630201578140259,
"eval_runtime": 4.4007,
"eval_samples_per_second": 66.126,
"eval_steps_per_second": 4.317,
"step": 11742
},
{
"epoch": 114.56,
"learning_rate": 1.7708737864077673e-05,
"loss": 0.1809,
"step": 11800
},
{
"epoch": 115.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 3.6300668716430664,
"eval_runtime": 4.4113,
"eval_samples_per_second": 65.967,
"eval_steps_per_second": 4.307,
"step": 11845
},
{
"epoch": 115.53,
"learning_rate": 1.7689320388349517e-05,
"loss": 0.2071,
"step": 11900
},
{
"epoch": 116.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 3.7288320064544678,
"eval_runtime": 4.4967,
"eval_samples_per_second": 64.714,
"eval_steps_per_second": 4.225,
"step": 11948
},
{
"epoch": 116.5,
"learning_rate": 1.7669902912621362e-05,
"loss": 0.1977,
"step": 12000
},
{
"epoch": 117.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 3.646707057952881,
"eval_runtime": 4.4852,
"eval_samples_per_second": 64.88,
"eval_steps_per_second": 4.236,
"step": 12051
},
{
"epoch": 117.48,
"learning_rate": 1.7650485436893206e-05,
"loss": 0.1902,
"step": 12100
},
{
"epoch": 118.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 3.703948974609375,
"eval_runtime": 4.5028,
"eval_samples_per_second": 64.627,
"eval_steps_per_second": 4.22,
"step": 12154
},
{
"epoch": 118.45,
"learning_rate": 1.763106796116505e-05,
"loss": 0.1996,
"step": 12200
},
{
"epoch": 119.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 3.901280403137207,
"eval_runtime": 4.4298,
"eval_samples_per_second": 65.691,
"eval_steps_per_second": 4.289,
"step": 12257
},
{
"epoch": 119.42,
"learning_rate": 1.7611650485436896e-05,
"loss": 0.2122,
"step": 12300
},
{
"epoch": 120.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 3.822838306427002,
"eval_runtime": 4.4449,
"eval_samples_per_second": 65.468,
"eval_steps_per_second": 4.275,
"step": 12360
},
{
"epoch": 120.39,
"learning_rate": 1.759223300970874e-05,
"loss": 0.1702,
"step": 12400
},
{
"epoch": 121.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 3.7117698192596436,
"eval_runtime": 4.422,
"eval_samples_per_second": 65.807,
"eval_steps_per_second": 4.297,
"step": 12463
},
{
"epoch": 121.36,
"learning_rate": 1.7572815533980585e-05,
"loss": 0.1889,
"step": 12500
},
{
"epoch": 122.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 3.721066474914551,
"eval_runtime": 4.4242,
"eval_samples_per_second": 65.774,
"eval_steps_per_second": 4.295,
"step": 12566
},
{
"epoch": 122.33,
"learning_rate": 1.755339805825243e-05,
"loss": 0.1857,
"step": 12600
},
{
"epoch": 123.0,
"eval_accuracy": 0.2508591065292096,
"eval_loss": 3.8894174098968506,
"eval_runtime": 4.4176,
"eval_samples_per_second": 65.873,
"eval_steps_per_second": 4.301,
"step": 12669
},
{
"epoch": 123.3,
"learning_rate": 1.7533980582524274e-05,
"loss": 0.2003,
"step": 12700
},
{
"epoch": 124.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 3.657545328140259,
"eval_runtime": 4.4115,
"eval_samples_per_second": 65.964,
"eval_steps_per_second": 4.307,
"step": 12772
},
{
"epoch": 124.27,
"learning_rate": 1.751456310679612e-05,
"loss": 0.202,
"step": 12800
},
{
"epoch": 125.0,
"eval_accuracy": 0.3333333333333333,
"eval_loss": 3.792531728744507,
"eval_runtime": 4.4022,
"eval_samples_per_second": 66.104,
"eval_steps_per_second": 4.316,
"step": 12875
},
{
"epoch": 125.24,
"learning_rate": 1.7495145631067963e-05,
"loss": 0.1722,
"step": 12900
},
{
"epoch": 126.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.8187637329101562,
"eval_runtime": 4.5091,
"eval_samples_per_second": 64.535,
"eval_steps_per_second": 4.214,
"step": 12978
},
{
"epoch": 126.21,
"learning_rate": 1.7475728155339808e-05,
"loss": 0.1716,
"step": 13000
},
{
"epoch": 127.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 3.958421468734741,
"eval_runtime": 4.4913,
"eval_samples_per_second": 64.792,
"eval_steps_per_second": 4.23,
"step": 13081
},
{
"epoch": 127.18,
"learning_rate": 1.7456310679611652e-05,
"loss": 0.1598,
"step": 13100
},
{
"epoch": 128.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 3.7731645107269287,
"eval_runtime": 4.4084,
"eval_samples_per_second": 66.01,
"eval_steps_per_second": 4.31,
"step": 13184
},
{
"epoch": 128.16,
"learning_rate": 1.7436893203883497e-05,
"loss": 0.1825,
"step": 13200
},
{
"epoch": 129.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 3.803807497024536,
"eval_runtime": 4.4164,
"eval_samples_per_second": 65.891,
"eval_steps_per_second": 4.302,
"step": 13287
},
{
"epoch": 129.13,
"learning_rate": 1.741747572815534e-05,
"loss": 0.1716,
"step": 13300
},
{
"epoch": 130.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 3.760632276535034,
"eval_runtime": 4.3993,
"eval_samples_per_second": 66.147,
"eval_steps_per_second": 4.319,
"step": 13390
},
{
"epoch": 130.1,
"learning_rate": 1.7398058252427186e-05,
"loss": 0.179,
"step": 13400
},
{
"epoch": 131.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.7458295822143555,
"eval_runtime": 4.3974,
"eval_samples_per_second": 66.176,
"eval_steps_per_second": 4.321,
"step": 13493
},
{
"epoch": 131.07,
"learning_rate": 1.737864077669903e-05,
"loss": 0.1817,
"step": 13500
},
{
"epoch": 132.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.841256618499756,
"eval_runtime": 4.5286,
"eval_samples_per_second": 64.258,
"eval_steps_per_second": 4.196,
"step": 13596
},
{
"epoch": 132.04,
"learning_rate": 1.7359223300970875e-05,
"loss": 0.1606,
"step": 13600
},
{
"epoch": 133.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 3.876582384109497,
"eval_runtime": 4.399,
"eval_samples_per_second": 66.151,
"eval_steps_per_second": 4.319,
"step": 13699
},
{
"epoch": 133.01,
"learning_rate": 1.733980582524272e-05,
"loss": 0.1785,
"step": 13700
},
{
"epoch": 133.98,
"learning_rate": 1.7320388349514564e-05,
"loss": 0.1625,
"step": 13800
},
{
"epoch": 134.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 3.8187553882598877,
"eval_runtime": 4.4306,
"eval_samples_per_second": 65.68,
"eval_steps_per_second": 4.288,
"step": 13802
},
{
"epoch": 134.95,
"learning_rate": 1.730097087378641e-05,
"loss": 0.1622,
"step": 13900
},
{
"epoch": 135.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.7222514152526855,
"eval_runtime": 4.4404,
"eval_samples_per_second": 65.534,
"eval_steps_per_second": 4.279,
"step": 13905
},
{
"epoch": 135.92,
"learning_rate": 1.7281553398058253e-05,
"loss": 0.1852,
"step": 14000
},
{
"epoch": 136.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 3.777442693710327,
"eval_runtime": 4.4465,
"eval_samples_per_second": 65.444,
"eval_steps_per_second": 4.273,
"step": 14008
},
{
"epoch": 136.89,
"learning_rate": 1.7262135922330098e-05,
"loss": 0.1671,
"step": 14100
},
{
"epoch": 137.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 3.8406755924224854,
"eval_runtime": 4.4235,
"eval_samples_per_second": 65.785,
"eval_steps_per_second": 4.295,
"step": 14111
},
{
"epoch": 137.86,
"learning_rate": 1.7242718446601943e-05,
"loss": 0.1862,
"step": 14200
},
{
"epoch": 138.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 3.744192123413086,
"eval_runtime": 4.3988,
"eval_samples_per_second": 66.155,
"eval_steps_per_second": 4.319,
"step": 14214
},
{
"epoch": 138.83,
"learning_rate": 1.7223300970873787e-05,
"loss": 0.1808,
"step": 14300
},
{
"epoch": 139.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 3.845832347869873,
"eval_runtime": 4.4115,
"eval_samples_per_second": 65.964,
"eval_steps_per_second": 4.307,
"step": 14317
},
{
"epoch": 139.81,
"learning_rate": 1.7203883495145632e-05,
"loss": 0.1375,
"step": 14400
},
{
"epoch": 140.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 3.7371747493743896,
"eval_runtime": 4.4129,
"eval_samples_per_second": 65.944,
"eval_steps_per_second": 4.306,
"step": 14420
},
{
"epoch": 140.78,
"learning_rate": 1.7184466019417476e-05,
"loss": 0.1876,
"step": 14500
},
{
"epoch": 141.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 3.992500066757202,
"eval_runtime": 4.4146,
"eval_samples_per_second": 65.918,
"eval_steps_per_second": 4.304,
"step": 14523
},
{
"epoch": 141.75,
"learning_rate": 1.716504854368932e-05,
"loss": 0.1693,
"step": 14600
},
{
"epoch": 142.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 3.9364075660705566,
"eval_runtime": 4.4593,
"eval_samples_per_second": 65.257,
"eval_steps_per_second": 4.261,
"step": 14626
},
{
"epoch": 142.72,
"learning_rate": 1.7145631067961165e-05,
"loss": 0.1719,
"step": 14700
},
{
"epoch": 143.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 3.9148850440979004,
"eval_runtime": 4.4169,
"eval_samples_per_second": 65.883,
"eval_steps_per_second": 4.302,
"step": 14729
},
{
"epoch": 143.69,
"learning_rate": 1.7126213592233013e-05,
"loss": 0.1406,
"step": 14800
},
{
"epoch": 144.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 3.8602797985076904,
"eval_runtime": 4.4407,
"eval_samples_per_second": 65.53,
"eval_steps_per_second": 4.279,
"step": 14832
},
{
"epoch": 144.66,
"learning_rate": 1.7106796116504855e-05,
"loss": 0.1709,
"step": 14900
},
{
"epoch": 145.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 3.921625852584839,
"eval_runtime": 4.4044,
"eval_samples_per_second": 66.071,
"eval_steps_per_second": 4.314,
"step": 14935
},
{
"epoch": 145.63,
"learning_rate": 1.70873786407767e-05,
"loss": 0.1794,
"step": 15000
},
{
"epoch": 146.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 3.8933568000793457,
"eval_runtime": 4.4071,
"eval_samples_per_second": 66.03,
"eval_steps_per_second": 4.311,
"step": 15038
},
{
"epoch": 146.6,
"learning_rate": 1.7067961165048544e-05,
"loss": 0.1455,
"step": 15100
},
{
"epoch": 147.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.008619785308838,
"eval_runtime": 4.429,
"eval_samples_per_second": 65.703,
"eval_steps_per_second": 4.29,
"step": 15141
},
{
"epoch": 147.57,
"learning_rate": 1.704854368932039e-05,
"loss": 0.1959,
"step": 15200
},
{
"epoch": 148.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 3.9358479976654053,
"eval_runtime": 4.4602,
"eval_samples_per_second": 65.243,
"eval_steps_per_second": 4.26,
"step": 15244
},
{
"epoch": 148.54,
"learning_rate": 1.7029126213592236e-05,
"loss": 0.1664,
"step": 15300
},
{
"epoch": 149.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 3.977458953857422,
"eval_runtime": 4.4996,
"eval_samples_per_second": 64.673,
"eval_steps_per_second": 4.223,
"step": 15347
},
{
"epoch": 149.51,
"learning_rate": 1.7009708737864078e-05,
"loss": 0.1455,
"step": 15400
},
{
"epoch": 150.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 3.9304189682006836,
"eval_runtime": 4.5108,
"eval_samples_per_second": 64.512,
"eval_steps_per_second": 4.212,
"step": 15450
},
{
"epoch": 150.49,
"learning_rate": 1.6990291262135922e-05,
"loss": 0.1819,
"step": 15500
},
{
"epoch": 151.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.029915809631348,
"eval_runtime": 4.5126,
"eval_samples_per_second": 64.486,
"eval_steps_per_second": 4.21,
"step": 15553
},
{
"epoch": 151.46,
"learning_rate": 1.697087378640777e-05,
"loss": 0.1532,
"step": 15600
},
{
"epoch": 152.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.121899127960205,
"eval_runtime": 4.4026,
"eval_samples_per_second": 66.097,
"eval_steps_per_second": 4.316,
"step": 15656
},
{
"epoch": 152.43,
"learning_rate": 1.695145631067961e-05,
"loss": 0.1638,
"step": 15700
},
{
"epoch": 153.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.146513938903809,
"eval_runtime": 4.3942,
"eval_samples_per_second": 66.224,
"eval_steps_per_second": 4.324,
"step": 15759
},
{
"epoch": 153.4,
"learning_rate": 1.693203883495146e-05,
"loss": 0.1579,
"step": 15800
},
{
"epoch": 154.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.05957555770874,
"eval_runtime": 4.4053,
"eval_samples_per_second": 66.056,
"eval_steps_per_second": 4.313,
"step": 15862
},
{
"epoch": 154.37,
"learning_rate": 1.6912621359223304e-05,
"loss": 0.1668,
"step": 15900
},
{
"epoch": 155.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 4.085700988769531,
"eval_runtime": 4.3881,
"eval_samples_per_second": 66.316,
"eval_steps_per_second": 4.33,
"step": 15965
},
{
"epoch": 155.34,
"learning_rate": 1.6893203883495145e-05,
"loss": 0.1401,
"step": 16000
},
{
"epoch": 156.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.166921138763428,
"eval_runtime": 4.4102,
"eval_samples_per_second": 65.983,
"eval_steps_per_second": 4.308,
"step": 16068
},
{
"epoch": 156.31,
"learning_rate": 1.6873786407766993e-05,
"loss": 0.1452,
"step": 16100
},
{
"epoch": 157.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.043022632598877,
"eval_runtime": 4.4074,
"eval_samples_per_second": 66.026,
"eval_steps_per_second": 4.311,
"step": 16171
},
{
"epoch": 157.28,
"learning_rate": 1.6854368932038838e-05,
"loss": 0.1568,
"step": 16200
},
{
"epoch": 158.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.015657901763916,
"eval_runtime": 4.3977,
"eval_samples_per_second": 66.17,
"eval_steps_per_second": 4.32,
"step": 16274
},
{
"epoch": 158.25,
"learning_rate": 1.6834951456310682e-05,
"loss": 0.1771,
"step": 16300
},
{
"epoch": 159.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.076967716217041,
"eval_runtime": 4.3856,
"eval_samples_per_second": 66.354,
"eval_steps_per_second": 4.332,
"step": 16377
},
{
"epoch": 159.22,
"learning_rate": 1.6815533980582527e-05,
"loss": 0.1383,
"step": 16400
},
{
"epoch": 160.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.0888471603393555,
"eval_runtime": 4.4106,
"eval_samples_per_second": 65.977,
"eval_steps_per_second": 4.308,
"step": 16480
},
{
"epoch": 160.19,
"learning_rate": 1.6796116504854368e-05,
"loss": 0.1572,
"step": 16500
},
{
"epoch": 161.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.227140426635742,
"eval_runtime": 4.4013,
"eval_samples_per_second": 66.116,
"eval_steps_per_second": 4.317,
"step": 16583
},
{
"epoch": 161.17,
"learning_rate": 1.6776699029126216e-05,
"loss": 0.1472,
"step": 16600
},
{
"epoch": 162.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.02153205871582,
"eval_runtime": 4.396,
"eval_samples_per_second": 66.196,
"eval_steps_per_second": 4.322,
"step": 16686
},
{
"epoch": 162.14,
"learning_rate": 1.675728155339806e-05,
"loss": 0.1534,
"step": 16700
},
{
"epoch": 163.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.224771499633789,
"eval_runtime": 4.4208,
"eval_samples_per_second": 65.825,
"eval_steps_per_second": 4.298,
"step": 16789
},
{
"epoch": 163.11,
"learning_rate": 1.6737864077669905e-05,
"loss": 0.136,
"step": 16800
},
{
"epoch": 164.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.215867519378662,
"eval_runtime": 4.4089,
"eval_samples_per_second": 66.003,
"eval_steps_per_second": 4.309,
"step": 16892
},
{
"epoch": 164.08,
"learning_rate": 1.671844660194175e-05,
"loss": 0.1525,
"step": 16900
},
{
"epoch": 165.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.05654239654541,
"eval_runtime": 4.3994,
"eval_samples_per_second": 66.146,
"eval_steps_per_second": 4.319,
"step": 16995
},
{
"epoch": 165.05,
"learning_rate": 1.6699029126213594e-05,
"loss": 0.1418,
"step": 17000
},
{
"epoch": 166.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.117518424987793,
"eval_runtime": 4.4011,
"eval_samples_per_second": 66.12,
"eval_steps_per_second": 4.317,
"step": 17098
},
{
"epoch": 166.02,
"learning_rate": 1.667961165048544e-05,
"loss": 0.1542,
"step": 17100
},
{
"epoch": 166.99,
"learning_rate": 1.6660194174757283e-05,
"loss": 0.1374,
"step": 17200
},
{
"epoch": 167.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.170831203460693,
"eval_runtime": 4.3965,
"eval_samples_per_second": 66.188,
"eval_steps_per_second": 4.322,
"step": 17201
},
{
"epoch": 167.96,
"learning_rate": 1.6640776699029128e-05,
"loss": 0.1538,
"step": 17300
},
{
"epoch": 168.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.256599426269531,
"eval_runtime": 4.4123,
"eval_samples_per_second": 65.952,
"eval_steps_per_second": 4.306,
"step": 17304
},
{
"epoch": 168.93,
"learning_rate": 1.6621359223300973e-05,
"loss": 0.1365,
"step": 17400
},
{
"epoch": 169.0,
"eval_accuracy": 0.25773195876288657,
"eval_loss": 4.306251525878906,
"eval_runtime": 4.409,
"eval_samples_per_second": 66.002,
"eval_steps_per_second": 4.309,
"step": 17407
},
{
"epoch": 169.9,
"learning_rate": 1.6601941747572817e-05,
"loss": 0.1661,
"step": 17500
},
{
"epoch": 170.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.223095417022705,
"eval_runtime": 4.413,
"eval_samples_per_second": 65.941,
"eval_steps_per_second": 4.305,
"step": 17510
},
{
"epoch": 170.87,
"learning_rate": 1.6582524271844662e-05,
"loss": 0.1278,
"step": 17600
},
{
"epoch": 171.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.312500953674316,
"eval_runtime": 4.3996,
"eval_samples_per_second": 66.143,
"eval_steps_per_second": 4.319,
"step": 17613
},
{
"epoch": 171.84,
"learning_rate": 1.6563106796116506e-05,
"loss": 0.1418,
"step": 17700
},
{
"epoch": 172.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.333723545074463,
"eval_runtime": 4.4095,
"eval_samples_per_second": 65.994,
"eval_steps_per_second": 4.309,
"step": 17716
},
{
"epoch": 172.82,
"learning_rate": 1.654368932038835e-05,
"loss": 0.1538,
"step": 17800
},
{
"epoch": 173.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.3129119873046875,
"eval_runtime": 4.4001,
"eval_samples_per_second": 66.135,
"eval_steps_per_second": 4.318,
"step": 17819
},
{
"epoch": 173.79,
"learning_rate": 1.6524271844660196e-05,
"loss": 0.1315,
"step": 17900
},
{
"epoch": 174.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.3102030754089355,
"eval_runtime": 4.3961,
"eval_samples_per_second": 66.195,
"eval_steps_per_second": 4.322,
"step": 17922
},
{
"epoch": 174.76,
"learning_rate": 1.650485436893204e-05,
"loss": 0.128,
"step": 18000
},
{
"epoch": 175.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.285308837890625,
"eval_runtime": 4.4018,
"eval_samples_per_second": 66.109,
"eval_steps_per_second": 4.316,
"step": 18025
},
{
"epoch": 175.73,
"learning_rate": 1.6485436893203885e-05,
"loss": 0.1398,
"step": 18100
},
{
"epoch": 176.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.156043529510498,
"eval_runtime": 4.4077,
"eval_samples_per_second": 66.02,
"eval_steps_per_second": 4.311,
"step": 18128
},
{
"epoch": 176.7,
"learning_rate": 1.646601941747573e-05,
"loss": 0.1525,
"step": 18200
},
{
"epoch": 177.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.181150436401367,
"eval_runtime": 4.4122,
"eval_samples_per_second": 65.954,
"eval_steps_per_second": 4.306,
"step": 18231
},
{
"epoch": 177.67,
"learning_rate": 1.6446601941747574e-05,
"loss": 0.1603,
"step": 18300
},
{
"epoch": 178.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.126158714294434,
"eval_runtime": 4.4076,
"eval_samples_per_second": 66.022,
"eval_steps_per_second": 4.311,
"step": 18334
},
{
"epoch": 178.64,
"learning_rate": 1.642718446601942e-05,
"loss": 0.1412,
"step": 18400
},
{
"epoch": 179.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.277770519256592,
"eval_runtime": 4.4525,
"eval_samples_per_second": 65.357,
"eval_steps_per_second": 4.267,
"step": 18437
},
{
"epoch": 179.61,
"learning_rate": 1.6407766990291263e-05,
"loss": 0.1521,
"step": 18500
},
{
"epoch": 180.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.288129806518555,
"eval_runtime": 4.3977,
"eval_samples_per_second": 66.17,
"eval_steps_per_second": 4.32,
"step": 18540
},
{
"epoch": 180.58,
"learning_rate": 1.6388349514563108e-05,
"loss": 0.1404,
"step": 18600
},
{
"epoch": 181.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.314670562744141,
"eval_runtime": 4.4108,
"eval_samples_per_second": 65.974,
"eval_steps_per_second": 4.308,
"step": 18643
},
{
"epoch": 181.55,
"learning_rate": 1.6368932038834952e-05,
"loss": 0.1468,
"step": 18700
},
{
"epoch": 182.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.204223155975342,
"eval_runtime": 4.4068,
"eval_samples_per_second": 66.034,
"eval_steps_per_second": 4.312,
"step": 18746
},
{
"epoch": 182.52,
"learning_rate": 1.6349514563106797e-05,
"loss": 0.1448,
"step": 18800
},
{
"epoch": 183.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.211010456085205,
"eval_runtime": 4.411,
"eval_samples_per_second": 65.971,
"eval_steps_per_second": 4.307,
"step": 18849
},
{
"epoch": 183.5,
"learning_rate": 1.633009708737864e-05,
"loss": 0.1299,
"step": 18900
},
{
"epoch": 184.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.231362342834473,
"eval_runtime": 4.4084,
"eval_samples_per_second": 66.011,
"eval_steps_per_second": 4.31,
"step": 18952
},
{
"epoch": 184.47,
"learning_rate": 1.6310679611650486e-05,
"loss": 0.1361,
"step": 19000
},
{
"epoch": 185.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.299282550811768,
"eval_runtime": 4.5219,
"eval_samples_per_second": 64.354,
"eval_steps_per_second": 4.202,
"step": 19055
},
{
"epoch": 185.44,
"learning_rate": 1.629126213592233e-05,
"loss": 0.1455,
"step": 19100
},
{
"epoch": 186.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.350893020629883,
"eval_runtime": 4.4097,
"eval_samples_per_second": 65.991,
"eval_steps_per_second": 4.309,
"step": 19158
},
{
"epoch": 186.41,
"learning_rate": 1.6271844660194175e-05,
"loss": 0.1345,
"step": 19200
},
{
"epoch": 187.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.282843112945557,
"eval_runtime": 4.4104,
"eval_samples_per_second": 65.981,
"eval_steps_per_second": 4.308,
"step": 19261
},
{
"epoch": 187.38,
"learning_rate": 1.625242718446602e-05,
"loss": 0.1394,
"step": 19300
},
{
"epoch": 188.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.100064277648926,
"eval_runtime": 4.4148,
"eval_samples_per_second": 65.914,
"eval_steps_per_second": 4.304,
"step": 19364
},
{
"epoch": 188.35,
"learning_rate": 1.6233009708737864e-05,
"loss": 0.1415,
"step": 19400
},
{
"epoch": 189.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.217869281768799,
"eval_runtime": 4.4147,
"eval_samples_per_second": 65.917,
"eval_steps_per_second": 4.304,
"step": 19467
},
{
"epoch": 189.32,
"learning_rate": 1.621359223300971e-05,
"loss": 0.1235,
"step": 19500
},
{
"epoch": 190.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.296295642852783,
"eval_runtime": 4.4171,
"eval_samples_per_second": 65.88,
"eval_steps_per_second": 4.301,
"step": 19570
},
{
"epoch": 190.29,
"learning_rate": 1.6194174757281557e-05,
"loss": 0.1373,
"step": 19600
},
{
"epoch": 191.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.183337211608887,
"eval_runtime": 4.402,
"eval_samples_per_second": 66.107,
"eval_steps_per_second": 4.316,
"step": 19673
},
{
"epoch": 191.26,
"learning_rate": 1.6174757281553398e-05,
"loss": 0.1323,
"step": 19700
},
{
"epoch": 192.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.305690288543701,
"eval_runtime": 4.3952,
"eval_samples_per_second": 66.209,
"eval_steps_per_second": 4.323,
"step": 19776
},
{
"epoch": 192.23,
"learning_rate": 1.6155339805825243e-05,
"loss": 0.1188,
"step": 19800
},
{
"epoch": 193.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.381898880004883,
"eval_runtime": 4.3965,
"eval_samples_per_second": 66.189,
"eval_steps_per_second": 4.322,
"step": 19879
},
{
"epoch": 193.2,
"learning_rate": 1.613592233009709e-05,
"loss": 0.1528,
"step": 19900
},
{
"epoch": 194.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.309067726135254,
"eval_runtime": 4.3965,
"eval_samples_per_second": 66.189,
"eval_steps_per_second": 4.322,
"step": 19982
},
{
"epoch": 194.17,
"learning_rate": 1.6116504854368932e-05,
"loss": 0.1365,
"step": 20000
},
{
"epoch": 195.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.387022495269775,
"eval_runtime": 4.3928,
"eval_samples_per_second": 66.245,
"eval_steps_per_second": 4.325,
"step": 20085
},
{
"epoch": 195.15,
"learning_rate": 1.609708737864078e-05,
"loss": 0.1187,
"step": 20100
},
{
"epoch": 196.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.230319499969482,
"eval_runtime": 4.4107,
"eval_samples_per_second": 65.976,
"eval_steps_per_second": 4.308,
"step": 20188
},
{
"epoch": 196.12,
"learning_rate": 1.6077669902912624e-05,
"loss": 0.1409,
"step": 20200
},
{
"epoch": 197.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.234382152557373,
"eval_runtime": 4.4056,
"eval_samples_per_second": 66.053,
"eval_steps_per_second": 4.313,
"step": 20291
},
{
"epoch": 197.09,
"learning_rate": 1.6058252427184466e-05,
"loss": 0.1346,
"step": 20300
},
{
"epoch": 198.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 4.06366491317749,
"eval_runtime": 4.4189,
"eval_samples_per_second": 65.854,
"eval_steps_per_second": 4.3,
"step": 20394
},
{
"epoch": 198.06,
"learning_rate": 1.6038834951456313e-05,
"loss": 0.1449,
"step": 20400
},
{
"epoch": 199.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.3022308349609375,
"eval_runtime": 4.4119,
"eval_samples_per_second": 65.959,
"eval_steps_per_second": 4.307,
"step": 20497
},
{
"epoch": 199.03,
"learning_rate": 1.6019417475728155e-05,
"loss": 0.131,
"step": 20500
},
{
"epoch": 200.0,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.1415,
"step": 20600
},
{
"epoch": 200.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.267215728759766,
"eval_runtime": 4.4711,
"eval_samples_per_second": 65.085,
"eval_steps_per_second": 4.25,
"step": 20600
},
{
"epoch": 200.97,
"learning_rate": 1.5980582524271847e-05,
"loss": 0.1283,
"step": 20700
},
{
"epoch": 201.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.236283302307129,
"eval_runtime": 4.4239,
"eval_samples_per_second": 65.779,
"eval_steps_per_second": 4.295,
"step": 20703
},
{
"epoch": 201.94,
"learning_rate": 1.596116504854369e-05,
"loss": 0.1469,
"step": 20800
},
{
"epoch": 202.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.27135705947876,
"eval_runtime": 4.4118,
"eval_samples_per_second": 65.96,
"eval_steps_per_second": 4.307,
"step": 20806
},
{
"epoch": 202.91,
"learning_rate": 1.5941747572815536e-05,
"loss": 0.1288,
"step": 20900
},
{
"epoch": 203.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.324564456939697,
"eval_runtime": 4.41,
"eval_samples_per_second": 65.986,
"eval_steps_per_second": 4.308,
"step": 20909
},
{
"epoch": 203.88,
"learning_rate": 1.592233009708738e-05,
"loss": 0.1334,
"step": 21000
},
{
"epoch": 204.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.171061038970947,
"eval_runtime": 4.4147,
"eval_samples_per_second": 65.915,
"eval_steps_per_second": 4.304,
"step": 21012
},
{
"epoch": 204.85,
"learning_rate": 1.5902912621359226e-05,
"loss": 0.1419,
"step": 21100
},
{
"epoch": 205.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.326306343078613,
"eval_runtime": 4.3969,
"eval_samples_per_second": 66.183,
"eval_steps_per_second": 4.321,
"step": 21115
},
{
"epoch": 205.83,
"learning_rate": 1.588349514563107e-05,
"loss": 0.1395,
"step": 21200
},
{
"epoch": 206.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.285510063171387,
"eval_runtime": 4.4115,
"eval_samples_per_second": 65.963,
"eval_steps_per_second": 4.307,
"step": 21218
},
{
"epoch": 206.8,
"learning_rate": 1.5864077669902915e-05,
"loss": 0.1255,
"step": 21300
},
{
"epoch": 207.0,
"eval_accuracy": 0.24742268041237114,
"eval_loss": 4.430055141448975,
"eval_runtime": 4.4012,
"eval_samples_per_second": 66.118,
"eval_steps_per_second": 4.317,
"step": 21321
},
{
"epoch": 207.77,
"learning_rate": 1.584466019417476e-05,
"loss": 0.1288,
"step": 21400
},
{
"epoch": 208.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.373450756072998,
"eval_runtime": 4.4162,
"eval_samples_per_second": 65.893,
"eval_steps_per_second": 4.302,
"step": 21424
},
{
"epoch": 208.74,
"learning_rate": 1.5825242718446604e-05,
"loss": 0.1395,
"step": 21500
},
{
"epoch": 209.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.354865550994873,
"eval_runtime": 4.3919,
"eval_samples_per_second": 66.259,
"eval_steps_per_second": 4.326,
"step": 21527
},
{
"epoch": 209.71,
"learning_rate": 1.580582524271845e-05,
"loss": 0.1144,
"step": 21600
},
{
"epoch": 210.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.456879615783691,
"eval_runtime": 4.3921,
"eval_samples_per_second": 66.255,
"eval_steps_per_second": 4.326,
"step": 21630
},
{
"epoch": 210.68,
"learning_rate": 1.5786407766990293e-05,
"loss": 0.1185,
"step": 21700
},
{
"epoch": 211.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.500795364379883,
"eval_runtime": 4.4075,
"eval_samples_per_second": 66.024,
"eval_steps_per_second": 4.311,
"step": 21733
},
{
"epoch": 211.65,
"learning_rate": 1.5766990291262138e-05,
"loss": 0.1578,
"step": 21800
},
{
"epoch": 212.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.231286525726318,
"eval_runtime": 4.3986,
"eval_samples_per_second": 66.157,
"eval_steps_per_second": 4.32,
"step": 21836
},
{
"epoch": 212.62,
"learning_rate": 1.5747572815533982e-05,
"loss": 0.1434,
"step": 21900
},
{
"epoch": 213.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.444507122039795,
"eval_runtime": 4.403,
"eval_samples_per_second": 66.091,
"eval_steps_per_second": 4.315,
"step": 21939
},
{
"epoch": 213.59,
"learning_rate": 1.5728155339805827e-05,
"loss": 0.1147,
"step": 22000
},
{
"epoch": 214.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.432860851287842,
"eval_runtime": 4.4129,
"eval_samples_per_second": 65.942,
"eval_steps_per_second": 4.306,
"step": 22042
},
{
"epoch": 214.56,
"learning_rate": 1.570873786407767e-05,
"loss": 0.1239,
"step": 22100
},
{
"epoch": 215.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.410243034362793,
"eval_runtime": 4.4417,
"eval_samples_per_second": 65.516,
"eval_steps_per_second": 4.278,
"step": 22145
},
{
"epoch": 215.53,
"learning_rate": 1.5689320388349516e-05,
"loss": 0.1315,
"step": 22200
},
{
"epoch": 216.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.250341892242432,
"eval_runtime": 4.4449,
"eval_samples_per_second": 65.469,
"eval_steps_per_second": 4.275,
"step": 22248
},
{
"epoch": 216.5,
"learning_rate": 1.566990291262136e-05,
"loss": 0.1413,
"step": 22300
},
{
"epoch": 217.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.555916786193848,
"eval_runtime": 4.4149,
"eval_samples_per_second": 65.913,
"eval_steps_per_second": 4.304,
"step": 22351
},
{
"epoch": 217.48,
"learning_rate": 1.5650485436893205e-05,
"loss": 0.1137,
"step": 22400
},
{
"epoch": 218.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.450405120849609,
"eval_runtime": 4.4086,
"eval_samples_per_second": 66.008,
"eval_steps_per_second": 4.31,
"step": 22454
},
{
"epoch": 218.45,
"learning_rate": 1.563106796116505e-05,
"loss": 0.1412,
"step": 22500
},
{
"epoch": 219.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.337742805480957,
"eval_runtime": 4.4117,
"eval_samples_per_second": 65.961,
"eval_steps_per_second": 4.307,
"step": 22557
},
{
"epoch": 219.42,
"learning_rate": 1.5611650485436894e-05,
"loss": 0.1051,
"step": 22600
},
{
"epoch": 220.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.524986743927002,
"eval_runtime": 4.4284,
"eval_samples_per_second": 65.713,
"eval_steps_per_second": 4.291,
"step": 22660
},
{
"epoch": 220.39,
"learning_rate": 1.559223300970874e-05,
"loss": 0.1314,
"step": 22700
},
{
"epoch": 221.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.453868389129639,
"eval_runtime": 4.4017,
"eval_samples_per_second": 66.111,
"eval_steps_per_second": 4.317,
"step": 22763
},
{
"epoch": 221.36,
"learning_rate": 1.5572815533980583e-05,
"loss": 0.1284,
"step": 22800
},
{
"epoch": 222.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.34807825088501,
"eval_runtime": 4.3999,
"eval_samples_per_second": 66.139,
"eval_steps_per_second": 4.318,
"step": 22866
},
{
"epoch": 222.33,
"learning_rate": 1.5553398058252428e-05,
"loss": 0.1159,
"step": 22900
},
{
"epoch": 223.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 4.428357124328613,
"eval_runtime": 4.527,
"eval_samples_per_second": 64.28,
"eval_steps_per_second": 4.197,
"step": 22969
},
{
"epoch": 223.3,
"learning_rate": 1.5533980582524273e-05,
"loss": 0.1219,
"step": 23000
},
{
"epoch": 224.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.506850242614746,
"eval_runtime": 4.5066,
"eval_samples_per_second": 64.572,
"eval_steps_per_second": 4.216,
"step": 23072
},
{
"epoch": 224.27,
"learning_rate": 1.5514563106796117e-05,
"loss": 0.1183,
"step": 23100
},
{
"epoch": 225.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.546067237854004,
"eval_runtime": 4.3972,
"eval_samples_per_second": 66.179,
"eval_steps_per_second": 4.321,
"step": 23175
},
{
"epoch": 225.24,
"learning_rate": 1.5495145631067962e-05,
"loss": 0.1172,
"step": 23200
},
{
"epoch": 226.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.398603439331055,
"eval_runtime": 4.3972,
"eval_samples_per_second": 66.179,
"eval_steps_per_second": 4.321,
"step": 23278
},
{
"epoch": 226.21,
"learning_rate": 1.5475728155339806e-05,
"loss": 0.1216,
"step": 23300
},
{
"epoch": 227.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 4.515445232391357,
"eval_runtime": 4.4132,
"eval_samples_per_second": 65.938,
"eval_steps_per_second": 4.305,
"step": 23381
},
{
"epoch": 227.18,
"learning_rate": 1.545631067961165e-05,
"loss": 0.1207,
"step": 23400
},
{
"epoch": 228.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.484820365905762,
"eval_runtime": 4.4113,
"eval_samples_per_second": 65.967,
"eval_steps_per_second": 4.307,
"step": 23484
},
{
"epoch": 228.16,
"learning_rate": 1.5436893203883496e-05,
"loss": 0.1303,
"step": 23500
},
{
"epoch": 229.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.392459869384766,
"eval_runtime": 4.4087,
"eval_samples_per_second": 66.006,
"eval_steps_per_second": 4.31,
"step": 23587
},
{
"epoch": 229.13,
"learning_rate": 1.541747572815534e-05,
"loss": 0.1238,
"step": 23600
},
{
"epoch": 230.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.37477445602417,
"eval_runtime": 4.4077,
"eval_samples_per_second": 66.022,
"eval_steps_per_second": 4.311,
"step": 23690
},
{
"epoch": 230.1,
"learning_rate": 1.5398058252427185e-05,
"loss": 0.1126,
"step": 23700
},
{
"epoch": 231.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 4.480639934539795,
"eval_runtime": 4.3943,
"eval_samples_per_second": 66.222,
"eval_steps_per_second": 4.324,
"step": 23793
},
{
"epoch": 231.07,
"learning_rate": 1.537864077669903e-05,
"loss": 0.1227,
"step": 23800
},
{
"epoch": 232.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.443945407867432,
"eval_runtime": 4.5038,
"eval_samples_per_second": 64.612,
"eval_steps_per_second": 4.219,
"step": 23896
},
{
"epoch": 232.04,
"learning_rate": 1.5359223300970877e-05,
"loss": 0.1146,
"step": 23900
},
{
"epoch": 233.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.522760391235352,
"eval_runtime": 4.3912,
"eval_samples_per_second": 66.269,
"eval_steps_per_second": 4.327,
"step": 23999
},
{
"epoch": 233.01,
"learning_rate": 1.533980582524272e-05,
"loss": 0.123,
"step": 24000
},
{
"epoch": 233.98,
"learning_rate": 1.5320388349514563e-05,
"loss": 0.1168,
"step": 24100
},
{
"epoch": 234.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.56139612197876,
"eval_runtime": 4.4137,
"eval_samples_per_second": 65.93,
"eval_steps_per_second": 4.305,
"step": 24102
},
{
"epoch": 234.95,
"learning_rate": 1.5300970873786408e-05,
"loss": 0.1219,
"step": 24200
},
{
"epoch": 235.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.4129486083984375,
"eval_runtime": 4.4147,
"eval_samples_per_second": 65.915,
"eval_steps_per_second": 4.304,
"step": 24205
},
{
"epoch": 235.92,
"learning_rate": 1.5281553398058252e-05,
"loss": 0.1181,
"step": 24300
},
{
"epoch": 236.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.544414520263672,
"eval_runtime": 4.3973,
"eval_samples_per_second": 66.178,
"eval_steps_per_second": 4.321,
"step": 24308
},
{
"epoch": 236.89,
"learning_rate": 1.52621359223301e-05,
"loss": 0.1167,
"step": 24400
},
{
"epoch": 237.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.403836727142334,
"eval_runtime": 4.4112,
"eval_samples_per_second": 65.968,
"eval_steps_per_second": 4.307,
"step": 24411
},
{
"epoch": 237.86,
"learning_rate": 1.5242718446601943e-05,
"loss": 0.1173,
"step": 24500
},
{
"epoch": 238.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 4.396702289581299,
"eval_runtime": 4.4026,
"eval_samples_per_second": 66.097,
"eval_steps_per_second": 4.316,
"step": 24514
},
{
"epoch": 238.83,
"learning_rate": 1.5223300970873786e-05,
"loss": 0.1052,
"step": 24600
},
{
"epoch": 239.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.505501747131348,
"eval_runtime": 4.3966,
"eval_samples_per_second": 66.188,
"eval_steps_per_second": 4.322,
"step": 24617
},
{
"epoch": 239.81,
"learning_rate": 1.5203883495145632e-05,
"loss": 0.1216,
"step": 24700
},
{
"epoch": 240.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 4.56933069229126,
"eval_runtime": 4.4189,
"eval_samples_per_second": 65.854,
"eval_steps_per_second": 4.3,
"step": 24720
},
{
"epoch": 240.78,
"learning_rate": 1.5184466019417477e-05,
"loss": 0.1242,
"step": 24800
},
{
"epoch": 241.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.490577697753906,
"eval_runtime": 4.4022,
"eval_samples_per_second": 66.104,
"eval_steps_per_second": 4.316,
"step": 24823
},
{
"epoch": 241.75,
"learning_rate": 1.5165048543689323e-05,
"loss": 0.1553,
"step": 24900
},
{
"epoch": 242.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.497089862823486,
"eval_runtime": 4.4179,
"eval_samples_per_second": 65.869,
"eval_steps_per_second": 4.301,
"step": 24926
},
{
"epoch": 242.72,
"learning_rate": 1.5145631067961166e-05,
"loss": 0.1377,
"step": 25000
},
{
"epoch": 243.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.453564643859863,
"eval_runtime": 4.4333,
"eval_samples_per_second": 65.639,
"eval_steps_per_second": 4.286,
"step": 25029
},
{
"epoch": 243.69,
"learning_rate": 1.512621359223301e-05,
"loss": 0.1126,
"step": 25100
},
{
"epoch": 244.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.532435417175293,
"eval_runtime": 4.4152,
"eval_samples_per_second": 65.908,
"eval_steps_per_second": 4.303,
"step": 25132
},
{
"epoch": 244.66,
"learning_rate": 1.5106796116504855e-05,
"loss": 0.1321,
"step": 25200
},
{
"epoch": 245.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.803735256195068,
"eval_runtime": 4.4086,
"eval_samples_per_second": 66.007,
"eval_steps_per_second": 4.31,
"step": 25235
},
{
"epoch": 245.63,
"learning_rate": 1.50873786407767e-05,
"loss": 0.115,
"step": 25300
},
{
"epoch": 246.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.66818380355835,
"eval_runtime": 4.4075,
"eval_samples_per_second": 66.024,
"eval_steps_per_second": 4.311,
"step": 25338
},
{
"epoch": 246.6,
"learning_rate": 1.5067961165048546e-05,
"loss": 0.1311,
"step": 25400
},
{
"epoch": 247.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 4.63736629486084,
"eval_runtime": 4.3953,
"eval_samples_per_second": 66.206,
"eval_steps_per_second": 4.323,
"step": 25441
},
{
"epoch": 247.57,
"learning_rate": 1.5048543689320389e-05,
"loss": 0.1224,
"step": 25500
},
{
"epoch": 248.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.7802581787109375,
"eval_runtime": 4.3891,
"eval_samples_per_second": 66.301,
"eval_steps_per_second": 4.329,
"step": 25544
},
{
"epoch": 248.54,
"learning_rate": 1.5029126213592234e-05,
"loss": 0.1291,
"step": 25600
},
{
"epoch": 249.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.656409740447998,
"eval_runtime": 4.41,
"eval_samples_per_second": 65.987,
"eval_steps_per_second": 4.308,
"step": 25647
},
{
"epoch": 249.51,
"learning_rate": 1.500970873786408e-05,
"loss": 0.1138,
"step": 25700
},
{
"epoch": 250.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 4.518815040588379,
"eval_runtime": 4.4021,
"eval_samples_per_second": 66.105,
"eval_steps_per_second": 4.316,
"step": 25750
},
{
"epoch": 250.49,
"learning_rate": 1.4990291262135923e-05,
"loss": 0.1159,
"step": 25800
},
{
"epoch": 251.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.511619567871094,
"eval_runtime": 4.4073,
"eval_samples_per_second": 66.027,
"eval_steps_per_second": 4.311,
"step": 25853
},
{
"epoch": 251.46,
"learning_rate": 1.4970873786407769e-05,
"loss": 0.1172,
"step": 25900
},
{
"epoch": 252.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.703920841217041,
"eval_runtime": 4.4286,
"eval_samples_per_second": 65.709,
"eval_steps_per_second": 4.29,
"step": 25956
},
{
"epoch": 252.43,
"learning_rate": 1.4951456310679614e-05,
"loss": 0.1256,
"step": 26000
},
{
"epoch": 253.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.646224498748779,
"eval_runtime": 4.4924,
"eval_samples_per_second": 64.776,
"eval_steps_per_second": 4.229,
"step": 26059
},
{
"epoch": 253.4,
"learning_rate": 1.4932038834951456e-05,
"loss": 0.1227,
"step": 26100
},
{
"epoch": 254.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.746954917907715,
"eval_runtime": 4.4099,
"eval_samples_per_second": 65.987,
"eval_steps_per_second": 4.308,
"step": 26162
},
{
"epoch": 254.37,
"learning_rate": 1.4912621359223303e-05,
"loss": 0.1186,
"step": 26200
},
{
"epoch": 255.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.654090404510498,
"eval_runtime": 4.4095,
"eval_samples_per_second": 65.994,
"eval_steps_per_second": 4.309,
"step": 26265
},
{
"epoch": 255.34,
"learning_rate": 1.4893203883495147e-05,
"loss": 0.1114,
"step": 26300
},
{
"epoch": 256.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.600460052490234,
"eval_runtime": 4.4086,
"eval_samples_per_second": 66.007,
"eval_steps_per_second": 4.31,
"step": 26368
},
{
"epoch": 256.31,
"learning_rate": 1.4873786407766992e-05,
"loss": 0.1154,
"step": 26400
},
{
"epoch": 257.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.570699691772461,
"eval_runtime": 4.4104,
"eval_samples_per_second": 65.981,
"eval_steps_per_second": 4.308,
"step": 26471
},
{
"epoch": 257.28,
"learning_rate": 1.4854368932038836e-05,
"loss": 0.1229,
"step": 26500
},
{
"epoch": 258.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.517983913421631,
"eval_runtime": 4.4402,
"eval_samples_per_second": 65.537,
"eval_steps_per_second": 4.279,
"step": 26574
},
{
"epoch": 258.25,
"learning_rate": 1.483495145631068e-05,
"loss": 0.1138,
"step": 26600
},
{
"epoch": 259.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.621974468231201,
"eval_runtime": 4.3978,
"eval_samples_per_second": 66.17,
"eval_steps_per_second": 4.32,
"step": 26677
},
{
"epoch": 259.22,
"learning_rate": 1.4815533980582526e-05,
"loss": 0.0987,
"step": 26700
},
{
"epoch": 260.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.6445817947387695,
"eval_runtime": 4.3935,
"eval_samples_per_second": 66.234,
"eval_steps_per_second": 4.325,
"step": 26780
},
{
"epoch": 260.19,
"learning_rate": 1.479611650485437e-05,
"loss": 0.1056,
"step": 26800
},
{
"epoch": 261.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.759962558746338,
"eval_runtime": 4.4457,
"eval_samples_per_second": 65.457,
"eval_steps_per_second": 4.274,
"step": 26883
},
{
"epoch": 261.17,
"learning_rate": 1.4776699029126216e-05,
"loss": 0.1362,
"step": 26900
},
{
"epoch": 262.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.670341968536377,
"eval_runtime": 4.3936,
"eval_samples_per_second": 66.232,
"eval_steps_per_second": 4.324,
"step": 26986
},
{
"epoch": 262.14,
"learning_rate": 1.475728155339806e-05,
"loss": 0.1131,
"step": 27000
},
{
"epoch": 263.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.606517314910889,
"eval_runtime": 4.4121,
"eval_samples_per_second": 65.955,
"eval_steps_per_second": 4.306,
"step": 27089
},
{
"epoch": 263.11,
"learning_rate": 1.4737864077669904e-05,
"loss": 0.1127,
"step": 27100
},
{
"epoch": 264.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.512498378753662,
"eval_runtime": 4.4169,
"eval_samples_per_second": 65.883,
"eval_steps_per_second": 4.302,
"step": 27192
},
{
"epoch": 264.08,
"learning_rate": 1.4718446601941749e-05,
"loss": 0.1248,
"step": 27200
},
{
"epoch": 265.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.596677303314209,
"eval_runtime": 4.4358,
"eval_samples_per_second": 65.603,
"eval_steps_per_second": 4.283,
"step": 27295
},
{
"epoch": 265.05,
"learning_rate": 1.4699029126213593e-05,
"loss": 0.111,
"step": 27300
},
{
"epoch": 266.0,
"eval_accuracy": 0.24742268041237114,
"eval_loss": 4.618172645568848,
"eval_runtime": 4.4576,
"eval_samples_per_second": 65.281,
"eval_steps_per_second": 4.262,
"step": 27398
},
{
"epoch": 266.02,
"learning_rate": 1.467961165048544e-05,
"loss": 0.1022,
"step": 27400
},
{
"epoch": 266.99,
"learning_rate": 1.4660194174757282e-05,
"loss": 0.1203,
"step": 27500
},
{
"epoch": 267.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.596898555755615,
"eval_runtime": 4.4052,
"eval_samples_per_second": 66.059,
"eval_steps_per_second": 4.313,
"step": 27501
},
{
"epoch": 267.96,
"learning_rate": 1.4640776699029127e-05,
"loss": 0.1242,
"step": 27600
},
{
"epoch": 268.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.543684959411621,
"eval_runtime": 4.4036,
"eval_samples_per_second": 66.082,
"eval_steps_per_second": 4.315,
"step": 27604
},
{
"epoch": 268.93,
"learning_rate": 1.4621359223300973e-05,
"loss": 0.1041,
"step": 27700
},
{
"epoch": 269.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.710482120513916,
"eval_runtime": 4.4051,
"eval_samples_per_second": 66.06,
"eval_steps_per_second": 4.313,
"step": 27707
},
{
"epoch": 269.9,
"learning_rate": 1.4601941747572816e-05,
"loss": 0.1233,
"step": 27800
},
{
"epoch": 270.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.630477428436279,
"eval_runtime": 4.4588,
"eval_samples_per_second": 65.264,
"eval_steps_per_second": 4.261,
"step": 27810
},
{
"epoch": 270.87,
"learning_rate": 1.4582524271844662e-05,
"loss": 0.1003,
"step": 27900
},
{
"epoch": 271.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.586513996124268,
"eval_runtime": 4.4119,
"eval_samples_per_second": 65.957,
"eval_steps_per_second": 4.306,
"step": 27913
},
{
"epoch": 271.84,
"learning_rate": 1.4563106796116507e-05,
"loss": 0.1144,
"step": 28000
},
{
"epoch": 272.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.621643543243408,
"eval_runtime": 4.398,
"eval_samples_per_second": 66.166,
"eval_steps_per_second": 4.32,
"step": 28016
},
{
"epoch": 272.82,
"learning_rate": 1.454368932038835e-05,
"loss": 0.1061,
"step": 28100
},
{
"epoch": 273.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.538716793060303,
"eval_runtime": 4.4,
"eval_samples_per_second": 66.136,
"eval_steps_per_second": 4.318,
"step": 28119
},
{
"epoch": 273.79,
"learning_rate": 1.4524271844660196e-05,
"loss": 0.1102,
"step": 28200
},
{
"epoch": 274.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.58504581451416,
"eval_runtime": 4.424,
"eval_samples_per_second": 65.778,
"eval_steps_per_second": 4.295,
"step": 28222
},
{
"epoch": 274.76,
"learning_rate": 1.450485436893204e-05,
"loss": 0.109,
"step": 28300
},
{
"epoch": 275.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.644214630126953,
"eval_runtime": 4.3972,
"eval_samples_per_second": 66.178,
"eval_steps_per_second": 4.321,
"step": 28325
},
{
"epoch": 275.73,
"learning_rate": 1.4485436893203884e-05,
"loss": 0.1277,
"step": 28400
},
{
"epoch": 276.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 4.583741188049316,
"eval_runtime": 4.4168,
"eval_samples_per_second": 65.885,
"eval_steps_per_second": 4.302,
"step": 28428
},
{
"epoch": 276.7,
"learning_rate": 1.446601941747573e-05,
"loss": 0.1101,
"step": 28500
},
{
"epoch": 277.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.7879719734191895,
"eval_runtime": 4.4436,
"eval_samples_per_second": 65.488,
"eval_steps_per_second": 4.276,
"step": 28531
},
{
"epoch": 277.67,
"learning_rate": 1.4446601941747573e-05,
"loss": 0.1136,
"step": 28600
},
{
"epoch": 278.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.566427230834961,
"eval_runtime": 4.4045,
"eval_samples_per_second": 66.069,
"eval_steps_per_second": 4.314,
"step": 28634
},
{
"epoch": 278.64,
"learning_rate": 1.4427184466019419e-05,
"loss": 0.1125,
"step": 28700
},
{
"epoch": 279.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.724515914916992,
"eval_runtime": 4.4018,
"eval_samples_per_second": 66.11,
"eval_steps_per_second": 4.316,
"step": 28737
},
{
"epoch": 279.61,
"learning_rate": 1.4407766990291264e-05,
"loss": 0.1207,
"step": 28800
},
{
"epoch": 280.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.78406286239624,
"eval_runtime": 4.4052,
"eval_samples_per_second": 66.059,
"eval_steps_per_second": 4.313,
"step": 28840
},
{
"epoch": 280.58,
"learning_rate": 1.4388349514563106e-05,
"loss": 0.1223,
"step": 28900
},
{
"epoch": 281.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.773590564727783,
"eval_runtime": 4.3939,
"eval_samples_per_second": 66.228,
"eval_steps_per_second": 4.324,
"step": 28943
},
{
"epoch": 281.55,
"learning_rate": 1.4368932038834953e-05,
"loss": 0.1132,
"step": 29000
},
{
"epoch": 282.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.619295597076416,
"eval_runtime": 4.4015,
"eval_samples_per_second": 66.114,
"eval_steps_per_second": 4.317,
"step": 29046
},
{
"epoch": 282.52,
"learning_rate": 1.4349514563106797e-05,
"loss": 0.1118,
"step": 29100
},
{
"epoch": 283.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.751223087310791,
"eval_runtime": 4.5072,
"eval_samples_per_second": 64.563,
"eval_steps_per_second": 4.215,
"step": 29149
},
{
"epoch": 283.5,
"learning_rate": 1.4330097087378642e-05,
"loss": 0.1196,
"step": 29200
},
{
"epoch": 284.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.777285099029541,
"eval_runtime": 4.3932,
"eval_samples_per_second": 66.239,
"eval_steps_per_second": 4.325,
"step": 29252
},
{
"epoch": 284.47,
"learning_rate": 1.4310679611650486e-05,
"loss": 0.1035,
"step": 29300
},
{
"epoch": 285.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.661113262176514,
"eval_runtime": 4.4004,
"eval_samples_per_second": 66.131,
"eval_steps_per_second": 4.318,
"step": 29355
},
{
"epoch": 285.44,
"learning_rate": 1.4291262135922331e-05,
"loss": 0.1079,
"step": 29400
},
{
"epoch": 286.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.691645622253418,
"eval_runtime": 4.4078,
"eval_samples_per_second": 66.02,
"eval_steps_per_second": 4.311,
"step": 29458
},
{
"epoch": 286.41,
"learning_rate": 1.4271844660194176e-05,
"loss": 0.1124,
"step": 29500
},
{
"epoch": 287.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.650529384613037,
"eval_runtime": 4.3911,
"eval_samples_per_second": 66.27,
"eval_steps_per_second": 4.327,
"step": 29561
},
{
"epoch": 287.38,
"learning_rate": 1.425242718446602e-05,
"loss": 0.1024,
"step": 29600
},
{
"epoch": 288.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.63031005859375,
"eval_runtime": 4.3925,
"eval_samples_per_second": 66.25,
"eval_steps_per_second": 4.326,
"step": 29664
},
{
"epoch": 288.35,
"learning_rate": 1.4233009708737866e-05,
"loss": 0.101,
"step": 29700
},
{
"epoch": 289.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.607905864715576,
"eval_runtime": 4.3996,
"eval_samples_per_second": 66.142,
"eval_steps_per_second": 4.319,
"step": 29767
},
{
"epoch": 289.32,
"learning_rate": 1.421359223300971e-05,
"loss": 0.124,
"step": 29800
},
{
"epoch": 290.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.456625938415527,
"eval_runtime": 4.4018,
"eval_samples_per_second": 66.11,
"eval_steps_per_second": 4.316,
"step": 29870
},
{
"epoch": 290.29,
"learning_rate": 1.4194174757281554e-05,
"loss": 0.1121,
"step": 29900
},
{
"epoch": 291.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.502068519592285,
"eval_runtime": 4.4021,
"eval_samples_per_second": 66.105,
"eval_steps_per_second": 4.316,
"step": 29973
},
{
"epoch": 291.26,
"learning_rate": 1.41747572815534e-05,
"loss": 0.1005,
"step": 30000
},
{
"epoch": 292.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.547921180725098,
"eval_runtime": 4.4521,
"eval_samples_per_second": 65.362,
"eval_steps_per_second": 4.268,
"step": 30076
},
{
"epoch": 292.23,
"learning_rate": 1.4155339805825243e-05,
"loss": 0.1152,
"step": 30100
},
{
"epoch": 293.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.665774822235107,
"eval_runtime": 4.4006,
"eval_samples_per_second": 66.128,
"eval_steps_per_second": 4.318,
"step": 30179
},
{
"epoch": 293.2,
"learning_rate": 1.413592233009709e-05,
"loss": 0.113,
"step": 30200
},
{
"epoch": 294.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.5608320236206055,
"eval_runtime": 4.4028,
"eval_samples_per_second": 66.094,
"eval_steps_per_second": 4.315,
"step": 30282
},
{
"epoch": 294.17,
"learning_rate": 1.4116504854368934e-05,
"loss": 0.112,
"step": 30300
},
{
"epoch": 295.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.657680511474609,
"eval_runtime": 4.4088,
"eval_samples_per_second": 66.004,
"eval_steps_per_second": 4.31,
"step": 30385
},
{
"epoch": 295.15,
"learning_rate": 1.4097087378640777e-05,
"loss": 0.1095,
"step": 30400
},
{
"epoch": 296.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.532250881195068,
"eval_runtime": 4.4136,
"eval_samples_per_second": 65.932,
"eval_steps_per_second": 4.305,
"step": 30488
},
{
"epoch": 296.12,
"learning_rate": 1.4077669902912623e-05,
"loss": 0.1053,
"step": 30500
},
{
"epoch": 297.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.635454177856445,
"eval_runtime": 4.4197,
"eval_samples_per_second": 65.841,
"eval_steps_per_second": 4.299,
"step": 30591
},
{
"epoch": 297.09,
"learning_rate": 1.4058252427184466e-05,
"loss": 0.1138,
"step": 30600
},
{
"epoch": 298.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.718722343444824,
"eval_runtime": 4.4225,
"eval_samples_per_second": 65.799,
"eval_steps_per_second": 4.296,
"step": 30694
},
{
"epoch": 298.06,
"learning_rate": 1.4038834951456312e-05,
"loss": 0.1105,
"step": 30700
},
{
"epoch": 299.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.603695392608643,
"eval_runtime": 4.4021,
"eval_samples_per_second": 66.105,
"eval_steps_per_second": 4.316,
"step": 30797
},
{
"epoch": 299.03,
"learning_rate": 1.4019417475728157e-05,
"loss": 0.1175,
"step": 30800
},
{
"epoch": 300.0,
"learning_rate": 1.4e-05,
"loss": 0.0944,
"step": 30900
},
{
"epoch": 300.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.719486713409424,
"eval_runtime": 4.4224,
"eval_samples_per_second": 65.801,
"eval_steps_per_second": 4.296,
"step": 30900
},
{
"epoch": 300.97,
"learning_rate": 1.3980582524271846e-05,
"loss": 0.1027,
"step": 31000
},
{
"epoch": 301.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.67861795425415,
"eval_runtime": 4.3977,
"eval_samples_per_second": 66.171,
"eval_steps_per_second": 4.32,
"step": 31003
},
{
"epoch": 301.94,
"learning_rate": 1.396116504854369e-05,
"loss": 0.0994,
"step": 31100
},
{
"epoch": 302.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.762547492980957,
"eval_runtime": 4.3983,
"eval_samples_per_second": 66.162,
"eval_steps_per_second": 4.32,
"step": 31106
},
{
"epoch": 302.91,
"learning_rate": 1.3941747572815535e-05,
"loss": 0.1229,
"step": 31200
},
{
"epoch": 303.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.849686622619629,
"eval_runtime": 4.3908,
"eval_samples_per_second": 66.275,
"eval_steps_per_second": 4.327,
"step": 31209
},
{
"epoch": 303.88,
"learning_rate": 1.392233009708738e-05,
"loss": 0.1094,
"step": 31300
},
{
"epoch": 304.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 4.74536657333374,
"eval_runtime": 4.4588,
"eval_samples_per_second": 65.265,
"eval_steps_per_second": 4.261,
"step": 31312
},
{
"epoch": 304.85,
"learning_rate": 1.3902912621359224e-05,
"loss": 0.1225,
"step": 31400
},
{
"epoch": 305.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.77222204208374,
"eval_runtime": 4.468,
"eval_samples_per_second": 65.13,
"eval_steps_per_second": 4.252,
"step": 31415
},
{
"epoch": 305.83,
"learning_rate": 1.3883495145631069e-05,
"loss": 0.102,
"step": 31500
},
{
"epoch": 306.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.843104839324951,
"eval_runtime": 4.4326,
"eval_samples_per_second": 65.65,
"eval_steps_per_second": 4.286,
"step": 31518
},
{
"epoch": 306.8,
"learning_rate": 1.3864077669902914e-05,
"loss": 0.1283,
"step": 31600
},
{
"epoch": 307.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.797704219818115,
"eval_runtime": 4.4133,
"eval_samples_per_second": 65.937,
"eval_steps_per_second": 4.305,
"step": 31621
},
{
"epoch": 307.77,
"learning_rate": 1.384466019417476e-05,
"loss": 0.109,
"step": 31700
},
{
"epoch": 308.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 4.638222694396973,
"eval_runtime": 4.4167,
"eval_samples_per_second": 65.886,
"eval_steps_per_second": 4.302,
"step": 31724
},
{
"epoch": 308.74,
"learning_rate": 1.3825242718446603e-05,
"loss": 0.1193,
"step": 31800
},
{
"epoch": 309.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 4.7093939781188965,
"eval_runtime": 4.4231,
"eval_samples_per_second": 65.791,
"eval_steps_per_second": 4.296,
"step": 31827
},
{
"epoch": 309.71,
"learning_rate": 1.3805825242718447e-05,
"loss": 0.1106,
"step": 31900
},
{
"epoch": 310.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.756236553192139,
"eval_runtime": 4.5026,
"eval_samples_per_second": 64.629,
"eval_steps_per_second": 4.22,
"step": 31930
},
{
"epoch": 310.68,
"learning_rate": 1.3786407766990294e-05,
"loss": 0.1032,
"step": 32000
},
{
"epoch": 311.0,
"eval_accuracy": 0.25773195876288657,
"eval_loss": 4.726458549499512,
"eval_runtime": 4.4077,
"eval_samples_per_second": 66.021,
"eval_steps_per_second": 4.311,
"step": 32033
},
{
"epoch": 311.65,
"learning_rate": 1.3766990291262136e-05,
"loss": 0.114,
"step": 32100
},
{
"epoch": 312.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.751614570617676,
"eval_runtime": 4.411,
"eval_samples_per_second": 65.971,
"eval_steps_per_second": 4.307,
"step": 32136
},
{
"epoch": 312.62,
"learning_rate": 1.3747572815533983e-05,
"loss": 0.1265,
"step": 32200
},
{
"epoch": 313.0,
"eval_accuracy": 0.24742268041237114,
"eval_loss": 4.788166522979736,
"eval_runtime": 4.4529,
"eval_samples_per_second": 65.351,
"eval_steps_per_second": 4.267,
"step": 32239
},
{
"epoch": 313.59,
"learning_rate": 1.3728155339805826e-05,
"loss": 0.1252,
"step": 32300
},
{
"epoch": 314.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 4.70837926864624,
"eval_runtime": 4.4294,
"eval_samples_per_second": 65.697,
"eval_steps_per_second": 4.29,
"step": 32342
},
{
"epoch": 314.56,
"learning_rate": 1.370873786407767e-05,
"loss": 0.1102,
"step": 32400
},
{
"epoch": 315.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.689497470855713,
"eval_runtime": 4.4128,
"eval_samples_per_second": 65.944,
"eval_steps_per_second": 4.306,
"step": 32445
},
{
"epoch": 315.53,
"learning_rate": 1.3689320388349517e-05,
"loss": 0.0984,
"step": 32500
},
{
"epoch": 316.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 4.6340837478637695,
"eval_runtime": 4.3942,
"eval_samples_per_second": 66.224,
"eval_steps_per_second": 4.324,
"step": 32548
},
{
"epoch": 316.5,
"learning_rate": 1.366990291262136e-05,
"loss": 0.0978,
"step": 32600
},
{
"epoch": 317.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 4.621079921722412,
"eval_runtime": 4.4388,
"eval_samples_per_second": 65.558,
"eval_steps_per_second": 4.28,
"step": 32651
},
{
"epoch": 317.48,
"learning_rate": 1.3650485436893206e-05,
"loss": 0.1068,
"step": 32700
},
{
"epoch": 318.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.7675371170043945,
"eval_runtime": 4.4083,
"eval_samples_per_second": 66.012,
"eval_steps_per_second": 4.31,
"step": 32754
},
{
"epoch": 318.45,
"learning_rate": 1.363106796116505e-05,
"loss": 0.1017,
"step": 32800
},
{
"epoch": 319.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.706081390380859,
"eval_runtime": 4.4275,
"eval_samples_per_second": 65.726,
"eval_steps_per_second": 4.291,
"step": 32857
},
{
"epoch": 319.42,
"learning_rate": 1.3611650485436893e-05,
"loss": 0.1138,
"step": 32900
},
{
"epoch": 320.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.713945388793945,
"eval_runtime": 4.4304,
"eval_samples_per_second": 65.682,
"eval_steps_per_second": 4.289,
"step": 32960
},
{
"epoch": 320.39,
"learning_rate": 1.359223300970874e-05,
"loss": 0.0997,
"step": 33000
},
{
"epoch": 321.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.711687088012695,
"eval_runtime": 4.4111,
"eval_samples_per_second": 65.97,
"eval_steps_per_second": 4.307,
"step": 33063
},
{
"epoch": 321.36,
"learning_rate": 1.3572815533980584e-05,
"loss": 0.1036,
"step": 33100
},
{
"epoch": 322.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.71359920501709,
"eval_runtime": 4.4189,
"eval_samples_per_second": 65.853,
"eval_steps_per_second": 4.3,
"step": 33166
},
{
"epoch": 322.33,
"learning_rate": 1.3553398058252429e-05,
"loss": 0.0988,
"step": 33200
},
{
"epoch": 323.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.7139410972595215,
"eval_runtime": 4.4238,
"eval_samples_per_second": 65.78,
"eval_steps_per_second": 4.295,
"step": 33269
},
{
"epoch": 323.3,
"learning_rate": 1.3533980582524273e-05,
"loss": 0.1052,
"step": 33300
},
{
"epoch": 324.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.7646050453186035,
"eval_runtime": 4.3998,
"eval_samples_per_second": 66.139,
"eval_steps_per_second": 4.318,
"step": 33372
},
{
"epoch": 324.27,
"learning_rate": 1.3514563106796118e-05,
"loss": 0.0957,
"step": 33400
},
{
"epoch": 325.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.79006290435791,
"eval_runtime": 4.4111,
"eval_samples_per_second": 65.97,
"eval_steps_per_second": 4.307,
"step": 33475
},
{
"epoch": 325.24,
"learning_rate": 1.3495145631067962e-05,
"loss": 0.1009,
"step": 33500
},
{
"epoch": 326.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.704848289489746,
"eval_runtime": 4.3963,
"eval_samples_per_second": 66.192,
"eval_steps_per_second": 4.322,
"step": 33578
},
{
"epoch": 326.21,
"learning_rate": 1.3475728155339807e-05,
"loss": 0.0957,
"step": 33600
},
{
"epoch": 327.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.62115478515625,
"eval_runtime": 4.4064,
"eval_samples_per_second": 66.04,
"eval_steps_per_second": 4.312,
"step": 33681
},
{
"epoch": 327.18,
"learning_rate": 1.345631067961165e-05,
"loss": 0.1244,
"step": 33700
},
{
"epoch": 328.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.748119831085205,
"eval_runtime": 4.4371,
"eval_samples_per_second": 65.583,
"eval_steps_per_second": 4.282,
"step": 33784
},
{
"epoch": 328.16,
"learning_rate": 1.3436893203883496e-05,
"loss": 0.1021,
"step": 33800
},
{
"epoch": 329.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.749732971191406,
"eval_runtime": 4.4065,
"eval_samples_per_second": 66.038,
"eval_steps_per_second": 4.312,
"step": 33887
},
{
"epoch": 329.13,
"learning_rate": 1.341747572815534e-05,
"loss": 0.1017,
"step": 33900
},
{
"epoch": 330.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.830997467041016,
"eval_runtime": 4.4152,
"eval_samples_per_second": 65.908,
"eval_steps_per_second": 4.303,
"step": 33990
},
{
"epoch": 330.1,
"learning_rate": 1.3398058252427187e-05,
"loss": 0.0957,
"step": 34000
},
{
"epoch": 331.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.694131851196289,
"eval_runtime": 4.4251,
"eval_samples_per_second": 65.762,
"eval_steps_per_second": 4.294,
"step": 34093
},
{
"epoch": 331.07,
"learning_rate": 1.337864077669903e-05,
"loss": 0.1042,
"step": 34100
},
{
"epoch": 332.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 4.725266456604004,
"eval_runtime": 4.3958,
"eval_samples_per_second": 66.199,
"eval_steps_per_second": 4.322,
"step": 34196
},
{
"epoch": 332.04,
"learning_rate": 1.3359223300970874e-05,
"loss": 0.1046,
"step": 34200
},
{
"epoch": 333.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.859306335449219,
"eval_runtime": 4.3972,
"eval_samples_per_second": 66.179,
"eval_steps_per_second": 4.321,
"step": 34299
},
{
"epoch": 333.01,
"learning_rate": 1.3339805825242719e-05,
"loss": 0.0984,
"step": 34300
},
{
"epoch": 333.98,
"learning_rate": 1.3320388349514564e-05,
"loss": 0.1103,
"step": 34400
},
{
"epoch": 334.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.847973823547363,
"eval_runtime": 4.4128,
"eval_samples_per_second": 65.944,
"eval_steps_per_second": 4.306,
"step": 34402
},
{
"epoch": 334.95,
"learning_rate": 1.330097087378641e-05,
"loss": 0.09,
"step": 34500
},
{
"epoch": 335.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 4.91008996963501,
"eval_runtime": 4.407,
"eval_samples_per_second": 66.032,
"eval_steps_per_second": 4.311,
"step": 34505
},
{
"epoch": 335.92,
"learning_rate": 1.3281553398058253e-05,
"loss": 0.1108,
"step": 34600
},
{
"epoch": 336.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.783932209014893,
"eval_runtime": 4.4104,
"eval_samples_per_second": 65.98,
"eval_steps_per_second": 4.308,
"step": 34608
},
{
"epoch": 336.89,
"learning_rate": 1.3262135922330097e-05,
"loss": 0.1043,
"step": 34700
},
{
"epoch": 337.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.954315662384033,
"eval_runtime": 4.4085,
"eval_samples_per_second": 66.009,
"eval_steps_per_second": 4.31,
"step": 34711
},
{
"epoch": 337.86,
"learning_rate": 1.3242718446601944e-05,
"loss": 0.104,
"step": 34800
},
{
"epoch": 338.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.802567481994629,
"eval_runtime": 4.454,
"eval_samples_per_second": 65.335,
"eval_steps_per_second": 4.266,
"step": 34814
},
{
"epoch": 338.83,
"learning_rate": 1.3223300970873786e-05,
"loss": 0.1015,
"step": 34900
},
{
"epoch": 339.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.800775051116943,
"eval_runtime": 4.4097,
"eval_samples_per_second": 65.992,
"eval_steps_per_second": 4.309,
"step": 34917
},
{
"epoch": 339.81,
"learning_rate": 1.3203883495145633e-05,
"loss": 0.1029,
"step": 35000
},
{
"epoch": 340.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.90689754486084,
"eval_runtime": 4.4116,
"eval_samples_per_second": 65.963,
"eval_steps_per_second": 4.307,
"step": 35020
},
{
"epoch": 340.78,
"learning_rate": 1.3184466019417477e-05,
"loss": 0.1002,
"step": 35100
},
{
"epoch": 341.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 4.924178600311279,
"eval_runtime": 4.4136,
"eval_samples_per_second": 65.933,
"eval_steps_per_second": 4.305,
"step": 35123
},
{
"epoch": 341.75,
"learning_rate": 1.316504854368932e-05,
"loss": 0.1076,
"step": 35200
},
{
"epoch": 342.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.719906330108643,
"eval_runtime": 4.4068,
"eval_samples_per_second": 66.034,
"eval_steps_per_second": 4.312,
"step": 35226
},
{
"epoch": 342.72,
"learning_rate": 1.3145631067961167e-05,
"loss": 0.1055,
"step": 35300
},
{
"epoch": 343.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 4.844000816345215,
"eval_runtime": 4.4456,
"eval_samples_per_second": 65.458,
"eval_steps_per_second": 4.274,
"step": 35329
},
{
"epoch": 343.69,
"learning_rate": 1.3126213592233011e-05,
"loss": 0.0925,
"step": 35400
},
{
"epoch": 344.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 4.857196807861328,
"eval_runtime": 4.408,
"eval_samples_per_second": 66.016,
"eval_steps_per_second": 4.31,
"step": 35432
},
{
"epoch": 344.66,
"learning_rate": 1.3106796116504856e-05,
"loss": 0.0827,
"step": 35500
},
{
"epoch": 345.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 4.913283824920654,
"eval_runtime": 4.4856,
"eval_samples_per_second": 64.874,
"eval_steps_per_second": 4.236,
"step": 35535
},
{
"epoch": 345.63,
"learning_rate": 1.30873786407767e-05,
"loss": 0.1105,
"step": 35600
},
{
"epoch": 346.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.986526012420654,
"eval_runtime": 4.4528,
"eval_samples_per_second": 65.352,
"eval_steps_per_second": 4.267,
"step": 35638
},
{
"epoch": 346.6,
"learning_rate": 1.3067961165048543e-05,
"loss": 0.0875,
"step": 35700
},
{
"epoch": 347.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.797267436981201,
"eval_runtime": 4.4052,
"eval_samples_per_second": 66.059,
"eval_steps_per_second": 4.313,
"step": 35741
},
{
"epoch": 347.57,
"learning_rate": 1.304854368932039e-05,
"loss": 0.106,
"step": 35800
},
{
"epoch": 348.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.869633674621582,
"eval_runtime": 4.392,
"eval_samples_per_second": 66.257,
"eval_steps_per_second": 4.326,
"step": 35844
},
{
"epoch": 348.54,
"learning_rate": 1.3029126213592234e-05,
"loss": 0.1083,
"step": 35900
},
{
"epoch": 349.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.978613376617432,
"eval_runtime": 4.3987,
"eval_samples_per_second": 66.156,
"eval_steps_per_second": 4.319,
"step": 35947
},
{
"epoch": 349.51,
"learning_rate": 1.300970873786408e-05,
"loss": 0.105,
"step": 36000
},
{
"epoch": 350.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.911431789398193,
"eval_runtime": 4.4527,
"eval_samples_per_second": 65.354,
"eval_steps_per_second": 4.267,
"step": 36050
},
{
"epoch": 350.49,
"learning_rate": 1.2990291262135923e-05,
"loss": 0.1075,
"step": 36100
},
{
"epoch": 351.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 4.869287967681885,
"eval_runtime": 4.3993,
"eval_samples_per_second": 66.147,
"eval_steps_per_second": 4.319,
"step": 36153
},
{
"epoch": 351.46,
"learning_rate": 1.2970873786407768e-05,
"loss": 0.1026,
"step": 36200
},
{
"epoch": 352.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.873523235321045,
"eval_runtime": 4.4141,
"eval_samples_per_second": 65.926,
"eval_steps_per_second": 4.304,
"step": 36256
},
{
"epoch": 352.43,
"learning_rate": 1.2951456310679612e-05,
"loss": 0.101,
"step": 36300
},
{
"epoch": 353.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 5.044714450836182,
"eval_runtime": 4.4175,
"eval_samples_per_second": 65.874,
"eval_steps_per_second": 4.301,
"step": 36359
},
{
"epoch": 353.4,
"learning_rate": 1.2932038834951457e-05,
"loss": 0.0944,
"step": 36400
},
{
"epoch": 354.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.94920015335083,
"eval_runtime": 4.4052,
"eval_samples_per_second": 66.058,
"eval_steps_per_second": 4.313,
"step": 36462
},
{
"epoch": 354.37,
"learning_rate": 1.2912621359223303e-05,
"loss": 0.1055,
"step": 36500
},
{
"epoch": 355.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.989469051361084,
"eval_runtime": 4.4203,
"eval_samples_per_second": 65.833,
"eval_steps_per_second": 4.298,
"step": 36565
},
{
"epoch": 355.34,
"learning_rate": 1.2893203883495146e-05,
"loss": 0.0858,
"step": 36600
},
{
"epoch": 356.0,
"eval_accuracy": 0.24398625429553264,
"eval_loss": 5.095457077026367,
"eval_runtime": 4.3921,
"eval_samples_per_second": 66.256,
"eval_steps_per_second": 4.326,
"step": 36668
},
{
"epoch": 356.31,
"learning_rate": 1.287378640776699e-05,
"loss": 0.0955,
"step": 36700
},
{
"epoch": 357.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.010565280914307,
"eval_runtime": 4.4098,
"eval_samples_per_second": 65.99,
"eval_steps_per_second": 4.309,
"step": 36771
},
{
"epoch": 357.28,
"learning_rate": 1.2854368932038837e-05,
"loss": 0.1108,
"step": 36800
},
{
"epoch": 358.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.910917282104492,
"eval_runtime": 4.4051,
"eval_samples_per_second": 66.06,
"eval_steps_per_second": 4.313,
"step": 36874
},
{
"epoch": 358.25,
"learning_rate": 1.283495145631068e-05,
"loss": 0.1179,
"step": 36900
},
{
"epoch": 359.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.908169269561768,
"eval_runtime": 4.4082,
"eval_samples_per_second": 66.013,
"eval_steps_per_second": 4.31,
"step": 36977
},
{
"epoch": 359.22,
"learning_rate": 1.2815533980582526e-05,
"loss": 0.0984,
"step": 37000
},
{
"epoch": 360.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.848038673400879,
"eval_runtime": 4.4081,
"eval_samples_per_second": 66.015,
"eval_steps_per_second": 4.31,
"step": 37080
},
{
"epoch": 360.19,
"learning_rate": 1.279611650485437e-05,
"loss": 0.0997,
"step": 37100
},
{
"epoch": 361.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.895744323730469,
"eval_runtime": 4.3961,
"eval_samples_per_second": 66.196,
"eval_steps_per_second": 4.322,
"step": 37183
},
{
"epoch": 361.17,
"learning_rate": 1.2776699029126214e-05,
"loss": 0.1128,
"step": 37200
},
{
"epoch": 362.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.912665843963623,
"eval_runtime": 4.4624,
"eval_samples_per_second": 65.212,
"eval_steps_per_second": 4.258,
"step": 37286
},
{
"epoch": 362.14,
"learning_rate": 1.275728155339806e-05,
"loss": 0.0961,
"step": 37300
},
{
"epoch": 363.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.096518039703369,
"eval_runtime": 4.4077,
"eval_samples_per_second": 66.021,
"eval_steps_per_second": 4.311,
"step": 37389
},
{
"epoch": 363.11,
"learning_rate": 1.2737864077669904e-05,
"loss": 0.1096,
"step": 37400
},
{
"epoch": 364.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.031692028045654,
"eval_runtime": 4.4003,
"eval_samples_per_second": 66.132,
"eval_steps_per_second": 4.318,
"step": 37492
},
{
"epoch": 364.08,
"learning_rate": 1.2718446601941749e-05,
"loss": 0.0916,
"step": 37500
},
{
"epoch": 365.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.974483966827393,
"eval_runtime": 4.4064,
"eval_samples_per_second": 66.04,
"eval_steps_per_second": 4.312,
"step": 37595
},
{
"epoch": 365.05,
"learning_rate": 1.2699029126213594e-05,
"loss": 0.1057,
"step": 37600
},
{
"epoch": 366.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.8774895668029785,
"eval_runtime": 4.4086,
"eval_samples_per_second": 66.008,
"eval_steps_per_second": 4.31,
"step": 37698
},
{
"epoch": 366.02,
"learning_rate": 1.2679611650485437e-05,
"loss": 0.0978,
"step": 37700
},
{
"epoch": 366.99,
"learning_rate": 1.2660194174757283e-05,
"loss": 0.0932,
"step": 37800
},
{
"epoch": 367.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.028201580047607,
"eval_runtime": 4.4112,
"eval_samples_per_second": 65.968,
"eval_steps_per_second": 4.307,
"step": 37801
},
{
"epoch": 367.96,
"learning_rate": 1.2640776699029127e-05,
"loss": 0.1072,
"step": 37900
},
{
"epoch": 368.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.809718608856201,
"eval_runtime": 4.4102,
"eval_samples_per_second": 65.983,
"eval_steps_per_second": 4.308,
"step": 37904
},
{
"epoch": 368.93,
"learning_rate": 1.2621359223300974e-05,
"loss": 0.0973,
"step": 38000
},
{
"epoch": 369.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.932143211364746,
"eval_runtime": 4.3895,
"eval_samples_per_second": 66.295,
"eval_steps_per_second": 4.329,
"step": 38007
},
{
"epoch": 369.9,
"learning_rate": 1.2601941747572817e-05,
"loss": 0.1034,
"step": 38100
},
{
"epoch": 370.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.817590236663818,
"eval_runtime": 4.4213,
"eval_samples_per_second": 65.818,
"eval_steps_per_second": 4.297,
"step": 38110
},
{
"epoch": 370.87,
"learning_rate": 1.2582524271844661e-05,
"loss": 0.1084,
"step": 38200
},
{
"epoch": 371.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.856151103973389,
"eval_runtime": 4.4093,
"eval_samples_per_second": 65.996,
"eval_steps_per_second": 4.309,
"step": 38213
},
{
"epoch": 371.84,
"learning_rate": 1.2563106796116506e-05,
"loss": 0.0957,
"step": 38300
},
{
"epoch": 372.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.946646690368652,
"eval_runtime": 4.4359,
"eval_samples_per_second": 65.601,
"eval_steps_per_second": 4.283,
"step": 38316
},
{
"epoch": 372.82,
"learning_rate": 1.254368932038835e-05,
"loss": 0.1049,
"step": 38400
},
{
"epoch": 373.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 4.851525783538818,
"eval_runtime": 4.4102,
"eval_samples_per_second": 65.984,
"eval_steps_per_second": 4.308,
"step": 38419
},
{
"epoch": 373.79,
"learning_rate": 1.2524271844660197e-05,
"loss": 0.097,
"step": 38500
},
{
"epoch": 374.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.8833394050598145,
"eval_runtime": 4.4255,
"eval_samples_per_second": 65.755,
"eval_steps_per_second": 4.293,
"step": 38522
},
{
"epoch": 374.76,
"learning_rate": 1.250485436893204e-05,
"loss": 0.1008,
"step": 38600
},
{
"epoch": 375.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.944166660308838,
"eval_runtime": 4.5113,
"eval_samples_per_second": 64.504,
"eval_steps_per_second": 4.212,
"step": 38625
},
{
"epoch": 375.73,
"learning_rate": 1.2485436893203884e-05,
"loss": 0.1019,
"step": 38700
},
{
"epoch": 376.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.8345046043396,
"eval_runtime": 4.4758,
"eval_samples_per_second": 65.016,
"eval_steps_per_second": 4.245,
"step": 38728
},
{
"epoch": 376.7,
"learning_rate": 1.246601941747573e-05,
"loss": 0.1083,
"step": 38800
},
{
"epoch": 377.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.934985637664795,
"eval_runtime": 4.4219,
"eval_samples_per_second": 65.809,
"eval_steps_per_second": 4.297,
"step": 38831
},
{
"epoch": 377.67,
"learning_rate": 1.2446601941747573e-05,
"loss": 0.1181,
"step": 38900
},
{
"epoch": 378.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 4.860500335693359,
"eval_runtime": 4.4042,
"eval_samples_per_second": 66.073,
"eval_steps_per_second": 4.314,
"step": 38934
},
{
"epoch": 378.64,
"learning_rate": 1.2427184466019418e-05,
"loss": 0.1043,
"step": 39000
},
{
"epoch": 379.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.878326416015625,
"eval_runtime": 4.41,
"eval_samples_per_second": 65.987,
"eval_steps_per_second": 4.308,
"step": 39037
},
{
"epoch": 379.61,
"learning_rate": 1.2407766990291264e-05,
"loss": 0.1212,
"step": 39100
},
{
"epoch": 380.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.8640666007995605,
"eval_runtime": 4.3997,
"eval_samples_per_second": 66.14,
"eval_steps_per_second": 4.318,
"step": 39140
},
{
"epoch": 380.58,
"learning_rate": 1.2388349514563107e-05,
"loss": 0.0941,
"step": 39200
},
{
"epoch": 381.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.9771833419799805,
"eval_runtime": 4.4284,
"eval_samples_per_second": 65.712,
"eval_steps_per_second": 4.29,
"step": 39243
},
{
"epoch": 381.55,
"learning_rate": 1.2368932038834953e-05,
"loss": 0.0986,
"step": 39300
},
{
"epoch": 382.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.919087886810303,
"eval_runtime": 4.5207,
"eval_samples_per_second": 64.371,
"eval_steps_per_second": 4.203,
"step": 39346
},
{
"epoch": 382.52,
"learning_rate": 1.2349514563106798e-05,
"loss": 0.1054,
"step": 39400
},
{
"epoch": 383.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.069497108459473,
"eval_runtime": 4.3931,
"eval_samples_per_second": 66.241,
"eval_steps_per_second": 4.325,
"step": 39449
},
{
"epoch": 383.5,
"learning_rate": 1.233009708737864e-05,
"loss": 0.1066,
"step": 39500
},
{
"epoch": 384.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.114091873168945,
"eval_runtime": 4.3868,
"eval_samples_per_second": 66.336,
"eval_steps_per_second": 4.331,
"step": 39552
},
{
"epoch": 384.47,
"learning_rate": 1.2310679611650487e-05,
"loss": 0.0929,
"step": 39600
},
{
"epoch": 385.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.017634391784668,
"eval_runtime": 4.4104,
"eval_samples_per_second": 65.98,
"eval_steps_per_second": 4.308,
"step": 39655
},
{
"epoch": 385.44,
"learning_rate": 1.229126213592233e-05,
"loss": 0.102,
"step": 39700
},
{
"epoch": 386.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.778977870941162,
"eval_runtime": 4.4049,
"eval_samples_per_second": 66.063,
"eval_steps_per_second": 4.313,
"step": 39758
},
{
"epoch": 386.41,
"learning_rate": 1.2271844660194176e-05,
"loss": 0.103,
"step": 39800
},
{
"epoch": 387.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.734787464141846,
"eval_runtime": 4.4138,
"eval_samples_per_second": 65.93,
"eval_steps_per_second": 4.305,
"step": 39861
},
{
"epoch": 387.38,
"learning_rate": 1.225242718446602e-05,
"loss": 0.107,
"step": 39900
},
{
"epoch": 388.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.666727066040039,
"eval_runtime": 4.3935,
"eval_samples_per_second": 66.234,
"eval_steps_per_second": 4.325,
"step": 39964
},
{
"epoch": 388.35,
"learning_rate": 1.2233009708737864e-05,
"loss": 0.0922,
"step": 40000
},
{
"epoch": 389.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.668744087219238,
"eval_runtime": 4.3997,
"eval_samples_per_second": 66.14,
"eval_steps_per_second": 4.318,
"step": 40067
},
{
"epoch": 389.32,
"learning_rate": 1.221359223300971e-05,
"loss": 0.102,
"step": 40100
},
{
"epoch": 390.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.845048427581787,
"eval_runtime": 4.405,
"eval_samples_per_second": 66.061,
"eval_steps_per_second": 4.313,
"step": 40170
},
{
"epoch": 390.29,
"learning_rate": 1.2194174757281554e-05,
"loss": 0.0958,
"step": 40200
},
{
"epoch": 391.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.127882957458496,
"eval_runtime": 4.4089,
"eval_samples_per_second": 66.002,
"eval_steps_per_second": 4.309,
"step": 40273
},
{
"epoch": 391.26,
"learning_rate": 1.2174757281553399e-05,
"loss": 0.0908,
"step": 40300
},
{
"epoch": 392.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 4.962398529052734,
"eval_runtime": 4.3996,
"eval_samples_per_second": 66.143,
"eval_steps_per_second": 4.319,
"step": 40376
},
{
"epoch": 392.23,
"learning_rate": 1.2155339805825244e-05,
"loss": 0.0988,
"step": 40400
},
{
"epoch": 393.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.167624473571777,
"eval_runtime": 4.3907,
"eval_samples_per_second": 66.277,
"eval_steps_per_second": 4.327,
"step": 40479
},
{
"epoch": 393.2,
"learning_rate": 1.2135922330097088e-05,
"loss": 0.0995,
"step": 40500
},
{
"epoch": 394.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.872605323791504,
"eval_runtime": 4.4082,
"eval_samples_per_second": 66.013,
"eval_steps_per_second": 4.31,
"step": 40582
},
{
"epoch": 394.17,
"learning_rate": 1.2116504854368933e-05,
"loss": 0.1087,
"step": 40600
},
{
"epoch": 395.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.9525041580200195,
"eval_runtime": 4.408,
"eval_samples_per_second": 66.016,
"eval_steps_per_second": 4.31,
"step": 40685
},
{
"epoch": 395.15,
"learning_rate": 1.2097087378640777e-05,
"loss": 0.11,
"step": 40700
},
{
"epoch": 396.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 5.0257697105407715,
"eval_runtime": 4.4366,
"eval_samples_per_second": 65.591,
"eval_steps_per_second": 4.283,
"step": 40788
},
{
"epoch": 396.12,
"learning_rate": 1.2077669902912624e-05,
"loss": 0.0916,
"step": 40800
},
{
"epoch": 397.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 5.011427402496338,
"eval_runtime": 4.4266,
"eval_samples_per_second": 65.739,
"eval_steps_per_second": 4.292,
"step": 40891
},
{
"epoch": 397.09,
"learning_rate": 1.2058252427184467e-05,
"loss": 0.089,
"step": 40900
},
{
"epoch": 398.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.968867778778076,
"eval_runtime": 4.4037,
"eval_samples_per_second": 66.081,
"eval_steps_per_second": 4.315,
"step": 40994
},
{
"epoch": 398.06,
"learning_rate": 1.2038834951456311e-05,
"loss": 0.1089,
"step": 41000
},
{
"epoch": 399.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.864815711975098,
"eval_runtime": 4.3906,
"eval_samples_per_second": 66.279,
"eval_steps_per_second": 4.327,
"step": 41097
},
{
"epoch": 399.03,
"learning_rate": 1.2019417475728157e-05,
"loss": 0.0909,
"step": 41100
},
{
"epoch": 400.0,
"learning_rate": 1.2e-05,
"loss": 0.085,
"step": 41200
},
{
"epoch": 400.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.737619400024414,
"eval_runtime": 4.4091,
"eval_samples_per_second": 65.999,
"eval_steps_per_second": 4.309,
"step": 41200
},
{
"epoch": 400.97,
"learning_rate": 1.1980582524271847e-05,
"loss": 0.1135,
"step": 41300
},
{
"epoch": 401.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.968517303466797,
"eval_runtime": 4.421,
"eval_samples_per_second": 65.821,
"eval_steps_per_second": 4.298,
"step": 41303
},
{
"epoch": 401.94,
"learning_rate": 1.196116504854369e-05,
"loss": 0.1032,
"step": 41400
},
{
"epoch": 402.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 4.695452690124512,
"eval_runtime": 4.4013,
"eval_samples_per_second": 66.117,
"eval_steps_per_second": 4.317,
"step": 41406
},
{
"epoch": 402.91,
"learning_rate": 1.1941747572815534e-05,
"loss": 0.0987,
"step": 41500
},
{
"epoch": 403.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.897180557250977,
"eval_runtime": 4.4151,
"eval_samples_per_second": 65.911,
"eval_steps_per_second": 4.303,
"step": 41509
},
{
"epoch": 403.88,
"learning_rate": 1.192233009708738e-05,
"loss": 0.1112,
"step": 41600
},
{
"epoch": 404.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.8028459548950195,
"eval_runtime": 4.3946,
"eval_samples_per_second": 66.218,
"eval_steps_per_second": 4.323,
"step": 41612
},
{
"epoch": 404.85,
"learning_rate": 1.1902912621359223e-05,
"loss": 0.0926,
"step": 41700
},
{
"epoch": 405.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 4.6858320236206055,
"eval_runtime": 4.399,
"eval_samples_per_second": 66.152,
"eval_steps_per_second": 4.319,
"step": 41715
},
{
"epoch": 405.83,
"learning_rate": 1.188349514563107e-05,
"loss": 0.1032,
"step": 41800
},
{
"epoch": 406.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 4.768010139465332,
"eval_runtime": 4.395,
"eval_samples_per_second": 66.212,
"eval_steps_per_second": 4.323,
"step": 41818
},
{
"epoch": 406.8,
"learning_rate": 1.1864077669902914e-05,
"loss": 0.1066,
"step": 41900
},
{
"epoch": 407.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.80867338180542,
"eval_runtime": 4.5161,
"eval_samples_per_second": 64.436,
"eval_steps_per_second": 4.207,
"step": 41921
},
{
"epoch": 407.77,
"learning_rate": 1.1844660194174757e-05,
"loss": 0.1053,
"step": 42000
},
{
"epoch": 408.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.887094020843506,
"eval_runtime": 4.4348,
"eval_samples_per_second": 65.618,
"eval_steps_per_second": 4.284,
"step": 42024
},
{
"epoch": 408.74,
"learning_rate": 1.1825242718446603e-05,
"loss": 0.0999,
"step": 42100
},
{
"epoch": 409.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.705599784851074,
"eval_runtime": 4.3941,
"eval_samples_per_second": 66.225,
"eval_steps_per_second": 4.324,
"step": 42127
},
{
"epoch": 409.71,
"learning_rate": 1.1805825242718448e-05,
"loss": 0.0929,
"step": 42200
},
{
"epoch": 410.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.884646892547607,
"eval_runtime": 4.4074,
"eval_samples_per_second": 66.026,
"eval_steps_per_second": 4.311,
"step": 42230
},
{
"epoch": 410.68,
"learning_rate": 1.1786407766990292e-05,
"loss": 0.1138,
"step": 42300
},
{
"epoch": 411.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.774139404296875,
"eval_runtime": 4.4036,
"eval_samples_per_second": 66.082,
"eval_steps_per_second": 4.315,
"step": 42333
},
{
"epoch": 411.65,
"learning_rate": 1.1766990291262137e-05,
"loss": 0.1126,
"step": 42400
},
{
"epoch": 412.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.915742874145508,
"eval_runtime": 4.3921,
"eval_samples_per_second": 66.255,
"eval_steps_per_second": 4.326,
"step": 42436
},
{
"epoch": 412.62,
"learning_rate": 1.1747572815533982e-05,
"loss": 0.0835,
"step": 42500
},
{
"epoch": 413.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.960720539093018,
"eval_runtime": 4.3944,
"eval_samples_per_second": 66.22,
"eval_steps_per_second": 4.324,
"step": 42539
},
{
"epoch": 413.59,
"learning_rate": 1.1728155339805826e-05,
"loss": 0.1004,
"step": 42600
},
{
"epoch": 414.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 4.77178955078125,
"eval_runtime": 4.3908,
"eval_samples_per_second": 66.275,
"eval_steps_per_second": 4.327,
"step": 42642
},
{
"epoch": 414.56,
"learning_rate": 1.170873786407767e-05,
"loss": 0.0972,
"step": 42700
},
{
"epoch": 415.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.828794479370117,
"eval_runtime": 4.5174,
"eval_samples_per_second": 64.417,
"eval_steps_per_second": 4.206,
"step": 42745
},
{
"epoch": 415.53,
"learning_rate": 1.1689320388349517e-05,
"loss": 0.1023,
"step": 42800
},
{
"epoch": 416.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 4.908327102661133,
"eval_runtime": 4.4015,
"eval_samples_per_second": 66.114,
"eval_steps_per_second": 4.317,
"step": 42848
},
{
"epoch": 416.5,
"learning_rate": 1.166990291262136e-05,
"loss": 0.0948,
"step": 42900
},
{
"epoch": 417.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.850914478302002,
"eval_runtime": 4.4189,
"eval_samples_per_second": 65.853,
"eval_steps_per_second": 4.3,
"step": 42951
},
{
"epoch": 417.48,
"learning_rate": 1.1650485436893204e-05,
"loss": 0.0918,
"step": 43000
},
{
"epoch": 418.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.832261085510254,
"eval_runtime": 4.5134,
"eval_samples_per_second": 64.475,
"eval_steps_per_second": 4.21,
"step": 43054
},
{
"epoch": 418.45,
"learning_rate": 1.163106796116505e-05,
"loss": 0.0961,
"step": 43100
},
{
"epoch": 419.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.956958293914795,
"eval_runtime": 4.5157,
"eval_samples_per_second": 64.441,
"eval_steps_per_second": 4.207,
"step": 43157
},
{
"epoch": 419.42,
"learning_rate": 1.1611650485436894e-05,
"loss": 0.0911,
"step": 43200
},
{
"epoch": 420.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 4.95814847946167,
"eval_runtime": 4.398,
"eval_samples_per_second": 66.166,
"eval_steps_per_second": 4.32,
"step": 43260
},
{
"epoch": 420.39,
"learning_rate": 1.159223300970874e-05,
"loss": 0.0927,
"step": 43300
},
{
"epoch": 421.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 4.985574245452881,
"eval_runtime": 4.4056,
"eval_samples_per_second": 66.052,
"eval_steps_per_second": 4.313,
"step": 43363
},
{
"epoch": 421.36,
"learning_rate": 1.1572815533980583e-05,
"loss": 0.0907,
"step": 43400
},
{
"epoch": 422.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.9146223068237305,
"eval_runtime": 4.4091,
"eval_samples_per_second": 66.001,
"eval_steps_per_second": 4.309,
"step": 43466
},
{
"epoch": 422.33,
"learning_rate": 1.1553398058252427e-05,
"loss": 0.1039,
"step": 43500
},
{
"epoch": 423.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.781336307525635,
"eval_runtime": 4.4095,
"eval_samples_per_second": 65.994,
"eval_steps_per_second": 4.309,
"step": 43569
},
{
"epoch": 423.3,
"learning_rate": 1.1533980582524274e-05,
"loss": 0.1093,
"step": 43600
},
{
"epoch": 424.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 4.957409858703613,
"eval_runtime": 4.4089,
"eval_samples_per_second": 66.003,
"eval_steps_per_second": 4.309,
"step": 43672
},
{
"epoch": 424.27,
"learning_rate": 1.1514563106796117e-05,
"loss": 0.0859,
"step": 43700
},
{
"epoch": 425.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.893417835235596,
"eval_runtime": 4.4094,
"eval_samples_per_second": 65.995,
"eval_steps_per_second": 4.309,
"step": 43775
},
{
"epoch": 425.24,
"learning_rate": 1.1495145631067961e-05,
"loss": 0.111,
"step": 43800
},
{
"epoch": 426.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.856235504150391,
"eval_runtime": 4.4105,
"eval_samples_per_second": 65.979,
"eval_steps_per_second": 4.308,
"step": 43878
},
{
"epoch": 426.21,
"learning_rate": 1.1475728155339807e-05,
"loss": 0.0944,
"step": 43900
},
{
"epoch": 427.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.826057434082031,
"eval_runtime": 4.3947,
"eval_samples_per_second": 66.215,
"eval_steps_per_second": 4.323,
"step": 43981
},
{
"epoch": 427.18,
"learning_rate": 1.145631067961165e-05,
"loss": 0.1,
"step": 44000
},
{
"epoch": 428.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.822572708129883,
"eval_runtime": 4.4039,
"eval_samples_per_second": 66.078,
"eval_steps_per_second": 4.314,
"step": 44084
},
{
"epoch": 428.16,
"learning_rate": 1.1436893203883497e-05,
"loss": 0.0965,
"step": 44100
},
{
"epoch": 429.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 4.810351371765137,
"eval_runtime": 4.4271,
"eval_samples_per_second": 65.731,
"eval_steps_per_second": 4.292,
"step": 44187
},
{
"epoch": 429.13,
"learning_rate": 1.1417475728155341e-05,
"loss": 0.0905,
"step": 44200
},
{
"epoch": 430.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.741602420806885,
"eval_runtime": 4.3937,
"eval_samples_per_second": 66.232,
"eval_steps_per_second": 4.324,
"step": 44290
},
{
"epoch": 430.1,
"learning_rate": 1.1398058252427184e-05,
"loss": 0.1095,
"step": 44300
},
{
"epoch": 431.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.087652683258057,
"eval_runtime": 4.3905,
"eval_samples_per_second": 66.28,
"eval_steps_per_second": 4.328,
"step": 44393
},
{
"epoch": 431.07,
"learning_rate": 1.137864077669903e-05,
"loss": 0.0855,
"step": 44400
},
{
"epoch": 432.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.93923282623291,
"eval_runtime": 4.406,
"eval_samples_per_second": 66.046,
"eval_steps_per_second": 4.312,
"step": 44496
},
{
"epoch": 432.04,
"learning_rate": 1.1359223300970875e-05,
"loss": 0.1079,
"step": 44500
},
{
"epoch": 433.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 4.822700023651123,
"eval_runtime": 4.393,
"eval_samples_per_second": 66.242,
"eval_steps_per_second": 4.325,
"step": 44599
},
{
"epoch": 433.01,
"learning_rate": 1.133980582524272e-05,
"loss": 0.112,
"step": 44600
},
{
"epoch": 433.98,
"learning_rate": 1.1320388349514564e-05,
"loss": 0.102,
"step": 44700
},
{
"epoch": 434.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.977917671203613,
"eval_runtime": 4.4052,
"eval_samples_per_second": 66.058,
"eval_steps_per_second": 4.313,
"step": 44702
},
{
"epoch": 434.95,
"learning_rate": 1.1300970873786407e-05,
"loss": 0.0888,
"step": 44800
},
{
"epoch": 435.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.995783805847168,
"eval_runtime": 4.3942,
"eval_samples_per_second": 66.224,
"eval_steps_per_second": 4.324,
"step": 44805
},
{
"epoch": 435.92,
"learning_rate": 1.1281553398058253e-05,
"loss": 0.0842,
"step": 44900
},
{
"epoch": 436.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.74613094329834,
"eval_runtime": 4.417,
"eval_samples_per_second": 65.881,
"eval_steps_per_second": 4.302,
"step": 44908
},
{
"epoch": 436.89,
"learning_rate": 1.1262135922330098e-05,
"loss": 0.0918,
"step": 45000
},
{
"epoch": 437.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 5.059698104858398,
"eval_runtime": 4.4033,
"eval_samples_per_second": 66.087,
"eval_steps_per_second": 4.315,
"step": 45011
},
{
"epoch": 437.86,
"learning_rate": 1.1242718446601944e-05,
"loss": 0.0911,
"step": 45100
},
{
"epoch": 438.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.977145195007324,
"eval_runtime": 4.4276,
"eval_samples_per_second": 65.724,
"eval_steps_per_second": 4.291,
"step": 45114
},
{
"epoch": 438.83,
"learning_rate": 1.1223300970873787e-05,
"loss": 0.0859,
"step": 45200
},
{
"epoch": 439.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.837311744689941,
"eval_runtime": 4.4081,
"eval_samples_per_second": 66.014,
"eval_steps_per_second": 4.31,
"step": 45217
},
{
"epoch": 439.81,
"learning_rate": 1.1203883495145632e-05,
"loss": 0.0916,
"step": 45300
},
{
"epoch": 440.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 4.74083948135376,
"eval_runtime": 4.3993,
"eval_samples_per_second": 66.147,
"eval_steps_per_second": 4.319,
"step": 45320
},
{
"epoch": 440.78,
"learning_rate": 1.1184466019417476e-05,
"loss": 0.0988,
"step": 45400
},
{
"epoch": 441.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 4.78790807723999,
"eval_runtime": 4.3954,
"eval_samples_per_second": 66.205,
"eval_steps_per_second": 4.323,
"step": 45423
},
{
"epoch": 441.75,
"learning_rate": 1.116504854368932e-05,
"loss": 0.0994,
"step": 45500
},
{
"epoch": 442.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.735467433929443,
"eval_runtime": 4.3956,
"eval_samples_per_second": 66.203,
"eval_steps_per_second": 4.323,
"step": 45526
},
{
"epoch": 442.72,
"learning_rate": 1.1145631067961167e-05,
"loss": 0.102,
"step": 45600
},
{
"epoch": 443.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 4.869570255279541,
"eval_runtime": 4.4285,
"eval_samples_per_second": 65.711,
"eval_steps_per_second": 4.29,
"step": 45629
},
{
"epoch": 443.69,
"learning_rate": 1.112621359223301e-05,
"loss": 0.0951,
"step": 45700
},
{
"epoch": 444.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 4.957821369171143,
"eval_runtime": 4.4022,
"eval_samples_per_second": 66.104,
"eval_steps_per_second": 4.316,
"step": 45732
},
{
"epoch": 444.66,
"learning_rate": 1.1106796116504855e-05,
"loss": 0.0843,
"step": 45800
},
{
"epoch": 445.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.033973217010498,
"eval_runtime": 4.4069,
"eval_samples_per_second": 66.033,
"eval_steps_per_second": 4.311,
"step": 45835
},
{
"epoch": 445.63,
"learning_rate": 1.10873786407767e-05,
"loss": 0.0927,
"step": 45900
},
{
"epoch": 446.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.01215934753418,
"eval_runtime": 4.4401,
"eval_samples_per_second": 65.539,
"eval_steps_per_second": 4.279,
"step": 45938
},
{
"epoch": 446.6,
"learning_rate": 1.1067961165048544e-05,
"loss": 0.1028,
"step": 46000
},
{
"epoch": 447.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.8365044593811035,
"eval_runtime": 4.4122,
"eval_samples_per_second": 65.953,
"eval_steps_per_second": 4.306,
"step": 46041
},
{
"epoch": 447.57,
"learning_rate": 1.104854368932039e-05,
"loss": 0.0988,
"step": 46100
},
{
"epoch": 448.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 4.978984355926514,
"eval_runtime": 4.3946,
"eval_samples_per_second": 66.218,
"eval_steps_per_second": 4.324,
"step": 46144
},
{
"epoch": 448.54,
"learning_rate": 1.1029126213592235e-05,
"loss": 0.0993,
"step": 46200
},
{
"epoch": 449.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 4.857437610626221,
"eval_runtime": 4.3957,
"eval_samples_per_second": 66.2,
"eval_steps_per_second": 4.322,
"step": 46247
},
{
"epoch": 449.51,
"learning_rate": 1.1009708737864077e-05,
"loss": 0.0935,
"step": 46300
},
{
"epoch": 450.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.0488691329956055,
"eval_runtime": 4.3937,
"eval_samples_per_second": 66.231,
"eval_steps_per_second": 4.324,
"step": 46350
},
{
"epoch": 450.49,
"learning_rate": 1.0990291262135924e-05,
"loss": 0.0942,
"step": 46400
},
{
"epoch": 451.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.9593119621276855,
"eval_runtime": 4.3941,
"eval_samples_per_second": 66.225,
"eval_steps_per_second": 4.324,
"step": 46453
},
{
"epoch": 451.46,
"learning_rate": 1.0970873786407768e-05,
"loss": 0.0875,
"step": 46500
},
{
"epoch": 452.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 4.957134246826172,
"eval_runtime": 4.4335,
"eval_samples_per_second": 65.637,
"eval_steps_per_second": 4.286,
"step": 46556
},
{
"epoch": 452.43,
"learning_rate": 1.0951456310679613e-05,
"loss": 0.0968,
"step": 46600
},
{
"epoch": 453.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 4.800377368927002,
"eval_runtime": 4.4318,
"eval_samples_per_second": 65.662,
"eval_steps_per_second": 4.287,
"step": 46659
},
{
"epoch": 453.4,
"learning_rate": 1.0932038834951457e-05,
"loss": 0.0969,
"step": 46700
},
{
"epoch": 454.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.191004276275635,
"eval_runtime": 4.3946,
"eval_samples_per_second": 66.217,
"eval_steps_per_second": 4.323,
"step": 46762
},
{
"epoch": 454.37,
"learning_rate": 1.09126213592233e-05,
"loss": 0.0954,
"step": 46800
},
{
"epoch": 455.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.035511016845703,
"eval_runtime": 4.4202,
"eval_samples_per_second": 65.835,
"eval_steps_per_second": 4.298,
"step": 46865
},
{
"epoch": 455.34,
"learning_rate": 1.0893203883495147e-05,
"loss": 0.1008,
"step": 46900
},
{
"epoch": 456.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 4.853602409362793,
"eval_runtime": 4.442,
"eval_samples_per_second": 65.51,
"eval_steps_per_second": 4.277,
"step": 46968
},
{
"epoch": 456.31,
"learning_rate": 1.0873786407766991e-05,
"loss": 0.09,
"step": 47000
},
{
"epoch": 457.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 4.704257488250732,
"eval_runtime": 4.4109,
"eval_samples_per_second": 65.972,
"eval_steps_per_second": 4.307,
"step": 47071
},
{
"epoch": 457.28,
"learning_rate": 1.0854368932038837e-05,
"loss": 0.1064,
"step": 47100
},
{
"epoch": 458.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 4.873353958129883,
"eval_runtime": 4.4341,
"eval_samples_per_second": 65.628,
"eval_steps_per_second": 4.285,
"step": 47174
},
{
"epoch": 458.25,
"learning_rate": 1.083495145631068e-05,
"loss": 0.0902,
"step": 47200
},
{
"epoch": 459.0,
"eval_accuracy": 0.32989690721649484,
"eval_loss": 4.906158447265625,
"eval_runtime": 4.3951,
"eval_samples_per_second": 66.21,
"eval_steps_per_second": 4.323,
"step": 47277
},
{
"epoch": 459.22,
"learning_rate": 1.0815533980582525e-05,
"loss": 0.0831,
"step": 47300
},
{
"epoch": 460.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.066910266876221,
"eval_runtime": 4.4086,
"eval_samples_per_second": 66.007,
"eval_steps_per_second": 4.31,
"step": 47380
},
{
"epoch": 460.19,
"learning_rate": 1.079611650485437e-05,
"loss": 0.1008,
"step": 47400
},
{
"epoch": 461.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.140333652496338,
"eval_runtime": 4.413,
"eval_samples_per_second": 65.942,
"eval_steps_per_second": 4.305,
"step": 47483
},
{
"epoch": 461.17,
"learning_rate": 1.0776699029126214e-05,
"loss": 0.0883,
"step": 47500
},
{
"epoch": 462.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.177355766296387,
"eval_runtime": 4.3997,
"eval_samples_per_second": 66.141,
"eval_steps_per_second": 4.318,
"step": 47586
},
{
"epoch": 462.14,
"learning_rate": 1.075728155339806e-05,
"loss": 0.0915,
"step": 47600
},
{
"epoch": 463.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.148591995239258,
"eval_runtime": 4.4145,
"eval_samples_per_second": 65.92,
"eval_steps_per_second": 4.304,
"step": 47689
},
{
"epoch": 463.11,
"learning_rate": 1.0737864077669903e-05,
"loss": 0.1124,
"step": 47700
},
{
"epoch": 464.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.107584476470947,
"eval_runtime": 4.5054,
"eval_samples_per_second": 64.589,
"eval_steps_per_second": 4.217,
"step": 47792
},
{
"epoch": 464.08,
"learning_rate": 1.0718446601941748e-05,
"loss": 0.0892,
"step": 47800
},
{
"epoch": 465.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.02621603012085,
"eval_runtime": 4.4092,
"eval_samples_per_second": 65.999,
"eval_steps_per_second": 4.309,
"step": 47895
},
{
"epoch": 465.05,
"learning_rate": 1.0699029126213594e-05,
"loss": 0.088,
"step": 47900
},
{
"epoch": 466.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.167210102081299,
"eval_runtime": 4.4082,
"eval_samples_per_second": 66.014,
"eval_steps_per_second": 4.31,
"step": 47998
},
{
"epoch": 466.02,
"learning_rate": 1.0679611650485437e-05,
"loss": 0.0862,
"step": 48000
},
{
"epoch": 466.99,
"learning_rate": 1.0660194174757283e-05,
"loss": 0.0969,
"step": 48100
},
{
"epoch": 467.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.17960786819458,
"eval_runtime": 4.4129,
"eval_samples_per_second": 65.943,
"eval_steps_per_second": 4.306,
"step": 48101
},
{
"epoch": 467.96,
"learning_rate": 1.0640776699029128e-05,
"loss": 0.0851,
"step": 48200
},
{
"epoch": 468.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 5.142207622528076,
"eval_runtime": 4.4128,
"eval_samples_per_second": 65.945,
"eval_steps_per_second": 4.306,
"step": 48204
},
{
"epoch": 468.93,
"learning_rate": 1.062135922330097e-05,
"loss": 0.094,
"step": 48300
},
{
"epoch": 469.0,
"eval_accuracy": 0.2508591065292096,
"eval_loss": 5.166329383850098,
"eval_runtime": 4.4012,
"eval_samples_per_second": 66.118,
"eval_steps_per_second": 4.317,
"step": 48307
},
{
"epoch": 469.9,
"learning_rate": 1.0601941747572817e-05,
"loss": 0.085,
"step": 48400
},
{
"epoch": 470.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.2026872634887695,
"eval_runtime": 4.5175,
"eval_samples_per_second": 64.416,
"eval_steps_per_second": 4.206,
"step": 48410
},
{
"epoch": 470.87,
"learning_rate": 1.0582524271844662e-05,
"loss": 0.0953,
"step": 48500
},
{
"epoch": 471.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.078782081604004,
"eval_runtime": 4.4749,
"eval_samples_per_second": 65.03,
"eval_steps_per_second": 4.246,
"step": 48513
},
{
"epoch": 471.84,
"learning_rate": 1.0563106796116506e-05,
"loss": 0.097,
"step": 48600
},
{
"epoch": 472.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.156815528869629,
"eval_runtime": 4.4008,
"eval_samples_per_second": 66.124,
"eval_steps_per_second": 4.317,
"step": 48616
},
{
"epoch": 472.82,
"learning_rate": 1.054368932038835e-05,
"loss": 0.092,
"step": 48700
},
{
"epoch": 473.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.017523765563965,
"eval_runtime": 4.3959,
"eval_samples_per_second": 66.199,
"eval_steps_per_second": 4.322,
"step": 48719
},
{
"epoch": 473.79,
"learning_rate": 1.0524271844660194e-05,
"loss": 0.0876,
"step": 48800
},
{
"epoch": 474.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.006375789642334,
"eval_runtime": 4.4478,
"eval_samples_per_second": 65.426,
"eval_steps_per_second": 4.272,
"step": 48822
},
{
"epoch": 474.76,
"learning_rate": 1.050485436893204e-05,
"loss": 0.0984,
"step": 48900
},
{
"epoch": 475.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 4.988500595092773,
"eval_runtime": 4.388,
"eval_samples_per_second": 66.317,
"eval_steps_per_second": 4.33,
"step": 48925
},
{
"epoch": 475.73,
"learning_rate": 1.0485436893203885e-05,
"loss": 0.0781,
"step": 49000
},
{
"epoch": 476.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.167103290557861,
"eval_runtime": 4.4047,
"eval_samples_per_second": 66.066,
"eval_steps_per_second": 4.314,
"step": 49028
},
{
"epoch": 476.7,
"learning_rate": 1.0466019417475727e-05,
"loss": 0.1001,
"step": 49100
},
{
"epoch": 477.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.242895603179932,
"eval_runtime": 4.3902,
"eval_samples_per_second": 66.284,
"eval_steps_per_second": 4.328,
"step": 49131
},
{
"epoch": 477.67,
"learning_rate": 1.0446601941747574e-05,
"loss": 0.085,
"step": 49200
},
{
"epoch": 478.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.267037868499756,
"eval_runtime": 4.5251,
"eval_samples_per_second": 64.307,
"eval_steps_per_second": 4.199,
"step": 49234
},
{
"epoch": 478.64,
"learning_rate": 1.0427184466019418e-05,
"loss": 0.0924,
"step": 49300
},
{
"epoch": 479.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.075860023498535,
"eval_runtime": 4.3975,
"eval_samples_per_second": 66.174,
"eval_steps_per_second": 4.321,
"step": 49337
},
{
"epoch": 479.61,
"learning_rate": 1.0407766990291263e-05,
"loss": 0.0855,
"step": 49400
},
{
"epoch": 480.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.26733922958374,
"eval_runtime": 4.4145,
"eval_samples_per_second": 65.919,
"eval_steps_per_second": 4.304,
"step": 49440
},
{
"epoch": 480.58,
"learning_rate": 1.0388349514563107e-05,
"loss": 0.1018,
"step": 49500
},
{
"epoch": 481.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.171545028686523,
"eval_runtime": 4.4381,
"eval_samples_per_second": 65.569,
"eval_steps_per_second": 4.281,
"step": 49543
},
{
"epoch": 481.55,
"learning_rate": 1.0368932038834952e-05,
"loss": 0.0883,
"step": 49600
},
{
"epoch": 482.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.085958480834961,
"eval_runtime": 4.4086,
"eval_samples_per_second": 66.008,
"eval_steps_per_second": 4.31,
"step": 49646
},
{
"epoch": 482.52,
"learning_rate": 1.0349514563106797e-05,
"loss": 0.101,
"step": 49700
},
{
"epoch": 483.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.18726110458374,
"eval_runtime": 4.4027,
"eval_samples_per_second": 66.095,
"eval_steps_per_second": 4.316,
"step": 49749
},
{
"epoch": 483.5,
"learning_rate": 1.0330097087378641e-05,
"loss": 0.1061,
"step": 49800
},
{
"epoch": 484.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.115561008453369,
"eval_runtime": 4.4013,
"eval_samples_per_second": 66.117,
"eval_steps_per_second": 4.317,
"step": 49852
},
{
"epoch": 484.47,
"learning_rate": 1.0310679611650487e-05,
"loss": 0.1091,
"step": 49900
},
{
"epoch": 485.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.1338372230529785,
"eval_runtime": 4.4034,
"eval_samples_per_second": 66.085,
"eval_steps_per_second": 4.315,
"step": 49955
},
{
"epoch": 485.44,
"learning_rate": 1.029126213592233e-05,
"loss": 0.0935,
"step": 50000
},
{
"epoch": 486.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.0872015953063965,
"eval_runtime": 4.4043,
"eval_samples_per_second": 66.072,
"eval_steps_per_second": 4.314,
"step": 50058
},
{
"epoch": 486.41,
"learning_rate": 1.0271844660194175e-05,
"loss": 0.0983,
"step": 50100
},
{
"epoch": 487.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.034875392913818,
"eval_runtime": 4.3888,
"eval_samples_per_second": 66.305,
"eval_steps_per_second": 4.329,
"step": 50161
},
{
"epoch": 487.38,
"learning_rate": 1.0252427184466021e-05,
"loss": 0.0955,
"step": 50200
},
{
"epoch": 488.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.149185657501221,
"eval_runtime": 4.4094,
"eval_samples_per_second": 65.995,
"eval_steps_per_second": 4.309,
"step": 50264
},
{
"epoch": 488.35,
"learning_rate": 1.0233009708737864e-05,
"loss": 0.1065,
"step": 50300
},
{
"epoch": 489.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.052890777587891,
"eval_runtime": 4.4248,
"eval_samples_per_second": 65.765,
"eval_steps_per_second": 4.294,
"step": 50367
},
{
"epoch": 489.32,
"learning_rate": 1.021359223300971e-05,
"loss": 0.0771,
"step": 50400
},
{
"epoch": 490.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.01772928237915,
"eval_runtime": 4.4527,
"eval_samples_per_second": 65.354,
"eval_steps_per_second": 4.267,
"step": 50470
},
{
"epoch": 490.29,
"learning_rate": 1.0194174757281555e-05,
"loss": 0.0962,
"step": 50500
},
{
"epoch": 491.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.068234443664551,
"eval_runtime": 4.3973,
"eval_samples_per_second": 66.177,
"eval_steps_per_second": 4.321,
"step": 50573
},
{
"epoch": 491.26,
"learning_rate": 1.0174757281553398e-05,
"loss": 0.0701,
"step": 50600
},
{
"epoch": 492.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.144649505615234,
"eval_runtime": 4.425,
"eval_samples_per_second": 65.763,
"eval_steps_per_second": 4.294,
"step": 50676
},
{
"epoch": 492.23,
"learning_rate": 1.0155339805825244e-05,
"loss": 0.0908,
"step": 50700
},
{
"epoch": 493.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.131927013397217,
"eval_runtime": 4.4148,
"eval_samples_per_second": 65.915,
"eval_steps_per_second": 4.304,
"step": 50779
},
{
"epoch": 493.2,
"learning_rate": 1.0135922330097087e-05,
"loss": 0.0957,
"step": 50800
},
{
"epoch": 494.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 5.173168659210205,
"eval_runtime": 4.4124,
"eval_samples_per_second": 65.95,
"eval_steps_per_second": 4.306,
"step": 50882
},
{
"epoch": 494.17,
"learning_rate": 1.0116504854368933e-05,
"loss": 0.1039,
"step": 50900
},
{
"epoch": 495.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.140829086303711,
"eval_runtime": 4.4096,
"eval_samples_per_second": 65.992,
"eval_steps_per_second": 4.309,
"step": 50985
},
{
"epoch": 495.15,
"learning_rate": 1.0097087378640778e-05,
"loss": 0.0947,
"step": 51000
},
{
"epoch": 496.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.1906304359436035,
"eval_runtime": 4.4456,
"eval_samples_per_second": 65.458,
"eval_steps_per_second": 4.274,
"step": 51088
},
{
"epoch": 496.12,
"learning_rate": 1.007766990291262e-05,
"loss": 0.097,
"step": 51100
},
{
"epoch": 497.0,
"eval_accuracy": 0.24054982817869416,
"eval_loss": 5.318382740020752,
"eval_runtime": 4.3901,
"eval_samples_per_second": 66.286,
"eval_steps_per_second": 4.328,
"step": 51191
},
{
"epoch": 497.09,
"learning_rate": 1.0058252427184467e-05,
"loss": 0.0848,
"step": 51200
},
{
"epoch": 498.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.134629726409912,
"eval_runtime": 4.3879,
"eval_samples_per_second": 66.319,
"eval_steps_per_second": 4.33,
"step": 51294
},
{
"epoch": 498.06,
"learning_rate": 1.0038834951456312e-05,
"loss": 0.0855,
"step": 51300
},
{
"epoch": 499.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.015251636505127,
"eval_runtime": 4.4095,
"eval_samples_per_second": 65.994,
"eval_steps_per_second": 4.309,
"step": 51397
},
{
"epoch": 499.03,
"learning_rate": 1.0019417475728156e-05,
"loss": 0.0848,
"step": 51400
},
{
"epoch": 500.0,
"learning_rate": 1e-05,
"loss": 0.1041,
"step": 51500
},
{
"epoch": 500.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 5.1230010986328125,
"eval_runtime": 4.4364,
"eval_samples_per_second": 65.593,
"eval_steps_per_second": 4.283,
"step": 51500
},
{
"epoch": 500.97,
"learning_rate": 9.980582524271845e-06,
"loss": 0.0936,
"step": 51600
},
{
"epoch": 501.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.133138656616211,
"eval_runtime": 4.4417,
"eval_samples_per_second": 65.515,
"eval_steps_per_second": 4.278,
"step": 51603
},
{
"epoch": 501.94,
"learning_rate": 9.96116504854369e-06,
"loss": 0.0934,
"step": 51700
},
{
"epoch": 502.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 5.176680564880371,
"eval_runtime": 4.3896,
"eval_samples_per_second": 66.293,
"eval_steps_per_second": 4.328,
"step": 51706
},
{
"epoch": 502.91,
"learning_rate": 9.941747572815535e-06,
"loss": 0.0966,
"step": 51800
},
{
"epoch": 503.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.0494513511657715,
"eval_runtime": 4.4589,
"eval_samples_per_second": 65.263,
"eval_steps_per_second": 4.261,
"step": 51809
},
{
"epoch": 503.88,
"learning_rate": 9.922330097087379e-06,
"loss": 0.0953,
"step": 51900
},
{
"epoch": 504.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 5.061805248260498,
"eval_runtime": 4.3961,
"eval_samples_per_second": 66.195,
"eval_steps_per_second": 4.322,
"step": 51912
},
{
"epoch": 504.85,
"learning_rate": 9.902912621359224e-06,
"loss": 0.0852,
"step": 52000
},
{
"epoch": 505.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.116728782653809,
"eval_runtime": 4.3911,
"eval_samples_per_second": 66.27,
"eval_steps_per_second": 4.327,
"step": 52015
},
{
"epoch": 505.83,
"learning_rate": 9.883495145631068e-06,
"loss": 0.0889,
"step": 52100
},
{
"epoch": 506.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.098071575164795,
"eval_runtime": 4.4018,
"eval_samples_per_second": 66.109,
"eval_steps_per_second": 4.316,
"step": 52118
},
{
"epoch": 506.8,
"learning_rate": 9.864077669902915e-06,
"loss": 0.0854,
"step": 52200
},
{
"epoch": 507.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.185293674468994,
"eval_runtime": 4.4,
"eval_samples_per_second": 66.136,
"eval_steps_per_second": 4.318,
"step": 52221
},
{
"epoch": 507.77,
"learning_rate": 9.844660194174757e-06,
"loss": 0.0877,
"step": 52300
},
{
"epoch": 508.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.2160773277282715,
"eval_runtime": 4.3889,
"eval_samples_per_second": 66.303,
"eval_steps_per_second": 4.329,
"step": 52324
},
{
"epoch": 508.74,
"learning_rate": 9.825242718446602e-06,
"loss": 0.1074,
"step": 52400
},
{
"epoch": 509.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 5.167038440704346,
"eval_runtime": 4.3915,
"eval_samples_per_second": 66.264,
"eval_steps_per_second": 4.327,
"step": 52427
},
{
"epoch": 509.71,
"learning_rate": 9.805825242718447e-06,
"loss": 0.1055,
"step": 52500
},
{
"epoch": 510.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.054455757141113,
"eval_runtime": 4.3937,
"eval_samples_per_second": 66.231,
"eval_steps_per_second": 4.324,
"step": 52530
},
{
"epoch": 510.68,
"learning_rate": 9.786407766990293e-06,
"loss": 0.0789,
"step": 52600
},
{
"epoch": 511.0,
"eval_accuracy": 0.2508591065292096,
"eval_loss": 5.069103717803955,
"eval_runtime": 4.4043,
"eval_samples_per_second": 66.072,
"eval_steps_per_second": 4.314,
"step": 52633
},
{
"epoch": 511.65,
"learning_rate": 9.766990291262138e-06,
"loss": 0.0816,
"step": 52700
},
{
"epoch": 512.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.084735870361328,
"eval_runtime": 4.4034,
"eval_samples_per_second": 66.085,
"eval_steps_per_second": 4.315,
"step": 52736
},
{
"epoch": 512.62,
"learning_rate": 9.74757281553398e-06,
"loss": 0.0818,
"step": 52800
},
{
"epoch": 513.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.130674839019775,
"eval_runtime": 4.4001,
"eval_samples_per_second": 66.134,
"eval_steps_per_second": 4.318,
"step": 52839
},
{
"epoch": 513.59,
"learning_rate": 9.728155339805827e-06,
"loss": 0.0999,
"step": 52900
},
{
"epoch": 514.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.102930068969727,
"eval_runtime": 4.3935,
"eval_samples_per_second": 66.234,
"eval_steps_per_second": 4.325,
"step": 52942
},
{
"epoch": 514.56,
"learning_rate": 9.708737864077671e-06,
"loss": 0.0787,
"step": 53000
},
{
"epoch": 515.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.226955890655518,
"eval_runtime": 4.3935,
"eval_samples_per_second": 66.234,
"eval_steps_per_second": 4.325,
"step": 53045
},
{
"epoch": 515.53,
"learning_rate": 9.689320388349516e-06,
"loss": 0.0892,
"step": 53100
},
{
"epoch": 516.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.192480087280273,
"eval_runtime": 4.4136,
"eval_samples_per_second": 65.932,
"eval_steps_per_second": 4.305,
"step": 53148
},
{
"epoch": 516.5,
"learning_rate": 9.669902912621359e-06,
"loss": 0.0995,
"step": 53200
},
{
"epoch": 517.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.246269702911377,
"eval_runtime": 4.4171,
"eval_samples_per_second": 65.88,
"eval_steps_per_second": 4.301,
"step": 53251
},
{
"epoch": 517.48,
"learning_rate": 9.650485436893205e-06,
"loss": 0.0812,
"step": 53300
},
{
"epoch": 518.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.37426233291626,
"eval_runtime": 4.4189,
"eval_samples_per_second": 65.853,
"eval_steps_per_second": 4.3,
"step": 53354
},
{
"epoch": 518.45,
"learning_rate": 9.63106796116505e-06,
"loss": 0.101,
"step": 53400
},
{
"epoch": 519.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.19058084487915,
"eval_runtime": 4.3966,
"eval_samples_per_second": 66.188,
"eval_steps_per_second": 4.322,
"step": 53457
},
{
"epoch": 519.42,
"learning_rate": 9.611650485436894e-06,
"loss": 0.082,
"step": 53500
},
{
"epoch": 520.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.165647506713867,
"eval_runtime": 4.4225,
"eval_samples_per_second": 65.8,
"eval_steps_per_second": 4.296,
"step": 53560
},
{
"epoch": 520.39,
"learning_rate": 9.592233009708739e-06,
"loss": 0.0904,
"step": 53600
},
{
"epoch": 521.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.105106353759766,
"eval_runtime": 4.4296,
"eval_samples_per_second": 65.694,
"eval_steps_per_second": 4.289,
"step": 53663
},
{
"epoch": 521.36,
"learning_rate": 9.572815533980583e-06,
"loss": 0.0909,
"step": 53700
},
{
"epoch": 522.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.254262924194336,
"eval_runtime": 4.496,
"eval_samples_per_second": 64.725,
"eval_steps_per_second": 4.226,
"step": 53766
},
{
"epoch": 522.33,
"learning_rate": 9.553398058252428e-06,
"loss": 0.1033,
"step": 53800
},
{
"epoch": 523.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.217056751251221,
"eval_runtime": 4.4543,
"eval_samples_per_second": 65.33,
"eval_steps_per_second": 4.266,
"step": 53869
},
{
"epoch": 523.3,
"learning_rate": 9.533980582524273e-06,
"loss": 0.0793,
"step": 53900
},
{
"epoch": 524.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.242816925048828,
"eval_runtime": 4.3911,
"eval_samples_per_second": 66.27,
"eval_steps_per_second": 4.327,
"step": 53972
},
{
"epoch": 524.27,
"learning_rate": 9.514563106796117e-06,
"loss": 0.0879,
"step": 54000
},
{
"epoch": 525.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.3479905128479,
"eval_runtime": 4.4236,
"eval_samples_per_second": 65.784,
"eval_steps_per_second": 4.295,
"step": 54075
},
{
"epoch": 525.24,
"learning_rate": 9.495145631067962e-06,
"loss": 0.0836,
"step": 54100
},
{
"epoch": 526.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.280987739562988,
"eval_runtime": 4.4062,
"eval_samples_per_second": 66.044,
"eval_steps_per_second": 4.312,
"step": 54178
},
{
"epoch": 526.21,
"learning_rate": 9.475728155339806e-06,
"loss": 0.0886,
"step": 54200
},
{
"epoch": 527.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.253178596496582,
"eval_runtime": 4.4495,
"eval_samples_per_second": 65.401,
"eval_steps_per_second": 4.27,
"step": 54281
},
{
"epoch": 527.18,
"learning_rate": 9.45631067961165e-06,
"loss": 0.0881,
"step": 54300
},
{
"epoch": 528.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 5.499323844909668,
"eval_runtime": 4.4145,
"eval_samples_per_second": 65.919,
"eval_steps_per_second": 4.304,
"step": 54384
},
{
"epoch": 528.16,
"learning_rate": 9.436893203883495e-06,
"loss": 0.1158,
"step": 54400
},
{
"epoch": 529.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.275381565093994,
"eval_runtime": 4.3577,
"eval_samples_per_second": 66.779,
"eval_steps_per_second": 4.36,
"step": 54487
},
{
"epoch": 529.13,
"learning_rate": 9.41747572815534e-06,
"loss": 0.0984,
"step": 54500
},
{
"epoch": 530.0,
"eval_accuracy": 0.2508591065292096,
"eval_loss": 5.223719120025635,
"eval_runtime": 4.3685,
"eval_samples_per_second": 66.614,
"eval_steps_per_second": 4.349,
"step": 54590
},
{
"epoch": 530.1,
"learning_rate": 9.398058252427186e-06,
"loss": 0.0974,
"step": 54600
},
{
"epoch": 531.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.413337707519531,
"eval_runtime": 4.4815,
"eval_samples_per_second": 64.934,
"eval_steps_per_second": 4.24,
"step": 54693
},
{
"epoch": 531.07,
"learning_rate": 9.37864077669903e-06,
"loss": 0.0892,
"step": 54700
},
{
"epoch": 532.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.2499871253967285,
"eval_runtime": 4.38,
"eval_samples_per_second": 66.438,
"eval_steps_per_second": 4.338,
"step": 54796
},
{
"epoch": 532.04,
"learning_rate": 9.359223300970874e-06,
"loss": 0.0892,
"step": 54800
},
{
"epoch": 533.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 5.320400714874268,
"eval_runtime": 4.4262,
"eval_samples_per_second": 65.745,
"eval_steps_per_second": 4.293,
"step": 54899
},
{
"epoch": 533.01,
"learning_rate": 9.33980582524272e-06,
"loss": 0.0938,
"step": 54900
},
{
"epoch": 533.98,
"learning_rate": 9.320388349514565e-06,
"loss": 0.0873,
"step": 55000
},
{
"epoch": 534.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.227492332458496,
"eval_runtime": 4.3496,
"eval_samples_per_second": 66.902,
"eval_steps_per_second": 4.368,
"step": 55002
},
{
"epoch": 534.95,
"learning_rate": 9.30097087378641e-06,
"loss": 0.0882,
"step": 55100
},
{
"epoch": 535.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.204889297485352,
"eval_runtime": 4.401,
"eval_samples_per_second": 66.121,
"eval_steps_per_second": 4.317,
"step": 55105
},
{
"epoch": 535.92,
"learning_rate": 9.281553398058252e-06,
"loss": 0.0915,
"step": 55200
},
{
"epoch": 536.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.2154951095581055,
"eval_runtime": 4.3769,
"eval_samples_per_second": 66.486,
"eval_steps_per_second": 4.341,
"step": 55208
},
{
"epoch": 536.89,
"learning_rate": 9.262135922330098e-06,
"loss": 0.0759,
"step": 55300
},
{
"epoch": 537.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.279453754425049,
"eval_runtime": 4.3694,
"eval_samples_per_second": 66.6,
"eval_steps_per_second": 4.348,
"step": 55311
},
{
"epoch": 537.86,
"learning_rate": 9.242718446601943e-06,
"loss": 0.0893,
"step": 55400
},
{
"epoch": 538.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.227136135101318,
"eval_runtime": 4.3788,
"eval_samples_per_second": 66.457,
"eval_steps_per_second": 4.339,
"step": 55414
},
{
"epoch": 538.83,
"learning_rate": 9.223300970873788e-06,
"loss": 0.0845,
"step": 55500
},
{
"epoch": 539.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.234629154205322,
"eval_runtime": 4.3591,
"eval_samples_per_second": 66.757,
"eval_steps_per_second": 4.359,
"step": 55517
},
{
"epoch": 539.81,
"learning_rate": 9.203883495145632e-06,
"loss": 0.0912,
"step": 55600
},
{
"epoch": 540.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.244317054748535,
"eval_runtime": 4.3649,
"eval_samples_per_second": 66.669,
"eval_steps_per_second": 4.353,
"step": 55620
},
{
"epoch": 540.78,
"learning_rate": 9.184466019417477e-06,
"loss": 0.0804,
"step": 55700
},
{
"epoch": 541.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.277728080749512,
"eval_runtime": 4.4039,
"eval_samples_per_second": 66.078,
"eval_steps_per_second": 4.314,
"step": 55723
},
{
"epoch": 541.75,
"learning_rate": 9.165048543689321e-06,
"loss": 0.0753,
"step": 55800
},
{
"epoch": 542.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.358335494995117,
"eval_runtime": 4.3815,
"eval_samples_per_second": 66.415,
"eval_steps_per_second": 4.336,
"step": 55826
},
{
"epoch": 542.72,
"learning_rate": 9.145631067961166e-06,
"loss": 0.0829,
"step": 55900
},
{
"epoch": 543.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.189969062805176,
"eval_runtime": 4.3835,
"eval_samples_per_second": 66.385,
"eval_steps_per_second": 4.334,
"step": 55929
},
{
"epoch": 543.69,
"learning_rate": 9.12621359223301e-06,
"loss": 0.0984,
"step": 56000
},
{
"epoch": 544.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.192966938018799,
"eval_runtime": 4.3632,
"eval_samples_per_second": 66.695,
"eval_steps_per_second": 4.355,
"step": 56032
},
{
"epoch": 544.66,
"learning_rate": 9.106796116504855e-06,
"loss": 0.0993,
"step": 56100
},
{
"epoch": 545.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.122324466705322,
"eval_runtime": 4.439,
"eval_samples_per_second": 65.555,
"eval_steps_per_second": 4.28,
"step": 56135
},
{
"epoch": 545.63,
"learning_rate": 9.0873786407767e-06,
"loss": 0.0793,
"step": 56200
},
{
"epoch": 546.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.210149765014648,
"eval_runtime": 4.3685,
"eval_samples_per_second": 66.613,
"eval_steps_per_second": 4.349,
"step": 56238
},
{
"epoch": 546.6,
"learning_rate": 9.067961165048544e-06,
"loss": 0.0912,
"step": 56300
},
{
"epoch": 547.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.274239540100098,
"eval_runtime": 4.3627,
"eval_samples_per_second": 66.701,
"eval_steps_per_second": 4.355,
"step": 56341
},
{
"epoch": 547.57,
"learning_rate": 9.048543689320389e-06,
"loss": 0.0892,
"step": 56400
},
{
"epoch": 548.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.1733808517456055,
"eval_runtime": 4.3916,
"eval_samples_per_second": 66.262,
"eval_steps_per_second": 4.326,
"step": 56444
},
{
"epoch": 548.54,
"learning_rate": 9.029126213592233e-06,
"loss": 0.1029,
"step": 56500
},
{
"epoch": 549.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.265845775604248,
"eval_runtime": 4.4438,
"eval_samples_per_second": 65.485,
"eval_steps_per_second": 4.276,
"step": 56547
},
{
"epoch": 549.51,
"learning_rate": 9.009708737864078e-06,
"loss": 0.0863,
"step": 56600
},
{
"epoch": 550.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.237177848815918,
"eval_runtime": 4.3771,
"eval_samples_per_second": 66.483,
"eval_steps_per_second": 4.341,
"step": 56650
},
{
"epoch": 550.49,
"learning_rate": 8.990291262135923e-06,
"loss": 0.1017,
"step": 56700
},
{
"epoch": 551.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.210503101348877,
"eval_runtime": 4.4112,
"eval_samples_per_second": 65.968,
"eval_steps_per_second": 4.307,
"step": 56753
},
{
"epoch": 551.46,
"learning_rate": 8.970873786407767e-06,
"loss": 0.0883,
"step": 56800
},
{
"epoch": 552.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.105496883392334,
"eval_runtime": 4.4302,
"eval_samples_per_second": 65.685,
"eval_steps_per_second": 4.289,
"step": 56856
},
{
"epoch": 552.43,
"learning_rate": 8.951456310679613e-06,
"loss": 0.1042,
"step": 56900
},
{
"epoch": 553.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 5.24324893951416,
"eval_runtime": 4.3771,
"eval_samples_per_second": 66.482,
"eval_steps_per_second": 4.341,
"step": 56959
},
{
"epoch": 553.4,
"learning_rate": 8.932038834951458e-06,
"loss": 0.0817,
"step": 57000
},
{
"epoch": 554.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.242309093475342,
"eval_runtime": 4.3846,
"eval_samples_per_second": 66.368,
"eval_steps_per_second": 4.333,
"step": 57062
},
{
"epoch": 554.37,
"learning_rate": 8.912621359223301e-06,
"loss": 0.0869,
"step": 57100
},
{
"epoch": 555.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.22501277923584,
"eval_runtime": 4.4814,
"eval_samples_per_second": 64.935,
"eval_steps_per_second": 4.24,
"step": 57165
},
{
"epoch": 555.34,
"learning_rate": 8.893203883495145e-06,
"loss": 0.0843,
"step": 57200
},
{
"epoch": 556.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.196157932281494,
"eval_runtime": 4.4042,
"eval_samples_per_second": 66.073,
"eval_steps_per_second": 4.314,
"step": 57268
},
{
"epoch": 556.31,
"learning_rate": 8.873786407766992e-06,
"loss": 0.0887,
"step": 57300
},
{
"epoch": 557.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.1147990226745605,
"eval_runtime": 4.4748,
"eval_samples_per_second": 65.031,
"eval_steps_per_second": 4.246,
"step": 57371
},
{
"epoch": 557.28,
"learning_rate": 8.854368932038836e-06,
"loss": 0.0838,
"step": 57400
},
{
"epoch": 558.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.020167350769043,
"eval_runtime": 4.3534,
"eval_samples_per_second": 66.845,
"eval_steps_per_second": 4.364,
"step": 57474
},
{
"epoch": 558.25,
"learning_rate": 8.834951456310681e-06,
"loss": 0.0759,
"step": 57500
},
{
"epoch": 559.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 5.0678253173828125,
"eval_runtime": 4.3946,
"eval_samples_per_second": 66.218,
"eval_steps_per_second": 4.324,
"step": 57577
},
{
"epoch": 559.22,
"learning_rate": 8.815533980582525e-06,
"loss": 0.0934,
"step": 57600
},
{
"epoch": 560.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 4.955771446228027,
"eval_runtime": 4.4482,
"eval_samples_per_second": 65.42,
"eval_steps_per_second": 4.271,
"step": 57680
},
{
"epoch": 560.19,
"learning_rate": 8.79611650485437e-06,
"loss": 0.0858,
"step": 57700
},
{
"epoch": 561.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.016815185546875,
"eval_runtime": 4.3972,
"eval_samples_per_second": 66.178,
"eval_steps_per_second": 4.321,
"step": 57783
},
{
"epoch": 561.17,
"learning_rate": 8.776699029126215e-06,
"loss": 0.0873,
"step": 57800
},
{
"epoch": 562.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.045673847198486,
"eval_runtime": 4.3475,
"eval_samples_per_second": 66.936,
"eval_steps_per_second": 4.37,
"step": 57886
},
{
"epoch": 562.14,
"learning_rate": 8.75728155339806e-06,
"loss": 0.0902,
"step": 57900
},
{
"epoch": 563.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 5.046875,
"eval_runtime": 4.3618,
"eval_samples_per_second": 66.716,
"eval_steps_per_second": 4.356,
"step": 57989
},
{
"epoch": 563.11,
"learning_rate": 8.737864077669904e-06,
"loss": 0.0793,
"step": 58000
},
{
"epoch": 564.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 4.987062931060791,
"eval_runtime": 4.3493,
"eval_samples_per_second": 66.907,
"eval_steps_per_second": 4.368,
"step": 58092
},
{
"epoch": 564.08,
"learning_rate": 8.718446601941748e-06,
"loss": 0.0882,
"step": 58100
},
{
"epoch": 565.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.158361911773682,
"eval_runtime": 4.485,
"eval_samples_per_second": 64.883,
"eval_steps_per_second": 4.236,
"step": 58195
},
{
"epoch": 565.05,
"learning_rate": 8.699029126213593e-06,
"loss": 0.0984,
"step": 58200
},
{
"epoch": 566.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 5.074683666229248,
"eval_runtime": 4.5094,
"eval_samples_per_second": 64.532,
"eval_steps_per_second": 4.213,
"step": 58298
},
{
"epoch": 566.02,
"learning_rate": 8.679611650485438e-06,
"loss": 0.0818,
"step": 58300
},
{
"epoch": 566.99,
"learning_rate": 8.660194174757282e-06,
"loss": 0.0824,
"step": 58400
},
{
"epoch": 567.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 5.173541069030762,
"eval_runtime": 4.496,
"eval_samples_per_second": 64.723,
"eval_steps_per_second": 4.226,
"step": 58401
},
{
"epoch": 567.96,
"learning_rate": 8.640776699029127e-06,
"loss": 0.0794,
"step": 58500
},
{
"epoch": 568.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 5.1322712898254395,
"eval_runtime": 4.3776,
"eval_samples_per_second": 66.475,
"eval_steps_per_second": 4.34,
"step": 58504
},
{
"epoch": 568.93,
"learning_rate": 8.621359223300971e-06,
"loss": 0.0847,
"step": 58600
},
{
"epoch": 569.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 5.129234313964844,
"eval_runtime": 4.3766,
"eval_samples_per_second": 66.49,
"eval_steps_per_second": 4.341,
"step": 58607
},
{
"epoch": 569.9,
"learning_rate": 8.601941747572816e-06,
"loss": 0.0833,
"step": 58700
},
{
"epoch": 570.0,
"eval_accuracy": 0.32646048109965636,
"eval_loss": 5.070975303649902,
"eval_runtime": 4.3647,
"eval_samples_per_second": 66.671,
"eval_steps_per_second": 4.353,
"step": 58710
},
{
"epoch": 570.87,
"learning_rate": 8.58252427184466e-06,
"loss": 0.0831,
"step": 58800
},
{
"epoch": 571.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.120458126068115,
"eval_runtime": 4.3931,
"eval_samples_per_second": 66.24,
"eval_steps_per_second": 4.325,
"step": 58813
},
{
"epoch": 571.84,
"learning_rate": 8.563106796116507e-06,
"loss": 0.0922,
"step": 58900
},
{
"epoch": 572.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.100735187530518,
"eval_runtime": 4.3647,
"eval_samples_per_second": 66.672,
"eval_steps_per_second": 4.353,
"step": 58916
},
{
"epoch": 572.82,
"learning_rate": 8.54368932038835e-06,
"loss": 0.0906,
"step": 59000
},
{
"epoch": 573.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.19244384765625,
"eval_runtime": 4.3639,
"eval_samples_per_second": 66.683,
"eval_steps_per_second": 4.354,
"step": 59019
},
{
"epoch": 573.79,
"learning_rate": 8.524271844660194e-06,
"loss": 0.1079,
"step": 59100
},
{
"epoch": 574.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.193302154541016,
"eval_runtime": 4.3456,
"eval_samples_per_second": 66.964,
"eval_steps_per_second": 4.372,
"step": 59122
},
{
"epoch": 574.76,
"learning_rate": 8.504854368932039e-06,
"loss": 0.0943,
"step": 59200
},
{
"epoch": 575.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.155801773071289,
"eval_runtime": 4.3466,
"eval_samples_per_second": 66.949,
"eval_steps_per_second": 4.371,
"step": 59225
},
{
"epoch": 575.73,
"learning_rate": 8.485436893203885e-06,
"loss": 0.0877,
"step": 59300
},
{
"epoch": 576.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.157259941101074,
"eval_runtime": 4.4499,
"eval_samples_per_second": 65.395,
"eval_steps_per_second": 4.27,
"step": 59328
},
{
"epoch": 576.7,
"learning_rate": 8.46601941747573e-06,
"loss": 0.0977,
"step": 59400
},
{
"epoch": 577.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.031143665313721,
"eval_runtime": 4.3879,
"eval_samples_per_second": 66.319,
"eval_steps_per_second": 4.33,
"step": 59431
},
{
"epoch": 577.67,
"learning_rate": 8.446601941747573e-06,
"loss": 0.0751,
"step": 59500
},
{
"epoch": 578.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.158066272735596,
"eval_runtime": 4.4017,
"eval_samples_per_second": 66.111,
"eval_steps_per_second": 4.317,
"step": 59534
},
{
"epoch": 578.64,
"learning_rate": 8.427184466019419e-06,
"loss": 0.096,
"step": 59600
},
{
"epoch": 579.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.211477756500244,
"eval_runtime": 4.3934,
"eval_samples_per_second": 66.236,
"eval_steps_per_second": 4.325,
"step": 59637
},
{
"epoch": 579.61,
"learning_rate": 8.407766990291263e-06,
"loss": 0.0902,
"step": 59700
},
{
"epoch": 580.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.254421234130859,
"eval_runtime": 4.4012,
"eval_samples_per_second": 66.118,
"eval_steps_per_second": 4.317,
"step": 59740
},
{
"epoch": 580.58,
"learning_rate": 8.388349514563108e-06,
"loss": 0.1052,
"step": 59800
},
{
"epoch": 581.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 5.161226749420166,
"eval_runtime": 4.4919,
"eval_samples_per_second": 64.784,
"eval_steps_per_second": 4.23,
"step": 59843
},
{
"epoch": 581.55,
"learning_rate": 8.368932038834953e-06,
"loss": 0.0763,
"step": 59900
},
{
"epoch": 582.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.143395900726318,
"eval_runtime": 4.3936,
"eval_samples_per_second": 66.233,
"eval_steps_per_second": 4.325,
"step": 59946
},
{
"epoch": 582.52,
"learning_rate": 8.349514563106797e-06,
"loss": 0.0904,
"step": 60000
},
{
"epoch": 583.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.191125869750977,
"eval_runtime": 4.4669,
"eval_samples_per_second": 65.146,
"eval_steps_per_second": 4.254,
"step": 60049
},
{
"epoch": 583.5,
"learning_rate": 8.330097087378642e-06,
"loss": 0.0868,
"step": 60100
},
{
"epoch": 584.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.171573162078857,
"eval_runtime": 4.4419,
"eval_samples_per_second": 65.512,
"eval_steps_per_second": 4.277,
"step": 60152
},
{
"epoch": 584.47,
"learning_rate": 8.310679611650486e-06,
"loss": 0.091,
"step": 60200
},
{
"epoch": 585.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.17667818069458,
"eval_runtime": 4.4127,
"eval_samples_per_second": 65.946,
"eval_steps_per_second": 4.306,
"step": 60255
},
{
"epoch": 585.44,
"learning_rate": 8.291262135922331e-06,
"loss": 0.0936,
"step": 60300
},
{
"epoch": 586.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.180116176605225,
"eval_runtime": 4.3695,
"eval_samples_per_second": 66.599,
"eval_steps_per_second": 4.348,
"step": 60358
},
{
"epoch": 586.41,
"learning_rate": 8.271844660194175e-06,
"loss": 0.082,
"step": 60400
},
{
"epoch": 587.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.049594879150391,
"eval_runtime": 4.3508,
"eval_samples_per_second": 66.885,
"eval_steps_per_second": 4.367,
"step": 60461
},
{
"epoch": 587.38,
"learning_rate": 8.25242718446602e-06,
"loss": 0.0999,
"step": 60500
},
{
"epoch": 588.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.258527755737305,
"eval_runtime": 4.3735,
"eval_samples_per_second": 66.538,
"eval_steps_per_second": 4.344,
"step": 60564
},
{
"epoch": 588.35,
"learning_rate": 8.233009708737865e-06,
"loss": 0.0826,
"step": 60600
},
{
"epoch": 589.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.256552696228027,
"eval_runtime": 4.521,
"eval_samples_per_second": 64.367,
"eval_steps_per_second": 4.203,
"step": 60667
},
{
"epoch": 589.32,
"learning_rate": 8.21359223300971e-06,
"loss": 0.0949,
"step": 60700
},
{
"epoch": 590.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.301484107971191,
"eval_runtime": 4.3639,
"eval_samples_per_second": 66.684,
"eval_steps_per_second": 4.354,
"step": 60770
},
{
"epoch": 590.29,
"learning_rate": 8.194174757281554e-06,
"loss": 0.0828,
"step": 60800
},
{
"epoch": 591.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.1411333084106445,
"eval_runtime": 4.4822,
"eval_samples_per_second": 64.924,
"eval_steps_per_second": 4.239,
"step": 60873
},
{
"epoch": 591.26,
"learning_rate": 8.174757281553398e-06,
"loss": 0.0827,
"step": 60900
},
{
"epoch": 592.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.119908332824707,
"eval_runtime": 4.392,
"eval_samples_per_second": 66.256,
"eval_steps_per_second": 4.326,
"step": 60976
},
{
"epoch": 592.23,
"learning_rate": 8.155339805825243e-06,
"loss": 0.0943,
"step": 61000
},
{
"epoch": 593.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.10630989074707,
"eval_runtime": 4.4688,
"eval_samples_per_second": 65.119,
"eval_steps_per_second": 4.252,
"step": 61079
},
{
"epoch": 593.2,
"learning_rate": 8.135922330097088e-06,
"loss": 0.076,
"step": 61100
},
{
"epoch": 594.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.114058494567871,
"eval_runtime": 4.602,
"eval_samples_per_second": 63.233,
"eval_steps_per_second": 4.129,
"step": 61182
},
{
"epoch": 594.17,
"learning_rate": 8.116504854368932e-06,
"loss": 0.0917,
"step": 61200
},
{
"epoch": 595.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.14142370223999,
"eval_runtime": 4.3886,
"eval_samples_per_second": 66.308,
"eval_steps_per_second": 4.329,
"step": 61285
},
{
"epoch": 595.15,
"learning_rate": 8.097087378640778e-06,
"loss": 0.0976,
"step": 61300
},
{
"epoch": 596.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.144129753112793,
"eval_runtime": 4.3645,
"eval_samples_per_second": 66.674,
"eval_steps_per_second": 4.353,
"step": 61388
},
{
"epoch": 596.12,
"learning_rate": 8.077669902912621e-06,
"loss": 0.0804,
"step": 61400
},
{
"epoch": 597.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.168061256408691,
"eval_runtime": 4.4405,
"eval_samples_per_second": 65.533,
"eval_steps_per_second": 4.279,
"step": 61491
},
{
"epoch": 597.09,
"learning_rate": 8.058252427184466e-06,
"loss": 0.0923,
"step": 61500
},
{
"epoch": 598.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.133292198181152,
"eval_runtime": 4.3696,
"eval_samples_per_second": 66.596,
"eval_steps_per_second": 4.348,
"step": 61594
},
{
"epoch": 598.06,
"learning_rate": 8.038834951456312e-06,
"loss": 0.093,
"step": 61600
},
{
"epoch": 599.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.125970840454102,
"eval_runtime": 4.3619,
"eval_samples_per_second": 66.715,
"eval_steps_per_second": 4.356,
"step": 61697
},
{
"epoch": 599.03,
"learning_rate": 8.019417475728157e-06,
"loss": 0.0872,
"step": 61700
},
{
"epoch": 600.0,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0926,
"step": 61800
},
{
"epoch": 600.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 5.156001567840576,
"eval_runtime": 4.4356,
"eval_samples_per_second": 65.606,
"eval_steps_per_second": 4.284,
"step": 61800
},
{
"epoch": 600.97,
"learning_rate": 7.980582524271844e-06,
"loss": 0.0844,
"step": 61900
},
{
"epoch": 601.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.193061828613281,
"eval_runtime": 4.3861,
"eval_samples_per_second": 66.346,
"eval_steps_per_second": 4.332,
"step": 61903
},
{
"epoch": 601.94,
"learning_rate": 7.96116504854369e-06,
"loss": 0.0847,
"step": 62000
},
{
"epoch": 602.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.086513996124268,
"eval_runtime": 4.4154,
"eval_samples_per_second": 65.906,
"eval_steps_per_second": 4.303,
"step": 62006
},
{
"epoch": 602.91,
"learning_rate": 7.941747572815535e-06,
"loss": 0.0822,
"step": 62100
},
{
"epoch": 603.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.086156368255615,
"eval_runtime": 4.3772,
"eval_samples_per_second": 66.48,
"eval_steps_per_second": 4.341,
"step": 62109
},
{
"epoch": 603.88,
"learning_rate": 7.92233009708738e-06,
"loss": 0.0771,
"step": 62200
},
{
"epoch": 604.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.047454833984375,
"eval_runtime": 4.4113,
"eval_samples_per_second": 65.967,
"eval_steps_per_second": 4.307,
"step": 62212
},
{
"epoch": 604.85,
"learning_rate": 7.902912621359224e-06,
"loss": 0.0885,
"step": 62300
},
{
"epoch": 605.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.088384628295898,
"eval_runtime": 4.3498,
"eval_samples_per_second": 66.9,
"eval_steps_per_second": 4.368,
"step": 62315
},
{
"epoch": 605.83,
"learning_rate": 7.883495145631069e-06,
"loss": 0.0809,
"step": 62400
},
{
"epoch": 606.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.215940475463867,
"eval_runtime": 4.3881,
"eval_samples_per_second": 66.316,
"eval_steps_per_second": 4.33,
"step": 62418
},
{
"epoch": 606.8,
"learning_rate": 7.864077669902913e-06,
"loss": 0.0892,
"step": 62500
},
{
"epoch": 607.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.086651802062988,
"eval_runtime": 4.4003,
"eval_samples_per_second": 66.132,
"eval_steps_per_second": 4.318,
"step": 62521
},
{
"epoch": 607.77,
"learning_rate": 7.844660194174758e-06,
"loss": 0.085,
"step": 62600
},
{
"epoch": 608.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.0848236083984375,
"eval_runtime": 4.4944,
"eval_samples_per_second": 64.747,
"eval_steps_per_second": 4.227,
"step": 62624
},
{
"epoch": 608.74,
"learning_rate": 7.825242718446603e-06,
"loss": 0.0828,
"step": 62700
},
{
"epoch": 609.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.234314918518066,
"eval_runtime": 4.3909,
"eval_samples_per_second": 66.274,
"eval_steps_per_second": 4.327,
"step": 62727
},
{
"epoch": 609.71,
"learning_rate": 7.805825242718447e-06,
"loss": 0.0978,
"step": 62800
},
{
"epoch": 610.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.1202826499938965,
"eval_runtime": 4.4024,
"eval_samples_per_second": 66.1,
"eval_steps_per_second": 4.316,
"step": 62830
},
{
"epoch": 610.68,
"learning_rate": 7.786407766990292e-06,
"loss": 0.0922,
"step": 62900
},
{
"epoch": 611.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.254323959350586,
"eval_runtime": 4.3525,
"eval_samples_per_second": 66.859,
"eval_steps_per_second": 4.365,
"step": 62933
},
{
"epoch": 611.65,
"learning_rate": 7.766990291262136e-06,
"loss": 0.091,
"step": 63000
},
{
"epoch": 612.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.122802734375,
"eval_runtime": 4.399,
"eval_samples_per_second": 66.151,
"eval_steps_per_second": 4.319,
"step": 63036
},
{
"epoch": 612.62,
"learning_rate": 7.747572815533981e-06,
"loss": 0.0926,
"step": 63100
},
{
"epoch": 613.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.306426525115967,
"eval_runtime": 4.4296,
"eval_samples_per_second": 65.695,
"eval_steps_per_second": 4.289,
"step": 63139
},
{
"epoch": 613.59,
"learning_rate": 7.728155339805825e-06,
"loss": 0.078,
"step": 63200
},
{
"epoch": 614.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.336696624755859,
"eval_runtime": 4.4235,
"eval_samples_per_second": 65.786,
"eval_steps_per_second": 4.295,
"step": 63242
},
{
"epoch": 614.56,
"learning_rate": 7.70873786407767e-06,
"loss": 0.0791,
"step": 63300
},
{
"epoch": 615.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.27379846572876,
"eval_runtime": 4.3721,
"eval_samples_per_second": 66.559,
"eval_steps_per_second": 4.346,
"step": 63345
},
{
"epoch": 615.53,
"learning_rate": 7.689320388349515e-06,
"loss": 0.0803,
"step": 63400
},
{
"epoch": 616.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.269800186157227,
"eval_runtime": 4.3964,
"eval_samples_per_second": 66.191,
"eval_steps_per_second": 4.322,
"step": 63448
},
{
"epoch": 616.5,
"learning_rate": 7.66990291262136e-06,
"loss": 0.0936,
"step": 63500
},
{
"epoch": 617.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.30620002746582,
"eval_runtime": 4.3979,
"eval_samples_per_second": 66.168,
"eval_steps_per_second": 4.32,
"step": 63551
},
{
"epoch": 617.48,
"learning_rate": 7.650485436893204e-06,
"loss": 0.0894,
"step": 63600
},
{
"epoch": 618.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.383390426635742,
"eval_runtime": 4.3878,
"eval_samples_per_second": 66.32,
"eval_steps_per_second": 4.33,
"step": 63654
},
{
"epoch": 618.45,
"learning_rate": 7.63106796116505e-06,
"loss": 0.0794,
"step": 63700
},
{
"epoch": 619.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 5.2768330574035645,
"eval_runtime": 4.3931,
"eval_samples_per_second": 66.24,
"eval_steps_per_second": 4.325,
"step": 63757
},
{
"epoch": 619.42,
"learning_rate": 7.611650485436893e-06,
"loss": 0.0885,
"step": 63800
},
{
"epoch": 620.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.2569475173950195,
"eval_runtime": 4.4091,
"eval_samples_per_second": 65.999,
"eval_steps_per_second": 4.309,
"step": 63860
},
{
"epoch": 620.39,
"learning_rate": 7.592233009708738e-06,
"loss": 0.0866,
"step": 63900
},
{
"epoch": 621.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.332491874694824,
"eval_runtime": 4.4767,
"eval_samples_per_second": 65.004,
"eval_steps_per_second": 4.244,
"step": 63963
},
{
"epoch": 621.36,
"learning_rate": 7.572815533980583e-06,
"loss": 0.079,
"step": 64000
},
{
"epoch": 622.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.279804229736328,
"eval_runtime": 4.452,
"eval_samples_per_second": 65.364,
"eval_steps_per_second": 4.268,
"step": 64066
},
{
"epoch": 622.33,
"learning_rate": 7.553398058252428e-06,
"loss": 0.084,
"step": 64100
},
{
"epoch": 623.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.460251331329346,
"eval_runtime": 4.4075,
"eval_samples_per_second": 66.025,
"eval_steps_per_second": 4.311,
"step": 64169
},
{
"epoch": 623.3,
"learning_rate": 7.533980582524273e-06,
"loss": 0.0886,
"step": 64200
},
{
"epoch": 624.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.292215347290039,
"eval_runtime": 4.5112,
"eval_samples_per_second": 64.507,
"eval_steps_per_second": 4.212,
"step": 64272
},
{
"epoch": 624.27,
"learning_rate": 7.514563106796117e-06,
"loss": 0.0726,
"step": 64300
},
{
"epoch": 625.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.195230960845947,
"eval_runtime": 4.4104,
"eval_samples_per_second": 65.98,
"eval_steps_per_second": 4.308,
"step": 64375
},
{
"epoch": 625.24,
"learning_rate": 7.495145631067961e-06,
"loss": 0.0893,
"step": 64400
},
{
"epoch": 626.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 5.411427974700928,
"eval_runtime": 4.3902,
"eval_samples_per_second": 66.285,
"eval_steps_per_second": 4.328,
"step": 64478
},
{
"epoch": 626.21,
"learning_rate": 7.475728155339807e-06,
"loss": 0.0881,
"step": 64500
},
{
"epoch": 627.0,
"eval_accuracy": 0.2508591065292096,
"eval_loss": 5.48668909072876,
"eval_runtime": 4.3905,
"eval_samples_per_second": 66.279,
"eval_steps_per_second": 4.328,
"step": 64581
},
{
"epoch": 627.18,
"learning_rate": 7.456310679611651e-06,
"loss": 0.079,
"step": 64600
},
{
"epoch": 628.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.483811855316162,
"eval_runtime": 4.4689,
"eval_samples_per_second": 65.117,
"eval_steps_per_second": 4.252,
"step": 64684
},
{
"epoch": 628.16,
"learning_rate": 7.436893203883496e-06,
"loss": 0.0933,
"step": 64700
},
{
"epoch": 629.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.521385192871094,
"eval_runtime": 4.4018,
"eval_samples_per_second": 66.109,
"eval_steps_per_second": 4.316,
"step": 64787
},
{
"epoch": 629.13,
"learning_rate": 7.41747572815534e-06,
"loss": 0.0795,
"step": 64800
},
{
"epoch": 630.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.425594806671143,
"eval_runtime": 4.4068,
"eval_samples_per_second": 66.034,
"eval_steps_per_second": 4.311,
"step": 64890
},
{
"epoch": 630.1,
"learning_rate": 7.398058252427185e-06,
"loss": 0.0882,
"step": 64900
},
{
"epoch": 631.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.362780570983887,
"eval_runtime": 4.4131,
"eval_samples_per_second": 65.94,
"eval_steps_per_second": 4.305,
"step": 64993
},
{
"epoch": 631.07,
"learning_rate": 7.37864077669903e-06,
"loss": 0.0826,
"step": 65000
},
{
"epoch": 632.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.2815680503845215,
"eval_runtime": 4.4008,
"eval_samples_per_second": 66.124,
"eval_steps_per_second": 4.317,
"step": 65096
},
{
"epoch": 632.04,
"learning_rate": 7.359223300970874e-06,
"loss": 0.0853,
"step": 65100
},
{
"epoch": 633.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.261467933654785,
"eval_runtime": 4.4513,
"eval_samples_per_second": 65.374,
"eval_steps_per_second": 4.268,
"step": 65199
},
{
"epoch": 633.01,
"learning_rate": 7.33980582524272e-06,
"loss": 0.0809,
"step": 65200
},
{
"epoch": 633.98,
"learning_rate": 7.3203883495145634e-06,
"loss": 0.0862,
"step": 65300
},
{
"epoch": 634.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.262171745300293,
"eval_runtime": 4.4171,
"eval_samples_per_second": 65.88,
"eval_steps_per_second": 4.301,
"step": 65302
},
{
"epoch": 634.95,
"learning_rate": 7.300970873786408e-06,
"loss": 0.0823,
"step": 65400
},
{
"epoch": 635.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.3122878074646,
"eval_runtime": 4.4674,
"eval_samples_per_second": 65.138,
"eval_steps_per_second": 4.253,
"step": 65405
},
{
"epoch": 635.92,
"learning_rate": 7.2815533980582534e-06,
"loss": 0.0915,
"step": 65500
},
{
"epoch": 636.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.248616695404053,
"eval_runtime": 4.4395,
"eval_samples_per_second": 65.549,
"eval_steps_per_second": 4.28,
"step": 65508
},
{
"epoch": 636.89,
"learning_rate": 7.262135922330098e-06,
"loss": 0.0776,
"step": 65600
},
{
"epoch": 637.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.264139175415039,
"eval_runtime": 4.5011,
"eval_samples_per_second": 64.651,
"eval_steps_per_second": 4.221,
"step": 65611
},
{
"epoch": 637.86,
"learning_rate": 7.242718446601942e-06,
"loss": 0.0799,
"step": 65700
},
{
"epoch": 638.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.432704448699951,
"eval_runtime": 4.4798,
"eval_samples_per_second": 64.959,
"eval_steps_per_second": 4.241,
"step": 65714
},
{
"epoch": 638.83,
"learning_rate": 7.223300970873786e-06,
"loss": 0.0925,
"step": 65800
},
{
"epoch": 639.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.366397380828857,
"eval_runtime": 4.4339,
"eval_samples_per_second": 65.63,
"eval_steps_per_second": 4.285,
"step": 65817
},
{
"epoch": 639.81,
"learning_rate": 7.203883495145632e-06,
"loss": 0.0865,
"step": 65900
},
{
"epoch": 640.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.30657958984375,
"eval_runtime": 4.4192,
"eval_samples_per_second": 65.849,
"eval_steps_per_second": 4.299,
"step": 65920
},
{
"epoch": 640.78,
"learning_rate": 7.184466019417476e-06,
"loss": 0.09,
"step": 66000
},
{
"epoch": 641.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.0984883308410645,
"eval_runtime": 4.401,
"eval_samples_per_second": 66.121,
"eval_steps_per_second": 4.317,
"step": 66023
},
{
"epoch": 641.75,
"learning_rate": 7.165048543689321e-06,
"loss": 0.0867,
"step": 66100
},
{
"epoch": 642.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.173170566558838,
"eval_runtime": 4.467,
"eval_samples_per_second": 65.145,
"eval_steps_per_second": 4.253,
"step": 66126
},
{
"epoch": 642.72,
"learning_rate": 7.1456310679611655e-06,
"loss": 0.084,
"step": 66200
},
{
"epoch": 643.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.233015537261963,
"eval_runtime": 4.3939,
"eval_samples_per_second": 66.229,
"eval_steps_per_second": 4.324,
"step": 66229
},
{
"epoch": 643.69,
"learning_rate": 7.12621359223301e-06,
"loss": 0.0806,
"step": 66300
},
{
"epoch": 644.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.209733486175537,
"eval_runtime": 4.4601,
"eval_samples_per_second": 65.245,
"eval_steps_per_second": 4.26,
"step": 66332
},
{
"epoch": 644.66,
"learning_rate": 7.106796116504855e-06,
"loss": 0.0821,
"step": 66400
},
{
"epoch": 645.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.3271918296813965,
"eval_runtime": 4.4348,
"eval_samples_per_second": 65.617,
"eval_steps_per_second": 4.284,
"step": 66435
},
{
"epoch": 645.63,
"learning_rate": 7.0873786407767e-06,
"loss": 0.0869,
"step": 66500
},
{
"epoch": 646.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.393039703369141,
"eval_runtime": 4.4132,
"eval_samples_per_second": 65.938,
"eval_steps_per_second": 4.305,
"step": 66538
},
{
"epoch": 646.6,
"learning_rate": 7.067961165048545e-06,
"loss": 0.0777,
"step": 66600
},
{
"epoch": 647.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.334554672241211,
"eval_runtime": 4.3932,
"eval_samples_per_second": 66.239,
"eval_steps_per_second": 4.325,
"step": 66641
},
{
"epoch": 647.57,
"learning_rate": 7.0485436893203884e-06,
"loss": 0.0822,
"step": 66700
},
{
"epoch": 648.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.216523170471191,
"eval_runtime": 4.4213,
"eval_samples_per_second": 65.817,
"eval_steps_per_second": 4.297,
"step": 66744
},
{
"epoch": 648.54,
"learning_rate": 7.029126213592233e-06,
"loss": 0.0967,
"step": 66800
},
{
"epoch": 649.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.228401184082031,
"eval_runtime": 4.43,
"eval_samples_per_second": 65.689,
"eval_steps_per_second": 4.289,
"step": 66847
},
{
"epoch": 649.51,
"learning_rate": 7.0097087378640785e-06,
"loss": 0.0792,
"step": 66900
},
{
"epoch": 650.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.392093181610107,
"eval_runtime": 4.4149,
"eval_samples_per_second": 65.913,
"eval_steps_per_second": 4.304,
"step": 66950
},
{
"epoch": 650.49,
"learning_rate": 6.990291262135923e-06,
"loss": 0.0849,
"step": 67000
},
{
"epoch": 651.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.52961540222168,
"eval_runtime": 4.4619,
"eval_samples_per_second": 65.219,
"eval_steps_per_second": 4.258,
"step": 67053
},
{
"epoch": 651.46,
"learning_rate": 6.970873786407768e-06,
"loss": 0.0854,
"step": 67100
},
{
"epoch": 652.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.479518890380859,
"eval_runtime": 4.4569,
"eval_samples_per_second": 65.291,
"eval_steps_per_second": 4.263,
"step": 67156
},
{
"epoch": 652.43,
"learning_rate": 6.951456310679612e-06,
"loss": 0.0796,
"step": 67200
},
{
"epoch": 653.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.333386421203613,
"eval_runtime": 4.4265,
"eval_samples_per_second": 65.74,
"eval_steps_per_second": 4.292,
"step": 67259
},
{
"epoch": 653.4,
"learning_rate": 6.932038834951457e-06,
"loss": 0.093,
"step": 67300
},
{
"epoch": 654.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.3139777183532715,
"eval_runtime": 4.421,
"eval_samples_per_second": 65.822,
"eval_steps_per_second": 4.298,
"step": 67362
},
{
"epoch": 654.37,
"learning_rate": 6.912621359223301e-06,
"loss": 0.076,
"step": 67400
},
{
"epoch": 655.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.306375980377197,
"eval_runtime": 4.5012,
"eval_samples_per_second": 64.65,
"eval_steps_per_second": 4.221,
"step": 67465
},
{
"epoch": 655.34,
"learning_rate": 6.893203883495147e-06,
"loss": 0.086,
"step": 67500
},
{
"epoch": 656.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.385765075683594,
"eval_runtime": 4.452,
"eval_samples_per_second": 65.363,
"eval_steps_per_second": 4.268,
"step": 67568
},
{
"epoch": 656.31,
"learning_rate": 6.873786407766991e-06,
"loss": 0.0856,
"step": 67600
},
{
"epoch": 657.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.320601940155029,
"eval_runtime": 4.4353,
"eval_samples_per_second": 65.61,
"eval_steps_per_second": 4.284,
"step": 67671
},
{
"epoch": 657.28,
"learning_rate": 6.854368932038835e-06,
"loss": 0.0826,
"step": 67700
},
{
"epoch": 658.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.273061275482178,
"eval_runtime": 4.4023,
"eval_samples_per_second": 66.102,
"eval_steps_per_second": 4.316,
"step": 67774
},
{
"epoch": 658.25,
"learning_rate": 6.83495145631068e-06,
"loss": 0.0972,
"step": 67800
},
{
"epoch": 659.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.310391902923584,
"eval_runtime": 4.4173,
"eval_samples_per_second": 65.877,
"eval_steps_per_second": 4.301,
"step": 67877
},
{
"epoch": 659.22,
"learning_rate": 6.815533980582525e-06,
"loss": 0.0828,
"step": 67900
},
{
"epoch": 660.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.32990026473999,
"eval_runtime": 4.4134,
"eval_samples_per_second": 65.935,
"eval_steps_per_second": 4.305,
"step": 67980
},
{
"epoch": 660.19,
"learning_rate": 6.79611650485437e-06,
"loss": 0.0792,
"step": 68000
},
{
"epoch": 661.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.461109161376953,
"eval_runtime": 4.4061,
"eval_samples_per_second": 66.045,
"eval_steps_per_second": 4.312,
"step": 68083
},
{
"epoch": 661.17,
"learning_rate": 6.776699029126214e-06,
"loss": 0.0839,
"step": 68100
},
{
"epoch": 662.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.407573699951172,
"eval_runtime": 4.4041,
"eval_samples_per_second": 66.075,
"eval_steps_per_second": 4.314,
"step": 68186
},
{
"epoch": 662.14,
"learning_rate": 6.757281553398059e-06,
"loss": 0.0816,
"step": 68200
},
{
"epoch": 663.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.33349609375,
"eval_runtime": 4.4,
"eval_samples_per_second": 66.136,
"eval_steps_per_second": 4.318,
"step": 68289
},
{
"epoch": 663.11,
"learning_rate": 6.7378640776699035e-06,
"loss": 0.0786,
"step": 68300
},
{
"epoch": 664.0,
"eval_accuracy": 0.25773195876288657,
"eval_loss": 5.388492107391357,
"eval_runtime": 4.4145,
"eval_samples_per_second": 65.919,
"eval_steps_per_second": 4.304,
"step": 68392
},
{
"epoch": 664.08,
"learning_rate": 6.718446601941748e-06,
"loss": 0.0958,
"step": 68400
},
{
"epoch": 665.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 5.482216835021973,
"eval_runtime": 4.4201,
"eval_samples_per_second": 65.835,
"eval_steps_per_second": 4.299,
"step": 68495
},
{
"epoch": 665.05,
"learning_rate": 6.6990291262135935e-06,
"loss": 0.0872,
"step": 68500
},
{
"epoch": 666.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.474820137023926,
"eval_runtime": 4.4131,
"eval_samples_per_second": 65.94,
"eval_steps_per_second": 4.305,
"step": 68598
},
{
"epoch": 666.02,
"learning_rate": 6.679611650485437e-06,
"loss": 0.0861,
"step": 68600
},
{
"epoch": 666.99,
"learning_rate": 6.660194174757282e-06,
"loss": 0.0823,
"step": 68700
},
{
"epoch": 667.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.3411993980407715,
"eval_runtime": 4.4496,
"eval_samples_per_second": 65.399,
"eval_steps_per_second": 4.27,
"step": 68701
},
{
"epoch": 667.96,
"learning_rate": 6.640776699029126e-06,
"loss": 0.0845,
"step": 68800
},
{
"epoch": 668.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.271579265594482,
"eval_runtime": 4.496,
"eval_samples_per_second": 64.724,
"eval_steps_per_second": 4.226,
"step": 68804
},
{
"epoch": 668.93,
"learning_rate": 6.621359223300972e-06,
"loss": 0.0882,
"step": 68900
},
{
"epoch": 669.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.405780792236328,
"eval_runtime": 4.3969,
"eval_samples_per_second": 66.183,
"eval_steps_per_second": 4.321,
"step": 68907
},
{
"epoch": 669.9,
"learning_rate": 6.601941747572816e-06,
"loss": 0.0794,
"step": 69000
},
{
"epoch": 670.0,
"eval_accuracy": 0.2542955326460481,
"eval_loss": 5.5217204093933105,
"eval_runtime": 4.4131,
"eval_samples_per_second": 65.94,
"eval_steps_per_second": 4.305,
"step": 69010
},
{
"epoch": 670.87,
"learning_rate": 6.58252427184466e-06,
"loss": 0.0876,
"step": 69100
},
{
"epoch": 671.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.354759693145752,
"eval_runtime": 4.4085,
"eval_samples_per_second": 66.009,
"eval_steps_per_second": 4.31,
"step": 69113
},
{
"epoch": 671.84,
"learning_rate": 6.5631067961165056e-06,
"loss": 0.0754,
"step": 69200
},
{
"epoch": 672.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.359265327453613,
"eval_runtime": 4.4009,
"eval_samples_per_second": 66.122,
"eval_steps_per_second": 4.317,
"step": 69216
},
{
"epoch": 672.82,
"learning_rate": 6.54368932038835e-06,
"loss": 0.0842,
"step": 69300
},
{
"epoch": 673.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.426083087921143,
"eval_runtime": 4.3984,
"eval_samples_per_second": 66.16,
"eval_steps_per_second": 4.32,
"step": 69319
},
{
"epoch": 673.79,
"learning_rate": 6.524271844660195e-06,
"loss": 0.0832,
"step": 69400
},
{
"epoch": 674.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.360762596130371,
"eval_runtime": 4.3989,
"eval_samples_per_second": 66.153,
"eval_steps_per_second": 4.319,
"step": 69422
},
{
"epoch": 674.76,
"learning_rate": 6.50485436893204e-06,
"loss": 0.0874,
"step": 69500
},
{
"epoch": 675.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.422213554382324,
"eval_runtime": 4.405,
"eval_samples_per_second": 66.062,
"eval_steps_per_second": 4.313,
"step": 69525
},
{
"epoch": 675.73,
"learning_rate": 6.485436893203884e-06,
"loss": 0.0822,
"step": 69600
},
{
"epoch": 676.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.25921630859375,
"eval_runtime": 4.4206,
"eval_samples_per_second": 65.828,
"eval_steps_per_second": 4.298,
"step": 69628
},
{
"epoch": 676.7,
"learning_rate": 6.4660194174757285e-06,
"loss": 0.0852,
"step": 69700
},
{
"epoch": 677.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.290493011474609,
"eval_runtime": 4.4465,
"eval_samples_per_second": 65.445,
"eval_steps_per_second": 4.273,
"step": 69731
},
{
"epoch": 677.67,
"learning_rate": 6.446601941747573e-06,
"loss": 0.0819,
"step": 69800
},
{
"epoch": 678.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.2874016761779785,
"eval_runtime": 4.4284,
"eval_samples_per_second": 65.712,
"eval_steps_per_second": 4.29,
"step": 69834
},
{
"epoch": 678.64,
"learning_rate": 6.4271844660194185e-06,
"loss": 0.0842,
"step": 69900
},
{
"epoch": 679.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.514050483703613,
"eval_runtime": 4.4431,
"eval_samples_per_second": 65.495,
"eval_steps_per_second": 4.276,
"step": 69937
},
{
"epoch": 679.61,
"learning_rate": 6.407766990291263e-06,
"loss": 0.0871,
"step": 70000
},
{
"epoch": 680.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.368432521820068,
"eval_runtime": 4.4109,
"eval_samples_per_second": 65.973,
"eval_steps_per_second": 4.307,
"step": 70040
},
{
"epoch": 680.58,
"learning_rate": 6.388349514563107e-06,
"loss": 0.0756,
"step": 70100
},
{
"epoch": 681.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.452810287475586,
"eval_runtime": 4.4231,
"eval_samples_per_second": 65.792,
"eval_steps_per_second": 4.296,
"step": 70143
},
{
"epoch": 681.55,
"learning_rate": 6.368932038834952e-06,
"loss": 0.0844,
"step": 70200
},
{
"epoch": 682.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.371203422546387,
"eval_runtime": 4.4399,
"eval_samples_per_second": 65.542,
"eval_steps_per_second": 4.279,
"step": 70246
},
{
"epoch": 682.52,
"learning_rate": 6.349514563106797e-06,
"loss": 0.0774,
"step": 70300
},
{
"epoch": 683.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.362085819244385,
"eval_runtime": 4.4613,
"eval_samples_per_second": 65.228,
"eval_steps_per_second": 4.259,
"step": 70349
},
{
"epoch": 683.5,
"learning_rate": 6.330097087378641e-06,
"loss": 0.0914,
"step": 70400
},
{
"epoch": 684.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.372140884399414,
"eval_runtime": 4.3903,
"eval_samples_per_second": 66.283,
"eval_steps_per_second": 4.328,
"step": 70452
},
{
"epoch": 684.47,
"learning_rate": 6.310679611650487e-06,
"loss": 0.0883,
"step": 70500
},
{
"epoch": 685.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.2809271812438965,
"eval_runtime": 4.4334,
"eval_samples_per_second": 65.638,
"eval_steps_per_second": 4.286,
"step": 70555
},
{
"epoch": 685.44,
"learning_rate": 6.2912621359223306e-06,
"loss": 0.0812,
"step": 70600
},
{
"epoch": 686.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.343222618103027,
"eval_runtime": 4.3898,
"eval_samples_per_second": 66.29,
"eval_steps_per_second": 4.328,
"step": 70658
},
{
"epoch": 686.41,
"learning_rate": 6.271844660194175e-06,
"loss": 0.0838,
"step": 70700
},
{
"epoch": 687.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.31311559677124,
"eval_runtime": 4.3958,
"eval_samples_per_second": 66.199,
"eval_steps_per_second": 4.322,
"step": 70761
},
{
"epoch": 687.38,
"learning_rate": 6.25242718446602e-06,
"loss": 0.081,
"step": 70800
},
{
"epoch": 688.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.305084228515625,
"eval_runtime": 4.4287,
"eval_samples_per_second": 65.708,
"eval_steps_per_second": 4.29,
"step": 70864
},
{
"epoch": 688.35,
"learning_rate": 6.233009708737865e-06,
"loss": 0.0785,
"step": 70900
},
{
"epoch": 689.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.239564418792725,
"eval_runtime": 4.452,
"eval_samples_per_second": 65.364,
"eval_steps_per_second": 4.268,
"step": 70967
},
{
"epoch": 689.32,
"learning_rate": 6.213592233009709e-06,
"loss": 0.0842,
"step": 71000
},
{
"epoch": 690.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.2474684715271,
"eval_runtime": 4.4012,
"eval_samples_per_second": 66.119,
"eval_steps_per_second": 4.317,
"step": 71070
},
{
"epoch": 690.29,
"learning_rate": 6.1941747572815535e-06,
"loss": 0.0956,
"step": 71100
},
{
"epoch": 691.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.349338531494141,
"eval_runtime": 4.4106,
"eval_samples_per_second": 65.978,
"eval_steps_per_second": 4.308,
"step": 71173
},
{
"epoch": 691.26,
"learning_rate": 6.174757281553399e-06,
"loss": 0.0823,
"step": 71200
},
{
"epoch": 692.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.211832046508789,
"eval_runtime": 4.489,
"eval_samples_per_second": 64.825,
"eval_steps_per_second": 4.233,
"step": 71276
},
{
"epoch": 692.23,
"learning_rate": 6.1553398058252435e-06,
"loss": 0.0841,
"step": 71300
},
{
"epoch": 693.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.162426471710205,
"eval_runtime": 4.505,
"eval_samples_per_second": 64.596,
"eval_steps_per_second": 4.218,
"step": 71379
},
{
"epoch": 693.2,
"learning_rate": 6.135922330097088e-06,
"loss": 0.078,
"step": 71400
},
{
"epoch": 694.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.222851753234863,
"eval_runtime": 4.393,
"eval_samples_per_second": 66.242,
"eval_steps_per_second": 4.325,
"step": 71482
},
{
"epoch": 694.17,
"learning_rate": 6.116504854368932e-06,
"loss": 0.0831,
"step": 71500
},
{
"epoch": 695.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.266942024230957,
"eval_runtime": 4.437,
"eval_samples_per_second": 65.585,
"eval_steps_per_second": 4.282,
"step": 71585
},
{
"epoch": 695.15,
"learning_rate": 6.097087378640777e-06,
"loss": 0.0863,
"step": 71600
},
{
"epoch": 696.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.276256561279297,
"eval_runtime": 4.4054,
"eval_samples_per_second": 66.055,
"eval_steps_per_second": 4.313,
"step": 71688
},
{
"epoch": 696.12,
"learning_rate": 6.077669902912622e-06,
"loss": 0.0957,
"step": 71700
},
{
"epoch": 697.0,
"eval_accuracy": 0.3333333333333333,
"eval_loss": 5.301415920257568,
"eval_runtime": 4.4203,
"eval_samples_per_second": 65.833,
"eval_steps_per_second": 4.298,
"step": 71791
},
{
"epoch": 697.09,
"learning_rate": 6.058252427184466e-06,
"loss": 0.0775,
"step": 71800
},
{
"epoch": 698.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.38198709487915,
"eval_runtime": 4.4349,
"eval_samples_per_second": 65.616,
"eval_steps_per_second": 4.284,
"step": 71894
},
{
"epoch": 698.06,
"learning_rate": 6.038834951456312e-06,
"loss": 0.0907,
"step": 71900
},
{
"epoch": 699.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.4358649253845215,
"eval_runtime": 4.4417,
"eval_samples_per_second": 65.515,
"eval_steps_per_second": 4.278,
"step": 71997
},
{
"epoch": 699.03,
"learning_rate": 6.0194174757281556e-06,
"loss": 0.0887,
"step": 72000
},
{
"epoch": 700.0,
"learning_rate": 6e-06,
"loss": 0.0802,
"step": 72100
},
{
"epoch": 700.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.401218414306641,
"eval_runtime": 4.4053,
"eval_samples_per_second": 66.056,
"eval_steps_per_second": 4.313,
"step": 72100
},
{
"epoch": 700.97,
"learning_rate": 5.980582524271845e-06,
"loss": 0.0799,
"step": 72200
},
{
"epoch": 701.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.378960132598877,
"eval_runtime": 4.4059,
"eval_samples_per_second": 66.047,
"eval_steps_per_second": 4.312,
"step": 72203
},
{
"epoch": 701.94,
"learning_rate": 5.96116504854369e-06,
"loss": 0.0822,
"step": 72300
},
{
"epoch": 702.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.3592753410339355,
"eval_runtime": 4.4023,
"eval_samples_per_second": 66.101,
"eval_steps_per_second": 4.316,
"step": 72306
},
{
"epoch": 702.91,
"learning_rate": 5.941747572815535e-06,
"loss": 0.0841,
"step": 72400
},
{
"epoch": 703.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.317993640899658,
"eval_runtime": 4.3917,
"eval_samples_per_second": 66.261,
"eval_steps_per_second": 4.326,
"step": 72409
},
{
"epoch": 703.88,
"learning_rate": 5.9223300970873785e-06,
"loss": 0.0883,
"step": 72500
},
{
"epoch": 704.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.275454998016357,
"eval_runtime": 4.46,
"eval_samples_per_second": 65.247,
"eval_steps_per_second": 4.26,
"step": 72512
},
{
"epoch": 704.85,
"learning_rate": 5.902912621359224e-06,
"loss": 0.0863,
"step": 72600
},
{
"epoch": 705.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.243884086608887,
"eval_runtime": 4.3982,
"eval_samples_per_second": 66.163,
"eval_steps_per_second": 4.32,
"step": 72615
},
{
"epoch": 705.83,
"learning_rate": 5.8834951456310685e-06,
"loss": 0.0776,
"step": 72700
},
{
"epoch": 706.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.292761325836182,
"eval_runtime": 4.4045,
"eval_samples_per_second": 66.069,
"eval_steps_per_second": 4.314,
"step": 72718
},
{
"epoch": 706.8,
"learning_rate": 5.864077669902913e-06,
"loss": 0.0854,
"step": 72800
},
{
"epoch": 707.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.34207820892334,
"eval_runtime": 4.3984,
"eval_samples_per_second": 66.16,
"eval_steps_per_second": 4.32,
"step": 72821
},
{
"epoch": 707.77,
"learning_rate": 5.8446601941747585e-06,
"loss": 0.0853,
"step": 72900
},
{
"epoch": 708.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.336627006530762,
"eval_runtime": 4.4249,
"eval_samples_per_second": 65.765,
"eval_steps_per_second": 4.294,
"step": 72924
},
{
"epoch": 708.74,
"learning_rate": 5.825242718446602e-06,
"loss": 0.0864,
"step": 73000
},
{
"epoch": 709.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.3050031661987305,
"eval_runtime": 4.4004,
"eval_samples_per_second": 66.131,
"eval_steps_per_second": 4.318,
"step": 73027
},
{
"epoch": 709.71,
"learning_rate": 5.805825242718447e-06,
"loss": 0.0802,
"step": 73100
},
{
"epoch": 710.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.309476375579834,
"eval_runtime": 4.4093,
"eval_samples_per_second": 65.997,
"eval_steps_per_second": 4.309,
"step": 73130
},
{
"epoch": 710.68,
"learning_rate": 5.786407766990291e-06,
"loss": 0.0868,
"step": 73200
},
{
"epoch": 711.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.308775424957275,
"eval_runtime": 4.4104,
"eval_samples_per_second": 65.981,
"eval_steps_per_second": 4.308,
"step": 73233
},
{
"epoch": 711.65,
"learning_rate": 5.766990291262137e-06,
"loss": 0.0817,
"step": 73300
},
{
"epoch": 712.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.284626007080078,
"eval_runtime": 4.4228,
"eval_samples_per_second": 65.795,
"eval_steps_per_second": 4.296,
"step": 73336
},
{
"epoch": 712.62,
"learning_rate": 5.747572815533981e-06,
"loss": 0.0848,
"step": 73400
},
{
"epoch": 713.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 5.321852207183838,
"eval_runtime": 4.4087,
"eval_samples_per_second": 66.006,
"eval_steps_per_second": 4.31,
"step": 73439
},
{
"epoch": 713.59,
"learning_rate": 5.728155339805825e-06,
"loss": 0.0891,
"step": 73500
},
{
"epoch": 714.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 5.370724678039551,
"eval_runtime": 4.3913,
"eval_samples_per_second": 66.267,
"eval_steps_per_second": 4.327,
"step": 73542
},
{
"epoch": 714.56,
"learning_rate": 5.708737864077671e-06,
"loss": 0.0829,
"step": 73600
},
{
"epoch": 715.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.340518474578857,
"eval_runtime": 4.4354,
"eval_samples_per_second": 65.608,
"eval_steps_per_second": 4.284,
"step": 73645
},
{
"epoch": 715.53,
"learning_rate": 5.689320388349515e-06,
"loss": 0.0882,
"step": 73700
},
{
"epoch": 716.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.187460422515869,
"eval_runtime": 4.399,
"eval_samples_per_second": 66.152,
"eval_steps_per_second": 4.319,
"step": 73748
},
{
"epoch": 716.5,
"learning_rate": 5.66990291262136e-06,
"loss": 0.0944,
"step": 73800
},
{
"epoch": 717.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.266665935516357,
"eval_runtime": 4.4415,
"eval_samples_per_second": 65.519,
"eval_steps_per_second": 4.278,
"step": 73851
},
{
"epoch": 717.48,
"learning_rate": 5.6504854368932035e-06,
"loss": 0.0713,
"step": 73900
},
{
"epoch": 718.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.291965961456299,
"eval_runtime": 4.3974,
"eval_samples_per_second": 66.176,
"eval_steps_per_second": 4.321,
"step": 73954
},
{
"epoch": 718.45,
"learning_rate": 5.631067961165049e-06,
"loss": 0.0855,
"step": 74000
},
{
"epoch": 719.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.172239303588867,
"eval_runtime": 4.4587,
"eval_samples_per_second": 65.266,
"eval_steps_per_second": 4.261,
"step": 74057
},
{
"epoch": 719.42,
"learning_rate": 5.6116504854368935e-06,
"loss": 0.0812,
"step": 74100
},
{
"epoch": 720.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.137197017669678,
"eval_runtime": 4.4382,
"eval_samples_per_second": 65.567,
"eval_steps_per_second": 4.281,
"step": 74160
},
{
"epoch": 720.39,
"learning_rate": 5.592233009708738e-06,
"loss": 0.0731,
"step": 74200
},
{
"epoch": 721.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.101325511932373,
"eval_runtime": 4.4604,
"eval_samples_per_second": 65.241,
"eval_steps_per_second": 4.26,
"step": 74263
},
{
"epoch": 721.36,
"learning_rate": 5.5728155339805835e-06,
"loss": 0.0845,
"step": 74300
},
{
"epoch": 722.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.105453968048096,
"eval_runtime": 4.4086,
"eval_samples_per_second": 66.007,
"eval_steps_per_second": 4.31,
"step": 74366
},
{
"epoch": 722.33,
"learning_rate": 5.553398058252427e-06,
"loss": 0.0857,
"step": 74400
},
{
"epoch": 723.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.216444969177246,
"eval_runtime": 4.4028,
"eval_samples_per_second": 66.094,
"eval_steps_per_second": 4.315,
"step": 74469
},
{
"epoch": 723.3,
"learning_rate": 5.533980582524272e-06,
"loss": 0.0843,
"step": 74500
},
{
"epoch": 724.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.302288055419922,
"eval_runtime": 4.4618,
"eval_samples_per_second": 65.22,
"eval_steps_per_second": 4.258,
"step": 74572
},
{
"epoch": 724.27,
"learning_rate": 5.514563106796117e-06,
"loss": 0.084,
"step": 74600
},
{
"epoch": 725.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.123310089111328,
"eval_runtime": 4.4177,
"eval_samples_per_second": 65.871,
"eval_steps_per_second": 4.301,
"step": 74675
},
{
"epoch": 725.24,
"learning_rate": 5.495145631067962e-06,
"loss": 0.0846,
"step": 74700
},
{
"epoch": 726.0,
"eval_accuracy": 0.26804123711340205,
"eval_loss": 5.316282272338867,
"eval_runtime": 4.435,
"eval_samples_per_second": 65.615,
"eval_steps_per_second": 4.284,
"step": 74778
},
{
"epoch": 726.21,
"learning_rate": 5.4757281553398064e-06,
"loss": 0.0838,
"step": 74800
},
{
"epoch": 727.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.224409580230713,
"eval_runtime": 4.4056,
"eval_samples_per_second": 66.052,
"eval_steps_per_second": 4.313,
"step": 74881
},
{
"epoch": 727.18,
"learning_rate": 5.45631067961165e-06,
"loss": 0.0815,
"step": 74900
},
{
"epoch": 728.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.161591529846191,
"eval_runtime": 4.4105,
"eval_samples_per_second": 65.979,
"eval_steps_per_second": 4.308,
"step": 74984
},
{
"epoch": 728.16,
"learning_rate": 5.436893203883496e-06,
"loss": 0.0849,
"step": 75000
},
{
"epoch": 729.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.15138578414917,
"eval_runtime": 4.412,
"eval_samples_per_second": 65.956,
"eval_steps_per_second": 4.306,
"step": 75087
},
{
"epoch": 729.13,
"learning_rate": 5.41747572815534e-06,
"loss": 0.0818,
"step": 75100
},
{
"epoch": 730.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.14281702041626,
"eval_runtime": 4.4541,
"eval_samples_per_second": 65.333,
"eval_steps_per_second": 4.266,
"step": 75190
},
{
"epoch": 730.1,
"learning_rate": 5.398058252427185e-06,
"loss": 0.0751,
"step": 75200
},
{
"epoch": 731.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.182039737701416,
"eval_runtime": 4.3971,
"eval_samples_per_second": 66.18,
"eval_steps_per_second": 4.321,
"step": 75293
},
{
"epoch": 731.07,
"learning_rate": 5.37864077669903e-06,
"loss": 0.0766,
"step": 75300
},
{
"epoch": 732.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.232609748840332,
"eval_runtime": 4.4105,
"eval_samples_per_second": 65.978,
"eval_steps_per_second": 4.308,
"step": 75396
},
{
"epoch": 732.04,
"learning_rate": 5.359223300970874e-06,
"loss": 0.0772,
"step": 75400
},
{
"epoch": 733.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.208255290985107,
"eval_runtime": 4.4325,
"eval_samples_per_second": 65.652,
"eval_steps_per_second": 4.287,
"step": 75499
},
{
"epoch": 733.01,
"learning_rate": 5.3398058252427185e-06,
"loss": 0.0871,
"step": 75500
},
{
"epoch": 733.98,
"learning_rate": 5.320388349514564e-06,
"loss": 0.0846,
"step": 75600
},
{
"epoch": 734.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.325695037841797,
"eval_runtime": 4.4062,
"eval_samples_per_second": 66.044,
"eval_steps_per_second": 4.312,
"step": 75602
},
{
"epoch": 734.95,
"learning_rate": 5.3009708737864085e-06,
"loss": 0.0811,
"step": 75700
},
{
"epoch": 735.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.345978736877441,
"eval_runtime": 4.451,
"eval_samples_per_second": 65.378,
"eval_steps_per_second": 4.269,
"step": 75705
},
{
"epoch": 735.92,
"learning_rate": 5.281553398058253e-06,
"loss": 0.089,
"step": 75800
},
{
"epoch": 736.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.300384521484375,
"eval_runtime": 4.4009,
"eval_samples_per_second": 66.124,
"eval_steps_per_second": 4.317,
"step": 75808
},
{
"epoch": 736.89,
"learning_rate": 5.262135922330097e-06,
"loss": 0.0711,
"step": 75900
},
{
"epoch": 737.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.24236536026001,
"eval_runtime": 4.4122,
"eval_samples_per_second": 65.954,
"eval_steps_per_second": 4.306,
"step": 75911
},
{
"epoch": 737.86,
"learning_rate": 5.242718446601942e-06,
"loss": 0.0852,
"step": 76000
},
{
"epoch": 738.0,
"eval_accuracy": 0.2611683848797251,
"eval_loss": 5.3143439292907715,
"eval_runtime": 4.3939,
"eval_samples_per_second": 66.229,
"eval_steps_per_second": 4.324,
"step": 76014
},
{
"epoch": 738.83,
"learning_rate": 5.223300970873787e-06,
"loss": 0.0798,
"step": 76100
},
{
"epoch": 739.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 5.32684326171875,
"eval_runtime": 4.4153,
"eval_samples_per_second": 65.907,
"eval_steps_per_second": 4.303,
"step": 76117
},
{
"epoch": 739.81,
"learning_rate": 5.2038834951456314e-06,
"loss": 0.0783,
"step": 76200
},
{
"epoch": 740.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.26964807510376,
"eval_runtime": 4.4134,
"eval_samples_per_second": 65.935,
"eval_steps_per_second": 4.305,
"step": 76220
},
{
"epoch": 740.78,
"learning_rate": 5.184466019417476e-06,
"loss": 0.086,
"step": 76300
},
{
"epoch": 741.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.274394989013672,
"eval_runtime": 4.3931,
"eval_samples_per_second": 66.24,
"eval_steps_per_second": 4.325,
"step": 76323
},
{
"epoch": 741.75,
"learning_rate": 5.165048543689321e-06,
"loss": 0.0778,
"step": 76400
},
{
"epoch": 742.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.327398300170898,
"eval_runtime": 4.3897,
"eval_samples_per_second": 66.291,
"eval_steps_per_second": 4.328,
"step": 76426
},
{
"epoch": 742.72,
"learning_rate": 5.145631067961165e-06,
"loss": 0.0832,
"step": 76500
},
{
"epoch": 743.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.329669952392578,
"eval_runtime": 4.4137,
"eval_samples_per_second": 65.931,
"eval_steps_per_second": 4.305,
"step": 76529
},
{
"epoch": 743.69,
"learning_rate": 5.126213592233011e-06,
"loss": 0.0826,
"step": 76600
},
{
"epoch": 744.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.285783767700195,
"eval_runtime": 4.4299,
"eval_samples_per_second": 65.689,
"eval_steps_per_second": 4.289,
"step": 76632
},
{
"epoch": 744.66,
"learning_rate": 5.106796116504855e-06,
"loss": 0.0792,
"step": 76700
},
{
"epoch": 745.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.336843967437744,
"eval_runtime": 4.5001,
"eval_samples_per_second": 64.665,
"eval_steps_per_second": 4.222,
"step": 76735
},
{
"epoch": 745.63,
"learning_rate": 5.087378640776699e-06,
"loss": 0.0787,
"step": 76800
},
{
"epoch": 746.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.3573713302612305,
"eval_runtime": 4.3936,
"eval_samples_per_second": 66.232,
"eval_steps_per_second": 4.324,
"step": 76838
},
{
"epoch": 746.6,
"learning_rate": 5.0679611650485435e-06,
"loss": 0.0732,
"step": 76900
},
{
"epoch": 747.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.346883773803711,
"eval_runtime": 4.4248,
"eval_samples_per_second": 65.766,
"eval_steps_per_second": 4.294,
"step": 76941
},
{
"epoch": 747.57,
"learning_rate": 5.048543689320389e-06,
"loss": 0.0857,
"step": 77000
},
{
"epoch": 748.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.297471523284912,
"eval_runtime": 4.3992,
"eval_samples_per_second": 66.148,
"eval_steps_per_second": 4.319,
"step": 77044
},
{
"epoch": 748.54,
"learning_rate": 5.0291262135922335e-06,
"loss": 0.07,
"step": 77100
},
{
"epoch": 749.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.337193489074707,
"eval_runtime": 4.4061,
"eval_samples_per_second": 66.045,
"eval_steps_per_second": 4.312,
"step": 77147
},
{
"epoch": 749.51,
"learning_rate": 5.009708737864078e-06,
"loss": 0.0829,
"step": 77200
},
{
"epoch": 750.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.252513408660889,
"eval_runtime": 4.4464,
"eval_samples_per_second": 65.447,
"eval_steps_per_second": 4.273,
"step": 77250
},
{
"epoch": 750.49,
"learning_rate": 4.990291262135923e-06,
"loss": 0.0794,
"step": 77300
},
{
"epoch": 751.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.331362247467041,
"eval_runtime": 4.4255,
"eval_samples_per_second": 65.756,
"eval_steps_per_second": 4.293,
"step": 77353
},
{
"epoch": 751.46,
"learning_rate": 4.970873786407767e-06,
"loss": 0.0781,
"step": 77400
},
{
"epoch": 752.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.331817626953125,
"eval_runtime": 4.4236,
"eval_samples_per_second": 65.783,
"eval_steps_per_second": 4.295,
"step": 77456
},
{
"epoch": 752.43,
"learning_rate": 4.951456310679612e-06,
"loss": 0.0914,
"step": 77500
},
{
"epoch": 753.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.265148162841797,
"eval_runtime": 4.423,
"eval_samples_per_second": 65.792,
"eval_steps_per_second": 4.296,
"step": 77559
},
{
"epoch": 753.4,
"learning_rate": 4.932038834951457e-06,
"loss": 0.0822,
"step": 77600
},
{
"epoch": 754.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.355736255645752,
"eval_runtime": 4.4337,
"eval_samples_per_second": 65.633,
"eval_steps_per_second": 4.285,
"step": 77662
},
{
"epoch": 754.37,
"learning_rate": 4.912621359223301e-06,
"loss": 0.0782,
"step": 77700
},
{
"epoch": 755.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.41204309463501,
"eval_runtime": 4.4469,
"eval_samples_per_second": 65.439,
"eval_steps_per_second": 4.273,
"step": 77765
},
{
"epoch": 755.34,
"learning_rate": 4.8932038834951465e-06,
"loss": 0.0828,
"step": 77800
},
{
"epoch": 756.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.419083118438721,
"eval_runtime": 4.4195,
"eval_samples_per_second": 65.845,
"eval_steps_per_second": 4.299,
"step": 77868
},
{
"epoch": 756.31,
"learning_rate": 4.87378640776699e-06,
"loss": 0.0747,
"step": 77900
},
{
"epoch": 757.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.410015106201172,
"eval_runtime": 4.4331,
"eval_samples_per_second": 65.642,
"eval_steps_per_second": 4.286,
"step": 77971
},
{
"epoch": 757.28,
"learning_rate": 4.854368932038836e-06,
"loss": 0.0765,
"step": 78000
},
{
"epoch": 758.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.383244514465332,
"eval_runtime": 4.4023,
"eval_samples_per_second": 66.101,
"eval_steps_per_second": 4.316,
"step": 78074
},
{
"epoch": 758.25,
"learning_rate": 4.834951456310679e-06,
"loss": 0.077,
"step": 78100
},
{
"epoch": 759.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.3800554275512695,
"eval_runtime": 4.4424,
"eval_samples_per_second": 65.505,
"eval_steps_per_second": 4.277,
"step": 78177
},
{
"epoch": 759.22,
"learning_rate": 4.815533980582525e-06,
"loss": 0.0751,
"step": 78200
},
{
"epoch": 760.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.327369213104248,
"eval_runtime": 4.4113,
"eval_samples_per_second": 65.967,
"eval_steps_per_second": 4.307,
"step": 78280
},
{
"epoch": 760.19,
"learning_rate": 4.796116504854369e-06,
"loss": 0.0821,
"step": 78300
},
{
"epoch": 761.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.391132354736328,
"eval_runtime": 4.3952,
"eval_samples_per_second": 66.208,
"eval_steps_per_second": 4.323,
"step": 78383
},
{
"epoch": 761.17,
"learning_rate": 4.776699029126214e-06,
"loss": 0.0854,
"step": 78400
},
{
"epoch": 762.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.411304473876953,
"eval_runtime": 4.3935,
"eval_samples_per_second": 66.234,
"eval_steps_per_second": 4.325,
"step": 78486
},
{
"epoch": 762.14,
"learning_rate": 4.7572815533980585e-06,
"loss": 0.0765,
"step": 78500
},
{
"epoch": 763.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.364217758178711,
"eval_runtime": 4.5151,
"eval_samples_per_second": 64.45,
"eval_steps_per_second": 4.208,
"step": 78589
},
{
"epoch": 763.11,
"learning_rate": 4.737864077669903e-06,
"loss": 0.0787,
"step": 78600
},
{
"epoch": 764.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.354491710662842,
"eval_runtime": 4.518,
"eval_samples_per_second": 64.408,
"eval_steps_per_second": 4.205,
"step": 78692
},
{
"epoch": 764.08,
"learning_rate": 4.718446601941748e-06,
"loss": 0.0842,
"step": 78700
},
{
"epoch": 765.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.398560047149658,
"eval_runtime": 4.5154,
"eval_samples_per_second": 64.447,
"eval_steps_per_second": 4.208,
"step": 78795
},
{
"epoch": 765.05,
"learning_rate": 4.699029126213593e-06,
"loss": 0.0856,
"step": 78800
},
{
"epoch": 766.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.403796672821045,
"eval_runtime": 4.4093,
"eval_samples_per_second": 65.997,
"eval_steps_per_second": 4.309,
"step": 78898
},
{
"epoch": 766.02,
"learning_rate": 4.679611650485437e-06,
"loss": 0.0777,
"step": 78900
},
{
"epoch": 766.99,
"learning_rate": 4.660194174757282e-06,
"loss": 0.082,
"step": 79000
},
{
"epoch": 767.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.3815436363220215,
"eval_runtime": 4.4585,
"eval_samples_per_second": 65.269,
"eval_steps_per_second": 4.262,
"step": 79001
},
{
"epoch": 767.96,
"learning_rate": 4.640776699029126e-06,
"loss": 0.0787,
"step": 79100
},
{
"epoch": 768.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.409327507019043,
"eval_runtime": 4.4203,
"eval_samples_per_second": 65.832,
"eval_steps_per_second": 4.298,
"step": 79104
},
{
"epoch": 768.93,
"learning_rate": 4.6213592233009715e-06,
"loss": 0.0731,
"step": 79200
},
{
"epoch": 769.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.396090984344482,
"eval_runtime": 4.4163,
"eval_samples_per_second": 65.893,
"eval_steps_per_second": 4.302,
"step": 79207
},
{
"epoch": 769.9,
"learning_rate": 4.601941747572816e-06,
"loss": 0.0762,
"step": 79300
},
{
"epoch": 770.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.37462043762207,
"eval_runtime": 4.4241,
"eval_samples_per_second": 65.776,
"eval_steps_per_second": 4.295,
"step": 79310
},
{
"epoch": 770.87,
"learning_rate": 4.582524271844661e-06,
"loss": 0.0874,
"step": 79400
},
{
"epoch": 771.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.398296356201172,
"eval_runtime": 4.419,
"eval_samples_per_second": 65.853,
"eval_steps_per_second": 4.3,
"step": 79413
},
{
"epoch": 771.84,
"learning_rate": 4.563106796116505e-06,
"loss": 0.0835,
"step": 79500
},
{
"epoch": 772.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.426390171051025,
"eval_runtime": 4.3989,
"eval_samples_per_second": 66.153,
"eval_steps_per_second": 4.319,
"step": 79516
},
{
"epoch": 772.82,
"learning_rate": 4.54368932038835e-06,
"loss": 0.0841,
"step": 79600
},
{
"epoch": 773.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.42516565322876,
"eval_runtime": 4.415,
"eval_samples_per_second": 65.911,
"eval_steps_per_second": 4.303,
"step": 79619
},
{
"epoch": 773.79,
"learning_rate": 4.524271844660194e-06,
"loss": 0.0792,
"step": 79700
},
{
"epoch": 774.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.373010158538818,
"eval_runtime": 4.4077,
"eval_samples_per_second": 66.021,
"eval_steps_per_second": 4.311,
"step": 79722
},
{
"epoch": 774.76,
"learning_rate": 4.504854368932039e-06,
"loss": 0.0816,
"step": 79800
},
{
"epoch": 775.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.383403778076172,
"eval_runtime": 4.4067,
"eval_samples_per_second": 66.036,
"eval_steps_per_second": 4.312,
"step": 79825
},
{
"epoch": 775.73,
"learning_rate": 4.4854368932038836e-06,
"loss": 0.0928,
"step": 79900
},
{
"epoch": 776.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.469430923461914,
"eval_runtime": 4.3928,
"eval_samples_per_second": 66.244,
"eval_steps_per_second": 4.325,
"step": 79928
},
{
"epoch": 776.7,
"learning_rate": 4.466019417475729e-06,
"loss": 0.0739,
"step": 80000
},
{
"epoch": 777.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.380051612854004,
"eval_runtime": 4.4336,
"eval_samples_per_second": 65.635,
"eval_steps_per_second": 4.285,
"step": 80031
},
{
"epoch": 777.67,
"learning_rate": 4.446601941747573e-06,
"loss": 0.0778,
"step": 80100
},
{
"epoch": 778.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.382711887359619,
"eval_runtime": 4.3971,
"eval_samples_per_second": 66.18,
"eval_steps_per_second": 4.321,
"step": 80134
},
{
"epoch": 778.64,
"learning_rate": 4.427184466019418e-06,
"loss": 0.0826,
"step": 80200
},
{
"epoch": 779.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.497971534729004,
"eval_runtime": 4.3998,
"eval_samples_per_second": 66.139,
"eval_steps_per_second": 4.318,
"step": 80237
},
{
"epoch": 779.61,
"learning_rate": 4.407766990291263e-06,
"loss": 0.0873,
"step": 80300
},
{
"epoch": 780.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.38844108581543,
"eval_runtime": 4.4096,
"eval_samples_per_second": 65.993,
"eval_steps_per_second": 4.309,
"step": 80340
},
{
"epoch": 780.58,
"learning_rate": 4.388349514563107e-06,
"loss": 0.0762,
"step": 80400
},
{
"epoch": 781.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.383063316345215,
"eval_runtime": 4.3973,
"eval_samples_per_second": 66.176,
"eval_steps_per_second": 4.321,
"step": 80443
},
{
"epoch": 781.55,
"learning_rate": 4.368932038834952e-06,
"loss": 0.0802,
"step": 80500
},
{
"epoch": 782.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.444866180419922,
"eval_runtime": 4.3986,
"eval_samples_per_second": 66.158,
"eval_steps_per_second": 4.32,
"step": 80546
},
{
"epoch": 782.52,
"learning_rate": 4.3495145631067965e-06,
"loss": 0.0832,
"step": 80600
},
{
"epoch": 783.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.402950286865234,
"eval_runtime": 4.3971,
"eval_samples_per_second": 66.18,
"eval_steps_per_second": 4.321,
"step": 80649
},
{
"epoch": 783.5,
"learning_rate": 4.330097087378641e-06,
"loss": 0.0716,
"step": 80700
},
{
"epoch": 784.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.450810432434082,
"eval_runtime": 4.3963,
"eval_samples_per_second": 66.192,
"eval_steps_per_second": 4.322,
"step": 80752
},
{
"epoch": 784.47,
"learning_rate": 4.310679611650486e-06,
"loss": 0.0885,
"step": 80800
},
{
"epoch": 785.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.386898517608643,
"eval_runtime": 4.4618,
"eval_samples_per_second": 65.221,
"eval_steps_per_second": 4.258,
"step": 80855
},
{
"epoch": 785.44,
"learning_rate": 4.29126213592233e-06,
"loss": 0.0685,
"step": 80900
},
{
"epoch": 786.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.369156360626221,
"eval_runtime": 4.4618,
"eval_samples_per_second": 65.22,
"eval_steps_per_second": 4.258,
"step": 80958
},
{
"epoch": 786.41,
"learning_rate": 4.271844660194175e-06,
"loss": 0.0797,
"step": 81000
},
{
"epoch": 787.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.388444423675537,
"eval_runtime": 4.4049,
"eval_samples_per_second": 66.062,
"eval_steps_per_second": 4.313,
"step": 81061
},
{
"epoch": 787.38,
"learning_rate": 4.252427184466019e-06,
"loss": 0.0748,
"step": 81100
},
{
"epoch": 788.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.32634973526001,
"eval_runtime": 4.4157,
"eval_samples_per_second": 65.901,
"eval_steps_per_second": 4.303,
"step": 81164
},
{
"epoch": 788.35,
"learning_rate": 4.233009708737865e-06,
"loss": 0.0741,
"step": 81200
},
{
"epoch": 789.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.352422714233398,
"eval_runtime": 4.5192,
"eval_samples_per_second": 64.392,
"eval_steps_per_second": 4.204,
"step": 81267
},
{
"epoch": 789.32,
"learning_rate": 4.213592233009709e-06,
"loss": 0.0767,
"step": 81300
},
{
"epoch": 790.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 5.262473106384277,
"eval_runtime": 4.419,
"eval_samples_per_second": 65.852,
"eval_steps_per_second": 4.3,
"step": 81370
},
{
"epoch": 790.29,
"learning_rate": 4.194174757281554e-06,
"loss": 0.0814,
"step": 81400
},
{
"epoch": 791.0,
"eval_accuracy": 0.32989690721649484,
"eval_loss": 5.266846656799316,
"eval_runtime": 4.4649,
"eval_samples_per_second": 65.176,
"eval_steps_per_second": 4.255,
"step": 81473
},
{
"epoch": 791.26,
"learning_rate": 4.1747572815533986e-06,
"loss": 0.0845,
"step": 81500
},
{
"epoch": 792.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.23559045791626,
"eval_runtime": 4.4615,
"eval_samples_per_second": 65.225,
"eval_steps_per_second": 4.259,
"step": 81576
},
{
"epoch": 792.23,
"learning_rate": 4.155339805825243e-06,
"loss": 0.076,
"step": 81600
},
{
"epoch": 793.0,
"eval_accuracy": 0.3230240549828179,
"eval_loss": 5.261579990386963,
"eval_runtime": 4.4103,
"eval_samples_per_second": 65.982,
"eval_steps_per_second": 4.308,
"step": 81679
},
{
"epoch": 793.2,
"learning_rate": 4.135922330097088e-06,
"loss": 0.0769,
"step": 81700
},
{
"epoch": 794.0,
"eval_accuracy": 0.3333333333333333,
"eval_loss": 5.304605960845947,
"eval_runtime": 4.4261,
"eval_samples_per_second": 65.746,
"eval_steps_per_second": 4.293,
"step": 81782
},
{
"epoch": 794.17,
"learning_rate": 4.116504854368932e-06,
"loss": 0.0866,
"step": 81800
},
{
"epoch": 795.0,
"eval_accuracy": 0.32989690721649484,
"eval_loss": 5.290163040161133,
"eval_runtime": 4.4172,
"eval_samples_per_second": 65.88,
"eval_steps_per_second": 4.301,
"step": 81885
},
{
"epoch": 795.15,
"learning_rate": 4.097087378640777e-06,
"loss": 0.0772,
"step": 81900
},
{
"epoch": 796.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.3077616691589355,
"eval_runtime": 4.4159,
"eval_samples_per_second": 65.898,
"eval_steps_per_second": 4.303,
"step": 81988
},
{
"epoch": 796.12,
"learning_rate": 4.0776699029126215e-06,
"loss": 0.079,
"step": 82000
},
{
"epoch": 797.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.288947105407715,
"eval_runtime": 4.4024,
"eval_samples_per_second": 66.101,
"eval_steps_per_second": 4.316,
"step": 82091
},
{
"epoch": 797.09,
"learning_rate": 4.058252427184466e-06,
"loss": 0.0797,
"step": 82100
},
{
"epoch": 798.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.215836524963379,
"eval_runtime": 4.402,
"eval_samples_per_second": 66.107,
"eval_steps_per_second": 4.316,
"step": 82194
},
{
"epoch": 798.06,
"learning_rate": 4.038834951456311e-06,
"loss": 0.0802,
"step": 82200
},
{
"epoch": 799.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.312952041625977,
"eval_runtime": 4.3928,
"eval_samples_per_second": 66.245,
"eval_steps_per_second": 4.325,
"step": 82297
},
{
"epoch": 799.03,
"learning_rate": 4.019417475728156e-06,
"loss": 0.0736,
"step": 82300
},
{
"epoch": 800.0,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0859,
"step": 82400
},
{
"epoch": 800.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.284284591674805,
"eval_runtime": 4.4141,
"eval_samples_per_second": 65.926,
"eval_steps_per_second": 4.304,
"step": 82400
},
{
"epoch": 800.97,
"learning_rate": 3.980582524271845e-06,
"loss": 0.0789,
"step": 82500
},
{
"epoch": 801.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.243020534515381,
"eval_runtime": 4.4114,
"eval_samples_per_second": 65.965,
"eval_steps_per_second": 4.307,
"step": 82503
},
{
"epoch": 801.94,
"learning_rate": 3.96116504854369e-06,
"loss": 0.0809,
"step": 82600
},
{
"epoch": 802.0,
"eval_accuracy": 0.3436426116838488,
"eval_loss": 5.216651916503906,
"eval_runtime": 4.3885,
"eval_samples_per_second": 66.31,
"eval_steps_per_second": 4.33,
"step": 82606
},
{
"epoch": 802.91,
"learning_rate": 3.941747572815534e-06,
"loss": 0.0787,
"step": 82700
},
{
"epoch": 803.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.220209121704102,
"eval_runtime": 4.4262,
"eval_samples_per_second": 65.745,
"eval_steps_per_second": 4.293,
"step": 82709
},
{
"epoch": 803.88,
"learning_rate": 3.922330097087379e-06,
"loss": 0.0878,
"step": 82800
},
{
"epoch": 804.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.356659889221191,
"eval_runtime": 4.4019,
"eval_samples_per_second": 66.107,
"eval_steps_per_second": 4.316,
"step": 82812
},
{
"epoch": 804.85,
"learning_rate": 3.902912621359224e-06,
"loss": 0.0772,
"step": 82900
},
{
"epoch": 805.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.398636341094971,
"eval_runtime": 4.4271,
"eval_samples_per_second": 65.731,
"eval_steps_per_second": 4.292,
"step": 82915
},
{
"epoch": 805.83,
"learning_rate": 3.883495145631068e-06,
"loss": 0.0809,
"step": 83000
},
{
"epoch": 806.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.357775688171387,
"eval_runtime": 4.4302,
"eval_samples_per_second": 65.685,
"eval_steps_per_second": 4.289,
"step": 83018
},
{
"epoch": 806.8,
"learning_rate": 3.864077669902913e-06,
"loss": 0.0815,
"step": 83100
},
{
"epoch": 807.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.314184188842773,
"eval_runtime": 4.4506,
"eval_samples_per_second": 65.384,
"eval_steps_per_second": 4.269,
"step": 83121
},
{
"epoch": 807.77,
"learning_rate": 3.844660194174757e-06,
"loss": 0.0762,
"step": 83200
},
{
"epoch": 808.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.285727500915527,
"eval_runtime": 4.4066,
"eval_samples_per_second": 66.038,
"eval_steps_per_second": 4.312,
"step": 83224
},
{
"epoch": 808.74,
"learning_rate": 3.825242718446602e-06,
"loss": 0.0732,
"step": 83300
},
{
"epoch": 809.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.2570672035217285,
"eval_runtime": 4.3998,
"eval_samples_per_second": 66.14,
"eval_steps_per_second": 4.318,
"step": 83327
},
{
"epoch": 809.71,
"learning_rate": 3.8058252427184465e-06,
"loss": 0.0779,
"step": 83400
},
{
"epoch": 810.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.28815221786499,
"eval_runtime": 4.3987,
"eval_samples_per_second": 66.156,
"eval_steps_per_second": 4.319,
"step": 83430
},
{
"epoch": 810.68,
"learning_rate": 3.7864077669902915e-06,
"loss": 0.0872,
"step": 83500
},
{
"epoch": 811.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.345547676086426,
"eval_runtime": 4.4198,
"eval_samples_per_second": 65.84,
"eval_steps_per_second": 4.299,
"step": 83533
},
{
"epoch": 811.65,
"learning_rate": 3.7669902912621365e-06,
"loss": 0.076,
"step": 83600
},
{
"epoch": 812.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.280517101287842,
"eval_runtime": 4.4067,
"eval_samples_per_second": 66.037,
"eval_steps_per_second": 4.312,
"step": 83636
},
{
"epoch": 812.62,
"learning_rate": 3.7475728155339807e-06,
"loss": 0.0894,
"step": 83700
},
{
"epoch": 813.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.292069435119629,
"eval_runtime": 4.4116,
"eval_samples_per_second": 65.963,
"eval_steps_per_second": 4.307,
"step": 83739
},
{
"epoch": 813.59,
"learning_rate": 3.7281553398058257e-06,
"loss": 0.0724,
"step": 83800
},
{
"epoch": 814.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.350996971130371,
"eval_runtime": 4.4025,
"eval_samples_per_second": 66.099,
"eval_steps_per_second": 4.316,
"step": 83842
},
{
"epoch": 814.56,
"learning_rate": 3.70873786407767e-06,
"loss": 0.0828,
"step": 83900
},
{
"epoch": 815.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.30106782913208,
"eval_runtime": 4.4018,
"eval_samples_per_second": 66.109,
"eval_steps_per_second": 4.316,
"step": 83945
},
{
"epoch": 815.53,
"learning_rate": 3.689320388349515e-06,
"loss": 0.0818,
"step": 84000
},
{
"epoch": 816.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 5.294423580169678,
"eval_runtime": 4.4503,
"eval_samples_per_second": 65.389,
"eval_steps_per_second": 4.269,
"step": 84048
},
{
"epoch": 816.5,
"learning_rate": 3.66990291262136e-06,
"loss": 0.0728,
"step": 84100
},
{
"epoch": 817.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.252551555633545,
"eval_runtime": 4.4458,
"eval_samples_per_second": 65.456,
"eval_steps_per_second": 4.274,
"step": 84151
},
{
"epoch": 817.48,
"learning_rate": 3.650485436893204e-06,
"loss": 0.0776,
"step": 84200
},
{
"epoch": 818.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.264585494995117,
"eval_runtime": 4.4128,
"eval_samples_per_second": 65.945,
"eval_steps_per_second": 4.306,
"step": 84254
},
{
"epoch": 818.45,
"learning_rate": 3.631067961165049e-06,
"loss": 0.0768,
"step": 84300
},
{
"epoch": 819.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.3151092529296875,
"eval_runtime": 4.3951,
"eval_samples_per_second": 66.209,
"eval_steps_per_second": 4.323,
"step": 84357
},
{
"epoch": 819.42,
"learning_rate": 3.611650485436893e-06,
"loss": 0.0725,
"step": 84400
},
{
"epoch": 820.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.304262161254883,
"eval_runtime": 4.469,
"eval_samples_per_second": 65.115,
"eval_steps_per_second": 4.251,
"step": 84460
},
{
"epoch": 820.39,
"learning_rate": 3.592233009708738e-06,
"loss": 0.077,
"step": 84500
},
{
"epoch": 821.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.353638172149658,
"eval_runtime": 4.4311,
"eval_samples_per_second": 65.673,
"eval_steps_per_second": 4.288,
"step": 84563
},
{
"epoch": 821.36,
"learning_rate": 3.5728155339805828e-06,
"loss": 0.0815,
"step": 84600
},
{
"epoch": 822.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.324342727661133,
"eval_runtime": 4.4038,
"eval_samples_per_second": 66.079,
"eval_steps_per_second": 4.314,
"step": 84666
},
{
"epoch": 822.33,
"learning_rate": 3.5533980582524273e-06,
"loss": 0.0753,
"step": 84700
},
{
"epoch": 823.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.3727874755859375,
"eval_runtime": 4.4068,
"eval_samples_per_second": 66.034,
"eval_steps_per_second": 4.312,
"step": 84769
},
{
"epoch": 823.3,
"learning_rate": 3.5339805825242724e-06,
"loss": 0.0837,
"step": 84800
},
{
"epoch": 824.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.35664176940918,
"eval_runtime": 4.4441,
"eval_samples_per_second": 65.48,
"eval_steps_per_second": 4.275,
"step": 84872
},
{
"epoch": 824.27,
"learning_rate": 3.5145631067961165e-06,
"loss": 0.0786,
"step": 84900
},
{
"epoch": 825.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.3486647605896,
"eval_runtime": 4.4202,
"eval_samples_per_second": 65.834,
"eval_steps_per_second": 4.298,
"step": 84975
},
{
"epoch": 825.24,
"learning_rate": 3.4951456310679615e-06,
"loss": 0.0897,
"step": 85000
},
{
"epoch": 826.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.384740829467773,
"eval_runtime": 4.4168,
"eval_samples_per_second": 65.884,
"eval_steps_per_second": 4.302,
"step": 85078
},
{
"epoch": 826.21,
"learning_rate": 3.475728155339806e-06,
"loss": 0.079,
"step": 85100
},
{
"epoch": 827.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.357576847076416,
"eval_runtime": 4.4063,
"eval_samples_per_second": 66.042,
"eval_steps_per_second": 4.312,
"step": 85181
},
{
"epoch": 827.18,
"learning_rate": 3.4563106796116507e-06,
"loss": 0.0791,
"step": 85200
},
{
"epoch": 828.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.343855857849121,
"eval_runtime": 4.4485,
"eval_samples_per_second": 65.415,
"eval_steps_per_second": 4.271,
"step": 85284
},
{
"epoch": 828.16,
"learning_rate": 3.4368932038834957e-06,
"loss": 0.0778,
"step": 85300
},
{
"epoch": 829.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.3456902503967285,
"eval_runtime": 4.4189,
"eval_samples_per_second": 65.853,
"eval_steps_per_second": 4.3,
"step": 85387
},
{
"epoch": 829.13,
"learning_rate": 3.41747572815534e-06,
"loss": 0.0732,
"step": 85400
},
{
"epoch": 830.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.346973896026611,
"eval_runtime": 4.4395,
"eval_samples_per_second": 65.549,
"eval_steps_per_second": 4.28,
"step": 85490
},
{
"epoch": 830.1,
"learning_rate": 3.398058252427185e-06,
"loss": 0.0752,
"step": 85500
},
{
"epoch": 831.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.329358100891113,
"eval_runtime": 4.4045,
"eval_samples_per_second": 66.068,
"eval_steps_per_second": 4.314,
"step": 85593
},
{
"epoch": 831.07,
"learning_rate": 3.3786407766990294e-06,
"loss": 0.0823,
"step": 85600
},
{
"epoch": 832.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.416337966918945,
"eval_runtime": 4.4167,
"eval_samples_per_second": 65.887,
"eval_steps_per_second": 4.302,
"step": 85696
},
{
"epoch": 832.04,
"learning_rate": 3.359223300970874e-06,
"loss": 0.0803,
"step": 85700
},
{
"epoch": 833.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.396190643310547,
"eval_runtime": 4.4054,
"eval_samples_per_second": 66.055,
"eval_steps_per_second": 4.313,
"step": 85799
},
{
"epoch": 833.01,
"learning_rate": 3.3398058252427186e-06,
"loss": 0.0769,
"step": 85800
},
{
"epoch": 833.98,
"learning_rate": 3.320388349514563e-06,
"loss": 0.0792,
"step": 85900
},
{
"epoch": 834.0,
"eval_accuracy": 0.3127147766323024,
"eval_loss": 5.394441604614258,
"eval_runtime": 4.4439,
"eval_samples_per_second": 65.483,
"eval_steps_per_second": 4.276,
"step": 85902
},
{
"epoch": 834.95,
"learning_rate": 3.300970873786408e-06,
"loss": 0.0701,
"step": 86000
},
{
"epoch": 835.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.410513877868652,
"eval_runtime": 4.415,
"eval_samples_per_second": 65.912,
"eval_steps_per_second": 4.304,
"step": 86005
},
{
"epoch": 835.92,
"learning_rate": 3.2815533980582528e-06,
"loss": 0.0853,
"step": 86100
},
{
"epoch": 836.0,
"eval_accuracy": 0.3161512027491409,
"eval_loss": 5.340237140655518,
"eval_runtime": 4.4626,
"eval_samples_per_second": 65.209,
"eval_steps_per_second": 4.258,
"step": 86108
},
{
"epoch": 836.89,
"learning_rate": 3.2621359223300974e-06,
"loss": 0.0753,
"step": 86200
},
{
"epoch": 837.0,
"eval_accuracy": 0.31958762886597936,
"eval_loss": 5.384557723999023,
"eval_runtime": 4.4269,
"eval_samples_per_second": 65.734,
"eval_steps_per_second": 4.292,
"step": 86211
},
{
"epoch": 837.86,
"learning_rate": 3.242718446601942e-06,
"loss": 0.0867,
"step": 86300
},
{
"epoch": 838.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.402867317199707,
"eval_runtime": 4.4242,
"eval_samples_per_second": 65.774,
"eval_steps_per_second": 4.295,
"step": 86314
},
{
"epoch": 838.83,
"learning_rate": 3.2233009708737865e-06,
"loss": 0.0722,
"step": 86400
},
{
"epoch": 839.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.361295223236084,
"eval_runtime": 4.4189,
"eval_samples_per_second": 65.854,
"eval_steps_per_second": 4.3,
"step": 86417
},
{
"epoch": 839.81,
"learning_rate": 3.2038834951456315e-06,
"loss": 0.0686,
"step": 86500
},
{
"epoch": 840.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.396561622619629,
"eval_runtime": 4.4714,
"eval_samples_per_second": 65.08,
"eval_steps_per_second": 4.249,
"step": 86520
},
{
"epoch": 840.78,
"learning_rate": 3.184466019417476e-06,
"loss": 0.0891,
"step": 86600
},
{
"epoch": 841.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.3979573249816895,
"eval_runtime": 4.414,
"eval_samples_per_second": 65.927,
"eval_steps_per_second": 4.305,
"step": 86623
},
{
"epoch": 841.75,
"learning_rate": 3.1650485436893207e-06,
"loss": 0.0826,
"step": 86700
},
{
"epoch": 842.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.337278366088867,
"eval_runtime": 4.4387,
"eval_samples_per_second": 65.56,
"eval_steps_per_second": 4.281,
"step": 86726
},
{
"epoch": 842.72,
"learning_rate": 3.1456310679611653e-06,
"loss": 0.0767,
"step": 86800
},
{
"epoch": 843.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.402046203613281,
"eval_runtime": 4.4158,
"eval_samples_per_second": 65.9,
"eval_steps_per_second": 4.303,
"step": 86829
},
{
"epoch": 843.69,
"learning_rate": 3.12621359223301e-06,
"loss": 0.0816,
"step": 86900
},
{
"epoch": 844.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.381257057189941,
"eval_runtime": 4.4224,
"eval_samples_per_second": 65.801,
"eval_steps_per_second": 4.296,
"step": 86932
},
{
"epoch": 844.66,
"learning_rate": 3.1067961165048544e-06,
"loss": 0.0775,
"step": 87000
},
{
"epoch": 845.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.396775722503662,
"eval_runtime": 4.4202,
"eval_samples_per_second": 65.834,
"eval_steps_per_second": 4.298,
"step": 87035
},
{
"epoch": 845.63,
"learning_rate": 3.0873786407766995e-06,
"loss": 0.0694,
"step": 87100
},
{
"epoch": 846.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.428651809692383,
"eval_runtime": 4.4207,
"eval_samples_per_second": 65.827,
"eval_steps_per_second": 4.298,
"step": 87138
},
{
"epoch": 846.6,
"learning_rate": 3.067961165048544e-06,
"loss": 0.0816,
"step": 87200
},
{
"epoch": 847.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.442520618438721,
"eval_runtime": 4.4107,
"eval_samples_per_second": 65.976,
"eval_steps_per_second": 4.308,
"step": 87241
},
{
"epoch": 847.57,
"learning_rate": 3.0485436893203886e-06,
"loss": 0.0697,
"step": 87300
},
{
"epoch": 848.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.404880046844482,
"eval_runtime": 4.4039,
"eval_samples_per_second": 66.078,
"eval_steps_per_second": 4.314,
"step": 87344
},
{
"epoch": 848.54,
"learning_rate": 3.029126213592233e-06,
"loss": 0.0771,
"step": 87400
},
{
"epoch": 849.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.404363632202148,
"eval_runtime": 4.4102,
"eval_samples_per_second": 65.984,
"eval_steps_per_second": 4.308,
"step": 87447
},
{
"epoch": 849.51,
"learning_rate": 3.0097087378640778e-06,
"loss": 0.0712,
"step": 87500
},
{
"epoch": 850.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.402904987335205,
"eval_runtime": 4.4059,
"eval_samples_per_second": 66.048,
"eval_steps_per_second": 4.312,
"step": 87550
},
{
"epoch": 850.49,
"learning_rate": 2.9902912621359224e-06,
"loss": 0.0806,
"step": 87600
},
{
"epoch": 851.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.3960394859313965,
"eval_runtime": 4.4669,
"eval_samples_per_second": 65.146,
"eval_steps_per_second": 4.253,
"step": 87653
},
{
"epoch": 851.46,
"learning_rate": 2.9708737864077674e-06,
"loss": 0.0766,
"step": 87700
},
{
"epoch": 852.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.387826919555664,
"eval_runtime": 4.4035,
"eval_samples_per_second": 66.084,
"eval_steps_per_second": 4.315,
"step": 87756
},
{
"epoch": 852.43,
"learning_rate": 2.951456310679612e-06,
"loss": 0.074,
"step": 87800
},
{
"epoch": 853.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.421268463134766,
"eval_runtime": 4.4094,
"eval_samples_per_second": 65.995,
"eval_steps_per_second": 4.309,
"step": 87859
},
{
"epoch": 853.4,
"learning_rate": 2.9320388349514565e-06,
"loss": 0.0779,
"step": 87900
},
{
"epoch": 854.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.402750492095947,
"eval_runtime": 4.4054,
"eval_samples_per_second": 66.055,
"eval_steps_per_second": 4.313,
"step": 87962
},
{
"epoch": 854.37,
"learning_rate": 2.912621359223301e-06,
"loss": 0.084,
"step": 88000
},
{
"epoch": 855.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.471996784210205,
"eval_runtime": 4.4141,
"eval_samples_per_second": 65.925,
"eval_steps_per_second": 4.304,
"step": 88065
},
{
"epoch": 855.34,
"learning_rate": 2.8932038834951457e-06,
"loss": 0.0757,
"step": 88100
},
{
"epoch": 856.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.4470367431640625,
"eval_runtime": 4.4149,
"eval_samples_per_second": 65.913,
"eval_steps_per_second": 4.304,
"step": 88168
},
{
"epoch": 856.31,
"learning_rate": 2.8737864077669903e-06,
"loss": 0.0763,
"step": 88200
},
{
"epoch": 857.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.443137168884277,
"eval_runtime": 4.4241,
"eval_samples_per_second": 65.776,
"eval_steps_per_second": 4.295,
"step": 88271
},
{
"epoch": 857.28,
"learning_rate": 2.8543689320388353e-06,
"loss": 0.0816,
"step": 88300
},
{
"epoch": 858.0,
"eval_accuracy": 0.27491408934707906,
"eval_loss": 5.41270112991333,
"eval_runtime": 4.4182,
"eval_samples_per_second": 65.864,
"eval_steps_per_second": 4.3,
"step": 88374
},
{
"epoch": 858.25,
"learning_rate": 2.83495145631068e-06,
"loss": 0.0761,
"step": 88400
},
{
"epoch": 859.0,
"eval_accuracy": 0.2646048109965636,
"eval_loss": 5.420130729675293,
"eval_runtime": 4.4118,
"eval_samples_per_second": 65.959,
"eval_steps_per_second": 4.307,
"step": 88477
},
{
"epoch": 859.22,
"learning_rate": 2.8155339805825245e-06,
"loss": 0.093,
"step": 88500
},
{
"epoch": 860.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.346418380737305,
"eval_runtime": 4.41,
"eval_samples_per_second": 65.987,
"eval_steps_per_second": 4.308,
"step": 88580
},
{
"epoch": 860.19,
"learning_rate": 2.796116504854369e-06,
"loss": 0.0729,
"step": 88600
},
{
"epoch": 861.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.3696441650390625,
"eval_runtime": 4.459,
"eval_samples_per_second": 65.262,
"eval_steps_per_second": 4.261,
"step": 88683
},
{
"epoch": 861.17,
"learning_rate": 2.7766990291262136e-06,
"loss": 0.0792,
"step": 88700
},
{
"epoch": 862.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.340893268585205,
"eval_runtime": 4.4118,
"eval_samples_per_second": 65.959,
"eval_steps_per_second": 4.307,
"step": 88786
},
{
"epoch": 862.14,
"learning_rate": 2.7572815533980586e-06,
"loss": 0.0742,
"step": 88800
},
{
"epoch": 863.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.372981071472168,
"eval_runtime": 4.4766,
"eval_samples_per_second": 65.004,
"eval_steps_per_second": 4.244,
"step": 88889
},
{
"epoch": 863.11,
"learning_rate": 2.7378640776699032e-06,
"loss": 0.0795,
"step": 88900
},
{
"epoch": 864.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.429410457611084,
"eval_runtime": 4.4651,
"eval_samples_per_second": 65.171,
"eval_steps_per_second": 4.255,
"step": 88992
},
{
"epoch": 864.08,
"learning_rate": 2.718446601941748e-06,
"loss": 0.0701,
"step": 89000
},
{
"epoch": 865.0,
"eval_accuracy": 0.27147766323024053,
"eval_loss": 5.41763973236084,
"eval_runtime": 4.4618,
"eval_samples_per_second": 65.221,
"eval_steps_per_second": 4.258,
"step": 89095
},
{
"epoch": 865.05,
"learning_rate": 2.6990291262135924e-06,
"loss": 0.087,
"step": 89100
},
{
"epoch": 866.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.433925151824951,
"eval_runtime": 4.4181,
"eval_samples_per_second": 65.866,
"eval_steps_per_second": 4.301,
"step": 89198
},
{
"epoch": 866.02,
"learning_rate": 2.679611650485437e-06,
"loss": 0.0749,
"step": 89200
},
{
"epoch": 866.99,
"learning_rate": 2.660194174757282e-06,
"loss": 0.0775,
"step": 89300
},
{
"epoch": 867.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.466894626617432,
"eval_runtime": 4.397,
"eval_samples_per_second": 66.182,
"eval_steps_per_second": 4.321,
"step": 89301
},
{
"epoch": 867.96,
"learning_rate": 2.6407766990291266e-06,
"loss": 0.0764,
"step": 89400
},
{
"epoch": 868.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.4773712158203125,
"eval_runtime": 4.4111,
"eval_samples_per_second": 65.971,
"eval_steps_per_second": 4.307,
"step": 89404
},
{
"epoch": 868.93,
"learning_rate": 2.621359223300971e-06,
"loss": 0.0827,
"step": 89500
},
{
"epoch": 869.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.422665119171143,
"eval_runtime": 4.4281,
"eval_samples_per_second": 65.717,
"eval_steps_per_second": 4.291,
"step": 89507
},
{
"epoch": 869.9,
"learning_rate": 2.6019417475728157e-06,
"loss": 0.0757,
"step": 89600
},
{
"epoch": 870.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.422026634216309,
"eval_runtime": 4.4452,
"eval_samples_per_second": 65.464,
"eval_steps_per_second": 4.274,
"step": 89610
},
{
"epoch": 870.87,
"learning_rate": 2.5825242718446603e-06,
"loss": 0.0761,
"step": 89700
},
{
"epoch": 871.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.395354747772217,
"eval_runtime": 4.5037,
"eval_samples_per_second": 64.614,
"eval_steps_per_second": 4.219,
"step": 89713
},
{
"epoch": 871.84,
"learning_rate": 2.5631067961165053e-06,
"loss": 0.0777,
"step": 89800
},
{
"epoch": 872.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.385969638824463,
"eval_runtime": 4.4447,
"eval_samples_per_second": 65.471,
"eval_steps_per_second": 4.275,
"step": 89816
},
{
"epoch": 872.82,
"learning_rate": 2.5436893203883495e-06,
"loss": 0.0737,
"step": 89900
},
{
"epoch": 873.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.362537860870361,
"eval_runtime": 4.4093,
"eval_samples_per_second": 65.997,
"eval_steps_per_second": 4.309,
"step": 89919
},
{
"epoch": 873.79,
"learning_rate": 2.5242718446601945e-06,
"loss": 0.0777,
"step": 90000
},
{
"epoch": 874.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.413704872131348,
"eval_runtime": 4.4462,
"eval_samples_per_second": 65.45,
"eval_steps_per_second": 4.273,
"step": 90022
},
{
"epoch": 874.76,
"learning_rate": 2.504854368932039e-06,
"loss": 0.0758,
"step": 90100
},
{
"epoch": 875.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.415232181549072,
"eval_runtime": 4.408,
"eval_samples_per_second": 66.016,
"eval_steps_per_second": 4.31,
"step": 90125
},
{
"epoch": 875.73,
"learning_rate": 2.4854368932038836e-06,
"loss": 0.0764,
"step": 90200
},
{
"epoch": 876.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.38121223449707,
"eval_runtime": 4.4095,
"eval_samples_per_second": 65.993,
"eval_steps_per_second": 4.309,
"step": 90228
},
{
"epoch": 876.7,
"learning_rate": 2.4660194174757286e-06,
"loss": 0.087,
"step": 90300
},
{
"epoch": 877.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.375702857971191,
"eval_runtime": 4.471,
"eval_samples_per_second": 65.086,
"eval_steps_per_second": 4.25,
"step": 90331
},
{
"epoch": 877.67,
"learning_rate": 2.4466019417475732e-06,
"loss": 0.0705,
"step": 90400
},
{
"epoch": 878.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.39950704574585,
"eval_runtime": 4.3946,
"eval_samples_per_second": 66.218,
"eval_steps_per_second": 4.323,
"step": 90434
},
{
"epoch": 878.64,
"learning_rate": 2.427184466019418e-06,
"loss": 0.0831,
"step": 90500
},
{
"epoch": 879.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.375514984130859,
"eval_runtime": 4.405,
"eval_samples_per_second": 66.062,
"eval_steps_per_second": 4.313,
"step": 90537
},
{
"epoch": 879.61,
"learning_rate": 2.4077669902912624e-06,
"loss": 0.0692,
"step": 90600
},
{
"epoch": 880.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.384296417236328,
"eval_runtime": 4.5292,
"eval_samples_per_second": 64.249,
"eval_steps_per_second": 4.195,
"step": 90640
},
{
"epoch": 880.58,
"learning_rate": 2.388349514563107e-06,
"loss": 0.0752,
"step": 90700
},
{
"epoch": 881.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.397815704345703,
"eval_runtime": 4.4681,
"eval_samples_per_second": 65.128,
"eval_steps_per_second": 4.252,
"step": 90743
},
{
"epoch": 881.55,
"learning_rate": 2.3689320388349516e-06,
"loss": 0.0732,
"step": 90800
},
{
"epoch": 882.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.387296676635742,
"eval_runtime": 4.3718,
"eval_samples_per_second": 66.564,
"eval_steps_per_second": 4.346,
"step": 90846
},
{
"epoch": 882.52,
"learning_rate": 2.3495145631067966e-06,
"loss": 0.0836,
"step": 90900
},
{
"epoch": 883.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.39614725112915,
"eval_runtime": 4.3772,
"eval_samples_per_second": 66.481,
"eval_steps_per_second": 4.341,
"step": 90949
},
{
"epoch": 883.5,
"learning_rate": 2.330097087378641e-06,
"loss": 0.0761,
"step": 91000
},
{
"epoch": 884.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.415928363800049,
"eval_runtime": 4.4665,
"eval_samples_per_second": 65.152,
"eval_steps_per_second": 4.254,
"step": 91052
},
{
"epoch": 884.47,
"learning_rate": 2.3106796116504857e-06,
"loss": 0.082,
"step": 91100
},
{
"epoch": 885.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.418325424194336,
"eval_runtime": 4.4755,
"eval_samples_per_second": 65.021,
"eval_steps_per_second": 4.245,
"step": 91155
},
{
"epoch": 885.44,
"learning_rate": 2.2912621359223303e-06,
"loss": 0.0729,
"step": 91200
},
{
"epoch": 886.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.443818092346191,
"eval_runtime": 4.3458,
"eval_samples_per_second": 66.961,
"eval_steps_per_second": 4.372,
"step": 91258
},
{
"epoch": 886.41,
"learning_rate": 2.271844660194175e-06,
"loss": 0.0908,
"step": 91300
},
{
"epoch": 887.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.458770275115967,
"eval_runtime": 4.3756,
"eval_samples_per_second": 66.505,
"eval_steps_per_second": 4.342,
"step": 91361
},
{
"epoch": 887.38,
"learning_rate": 2.2524271844660195e-06,
"loss": 0.0677,
"step": 91400
},
{
"epoch": 888.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.484026908874512,
"eval_runtime": 4.4708,
"eval_samples_per_second": 65.089,
"eval_steps_per_second": 4.25,
"step": 91464
},
{
"epoch": 888.35,
"learning_rate": 2.2330097087378645e-06,
"loss": 0.0821,
"step": 91500
},
{
"epoch": 889.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.466355323791504,
"eval_runtime": 4.3868,
"eval_samples_per_second": 66.335,
"eval_steps_per_second": 4.331,
"step": 91567
},
{
"epoch": 889.32,
"learning_rate": 2.213592233009709e-06,
"loss": 0.0812,
"step": 91600
},
{
"epoch": 890.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.5019025802612305,
"eval_runtime": 4.3644,
"eval_samples_per_second": 66.676,
"eval_steps_per_second": 4.353,
"step": 91670
},
{
"epoch": 890.29,
"learning_rate": 2.1941747572815537e-06,
"loss": 0.0849,
"step": 91700
},
{
"epoch": 891.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.478269577026367,
"eval_runtime": 4.3779,
"eval_samples_per_second": 66.47,
"eval_steps_per_second": 4.34,
"step": 91773
},
{
"epoch": 891.26,
"learning_rate": 2.1747572815533982e-06,
"loss": 0.079,
"step": 91800
},
{
"epoch": 892.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.493340969085693,
"eval_runtime": 4.3925,
"eval_samples_per_second": 66.249,
"eval_steps_per_second": 4.326,
"step": 91876
},
{
"epoch": 892.23,
"learning_rate": 2.155339805825243e-06,
"loss": 0.0703,
"step": 91900
},
{
"epoch": 893.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.519100189208984,
"eval_runtime": 4.441,
"eval_samples_per_second": 65.526,
"eval_steps_per_second": 4.278,
"step": 91979
},
{
"epoch": 893.2,
"learning_rate": 2.1359223300970874e-06,
"loss": 0.0777,
"step": 92000
},
{
"epoch": 894.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.517086982727051,
"eval_runtime": 4.5323,
"eval_samples_per_second": 64.206,
"eval_steps_per_second": 4.192,
"step": 92082
},
{
"epoch": 894.17,
"learning_rate": 2.1165048543689324e-06,
"loss": 0.0767,
"step": 92100
},
{
"epoch": 895.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.527967929840088,
"eval_runtime": 4.4907,
"eval_samples_per_second": 64.8,
"eval_steps_per_second": 4.231,
"step": 92185
},
{
"epoch": 895.15,
"learning_rate": 2.097087378640777e-06,
"loss": 0.0697,
"step": 92200
},
{
"epoch": 896.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.491966724395752,
"eval_runtime": 4.3722,
"eval_samples_per_second": 66.557,
"eval_steps_per_second": 4.346,
"step": 92288
},
{
"epoch": 896.12,
"learning_rate": 2.0776699029126216e-06,
"loss": 0.0831,
"step": 92300
},
{
"epoch": 897.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.458693981170654,
"eval_runtime": 4.3596,
"eval_samples_per_second": 66.75,
"eval_steps_per_second": 4.358,
"step": 92391
},
{
"epoch": 897.09,
"learning_rate": 2.058252427184466e-06,
"loss": 0.0715,
"step": 92400
},
{
"epoch": 898.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.484317779541016,
"eval_runtime": 4.3692,
"eval_samples_per_second": 66.602,
"eval_steps_per_second": 4.349,
"step": 92494
},
{
"epoch": 898.06,
"learning_rate": 2.0388349514563107e-06,
"loss": 0.0764,
"step": 92500
},
{
"epoch": 899.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.503616809844971,
"eval_runtime": 4.3713,
"eval_samples_per_second": 66.57,
"eval_steps_per_second": 4.347,
"step": 92597
},
{
"epoch": 899.03,
"learning_rate": 2.0194174757281553e-06,
"loss": 0.074,
"step": 92600
},
{
"epoch": 900.0,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0785,
"step": 92700
},
{
"epoch": 900.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.4780683517456055,
"eval_runtime": 4.3991,
"eval_samples_per_second": 66.149,
"eval_steps_per_second": 4.319,
"step": 92700
},
{
"epoch": 900.97,
"learning_rate": 1.980582524271845e-06,
"loss": 0.0783,
"step": 92800
},
{
"epoch": 901.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.468466281890869,
"eval_runtime": 4.4156,
"eval_samples_per_second": 65.902,
"eval_steps_per_second": 4.303,
"step": 92803
},
{
"epoch": 901.94,
"learning_rate": 1.9611650485436895e-06,
"loss": 0.0791,
"step": 92900
},
{
"epoch": 902.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.443396091461182,
"eval_runtime": 4.3883,
"eval_samples_per_second": 66.313,
"eval_steps_per_second": 4.33,
"step": 92906
},
{
"epoch": 902.91,
"learning_rate": 1.941747572815534e-06,
"loss": 0.0714,
"step": 93000
},
{
"epoch": 903.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.470444679260254,
"eval_runtime": 4.3876,
"eval_samples_per_second": 66.324,
"eval_steps_per_second": 4.33,
"step": 93009
},
{
"epoch": 903.88,
"learning_rate": 1.9223300970873787e-06,
"loss": 0.0834,
"step": 93100
},
{
"epoch": 904.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.4543023109436035,
"eval_runtime": 4.3556,
"eval_samples_per_second": 66.811,
"eval_steps_per_second": 4.362,
"step": 93112
},
{
"epoch": 904.85,
"learning_rate": 1.9029126213592232e-06,
"loss": 0.0796,
"step": 93200
},
{
"epoch": 905.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.442955017089844,
"eval_runtime": 4.3847,
"eval_samples_per_second": 66.367,
"eval_steps_per_second": 4.333,
"step": 93215
},
{
"epoch": 905.83,
"learning_rate": 1.8834951456310683e-06,
"loss": 0.0741,
"step": 93300
},
{
"epoch": 906.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.462120532989502,
"eval_runtime": 4.5039,
"eval_samples_per_second": 64.611,
"eval_steps_per_second": 4.219,
"step": 93318
},
{
"epoch": 906.8,
"learning_rate": 1.8640776699029128e-06,
"loss": 0.0752,
"step": 93400
},
{
"epoch": 907.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.449808597564697,
"eval_runtime": 4.3965,
"eval_samples_per_second": 66.19,
"eval_steps_per_second": 4.322,
"step": 93421
},
{
"epoch": 907.77,
"learning_rate": 1.8446601941747574e-06,
"loss": 0.0776,
"step": 93500
},
{
"epoch": 908.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.455343246459961,
"eval_runtime": 4.4319,
"eval_samples_per_second": 65.66,
"eval_steps_per_second": 4.287,
"step": 93524
},
{
"epoch": 908.74,
"learning_rate": 1.825242718446602e-06,
"loss": 0.0795,
"step": 93600
},
{
"epoch": 909.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.415092945098877,
"eval_runtime": 4.403,
"eval_samples_per_second": 66.091,
"eval_steps_per_second": 4.315,
"step": 93627
},
{
"epoch": 909.71,
"learning_rate": 1.8058252427184466e-06,
"loss": 0.0771,
"step": 93700
},
{
"epoch": 910.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.396514415740967,
"eval_runtime": 4.4896,
"eval_samples_per_second": 64.816,
"eval_steps_per_second": 4.232,
"step": 93730
},
{
"epoch": 910.68,
"learning_rate": 1.7864077669902914e-06,
"loss": 0.0756,
"step": 93800
},
{
"epoch": 911.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.412069797515869,
"eval_runtime": 4.4395,
"eval_samples_per_second": 65.548,
"eval_steps_per_second": 4.28,
"step": 93833
},
{
"epoch": 911.65,
"learning_rate": 1.7669902912621362e-06,
"loss": 0.0769,
"step": 93900
},
{
"epoch": 912.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.40557861328125,
"eval_runtime": 4.441,
"eval_samples_per_second": 65.526,
"eval_steps_per_second": 4.278,
"step": 93936
},
{
"epoch": 912.62,
"learning_rate": 1.7475728155339808e-06,
"loss": 0.0799,
"step": 94000
},
{
"epoch": 913.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.387575149536133,
"eval_runtime": 4.3533,
"eval_samples_per_second": 66.845,
"eval_steps_per_second": 4.364,
"step": 94039
},
{
"epoch": 913.59,
"learning_rate": 1.7281553398058253e-06,
"loss": 0.0853,
"step": 94100
},
{
"epoch": 914.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.4021782875061035,
"eval_runtime": 4.4157,
"eval_samples_per_second": 65.901,
"eval_steps_per_second": 4.303,
"step": 94142
},
{
"epoch": 914.56,
"learning_rate": 1.70873786407767e-06,
"loss": 0.0726,
"step": 94200
},
{
"epoch": 915.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.43842887878418,
"eval_runtime": 4.4744,
"eval_samples_per_second": 65.037,
"eval_steps_per_second": 4.246,
"step": 94245
},
{
"epoch": 915.53,
"learning_rate": 1.6893203883495147e-06,
"loss": 0.0745,
"step": 94300
},
{
"epoch": 916.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.422259330749512,
"eval_runtime": 4.3587,
"eval_samples_per_second": 66.763,
"eval_steps_per_second": 4.359,
"step": 94348
},
{
"epoch": 916.5,
"learning_rate": 1.6699029126213593e-06,
"loss": 0.0688,
"step": 94400
},
{
"epoch": 917.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.429776191711426,
"eval_runtime": 4.3673,
"eval_samples_per_second": 66.632,
"eval_steps_per_second": 4.351,
"step": 94451
},
{
"epoch": 917.48,
"learning_rate": 1.650485436893204e-06,
"loss": 0.0743,
"step": 94500
},
{
"epoch": 918.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.422666072845459,
"eval_runtime": 4.3668,
"eval_samples_per_second": 66.639,
"eval_steps_per_second": 4.351,
"step": 94554
},
{
"epoch": 918.45,
"learning_rate": 1.6310679611650487e-06,
"loss": 0.0842,
"step": 94600
},
{
"epoch": 919.0,
"eval_accuracy": 0.30927835051546393,
"eval_loss": 5.38067626953125,
"eval_runtime": 4.3401,
"eval_samples_per_second": 67.049,
"eval_steps_per_second": 4.378,
"step": 94657
},
{
"epoch": 919.42,
"learning_rate": 1.6116504854368933e-06,
"loss": 0.0732,
"step": 94700
},
{
"epoch": 920.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.388149261474609,
"eval_runtime": 4.3924,
"eval_samples_per_second": 66.25,
"eval_steps_per_second": 4.326,
"step": 94760
},
{
"epoch": 920.39,
"learning_rate": 1.592233009708738e-06,
"loss": 0.0717,
"step": 94800
},
{
"epoch": 921.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.3828325271606445,
"eval_runtime": 4.4097,
"eval_samples_per_second": 65.991,
"eval_steps_per_second": 4.309,
"step": 94863
},
{
"epoch": 921.36,
"learning_rate": 1.5728155339805826e-06,
"loss": 0.084,
"step": 94900
},
{
"epoch": 922.0,
"eval_accuracy": 0.3024054982817869,
"eval_loss": 5.377039432525635,
"eval_runtime": 4.3946,
"eval_samples_per_second": 66.217,
"eval_steps_per_second": 4.323,
"step": 94966
},
{
"epoch": 922.33,
"learning_rate": 1.5533980582524272e-06,
"loss": 0.079,
"step": 95000
},
{
"epoch": 923.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.387304782867432,
"eval_runtime": 4.4138,
"eval_samples_per_second": 65.93,
"eval_steps_per_second": 4.305,
"step": 95069
},
{
"epoch": 923.3,
"learning_rate": 1.533980582524272e-06,
"loss": 0.0761,
"step": 95100
},
{
"epoch": 924.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.378848552703857,
"eval_runtime": 4.4188,
"eval_samples_per_second": 65.855,
"eval_steps_per_second": 4.3,
"step": 95172
},
{
"epoch": 924.27,
"learning_rate": 1.5145631067961166e-06,
"loss": 0.0777,
"step": 95200
},
{
"epoch": 925.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.393227577209473,
"eval_runtime": 4.3963,
"eval_samples_per_second": 66.193,
"eval_steps_per_second": 4.322,
"step": 95275
},
{
"epoch": 925.24,
"learning_rate": 1.4951456310679612e-06,
"loss": 0.0729,
"step": 95300
},
{
"epoch": 926.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.435247898101807,
"eval_runtime": 4.4184,
"eval_samples_per_second": 65.861,
"eval_steps_per_second": 4.3,
"step": 95378
},
{
"epoch": 926.21,
"learning_rate": 1.475728155339806e-06,
"loss": 0.0756,
"step": 95400
},
{
"epoch": 927.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.427146911621094,
"eval_runtime": 4.4068,
"eval_samples_per_second": 66.034,
"eval_steps_per_second": 4.311,
"step": 95481
},
{
"epoch": 927.18,
"learning_rate": 1.4563106796116506e-06,
"loss": 0.0699,
"step": 95500
},
{
"epoch": 928.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.408605098724365,
"eval_runtime": 4.4391,
"eval_samples_per_second": 65.554,
"eval_steps_per_second": 4.28,
"step": 95584
},
{
"epoch": 928.16,
"learning_rate": 1.4368932038834951e-06,
"loss": 0.0814,
"step": 95600
},
{
"epoch": 929.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.421037197113037,
"eval_runtime": 4.421,
"eval_samples_per_second": 65.822,
"eval_steps_per_second": 4.298,
"step": 95687
},
{
"epoch": 929.13,
"learning_rate": 1.41747572815534e-06,
"loss": 0.07,
"step": 95700
},
{
"epoch": 930.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.417635440826416,
"eval_runtime": 4.4051,
"eval_samples_per_second": 66.059,
"eval_steps_per_second": 4.313,
"step": 95790
},
{
"epoch": 930.1,
"learning_rate": 1.3980582524271845e-06,
"loss": 0.0736,
"step": 95800
},
{
"epoch": 931.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.434685230255127,
"eval_runtime": 4.4155,
"eval_samples_per_second": 65.904,
"eval_steps_per_second": 4.303,
"step": 95893
},
{
"epoch": 931.07,
"learning_rate": 1.3786407766990293e-06,
"loss": 0.0694,
"step": 95900
},
{
"epoch": 932.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.436407089233398,
"eval_runtime": 4.402,
"eval_samples_per_second": 66.106,
"eval_steps_per_second": 4.316,
"step": 95996
},
{
"epoch": 932.04,
"learning_rate": 1.359223300970874e-06,
"loss": 0.0771,
"step": 96000
},
{
"epoch": 933.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.446750640869141,
"eval_runtime": 4.442,
"eval_samples_per_second": 65.511,
"eval_steps_per_second": 4.277,
"step": 96099
},
{
"epoch": 933.01,
"learning_rate": 1.3398058252427185e-06,
"loss": 0.0701,
"step": 96100
},
{
"epoch": 933.98,
"learning_rate": 1.3203883495145633e-06,
"loss": 0.0718,
"step": 96200
},
{
"epoch": 934.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.452322959899902,
"eval_runtime": 4.4596,
"eval_samples_per_second": 65.253,
"eval_steps_per_second": 4.26,
"step": 96202
},
{
"epoch": 934.95,
"learning_rate": 1.3009708737864079e-06,
"loss": 0.0784,
"step": 96300
},
{
"epoch": 935.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.4216132164001465,
"eval_runtime": 4.3711,
"eval_samples_per_second": 66.574,
"eval_steps_per_second": 4.347,
"step": 96305
},
{
"epoch": 935.92,
"learning_rate": 1.2815533980582527e-06,
"loss": 0.087,
"step": 96400
},
{
"epoch": 936.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.415948390960693,
"eval_runtime": 4.3669,
"eval_samples_per_second": 66.637,
"eval_steps_per_second": 4.351,
"step": 96408
},
{
"epoch": 936.89,
"learning_rate": 1.2621359223300972e-06,
"loss": 0.0717,
"step": 96500
},
{
"epoch": 937.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.422750949859619,
"eval_runtime": 4.4213,
"eval_samples_per_second": 65.818,
"eval_steps_per_second": 4.297,
"step": 96511
},
{
"epoch": 937.86,
"learning_rate": 1.2427184466019418e-06,
"loss": 0.0714,
"step": 96600
},
{
"epoch": 938.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.401679992675781,
"eval_runtime": 4.3842,
"eval_samples_per_second": 66.375,
"eval_steps_per_second": 4.334,
"step": 96614
},
{
"epoch": 938.83,
"learning_rate": 1.2233009708737866e-06,
"loss": 0.0754,
"step": 96700
},
{
"epoch": 939.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.4021172523498535,
"eval_runtime": 4.359,
"eval_samples_per_second": 66.759,
"eval_steps_per_second": 4.359,
"step": 96717
},
{
"epoch": 939.81,
"learning_rate": 1.2038834951456312e-06,
"loss": 0.0733,
"step": 96800
},
{
"epoch": 940.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.395828723907471,
"eval_runtime": 4.4108,
"eval_samples_per_second": 65.974,
"eval_steps_per_second": 4.308,
"step": 96820
},
{
"epoch": 940.78,
"learning_rate": 1.1844660194174758e-06,
"loss": 0.0697,
"step": 96900
},
{
"epoch": 941.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.385928630828857,
"eval_runtime": 4.4078,
"eval_samples_per_second": 66.019,
"eval_steps_per_second": 4.31,
"step": 96923
},
{
"epoch": 941.75,
"learning_rate": 1.1650485436893206e-06,
"loss": 0.082,
"step": 97000
},
{
"epoch": 942.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.371447563171387,
"eval_runtime": 4.3935,
"eval_samples_per_second": 66.234,
"eval_steps_per_second": 4.325,
"step": 97026
},
{
"epoch": 942.72,
"learning_rate": 1.1456310679611652e-06,
"loss": 0.0696,
"step": 97100
},
{
"epoch": 943.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.369715690612793,
"eval_runtime": 4.4234,
"eval_samples_per_second": 65.786,
"eval_steps_per_second": 4.295,
"step": 97129
},
{
"epoch": 943.69,
"learning_rate": 1.1262135922330097e-06,
"loss": 0.0719,
"step": 97200
},
{
"epoch": 944.0,
"eval_accuracy": 0.27835051546391754,
"eval_loss": 5.396899700164795,
"eval_runtime": 4.3747,
"eval_samples_per_second": 66.519,
"eval_steps_per_second": 4.343,
"step": 97232
},
{
"epoch": 944.66,
"learning_rate": 1.1067961165048545e-06,
"loss": 0.0772,
"step": 97300
},
{
"epoch": 945.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.395821571350098,
"eval_runtime": 4.5669,
"eval_samples_per_second": 63.72,
"eval_steps_per_second": 4.16,
"step": 97335
},
{
"epoch": 945.63,
"learning_rate": 1.0873786407766991e-06,
"loss": 0.0759,
"step": 97400
},
{
"epoch": 946.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.41284704208374,
"eval_runtime": 4.4196,
"eval_samples_per_second": 65.843,
"eval_steps_per_second": 4.299,
"step": 97438
},
{
"epoch": 946.6,
"learning_rate": 1.0679611650485437e-06,
"loss": 0.074,
"step": 97500
},
{
"epoch": 947.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.428328514099121,
"eval_runtime": 4.4673,
"eval_samples_per_second": 65.139,
"eval_steps_per_second": 4.253,
"step": 97541
},
{
"epoch": 947.57,
"learning_rate": 1.0485436893203885e-06,
"loss": 0.0704,
"step": 97600
},
{
"epoch": 948.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.430525779724121,
"eval_runtime": 4.4255,
"eval_samples_per_second": 65.755,
"eval_steps_per_second": 4.293,
"step": 97644
},
{
"epoch": 948.54,
"learning_rate": 1.029126213592233e-06,
"loss": 0.069,
"step": 97700
},
{
"epoch": 949.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.429955005645752,
"eval_runtime": 4.3939,
"eval_samples_per_second": 66.228,
"eval_steps_per_second": 4.324,
"step": 97747
},
{
"epoch": 949.51,
"learning_rate": 1.0097087378640777e-06,
"loss": 0.0701,
"step": 97800
},
{
"epoch": 950.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.44457483291626,
"eval_runtime": 4.3896,
"eval_samples_per_second": 66.293,
"eval_steps_per_second": 4.328,
"step": 97850
},
{
"epoch": 950.49,
"learning_rate": 9.902912621359225e-07,
"loss": 0.087,
"step": 97900
},
{
"epoch": 951.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.436530590057373,
"eval_runtime": 4.3811,
"eval_samples_per_second": 66.421,
"eval_steps_per_second": 4.337,
"step": 97953
},
{
"epoch": 951.46,
"learning_rate": 9.70873786407767e-07,
"loss": 0.0837,
"step": 98000
},
{
"epoch": 952.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.426782131195068,
"eval_runtime": 4.3706,
"eval_samples_per_second": 66.581,
"eval_steps_per_second": 4.347,
"step": 98056
},
{
"epoch": 952.43,
"learning_rate": 9.514563106796116e-07,
"loss": 0.0754,
"step": 98100
},
{
"epoch": 953.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.425992488861084,
"eval_runtime": 4.4133,
"eval_samples_per_second": 65.937,
"eval_steps_per_second": 4.305,
"step": 98159
},
{
"epoch": 953.4,
"learning_rate": 9.320388349514564e-07,
"loss": 0.0778,
"step": 98200
},
{
"epoch": 954.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.405651092529297,
"eval_runtime": 4.4174,
"eval_samples_per_second": 65.876,
"eval_steps_per_second": 4.301,
"step": 98262
},
{
"epoch": 954.37,
"learning_rate": 9.12621359223301e-07,
"loss": 0.0643,
"step": 98300
},
{
"epoch": 955.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.39918851852417,
"eval_runtime": 4.522,
"eval_samples_per_second": 64.352,
"eval_steps_per_second": 4.202,
"step": 98365
},
{
"epoch": 955.34,
"learning_rate": 8.932038834951457e-07,
"loss": 0.0768,
"step": 98400
},
{
"epoch": 956.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.388579845428467,
"eval_runtime": 4.4701,
"eval_samples_per_second": 65.1,
"eval_steps_per_second": 4.251,
"step": 98468
},
{
"epoch": 956.31,
"learning_rate": 8.737864077669904e-07,
"loss": 0.0727,
"step": 98500
},
{
"epoch": 957.0,
"eval_accuracy": 0.29896907216494845,
"eval_loss": 5.384490489959717,
"eval_runtime": 4.3672,
"eval_samples_per_second": 66.633,
"eval_steps_per_second": 4.351,
"step": 98571
},
{
"epoch": 957.28,
"learning_rate": 8.54368932038835e-07,
"loss": 0.0859,
"step": 98600
},
{
"epoch": 958.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.3821940422058105,
"eval_runtime": 4.3548,
"eval_samples_per_second": 66.822,
"eval_steps_per_second": 4.363,
"step": 98674
},
{
"epoch": 958.25,
"learning_rate": 8.349514563106797e-07,
"loss": 0.0831,
"step": 98700
},
{
"epoch": 959.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.38521146774292,
"eval_runtime": 4.4018,
"eval_samples_per_second": 66.109,
"eval_steps_per_second": 4.316,
"step": 98777
},
{
"epoch": 959.22,
"learning_rate": 8.155339805825243e-07,
"loss": 0.0756,
"step": 98800
},
{
"epoch": 960.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.38844108581543,
"eval_runtime": 4.4092,
"eval_samples_per_second": 65.998,
"eval_steps_per_second": 4.309,
"step": 98880
},
{
"epoch": 960.19,
"learning_rate": 7.96116504854369e-07,
"loss": 0.0857,
"step": 98900
},
{
"epoch": 961.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.389212608337402,
"eval_runtime": 4.4817,
"eval_samples_per_second": 64.93,
"eval_steps_per_second": 4.239,
"step": 98983
},
{
"epoch": 961.17,
"learning_rate": 7.766990291262136e-07,
"loss": 0.0707,
"step": 99000
},
{
"epoch": 962.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.377591133117676,
"eval_runtime": 4.3914,
"eval_samples_per_second": 66.266,
"eval_steps_per_second": 4.327,
"step": 99086
},
{
"epoch": 962.14,
"learning_rate": 7.572815533980583e-07,
"loss": 0.0746,
"step": 99100
},
{
"epoch": 963.0,
"eval_accuracy": 0.30584192439862545,
"eval_loss": 5.37846565246582,
"eval_runtime": 4.4873,
"eval_samples_per_second": 64.85,
"eval_steps_per_second": 4.234,
"step": 99189
},
{
"epoch": 963.11,
"learning_rate": 7.37864077669903e-07,
"loss": 0.0745,
"step": 99200
},
{
"epoch": 964.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.377573013305664,
"eval_runtime": 4.3679,
"eval_samples_per_second": 66.623,
"eval_steps_per_second": 4.35,
"step": 99292
},
{
"epoch": 964.08,
"learning_rate": 7.184466019417476e-07,
"loss": 0.0827,
"step": 99300
},
{
"epoch": 965.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.370428562164307,
"eval_runtime": 4.4339,
"eval_samples_per_second": 65.631,
"eval_steps_per_second": 4.285,
"step": 99395
},
{
"epoch": 965.05,
"learning_rate": 6.990291262135923e-07,
"loss": 0.0774,
"step": 99400
},
{
"epoch": 966.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.365330219268799,
"eval_runtime": 4.3501,
"eval_samples_per_second": 66.895,
"eval_steps_per_second": 4.368,
"step": 99498
},
{
"epoch": 966.02,
"learning_rate": 6.79611650485437e-07,
"loss": 0.0752,
"step": 99500
},
{
"epoch": 966.99,
"learning_rate": 6.601941747572816e-07,
"loss": 0.0795,
"step": 99600
},
{
"epoch": 967.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.356910228729248,
"eval_runtime": 4.4051,
"eval_samples_per_second": 66.06,
"eval_steps_per_second": 4.313,
"step": 99601
},
{
"epoch": 967.96,
"learning_rate": 6.407766990291263e-07,
"loss": 0.0759,
"step": 99700
},
{
"epoch": 968.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.351494312286377,
"eval_runtime": 4.3759,
"eval_samples_per_second": 66.501,
"eval_steps_per_second": 4.342,
"step": 99704
},
{
"epoch": 968.93,
"learning_rate": 6.213592233009709e-07,
"loss": 0.0713,
"step": 99800
},
{
"epoch": 969.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.375247478485107,
"eval_runtime": 4.6323,
"eval_samples_per_second": 62.82,
"eval_steps_per_second": 4.102,
"step": 99807
},
{
"epoch": 969.9,
"learning_rate": 6.019417475728156e-07,
"loss": 0.0735,
"step": 99900
},
{
"epoch": 970.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.372783660888672,
"eval_runtime": 4.3885,
"eval_samples_per_second": 66.31,
"eval_steps_per_second": 4.33,
"step": 99910
},
{
"epoch": 970.87,
"learning_rate": 5.825242718446603e-07,
"loss": 0.0777,
"step": 100000
},
{
"epoch": 971.0,
"eval_accuracy": 0.29553264604810997,
"eval_loss": 5.368955612182617,
"eval_runtime": 4.4012,
"eval_samples_per_second": 66.119,
"eval_steps_per_second": 4.317,
"step": 100013
},
{
"epoch": 971.84,
"learning_rate": 5.631067961165049e-07,
"loss": 0.0844,
"step": 100100
},
{
"epoch": 972.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.3782057762146,
"eval_runtime": 4.3662,
"eval_samples_per_second": 66.649,
"eval_steps_per_second": 4.352,
"step": 100116
},
{
"epoch": 972.82,
"learning_rate": 5.436893203883496e-07,
"loss": 0.0758,
"step": 100200
},
{
"epoch": 973.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.382194995880127,
"eval_runtime": 4.3834,
"eval_samples_per_second": 66.388,
"eval_steps_per_second": 4.335,
"step": 100219
},
{
"epoch": 973.79,
"learning_rate": 5.242718446601942e-07,
"loss": 0.0735,
"step": 100300
},
{
"epoch": 974.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.389287948608398,
"eval_runtime": 4.3644,
"eval_samples_per_second": 66.676,
"eval_steps_per_second": 4.353,
"step": 100322
},
{
"epoch": 974.76,
"learning_rate": 5.048543689320388e-07,
"loss": 0.0698,
"step": 100400
},
{
"epoch": 975.0,
"eval_accuracy": 0.281786941580756,
"eval_loss": 5.388708591461182,
"eval_runtime": 4.394,
"eval_samples_per_second": 66.227,
"eval_steps_per_second": 4.324,
"step": 100425
},
{
"epoch": 975.73,
"learning_rate": 4.854368932038835e-07,
"loss": 0.0773,
"step": 100500
},
{
"epoch": 976.0,
"eval_accuracy": 0.2852233676975945,
"eval_loss": 5.390843868255615,
"eval_runtime": 4.8733,
"eval_samples_per_second": 59.713,
"eval_steps_per_second": 3.899,
"step": 100528
},
{
"epoch": 976.7,
"learning_rate": 4.660194174757282e-07,
"loss": 0.0695,
"step": 100600
},
{
"epoch": 977.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.390900135040283,
"eval_runtime": 4.418,
"eval_samples_per_second": 65.867,
"eval_steps_per_second": 4.301,
"step": 100631
},
{
"epoch": 977.67,
"learning_rate": 4.4660194174757285e-07,
"loss": 0.0786,
"step": 100700
},
{
"epoch": 978.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.393945693969727,
"eval_runtime": 4.4024,
"eval_samples_per_second": 66.1,
"eval_steps_per_second": 4.316,
"step": 100734
},
{
"epoch": 978.64,
"learning_rate": 4.271844660194175e-07,
"loss": 0.0784,
"step": 100800
},
{
"epoch": 979.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.383818626403809,
"eval_runtime": 4.4523,
"eval_samples_per_second": 65.36,
"eval_steps_per_second": 4.267,
"step": 100837
},
{
"epoch": 979.61,
"learning_rate": 4.0776699029126217e-07,
"loss": 0.078,
"step": 100900
},
{
"epoch": 980.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.389102458953857,
"eval_runtime": 4.4077,
"eval_samples_per_second": 66.02,
"eval_steps_per_second": 4.311,
"step": 100940
},
{
"epoch": 980.58,
"learning_rate": 3.883495145631068e-07,
"loss": 0.0721,
"step": 101000
},
{
"epoch": 981.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.387507915496826,
"eval_runtime": 4.4006,
"eval_samples_per_second": 66.128,
"eval_steps_per_second": 4.318,
"step": 101043
},
{
"epoch": 981.55,
"learning_rate": 3.689320388349515e-07,
"loss": 0.0779,
"step": 101100
},
{
"epoch": 982.0,
"eval_accuracy": 0.28865979381443296,
"eval_loss": 5.392478942871094,
"eval_runtime": 4.4664,
"eval_samples_per_second": 65.153,
"eval_steps_per_second": 4.254,
"step": 101146
},
{
"epoch": 982.52,
"learning_rate": 3.4951456310679613e-07,
"loss": 0.0706,
"step": 101200
},
{
"epoch": 983.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.400639057159424,
"eval_runtime": 4.4109,
"eval_samples_per_second": 65.973,
"eval_steps_per_second": 4.308,
"step": 101249
},
{
"epoch": 983.5,
"learning_rate": 3.300970873786408e-07,
"loss": 0.0808,
"step": 101300
},
{
"epoch": 984.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.402230262756348,
"eval_runtime": 4.4104,
"eval_samples_per_second": 65.98,
"eval_steps_per_second": 4.308,
"step": 101352
},
{
"epoch": 984.47,
"learning_rate": 3.1067961165048546e-07,
"loss": 0.071,
"step": 101400
},
{
"epoch": 985.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.4075798988342285,
"eval_runtime": 4.4611,
"eval_samples_per_second": 65.23,
"eval_steps_per_second": 4.259,
"step": 101455
},
{
"epoch": 985.44,
"learning_rate": 2.9126213592233014e-07,
"loss": 0.0743,
"step": 101500
},
{
"epoch": 986.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.4103875160217285,
"eval_runtime": 4.5223,
"eval_samples_per_second": 64.348,
"eval_steps_per_second": 4.201,
"step": 101558
},
{
"epoch": 986.41,
"learning_rate": 2.718446601941748e-07,
"loss": 0.0784,
"step": 101600
},
{
"epoch": 987.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.409285545349121,
"eval_runtime": 4.454,
"eval_samples_per_second": 65.335,
"eval_steps_per_second": 4.266,
"step": 101661
},
{
"epoch": 987.38,
"learning_rate": 2.524271844660194e-07,
"loss": 0.0793,
"step": 101700
},
{
"epoch": 988.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.407143592834473,
"eval_runtime": 4.4102,
"eval_samples_per_second": 65.983,
"eval_steps_per_second": 4.308,
"step": 101764
},
{
"epoch": 988.35,
"learning_rate": 2.330097087378641e-07,
"loss": 0.0838,
"step": 101800
},
{
"epoch": 989.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.402917385101318,
"eval_runtime": 4.4226,
"eval_samples_per_second": 65.798,
"eval_steps_per_second": 4.296,
"step": 101867
},
{
"epoch": 989.32,
"learning_rate": 2.1359223300970874e-07,
"loss": 0.0708,
"step": 101900
},
{
"epoch": 990.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.4035491943359375,
"eval_runtime": 4.4171,
"eval_samples_per_second": 65.88,
"eval_steps_per_second": 4.301,
"step": 101970
},
{
"epoch": 990.29,
"learning_rate": 1.941747572815534e-07,
"loss": 0.0742,
"step": 102000
},
{
"epoch": 991.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.4020819664001465,
"eval_runtime": 4.427,
"eval_samples_per_second": 65.733,
"eval_steps_per_second": 4.292,
"step": 102073
},
{
"epoch": 991.26,
"learning_rate": 1.7475728155339807e-07,
"loss": 0.0746,
"step": 102100
},
{
"epoch": 992.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.404983043670654,
"eval_runtime": 4.4417,
"eval_samples_per_second": 65.515,
"eval_steps_per_second": 4.278,
"step": 102176
},
{
"epoch": 992.23,
"learning_rate": 1.5533980582524273e-07,
"loss": 0.0756,
"step": 102200
},
{
"epoch": 993.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.405885219573975,
"eval_runtime": 4.413,
"eval_samples_per_second": 65.942,
"eval_steps_per_second": 4.306,
"step": 102279
},
{
"epoch": 993.2,
"learning_rate": 1.359223300970874e-07,
"loss": 0.0744,
"step": 102300
},
{
"epoch": 994.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.405316352844238,
"eval_runtime": 4.5201,
"eval_samples_per_second": 64.379,
"eval_steps_per_second": 4.203,
"step": 102382
},
{
"epoch": 994.17,
"learning_rate": 1.1650485436893205e-07,
"loss": 0.0741,
"step": 102400
},
{
"epoch": 995.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.407505512237549,
"eval_runtime": 4.4833,
"eval_samples_per_second": 64.907,
"eval_steps_per_second": 4.238,
"step": 102485
},
{
"epoch": 995.15,
"learning_rate": 9.70873786407767e-08,
"loss": 0.0757,
"step": 102500
},
{
"epoch": 996.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.4071760177612305,
"eval_runtime": 4.455,
"eval_samples_per_second": 65.32,
"eval_steps_per_second": 4.265,
"step": 102588
},
{
"epoch": 996.12,
"learning_rate": 7.766990291262136e-08,
"loss": 0.0735,
"step": 102600
},
{
"epoch": 997.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.408614635467529,
"eval_runtime": 4.3981,
"eval_samples_per_second": 66.165,
"eval_steps_per_second": 4.32,
"step": 102691
},
{
"epoch": 997.09,
"learning_rate": 5.8252427184466026e-08,
"loss": 0.0708,
"step": 102700
},
{
"epoch": 998.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.4088239669799805,
"eval_runtime": 4.3935,
"eval_samples_per_second": 66.234,
"eval_steps_per_second": 4.325,
"step": 102794
},
{
"epoch": 998.06,
"learning_rate": 3.883495145631068e-08,
"loss": 0.0812,
"step": 102800
},
{
"epoch": 999.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.408839702606201,
"eval_runtime": 4.4716,
"eval_samples_per_second": 65.077,
"eval_steps_per_second": 4.249,
"step": 102897
},
{
"epoch": 999.03,
"learning_rate": 1.941747572815534e-08,
"loss": 0.0746,
"step": 102900
},
{
"epoch": 1000.0,
"learning_rate": 0.0,
"loss": 0.0722,
"step": 103000
},
{
"epoch": 1000.0,
"eval_accuracy": 0.2920962199312715,
"eval_loss": 5.409001350402832,
"eval_runtime": 4.4131,
"eval_samples_per_second": 65.94,
"eval_steps_per_second": 4.305,
"step": 103000
},
{
"epoch": 1000.0,
"step": 103000,
"total_flos": 1.2743565272137728e+20,
"train_loss": 0.18406761223135643,
"train_runtime": 65091.9872,
"train_samples_per_second": 25.257,
"train_steps_per_second": 1.582
}
],
"max_steps": 103000,
"num_train_epochs": 1000,
"total_flos": 1.2743565272137728e+20,
"trial_name": null,
"trial_params": null
}