| { |
| "best_metric": 2.623465061187744, |
| "best_model_checkpoint": "./game-ad-0306_outputs/checkpoint-2266", |
| "epoch": 1000.0, |
| "global_step": 103000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.9980582524271846e-05, |
| "loss": 3.2891, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.21649484536082475, |
| "eval_loss": 3.026599884033203, |
| "eval_runtime": 4.3891, |
| "eval_samples_per_second": 66.301, |
| "eval_steps_per_second": 4.329, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.996116504854369e-05, |
| "loss": 2.9971, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.23024054982817868, |
| "eval_loss": 2.9193508625030518, |
| "eval_runtime": 4.4108, |
| "eval_samples_per_second": 65.974, |
| "eval_steps_per_second": 4.308, |
| "step": 206 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.9941747572815535e-05, |
| "loss": 2.9151, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.24742268041237114, |
| "eval_loss": 2.873065948486328, |
| "eval_runtime": 4.3961, |
| "eval_samples_per_second": 66.196, |
| "eval_steps_per_second": 4.322, |
| "step": 309 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 1.992233009708738e-05, |
| "loss": 2.8579, |
| "step": 400 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 2.8072028160095215, |
| "eval_runtime": 4.4479, |
| "eval_samples_per_second": 65.424, |
| "eval_steps_per_second": 4.272, |
| "step": 412 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 1.9902912621359225e-05, |
| "loss": 2.7768, |
| "step": 500 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.25773195876288657, |
| "eval_loss": 2.7917871475219727, |
| "eval_runtime": 4.4494, |
| "eval_samples_per_second": 65.402, |
| "eval_steps_per_second": 4.27, |
| "step": 515 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 1.988349514563107e-05, |
| "loss": 2.7184, |
| "step": 600 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 2.7295849323272705, |
| "eval_runtime": 4.356, |
| "eval_samples_per_second": 66.805, |
| "eval_steps_per_second": 4.362, |
| "step": 618 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 1.9864077669902914e-05, |
| "loss": 2.648, |
| "step": 700 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 2.7044482231140137, |
| "eval_runtime": 4.3613, |
| "eval_samples_per_second": 66.723, |
| "eval_steps_per_second": 4.356, |
| "step": 721 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 1.9844660194174758e-05, |
| "loss": 2.5884, |
| "step": 800 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 2.7190284729003906, |
| "eval_runtime": 4.4514, |
| "eval_samples_per_second": 65.372, |
| "eval_steps_per_second": 4.268, |
| "step": 824 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 1.9825242718446603e-05, |
| "loss": 2.5146, |
| "step": 900 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 2.694195508956909, |
| "eval_runtime": 4.3642, |
| "eval_samples_per_second": 66.679, |
| "eval_steps_per_second": 4.354, |
| "step": 927 |
| }, |
| { |
| "epoch": 9.71, |
| "learning_rate": 1.9805825242718447e-05, |
| "loss": 2.4384, |
| "step": 1000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 2.687737464904785, |
| "eval_runtime": 4.3751, |
| "eval_samples_per_second": 66.513, |
| "eval_steps_per_second": 4.343, |
| "step": 1030 |
| }, |
| { |
| "epoch": 10.68, |
| "learning_rate": 1.9786407766990292e-05, |
| "loss": 2.442, |
| "step": 1100 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 2.6412112712860107, |
| "eval_runtime": 4.4431, |
| "eval_samples_per_second": 65.495, |
| "eval_steps_per_second": 4.276, |
| "step": 1133 |
| }, |
| { |
| "epoch": 11.65, |
| "learning_rate": 1.9766990291262137e-05, |
| "loss": 2.3099, |
| "step": 1200 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 2.6331019401550293, |
| "eval_runtime": 4.3972, |
| "eval_samples_per_second": 66.178, |
| "eval_steps_per_second": 4.321, |
| "step": 1236 |
| }, |
| { |
| "epoch": 12.62, |
| "learning_rate": 1.974757281553398e-05, |
| "loss": 2.2685, |
| "step": 1300 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 2.64509916305542, |
| "eval_runtime": 4.38, |
| "eval_samples_per_second": 66.438, |
| "eval_steps_per_second": 4.338, |
| "step": 1339 |
| }, |
| { |
| "epoch": 13.59, |
| "learning_rate": 1.972815533980583e-05, |
| "loss": 2.182, |
| "step": 1400 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 2.692749500274658, |
| "eval_runtime": 4.4055, |
| "eval_samples_per_second": 66.053, |
| "eval_steps_per_second": 4.313, |
| "step": 1442 |
| }, |
| { |
| "epoch": 14.56, |
| "learning_rate": 1.970873786407767e-05, |
| "loss": 2.1421, |
| "step": 1500 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 2.661494016647339, |
| "eval_runtime": 4.3695, |
| "eval_samples_per_second": 66.599, |
| "eval_steps_per_second": 4.348, |
| "step": 1545 |
| }, |
| { |
| "epoch": 15.53, |
| "learning_rate": 1.9689320388349515e-05, |
| "loss": 2.0483, |
| "step": 1600 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 2.6499741077423096, |
| "eval_runtime": 4.3729, |
| "eval_samples_per_second": 66.546, |
| "eval_steps_per_second": 4.345, |
| "step": 1648 |
| }, |
| { |
| "epoch": 16.5, |
| "learning_rate": 1.9669902912621363e-05, |
| "loss": 1.9884, |
| "step": 1700 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 2.6526920795440674, |
| "eval_runtime": 4.4576, |
| "eval_samples_per_second": 65.282, |
| "eval_steps_per_second": 4.262, |
| "step": 1751 |
| }, |
| { |
| "epoch": 17.48, |
| "learning_rate": 1.9650485436893204e-05, |
| "loss": 1.9316, |
| "step": 1800 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 2.673600435256958, |
| "eval_runtime": 4.3873, |
| "eval_samples_per_second": 66.328, |
| "eval_steps_per_second": 4.331, |
| "step": 1854 |
| }, |
| { |
| "epoch": 18.45, |
| "learning_rate": 1.9631067961165052e-05, |
| "loss": 1.8785, |
| "step": 1900 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 2.639138698577881, |
| "eval_runtime": 4.4001, |
| "eval_samples_per_second": 66.135, |
| "eval_steps_per_second": 4.318, |
| "step": 1957 |
| }, |
| { |
| "epoch": 19.42, |
| "learning_rate": 1.9611650485436893e-05, |
| "loss": 1.788, |
| "step": 2000 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 2.7002217769622803, |
| "eval_runtime": 4.3688, |
| "eval_samples_per_second": 66.609, |
| "eval_steps_per_second": 4.349, |
| "step": 2060 |
| }, |
| { |
| "epoch": 20.39, |
| "learning_rate": 1.9592233009708738e-05, |
| "loss": 1.7115, |
| "step": 2100 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 2.832120180130005, |
| "eval_runtime": 4.3608, |
| "eval_samples_per_second": 66.731, |
| "eval_steps_per_second": 4.357, |
| "step": 2163 |
| }, |
| { |
| "epoch": 21.36, |
| "learning_rate": 1.9572815533980586e-05, |
| "loss": 1.6929, |
| "step": 2200 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 2.623465061187744, |
| "eval_runtime": 4.3818, |
| "eval_samples_per_second": 66.411, |
| "eval_steps_per_second": 4.336, |
| "step": 2266 |
| }, |
| { |
| "epoch": 22.33, |
| "learning_rate": 1.9553398058252427e-05, |
| "loss": 1.6239, |
| "step": 2300 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 2.6378397941589355, |
| "eval_runtime": 4.3841, |
| "eval_samples_per_second": 66.377, |
| "eval_steps_per_second": 4.334, |
| "step": 2369 |
| }, |
| { |
| "epoch": 23.3, |
| "learning_rate": 1.9533980582524275e-05, |
| "loss": 1.5387, |
| "step": 2400 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 2.688793659210205, |
| "eval_runtime": 4.3688, |
| "eval_samples_per_second": 66.609, |
| "eval_steps_per_second": 4.349, |
| "step": 2472 |
| }, |
| { |
| "epoch": 24.27, |
| "learning_rate": 1.951456310679612e-05, |
| "loss": 1.5095, |
| "step": 2500 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 2.688781261444092, |
| "eval_runtime": 4.374, |
| "eval_samples_per_second": 66.53, |
| "eval_steps_per_second": 4.344, |
| "step": 2575 |
| }, |
| { |
| "epoch": 25.24, |
| "learning_rate": 1.949514563106796e-05, |
| "loss": 1.4153, |
| "step": 2600 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 2.677133560180664, |
| "eval_runtime": 4.3662, |
| "eval_samples_per_second": 66.648, |
| "eval_steps_per_second": 4.352, |
| "step": 2678 |
| }, |
| { |
| "epoch": 26.21, |
| "learning_rate": 1.947572815533981e-05, |
| "loss": 1.4254, |
| "step": 2700 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 2.7354466915130615, |
| "eval_runtime": 4.4258, |
| "eval_samples_per_second": 65.751, |
| "eval_steps_per_second": 4.293, |
| "step": 2781 |
| }, |
| { |
| "epoch": 27.18, |
| "learning_rate": 1.9456310679611653e-05, |
| "loss": 1.3351, |
| "step": 2800 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 2.7175216674804688, |
| "eval_runtime": 4.4547, |
| "eval_samples_per_second": 65.325, |
| "eval_steps_per_second": 4.265, |
| "step": 2884 |
| }, |
| { |
| "epoch": 28.16, |
| "learning_rate": 1.9436893203883495e-05, |
| "loss": 1.2955, |
| "step": 2900 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 2.767915725708008, |
| "eval_runtime": 4.4025, |
| "eval_samples_per_second": 66.099, |
| "eval_steps_per_second": 4.316, |
| "step": 2987 |
| }, |
| { |
| "epoch": 29.13, |
| "learning_rate": 1.9417475728155343e-05, |
| "loss": 1.2232, |
| "step": 3000 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 2.7784156799316406, |
| "eval_runtime": 4.4307, |
| "eval_samples_per_second": 65.678, |
| "eval_steps_per_second": 4.288, |
| "step": 3090 |
| }, |
| { |
| "epoch": 30.1, |
| "learning_rate": 1.9398058252427187e-05, |
| "loss": 1.2115, |
| "step": 3100 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 2.8495712280273438, |
| "eval_runtime": 4.3998, |
| "eval_samples_per_second": 66.139, |
| "eval_steps_per_second": 4.318, |
| "step": 3193 |
| }, |
| { |
| "epoch": 31.07, |
| "learning_rate": 1.937864077669903e-05, |
| "loss": 1.1656, |
| "step": 3200 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 2.7899186611175537, |
| "eval_runtime": 4.3536, |
| "eval_samples_per_second": 66.842, |
| "eval_steps_per_second": 4.364, |
| "step": 3296 |
| }, |
| { |
| "epoch": 32.04, |
| "learning_rate": 1.9359223300970876e-05, |
| "loss": 1.1419, |
| "step": 3300 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 2.7646260261535645, |
| "eval_runtime": 4.387, |
| "eval_samples_per_second": 66.332, |
| "eval_steps_per_second": 4.331, |
| "step": 3399 |
| }, |
| { |
| "epoch": 33.01, |
| "learning_rate": 1.9339805825242717e-05, |
| "loss": 1.0743, |
| "step": 3400 |
| }, |
| { |
| "epoch": 33.98, |
| "learning_rate": 1.9320388349514565e-05, |
| "loss": 1.0481, |
| "step": 3500 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 2.8416407108306885, |
| "eval_runtime": 4.4404, |
| "eval_samples_per_second": 65.535, |
| "eval_steps_per_second": 4.279, |
| "step": 3502 |
| }, |
| { |
| "epoch": 34.95, |
| "learning_rate": 1.930097087378641e-05, |
| "loss": 0.9763, |
| "step": 3600 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 2.8369979858398438, |
| "eval_runtime": 4.3776, |
| "eval_samples_per_second": 66.474, |
| "eval_steps_per_second": 4.34, |
| "step": 3605 |
| }, |
| { |
| "epoch": 35.92, |
| "learning_rate": 1.9281553398058255e-05, |
| "loss": 0.9452, |
| "step": 3700 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 2.7903831005096436, |
| "eval_runtime": 4.4202, |
| "eval_samples_per_second": 65.833, |
| "eval_steps_per_second": 4.298, |
| "step": 3708 |
| }, |
| { |
| "epoch": 36.89, |
| "learning_rate": 1.92621359223301e-05, |
| "loss": 0.9178, |
| "step": 3800 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 2.830864191055298, |
| "eval_runtime": 4.3724, |
| "eval_samples_per_second": 66.554, |
| "eval_steps_per_second": 4.345, |
| "step": 3811 |
| }, |
| { |
| "epoch": 37.86, |
| "learning_rate": 1.9242718446601944e-05, |
| "loss": 0.9115, |
| "step": 3900 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 2.858407735824585, |
| "eval_runtime": 4.4507, |
| "eval_samples_per_second": 65.382, |
| "eval_steps_per_second": 4.269, |
| "step": 3914 |
| }, |
| { |
| "epoch": 38.83, |
| "learning_rate": 1.922330097087379e-05, |
| "loss": 0.8472, |
| "step": 4000 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 2.906602382659912, |
| "eval_runtime": 4.3684, |
| "eval_samples_per_second": 66.615, |
| "eval_steps_per_second": 4.349, |
| "step": 4017 |
| }, |
| { |
| "epoch": 39.81, |
| "learning_rate": 1.9203883495145633e-05, |
| "loss": 0.8323, |
| "step": 4100 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 2.862963914871216, |
| "eval_runtime": 4.3894, |
| "eval_samples_per_second": 66.295, |
| "eval_steps_per_second": 4.329, |
| "step": 4120 |
| }, |
| { |
| "epoch": 40.78, |
| "learning_rate": 1.9184466019417478e-05, |
| "loss": 0.7622, |
| "step": 4200 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 3.0019726753234863, |
| "eval_runtime": 4.3977, |
| "eval_samples_per_second": 66.17, |
| "eval_steps_per_second": 4.32, |
| "step": 4223 |
| }, |
| { |
| "epoch": 41.75, |
| "learning_rate": 1.9165048543689322e-05, |
| "loss": 0.7531, |
| "step": 4300 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 2.88852596282959, |
| "eval_runtime": 4.3979, |
| "eval_samples_per_second": 66.168, |
| "eval_steps_per_second": 4.32, |
| "step": 4326 |
| }, |
| { |
| "epoch": 42.72, |
| "learning_rate": 1.9145631067961167e-05, |
| "loss": 0.7054, |
| "step": 4400 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 2.882045269012451, |
| "eval_runtime": 4.4212, |
| "eval_samples_per_second": 65.82, |
| "eval_steps_per_second": 4.298, |
| "step": 4429 |
| }, |
| { |
| "epoch": 43.69, |
| "learning_rate": 1.912621359223301e-05, |
| "loss": 0.685, |
| "step": 4500 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 2.8763577938079834, |
| "eval_runtime": 4.4513, |
| "eval_samples_per_second": 65.374, |
| "eval_steps_per_second": 4.268, |
| "step": 4532 |
| }, |
| { |
| "epoch": 44.66, |
| "learning_rate": 1.9106796116504856e-05, |
| "loss": 0.7206, |
| "step": 4600 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 2.8658735752105713, |
| "eval_runtime": 4.3654, |
| "eval_samples_per_second": 66.66, |
| "eval_steps_per_second": 4.352, |
| "step": 4635 |
| }, |
| { |
| "epoch": 45.63, |
| "learning_rate": 1.90873786407767e-05, |
| "loss": 0.6304, |
| "step": 4700 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 2.953686237335205, |
| "eval_runtime": 4.5626, |
| "eval_samples_per_second": 63.78, |
| "eval_steps_per_second": 4.164, |
| "step": 4738 |
| }, |
| { |
| "epoch": 46.6, |
| "learning_rate": 1.9067961165048545e-05, |
| "loss": 0.6369, |
| "step": 4800 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_accuracy": 0.2508591065292096, |
| "eval_loss": 2.9659738540649414, |
| "eval_runtime": 4.4308, |
| "eval_samples_per_second": 65.677, |
| "eval_steps_per_second": 4.288, |
| "step": 4841 |
| }, |
| { |
| "epoch": 47.57, |
| "learning_rate": 1.904854368932039e-05, |
| "loss": 0.6161, |
| "step": 4900 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 3.1111767292022705, |
| "eval_runtime": 4.3981, |
| "eval_samples_per_second": 66.165, |
| "eval_steps_per_second": 4.32, |
| "step": 4944 |
| }, |
| { |
| "epoch": 48.54, |
| "learning_rate": 1.9029126213592234e-05, |
| "loss": 0.618, |
| "step": 5000 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 2.9729325771331787, |
| "eval_runtime": 4.4073, |
| "eval_samples_per_second": 66.027, |
| "eval_steps_per_second": 4.311, |
| "step": 5047 |
| }, |
| { |
| "epoch": 49.51, |
| "learning_rate": 1.900970873786408e-05, |
| "loss": 0.556, |
| "step": 5100 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 2.986999273300171, |
| "eval_runtime": 4.3872, |
| "eval_samples_per_second": 66.33, |
| "eval_steps_per_second": 4.331, |
| "step": 5150 |
| }, |
| { |
| "epoch": 50.49, |
| "learning_rate": 1.8990291262135923e-05, |
| "loss": 0.5314, |
| "step": 5200 |
| }, |
| { |
| "epoch": 51.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 2.993405342102051, |
| "eval_runtime": 4.3709, |
| "eval_samples_per_second": 66.577, |
| "eval_steps_per_second": 4.347, |
| "step": 5253 |
| }, |
| { |
| "epoch": 51.46, |
| "learning_rate": 1.8970873786407768e-05, |
| "loss": 0.5502, |
| "step": 5300 |
| }, |
| { |
| "epoch": 52.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 2.937934160232544, |
| "eval_runtime": 4.3519, |
| "eval_samples_per_second": 66.868, |
| "eval_steps_per_second": 4.366, |
| "step": 5356 |
| }, |
| { |
| "epoch": 52.43, |
| "learning_rate": 1.8951456310679613e-05, |
| "loss": 0.4958, |
| "step": 5400 |
| }, |
| { |
| "epoch": 53.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 3.0344159603118896, |
| "eval_runtime": 4.3703, |
| "eval_samples_per_second": 66.585, |
| "eval_steps_per_second": 4.347, |
| "step": 5459 |
| }, |
| { |
| "epoch": 53.4, |
| "learning_rate": 1.8932038834951457e-05, |
| "loss": 0.4896, |
| "step": 5500 |
| }, |
| { |
| "epoch": 54.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 2.9924163818359375, |
| "eval_runtime": 4.5316, |
| "eval_samples_per_second": 64.215, |
| "eval_steps_per_second": 4.193, |
| "step": 5562 |
| }, |
| { |
| "epoch": 54.37, |
| "learning_rate": 1.89126213592233e-05, |
| "loss": 0.4803, |
| "step": 5600 |
| }, |
| { |
| "epoch": 55.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 3.0161025524139404, |
| "eval_runtime": 4.5126, |
| "eval_samples_per_second": 64.486, |
| "eval_steps_per_second": 4.21, |
| "step": 5665 |
| }, |
| { |
| "epoch": 55.34, |
| "learning_rate": 1.889320388349515e-05, |
| "loss": 0.4554, |
| "step": 5700 |
| }, |
| { |
| "epoch": 56.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.0220870971679688, |
| "eval_runtime": 4.3658, |
| "eval_samples_per_second": 66.654, |
| "eval_steps_per_second": 4.352, |
| "step": 5768 |
| }, |
| { |
| "epoch": 56.31, |
| "learning_rate": 1.887378640776699e-05, |
| "loss": 0.4591, |
| "step": 5800 |
| }, |
| { |
| "epoch": 57.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 3.0460500717163086, |
| "eval_runtime": 4.3815, |
| "eval_samples_per_second": 66.415, |
| "eval_steps_per_second": 4.336, |
| "step": 5871 |
| }, |
| { |
| "epoch": 57.28, |
| "learning_rate": 1.8854368932038835e-05, |
| "loss": 0.4349, |
| "step": 5900 |
| }, |
| { |
| "epoch": 58.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 3.137669801712036, |
| "eval_runtime": 4.3638, |
| "eval_samples_per_second": 66.685, |
| "eval_steps_per_second": 4.354, |
| "step": 5974 |
| }, |
| { |
| "epoch": 58.25, |
| "learning_rate": 1.883495145631068e-05, |
| "loss": 0.4127, |
| "step": 6000 |
| }, |
| { |
| "epoch": 59.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.0168519020080566, |
| "eval_runtime": 4.3582, |
| "eval_samples_per_second": 66.771, |
| "eval_steps_per_second": 4.36, |
| "step": 6077 |
| }, |
| { |
| "epoch": 59.22, |
| "learning_rate": 1.8815533980582525e-05, |
| "loss": 0.3973, |
| "step": 6100 |
| }, |
| { |
| "epoch": 60.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.0337910652160645, |
| "eval_runtime": 4.4091, |
| "eval_samples_per_second": 66.001, |
| "eval_steps_per_second": 4.309, |
| "step": 6180 |
| }, |
| { |
| "epoch": 60.19, |
| "learning_rate": 1.8796116504854373e-05, |
| "loss": 0.4109, |
| "step": 6200 |
| }, |
| { |
| "epoch": 61.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.063812255859375, |
| "eval_runtime": 4.5298, |
| "eval_samples_per_second": 64.241, |
| "eval_steps_per_second": 4.194, |
| "step": 6283 |
| }, |
| { |
| "epoch": 61.17, |
| "learning_rate": 1.8776699029126214e-05, |
| "loss": 0.3872, |
| "step": 6300 |
| }, |
| { |
| "epoch": 62.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.0810182094573975, |
| "eval_runtime": 4.5282, |
| "eval_samples_per_second": 64.264, |
| "eval_steps_per_second": 4.196, |
| "step": 6386 |
| }, |
| { |
| "epoch": 62.14, |
| "learning_rate": 1.875728155339806e-05, |
| "loss": 0.3693, |
| "step": 6400 |
| }, |
| { |
| "epoch": 63.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 3.2002737522125244, |
| "eval_runtime": 4.4521, |
| "eval_samples_per_second": 65.362, |
| "eval_steps_per_second": 4.268, |
| "step": 6489 |
| }, |
| { |
| "epoch": 63.11, |
| "learning_rate": 1.8737864077669906e-05, |
| "loss": 0.3457, |
| "step": 6500 |
| }, |
| { |
| "epoch": 64.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 3.0842859745025635, |
| "eval_runtime": 4.4307, |
| "eval_samples_per_second": 65.677, |
| "eval_steps_per_second": 4.288, |
| "step": 6592 |
| }, |
| { |
| "epoch": 64.08, |
| "learning_rate": 1.8718446601941747e-05, |
| "loss": 0.3521, |
| "step": 6600 |
| }, |
| { |
| "epoch": 65.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 3.1622819900512695, |
| "eval_runtime": 4.433, |
| "eval_samples_per_second": 65.644, |
| "eval_steps_per_second": 4.286, |
| "step": 6695 |
| }, |
| { |
| "epoch": 65.05, |
| "learning_rate": 1.8699029126213595e-05, |
| "loss": 0.3625, |
| "step": 6700 |
| }, |
| { |
| "epoch": 66.0, |
| "eval_accuracy": 0.32989690721649484, |
| "eval_loss": 3.003610372543335, |
| "eval_runtime": 4.5052, |
| "eval_samples_per_second": 64.592, |
| "eval_steps_per_second": 4.217, |
| "step": 6798 |
| }, |
| { |
| "epoch": 66.02, |
| "learning_rate": 1.867961165048544e-05, |
| "loss": 0.3746, |
| "step": 6800 |
| }, |
| { |
| "epoch": 66.99, |
| "learning_rate": 1.866019417475728e-05, |
| "loss": 0.3339, |
| "step": 6900 |
| }, |
| { |
| "epoch": 67.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 3.2389235496520996, |
| "eval_runtime": 4.4738, |
| "eval_samples_per_second": 65.046, |
| "eval_steps_per_second": 4.247, |
| "step": 6901 |
| }, |
| { |
| "epoch": 67.96, |
| "learning_rate": 1.864077669902913e-05, |
| "loss": 0.3378, |
| "step": 7000 |
| }, |
| { |
| "epoch": 68.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 3.249319076538086, |
| "eval_runtime": 4.4083, |
| "eval_samples_per_second": 66.011, |
| "eval_steps_per_second": 4.31, |
| "step": 7004 |
| }, |
| { |
| "epoch": 68.93, |
| "learning_rate": 1.8621359223300974e-05, |
| "loss": 0.2981, |
| "step": 7100 |
| }, |
| { |
| "epoch": 69.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.130829334259033, |
| "eval_runtime": 4.4429, |
| "eval_samples_per_second": 65.498, |
| "eval_steps_per_second": 4.277, |
| "step": 7107 |
| }, |
| { |
| "epoch": 69.9, |
| "learning_rate": 1.860194174757282e-05, |
| "loss": 0.3023, |
| "step": 7200 |
| }, |
| { |
| "epoch": 70.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 3.2455456256866455, |
| "eval_runtime": 4.4181, |
| "eval_samples_per_second": 65.866, |
| "eval_steps_per_second": 4.301, |
| "step": 7210 |
| }, |
| { |
| "epoch": 70.87, |
| "learning_rate": 1.8582524271844663e-05, |
| "loss": 0.3076, |
| "step": 7300 |
| }, |
| { |
| "epoch": 71.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 3.27248477935791, |
| "eval_runtime": 4.418, |
| "eval_samples_per_second": 65.867, |
| "eval_steps_per_second": 4.301, |
| "step": 7313 |
| }, |
| { |
| "epoch": 71.84, |
| "learning_rate": 1.8563106796116504e-05, |
| "loss": 0.3201, |
| "step": 7400 |
| }, |
| { |
| "epoch": 72.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 3.2563161849975586, |
| "eval_runtime": 4.3553, |
| "eval_samples_per_second": 66.814, |
| "eval_steps_per_second": 4.362, |
| "step": 7416 |
| }, |
| { |
| "epoch": 72.82, |
| "learning_rate": 1.8543689320388352e-05, |
| "loss": 0.3083, |
| "step": 7500 |
| }, |
| { |
| "epoch": 73.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 3.252042055130005, |
| "eval_runtime": 4.4514, |
| "eval_samples_per_second": 65.372, |
| "eval_steps_per_second": 4.268, |
| "step": 7519 |
| }, |
| { |
| "epoch": 73.79, |
| "learning_rate": 1.8524271844660197e-05, |
| "loss": 0.2906, |
| "step": 7600 |
| }, |
| { |
| "epoch": 74.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 3.3343799114227295, |
| "eval_runtime": 4.3733, |
| "eval_samples_per_second": 66.54, |
| "eval_steps_per_second": 4.345, |
| "step": 7622 |
| }, |
| { |
| "epoch": 74.76, |
| "learning_rate": 1.850485436893204e-05, |
| "loss": 0.2721, |
| "step": 7700 |
| }, |
| { |
| "epoch": 75.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 3.1951873302459717, |
| "eval_runtime": 4.376, |
| "eval_samples_per_second": 66.499, |
| "eval_steps_per_second": 4.342, |
| "step": 7725 |
| }, |
| { |
| "epoch": 75.73, |
| "learning_rate": 1.8485436893203886e-05, |
| "loss": 0.2873, |
| "step": 7800 |
| }, |
| { |
| "epoch": 76.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 3.2528605461120605, |
| "eval_runtime": 4.3937, |
| "eval_samples_per_second": 66.231, |
| "eval_steps_per_second": 4.324, |
| "step": 7828 |
| }, |
| { |
| "epoch": 76.7, |
| "learning_rate": 1.846601941747573e-05, |
| "loss": 0.278, |
| "step": 7900 |
| }, |
| { |
| "epoch": 77.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.3427820205688477, |
| "eval_runtime": 4.393, |
| "eval_samples_per_second": 66.241, |
| "eval_steps_per_second": 4.325, |
| "step": 7931 |
| }, |
| { |
| "epoch": 77.67, |
| "learning_rate": 1.8446601941747575e-05, |
| "loss": 0.2573, |
| "step": 8000 |
| }, |
| { |
| "epoch": 78.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 3.3216073513031006, |
| "eval_runtime": 4.3997, |
| "eval_samples_per_second": 66.141, |
| "eval_steps_per_second": 4.318, |
| "step": 8034 |
| }, |
| { |
| "epoch": 78.64, |
| "learning_rate": 1.842718446601942e-05, |
| "loss": 0.2578, |
| "step": 8100 |
| }, |
| { |
| "epoch": 79.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.4177794456481934, |
| "eval_runtime": 4.3657, |
| "eval_samples_per_second": 66.656, |
| "eval_steps_per_second": 4.352, |
| "step": 8137 |
| }, |
| { |
| "epoch": 79.61, |
| "learning_rate": 1.8407766990291264e-05, |
| "loss": 0.2774, |
| "step": 8200 |
| }, |
| { |
| "epoch": 80.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.344855785369873, |
| "eval_runtime": 4.4508, |
| "eval_samples_per_second": 65.382, |
| "eval_steps_per_second": 4.269, |
| "step": 8240 |
| }, |
| { |
| "epoch": 80.58, |
| "learning_rate": 1.838834951456311e-05, |
| "loss": 0.2762, |
| "step": 8300 |
| }, |
| { |
| "epoch": 81.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 3.3451921939849854, |
| "eval_runtime": 4.3629, |
| "eval_samples_per_second": 66.699, |
| "eval_steps_per_second": 4.355, |
| "step": 8343 |
| }, |
| { |
| "epoch": 81.55, |
| "learning_rate": 1.8368932038834953e-05, |
| "loss": 0.2504, |
| "step": 8400 |
| }, |
| { |
| "epoch": 82.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.579151153564453, |
| "eval_runtime": 4.3815, |
| "eval_samples_per_second": 66.416, |
| "eval_steps_per_second": 4.336, |
| "step": 8446 |
| }, |
| { |
| "epoch": 82.52, |
| "learning_rate": 1.8349514563106798e-05, |
| "loss": 0.2552, |
| "step": 8500 |
| }, |
| { |
| "epoch": 83.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.3477821350097656, |
| "eval_runtime": 4.3706, |
| "eval_samples_per_second": 66.582, |
| "eval_steps_per_second": 4.347, |
| "step": 8549 |
| }, |
| { |
| "epoch": 83.5, |
| "learning_rate": 1.8330097087378643e-05, |
| "loss": 0.2541, |
| "step": 8600 |
| }, |
| { |
| "epoch": 84.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 3.4901580810546875, |
| "eval_runtime": 4.4137, |
| "eval_samples_per_second": 65.932, |
| "eval_steps_per_second": 4.305, |
| "step": 8652 |
| }, |
| { |
| "epoch": 84.47, |
| "learning_rate": 1.8310679611650487e-05, |
| "loss": 0.2616, |
| "step": 8700 |
| }, |
| { |
| "epoch": 85.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 3.282921552658081, |
| "eval_runtime": 4.4452, |
| "eval_samples_per_second": 65.464, |
| "eval_steps_per_second": 4.274, |
| "step": 8755 |
| }, |
| { |
| "epoch": 85.44, |
| "learning_rate": 1.8291262135922332e-05, |
| "loss": 0.2079, |
| "step": 8800 |
| }, |
| { |
| "epoch": 86.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 3.528667688369751, |
| "eval_runtime": 4.496, |
| "eval_samples_per_second": 64.725, |
| "eval_steps_per_second": 4.226, |
| "step": 8858 |
| }, |
| { |
| "epoch": 86.41, |
| "learning_rate": 1.8271844660194176e-05, |
| "loss": 0.2538, |
| "step": 8900 |
| }, |
| { |
| "epoch": 87.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 3.4730610847473145, |
| "eval_runtime": 4.4242, |
| "eval_samples_per_second": 65.774, |
| "eval_steps_per_second": 4.295, |
| "step": 8961 |
| }, |
| { |
| "epoch": 87.38, |
| "learning_rate": 1.825242718446602e-05, |
| "loss": 0.2485, |
| "step": 9000 |
| }, |
| { |
| "epoch": 88.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 3.5997567176818848, |
| "eval_runtime": 4.3773, |
| "eval_samples_per_second": 66.479, |
| "eval_steps_per_second": 4.341, |
| "step": 9064 |
| }, |
| { |
| "epoch": 88.35, |
| "learning_rate": 1.8233009708737865e-05, |
| "loss": 0.2714, |
| "step": 9100 |
| }, |
| { |
| "epoch": 89.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 3.4566540718078613, |
| "eval_runtime": 4.4582, |
| "eval_samples_per_second": 65.274, |
| "eval_steps_per_second": 4.262, |
| "step": 9167 |
| }, |
| { |
| "epoch": 89.32, |
| "learning_rate": 1.821359223300971e-05, |
| "loss": 0.232, |
| "step": 9200 |
| }, |
| { |
| "epoch": 90.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.5061261653900146, |
| "eval_runtime": 4.3957, |
| "eval_samples_per_second": 66.2, |
| "eval_steps_per_second": 4.322, |
| "step": 9270 |
| }, |
| { |
| "epoch": 90.29, |
| "learning_rate": 1.8194174757281555e-05, |
| "loss": 0.2577, |
| "step": 9300 |
| }, |
| { |
| "epoch": 91.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 3.536961317062378, |
| "eval_runtime": 4.4048, |
| "eval_samples_per_second": 66.065, |
| "eval_steps_per_second": 4.313, |
| "step": 9373 |
| }, |
| { |
| "epoch": 91.26, |
| "learning_rate": 1.81747572815534e-05, |
| "loss": 0.2232, |
| "step": 9400 |
| }, |
| { |
| "epoch": 92.0, |
| "eval_accuracy": 0.2508591065292096, |
| "eval_loss": 3.5062103271484375, |
| "eval_runtime": 4.3856, |
| "eval_samples_per_second": 66.353, |
| "eval_steps_per_second": 4.332, |
| "step": 9476 |
| }, |
| { |
| "epoch": 92.23, |
| "learning_rate": 1.8155339805825244e-05, |
| "loss": 0.2351, |
| "step": 9500 |
| }, |
| { |
| "epoch": 93.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 3.559199094772339, |
| "eval_runtime": 4.3998, |
| "eval_samples_per_second": 66.14, |
| "eval_steps_per_second": 4.318, |
| "step": 9579 |
| }, |
| { |
| "epoch": 93.2, |
| "learning_rate": 1.813592233009709e-05, |
| "loss": 0.2299, |
| "step": 9600 |
| }, |
| { |
| "epoch": 94.0, |
| "eval_accuracy": 0.3333333333333333, |
| "eval_loss": 3.516669988632202, |
| "eval_runtime": 4.4819, |
| "eval_samples_per_second": 64.928, |
| "eval_steps_per_second": 4.239, |
| "step": 9682 |
| }, |
| { |
| "epoch": 94.17, |
| "learning_rate": 1.8116504854368933e-05, |
| "loss": 0.2415, |
| "step": 9700 |
| }, |
| { |
| "epoch": 95.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 3.6282804012298584, |
| "eval_runtime": 4.3633, |
| "eval_samples_per_second": 66.692, |
| "eval_steps_per_second": 4.354, |
| "step": 9785 |
| }, |
| { |
| "epoch": 95.15, |
| "learning_rate": 1.8097087378640778e-05, |
| "loss": 0.2265, |
| "step": 9800 |
| }, |
| { |
| "epoch": 96.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 3.4819419384002686, |
| "eval_runtime": 4.3709, |
| "eval_samples_per_second": 66.577, |
| "eval_steps_per_second": 4.347, |
| "step": 9888 |
| }, |
| { |
| "epoch": 96.12, |
| "learning_rate": 1.8077669902912622e-05, |
| "loss": 0.2448, |
| "step": 9900 |
| }, |
| { |
| "epoch": 97.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 3.5793020725250244, |
| "eval_runtime": 4.3824, |
| "eval_samples_per_second": 66.402, |
| "eval_steps_per_second": 4.336, |
| "step": 9991 |
| }, |
| { |
| "epoch": 97.09, |
| "learning_rate": 1.8058252427184467e-05, |
| "loss": 0.2141, |
| "step": 10000 |
| }, |
| { |
| "epoch": 98.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 3.5728020668029785, |
| "eval_runtime": 4.3625, |
| "eval_samples_per_second": 66.704, |
| "eval_steps_per_second": 4.355, |
| "step": 10094 |
| }, |
| { |
| "epoch": 98.06, |
| "learning_rate": 1.803883495145631e-05, |
| "loss": 0.1979, |
| "step": 10100 |
| }, |
| { |
| "epoch": 99.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 3.4685394763946533, |
| "eval_runtime": 4.4238, |
| "eval_samples_per_second": 65.781, |
| "eval_steps_per_second": 4.295, |
| "step": 10197 |
| }, |
| { |
| "epoch": 99.03, |
| "learning_rate": 1.8019417475728156e-05, |
| "loss": 0.2188, |
| "step": 10200 |
| }, |
| { |
| "epoch": 100.0, |
| "learning_rate": 1.8e-05, |
| "loss": 0.2077, |
| "step": 10300 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 3.558551788330078, |
| "eval_runtime": 4.4141, |
| "eval_samples_per_second": 65.924, |
| "eval_steps_per_second": 4.304, |
| "step": 10300 |
| }, |
| { |
| "epoch": 100.97, |
| "learning_rate": 1.7980582524271845e-05, |
| "loss": 0.1854, |
| "step": 10400 |
| }, |
| { |
| "epoch": 101.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 3.5650315284729004, |
| "eval_runtime": 4.4256, |
| "eval_samples_per_second": 65.754, |
| "eval_steps_per_second": 4.293, |
| "step": 10403 |
| }, |
| { |
| "epoch": 101.94, |
| "learning_rate": 1.7961165048543693e-05, |
| "loss": 0.2017, |
| "step": 10500 |
| }, |
| { |
| "epoch": 102.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 3.4760477542877197, |
| "eval_runtime": 4.4327, |
| "eval_samples_per_second": 65.649, |
| "eval_steps_per_second": 4.286, |
| "step": 10506 |
| }, |
| { |
| "epoch": 102.91, |
| "learning_rate": 1.7941747572815534e-05, |
| "loss": 0.2119, |
| "step": 10600 |
| }, |
| { |
| "epoch": 103.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 3.5530800819396973, |
| "eval_runtime": 4.4147, |
| "eval_samples_per_second": 65.916, |
| "eval_steps_per_second": 4.304, |
| "step": 10609 |
| }, |
| { |
| "epoch": 103.88, |
| "learning_rate": 1.792233009708738e-05, |
| "loss": 0.2314, |
| "step": 10700 |
| }, |
| { |
| "epoch": 104.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 3.5117688179016113, |
| "eval_runtime": 4.4182, |
| "eval_samples_per_second": 65.863, |
| "eval_steps_per_second": 4.3, |
| "step": 10712 |
| }, |
| { |
| "epoch": 104.85, |
| "learning_rate": 1.7902912621359227e-05, |
| "loss": 0.212, |
| "step": 10800 |
| }, |
| { |
| "epoch": 105.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 3.54956316947937, |
| "eval_runtime": 4.4027, |
| "eval_samples_per_second": 66.095, |
| "eval_steps_per_second": 4.315, |
| "step": 10815 |
| }, |
| { |
| "epoch": 105.83, |
| "learning_rate": 1.7883495145631068e-05, |
| "loss": 0.197, |
| "step": 10900 |
| }, |
| { |
| "epoch": 106.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 3.607961654663086, |
| "eval_runtime": 4.4253, |
| "eval_samples_per_second": 65.758, |
| "eval_steps_per_second": 4.294, |
| "step": 10918 |
| }, |
| { |
| "epoch": 106.8, |
| "learning_rate": 1.7864077669902916e-05, |
| "loss": 0.2067, |
| "step": 11000 |
| }, |
| { |
| "epoch": 107.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 3.621704578399658, |
| "eval_runtime": 4.4372, |
| "eval_samples_per_second": 65.582, |
| "eval_steps_per_second": 4.282, |
| "step": 11021 |
| }, |
| { |
| "epoch": 107.77, |
| "learning_rate": 1.7844660194174757e-05, |
| "loss": 0.1896, |
| "step": 11100 |
| }, |
| { |
| "epoch": 108.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 3.6445584297180176, |
| "eval_runtime": 4.4137, |
| "eval_samples_per_second": 65.931, |
| "eval_steps_per_second": 4.305, |
| "step": 11124 |
| }, |
| { |
| "epoch": 108.74, |
| "learning_rate": 1.7825242718446602e-05, |
| "loss": 0.198, |
| "step": 11200 |
| }, |
| { |
| "epoch": 109.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 3.769904136657715, |
| "eval_runtime": 4.4154, |
| "eval_samples_per_second": 65.905, |
| "eval_steps_per_second": 4.303, |
| "step": 11227 |
| }, |
| { |
| "epoch": 109.71, |
| "learning_rate": 1.780582524271845e-05, |
| "loss": 0.2152, |
| "step": 11300 |
| }, |
| { |
| "epoch": 110.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 3.6709232330322266, |
| "eval_runtime": 4.4235, |
| "eval_samples_per_second": 65.784, |
| "eval_steps_per_second": 4.295, |
| "step": 11330 |
| }, |
| { |
| "epoch": 110.68, |
| "learning_rate": 1.778640776699029e-05, |
| "loss": 0.2121, |
| "step": 11400 |
| }, |
| { |
| "epoch": 111.0, |
| "eval_accuracy": 0.33676975945017185, |
| "eval_loss": 3.6265642642974854, |
| "eval_runtime": 4.4106, |
| "eval_samples_per_second": 65.977, |
| "eval_steps_per_second": 4.308, |
| "step": 11433 |
| }, |
| { |
| "epoch": 111.65, |
| "learning_rate": 1.776699029126214e-05, |
| "loss": 0.1869, |
| "step": 11500 |
| }, |
| { |
| "epoch": 112.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.668063163757324, |
| "eval_runtime": 4.4048, |
| "eval_samples_per_second": 66.064, |
| "eval_steps_per_second": 4.313, |
| "step": 11536 |
| }, |
| { |
| "epoch": 112.62, |
| "learning_rate": 1.7747572815533983e-05, |
| "loss": 0.1927, |
| "step": 11600 |
| }, |
| { |
| "epoch": 113.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 3.7304697036743164, |
| "eval_runtime": 4.4143, |
| "eval_samples_per_second": 65.922, |
| "eval_steps_per_second": 4.304, |
| "step": 11639 |
| }, |
| { |
| "epoch": 113.59, |
| "learning_rate": 1.7728155339805825e-05, |
| "loss": 0.2259, |
| "step": 11700 |
| }, |
| { |
| "epoch": 114.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 3.630201578140259, |
| "eval_runtime": 4.4007, |
| "eval_samples_per_second": 66.126, |
| "eval_steps_per_second": 4.317, |
| "step": 11742 |
| }, |
| { |
| "epoch": 114.56, |
| "learning_rate": 1.7708737864077673e-05, |
| "loss": 0.1809, |
| "step": 11800 |
| }, |
| { |
| "epoch": 115.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 3.6300668716430664, |
| "eval_runtime": 4.4113, |
| "eval_samples_per_second": 65.967, |
| "eval_steps_per_second": 4.307, |
| "step": 11845 |
| }, |
| { |
| "epoch": 115.53, |
| "learning_rate": 1.7689320388349517e-05, |
| "loss": 0.2071, |
| "step": 11900 |
| }, |
| { |
| "epoch": 116.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 3.7288320064544678, |
| "eval_runtime": 4.4967, |
| "eval_samples_per_second": 64.714, |
| "eval_steps_per_second": 4.225, |
| "step": 11948 |
| }, |
| { |
| "epoch": 116.5, |
| "learning_rate": 1.7669902912621362e-05, |
| "loss": 0.1977, |
| "step": 12000 |
| }, |
| { |
| "epoch": 117.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 3.646707057952881, |
| "eval_runtime": 4.4852, |
| "eval_samples_per_second": 64.88, |
| "eval_steps_per_second": 4.236, |
| "step": 12051 |
| }, |
| { |
| "epoch": 117.48, |
| "learning_rate": 1.7650485436893206e-05, |
| "loss": 0.1902, |
| "step": 12100 |
| }, |
| { |
| "epoch": 118.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 3.703948974609375, |
| "eval_runtime": 4.5028, |
| "eval_samples_per_second": 64.627, |
| "eval_steps_per_second": 4.22, |
| "step": 12154 |
| }, |
| { |
| "epoch": 118.45, |
| "learning_rate": 1.763106796116505e-05, |
| "loss": 0.1996, |
| "step": 12200 |
| }, |
| { |
| "epoch": 119.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 3.901280403137207, |
| "eval_runtime": 4.4298, |
| "eval_samples_per_second": 65.691, |
| "eval_steps_per_second": 4.289, |
| "step": 12257 |
| }, |
| { |
| "epoch": 119.42, |
| "learning_rate": 1.7611650485436896e-05, |
| "loss": 0.2122, |
| "step": 12300 |
| }, |
| { |
| "epoch": 120.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 3.822838306427002, |
| "eval_runtime": 4.4449, |
| "eval_samples_per_second": 65.468, |
| "eval_steps_per_second": 4.275, |
| "step": 12360 |
| }, |
| { |
| "epoch": 120.39, |
| "learning_rate": 1.759223300970874e-05, |
| "loss": 0.1702, |
| "step": 12400 |
| }, |
| { |
| "epoch": 121.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 3.7117698192596436, |
| "eval_runtime": 4.422, |
| "eval_samples_per_second": 65.807, |
| "eval_steps_per_second": 4.297, |
| "step": 12463 |
| }, |
| { |
| "epoch": 121.36, |
| "learning_rate": 1.7572815533980585e-05, |
| "loss": 0.1889, |
| "step": 12500 |
| }, |
| { |
| "epoch": 122.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 3.721066474914551, |
| "eval_runtime": 4.4242, |
| "eval_samples_per_second": 65.774, |
| "eval_steps_per_second": 4.295, |
| "step": 12566 |
| }, |
| { |
| "epoch": 122.33, |
| "learning_rate": 1.755339805825243e-05, |
| "loss": 0.1857, |
| "step": 12600 |
| }, |
| { |
| "epoch": 123.0, |
| "eval_accuracy": 0.2508591065292096, |
| "eval_loss": 3.8894174098968506, |
| "eval_runtime": 4.4176, |
| "eval_samples_per_second": 65.873, |
| "eval_steps_per_second": 4.301, |
| "step": 12669 |
| }, |
| { |
| "epoch": 123.3, |
| "learning_rate": 1.7533980582524274e-05, |
| "loss": 0.2003, |
| "step": 12700 |
| }, |
| { |
| "epoch": 124.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 3.657545328140259, |
| "eval_runtime": 4.4115, |
| "eval_samples_per_second": 65.964, |
| "eval_steps_per_second": 4.307, |
| "step": 12772 |
| }, |
| { |
| "epoch": 124.27, |
| "learning_rate": 1.751456310679612e-05, |
| "loss": 0.202, |
| "step": 12800 |
| }, |
| { |
| "epoch": 125.0, |
| "eval_accuracy": 0.3333333333333333, |
| "eval_loss": 3.792531728744507, |
| "eval_runtime": 4.4022, |
| "eval_samples_per_second": 66.104, |
| "eval_steps_per_second": 4.316, |
| "step": 12875 |
| }, |
| { |
| "epoch": 125.24, |
| "learning_rate": 1.7495145631067963e-05, |
| "loss": 0.1722, |
| "step": 12900 |
| }, |
| { |
| "epoch": 126.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.8187637329101562, |
| "eval_runtime": 4.5091, |
| "eval_samples_per_second": 64.535, |
| "eval_steps_per_second": 4.214, |
| "step": 12978 |
| }, |
| { |
| "epoch": 126.21, |
| "learning_rate": 1.7475728155339808e-05, |
| "loss": 0.1716, |
| "step": 13000 |
| }, |
| { |
| "epoch": 127.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 3.958421468734741, |
| "eval_runtime": 4.4913, |
| "eval_samples_per_second": 64.792, |
| "eval_steps_per_second": 4.23, |
| "step": 13081 |
| }, |
| { |
| "epoch": 127.18, |
| "learning_rate": 1.7456310679611652e-05, |
| "loss": 0.1598, |
| "step": 13100 |
| }, |
| { |
| "epoch": 128.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 3.7731645107269287, |
| "eval_runtime": 4.4084, |
| "eval_samples_per_second": 66.01, |
| "eval_steps_per_second": 4.31, |
| "step": 13184 |
| }, |
| { |
| "epoch": 128.16, |
| "learning_rate": 1.7436893203883497e-05, |
| "loss": 0.1825, |
| "step": 13200 |
| }, |
| { |
| "epoch": 129.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 3.803807497024536, |
| "eval_runtime": 4.4164, |
| "eval_samples_per_second": 65.891, |
| "eval_steps_per_second": 4.302, |
| "step": 13287 |
| }, |
| { |
| "epoch": 129.13, |
| "learning_rate": 1.741747572815534e-05, |
| "loss": 0.1716, |
| "step": 13300 |
| }, |
| { |
| "epoch": 130.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 3.760632276535034, |
| "eval_runtime": 4.3993, |
| "eval_samples_per_second": 66.147, |
| "eval_steps_per_second": 4.319, |
| "step": 13390 |
| }, |
| { |
| "epoch": 130.1, |
| "learning_rate": 1.7398058252427186e-05, |
| "loss": 0.179, |
| "step": 13400 |
| }, |
| { |
| "epoch": 131.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.7458295822143555, |
| "eval_runtime": 4.3974, |
| "eval_samples_per_second": 66.176, |
| "eval_steps_per_second": 4.321, |
| "step": 13493 |
| }, |
| { |
| "epoch": 131.07, |
| "learning_rate": 1.737864077669903e-05, |
| "loss": 0.1817, |
| "step": 13500 |
| }, |
| { |
| "epoch": 132.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.841256618499756, |
| "eval_runtime": 4.5286, |
| "eval_samples_per_second": 64.258, |
| "eval_steps_per_second": 4.196, |
| "step": 13596 |
| }, |
| { |
| "epoch": 132.04, |
| "learning_rate": 1.7359223300970875e-05, |
| "loss": 0.1606, |
| "step": 13600 |
| }, |
| { |
| "epoch": 133.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 3.876582384109497, |
| "eval_runtime": 4.399, |
| "eval_samples_per_second": 66.151, |
| "eval_steps_per_second": 4.319, |
| "step": 13699 |
| }, |
| { |
| "epoch": 133.01, |
| "learning_rate": 1.733980582524272e-05, |
| "loss": 0.1785, |
| "step": 13700 |
| }, |
| { |
| "epoch": 133.98, |
| "learning_rate": 1.7320388349514564e-05, |
| "loss": 0.1625, |
| "step": 13800 |
| }, |
| { |
| "epoch": 134.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 3.8187553882598877, |
| "eval_runtime": 4.4306, |
| "eval_samples_per_second": 65.68, |
| "eval_steps_per_second": 4.288, |
| "step": 13802 |
| }, |
| { |
| "epoch": 134.95, |
| "learning_rate": 1.730097087378641e-05, |
| "loss": 0.1622, |
| "step": 13900 |
| }, |
| { |
| "epoch": 135.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.7222514152526855, |
| "eval_runtime": 4.4404, |
| "eval_samples_per_second": 65.534, |
| "eval_steps_per_second": 4.279, |
| "step": 13905 |
| }, |
| { |
| "epoch": 135.92, |
| "learning_rate": 1.7281553398058253e-05, |
| "loss": 0.1852, |
| "step": 14000 |
| }, |
| { |
| "epoch": 136.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 3.777442693710327, |
| "eval_runtime": 4.4465, |
| "eval_samples_per_second": 65.444, |
| "eval_steps_per_second": 4.273, |
| "step": 14008 |
| }, |
| { |
| "epoch": 136.89, |
| "learning_rate": 1.7262135922330098e-05, |
| "loss": 0.1671, |
| "step": 14100 |
| }, |
| { |
| "epoch": 137.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 3.8406755924224854, |
| "eval_runtime": 4.4235, |
| "eval_samples_per_second": 65.785, |
| "eval_steps_per_second": 4.295, |
| "step": 14111 |
| }, |
| { |
| "epoch": 137.86, |
| "learning_rate": 1.7242718446601943e-05, |
| "loss": 0.1862, |
| "step": 14200 |
| }, |
| { |
| "epoch": 138.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 3.744192123413086, |
| "eval_runtime": 4.3988, |
| "eval_samples_per_second": 66.155, |
| "eval_steps_per_second": 4.319, |
| "step": 14214 |
| }, |
| { |
| "epoch": 138.83, |
| "learning_rate": 1.7223300970873787e-05, |
| "loss": 0.1808, |
| "step": 14300 |
| }, |
| { |
| "epoch": 139.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 3.845832347869873, |
| "eval_runtime": 4.4115, |
| "eval_samples_per_second": 65.964, |
| "eval_steps_per_second": 4.307, |
| "step": 14317 |
| }, |
| { |
| "epoch": 139.81, |
| "learning_rate": 1.7203883495145632e-05, |
| "loss": 0.1375, |
| "step": 14400 |
| }, |
| { |
| "epoch": 140.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 3.7371747493743896, |
| "eval_runtime": 4.4129, |
| "eval_samples_per_second": 65.944, |
| "eval_steps_per_second": 4.306, |
| "step": 14420 |
| }, |
| { |
| "epoch": 140.78, |
| "learning_rate": 1.7184466019417476e-05, |
| "loss": 0.1876, |
| "step": 14500 |
| }, |
| { |
| "epoch": 141.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 3.992500066757202, |
| "eval_runtime": 4.4146, |
| "eval_samples_per_second": 65.918, |
| "eval_steps_per_second": 4.304, |
| "step": 14523 |
| }, |
| { |
| "epoch": 141.75, |
| "learning_rate": 1.716504854368932e-05, |
| "loss": 0.1693, |
| "step": 14600 |
| }, |
| { |
| "epoch": 142.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 3.9364075660705566, |
| "eval_runtime": 4.4593, |
| "eval_samples_per_second": 65.257, |
| "eval_steps_per_second": 4.261, |
| "step": 14626 |
| }, |
| { |
| "epoch": 142.72, |
| "learning_rate": 1.7145631067961165e-05, |
| "loss": 0.1719, |
| "step": 14700 |
| }, |
| { |
| "epoch": 143.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 3.9148850440979004, |
| "eval_runtime": 4.4169, |
| "eval_samples_per_second": 65.883, |
| "eval_steps_per_second": 4.302, |
| "step": 14729 |
| }, |
| { |
| "epoch": 143.69, |
| "learning_rate": 1.7126213592233013e-05, |
| "loss": 0.1406, |
| "step": 14800 |
| }, |
| { |
| "epoch": 144.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 3.8602797985076904, |
| "eval_runtime": 4.4407, |
| "eval_samples_per_second": 65.53, |
| "eval_steps_per_second": 4.279, |
| "step": 14832 |
| }, |
| { |
| "epoch": 144.66, |
| "learning_rate": 1.7106796116504855e-05, |
| "loss": 0.1709, |
| "step": 14900 |
| }, |
| { |
| "epoch": 145.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 3.921625852584839, |
| "eval_runtime": 4.4044, |
| "eval_samples_per_second": 66.071, |
| "eval_steps_per_second": 4.314, |
| "step": 14935 |
| }, |
| { |
| "epoch": 145.63, |
| "learning_rate": 1.70873786407767e-05, |
| "loss": 0.1794, |
| "step": 15000 |
| }, |
| { |
| "epoch": 146.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 3.8933568000793457, |
| "eval_runtime": 4.4071, |
| "eval_samples_per_second": 66.03, |
| "eval_steps_per_second": 4.311, |
| "step": 15038 |
| }, |
| { |
| "epoch": 146.6, |
| "learning_rate": 1.7067961165048544e-05, |
| "loss": 0.1455, |
| "step": 15100 |
| }, |
| { |
| "epoch": 147.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.008619785308838, |
| "eval_runtime": 4.429, |
| "eval_samples_per_second": 65.703, |
| "eval_steps_per_second": 4.29, |
| "step": 15141 |
| }, |
| { |
| "epoch": 147.57, |
| "learning_rate": 1.704854368932039e-05, |
| "loss": 0.1959, |
| "step": 15200 |
| }, |
| { |
| "epoch": 148.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 3.9358479976654053, |
| "eval_runtime": 4.4602, |
| "eval_samples_per_second": 65.243, |
| "eval_steps_per_second": 4.26, |
| "step": 15244 |
| }, |
| { |
| "epoch": 148.54, |
| "learning_rate": 1.7029126213592236e-05, |
| "loss": 0.1664, |
| "step": 15300 |
| }, |
| { |
| "epoch": 149.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 3.977458953857422, |
| "eval_runtime": 4.4996, |
| "eval_samples_per_second": 64.673, |
| "eval_steps_per_second": 4.223, |
| "step": 15347 |
| }, |
| { |
| "epoch": 149.51, |
| "learning_rate": 1.7009708737864078e-05, |
| "loss": 0.1455, |
| "step": 15400 |
| }, |
| { |
| "epoch": 150.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 3.9304189682006836, |
| "eval_runtime": 4.5108, |
| "eval_samples_per_second": 64.512, |
| "eval_steps_per_second": 4.212, |
| "step": 15450 |
| }, |
| { |
| "epoch": 150.49, |
| "learning_rate": 1.6990291262135922e-05, |
| "loss": 0.1819, |
| "step": 15500 |
| }, |
| { |
| "epoch": 151.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.029915809631348, |
| "eval_runtime": 4.5126, |
| "eval_samples_per_second": 64.486, |
| "eval_steps_per_second": 4.21, |
| "step": 15553 |
| }, |
| { |
| "epoch": 151.46, |
| "learning_rate": 1.697087378640777e-05, |
| "loss": 0.1532, |
| "step": 15600 |
| }, |
| { |
| "epoch": 152.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.121899127960205, |
| "eval_runtime": 4.4026, |
| "eval_samples_per_second": 66.097, |
| "eval_steps_per_second": 4.316, |
| "step": 15656 |
| }, |
| { |
| "epoch": 152.43, |
| "learning_rate": 1.695145631067961e-05, |
| "loss": 0.1638, |
| "step": 15700 |
| }, |
| { |
| "epoch": 153.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.146513938903809, |
| "eval_runtime": 4.3942, |
| "eval_samples_per_second": 66.224, |
| "eval_steps_per_second": 4.324, |
| "step": 15759 |
| }, |
| { |
| "epoch": 153.4, |
| "learning_rate": 1.693203883495146e-05, |
| "loss": 0.1579, |
| "step": 15800 |
| }, |
| { |
| "epoch": 154.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.05957555770874, |
| "eval_runtime": 4.4053, |
| "eval_samples_per_second": 66.056, |
| "eval_steps_per_second": 4.313, |
| "step": 15862 |
| }, |
| { |
| "epoch": 154.37, |
| "learning_rate": 1.6912621359223304e-05, |
| "loss": 0.1668, |
| "step": 15900 |
| }, |
| { |
| "epoch": 155.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 4.085700988769531, |
| "eval_runtime": 4.3881, |
| "eval_samples_per_second": 66.316, |
| "eval_steps_per_second": 4.33, |
| "step": 15965 |
| }, |
| { |
| "epoch": 155.34, |
| "learning_rate": 1.6893203883495145e-05, |
| "loss": 0.1401, |
| "step": 16000 |
| }, |
| { |
| "epoch": 156.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.166921138763428, |
| "eval_runtime": 4.4102, |
| "eval_samples_per_second": 65.983, |
| "eval_steps_per_second": 4.308, |
| "step": 16068 |
| }, |
| { |
| "epoch": 156.31, |
| "learning_rate": 1.6873786407766993e-05, |
| "loss": 0.1452, |
| "step": 16100 |
| }, |
| { |
| "epoch": 157.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.043022632598877, |
| "eval_runtime": 4.4074, |
| "eval_samples_per_second": 66.026, |
| "eval_steps_per_second": 4.311, |
| "step": 16171 |
| }, |
| { |
| "epoch": 157.28, |
| "learning_rate": 1.6854368932038838e-05, |
| "loss": 0.1568, |
| "step": 16200 |
| }, |
| { |
| "epoch": 158.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.015657901763916, |
| "eval_runtime": 4.3977, |
| "eval_samples_per_second": 66.17, |
| "eval_steps_per_second": 4.32, |
| "step": 16274 |
| }, |
| { |
| "epoch": 158.25, |
| "learning_rate": 1.6834951456310682e-05, |
| "loss": 0.1771, |
| "step": 16300 |
| }, |
| { |
| "epoch": 159.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.076967716217041, |
| "eval_runtime": 4.3856, |
| "eval_samples_per_second": 66.354, |
| "eval_steps_per_second": 4.332, |
| "step": 16377 |
| }, |
| { |
| "epoch": 159.22, |
| "learning_rate": 1.6815533980582527e-05, |
| "loss": 0.1383, |
| "step": 16400 |
| }, |
| { |
| "epoch": 160.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.0888471603393555, |
| "eval_runtime": 4.4106, |
| "eval_samples_per_second": 65.977, |
| "eval_steps_per_second": 4.308, |
| "step": 16480 |
| }, |
| { |
| "epoch": 160.19, |
| "learning_rate": 1.6796116504854368e-05, |
| "loss": 0.1572, |
| "step": 16500 |
| }, |
| { |
| "epoch": 161.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.227140426635742, |
| "eval_runtime": 4.4013, |
| "eval_samples_per_second": 66.116, |
| "eval_steps_per_second": 4.317, |
| "step": 16583 |
| }, |
| { |
| "epoch": 161.17, |
| "learning_rate": 1.6776699029126216e-05, |
| "loss": 0.1472, |
| "step": 16600 |
| }, |
| { |
| "epoch": 162.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.02153205871582, |
| "eval_runtime": 4.396, |
| "eval_samples_per_second": 66.196, |
| "eval_steps_per_second": 4.322, |
| "step": 16686 |
| }, |
| { |
| "epoch": 162.14, |
| "learning_rate": 1.675728155339806e-05, |
| "loss": 0.1534, |
| "step": 16700 |
| }, |
| { |
| "epoch": 163.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.224771499633789, |
| "eval_runtime": 4.4208, |
| "eval_samples_per_second": 65.825, |
| "eval_steps_per_second": 4.298, |
| "step": 16789 |
| }, |
| { |
| "epoch": 163.11, |
| "learning_rate": 1.6737864077669905e-05, |
| "loss": 0.136, |
| "step": 16800 |
| }, |
| { |
| "epoch": 164.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.215867519378662, |
| "eval_runtime": 4.4089, |
| "eval_samples_per_second": 66.003, |
| "eval_steps_per_second": 4.309, |
| "step": 16892 |
| }, |
| { |
| "epoch": 164.08, |
| "learning_rate": 1.671844660194175e-05, |
| "loss": 0.1525, |
| "step": 16900 |
| }, |
| { |
| "epoch": 165.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.05654239654541, |
| "eval_runtime": 4.3994, |
| "eval_samples_per_second": 66.146, |
| "eval_steps_per_second": 4.319, |
| "step": 16995 |
| }, |
| { |
| "epoch": 165.05, |
| "learning_rate": 1.6699029126213594e-05, |
| "loss": 0.1418, |
| "step": 17000 |
| }, |
| { |
| "epoch": 166.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.117518424987793, |
| "eval_runtime": 4.4011, |
| "eval_samples_per_second": 66.12, |
| "eval_steps_per_second": 4.317, |
| "step": 17098 |
| }, |
| { |
| "epoch": 166.02, |
| "learning_rate": 1.667961165048544e-05, |
| "loss": 0.1542, |
| "step": 17100 |
| }, |
| { |
| "epoch": 166.99, |
| "learning_rate": 1.6660194174757283e-05, |
| "loss": 0.1374, |
| "step": 17200 |
| }, |
| { |
| "epoch": 167.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.170831203460693, |
| "eval_runtime": 4.3965, |
| "eval_samples_per_second": 66.188, |
| "eval_steps_per_second": 4.322, |
| "step": 17201 |
| }, |
| { |
| "epoch": 167.96, |
| "learning_rate": 1.6640776699029128e-05, |
| "loss": 0.1538, |
| "step": 17300 |
| }, |
| { |
| "epoch": 168.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.256599426269531, |
| "eval_runtime": 4.4123, |
| "eval_samples_per_second": 65.952, |
| "eval_steps_per_second": 4.306, |
| "step": 17304 |
| }, |
| { |
| "epoch": 168.93, |
| "learning_rate": 1.6621359223300973e-05, |
| "loss": 0.1365, |
| "step": 17400 |
| }, |
| { |
| "epoch": 169.0, |
| "eval_accuracy": 0.25773195876288657, |
| "eval_loss": 4.306251525878906, |
| "eval_runtime": 4.409, |
| "eval_samples_per_second": 66.002, |
| "eval_steps_per_second": 4.309, |
| "step": 17407 |
| }, |
| { |
| "epoch": 169.9, |
| "learning_rate": 1.6601941747572817e-05, |
| "loss": 0.1661, |
| "step": 17500 |
| }, |
| { |
| "epoch": 170.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.223095417022705, |
| "eval_runtime": 4.413, |
| "eval_samples_per_second": 65.941, |
| "eval_steps_per_second": 4.305, |
| "step": 17510 |
| }, |
| { |
| "epoch": 170.87, |
| "learning_rate": 1.6582524271844662e-05, |
| "loss": 0.1278, |
| "step": 17600 |
| }, |
| { |
| "epoch": 171.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.312500953674316, |
| "eval_runtime": 4.3996, |
| "eval_samples_per_second": 66.143, |
| "eval_steps_per_second": 4.319, |
| "step": 17613 |
| }, |
| { |
| "epoch": 171.84, |
| "learning_rate": 1.6563106796116506e-05, |
| "loss": 0.1418, |
| "step": 17700 |
| }, |
| { |
| "epoch": 172.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.333723545074463, |
| "eval_runtime": 4.4095, |
| "eval_samples_per_second": 65.994, |
| "eval_steps_per_second": 4.309, |
| "step": 17716 |
| }, |
| { |
| "epoch": 172.82, |
| "learning_rate": 1.654368932038835e-05, |
| "loss": 0.1538, |
| "step": 17800 |
| }, |
| { |
| "epoch": 173.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.3129119873046875, |
| "eval_runtime": 4.4001, |
| "eval_samples_per_second": 66.135, |
| "eval_steps_per_second": 4.318, |
| "step": 17819 |
| }, |
| { |
| "epoch": 173.79, |
| "learning_rate": 1.6524271844660196e-05, |
| "loss": 0.1315, |
| "step": 17900 |
| }, |
| { |
| "epoch": 174.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.3102030754089355, |
| "eval_runtime": 4.3961, |
| "eval_samples_per_second": 66.195, |
| "eval_steps_per_second": 4.322, |
| "step": 17922 |
| }, |
| { |
| "epoch": 174.76, |
| "learning_rate": 1.650485436893204e-05, |
| "loss": 0.128, |
| "step": 18000 |
| }, |
| { |
| "epoch": 175.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.285308837890625, |
| "eval_runtime": 4.4018, |
| "eval_samples_per_second": 66.109, |
| "eval_steps_per_second": 4.316, |
| "step": 18025 |
| }, |
| { |
| "epoch": 175.73, |
| "learning_rate": 1.6485436893203885e-05, |
| "loss": 0.1398, |
| "step": 18100 |
| }, |
| { |
| "epoch": 176.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.156043529510498, |
| "eval_runtime": 4.4077, |
| "eval_samples_per_second": 66.02, |
| "eval_steps_per_second": 4.311, |
| "step": 18128 |
| }, |
| { |
| "epoch": 176.7, |
| "learning_rate": 1.646601941747573e-05, |
| "loss": 0.1525, |
| "step": 18200 |
| }, |
| { |
| "epoch": 177.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.181150436401367, |
| "eval_runtime": 4.4122, |
| "eval_samples_per_second": 65.954, |
| "eval_steps_per_second": 4.306, |
| "step": 18231 |
| }, |
| { |
| "epoch": 177.67, |
| "learning_rate": 1.6446601941747574e-05, |
| "loss": 0.1603, |
| "step": 18300 |
| }, |
| { |
| "epoch": 178.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.126158714294434, |
| "eval_runtime": 4.4076, |
| "eval_samples_per_second": 66.022, |
| "eval_steps_per_second": 4.311, |
| "step": 18334 |
| }, |
| { |
| "epoch": 178.64, |
| "learning_rate": 1.642718446601942e-05, |
| "loss": 0.1412, |
| "step": 18400 |
| }, |
| { |
| "epoch": 179.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.277770519256592, |
| "eval_runtime": 4.4525, |
| "eval_samples_per_second": 65.357, |
| "eval_steps_per_second": 4.267, |
| "step": 18437 |
| }, |
| { |
| "epoch": 179.61, |
| "learning_rate": 1.6407766990291263e-05, |
| "loss": 0.1521, |
| "step": 18500 |
| }, |
| { |
| "epoch": 180.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.288129806518555, |
| "eval_runtime": 4.3977, |
| "eval_samples_per_second": 66.17, |
| "eval_steps_per_second": 4.32, |
| "step": 18540 |
| }, |
| { |
| "epoch": 180.58, |
| "learning_rate": 1.6388349514563108e-05, |
| "loss": 0.1404, |
| "step": 18600 |
| }, |
| { |
| "epoch": 181.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.314670562744141, |
| "eval_runtime": 4.4108, |
| "eval_samples_per_second": 65.974, |
| "eval_steps_per_second": 4.308, |
| "step": 18643 |
| }, |
| { |
| "epoch": 181.55, |
| "learning_rate": 1.6368932038834952e-05, |
| "loss": 0.1468, |
| "step": 18700 |
| }, |
| { |
| "epoch": 182.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.204223155975342, |
| "eval_runtime": 4.4068, |
| "eval_samples_per_second": 66.034, |
| "eval_steps_per_second": 4.312, |
| "step": 18746 |
| }, |
| { |
| "epoch": 182.52, |
| "learning_rate": 1.6349514563106797e-05, |
| "loss": 0.1448, |
| "step": 18800 |
| }, |
| { |
| "epoch": 183.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.211010456085205, |
| "eval_runtime": 4.411, |
| "eval_samples_per_second": 65.971, |
| "eval_steps_per_second": 4.307, |
| "step": 18849 |
| }, |
| { |
| "epoch": 183.5, |
| "learning_rate": 1.633009708737864e-05, |
| "loss": 0.1299, |
| "step": 18900 |
| }, |
| { |
| "epoch": 184.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.231362342834473, |
| "eval_runtime": 4.4084, |
| "eval_samples_per_second": 66.011, |
| "eval_steps_per_second": 4.31, |
| "step": 18952 |
| }, |
| { |
| "epoch": 184.47, |
| "learning_rate": 1.6310679611650486e-05, |
| "loss": 0.1361, |
| "step": 19000 |
| }, |
| { |
| "epoch": 185.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.299282550811768, |
| "eval_runtime": 4.5219, |
| "eval_samples_per_second": 64.354, |
| "eval_steps_per_second": 4.202, |
| "step": 19055 |
| }, |
| { |
| "epoch": 185.44, |
| "learning_rate": 1.629126213592233e-05, |
| "loss": 0.1455, |
| "step": 19100 |
| }, |
| { |
| "epoch": 186.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.350893020629883, |
| "eval_runtime": 4.4097, |
| "eval_samples_per_second": 65.991, |
| "eval_steps_per_second": 4.309, |
| "step": 19158 |
| }, |
| { |
| "epoch": 186.41, |
| "learning_rate": 1.6271844660194175e-05, |
| "loss": 0.1345, |
| "step": 19200 |
| }, |
| { |
| "epoch": 187.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.282843112945557, |
| "eval_runtime": 4.4104, |
| "eval_samples_per_second": 65.981, |
| "eval_steps_per_second": 4.308, |
| "step": 19261 |
| }, |
| { |
| "epoch": 187.38, |
| "learning_rate": 1.625242718446602e-05, |
| "loss": 0.1394, |
| "step": 19300 |
| }, |
| { |
| "epoch": 188.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.100064277648926, |
| "eval_runtime": 4.4148, |
| "eval_samples_per_second": 65.914, |
| "eval_steps_per_second": 4.304, |
| "step": 19364 |
| }, |
| { |
| "epoch": 188.35, |
| "learning_rate": 1.6233009708737864e-05, |
| "loss": 0.1415, |
| "step": 19400 |
| }, |
| { |
| "epoch": 189.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.217869281768799, |
| "eval_runtime": 4.4147, |
| "eval_samples_per_second": 65.917, |
| "eval_steps_per_second": 4.304, |
| "step": 19467 |
| }, |
| { |
| "epoch": 189.32, |
| "learning_rate": 1.621359223300971e-05, |
| "loss": 0.1235, |
| "step": 19500 |
| }, |
| { |
| "epoch": 190.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.296295642852783, |
| "eval_runtime": 4.4171, |
| "eval_samples_per_second": 65.88, |
| "eval_steps_per_second": 4.301, |
| "step": 19570 |
| }, |
| { |
| "epoch": 190.29, |
| "learning_rate": 1.6194174757281557e-05, |
| "loss": 0.1373, |
| "step": 19600 |
| }, |
| { |
| "epoch": 191.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.183337211608887, |
| "eval_runtime": 4.402, |
| "eval_samples_per_second": 66.107, |
| "eval_steps_per_second": 4.316, |
| "step": 19673 |
| }, |
| { |
| "epoch": 191.26, |
| "learning_rate": 1.6174757281553398e-05, |
| "loss": 0.1323, |
| "step": 19700 |
| }, |
| { |
| "epoch": 192.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.305690288543701, |
| "eval_runtime": 4.3952, |
| "eval_samples_per_second": 66.209, |
| "eval_steps_per_second": 4.323, |
| "step": 19776 |
| }, |
| { |
| "epoch": 192.23, |
| "learning_rate": 1.6155339805825243e-05, |
| "loss": 0.1188, |
| "step": 19800 |
| }, |
| { |
| "epoch": 193.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.381898880004883, |
| "eval_runtime": 4.3965, |
| "eval_samples_per_second": 66.189, |
| "eval_steps_per_second": 4.322, |
| "step": 19879 |
| }, |
| { |
| "epoch": 193.2, |
| "learning_rate": 1.613592233009709e-05, |
| "loss": 0.1528, |
| "step": 19900 |
| }, |
| { |
| "epoch": 194.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.309067726135254, |
| "eval_runtime": 4.3965, |
| "eval_samples_per_second": 66.189, |
| "eval_steps_per_second": 4.322, |
| "step": 19982 |
| }, |
| { |
| "epoch": 194.17, |
| "learning_rate": 1.6116504854368932e-05, |
| "loss": 0.1365, |
| "step": 20000 |
| }, |
| { |
| "epoch": 195.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.387022495269775, |
| "eval_runtime": 4.3928, |
| "eval_samples_per_second": 66.245, |
| "eval_steps_per_second": 4.325, |
| "step": 20085 |
| }, |
| { |
| "epoch": 195.15, |
| "learning_rate": 1.609708737864078e-05, |
| "loss": 0.1187, |
| "step": 20100 |
| }, |
| { |
| "epoch": 196.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.230319499969482, |
| "eval_runtime": 4.4107, |
| "eval_samples_per_second": 65.976, |
| "eval_steps_per_second": 4.308, |
| "step": 20188 |
| }, |
| { |
| "epoch": 196.12, |
| "learning_rate": 1.6077669902912624e-05, |
| "loss": 0.1409, |
| "step": 20200 |
| }, |
| { |
| "epoch": 197.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.234382152557373, |
| "eval_runtime": 4.4056, |
| "eval_samples_per_second": 66.053, |
| "eval_steps_per_second": 4.313, |
| "step": 20291 |
| }, |
| { |
| "epoch": 197.09, |
| "learning_rate": 1.6058252427184466e-05, |
| "loss": 0.1346, |
| "step": 20300 |
| }, |
| { |
| "epoch": 198.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 4.06366491317749, |
| "eval_runtime": 4.4189, |
| "eval_samples_per_second": 65.854, |
| "eval_steps_per_second": 4.3, |
| "step": 20394 |
| }, |
| { |
| "epoch": 198.06, |
| "learning_rate": 1.6038834951456313e-05, |
| "loss": 0.1449, |
| "step": 20400 |
| }, |
| { |
| "epoch": 199.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.3022308349609375, |
| "eval_runtime": 4.4119, |
| "eval_samples_per_second": 65.959, |
| "eval_steps_per_second": 4.307, |
| "step": 20497 |
| }, |
| { |
| "epoch": 199.03, |
| "learning_rate": 1.6019417475728155e-05, |
| "loss": 0.131, |
| "step": 20500 |
| }, |
| { |
| "epoch": 200.0, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.1415, |
| "step": 20600 |
| }, |
| { |
| "epoch": 200.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.267215728759766, |
| "eval_runtime": 4.4711, |
| "eval_samples_per_second": 65.085, |
| "eval_steps_per_second": 4.25, |
| "step": 20600 |
| }, |
| { |
| "epoch": 200.97, |
| "learning_rate": 1.5980582524271847e-05, |
| "loss": 0.1283, |
| "step": 20700 |
| }, |
| { |
| "epoch": 201.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.236283302307129, |
| "eval_runtime": 4.4239, |
| "eval_samples_per_second": 65.779, |
| "eval_steps_per_second": 4.295, |
| "step": 20703 |
| }, |
| { |
| "epoch": 201.94, |
| "learning_rate": 1.596116504854369e-05, |
| "loss": 0.1469, |
| "step": 20800 |
| }, |
| { |
| "epoch": 202.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.27135705947876, |
| "eval_runtime": 4.4118, |
| "eval_samples_per_second": 65.96, |
| "eval_steps_per_second": 4.307, |
| "step": 20806 |
| }, |
| { |
| "epoch": 202.91, |
| "learning_rate": 1.5941747572815536e-05, |
| "loss": 0.1288, |
| "step": 20900 |
| }, |
| { |
| "epoch": 203.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.324564456939697, |
| "eval_runtime": 4.41, |
| "eval_samples_per_second": 65.986, |
| "eval_steps_per_second": 4.308, |
| "step": 20909 |
| }, |
| { |
| "epoch": 203.88, |
| "learning_rate": 1.592233009708738e-05, |
| "loss": 0.1334, |
| "step": 21000 |
| }, |
| { |
| "epoch": 204.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.171061038970947, |
| "eval_runtime": 4.4147, |
| "eval_samples_per_second": 65.915, |
| "eval_steps_per_second": 4.304, |
| "step": 21012 |
| }, |
| { |
| "epoch": 204.85, |
| "learning_rate": 1.5902912621359226e-05, |
| "loss": 0.1419, |
| "step": 21100 |
| }, |
| { |
| "epoch": 205.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.326306343078613, |
| "eval_runtime": 4.3969, |
| "eval_samples_per_second": 66.183, |
| "eval_steps_per_second": 4.321, |
| "step": 21115 |
| }, |
| { |
| "epoch": 205.83, |
| "learning_rate": 1.588349514563107e-05, |
| "loss": 0.1395, |
| "step": 21200 |
| }, |
| { |
| "epoch": 206.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.285510063171387, |
| "eval_runtime": 4.4115, |
| "eval_samples_per_second": 65.963, |
| "eval_steps_per_second": 4.307, |
| "step": 21218 |
| }, |
| { |
| "epoch": 206.8, |
| "learning_rate": 1.5864077669902915e-05, |
| "loss": 0.1255, |
| "step": 21300 |
| }, |
| { |
| "epoch": 207.0, |
| "eval_accuracy": 0.24742268041237114, |
| "eval_loss": 4.430055141448975, |
| "eval_runtime": 4.4012, |
| "eval_samples_per_second": 66.118, |
| "eval_steps_per_second": 4.317, |
| "step": 21321 |
| }, |
| { |
| "epoch": 207.77, |
| "learning_rate": 1.584466019417476e-05, |
| "loss": 0.1288, |
| "step": 21400 |
| }, |
| { |
| "epoch": 208.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.373450756072998, |
| "eval_runtime": 4.4162, |
| "eval_samples_per_second": 65.893, |
| "eval_steps_per_second": 4.302, |
| "step": 21424 |
| }, |
| { |
| "epoch": 208.74, |
| "learning_rate": 1.5825242718446604e-05, |
| "loss": 0.1395, |
| "step": 21500 |
| }, |
| { |
| "epoch": 209.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.354865550994873, |
| "eval_runtime": 4.3919, |
| "eval_samples_per_second": 66.259, |
| "eval_steps_per_second": 4.326, |
| "step": 21527 |
| }, |
| { |
| "epoch": 209.71, |
| "learning_rate": 1.580582524271845e-05, |
| "loss": 0.1144, |
| "step": 21600 |
| }, |
| { |
| "epoch": 210.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.456879615783691, |
| "eval_runtime": 4.3921, |
| "eval_samples_per_second": 66.255, |
| "eval_steps_per_second": 4.326, |
| "step": 21630 |
| }, |
| { |
| "epoch": 210.68, |
| "learning_rate": 1.5786407766990293e-05, |
| "loss": 0.1185, |
| "step": 21700 |
| }, |
| { |
| "epoch": 211.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.500795364379883, |
| "eval_runtime": 4.4075, |
| "eval_samples_per_second": 66.024, |
| "eval_steps_per_second": 4.311, |
| "step": 21733 |
| }, |
| { |
| "epoch": 211.65, |
| "learning_rate": 1.5766990291262138e-05, |
| "loss": 0.1578, |
| "step": 21800 |
| }, |
| { |
| "epoch": 212.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.231286525726318, |
| "eval_runtime": 4.3986, |
| "eval_samples_per_second": 66.157, |
| "eval_steps_per_second": 4.32, |
| "step": 21836 |
| }, |
| { |
| "epoch": 212.62, |
| "learning_rate": 1.5747572815533982e-05, |
| "loss": 0.1434, |
| "step": 21900 |
| }, |
| { |
| "epoch": 213.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.444507122039795, |
| "eval_runtime": 4.403, |
| "eval_samples_per_second": 66.091, |
| "eval_steps_per_second": 4.315, |
| "step": 21939 |
| }, |
| { |
| "epoch": 213.59, |
| "learning_rate": 1.5728155339805827e-05, |
| "loss": 0.1147, |
| "step": 22000 |
| }, |
| { |
| "epoch": 214.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.432860851287842, |
| "eval_runtime": 4.4129, |
| "eval_samples_per_second": 65.942, |
| "eval_steps_per_second": 4.306, |
| "step": 22042 |
| }, |
| { |
| "epoch": 214.56, |
| "learning_rate": 1.570873786407767e-05, |
| "loss": 0.1239, |
| "step": 22100 |
| }, |
| { |
| "epoch": 215.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.410243034362793, |
| "eval_runtime": 4.4417, |
| "eval_samples_per_second": 65.516, |
| "eval_steps_per_second": 4.278, |
| "step": 22145 |
| }, |
| { |
| "epoch": 215.53, |
| "learning_rate": 1.5689320388349516e-05, |
| "loss": 0.1315, |
| "step": 22200 |
| }, |
| { |
| "epoch": 216.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.250341892242432, |
| "eval_runtime": 4.4449, |
| "eval_samples_per_second": 65.469, |
| "eval_steps_per_second": 4.275, |
| "step": 22248 |
| }, |
| { |
| "epoch": 216.5, |
| "learning_rate": 1.566990291262136e-05, |
| "loss": 0.1413, |
| "step": 22300 |
| }, |
| { |
| "epoch": 217.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.555916786193848, |
| "eval_runtime": 4.4149, |
| "eval_samples_per_second": 65.913, |
| "eval_steps_per_second": 4.304, |
| "step": 22351 |
| }, |
| { |
| "epoch": 217.48, |
| "learning_rate": 1.5650485436893205e-05, |
| "loss": 0.1137, |
| "step": 22400 |
| }, |
| { |
| "epoch": 218.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.450405120849609, |
| "eval_runtime": 4.4086, |
| "eval_samples_per_second": 66.008, |
| "eval_steps_per_second": 4.31, |
| "step": 22454 |
| }, |
| { |
| "epoch": 218.45, |
| "learning_rate": 1.563106796116505e-05, |
| "loss": 0.1412, |
| "step": 22500 |
| }, |
| { |
| "epoch": 219.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.337742805480957, |
| "eval_runtime": 4.4117, |
| "eval_samples_per_second": 65.961, |
| "eval_steps_per_second": 4.307, |
| "step": 22557 |
| }, |
| { |
| "epoch": 219.42, |
| "learning_rate": 1.5611650485436894e-05, |
| "loss": 0.1051, |
| "step": 22600 |
| }, |
| { |
| "epoch": 220.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.524986743927002, |
| "eval_runtime": 4.4284, |
| "eval_samples_per_second": 65.713, |
| "eval_steps_per_second": 4.291, |
| "step": 22660 |
| }, |
| { |
| "epoch": 220.39, |
| "learning_rate": 1.559223300970874e-05, |
| "loss": 0.1314, |
| "step": 22700 |
| }, |
| { |
| "epoch": 221.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.453868389129639, |
| "eval_runtime": 4.4017, |
| "eval_samples_per_second": 66.111, |
| "eval_steps_per_second": 4.317, |
| "step": 22763 |
| }, |
| { |
| "epoch": 221.36, |
| "learning_rate": 1.5572815533980583e-05, |
| "loss": 0.1284, |
| "step": 22800 |
| }, |
| { |
| "epoch": 222.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.34807825088501, |
| "eval_runtime": 4.3999, |
| "eval_samples_per_second": 66.139, |
| "eval_steps_per_second": 4.318, |
| "step": 22866 |
| }, |
| { |
| "epoch": 222.33, |
| "learning_rate": 1.5553398058252428e-05, |
| "loss": 0.1159, |
| "step": 22900 |
| }, |
| { |
| "epoch": 223.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 4.428357124328613, |
| "eval_runtime": 4.527, |
| "eval_samples_per_second": 64.28, |
| "eval_steps_per_second": 4.197, |
| "step": 22969 |
| }, |
| { |
| "epoch": 223.3, |
| "learning_rate": 1.5533980582524273e-05, |
| "loss": 0.1219, |
| "step": 23000 |
| }, |
| { |
| "epoch": 224.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.506850242614746, |
| "eval_runtime": 4.5066, |
| "eval_samples_per_second": 64.572, |
| "eval_steps_per_second": 4.216, |
| "step": 23072 |
| }, |
| { |
| "epoch": 224.27, |
| "learning_rate": 1.5514563106796117e-05, |
| "loss": 0.1183, |
| "step": 23100 |
| }, |
| { |
| "epoch": 225.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.546067237854004, |
| "eval_runtime": 4.3972, |
| "eval_samples_per_second": 66.179, |
| "eval_steps_per_second": 4.321, |
| "step": 23175 |
| }, |
| { |
| "epoch": 225.24, |
| "learning_rate": 1.5495145631067962e-05, |
| "loss": 0.1172, |
| "step": 23200 |
| }, |
| { |
| "epoch": 226.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.398603439331055, |
| "eval_runtime": 4.3972, |
| "eval_samples_per_second": 66.179, |
| "eval_steps_per_second": 4.321, |
| "step": 23278 |
| }, |
| { |
| "epoch": 226.21, |
| "learning_rate": 1.5475728155339806e-05, |
| "loss": 0.1216, |
| "step": 23300 |
| }, |
| { |
| "epoch": 227.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 4.515445232391357, |
| "eval_runtime": 4.4132, |
| "eval_samples_per_second": 65.938, |
| "eval_steps_per_second": 4.305, |
| "step": 23381 |
| }, |
| { |
| "epoch": 227.18, |
| "learning_rate": 1.545631067961165e-05, |
| "loss": 0.1207, |
| "step": 23400 |
| }, |
| { |
| "epoch": 228.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.484820365905762, |
| "eval_runtime": 4.4113, |
| "eval_samples_per_second": 65.967, |
| "eval_steps_per_second": 4.307, |
| "step": 23484 |
| }, |
| { |
| "epoch": 228.16, |
| "learning_rate": 1.5436893203883496e-05, |
| "loss": 0.1303, |
| "step": 23500 |
| }, |
| { |
| "epoch": 229.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.392459869384766, |
| "eval_runtime": 4.4087, |
| "eval_samples_per_second": 66.006, |
| "eval_steps_per_second": 4.31, |
| "step": 23587 |
| }, |
| { |
| "epoch": 229.13, |
| "learning_rate": 1.541747572815534e-05, |
| "loss": 0.1238, |
| "step": 23600 |
| }, |
| { |
| "epoch": 230.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.37477445602417, |
| "eval_runtime": 4.4077, |
| "eval_samples_per_second": 66.022, |
| "eval_steps_per_second": 4.311, |
| "step": 23690 |
| }, |
| { |
| "epoch": 230.1, |
| "learning_rate": 1.5398058252427185e-05, |
| "loss": 0.1126, |
| "step": 23700 |
| }, |
| { |
| "epoch": 231.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 4.480639934539795, |
| "eval_runtime": 4.3943, |
| "eval_samples_per_second": 66.222, |
| "eval_steps_per_second": 4.324, |
| "step": 23793 |
| }, |
| { |
| "epoch": 231.07, |
| "learning_rate": 1.537864077669903e-05, |
| "loss": 0.1227, |
| "step": 23800 |
| }, |
| { |
| "epoch": 232.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.443945407867432, |
| "eval_runtime": 4.5038, |
| "eval_samples_per_second": 64.612, |
| "eval_steps_per_second": 4.219, |
| "step": 23896 |
| }, |
| { |
| "epoch": 232.04, |
| "learning_rate": 1.5359223300970877e-05, |
| "loss": 0.1146, |
| "step": 23900 |
| }, |
| { |
| "epoch": 233.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.522760391235352, |
| "eval_runtime": 4.3912, |
| "eval_samples_per_second": 66.269, |
| "eval_steps_per_second": 4.327, |
| "step": 23999 |
| }, |
| { |
| "epoch": 233.01, |
| "learning_rate": 1.533980582524272e-05, |
| "loss": 0.123, |
| "step": 24000 |
| }, |
| { |
| "epoch": 233.98, |
| "learning_rate": 1.5320388349514563e-05, |
| "loss": 0.1168, |
| "step": 24100 |
| }, |
| { |
| "epoch": 234.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.56139612197876, |
| "eval_runtime": 4.4137, |
| "eval_samples_per_second": 65.93, |
| "eval_steps_per_second": 4.305, |
| "step": 24102 |
| }, |
| { |
| "epoch": 234.95, |
| "learning_rate": 1.5300970873786408e-05, |
| "loss": 0.1219, |
| "step": 24200 |
| }, |
| { |
| "epoch": 235.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.4129486083984375, |
| "eval_runtime": 4.4147, |
| "eval_samples_per_second": 65.915, |
| "eval_steps_per_second": 4.304, |
| "step": 24205 |
| }, |
| { |
| "epoch": 235.92, |
| "learning_rate": 1.5281553398058252e-05, |
| "loss": 0.1181, |
| "step": 24300 |
| }, |
| { |
| "epoch": 236.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.544414520263672, |
| "eval_runtime": 4.3973, |
| "eval_samples_per_second": 66.178, |
| "eval_steps_per_second": 4.321, |
| "step": 24308 |
| }, |
| { |
| "epoch": 236.89, |
| "learning_rate": 1.52621359223301e-05, |
| "loss": 0.1167, |
| "step": 24400 |
| }, |
| { |
| "epoch": 237.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.403836727142334, |
| "eval_runtime": 4.4112, |
| "eval_samples_per_second": 65.968, |
| "eval_steps_per_second": 4.307, |
| "step": 24411 |
| }, |
| { |
| "epoch": 237.86, |
| "learning_rate": 1.5242718446601943e-05, |
| "loss": 0.1173, |
| "step": 24500 |
| }, |
| { |
| "epoch": 238.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 4.396702289581299, |
| "eval_runtime": 4.4026, |
| "eval_samples_per_second": 66.097, |
| "eval_steps_per_second": 4.316, |
| "step": 24514 |
| }, |
| { |
| "epoch": 238.83, |
| "learning_rate": 1.5223300970873786e-05, |
| "loss": 0.1052, |
| "step": 24600 |
| }, |
| { |
| "epoch": 239.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.505501747131348, |
| "eval_runtime": 4.3966, |
| "eval_samples_per_second": 66.188, |
| "eval_steps_per_second": 4.322, |
| "step": 24617 |
| }, |
| { |
| "epoch": 239.81, |
| "learning_rate": 1.5203883495145632e-05, |
| "loss": 0.1216, |
| "step": 24700 |
| }, |
| { |
| "epoch": 240.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 4.56933069229126, |
| "eval_runtime": 4.4189, |
| "eval_samples_per_second": 65.854, |
| "eval_steps_per_second": 4.3, |
| "step": 24720 |
| }, |
| { |
| "epoch": 240.78, |
| "learning_rate": 1.5184466019417477e-05, |
| "loss": 0.1242, |
| "step": 24800 |
| }, |
| { |
| "epoch": 241.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.490577697753906, |
| "eval_runtime": 4.4022, |
| "eval_samples_per_second": 66.104, |
| "eval_steps_per_second": 4.316, |
| "step": 24823 |
| }, |
| { |
| "epoch": 241.75, |
| "learning_rate": 1.5165048543689323e-05, |
| "loss": 0.1553, |
| "step": 24900 |
| }, |
| { |
| "epoch": 242.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.497089862823486, |
| "eval_runtime": 4.4179, |
| "eval_samples_per_second": 65.869, |
| "eval_steps_per_second": 4.301, |
| "step": 24926 |
| }, |
| { |
| "epoch": 242.72, |
| "learning_rate": 1.5145631067961166e-05, |
| "loss": 0.1377, |
| "step": 25000 |
| }, |
| { |
| "epoch": 243.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.453564643859863, |
| "eval_runtime": 4.4333, |
| "eval_samples_per_second": 65.639, |
| "eval_steps_per_second": 4.286, |
| "step": 25029 |
| }, |
| { |
| "epoch": 243.69, |
| "learning_rate": 1.512621359223301e-05, |
| "loss": 0.1126, |
| "step": 25100 |
| }, |
| { |
| "epoch": 244.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.532435417175293, |
| "eval_runtime": 4.4152, |
| "eval_samples_per_second": 65.908, |
| "eval_steps_per_second": 4.303, |
| "step": 25132 |
| }, |
| { |
| "epoch": 244.66, |
| "learning_rate": 1.5106796116504855e-05, |
| "loss": 0.1321, |
| "step": 25200 |
| }, |
| { |
| "epoch": 245.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.803735256195068, |
| "eval_runtime": 4.4086, |
| "eval_samples_per_second": 66.007, |
| "eval_steps_per_second": 4.31, |
| "step": 25235 |
| }, |
| { |
| "epoch": 245.63, |
| "learning_rate": 1.50873786407767e-05, |
| "loss": 0.115, |
| "step": 25300 |
| }, |
| { |
| "epoch": 246.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.66818380355835, |
| "eval_runtime": 4.4075, |
| "eval_samples_per_second": 66.024, |
| "eval_steps_per_second": 4.311, |
| "step": 25338 |
| }, |
| { |
| "epoch": 246.6, |
| "learning_rate": 1.5067961165048546e-05, |
| "loss": 0.1311, |
| "step": 25400 |
| }, |
| { |
| "epoch": 247.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 4.63736629486084, |
| "eval_runtime": 4.3953, |
| "eval_samples_per_second": 66.206, |
| "eval_steps_per_second": 4.323, |
| "step": 25441 |
| }, |
| { |
| "epoch": 247.57, |
| "learning_rate": 1.5048543689320389e-05, |
| "loss": 0.1224, |
| "step": 25500 |
| }, |
| { |
| "epoch": 248.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.7802581787109375, |
| "eval_runtime": 4.3891, |
| "eval_samples_per_second": 66.301, |
| "eval_steps_per_second": 4.329, |
| "step": 25544 |
| }, |
| { |
| "epoch": 248.54, |
| "learning_rate": 1.5029126213592234e-05, |
| "loss": 0.1291, |
| "step": 25600 |
| }, |
| { |
| "epoch": 249.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.656409740447998, |
| "eval_runtime": 4.41, |
| "eval_samples_per_second": 65.987, |
| "eval_steps_per_second": 4.308, |
| "step": 25647 |
| }, |
| { |
| "epoch": 249.51, |
| "learning_rate": 1.500970873786408e-05, |
| "loss": 0.1138, |
| "step": 25700 |
| }, |
| { |
| "epoch": 250.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 4.518815040588379, |
| "eval_runtime": 4.4021, |
| "eval_samples_per_second": 66.105, |
| "eval_steps_per_second": 4.316, |
| "step": 25750 |
| }, |
| { |
| "epoch": 250.49, |
| "learning_rate": 1.4990291262135923e-05, |
| "loss": 0.1159, |
| "step": 25800 |
| }, |
| { |
| "epoch": 251.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.511619567871094, |
| "eval_runtime": 4.4073, |
| "eval_samples_per_second": 66.027, |
| "eval_steps_per_second": 4.311, |
| "step": 25853 |
| }, |
| { |
| "epoch": 251.46, |
| "learning_rate": 1.4970873786407769e-05, |
| "loss": 0.1172, |
| "step": 25900 |
| }, |
| { |
| "epoch": 252.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.703920841217041, |
| "eval_runtime": 4.4286, |
| "eval_samples_per_second": 65.709, |
| "eval_steps_per_second": 4.29, |
| "step": 25956 |
| }, |
| { |
| "epoch": 252.43, |
| "learning_rate": 1.4951456310679614e-05, |
| "loss": 0.1256, |
| "step": 26000 |
| }, |
| { |
| "epoch": 253.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.646224498748779, |
| "eval_runtime": 4.4924, |
| "eval_samples_per_second": 64.776, |
| "eval_steps_per_second": 4.229, |
| "step": 26059 |
| }, |
| { |
| "epoch": 253.4, |
| "learning_rate": 1.4932038834951456e-05, |
| "loss": 0.1227, |
| "step": 26100 |
| }, |
| { |
| "epoch": 254.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.746954917907715, |
| "eval_runtime": 4.4099, |
| "eval_samples_per_second": 65.987, |
| "eval_steps_per_second": 4.308, |
| "step": 26162 |
| }, |
| { |
| "epoch": 254.37, |
| "learning_rate": 1.4912621359223303e-05, |
| "loss": 0.1186, |
| "step": 26200 |
| }, |
| { |
| "epoch": 255.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.654090404510498, |
| "eval_runtime": 4.4095, |
| "eval_samples_per_second": 65.994, |
| "eval_steps_per_second": 4.309, |
| "step": 26265 |
| }, |
| { |
| "epoch": 255.34, |
| "learning_rate": 1.4893203883495147e-05, |
| "loss": 0.1114, |
| "step": 26300 |
| }, |
| { |
| "epoch": 256.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.600460052490234, |
| "eval_runtime": 4.4086, |
| "eval_samples_per_second": 66.007, |
| "eval_steps_per_second": 4.31, |
| "step": 26368 |
| }, |
| { |
| "epoch": 256.31, |
| "learning_rate": 1.4873786407766992e-05, |
| "loss": 0.1154, |
| "step": 26400 |
| }, |
| { |
| "epoch": 257.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.570699691772461, |
| "eval_runtime": 4.4104, |
| "eval_samples_per_second": 65.981, |
| "eval_steps_per_second": 4.308, |
| "step": 26471 |
| }, |
| { |
| "epoch": 257.28, |
| "learning_rate": 1.4854368932038836e-05, |
| "loss": 0.1229, |
| "step": 26500 |
| }, |
| { |
| "epoch": 258.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.517983913421631, |
| "eval_runtime": 4.4402, |
| "eval_samples_per_second": 65.537, |
| "eval_steps_per_second": 4.279, |
| "step": 26574 |
| }, |
| { |
| "epoch": 258.25, |
| "learning_rate": 1.483495145631068e-05, |
| "loss": 0.1138, |
| "step": 26600 |
| }, |
| { |
| "epoch": 259.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.621974468231201, |
| "eval_runtime": 4.3978, |
| "eval_samples_per_second": 66.17, |
| "eval_steps_per_second": 4.32, |
| "step": 26677 |
| }, |
| { |
| "epoch": 259.22, |
| "learning_rate": 1.4815533980582526e-05, |
| "loss": 0.0987, |
| "step": 26700 |
| }, |
| { |
| "epoch": 260.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.6445817947387695, |
| "eval_runtime": 4.3935, |
| "eval_samples_per_second": 66.234, |
| "eval_steps_per_second": 4.325, |
| "step": 26780 |
| }, |
| { |
| "epoch": 260.19, |
| "learning_rate": 1.479611650485437e-05, |
| "loss": 0.1056, |
| "step": 26800 |
| }, |
| { |
| "epoch": 261.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.759962558746338, |
| "eval_runtime": 4.4457, |
| "eval_samples_per_second": 65.457, |
| "eval_steps_per_second": 4.274, |
| "step": 26883 |
| }, |
| { |
| "epoch": 261.17, |
| "learning_rate": 1.4776699029126216e-05, |
| "loss": 0.1362, |
| "step": 26900 |
| }, |
| { |
| "epoch": 262.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.670341968536377, |
| "eval_runtime": 4.3936, |
| "eval_samples_per_second": 66.232, |
| "eval_steps_per_second": 4.324, |
| "step": 26986 |
| }, |
| { |
| "epoch": 262.14, |
| "learning_rate": 1.475728155339806e-05, |
| "loss": 0.1131, |
| "step": 27000 |
| }, |
| { |
| "epoch": 263.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.606517314910889, |
| "eval_runtime": 4.4121, |
| "eval_samples_per_second": 65.955, |
| "eval_steps_per_second": 4.306, |
| "step": 27089 |
| }, |
| { |
| "epoch": 263.11, |
| "learning_rate": 1.4737864077669904e-05, |
| "loss": 0.1127, |
| "step": 27100 |
| }, |
| { |
| "epoch": 264.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.512498378753662, |
| "eval_runtime": 4.4169, |
| "eval_samples_per_second": 65.883, |
| "eval_steps_per_second": 4.302, |
| "step": 27192 |
| }, |
| { |
| "epoch": 264.08, |
| "learning_rate": 1.4718446601941749e-05, |
| "loss": 0.1248, |
| "step": 27200 |
| }, |
| { |
| "epoch": 265.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.596677303314209, |
| "eval_runtime": 4.4358, |
| "eval_samples_per_second": 65.603, |
| "eval_steps_per_second": 4.283, |
| "step": 27295 |
| }, |
| { |
| "epoch": 265.05, |
| "learning_rate": 1.4699029126213593e-05, |
| "loss": 0.111, |
| "step": 27300 |
| }, |
| { |
| "epoch": 266.0, |
| "eval_accuracy": 0.24742268041237114, |
| "eval_loss": 4.618172645568848, |
| "eval_runtime": 4.4576, |
| "eval_samples_per_second": 65.281, |
| "eval_steps_per_second": 4.262, |
| "step": 27398 |
| }, |
| { |
| "epoch": 266.02, |
| "learning_rate": 1.467961165048544e-05, |
| "loss": 0.1022, |
| "step": 27400 |
| }, |
| { |
| "epoch": 266.99, |
| "learning_rate": 1.4660194174757282e-05, |
| "loss": 0.1203, |
| "step": 27500 |
| }, |
| { |
| "epoch": 267.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.596898555755615, |
| "eval_runtime": 4.4052, |
| "eval_samples_per_second": 66.059, |
| "eval_steps_per_second": 4.313, |
| "step": 27501 |
| }, |
| { |
| "epoch": 267.96, |
| "learning_rate": 1.4640776699029127e-05, |
| "loss": 0.1242, |
| "step": 27600 |
| }, |
| { |
| "epoch": 268.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.543684959411621, |
| "eval_runtime": 4.4036, |
| "eval_samples_per_second": 66.082, |
| "eval_steps_per_second": 4.315, |
| "step": 27604 |
| }, |
| { |
| "epoch": 268.93, |
| "learning_rate": 1.4621359223300973e-05, |
| "loss": 0.1041, |
| "step": 27700 |
| }, |
| { |
| "epoch": 269.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.710482120513916, |
| "eval_runtime": 4.4051, |
| "eval_samples_per_second": 66.06, |
| "eval_steps_per_second": 4.313, |
| "step": 27707 |
| }, |
| { |
| "epoch": 269.9, |
| "learning_rate": 1.4601941747572816e-05, |
| "loss": 0.1233, |
| "step": 27800 |
| }, |
| { |
| "epoch": 270.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.630477428436279, |
| "eval_runtime": 4.4588, |
| "eval_samples_per_second": 65.264, |
| "eval_steps_per_second": 4.261, |
| "step": 27810 |
| }, |
| { |
| "epoch": 270.87, |
| "learning_rate": 1.4582524271844662e-05, |
| "loss": 0.1003, |
| "step": 27900 |
| }, |
| { |
| "epoch": 271.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.586513996124268, |
| "eval_runtime": 4.4119, |
| "eval_samples_per_second": 65.957, |
| "eval_steps_per_second": 4.306, |
| "step": 27913 |
| }, |
| { |
| "epoch": 271.84, |
| "learning_rate": 1.4563106796116507e-05, |
| "loss": 0.1144, |
| "step": 28000 |
| }, |
| { |
| "epoch": 272.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.621643543243408, |
| "eval_runtime": 4.398, |
| "eval_samples_per_second": 66.166, |
| "eval_steps_per_second": 4.32, |
| "step": 28016 |
| }, |
| { |
| "epoch": 272.82, |
| "learning_rate": 1.454368932038835e-05, |
| "loss": 0.1061, |
| "step": 28100 |
| }, |
| { |
| "epoch": 273.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.538716793060303, |
| "eval_runtime": 4.4, |
| "eval_samples_per_second": 66.136, |
| "eval_steps_per_second": 4.318, |
| "step": 28119 |
| }, |
| { |
| "epoch": 273.79, |
| "learning_rate": 1.4524271844660196e-05, |
| "loss": 0.1102, |
| "step": 28200 |
| }, |
| { |
| "epoch": 274.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.58504581451416, |
| "eval_runtime": 4.424, |
| "eval_samples_per_second": 65.778, |
| "eval_steps_per_second": 4.295, |
| "step": 28222 |
| }, |
| { |
| "epoch": 274.76, |
| "learning_rate": 1.450485436893204e-05, |
| "loss": 0.109, |
| "step": 28300 |
| }, |
| { |
| "epoch": 275.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.644214630126953, |
| "eval_runtime": 4.3972, |
| "eval_samples_per_second": 66.178, |
| "eval_steps_per_second": 4.321, |
| "step": 28325 |
| }, |
| { |
| "epoch": 275.73, |
| "learning_rate": 1.4485436893203884e-05, |
| "loss": 0.1277, |
| "step": 28400 |
| }, |
| { |
| "epoch": 276.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 4.583741188049316, |
| "eval_runtime": 4.4168, |
| "eval_samples_per_second": 65.885, |
| "eval_steps_per_second": 4.302, |
| "step": 28428 |
| }, |
| { |
| "epoch": 276.7, |
| "learning_rate": 1.446601941747573e-05, |
| "loss": 0.1101, |
| "step": 28500 |
| }, |
| { |
| "epoch": 277.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.7879719734191895, |
| "eval_runtime": 4.4436, |
| "eval_samples_per_second": 65.488, |
| "eval_steps_per_second": 4.276, |
| "step": 28531 |
| }, |
| { |
| "epoch": 277.67, |
| "learning_rate": 1.4446601941747573e-05, |
| "loss": 0.1136, |
| "step": 28600 |
| }, |
| { |
| "epoch": 278.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.566427230834961, |
| "eval_runtime": 4.4045, |
| "eval_samples_per_second": 66.069, |
| "eval_steps_per_second": 4.314, |
| "step": 28634 |
| }, |
| { |
| "epoch": 278.64, |
| "learning_rate": 1.4427184466019419e-05, |
| "loss": 0.1125, |
| "step": 28700 |
| }, |
| { |
| "epoch": 279.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.724515914916992, |
| "eval_runtime": 4.4018, |
| "eval_samples_per_second": 66.11, |
| "eval_steps_per_second": 4.316, |
| "step": 28737 |
| }, |
| { |
| "epoch": 279.61, |
| "learning_rate": 1.4407766990291264e-05, |
| "loss": 0.1207, |
| "step": 28800 |
| }, |
| { |
| "epoch": 280.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.78406286239624, |
| "eval_runtime": 4.4052, |
| "eval_samples_per_second": 66.059, |
| "eval_steps_per_second": 4.313, |
| "step": 28840 |
| }, |
| { |
| "epoch": 280.58, |
| "learning_rate": 1.4388349514563106e-05, |
| "loss": 0.1223, |
| "step": 28900 |
| }, |
| { |
| "epoch": 281.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.773590564727783, |
| "eval_runtime": 4.3939, |
| "eval_samples_per_second": 66.228, |
| "eval_steps_per_second": 4.324, |
| "step": 28943 |
| }, |
| { |
| "epoch": 281.55, |
| "learning_rate": 1.4368932038834953e-05, |
| "loss": 0.1132, |
| "step": 29000 |
| }, |
| { |
| "epoch": 282.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.619295597076416, |
| "eval_runtime": 4.4015, |
| "eval_samples_per_second": 66.114, |
| "eval_steps_per_second": 4.317, |
| "step": 29046 |
| }, |
| { |
| "epoch": 282.52, |
| "learning_rate": 1.4349514563106797e-05, |
| "loss": 0.1118, |
| "step": 29100 |
| }, |
| { |
| "epoch": 283.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.751223087310791, |
| "eval_runtime": 4.5072, |
| "eval_samples_per_second": 64.563, |
| "eval_steps_per_second": 4.215, |
| "step": 29149 |
| }, |
| { |
| "epoch": 283.5, |
| "learning_rate": 1.4330097087378642e-05, |
| "loss": 0.1196, |
| "step": 29200 |
| }, |
| { |
| "epoch": 284.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.777285099029541, |
| "eval_runtime": 4.3932, |
| "eval_samples_per_second": 66.239, |
| "eval_steps_per_second": 4.325, |
| "step": 29252 |
| }, |
| { |
| "epoch": 284.47, |
| "learning_rate": 1.4310679611650486e-05, |
| "loss": 0.1035, |
| "step": 29300 |
| }, |
| { |
| "epoch": 285.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.661113262176514, |
| "eval_runtime": 4.4004, |
| "eval_samples_per_second": 66.131, |
| "eval_steps_per_second": 4.318, |
| "step": 29355 |
| }, |
| { |
| "epoch": 285.44, |
| "learning_rate": 1.4291262135922331e-05, |
| "loss": 0.1079, |
| "step": 29400 |
| }, |
| { |
| "epoch": 286.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.691645622253418, |
| "eval_runtime": 4.4078, |
| "eval_samples_per_second": 66.02, |
| "eval_steps_per_second": 4.311, |
| "step": 29458 |
| }, |
| { |
| "epoch": 286.41, |
| "learning_rate": 1.4271844660194176e-05, |
| "loss": 0.1124, |
| "step": 29500 |
| }, |
| { |
| "epoch": 287.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.650529384613037, |
| "eval_runtime": 4.3911, |
| "eval_samples_per_second": 66.27, |
| "eval_steps_per_second": 4.327, |
| "step": 29561 |
| }, |
| { |
| "epoch": 287.38, |
| "learning_rate": 1.425242718446602e-05, |
| "loss": 0.1024, |
| "step": 29600 |
| }, |
| { |
| "epoch": 288.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.63031005859375, |
| "eval_runtime": 4.3925, |
| "eval_samples_per_second": 66.25, |
| "eval_steps_per_second": 4.326, |
| "step": 29664 |
| }, |
| { |
| "epoch": 288.35, |
| "learning_rate": 1.4233009708737866e-05, |
| "loss": 0.101, |
| "step": 29700 |
| }, |
| { |
| "epoch": 289.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.607905864715576, |
| "eval_runtime": 4.3996, |
| "eval_samples_per_second": 66.142, |
| "eval_steps_per_second": 4.319, |
| "step": 29767 |
| }, |
| { |
| "epoch": 289.32, |
| "learning_rate": 1.421359223300971e-05, |
| "loss": 0.124, |
| "step": 29800 |
| }, |
| { |
| "epoch": 290.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.456625938415527, |
| "eval_runtime": 4.4018, |
| "eval_samples_per_second": 66.11, |
| "eval_steps_per_second": 4.316, |
| "step": 29870 |
| }, |
| { |
| "epoch": 290.29, |
| "learning_rate": 1.4194174757281554e-05, |
| "loss": 0.1121, |
| "step": 29900 |
| }, |
| { |
| "epoch": 291.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.502068519592285, |
| "eval_runtime": 4.4021, |
| "eval_samples_per_second": 66.105, |
| "eval_steps_per_second": 4.316, |
| "step": 29973 |
| }, |
| { |
| "epoch": 291.26, |
| "learning_rate": 1.41747572815534e-05, |
| "loss": 0.1005, |
| "step": 30000 |
| }, |
| { |
| "epoch": 292.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.547921180725098, |
| "eval_runtime": 4.4521, |
| "eval_samples_per_second": 65.362, |
| "eval_steps_per_second": 4.268, |
| "step": 30076 |
| }, |
| { |
| "epoch": 292.23, |
| "learning_rate": 1.4155339805825243e-05, |
| "loss": 0.1152, |
| "step": 30100 |
| }, |
| { |
| "epoch": 293.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.665774822235107, |
| "eval_runtime": 4.4006, |
| "eval_samples_per_second": 66.128, |
| "eval_steps_per_second": 4.318, |
| "step": 30179 |
| }, |
| { |
| "epoch": 293.2, |
| "learning_rate": 1.413592233009709e-05, |
| "loss": 0.113, |
| "step": 30200 |
| }, |
| { |
| "epoch": 294.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.5608320236206055, |
| "eval_runtime": 4.4028, |
| "eval_samples_per_second": 66.094, |
| "eval_steps_per_second": 4.315, |
| "step": 30282 |
| }, |
| { |
| "epoch": 294.17, |
| "learning_rate": 1.4116504854368934e-05, |
| "loss": 0.112, |
| "step": 30300 |
| }, |
| { |
| "epoch": 295.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.657680511474609, |
| "eval_runtime": 4.4088, |
| "eval_samples_per_second": 66.004, |
| "eval_steps_per_second": 4.31, |
| "step": 30385 |
| }, |
| { |
| "epoch": 295.15, |
| "learning_rate": 1.4097087378640777e-05, |
| "loss": 0.1095, |
| "step": 30400 |
| }, |
| { |
| "epoch": 296.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.532250881195068, |
| "eval_runtime": 4.4136, |
| "eval_samples_per_second": 65.932, |
| "eval_steps_per_second": 4.305, |
| "step": 30488 |
| }, |
| { |
| "epoch": 296.12, |
| "learning_rate": 1.4077669902912623e-05, |
| "loss": 0.1053, |
| "step": 30500 |
| }, |
| { |
| "epoch": 297.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.635454177856445, |
| "eval_runtime": 4.4197, |
| "eval_samples_per_second": 65.841, |
| "eval_steps_per_second": 4.299, |
| "step": 30591 |
| }, |
| { |
| "epoch": 297.09, |
| "learning_rate": 1.4058252427184466e-05, |
| "loss": 0.1138, |
| "step": 30600 |
| }, |
| { |
| "epoch": 298.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.718722343444824, |
| "eval_runtime": 4.4225, |
| "eval_samples_per_second": 65.799, |
| "eval_steps_per_second": 4.296, |
| "step": 30694 |
| }, |
| { |
| "epoch": 298.06, |
| "learning_rate": 1.4038834951456312e-05, |
| "loss": 0.1105, |
| "step": 30700 |
| }, |
| { |
| "epoch": 299.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.603695392608643, |
| "eval_runtime": 4.4021, |
| "eval_samples_per_second": 66.105, |
| "eval_steps_per_second": 4.316, |
| "step": 30797 |
| }, |
| { |
| "epoch": 299.03, |
| "learning_rate": 1.4019417475728157e-05, |
| "loss": 0.1175, |
| "step": 30800 |
| }, |
| { |
| "epoch": 300.0, |
| "learning_rate": 1.4e-05, |
| "loss": 0.0944, |
| "step": 30900 |
| }, |
| { |
| "epoch": 300.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.719486713409424, |
| "eval_runtime": 4.4224, |
| "eval_samples_per_second": 65.801, |
| "eval_steps_per_second": 4.296, |
| "step": 30900 |
| }, |
| { |
| "epoch": 300.97, |
| "learning_rate": 1.3980582524271846e-05, |
| "loss": 0.1027, |
| "step": 31000 |
| }, |
| { |
| "epoch": 301.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.67861795425415, |
| "eval_runtime": 4.3977, |
| "eval_samples_per_second": 66.171, |
| "eval_steps_per_second": 4.32, |
| "step": 31003 |
| }, |
| { |
| "epoch": 301.94, |
| "learning_rate": 1.396116504854369e-05, |
| "loss": 0.0994, |
| "step": 31100 |
| }, |
| { |
| "epoch": 302.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.762547492980957, |
| "eval_runtime": 4.3983, |
| "eval_samples_per_second": 66.162, |
| "eval_steps_per_second": 4.32, |
| "step": 31106 |
| }, |
| { |
| "epoch": 302.91, |
| "learning_rate": 1.3941747572815535e-05, |
| "loss": 0.1229, |
| "step": 31200 |
| }, |
| { |
| "epoch": 303.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.849686622619629, |
| "eval_runtime": 4.3908, |
| "eval_samples_per_second": 66.275, |
| "eval_steps_per_second": 4.327, |
| "step": 31209 |
| }, |
| { |
| "epoch": 303.88, |
| "learning_rate": 1.392233009708738e-05, |
| "loss": 0.1094, |
| "step": 31300 |
| }, |
| { |
| "epoch": 304.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 4.74536657333374, |
| "eval_runtime": 4.4588, |
| "eval_samples_per_second": 65.265, |
| "eval_steps_per_second": 4.261, |
| "step": 31312 |
| }, |
| { |
| "epoch": 304.85, |
| "learning_rate": 1.3902912621359224e-05, |
| "loss": 0.1225, |
| "step": 31400 |
| }, |
| { |
| "epoch": 305.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.77222204208374, |
| "eval_runtime": 4.468, |
| "eval_samples_per_second": 65.13, |
| "eval_steps_per_second": 4.252, |
| "step": 31415 |
| }, |
| { |
| "epoch": 305.83, |
| "learning_rate": 1.3883495145631069e-05, |
| "loss": 0.102, |
| "step": 31500 |
| }, |
| { |
| "epoch": 306.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.843104839324951, |
| "eval_runtime": 4.4326, |
| "eval_samples_per_second": 65.65, |
| "eval_steps_per_second": 4.286, |
| "step": 31518 |
| }, |
| { |
| "epoch": 306.8, |
| "learning_rate": 1.3864077669902914e-05, |
| "loss": 0.1283, |
| "step": 31600 |
| }, |
| { |
| "epoch": 307.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.797704219818115, |
| "eval_runtime": 4.4133, |
| "eval_samples_per_second": 65.937, |
| "eval_steps_per_second": 4.305, |
| "step": 31621 |
| }, |
| { |
| "epoch": 307.77, |
| "learning_rate": 1.384466019417476e-05, |
| "loss": 0.109, |
| "step": 31700 |
| }, |
| { |
| "epoch": 308.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 4.638222694396973, |
| "eval_runtime": 4.4167, |
| "eval_samples_per_second": 65.886, |
| "eval_steps_per_second": 4.302, |
| "step": 31724 |
| }, |
| { |
| "epoch": 308.74, |
| "learning_rate": 1.3825242718446603e-05, |
| "loss": 0.1193, |
| "step": 31800 |
| }, |
| { |
| "epoch": 309.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 4.7093939781188965, |
| "eval_runtime": 4.4231, |
| "eval_samples_per_second": 65.791, |
| "eval_steps_per_second": 4.296, |
| "step": 31827 |
| }, |
| { |
| "epoch": 309.71, |
| "learning_rate": 1.3805825242718447e-05, |
| "loss": 0.1106, |
| "step": 31900 |
| }, |
| { |
| "epoch": 310.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.756236553192139, |
| "eval_runtime": 4.5026, |
| "eval_samples_per_second": 64.629, |
| "eval_steps_per_second": 4.22, |
| "step": 31930 |
| }, |
| { |
| "epoch": 310.68, |
| "learning_rate": 1.3786407766990294e-05, |
| "loss": 0.1032, |
| "step": 32000 |
| }, |
| { |
| "epoch": 311.0, |
| "eval_accuracy": 0.25773195876288657, |
| "eval_loss": 4.726458549499512, |
| "eval_runtime": 4.4077, |
| "eval_samples_per_second": 66.021, |
| "eval_steps_per_second": 4.311, |
| "step": 32033 |
| }, |
| { |
| "epoch": 311.65, |
| "learning_rate": 1.3766990291262136e-05, |
| "loss": 0.114, |
| "step": 32100 |
| }, |
| { |
| "epoch": 312.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.751614570617676, |
| "eval_runtime": 4.411, |
| "eval_samples_per_second": 65.971, |
| "eval_steps_per_second": 4.307, |
| "step": 32136 |
| }, |
| { |
| "epoch": 312.62, |
| "learning_rate": 1.3747572815533983e-05, |
| "loss": 0.1265, |
| "step": 32200 |
| }, |
| { |
| "epoch": 313.0, |
| "eval_accuracy": 0.24742268041237114, |
| "eval_loss": 4.788166522979736, |
| "eval_runtime": 4.4529, |
| "eval_samples_per_second": 65.351, |
| "eval_steps_per_second": 4.267, |
| "step": 32239 |
| }, |
| { |
| "epoch": 313.59, |
| "learning_rate": 1.3728155339805826e-05, |
| "loss": 0.1252, |
| "step": 32300 |
| }, |
| { |
| "epoch": 314.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 4.70837926864624, |
| "eval_runtime": 4.4294, |
| "eval_samples_per_second": 65.697, |
| "eval_steps_per_second": 4.29, |
| "step": 32342 |
| }, |
| { |
| "epoch": 314.56, |
| "learning_rate": 1.370873786407767e-05, |
| "loss": 0.1102, |
| "step": 32400 |
| }, |
| { |
| "epoch": 315.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.689497470855713, |
| "eval_runtime": 4.4128, |
| "eval_samples_per_second": 65.944, |
| "eval_steps_per_second": 4.306, |
| "step": 32445 |
| }, |
| { |
| "epoch": 315.53, |
| "learning_rate": 1.3689320388349517e-05, |
| "loss": 0.0984, |
| "step": 32500 |
| }, |
| { |
| "epoch": 316.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 4.6340837478637695, |
| "eval_runtime": 4.3942, |
| "eval_samples_per_second": 66.224, |
| "eval_steps_per_second": 4.324, |
| "step": 32548 |
| }, |
| { |
| "epoch": 316.5, |
| "learning_rate": 1.366990291262136e-05, |
| "loss": 0.0978, |
| "step": 32600 |
| }, |
| { |
| "epoch": 317.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 4.621079921722412, |
| "eval_runtime": 4.4388, |
| "eval_samples_per_second": 65.558, |
| "eval_steps_per_second": 4.28, |
| "step": 32651 |
| }, |
| { |
| "epoch": 317.48, |
| "learning_rate": 1.3650485436893206e-05, |
| "loss": 0.1068, |
| "step": 32700 |
| }, |
| { |
| "epoch": 318.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.7675371170043945, |
| "eval_runtime": 4.4083, |
| "eval_samples_per_second": 66.012, |
| "eval_steps_per_second": 4.31, |
| "step": 32754 |
| }, |
| { |
| "epoch": 318.45, |
| "learning_rate": 1.363106796116505e-05, |
| "loss": 0.1017, |
| "step": 32800 |
| }, |
| { |
| "epoch": 319.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.706081390380859, |
| "eval_runtime": 4.4275, |
| "eval_samples_per_second": 65.726, |
| "eval_steps_per_second": 4.291, |
| "step": 32857 |
| }, |
| { |
| "epoch": 319.42, |
| "learning_rate": 1.3611650485436893e-05, |
| "loss": 0.1138, |
| "step": 32900 |
| }, |
| { |
| "epoch": 320.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.713945388793945, |
| "eval_runtime": 4.4304, |
| "eval_samples_per_second": 65.682, |
| "eval_steps_per_second": 4.289, |
| "step": 32960 |
| }, |
| { |
| "epoch": 320.39, |
| "learning_rate": 1.359223300970874e-05, |
| "loss": 0.0997, |
| "step": 33000 |
| }, |
| { |
| "epoch": 321.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.711687088012695, |
| "eval_runtime": 4.4111, |
| "eval_samples_per_second": 65.97, |
| "eval_steps_per_second": 4.307, |
| "step": 33063 |
| }, |
| { |
| "epoch": 321.36, |
| "learning_rate": 1.3572815533980584e-05, |
| "loss": 0.1036, |
| "step": 33100 |
| }, |
| { |
| "epoch": 322.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.71359920501709, |
| "eval_runtime": 4.4189, |
| "eval_samples_per_second": 65.853, |
| "eval_steps_per_second": 4.3, |
| "step": 33166 |
| }, |
| { |
| "epoch": 322.33, |
| "learning_rate": 1.3553398058252429e-05, |
| "loss": 0.0988, |
| "step": 33200 |
| }, |
| { |
| "epoch": 323.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.7139410972595215, |
| "eval_runtime": 4.4238, |
| "eval_samples_per_second": 65.78, |
| "eval_steps_per_second": 4.295, |
| "step": 33269 |
| }, |
| { |
| "epoch": 323.3, |
| "learning_rate": 1.3533980582524273e-05, |
| "loss": 0.1052, |
| "step": 33300 |
| }, |
| { |
| "epoch": 324.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.7646050453186035, |
| "eval_runtime": 4.3998, |
| "eval_samples_per_second": 66.139, |
| "eval_steps_per_second": 4.318, |
| "step": 33372 |
| }, |
| { |
| "epoch": 324.27, |
| "learning_rate": 1.3514563106796118e-05, |
| "loss": 0.0957, |
| "step": 33400 |
| }, |
| { |
| "epoch": 325.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.79006290435791, |
| "eval_runtime": 4.4111, |
| "eval_samples_per_second": 65.97, |
| "eval_steps_per_second": 4.307, |
| "step": 33475 |
| }, |
| { |
| "epoch": 325.24, |
| "learning_rate": 1.3495145631067962e-05, |
| "loss": 0.1009, |
| "step": 33500 |
| }, |
| { |
| "epoch": 326.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.704848289489746, |
| "eval_runtime": 4.3963, |
| "eval_samples_per_second": 66.192, |
| "eval_steps_per_second": 4.322, |
| "step": 33578 |
| }, |
| { |
| "epoch": 326.21, |
| "learning_rate": 1.3475728155339807e-05, |
| "loss": 0.0957, |
| "step": 33600 |
| }, |
| { |
| "epoch": 327.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.62115478515625, |
| "eval_runtime": 4.4064, |
| "eval_samples_per_second": 66.04, |
| "eval_steps_per_second": 4.312, |
| "step": 33681 |
| }, |
| { |
| "epoch": 327.18, |
| "learning_rate": 1.345631067961165e-05, |
| "loss": 0.1244, |
| "step": 33700 |
| }, |
| { |
| "epoch": 328.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.748119831085205, |
| "eval_runtime": 4.4371, |
| "eval_samples_per_second": 65.583, |
| "eval_steps_per_second": 4.282, |
| "step": 33784 |
| }, |
| { |
| "epoch": 328.16, |
| "learning_rate": 1.3436893203883496e-05, |
| "loss": 0.1021, |
| "step": 33800 |
| }, |
| { |
| "epoch": 329.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.749732971191406, |
| "eval_runtime": 4.4065, |
| "eval_samples_per_second": 66.038, |
| "eval_steps_per_second": 4.312, |
| "step": 33887 |
| }, |
| { |
| "epoch": 329.13, |
| "learning_rate": 1.341747572815534e-05, |
| "loss": 0.1017, |
| "step": 33900 |
| }, |
| { |
| "epoch": 330.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.830997467041016, |
| "eval_runtime": 4.4152, |
| "eval_samples_per_second": 65.908, |
| "eval_steps_per_second": 4.303, |
| "step": 33990 |
| }, |
| { |
| "epoch": 330.1, |
| "learning_rate": 1.3398058252427187e-05, |
| "loss": 0.0957, |
| "step": 34000 |
| }, |
| { |
| "epoch": 331.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.694131851196289, |
| "eval_runtime": 4.4251, |
| "eval_samples_per_second": 65.762, |
| "eval_steps_per_second": 4.294, |
| "step": 34093 |
| }, |
| { |
| "epoch": 331.07, |
| "learning_rate": 1.337864077669903e-05, |
| "loss": 0.1042, |
| "step": 34100 |
| }, |
| { |
| "epoch": 332.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 4.725266456604004, |
| "eval_runtime": 4.3958, |
| "eval_samples_per_second": 66.199, |
| "eval_steps_per_second": 4.322, |
| "step": 34196 |
| }, |
| { |
| "epoch": 332.04, |
| "learning_rate": 1.3359223300970874e-05, |
| "loss": 0.1046, |
| "step": 34200 |
| }, |
| { |
| "epoch": 333.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.859306335449219, |
| "eval_runtime": 4.3972, |
| "eval_samples_per_second": 66.179, |
| "eval_steps_per_second": 4.321, |
| "step": 34299 |
| }, |
| { |
| "epoch": 333.01, |
| "learning_rate": 1.3339805825242719e-05, |
| "loss": 0.0984, |
| "step": 34300 |
| }, |
| { |
| "epoch": 333.98, |
| "learning_rate": 1.3320388349514564e-05, |
| "loss": 0.1103, |
| "step": 34400 |
| }, |
| { |
| "epoch": 334.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.847973823547363, |
| "eval_runtime": 4.4128, |
| "eval_samples_per_second": 65.944, |
| "eval_steps_per_second": 4.306, |
| "step": 34402 |
| }, |
| { |
| "epoch": 334.95, |
| "learning_rate": 1.330097087378641e-05, |
| "loss": 0.09, |
| "step": 34500 |
| }, |
| { |
| "epoch": 335.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 4.91008996963501, |
| "eval_runtime": 4.407, |
| "eval_samples_per_second": 66.032, |
| "eval_steps_per_second": 4.311, |
| "step": 34505 |
| }, |
| { |
| "epoch": 335.92, |
| "learning_rate": 1.3281553398058253e-05, |
| "loss": 0.1108, |
| "step": 34600 |
| }, |
| { |
| "epoch": 336.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.783932209014893, |
| "eval_runtime": 4.4104, |
| "eval_samples_per_second": 65.98, |
| "eval_steps_per_second": 4.308, |
| "step": 34608 |
| }, |
| { |
| "epoch": 336.89, |
| "learning_rate": 1.3262135922330097e-05, |
| "loss": 0.1043, |
| "step": 34700 |
| }, |
| { |
| "epoch": 337.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.954315662384033, |
| "eval_runtime": 4.4085, |
| "eval_samples_per_second": 66.009, |
| "eval_steps_per_second": 4.31, |
| "step": 34711 |
| }, |
| { |
| "epoch": 337.86, |
| "learning_rate": 1.3242718446601944e-05, |
| "loss": 0.104, |
| "step": 34800 |
| }, |
| { |
| "epoch": 338.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.802567481994629, |
| "eval_runtime": 4.454, |
| "eval_samples_per_second": 65.335, |
| "eval_steps_per_second": 4.266, |
| "step": 34814 |
| }, |
| { |
| "epoch": 338.83, |
| "learning_rate": 1.3223300970873786e-05, |
| "loss": 0.1015, |
| "step": 34900 |
| }, |
| { |
| "epoch": 339.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.800775051116943, |
| "eval_runtime": 4.4097, |
| "eval_samples_per_second": 65.992, |
| "eval_steps_per_second": 4.309, |
| "step": 34917 |
| }, |
| { |
| "epoch": 339.81, |
| "learning_rate": 1.3203883495145633e-05, |
| "loss": 0.1029, |
| "step": 35000 |
| }, |
| { |
| "epoch": 340.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.90689754486084, |
| "eval_runtime": 4.4116, |
| "eval_samples_per_second": 65.963, |
| "eval_steps_per_second": 4.307, |
| "step": 35020 |
| }, |
| { |
| "epoch": 340.78, |
| "learning_rate": 1.3184466019417477e-05, |
| "loss": 0.1002, |
| "step": 35100 |
| }, |
| { |
| "epoch": 341.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 4.924178600311279, |
| "eval_runtime": 4.4136, |
| "eval_samples_per_second": 65.933, |
| "eval_steps_per_second": 4.305, |
| "step": 35123 |
| }, |
| { |
| "epoch": 341.75, |
| "learning_rate": 1.316504854368932e-05, |
| "loss": 0.1076, |
| "step": 35200 |
| }, |
| { |
| "epoch": 342.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.719906330108643, |
| "eval_runtime": 4.4068, |
| "eval_samples_per_second": 66.034, |
| "eval_steps_per_second": 4.312, |
| "step": 35226 |
| }, |
| { |
| "epoch": 342.72, |
| "learning_rate": 1.3145631067961167e-05, |
| "loss": 0.1055, |
| "step": 35300 |
| }, |
| { |
| "epoch": 343.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 4.844000816345215, |
| "eval_runtime": 4.4456, |
| "eval_samples_per_second": 65.458, |
| "eval_steps_per_second": 4.274, |
| "step": 35329 |
| }, |
| { |
| "epoch": 343.69, |
| "learning_rate": 1.3126213592233011e-05, |
| "loss": 0.0925, |
| "step": 35400 |
| }, |
| { |
| "epoch": 344.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 4.857196807861328, |
| "eval_runtime": 4.408, |
| "eval_samples_per_second": 66.016, |
| "eval_steps_per_second": 4.31, |
| "step": 35432 |
| }, |
| { |
| "epoch": 344.66, |
| "learning_rate": 1.3106796116504856e-05, |
| "loss": 0.0827, |
| "step": 35500 |
| }, |
| { |
| "epoch": 345.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 4.913283824920654, |
| "eval_runtime": 4.4856, |
| "eval_samples_per_second": 64.874, |
| "eval_steps_per_second": 4.236, |
| "step": 35535 |
| }, |
| { |
| "epoch": 345.63, |
| "learning_rate": 1.30873786407767e-05, |
| "loss": 0.1105, |
| "step": 35600 |
| }, |
| { |
| "epoch": 346.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.986526012420654, |
| "eval_runtime": 4.4528, |
| "eval_samples_per_second": 65.352, |
| "eval_steps_per_second": 4.267, |
| "step": 35638 |
| }, |
| { |
| "epoch": 346.6, |
| "learning_rate": 1.3067961165048543e-05, |
| "loss": 0.0875, |
| "step": 35700 |
| }, |
| { |
| "epoch": 347.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.797267436981201, |
| "eval_runtime": 4.4052, |
| "eval_samples_per_second": 66.059, |
| "eval_steps_per_second": 4.313, |
| "step": 35741 |
| }, |
| { |
| "epoch": 347.57, |
| "learning_rate": 1.304854368932039e-05, |
| "loss": 0.106, |
| "step": 35800 |
| }, |
| { |
| "epoch": 348.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.869633674621582, |
| "eval_runtime": 4.392, |
| "eval_samples_per_second": 66.257, |
| "eval_steps_per_second": 4.326, |
| "step": 35844 |
| }, |
| { |
| "epoch": 348.54, |
| "learning_rate": 1.3029126213592234e-05, |
| "loss": 0.1083, |
| "step": 35900 |
| }, |
| { |
| "epoch": 349.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.978613376617432, |
| "eval_runtime": 4.3987, |
| "eval_samples_per_second": 66.156, |
| "eval_steps_per_second": 4.319, |
| "step": 35947 |
| }, |
| { |
| "epoch": 349.51, |
| "learning_rate": 1.300970873786408e-05, |
| "loss": 0.105, |
| "step": 36000 |
| }, |
| { |
| "epoch": 350.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.911431789398193, |
| "eval_runtime": 4.4527, |
| "eval_samples_per_second": 65.354, |
| "eval_steps_per_second": 4.267, |
| "step": 36050 |
| }, |
| { |
| "epoch": 350.49, |
| "learning_rate": 1.2990291262135923e-05, |
| "loss": 0.1075, |
| "step": 36100 |
| }, |
| { |
| "epoch": 351.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 4.869287967681885, |
| "eval_runtime": 4.3993, |
| "eval_samples_per_second": 66.147, |
| "eval_steps_per_second": 4.319, |
| "step": 36153 |
| }, |
| { |
| "epoch": 351.46, |
| "learning_rate": 1.2970873786407768e-05, |
| "loss": 0.1026, |
| "step": 36200 |
| }, |
| { |
| "epoch": 352.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.873523235321045, |
| "eval_runtime": 4.4141, |
| "eval_samples_per_second": 65.926, |
| "eval_steps_per_second": 4.304, |
| "step": 36256 |
| }, |
| { |
| "epoch": 352.43, |
| "learning_rate": 1.2951456310679612e-05, |
| "loss": 0.101, |
| "step": 36300 |
| }, |
| { |
| "epoch": 353.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 5.044714450836182, |
| "eval_runtime": 4.4175, |
| "eval_samples_per_second": 65.874, |
| "eval_steps_per_second": 4.301, |
| "step": 36359 |
| }, |
| { |
| "epoch": 353.4, |
| "learning_rate": 1.2932038834951457e-05, |
| "loss": 0.0944, |
| "step": 36400 |
| }, |
| { |
| "epoch": 354.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.94920015335083, |
| "eval_runtime": 4.4052, |
| "eval_samples_per_second": 66.058, |
| "eval_steps_per_second": 4.313, |
| "step": 36462 |
| }, |
| { |
| "epoch": 354.37, |
| "learning_rate": 1.2912621359223303e-05, |
| "loss": 0.1055, |
| "step": 36500 |
| }, |
| { |
| "epoch": 355.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.989469051361084, |
| "eval_runtime": 4.4203, |
| "eval_samples_per_second": 65.833, |
| "eval_steps_per_second": 4.298, |
| "step": 36565 |
| }, |
| { |
| "epoch": 355.34, |
| "learning_rate": 1.2893203883495146e-05, |
| "loss": 0.0858, |
| "step": 36600 |
| }, |
| { |
| "epoch": 356.0, |
| "eval_accuracy": 0.24398625429553264, |
| "eval_loss": 5.095457077026367, |
| "eval_runtime": 4.3921, |
| "eval_samples_per_second": 66.256, |
| "eval_steps_per_second": 4.326, |
| "step": 36668 |
| }, |
| { |
| "epoch": 356.31, |
| "learning_rate": 1.287378640776699e-05, |
| "loss": 0.0955, |
| "step": 36700 |
| }, |
| { |
| "epoch": 357.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.010565280914307, |
| "eval_runtime": 4.4098, |
| "eval_samples_per_second": 65.99, |
| "eval_steps_per_second": 4.309, |
| "step": 36771 |
| }, |
| { |
| "epoch": 357.28, |
| "learning_rate": 1.2854368932038837e-05, |
| "loss": 0.1108, |
| "step": 36800 |
| }, |
| { |
| "epoch": 358.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.910917282104492, |
| "eval_runtime": 4.4051, |
| "eval_samples_per_second": 66.06, |
| "eval_steps_per_second": 4.313, |
| "step": 36874 |
| }, |
| { |
| "epoch": 358.25, |
| "learning_rate": 1.283495145631068e-05, |
| "loss": 0.1179, |
| "step": 36900 |
| }, |
| { |
| "epoch": 359.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.908169269561768, |
| "eval_runtime": 4.4082, |
| "eval_samples_per_second": 66.013, |
| "eval_steps_per_second": 4.31, |
| "step": 36977 |
| }, |
| { |
| "epoch": 359.22, |
| "learning_rate": 1.2815533980582526e-05, |
| "loss": 0.0984, |
| "step": 37000 |
| }, |
| { |
| "epoch": 360.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.848038673400879, |
| "eval_runtime": 4.4081, |
| "eval_samples_per_second": 66.015, |
| "eval_steps_per_second": 4.31, |
| "step": 37080 |
| }, |
| { |
| "epoch": 360.19, |
| "learning_rate": 1.279611650485437e-05, |
| "loss": 0.0997, |
| "step": 37100 |
| }, |
| { |
| "epoch": 361.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.895744323730469, |
| "eval_runtime": 4.3961, |
| "eval_samples_per_second": 66.196, |
| "eval_steps_per_second": 4.322, |
| "step": 37183 |
| }, |
| { |
| "epoch": 361.17, |
| "learning_rate": 1.2776699029126214e-05, |
| "loss": 0.1128, |
| "step": 37200 |
| }, |
| { |
| "epoch": 362.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.912665843963623, |
| "eval_runtime": 4.4624, |
| "eval_samples_per_second": 65.212, |
| "eval_steps_per_second": 4.258, |
| "step": 37286 |
| }, |
| { |
| "epoch": 362.14, |
| "learning_rate": 1.275728155339806e-05, |
| "loss": 0.0961, |
| "step": 37300 |
| }, |
| { |
| "epoch": 363.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.096518039703369, |
| "eval_runtime": 4.4077, |
| "eval_samples_per_second": 66.021, |
| "eval_steps_per_second": 4.311, |
| "step": 37389 |
| }, |
| { |
| "epoch": 363.11, |
| "learning_rate": 1.2737864077669904e-05, |
| "loss": 0.1096, |
| "step": 37400 |
| }, |
| { |
| "epoch": 364.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.031692028045654, |
| "eval_runtime": 4.4003, |
| "eval_samples_per_second": 66.132, |
| "eval_steps_per_second": 4.318, |
| "step": 37492 |
| }, |
| { |
| "epoch": 364.08, |
| "learning_rate": 1.2718446601941749e-05, |
| "loss": 0.0916, |
| "step": 37500 |
| }, |
| { |
| "epoch": 365.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.974483966827393, |
| "eval_runtime": 4.4064, |
| "eval_samples_per_second": 66.04, |
| "eval_steps_per_second": 4.312, |
| "step": 37595 |
| }, |
| { |
| "epoch": 365.05, |
| "learning_rate": 1.2699029126213594e-05, |
| "loss": 0.1057, |
| "step": 37600 |
| }, |
| { |
| "epoch": 366.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.8774895668029785, |
| "eval_runtime": 4.4086, |
| "eval_samples_per_second": 66.008, |
| "eval_steps_per_second": 4.31, |
| "step": 37698 |
| }, |
| { |
| "epoch": 366.02, |
| "learning_rate": 1.2679611650485437e-05, |
| "loss": 0.0978, |
| "step": 37700 |
| }, |
| { |
| "epoch": 366.99, |
| "learning_rate": 1.2660194174757283e-05, |
| "loss": 0.0932, |
| "step": 37800 |
| }, |
| { |
| "epoch": 367.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.028201580047607, |
| "eval_runtime": 4.4112, |
| "eval_samples_per_second": 65.968, |
| "eval_steps_per_second": 4.307, |
| "step": 37801 |
| }, |
| { |
| "epoch": 367.96, |
| "learning_rate": 1.2640776699029127e-05, |
| "loss": 0.1072, |
| "step": 37900 |
| }, |
| { |
| "epoch": 368.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.809718608856201, |
| "eval_runtime": 4.4102, |
| "eval_samples_per_second": 65.983, |
| "eval_steps_per_second": 4.308, |
| "step": 37904 |
| }, |
| { |
| "epoch": 368.93, |
| "learning_rate": 1.2621359223300974e-05, |
| "loss": 0.0973, |
| "step": 38000 |
| }, |
| { |
| "epoch": 369.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.932143211364746, |
| "eval_runtime": 4.3895, |
| "eval_samples_per_second": 66.295, |
| "eval_steps_per_second": 4.329, |
| "step": 38007 |
| }, |
| { |
| "epoch": 369.9, |
| "learning_rate": 1.2601941747572817e-05, |
| "loss": 0.1034, |
| "step": 38100 |
| }, |
| { |
| "epoch": 370.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.817590236663818, |
| "eval_runtime": 4.4213, |
| "eval_samples_per_second": 65.818, |
| "eval_steps_per_second": 4.297, |
| "step": 38110 |
| }, |
| { |
| "epoch": 370.87, |
| "learning_rate": 1.2582524271844661e-05, |
| "loss": 0.1084, |
| "step": 38200 |
| }, |
| { |
| "epoch": 371.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.856151103973389, |
| "eval_runtime": 4.4093, |
| "eval_samples_per_second": 65.996, |
| "eval_steps_per_second": 4.309, |
| "step": 38213 |
| }, |
| { |
| "epoch": 371.84, |
| "learning_rate": 1.2563106796116506e-05, |
| "loss": 0.0957, |
| "step": 38300 |
| }, |
| { |
| "epoch": 372.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.946646690368652, |
| "eval_runtime": 4.4359, |
| "eval_samples_per_second": 65.601, |
| "eval_steps_per_second": 4.283, |
| "step": 38316 |
| }, |
| { |
| "epoch": 372.82, |
| "learning_rate": 1.254368932038835e-05, |
| "loss": 0.1049, |
| "step": 38400 |
| }, |
| { |
| "epoch": 373.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 4.851525783538818, |
| "eval_runtime": 4.4102, |
| "eval_samples_per_second": 65.984, |
| "eval_steps_per_second": 4.308, |
| "step": 38419 |
| }, |
| { |
| "epoch": 373.79, |
| "learning_rate": 1.2524271844660197e-05, |
| "loss": 0.097, |
| "step": 38500 |
| }, |
| { |
| "epoch": 374.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.8833394050598145, |
| "eval_runtime": 4.4255, |
| "eval_samples_per_second": 65.755, |
| "eval_steps_per_second": 4.293, |
| "step": 38522 |
| }, |
| { |
| "epoch": 374.76, |
| "learning_rate": 1.250485436893204e-05, |
| "loss": 0.1008, |
| "step": 38600 |
| }, |
| { |
| "epoch": 375.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.944166660308838, |
| "eval_runtime": 4.5113, |
| "eval_samples_per_second": 64.504, |
| "eval_steps_per_second": 4.212, |
| "step": 38625 |
| }, |
| { |
| "epoch": 375.73, |
| "learning_rate": 1.2485436893203884e-05, |
| "loss": 0.1019, |
| "step": 38700 |
| }, |
| { |
| "epoch": 376.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.8345046043396, |
| "eval_runtime": 4.4758, |
| "eval_samples_per_second": 65.016, |
| "eval_steps_per_second": 4.245, |
| "step": 38728 |
| }, |
| { |
| "epoch": 376.7, |
| "learning_rate": 1.246601941747573e-05, |
| "loss": 0.1083, |
| "step": 38800 |
| }, |
| { |
| "epoch": 377.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.934985637664795, |
| "eval_runtime": 4.4219, |
| "eval_samples_per_second": 65.809, |
| "eval_steps_per_second": 4.297, |
| "step": 38831 |
| }, |
| { |
| "epoch": 377.67, |
| "learning_rate": 1.2446601941747573e-05, |
| "loss": 0.1181, |
| "step": 38900 |
| }, |
| { |
| "epoch": 378.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 4.860500335693359, |
| "eval_runtime": 4.4042, |
| "eval_samples_per_second": 66.073, |
| "eval_steps_per_second": 4.314, |
| "step": 38934 |
| }, |
| { |
| "epoch": 378.64, |
| "learning_rate": 1.2427184466019418e-05, |
| "loss": 0.1043, |
| "step": 39000 |
| }, |
| { |
| "epoch": 379.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.878326416015625, |
| "eval_runtime": 4.41, |
| "eval_samples_per_second": 65.987, |
| "eval_steps_per_second": 4.308, |
| "step": 39037 |
| }, |
| { |
| "epoch": 379.61, |
| "learning_rate": 1.2407766990291264e-05, |
| "loss": 0.1212, |
| "step": 39100 |
| }, |
| { |
| "epoch": 380.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.8640666007995605, |
| "eval_runtime": 4.3997, |
| "eval_samples_per_second": 66.14, |
| "eval_steps_per_second": 4.318, |
| "step": 39140 |
| }, |
| { |
| "epoch": 380.58, |
| "learning_rate": 1.2388349514563107e-05, |
| "loss": 0.0941, |
| "step": 39200 |
| }, |
| { |
| "epoch": 381.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.9771833419799805, |
| "eval_runtime": 4.4284, |
| "eval_samples_per_second": 65.712, |
| "eval_steps_per_second": 4.29, |
| "step": 39243 |
| }, |
| { |
| "epoch": 381.55, |
| "learning_rate": 1.2368932038834953e-05, |
| "loss": 0.0986, |
| "step": 39300 |
| }, |
| { |
| "epoch": 382.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.919087886810303, |
| "eval_runtime": 4.5207, |
| "eval_samples_per_second": 64.371, |
| "eval_steps_per_second": 4.203, |
| "step": 39346 |
| }, |
| { |
| "epoch": 382.52, |
| "learning_rate": 1.2349514563106798e-05, |
| "loss": 0.1054, |
| "step": 39400 |
| }, |
| { |
| "epoch": 383.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.069497108459473, |
| "eval_runtime": 4.3931, |
| "eval_samples_per_second": 66.241, |
| "eval_steps_per_second": 4.325, |
| "step": 39449 |
| }, |
| { |
| "epoch": 383.5, |
| "learning_rate": 1.233009708737864e-05, |
| "loss": 0.1066, |
| "step": 39500 |
| }, |
| { |
| "epoch": 384.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.114091873168945, |
| "eval_runtime": 4.3868, |
| "eval_samples_per_second": 66.336, |
| "eval_steps_per_second": 4.331, |
| "step": 39552 |
| }, |
| { |
| "epoch": 384.47, |
| "learning_rate": 1.2310679611650487e-05, |
| "loss": 0.0929, |
| "step": 39600 |
| }, |
| { |
| "epoch": 385.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.017634391784668, |
| "eval_runtime": 4.4104, |
| "eval_samples_per_second": 65.98, |
| "eval_steps_per_second": 4.308, |
| "step": 39655 |
| }, |
| { |
| "epoch": 385.44, |
| "learning_rate": 1.229126213592233e-05, |
| "loss": 0.102, |
| "step": 39700 |
| }, |
| { |
| "epoch": 386.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.778977870941162, |
| "eval_runtime": 4.4049, |
| "eval_samples_per_second": 66.063, |
| "eval_steps_per_second": 4.313, |
| "step": 39758 |
| }, |
| { |
| "epoch": 386.41, |
| "learning_rate": 1.2271844660194176e-05, |
| "loss": 0.103, |
| "step": 39800 |
| }, |
| { |
| "epoch": 387.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.734787464141846, |
| "eval_runtime": 4.4138, |
| "eval_samples_per_second": 65.93, |
| "eval_steps_per_second": 4.305, |
| "step": 39861 |
| }, |
| { |
| "epoch": 387.38, |
| "learning_rate": 1.225242718446602e-05, |
| "loss": 0.107, |
| "step": 39900 |
| }, |
| { |
| "epoch": 388.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.666727066040039, |
| "eval_runtime": 4.3935, |
| "eval_samples_per_second": 66.234, |
| "eval_steps_per_second": 4.325, |
| "step": 39964 |
| }, |
| { |
| "epoch": 388.35, |
| "learning_rate": 1.2233009708737864e-05, |
| "loss": 0.0922, |
| "step": 40000 |
| }, |
| { |
| "epoch": 389.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.668744087219238, |
| "eval_runtime": 4.3997, |
| "eval_samples_per_second": 66.14, |
| "eval_steps_per_second": 4.318, |
| "step": 40067 |
| }, |
| { |
| "epoch": 389.32, |
| "learning_rate": 1.221359223300971e-05, |
| "loss": 0.102, |
| "step": 40100 |
| }, |
| { |
| "epoch": 390.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.845048427581787, |
| "eval_runtime": 4.405, |
| "eval_samples_per_second": 66.061, |
| "eval_steps_per_second": 4.313, |
| "step": 40170 |
| }, |
| { |
| "epoch": 390.29, |
| "learning_rate": 1.2194174757281554e-05, |
| "loss": 0.0958, |
| "step": 40200 |
| }, |
| { |
| "epoch": 391.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.127882957458496, |
| "eval_runtime": 4.4089, |
| "eval_samples_per_second": 66.002, |
| "eval_steps_per_second": 4.309, |
| "step": 40273 |
| }, |
| { |
| "epoch": 391.26, |
| "learning_rate": 1.2174757281553399e-05, |
| "loss": 0.0908, |
| "step": 40300 |
| }, |
| { |
| "epoch": 392.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 4.962398529052734, |
| "eval_runtime": 4.3996, |
| "eval_samples_per_second": 66.143, |
| "eval_steps_per_second": 4.319, |
| "step": 40376 |
| }, |
| { |
| "epoch": 392.23, |
| "learning_rate": 1.2155339805825244e-05, |
| "loss": 0.0988, |
| "step": 40400 |
| }, |
| { |
| "epoch": 393.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.167624473571777, |
| "eval_runtime": 4.3907, |
| "eval_samples_per_second": 66.277, |
| "eval_steps_per_second": 4.327, |
| "step": 40479 |
| }, |
| { |
| "epoch": 393.2, |
| "learning_rate": 1.2135922330097088e-05, |
| "loss": 0.0995, |
| "step": 40500 |
| }, |
| { |
| "epoch": 394.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.872605323791504, |
| "eval_runtime": 4.4082, |
| "eval_samples_per_second": 66.013, |
| "eval_steps_per_second": 4.31, |
| "step": 40582 |
| }, |
| { |
| "epoch": 394.17, |
| "learning_rate": 1.2116504854368933e-05, |
| "loss": 0.1087, |
| "step": 40600 |
| }, |
| { |
| "epoch": 395.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.9525041580200195, |
| "eval_runtime": 4.408, |
| "eval_samples_per_second": 66.016, |
| "eval_steps_per_second": 4.31, |
| "step": 40685 |
| }, |
| { |
| "epoch": 395.15, |
| "learning_rate": 1.2097087378640777e-05, |
| "loss": 0.11, |
| "step": 40700 |
| }, |
| { |
| "epoch": 396.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 5.0257697105407715, |
| "eval_runtime": 4.4366, |
| "eval_samples_per_second": 65.591, |
| "eval_steps_per_second": 4.283, |
| "step": 40788 |
| }, |
| { |
| "epoch": 396.12, |
| "learning_rate": 1.2077669902912624e-05, |
| "loss": 0.0916, |
| "step": 40800 |
| }, |
| { |
| "epoch": 397.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 5.011427402496338, |
| "eval_runtime": 4.4266, |
| "eval_samples_per_second": 65.739, |
| "eval_steps_per_second": 4.292, |
| "step": 40891 |
| }, |
| { |
| "epoch": 397.09, |
| "learning_rate": 1.2058252427184467e-05, |
| "loss": 0.089, |
| "step": 40900 |
| }, |
| { |
| "epoch": 398.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.968867778778076, |
| "eval_runtime": 4.4037, |
| "eval_samples_per_second": 66.081, |
| "eval_steps_per_second": 4.315, |
| "step": 40994 |
| }, |
| { |
| "epoch": 398.06, |
| "learning_rate": 1.2038834951456311e-05, |
| "loss": 0.1089, |
| "step": 41000 |
| }, |
| { |
| "epoch": 399.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.864815711975098, |
| "eval_runtime": 4.3906, |
| "eval_samples_per_second": 66.279, |
| "eval_steps_per_second": 4.327, |
| "step": 41097 |
| }, |
| { |
| "epoch": 399.03, |
| "learning_rate": 1.2019417475728157e-05, |
| "loss": 0.0909, |
| "step": 41100 |
| }, |
| { |
| "epoch": 400.0, |
| "learning_rate": 1.2e-05, |
| "loss": 0.085, |
| "step": 41200 |
| }, |
| { |
| "epoch": 400.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.737619400024414, |
| "eval_runtime": 4.4091, |
| "eval_samples_per_second": 65.999, |
| "eval_steps_per_second": 4.309, |
| "step": 41200 |
| }, |
| { |
| "epoch": 400.97, |
| "learning_rate": 1.1980582524271847e-05, |
| "loss": 0.1135, |
| "step": 41300 |
| }, |
| { |
| "epoch": 401.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.968517303466797, |
| "eval_runtime": 4.421, |
| "eval_samples_per_second": 65.821, |
| "eval_steps_per_second": 4.298, |
| "step": 41303 |
| }, |
| { |
| "epoch": 401.94, |
| "learning_rate": 1.196116504854369e-05, |
| "loss": 0.1032, |
| "step": 41400 |
| }, |
| { |
| "epoch": 402.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 4.695452690124512, |
| "eval_runtime": 4.4013, |
| "eval_samples_per_second": 66.117, |
| "eval_steps_per_second": 4.317, |
| "step": 41406 |
| }, |
| { |
| "epoch": 402.91, |
| "learning_rate": 1.1941747572815534e-05, |
| "loss": 0.0987, |
| "step": 41500 |
| }, |
| { |
| "epoch": 403.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.897180557250977, |
| "eval_runtime": 4.4151, |
| "eval_samples_per_second": 65.911, |
| "eval_steps_per_second": 4.303, |
| "step": 41509 |
| }, |
| { |
| "epoch": 403.88, |
| "learning_rate": 1.192233009708738e-05, |
| "loss": 0.1112, |
| "step": 41600 |
| }, |
| { |
| "epoch": 404.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.8028459548950195, |
| "eval_runtime": 4.3946, |
| "eval_samples_per_second": 66.218, |
| "eval_steps_per_second": 4.323, |
| "step": 41612 |
| }, |
| { |
| "epoch": 404.85, |
| "learning_rate": 1.1902912621359223e-05, |
| "loss": 0.0926, |
| "step": 41700 |
| }, |
| { |
| "epoch": 405.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 4.6858320236206055, |
| "eval_runtime": 4.399, |
| "eval_samples_per_second": 66.152, |
| "eval_steps_per_second": 4.319, |
| "step": 41715 |
| }, |
| { |
| "epoch": 405.83, |
| "learning_rate": 1.188349514563107e-05, |
| "loss": 0.1032, |
| "step": 41800 |
| }, |
| { |
| "epoch": 406.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 4.768010139465332, |
| "eval_runtime": 4.395, |
| "eval_samples_per_second": 66.212, |
| "eval_steps_per_second": 4.323, |
| "step": 41818 |
| }, |
| { |
| "epoch": 406.8, |
| "learning_rate": 1.1864077669902914e-05, |
| "loss": 0.1066, |
| "step": 41900 |
| }, |
| { |
| "epoch": 407.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.80867338180542, |
| "eval_runtime": 4.5161, |
| "eval_samples_per_second": 64.436, |
| "eval_steps_per_second": 4.207, |
| "step": 41921 |
| }, |
| { |
| "epoch": 407.77, |
| "learning_rate": 1.1844660194174757e-05, |
| "loss": 0.1053, |
| "step": 42000 |
| }, |
| { |
| "epoch": 408.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.887094020843506, |
| "eval_runtime": 4.4348, |
| "eval_samples_per_second": 65.618, |
| "eval_steps_per_second": 4.284, |
| "step": 42024 |
| }, |
| { |
| "epoch": 408.74, |
| "learning_rate": 1.1825242718446603e-05, |
| "loss": 0.0999, |
| "step": 42100 |
| }, |
| { |
| "epoch": 409.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.705599784851074, |
| "eval_runtime": 4.3941, |
| "eval_samples_per_second": 66.225, |
| "eval_steps_per_second": 4.324, |
| "step": 42127 |
| }, |
| { |
| "epoch": 409.71, |
| "learning_rate": 1.1805825242718448e-05, |
| "loss": 0.0929, |
| "step": 42200 |
| }, |
| { |
| "epoch": 410.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.884646892547607, |
| "eval_runtime": 4.4074, |
| "eval_samples_per_second": 66.026, |
| "eval_steps_per_second": 4.311, |
| "step": 42230 |
| }, |
| { |
| "epoch": 410.68, |
| "learning_rate": 1.1786407766990292e-05, |
| "loss": 0.1138, |
| "step": 42300 |
| }, |
| { |
| "epoch": 411.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.774139404296875, |
| "eval_runtime": 4.4036, |
| "eval_samples_per_second": 66.082, |
| "eval_steps_per_second": 4.315, |
| "step": 42333 |
| }, |
| { |
| "epoch": 411.65, |
| "learning_rate": 1.1766990291262137e-05, |
| "loss": 0.1126, |
| "step": 42400 |
| }, |
| { |
| "epoch": 412.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.915742874145508, |
| "eval_runtime": 4.3921, |
| "eval_samples_per_second": 66.255, |
| "eval_steps_per_second": 4.326, |
| "step": 42436 |
| }, |
| { |
| "epoch": 412.62, |
| "learning_rate": 1.1747572815533982e-05, |
| "loss": 0.0835, |
| "step": 42500 |
| }, |
| { |
| "epoch": 413.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.960720539093018, |
| "eval_runtime": 4.3944, |
| "eval_samples_per_second": 66.22, |
| "eval_steps_per_second": 4.324, |
| "step": 42539 |
| }, |
| { |
| "epoch": 413.59, |
| "learning_rate": 1.1728155339805826e-05, |
| "loss": 0.1004, |
| "step": 42600 |
| }, |
| { |
| "epoch": 414.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 4.77178955078125, |
| "eval_runtime": 4.3908, |
| "eval_samples_per_second": 66.275, |
| "eval_steps_per_second": 4.327, |
| "step": 42642 |
| }, |
| { |
| "epoch": 414.56, |
| "learning_rate": 1.170873786407767e-05, |
| "loss": 0.0972, |
| "step": 42700 |
| }, |
| { |
| "epoch": 415.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.828794479370117, |
| "eval_runtime": 4.5174, |
| "eval_samples_per_second": 64.417, |
| "eval_steps_per_second": 4.206, |
| "step": 42745 |
| }, |
| { |
| "epoch": 415.53, |
| "learning_rate": 1.1689320388349517e-05, |
| "loss": 0.1023, |
| "step": 42800 |
| }, |
| { |
| "epoch": 416.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 4.908327102661133, |
| "eval_runtime": 4.4015, |
| "eval_samples_per_second": 66.114, |
| "eval_steps_per_second": 4.317, |
| "step": 42848 |
| }, |
| { |
| "epoch": 416.5, |
| "learning_rate": 1.166990291262136e-05, |
| "loss": 0.0948, |
| "step": 42900 |
| }, |
| { |
| "epoch": 417.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.850914478302002, |
| "eval_runtime": 4.4189, |
| "eval_samples_per_second": 65.853, |
| "eval_steps_per_second": 4.3, |
| "step": 42951 |
| }, |
| { |
| "epoch": 417.48, |
| "learning_rate": 1.1650485436893204e-05, |
| "loss": 0.0918, |
| "step": 43000 |
| }, |
| { |
| "epoch": 418.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.832261085510254, |
| "eval_runtime": 4.5134, |
| "eval_samples_per_second": 64.475, |
| "eval_steps_per_second": 4.21, |
| "step": 43054 |
| }, |
| { |
| "epoch": 418.45, |
| "learning_rate": 1.163106796116505e-05, |
| "loss": 0.0961, |
| "step": 43100 |
| }, |
| { |
| "epoch": 419.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.956958293914795, |
| "eval_runtime": 4.5157, |
| "eval_samples_per_second": 64.441, |
| "eval_steps_per_second": 4.207, |
| "step": 43157 |
| }, |
| { |
| "epoch": 419.42, |
| "learning_rate": 1.1611650485436894e-05, |
| "loss": 0.0911, |
| "step": 43200 |
| }, |
| { |
| "epoch": 420.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 4.95814847946167, |
| "eval_runtime": 4.398, |
| "eval_samples_per_second": 66.166, |
| "eval_steps_per_second": 4.32, |
| "step": 43260 |
| }, |
| { |
| "epoch": 420.39, |
| "learning_rate": 1.159223300970874e-05, |
| "loss": 0.0927, |
| "step": 43300 |
| }, |
| { |
| "epoch": 421.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 4.985574245452881, |
| "eval_runtime": 4.4056, |
| "eval_samples_per_second": 66.052, |
| "eval_steps_per_second": 4.313, |
| "step": 43363 |
| }, |
| { |
| "epoch": 421.36, |
| "learning_rate": 1.1572815533980583e-05, |
| "loss": 0.0907, |
| "step": 43400 |
| }, |
| { |
| "epoch": 422.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.9146223068237305, |
| "eval_runtime": 4.4091, |
| "eval_samples_per_second": 66.001, |
| "eval_steps_per_second": 4.309, |
| "step": 43466 |
| }, |
| { |
| "epoch": 422.33, |
| "learning_rate": 1.1553398058252427e-05, |
| "loss": 0.1039, |
| "step": 43500 |
| }, |
| { |
| "epoch": 423.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.781336307525635, |
| "eval_runtime": 4.4095, |
| "eval_samples_per_second": 65.994, |
| "eval_steps_per_second": 4.309, |
| "step": 43569 |
| }, |
| { |
| "epoch": 423.3, |
| "learning_rate": 1.1533980582524274e-05, |
| "loss": 0.1093, |
| "step": 43600 |
| }, |
| { |
| "epoch": 424.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 4.957409858703613, |
| "eval_runtime": 4.4089, |
| "eval_samples_per_second": 66.003, |
| "eval_steps_per_second": 4.309, |
| "step": 43672 |
| }, |
| { |
| "epoch": 424.27, |
| "learning_rate": 1.1514563106796117e-05, |
| "loss": 0.0859, |
| "step": 43700 |
| }, |
| { |
| "epoch": 425.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.893417835235596, |
| "eval_runtime": 4.4094, |
| "eval_samples_per_second": 65.995, |
| "eval_steps_per_second": 4.309, |
| "step": 43775 |
| }, |
| { |
| "epoch": 425.24, |
| "learning_rate": 1.1495145631067961e-05, |
| "loss": 0.111, |
| "step": 43800 |
| }, |
| { |
| "epoch": 426.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.856235504150391, |
| "eval_runtime": 4.4105, |
| "eval_samples_per_second": 65.979, |
| "eval_steps_per_second": 4.308, |
| "step": 43878 |
| }, |
| { |
| "epoch": 426.21, |
| "learning_rate": 1.1475728155339807e-05, |
| "loss": 0.0944, |
| "step": 43900 |
| }, |
| { |
| "epoch": 427.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.826057434082031, |
| "eval_runtime": 4.3947, |
| "eval_samples_per_second": 66.215, |
| "eval_steps_per_second": 4.323, |
| "step": 43981 |
| }, |
| { |
| "epoch": 427.18, |
| "learning_rate": 1.145631067961165e-05, |
| "loss": 0.1, |
| "step": 44000 |
| }, |
| { |
| "epoch": 428.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.822572708129883, |
| "eval_runtime": 4.4039, |
| "eval_samples_per_second": 66.078, |
| "eval_steps_per_second": 4.314, |
| "step": 44084 |
| }, |
| { |
| "epoch": 428.16, |
| "learning_rate": 1.1436893203883497e-05, |
| "loss": 0.0965, |
| "step": 44100 |
| }, |
| { |
| "epoch": 429.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 4.810351371765137, |
| "eval_runtime": 4.4271, |
| "eval_samples_per_second": 65.731, |
| "eval_steps_per_second": 4.292, |
| "step": 44187 |
| }, |
| { |
| "epoch": 429.13, |
| "learning_rate": 1.1417475728155341e-05, |
| "loss": 0.0905, |
| "step": 44200 |
| }, |
| { |
| "epoch": 430.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.741602420806885, |
| "eval_runtime": 4.3937, |
| "eval_samples_per_second": 66.232, |
| "eval_steps_per_second": 4.324, |
| "step": 44290 |
| }, |
| { |
| "epoch": 430.1, |
| "learning_rate": 1.1398058252427184e-05, |
| "loss": 0.1095, |
| "step": 44300 |
| }, |
| { |
| "epoch": 431.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.087652683258057, |
| "eval_runtime": 4.3905, |
| "eval_samples_per_second": 66.28, |
| "eval_steps_per_second": 4.328, |
| "step": 44393 |
| }, |
| { |
| "epoch": 431.07, |
| "learning_rate": 1.137864077669903e-05, |
| "loss": 0.0855, |
| "step": 44400 |
| }, |
| { |
| "epoch": 432.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.93923282623291, |
| "eval_runtime": 4.406, |
| "eval_samples_per_second": 66.046, |
| "eval_steps_per_second": 4.312, |
| "step": 44496 |
| }, |
| { |
| "epoch": 432.04, |
| "learning_rate": 1.1359223300970875e-05, |
| "loss": 0.1079, |
| "step": 44500 |
| }, |
| { |
| "epoch": 433.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 4.822700023651123, |
| "eval_runtime": 4.393, |
| "eval_samples_per_second": 66.242, |
| "eval_steps_per_second": 4.325, |
| "step": 44599 |
| }, |
| { |
| "epoch": 433.01, |
| "learning_rate": 1.133980582524272e-05, |
| "loss": 0.112, |
| "step": 44600 |
| }, |
| { |
| "epoch": 433.98, |
| "learning_rate": 1.1320388349514564e-05, |
| "loss": 0.102, |
| "step": 44700 |
| }, |
| { |
| "epoch": 434.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.977917671203613, |
| "eval_runtime": 4.4052, |
| "eval_samples_per_second": 66.058, |
| "eval_steps_per_second": 4.313, |
| "step": 44702 |
| }, |
| { |
| "epoch": 434.95, |
| "learning_rate": 1.1300970873786407e-05, |
| "loss": 0.0888, |
| "step": 44800 |
| }, |
| { |
| "epoch": 435.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.995783805847168, |
| "eval_runtime": 4.3942, |
| "eval_samples_per_second": 66.224, |
| "eval_steps_per_second": 4.324, |
| "step": 44805 |
| }, |
| { |
| "epoch": 435.92, |
| "learning_rate": 1.1281553398058253e-05, |
| "loss": 0.0842, |
| "step": 44900 |
| }, |
| { |
| "epoch": 436.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.74613094329834, |
| "eval_runtime": 4.417, |
| "eval_samples_per_second": 65.881, |
| "eval_steps_per_second": 4.302, |
| "step": 44908 |
| }, |
| { |
| "epoch": 436.89, |
| "learning_rate": 1.1262135922330098e-05, |
| "loss": 0.0918, |
| "step": 45000 |
| }, |
| { |
| "epoch": 437.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 5.059698104858398, |
| "eval_runtime": 4.4033, |
| "eval_samples_per_second": 66.087, |
| "eval_steps_per_second": 4.315, |
| "step": 45011 |
| }, |
| { |
| "epoch": 437.86, |
| "learning_rate": 1.1242718446601944e-05, |
| "loss": 0.0911, |
| "step": 45100 |
| }, |
| { |
| "epoch": 438.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.977145195007324, |
| "eval_runtime": 4.4276, |
| "eval_samples_per_second": 65.724, |
| "eval_steps_per_second": 4.291, |
| "step": 45114 |
| }, |
| { |
| "epoch": 438.83, |
| "learning_rate": 1.1223300970873787e-05, |
| "loss": 0.0859, |
| "step": 45200 |
| }, |
| { |
| "epoch": 439.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.837311744689941, |
| "eval_runtime": 4.4081, |
| "eval_samples_per_second": 66.014, |
| "eval_steps_per_second": 4.31, |
| "step": 45217 |
| }, |
| { |
| "epoch": 439.81, |
| "learning_rate": 1.1203883495145632e-05, |
| "loss": 0.0916, |
| "step": 45300 |
| }, |
| { |
| "epoch": 440.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 4.74083948135376, |
| "eval_runtime": 4.3993, |
| "eval_samples_per_second": 66.147, |
| "eval_steps_per_second": 4.319, |
| "step": 45320 |
| }, |
| { |
| "epoch": 440.78, |
| "learning_rate": 1.1184466019417476e-05, |
| "loss": 0.0988, |
| "step": 45400 |
| }, |
| { |
| "epoch": 441.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 4.78790807723999, |
| "eval_runtime": 4.3954, |
| "eval_samples_per_second": 66.205, |
| "eval_steps_per_second": 4.323, |
| "step": 45423 |
| }, |
| { |
| "epoch": 441.75, |
| "learning_rate": 1.116504854368932e-05, |
| "loss": 0.0994, |
| "step": 45500 |
| }, |
| { |
| "epoch": 442.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.735467433929443, |
| "eval_runtime": 4.3956, |
| "eval_samples_per_second": 66.203, |
| "eval_steps_per_second": 4.323, |
| "step": 45526 |
| }, |
| { |
| "epoch": 442.72, |
| "learning_rate": 1.1145631067961167e-05, |
| "loss": 0.102, |
| "step": 45600 |
| }, |
| { |
| "epoch": 443.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 4.869570255279541, |
| "eval_runtime": 4.4285, |
| "eval_samples_per_second": 65.711, |
| "eval_steps_per_second": 4.29, |
| "step": 45629 |
| }, |
| { |
| "epoch": 443.69, |
| "learning_rate": 1.112621359223301e-05, |
| "loss": 0.0951, |
| "step": 45700 |
| }, |
| { |
| "epoch": 444.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 4.957821369171143, |
| "eval_runtime": 4.4022, |
| "eval_samples_per_second": 66.104, |
| "eval_steps_per_second": 4.316, |
| "step": 45732 |
| }, |
| { |
| "epoch": 444.66, |
| "learning_rate": 1.1106796116504855e-05, |
| "loss": 0.0843, |
| "step": 45800 |
| }, |
| { |
| "epoch": 445.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.033973217010498, |
| "eval_runtime": 4.4069, |
| "eval_samples_per_second": 66.033, |
| "eval_steps_per_second": 4.311, |
| "step": 45835 |
| }, |
| { |
| "epoch": 445.63, |
| "learning_rate": 1.10873786407767e-05, |
| "loss": 0.0927, |
| "step": 45900 |
| }, |
| { |
| "epoch": 446.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.01215934753418, |
| "eval_runtime": 4.4401, |
| "eval_samples_per_second": 65.539, |
| "eval_steps_per_second": 4.279, |
| "step": 45938 |
| }, |
| { |
| "epoch": 446.6, |
| "learning_rate": 1.1067961165048544e-05, |
| "loss": 0.1028, |
| "step": 46000 |
| }, |
| { |
| "epoch": 447.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.8365044593811035, |
| "eval_runtime": 4.4122, |
| "eval_samples_per_second": 65.953, |
| "eval_steps_per_second": 4.306, |
| "step": 46041 |
| }, |
| { |
| "epoch": 447.57, |
| "learning_rate": 1.104854368932039e-05, |
| "loss": 0.0988, |
| "step": 46100 |
| }, |
| { |
| "epoch": 448.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 4.978984355926514, |
| "eval_runtime": 4.3946, |
| "eval_samples_per_second": 66.218, |
| "eval_steps_per_second": 4.324, |
| "step": 46144 |
| }, |
| { |
| "epoch": 448.54, |
| "learning_rate": 1.1029126213592235e-05, |
| "loss": 0.0993, |
| "step": 46200 |
| }, |
| { |
| "epoch": 449.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 4.857437610626221, |
| "eval_runtime": 4.3957, |
| "eval_samples_per_second": 66.2, |
| "eval_steps_per_second": 4.322, |
| "step": 46247 |
| }, |
| { |
| "epoch": 449.51, |
| "learning_rate": 1.1009708737864077e-05, |
| "loss": 0.0935, |
| "step": 46300 |
| }, |
| { |
| "epoch": 450.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.0488691329956055, |
| "eval_runtime": 4.3937, |
| "eval_samples_per_second": 66.231, |
| "eval_steps_per_second": 4.324, |
| "step": 46350 |
| }, |
| { |
| "epoch": 450.49, |
| "learning_rate": 1.0990291262135924e-05, |
| "loss": 0.0942, |
| "step": 46400 |
| }, |
| { |
| "epoch": 451.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.9593119621276855, |
| "eval_runtime": 4.3941, |
| "eval_samples_per_second": 66.225, |
| "eval_steps_per_second": 4.324, |
| "step": 46453 |
| }, |
| { |
| "epoch": 451.46, |
| "learning_rate": 1.0970873786407768e-05, |
| "loss": 0.0875, |
| "step": 46500 |
| }, |
| { |
| "epoch": 452.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 4.957134246826172, |
| "eval_runtime": 4.4335, |
| "eval_samples_per_second": 65.637, |
| "eval_steps_per_second": 4.286, |
| "step": 46556 |
| }, |
| { |
| "epoch": 452.43, |
| "learning_rate": 1.0951456310679613e-05, |
| "loss": 0.0968, |
| "step": 46600 |
| }, |
| { |
| "epoch": 453.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 4.800377368927002, |
| "eval_runtime": 4.4318, |
| "eval_samples_per_second": 65.662, |
| "eval_steps_per_second": 4.287, |
| "step": 46659 |
| }, |
| { |
| "epoch": 453.4, |
| "learning_rate": 1.0932038834951457e-05, |
| "loss": 0.0969, |
| "step": 46700 |
| }, |
| { |
| "epoch": 454.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.191004276275635, |
| "eval_runtime": 4.3946, |
| "eval_samples_per_second": 66.217, |
| "eval_steps_per_second": 4.323, |
| "step": 46762 |
| }, |
| { |
| "epoch": 454.37, |
| "learning_rate": 1.09126213592233e-05, |
| "loss": 0.0954, |
| "step": 46800 |
| }, |
| { |
| "epoch": 455.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.035511016845703, |
| "eval_runtime": 4.4202, |
| "eval_samples_per_second": 65.835, |
| "eval_steps_per_second": 4.298, |
| "step": 46865 |
| }, |
| { |
| "epoch": 455.34, |
| "learning_rate": 1.0893203883495147e-05, |
| "loss": 0.1008, |
| "step": 46900 |
| }, |
| { |
| "epoch": 456.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 4.853602409362793, |
| "eval_runtime": 4.442, |
| "eval_samples_per_second": 65.51, |
| "eval_steps_per_second": 4.277, |
| "step": 46968 |
| }, |
| { |
| "epoch": 456.31, |
| "learning_rate": 1.0873786407766991e-05, |
| "loss": 0.09, |
| "step": 47000 |
| }, |
| { |
| "epoch": 457.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 4.704257488250732, |
| "eval_runtime": 4.4109, |
| "eval_samples_per_second": 65.972, |
| "eval_steps_per_second": 4.307, |
| "step": 47071 |
| }, |
| { |
| "epoch": 457.28, |
| "learning_rate": 1.0854368932038837e-05, |
| "loss": 0.1064, |
| "step": 47100 |
| }, |
| { |
| "epoch": 458.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 4.873353958129883, |
| "eval_runtime": 4.4341, |
| "eval_samples_per_second": 65.628, |
| "eval_steps_per_second": 4.285, |
| "step": 47174 |
| }, |
| { |
| "epoch": 458.25, |
| "learning_rate": 1.083495145631068e-05, |
| "loss": 0.0902, |
| "step": 47200 |
| }, |
| { |
| "epoch": 459.0, |
| "eval_accuracy": 0.32989690721649484, |
| "eval_loss": 4.906158447265625, |
| "eval_runtime": 4.3951, |
| "eval_samples_per_second": 66.21, |
| "eval_steps_per_second": 4.323, |
| "step": 47277 |
| }, |
| { |
| "epoch": 459.22, |
| "learning_rate": 1.0815533980582525e-05, |
| "loss": 0.0831, |
| "step": 47300 |
| }, |
| { |
| "epoch": 460.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.066910266876221, |
| "eval_runtime": 4.4086, |
| "eval_samples_per_second": 66.007, |
| "eval_steps_per_second": 4.31, |
| "step": 47380 |
| }, |
| { |
| "epoch": 460.19, |
| "learning_rate": 1.079611650485437e-05, |
| "loss": 0.1008, |
| "step": 47400 |
| }, |
| { |
| "epoch": 461.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.140333652496338, |
| "eval_runtime": 4.413, |
| "eval_samples_per_second": 65.942, |
| "eval_steps_per_second": 4.305, |
| "step": 47483 |
| }, |
| { |
| "epoch": 461.17, |
| "learning_rate": 1.0776699029126214e-05, |
| "loss": 0.0883, |
| "step": 47500 |
| }, |
| { |
| "epoch": 462.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.177355766296387, |
| "eval_runtime": 4.3997, |
| "eval_samples_per_second": 66.141, |
| "eval_steps_per_second": 4.318, |
| "step": 47586 |
| }, |
| { |
| "epoch": 462.14, |
| "learning_rate": 1.075728155339806e-05, |
| "loss": 0.0915, |
| "step": 47600 |
| }, |
| { |
| "epoch": 463.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.148591995239258, |
| "eval_runtime": 4.4145, |
| "eval_samples_per_second": 65.92, |
| "eval_steps_per_second": 4.304, |
| "step": 47689 |
| }, |
| { |
| "epoch": 463.11, |
| "learning_rate": 1.0737864077669903e-05, |
| "loss": 0.1124, |
| "step": 47700 |
| }, |
| { |
| "epoch": 464.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.107584476470947, |
| "eval_runtime": 4.5054, |
| "eval_samples_per_second": 64.589, |
| "eval_steps_per_second": 4.217, |
| "step": 47792 |
| }, |
| { |
| "epoch": 464.08, |
| "learning_rate": 1.0718446601941748e-05, |
| "loss": 0.0892, |
| "step": 47800 |
| }, |
| { |
| "epoch": 465.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.02621603012085, |
| "eval_runtime": 4.4092, |
| "eval_samples_per_second": 65.999, |
| "eval_steps_per_second": 4.309, |
| "step": 47895 |
| }, |
| { |
| "epoch": 465.05, |
| "learning_rate": 1.0699029126213594e-05, |
| "loss": 0.088, |
| "step": 47900 |
| }, |
| { |
| "epoch": 466.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.167210102081299, |
| "eval_runtime": 4.4082, |
| "eval_samples_per_second": 66.014, |
| "eval_steps_per_second": 4.31, |
| "step": 47998 |
| }, |
| { |
| "epoch": 466.02, |
| "learning_rate": 1.0679611650485437e-05, |
| "loss": 0.0862, |
| "step": 48000 |
| }, |
| { |
| "epoch": 466.99, |
| "learning_rate": 1.0660194174757283e-05, |
| "loss": 0.0969, |
| "step": 48100 |
| }, |
| { |
| "epoch": 467.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.17960786819458, |
| "eval_runtime": 4.4129, |
| "eval_samples_per_second": 65.943, |
| "eval_steps_per_second": 4.306, |
| "step": 48101 |
| }, |
| { |
| "epoch": 467.96, |
| "learning_rate": 1.0640776699029128e-05, |
| "loss": 0.0851, |
| "step": 48200 |
| }, |
| { |
| "epoch": 468.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 5.142207622528076, |
| "eval_runtime": 4.4128, |
| "eval_samples_per_second": 65.945, |
| "eval_steps_per_second": 4.306, |
| "step": 48204 |
| }, |
| { |
| "epoch": 468.93, |
| "learning_rate": 1.062135922330097e-05, |
| "loss": 0.094, |
| "step": 48300 |
| }, |
| { |
| "epoch": 469.0, |
| "eval_accuracy": 0.2508591065292096, |
| "eval_loss": 5.166329383850098, |
| "eval_runtime": 4.4012, |
| "eval_samples_per_second": 66.118, |
| "eval_steps_per_second": 4.317, |
| "step": 48307 |
| }, |
| { |
| "epoch": 469.9, |
| "learning_rate": 1.0601941747572817e-05, |
| "loss": 0.085, |
| "step": 48400 |
| }, |
| { |
| "epoch": 470.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.2026872634887695, |
| "eval_runtime": 4.5175, |
| "eval_samples_per_second": 64.416, |
| "eval_steps_per_second": 4.206, |
| "step": 48410 |
| }, |
| { |
| "epoch": 470.87, |
| "learning_rate": 1.0582524271844662e-05, |
| "loss": 0.0953, |
| "step": 48500 |
| }, |
| { |
| "epoch": 471.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.078782081604004, |
| "eval_runtime": 4.4749, |
| "eval_samples_per_second": 65.03, |
| "eval_steps_per_second": 4.246, |
| "step": 48513 |
| }, |
| { |
| "epoch": 471.84, |
| "learning_rate": 1.0563106796116506e-05, |
| "loss": 0.097, |
| "step": 48600 |
| }, |
| { |
| "epoch": 472.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.156815528869629, |
| "eval_runtime": 4.4008, |
| "eval_samples_per_second": 66.124, |
| "eval_steps_per_second": 4.317, |
| "step": 48616 |
| }, |
| { |
| "epoch": 472.82, |
| "learning_rate": 1.054368932038835e-05, |
| "loss": 0.092, |
| "step": 48700 |
| }, |
| { |
| "epoch": 473.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.017523765563965, |
| "eval_runtime": 4.3959, |
| "eval_samples_per_second": 66.199, |
| "eval_steps_per_second": 4.322, |
| "step": 48719 |
| }, |
| { |
| "epoch": 473.79, |
| "learning_rate": 1.0524271844660194e-05, |
| "loss": 0.0876, |
| "step": 48800 |
| }, |
| { |
| "epoch": 474.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.006375789642334, |
| "eval_runtime": 4.4478, |
| "eval_samples_per_second": 65.426, |
| "eval_steps_per_second": 4.272, |
| "step": 48822 |
| }, |
| { |
| "epoch": 474.76, |
| "learning_rate": 1.050485436893204e-05, |
| "loss": 0.0984, |
| "step": 48900 |
| }, |
| { |
| "epoch": 475.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 4.988500595092773, |
| "eval_runtime": 4.388, |
| "eval_samples_per_second": 66.317, |
| "eval_steps_per_second": 4.33, |
| "step": 48925 |
| }, |
| { |
| "epoch": 475.73, |
| "learning_rate": 1.0485436893203885e-05, |
| "loss": 0.0781, |
| "step": 49000 |
| }, |
| { |
| "epoch": 476.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.167103290557861, |
| "eval_runtime": 4.4047, |
| "eval_samples_per_second": 66.066, |
| "eval_steps_per_second": 4.314, |
| "step": 49028 |
| }, |
| { |
| "epoch": 476.7, |
| "learning_rate": 1.0466019417475727e-05, |
| "loss": 0.1001, |
| "step": 49100 |
| }, |
| { |
| "epoch": 477.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.242895603179932, |
| "eval_runtime": 4.3902, |
| "eval_samples_per_second": 66.284, |
| "eval_steps_per_second": 4.328, |
| "step": 49131 |
| }, |
| { |
| "epoch": 477.67, |
| "learning_rate": 1.0446601941747574e-05, |
| "loss": 0.085, |
| "step": 49200 |
| }, |
| { |
| "epoch": 478.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.267037868499756, |
| "eval_runtime": 4.5251, |
| "eval_samples_per_second": 64.307, |
| "eval_steps_per_second": 4.199, |
| "step": 49234 |
| }, |
| { |
| "epoch": 478.64, |
| "learning_rate": 1.0427184466019418e-05, |
| "loss": 0.0924, |
| "step": 49300 |
| }, |
| { |
| "epoch": 479.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.075860023498535, |
| "eval_runtime": 4.3975, |
| "eval_samples_per_second": 66.174, |
| "eval_steps_per_second": 4.321, |
| "step": 49337 |
| }, |
| { |
| "epoch": 479.61, |
| "learning_rate": 1.0407766990291263e-05, |
| "loss": 0.0855, |
| "step": 49400 |
| }, |
| { |
| "epoch": 480.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.26733922958374, |
| "eval_runtime": 4.4145, |
| "eval_samples_per_second": 65.919, |
| "eval_steps_per_second": 4.304, |
| "step": 49440 |
| }, |
| { |
| "epoch": 480.58, |
| "learning_rate": 1.0388349514563107e-05, |
| "loss": 0.1018, |
| "step": 49500 |
| }, |
| { |
| "epoch": 481.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.171545028686523, |
| "eval_runtime": 4.4381, |
| "eval_samples_per_second": 65.569, |
| "eval_steps_per_second": 4.281, |
| "step": 49543 |
| }, |
| { |
| "epoch": 481.55, |
| "learning_rate": 1.0368932038834952e-05, |
| "loss": 0.0883, |
| "step": 49600 |
| }, |
| { |
| "epoch": 482.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.085958480834961, |
| "eval_runtime": 4.4086, |
| "eval_samples_per_second": 66.008, |
| "eval_steps_per_second": 4.31, |
| "step": 49646 |
| }, |
| { |
| "epoch": 482.52, |
| "learning_rate": 1.0349514563106797e-05, |
| "loss": 0.101, |
| "step": 49700 |
| }, |
| { |
| "epoch": 483.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.18726110458374, |
| "eval_runtime": 4.4027, |
| "eval_samples_per_second": 66.095, |
| "eval_steps_per_second": 4.316, |
| "step": 49749 |
| }, |
| { |
| "epoch": 483.5, |
| "learning_rate": 1.0330097087378641e-05, |
| "loss": 0.1061, |
| "step": 49800 |
| }, |
| { |
| "epoch": 484.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.115561008453369, |
| "eval_runtime": 4.4013, |
| "eval_samples_per_second": 66.117, |
| "eval_steps_per_second": 4.317, |
| "step": 49852 |
| }, |
| { |
| "epoch": 484.47, |
| "learning_rate": 1.0310679611650487e-05, |
| "loss": 0.1091, |
| "step": 49900 |
| }, |
| { |
| "epoch": 485.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.1338372230529785, |
| "eval_runtime": 4.4034, |
| "eval_samples_per_second": 66.085, |
| "eval_steps_per_second": 4.315, |
| "step": 49955 |
| }, |
| { |
| "epoch": 485.44, |
| "learning_rate": 1.029126213592233e-05, |
| "loss": 0.0935, |
| "step": 50000 |
| }, |
| { |
| "epoch": 486.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.0872015953063965, |
| "eval_runtime": 4.4043, |
| "eval_samples_per_second": 66.072, |
| "eval_steps_per_second": 4.314, |
| "step": 50058 |
| }, |
| { |
| "epoch": 486.41, |
| "learning_rate": 1.0271844660194175e-05, |
| "loss": 0.0983, |
| "step": 50100 |
| }, |
| { |
| "epoch": 487.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.034875392913818, |
| "eval_runtime": 4.3888, |
| "eval_samples_per_second": 66.305, |
| "eval_steps_per_second": 4.329, |
| "step": 50161 |
| }, |
| { |
| "epoch": 487.38, |
| "learning_rate": 1.0252427184466021e-05, |
| "loss": 0.0955, |
| "step": 50200 |
| }, |
| { |
| "epoch": 488.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.149185657501221, |
| "eval_runtime": 4.4094, |
| "eval_samples_per_second": 65.995, |
| "eval_steps_per_second": 4.309, |
| "step": 50264 |
| }, |
| { |
| "epoch": 488.35, |
| "learning_rate": 1.0233009708737864e-05, |
| "loss": 0.1065, |
| "step": 50300 |
| }, |
| { |
| "epoch": 489.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.052890777587891, |
| "eval_runtime": 4.4248, |
| "eval_samples_per_second": 65.765, |
| "eval_steps_per_second": 4.294, |
| "step": 50367 |
| }, |
| { |
| "epoch": 489.32, |
| "learning_rate": 1.021359223300971e-05, |
| "loss": 0.0771, |
| "step": 50400 |
| }, |
| { |
| "epoch": 490.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.01772928237915, |
| "eval_runtime": 4.4527, |
| "eval_samples_per_second": 65.354, |
| "eval_steps_per_second": 4.267, |
| "step": 50470 |
| }, |
| { |
| "epoch": 490.29, |
| "learning_rate": 1.0194174757281555e-05, |
| "loss": 0.0962, |
| "step": 50500 |
| }, |
| { |
| "epoch": 491.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.068234443664551, |
| "eval_runtime": 4.3973, |
| "eval_samples_per_second": 66.177, |
| "eval_steps_per_second": 4.321, |
| "step": 50573 |
| }, |
| { |
| "epoch": 491.26, |
| "learning_rate": 1.0174757281553398e-05, |
| "loss": 0.0701, |
| "step": 50600 |
| }, |
| { |
| "epoch": 492.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.144649505615234, |
| "eval_runtime": 4.425, |
| "eval_samples_per_second": 65.763, |
| "eval_steps_per_second": 4.294, |
| "step": 50676 |
| }, |
| { |
| "epoch": 492.23, |
| "learning_rate": 1.0155339805825244e-05, |
| "loss": 0.0908, |
| "step": 50700 |
| }, |
| { |
| "epoch": 493.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.131927013397217, |
| "eval_runtime": 4.4148, |
| "eval_samples_per_second": 65.915, |
| "eval_steps_per_second": 4.304, |
| "step": 50779 |
| }, |
| { |
| "epoch": 493.2, |
| "learning_rate": 1.0135922330097087e-05, |
| "loss": 0.0957, |
| "step": 50800 |
| }, |
| { |
| "epoch": 494.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 5.173168659210205, |
| "eval_runtime": 4.4124, |
| "eval_samples_per_second": 65.95, |
| "eval_steps_per_second": 4.306, |
| "step": 50882 |
| }, |
| { |
| "epoch": 494.17, |
| "learning_rate": 1.0116504854368933e-05, |
| "loss": 0.1039, |
| "step": 50900 |
| }, |
| { |
| "epoch": 495.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.140829086303711, |
| "eval_runtime": 4.4096, |
| "eval_samples_per_second": 65.992, |
| "eval_steps_per_second": 4.309, |
| "step": 50985 |
| }, |
| { |
| "epoch": 495.15, |
| "learning_rate": 1.0097087378640778e-05, |
| "loss": 0.0947, |
| "step": 51000 |
| }, |
| { |
| "epoch": 496.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.1906304359436035, |
| "eval_runtime": 4.4456, |
| "eval_samples_per_second": 65.458, |
| "eval_steps_per_second": 4.274, |
| "step": 51088 |
| }, |
| { |
| "epoch": 496.12, |
| "learning_rate": 1.007766990291262e-05, |
| "loss": 0.097, |
| "step": 51100 |
| }, |
| { |
| "epoch": 497.0, |
| "eval_accuracy": 0.24054982817869416, |
| "eval_loss": 5.318382740020752, |
| "eval_runtime": 4.3901, |
| "eval_samples_per_second": 66.286, |
| "eval_steps_per_second": 4.328, |
| "step": 51191 |
| }, |
| { |
| "epoch": 497.09, |
| "learning_rate": 1.0058252427184467e-05, |
| "loss": 0.0848, |
| "step": 51200 |
| }, |
| { |
| "epoch": 498.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.134629726409912, |
| "eval_runtime": 4.3879, |
| "eval_samples_per_second": 66.319, |
| "eval_steps_per_second": 4.33, |
| "step": 51294 |
| }, |
| { |
| "epoch": 498.06, |
| "learning_rate": 1.0038834951456312e-05, |
| "loss": 0.0855, |
| "step": 51300 |
| }, |
| { |
| "epoch": 499.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.015251636505127, |
| "eval_runtime": 4.4095, |
| "eval_samples_per_second": 65.994, |
| "eval_steps_per_second": 4.309, |
| "step": 51397 |
| }, |
| { |
| "epoch": 499.03, |
| "learning_rate": 1.0019417475728156e-05, |
| "loss": 0.0848, |
| "step": 51400 |
| }, |
| { |
| "epoch": 500.0, |
| "learning_rate": 1e-05, |
| "loss": 0.1041, |
| "step": 51500 |
| }, |
| { |
| "epoch": 500.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 5.1230010986328125, |
| "eval_runtime": 4.4364, |
| "eval_samples_per_second": 65.593, |
| "eval_steps_per_second": 4.283, |
| "step": 51500 |
| }, |
| { |
| "epoch": 500.97, |
| "learning_rate": 9.980582524271845e-06, |
| "loss": 0.0936, |
| "step": 51600 |
| }, |
| { |
| "epoch": 501.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.133138656616211, |
| "eval_runtime": 4.4417, |
| "eval_samples_per_second": 65.515, |
| "eval_steps_per_second": 4.278, |
| "step": 51603 |
| }, |
| { |
| "epoch": 501.94, |
| "learning_rate": 9.96116504854369e-06, |
| "loss": 0.0934, |
| "step": 51700 |
| }, |
| { |
| "epoch": 502.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 5.176680564880371, |
| "eval_runtime": 4.3896, |
| "eval_samples_per_second": 66.293, |
| "eval_steps_per_second": 4.328, |
| "step": 51706 |
| }, |
| { |
| "epoch": 502.91, |
| "learning_rate": 9.941747572815535e-06, |
| "loss": 0.0966, |
| "step": 51800 |
| }, |
| { |
| "epoch": 503.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.0494513511657715, |
| "eval_runtime": 4.4589, |
| "eval_samples_per_second": 65.263, |
| "eval_steps_per_second": 4.261, |
| "step": 51809 |
| }, |
| { |
| "epoch": 503.88, |
| "learning_rate": 9.922330097087379e-06, |
| "loss": 0.0953, |
| "step": 51900 |
| }, |
| { |
| "epoch": 504.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 5.061805248260498, |
| "eval_runtime": 4.3961, |
| "eval_samples_per_second": 66.195, |
| "eval_steps_per_second": 4.322, |
| "step": 51912 |
| }, |
| { |
| "epoch": 504.85, |
| "learning_rate": 9.902912621359224e-06, |
| "loss": 0.0852, |
| "step": 52000 |
| }, |
| { |
| "epoch": 505.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.116728782653809, |
| "eval_runtime": 4.3911, |
| "eval_samples_per_second": 66.27, |
| "eval_steps_per_second": 4.327, |
| "step": 52015 |
| }, |
| { |
| "epoch": 505.83, |
| "learning_rate": 9.883495145631068e-06, |
| "loss": 0.0889, |
| "step": 52100 |
| }, |
| { |
| "epoch": 506.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.098071575164795, |
| "eval_runtime": 4.4018, |
| "eval_samples_per_second": 66.109, |
| "eval_steps_per_second": 4.316, |
| "step": 52118 |
| }, |
| { |
| "epoch": 506.8, |
| "learning_rate": 9.864077669902915e-06, |
| "loss": 0.0854, |
| "step": 52200 |
| }, |
| { |
| "epoch": 507.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.185293674468994, |
| "eval_runtime": 4.4, |
| "eval_samples_per_second": 66.136, |
| "eval_steps_per_second": 4.318, |
| "step": 52221 |
| }, |
| { |
| "epoch": 507.77, |
| "learning_rate": 9.844660194174757e-06, |
| "loss": 0.0877, |
| "step": 52300 |
| }, |
| { |
| "epoch": 508.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.2160773277282715, |
| "eval_runtime": 4.3889, |
| "eval_samples_per_second": 66.303, |
| "eval_steps_per_second": 4.329, |
| "step": 52324 |
| }, |
| { |
| "epoch": 508.74, |
| "learning_rate": 9.825242718446602e-06, |
| "loss": 0.1074, |
| "step": 52400 |
| }, |
| { |
| "epoch": 509.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 5.167038440704346, |
| "eval_runtime": 4.3915, |
| "eval_samples_per_second": 66.264, |
| "eval_steps_per_second": 4.327, |
| "step": 52427 |
| }, |
| { |
| "epoch": 509.71, |
| "learning_rate": 9.805825242718447e-06, |
| "loss": 0.1055, |
| "step": 52500 |
| }, |
| { |
| "epoch": 510.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.054455757141113, |
| "eval_runtime": 4.3937, |
| "eval_samples_per_second": 66.231, |
| "eval_steps_per_second": 4.324, |
| "step": 52530 |
| }, |
| { |
| "epoch": 510.68, |
| "learning_rate": 9.786407766990293e-06, |
| "loss": 0.0789, |
| "step": 52600 |
| }, |
| { |
| "epoch": 511.0, |
| "eval_accuracy": 0.2508591065292096, |
| "eval_loss": 5.069103717803955, |
| "eval_runtime": 4.4043, |
| "eval_samples_per_second": 66.072, |
| "eval_steps_per_second": 4.314, |
| "step": 52633 |
| }, |
| { |
| "epoch": 511.65, |
| "learning_rate": 9.766990291262138e-06, |
| "loss": 0.0816, |
| "step": 52700 |
| }, |
| { |
| "epoch": 512.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.084735870361328, |
| "eval_runtime": 4.4034, |
| "eval_samples_per_second": 66.085, |
| "eval_steps_per_second": 4.315, |
| "step": 52736 |
| }, |
| { |
| "epoch": 512.62, |
| "learning_rate": 9.74757281553398e-06, |
| "loss": 0.0818, |
| "step": 52800 |
| }, |
| { |
| "epoch": 513.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.130674839019775, |
| "eval_runtime": 4.4001, |
| "eval_samples_per_second": 66.134, |
| "eval_steps_per_second": 4.318, |
| "step": 52839 |
| }, |
| { |
| "epoch": 513.59, |
| "learning_rate": 9.728155339805827e-06, |
| "loss": 0.0999, |
| "step": 52900 |
| }, |
| { |
| "epoch": 514.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.102930068969727, |
| "eval_runtime": 4.3935, |
| "eval_samples_per_second": 66.234, |
| "eval_steps_per_second": 4.325, |
| "step": 52942 |
| }, |
| { |
| "epoch": 514.56, |
| "learning_rate": 9.708737864077671e-06, |
| "loss": 0.0787, |
| "step": 53000 |
| }, |
| { |
| "epoch": 515.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.226955890655518, |
| "eval_runtime": 4.3935, |
| "eval_samples_per_second": 66.234, |
| "eval_steps_per_second": 4.325, |
| "step": 53045 |
| }, |
| { |
| "epoch": 515.53, |
| "learning_rate": 9.689320388349516e-06, |
| "loss": 0.0892, |
| "step": 53100 |
| }, |
| { |
| "epoch": 516.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.192480087280273, |
| "eval_runtime": 4.4136, |
| "eval_samples_per_second": 65.932, |
| "eval_steps_per_second": 4.305, |
| "step": 53148 |
| }, |
| { |
| "epoch": 516.5, |
| "learning_rate": 9.669902912621359e-06, |
| "loss": 0.0995, |
| "step": 53200 |
| }, |
| { |
| "epoch": 517.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.246269702911377, |
| "eval_runtime": 4.4171, |
| "eval_samples_per_second": 65.88, |
| "eval_steps_per_second": 4.301, |
| "step": 53251 |
| }, |
| { |
| "epoch": 517.48, |
| "learning_rate": 9.650485436893205e-06, |
| "loss": 0.0812, |
| "step": 53300 |
| }, |
| { |
| "epoch": 518.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.37426233291626, |
| "eval_runtime": 4.4189, |
| "eval_samples_per_second": 65.853, |
| "eval_steps_per_second": 4.3, |
| "step": 53354 |
| }, |
| { |
| "epoch": 518.45, |
| "learning_rate": 9.63106796116505e-06, |
| "loss": 0.101, |
| "step": 53400 |
| }, |
| { |
| "epoch": 519.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.19058084487915, |
| "eval_runtime": 4.3966, |
| "eval_samples_per_second": 66.188, |
| "eval_steps_per_second": 4.322, |
| "step": 53457 |
| }, |
| { |
| "epoch": 519.42, |
| "learning_rate": 9.611650485436894e-06, |
| "loss": 0.082, |
| "step": 53500 |
| }, |
| { |
| "epoch": 520.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.165647506713867, |
| "eval_runtime": 4.4225, |
| "eval_samples_per_second": 65.8, |
| "eval_steps_per_second": 4.296, |
| "step": 53560 |
| }, |
| { |
| "epoch": 520.39, |
| "learning_rate": 9.592233009708739e-06, |
| "loss": 0.0904, |
| "step": 53600 |
| }, |
| { |
| "epoch": 521.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.105106353759766, |
| "eval_runtime": 4.4296, |
| "eval_samples_per_second": 65.694, |
| "eval_steps_per_second": 4.289, |
| "step": 53663 |
| }, |
| { |
| "epoch": 521.36, |
| "learning_rate": 9.572815533980583e-06, |
| "loss": 0.0909, |
| "step": 53700 |
| }, |
| { |
| "epoch": 522.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.254262924194336, |
| "eval_runtime": 4.496, |
| "eval_samples_per_second": 64.725, |
| "eval_steps_per_second": 4.226, |
| "step": 53766 |
| }, |
| { |
| "epoch": 522.33, |
| "learning_rate": 9.553398058252428e-06, |
| "loss": 0.1033, |
| "step": 53800 |
| }, |
| { |
| "epoch": 523.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.217056751251221, |
| "eval_runtime": 4.4543, |
| "eval_samples_per_second": 65.33, |
| "eval_steps_per_second": 4.266, |
| "step": 53869 |
| }, |
| { |
| "epoch": 523.3, |
| "learning_rate": 9.533980582524273e-06, |
| "loss": 0.0793, |
| "step": 53900 |
| }, |
| { |
| "epoch": 524.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.242816925048828, |
| "eval_runtime": 4.3911, |
| "eval_samples_per_second": 66.27, |
| "eval_steps_per_second": 4.327, |
| "step": 53972 |
| }, |
| { |
| "epoch": 524.27, |
| "learning_rate": 9.514563106796117e-06, |
| "loss": 0.0879, |
| "step": 54000 |
| }, |
| { |
| "epoch": 525.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.3479905128479, |
| "eval_runtime": 4.4236, |
| "eval_samples_per_second": 65.784, |
| "eval_steps_per_second": 4.295, |
| "step": 54075 |
| }, |
| { |
| "epoch": 525.24, |
| "learning_rate": 9.495145631067962e-06, |
| "loss": 0.0836, |
| "step": 54100 |
| }, |
| { |
| "epoch": 526.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.280987739562988, |
| "eval_runtime": 4.4062, |
| "eval_samples_per_second": 66.044, |
| "eval_steps_per_second": 4.312, |
| "step": 54178 |
| }, |
| { |
| "epoch": 526.21, |
| "learning_rate": 9.475728155339806e-06, |
| "loss": 0.0886, |
| "step": 54200 |
| }, |
| { |
| "epoch": 527.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.253178596496582, |
| "eval_runtime": 4.4495, |
| "eval_samples_per_second": 65.401, |
| "eval_steps_per_second": 4.27, |
| "step": 54281 |
| }, |
| { |
| "epoch": 527.18, |
| "learning_rate": 9.45631067961165e-06, |
| "loss": 0.0881, |
| "step": 54300 |
| }, |
| { |
| "epoch": 528.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 5.499323844909668, |
| "eval_runtime": 4.4145, |
| "eval_samples_per_second": 65.919, |
| "eval_steps_per_second": 4.304, |
| "step": 54384 |
| }, |
| { |
| "epoch": 528.16, |
| "learning_rate": 9.436893203883495e-06, |
| "loss": 0.1158, |
| "step": 54400 |
| }, |
| { |
| "epoch": 529.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.275381565093994, |
| "eval_runtime": 4.3577, |
| "eval_samples_per_second": 66.779, |
| "eval_steps_per_second": 4.36, |
| "step": 54487 |
| }, |
| { |
| "epoch": 529.13, |
| "learning_rate": 9.41747572815534e-06, |
| "loss": 0.0984, |
| "step": 54500 |
| }, |
| { |
| "epoch": 530.0, |
| "eval_accuracy": 0.2508591065292096, |
| "eval_loss": 5.223719120025635, |
| "eval_runtime": 4.3685, |
| "eval_samples_per_second": 66.614, |
| "eval_steps_per_second": 4.349, |
| "step": 54590 |
| }, |
| { |
| "epoch": 530.1, |
| "learning_rate": 9.398058252427186e-06, |
| "loss": 0.0974, |
| "step": 54600 |
| }, |
| { |
| "epoch": 531.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.413337707519531, |
| "eval_runtime": 4.4815, |
| "eval_samples_per_second": 64.934, |
| "eval_steps_per_second": 4.24, |
| "step": 54693 |
| }, |
| { |
| "epoch": 531.07, |
| "learning_rate": 9.37864077669903e-06, |
| "loss": 0.0892, |
| "step": 54700 |
| }, |
| { |
| "epoch": 532.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.2499871253967285, |
| "eval_runtime": 4.38, |
| "eval_samples_per_second": 66.438, |
| "eval_steps_per_second": 4.338, |
| "step": 54796 |
| }, |
| { |
| "epoch": 532.04, |
| "learning_rate": 9.359223300970874e-06, |
| "loss": 0.0892, |
| "step": 54800 |
| }, |
| { |
| "epoch": 533.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 5.320400714874268, |
| "eval_runtime": 4.4262, |
| "eval_samples_per_second": 65.745, |
| "eval_steps_per_second": 4.293, |
| "step": 54899 |
| }, |
| { |
| "epoch": 533.01, |
| "learning_rate": 9.33980582524272e-06, |
| "loss": 0.0938, |
| "step": 54900 |
| }, |
| { |
| "epoch": 533.98, |
| "learning_rate": 9.320388349514565e-06, |
| "loss": 0.0873, |
| "step": 55000 |
| }, |
| { |
| "epoch": 534.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.227492332458496, |
| "eval_runtime": 4.3496, |
| "eval_samples_per_second": 66.902, |
| "eval_steps_per_second": 4.368, |
| "step": 55002 |
| }, |
| { |
| "epoch": 534.95, |
| "learning_rate": 9.30097087378641e-06, |
| "loss": 0.0882, |
| "step": 55100 |
| }, |
| { |
| "epoch": 535.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.204889297485352, |
| "eval_runtime": 4.401, |
| "eval_samples_per_second": 66.121, |
| "eval_steps_per_second": 4.317, |
| "step": 55105 |
| }, |
| { |
| "epoch": 535.92, |
| "learning_rate": 9.281553398058252e-06, |
| "loss": 0.0915, |
| "step": 55200 |
| }, |
| { |
| "epoch": 536.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.2154951095581055, |
| "eval_runtime": 4.3769, |
| "eval_samples_per_second": 66.486, |
| "eval_steps_per_second": 4.341, |
| "step": 55208 |
| }, |
| { |
| "epoch": 536.89, |
| "learning_rate": 9.262135922330098e-06, |
| "loss": 0.0759, |
| "step": 55300 |
| }, |
| { |
| "epoch": 537.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.279453754425049, |
| "eval_runtime": 4.3694, |
| "eval_samples_per_second": 66.6, |
| "eval_steps_per_second": 4.348, |
| "step": 55311 |
| }, |
| { |
| "epoch": 537.86, |
| "learning_rate": 9.242718446601943e-06, |
| "loss": 0.0893, |
| "step": 55400 |
| }, |
| { |
| "epoch": 538.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.227136135101318, |
| "eval_runtime": 4.3788, |
| "eval_samples_per_second": 66.457, |
| "eval_steps_per_second": 4.339, |
| "step": 55414 |
| }, |
| { |
| "epoch": 538.83, |
| "learning_rate": 9.223300970873788e-06, |
| "loss": 0.0845, |
| "step": 55500 |
| }, |
| { |
| "epoch": 539.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.234629154205322, |
| "eval_runtime": 4.3591, |
| "eval_samples_per_second": 66.757, |
| "eval_steps_per_second": 4.359, |
| "step": 55517 |
| }, |
| { |
| "epoch": 539.81, |
| "learning_rate": 9.203883495145632e-06, |
| "loss": 0.0912, |
| "step": 55600 |
| }, |
| { |
| "epoch": 540.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.244317054748535, |
| "eval_runtime": 4.3649, |
| "eval_samples_per_second": 66.669, |
| "eval_steps_per_second": 4.353, |
| "step": 55620 |
| }, |
| { |
| "epoch": 540.78, |
| "learning_rate": 9.184466019417477e-06, |
| "loss": 0.0804, |
| "step": 55700 |
| }, |
| { |
| "epoch": 541.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.277728080749512, |
| "eval_runtime": 4.4039, |
| "eval_samples_per_second": 66.078, |
| "eval_steps_per_second": 4.314, |
| "step": 55723 |
| }, |
| { |
| "epoch": 541.75, |
| "learning_rate": 9.165048543689321e-06, |
| "loss": 0.0753, |
| "step": 55800 |
| }, |
| { |
| "epoch": 542.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.358335494995117, |
| "eval_runtime": 4.3815, |
| "eval_samples_per_second": 66.415, |
| "eval_steps_per_second": 4.336, |
| "step": 55826 |
| }, |
| { |
| "epoch": 542.72, |
| "learning_rate": 9.145631067961166e-06, |
| "loss": 0.0829, |
| "step": 55900 |
| }, |
| { |
| "epoch": 543.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.189969062805176, |
| "eval_runtime": 4.3835, |
| "eval_samples_per_second": 66.385, |
| "eval_steps_per_second": 4.334, |
| "step": 55929 |
| }, |
| { |
| "epoch": 543.69, |
| "learning_rate": 9.12621359223301e-06, |
| "loss": 0.0984, |
| "step": 56000 |
| }, |
| { |
| "epoch": 544.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.192966938018799, |
| "eval_runtime": 4.3632, |
| "eval_samples_per_second": 66.695, |
| "eval_steps_per_second": 4.355, |
| "step": 56032 |
| }, |
| { |
| "epoch": 544.66, |
| "learning_rate": 9.106796116504855e-06, |
| "loss": 0.0993, |
| "step": 56100 |
| }, |
| { |
| "epoch": 545.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.122324466705322, |
| "eval_runtime": 4.439, |
| "eval_samples_per_second": 65.555, |
| "eval_steps_per_second": 4.28, |
| "step": 56135 |
| }, |
| { |
| "epoch": 545.63, |
| "learning_rate": 9.0873786407767e-06, |
| "loss": 0.0793, |
| "step": 56200 |
| }, |
| { |
| "epoch": 546.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.210149765014648, |
| "eval_runtime": 4.3685, |
| "eval_samples_per_second": 66.613, |
| "eval_steps_per_second": 4.349, |
| "step": 56238 |
| }, |
| { |
| "epoch": 546.6, |
| "learning_rate": 9.067961165048544e-06, |
| "loss": 0.0912, |
| "step": 56300 |
| }, |
| { |
| "epoch": 547.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.274239540100098, |
| "eval_runtime": 4.3627, |
| "eval_samples_per_second": 66.701, |
| "eval_steps_per_second": 4.355, |
| "step": 56341 |
| }, |
| { |
| "epoch": 547.57, |
| "learning_rate": 9.048543689320389e-06, |
| "loss": 0.0892, |
| "step": 56400 |
| }, |
| { |
| "epoch": 548.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.1733808517456055, |
| "eval_runtime": 4.3916, |
| "eval_samples_per_second": 66.262, |
| "eval_steps_per_second": 4.326, |
| "step": 56444 |
| }, |
| { |
| "epoch": 548.54, |
| "learning_rate": 9.029126213592233e-06, |
| "loss": 0.1029, |
| "step": 56500 |
| }, |
| { |
| "epoch": 549.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.265845775604248, |
| "eval_runtime": 4.4438, |
| "eval_samples_per_second": 65.485, |
| "eval_steps_per_second": 4.276, |
| "step": 56547 |
| }, |
| { |
| "epoch": 549.51, |
| "learning_rate": 9.009708737864078e-06, |
| "loss": 0.0863, |
| "step": 56600 |
| }, |
| { |
| "epoch": 550.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.237177848815918, |
| "eval_runtime": 4.3771, |
| "eval_samples_per_second": 66.483, |
| "eval_steps_per_second": 4.341, |
| "step": 56650 |
| }, |
| { |
| "epoch": 550.49, |
| "learning_rate": 8.990291262135923e-06, |
| "loss": 0.1017, |
| "step": 56700 |
| }, |
| { |
| "epoch": 551.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.210503101348877, |
| "eval_runtime": 4.4112, |
| "eval_samples_per_second": 65.968, |
| "eval_steps_per_second": 4.307, |
| "step": 56753 |
| }, |
| { |
| "epoch": 551.46, |
| "learning_rate": 8.970873786407767e-06, |
| "loss": 0.0883, |
| "step": 56800 |
| }, |
| { |
| "epoch": 552.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.105496883392334, |
| "eval_runtime": 4.4302, |
| "eval_samples_per_second": 65.685, |
| "eval_steps_per_second": 4.289, |
| "step": 56856 |
| }, |
| { |
| "epoch": 552.43, |
| "learning_rate": 8.951456310679613e-06, |
| "loss": 0.1042, |
| "step": 56900 |
| }, |
| { |
| "epoch": 553.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 5.24324893951416, |
| "eval_runtime": 4.3771, |
| "eval_samples_per_second": 66.482, |
| "eval_steps_per_second": 4.341, |
| "step": 56959 |
| }, |
| { |
| "epoch": 553.4, |
| "learning_rate": 8.932038834951458e-06, |
| "loss": 0.0817, |
| "step": 57000 |
| }, |
| { |
| "epoch": 554.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.242309093475342, |
| "eval_runtime": 4.3846, |
| "eval_samples_per_second": 66.368, |
| "eval_steps_per_second": 4.333, |
| "step": 57062 |
| }, |
| { |
| "epoch": 554.37, |
| "learning_rate": 8.912621359223301e-06, |
| "loss": 0.0869, |
| "step": 57100 |
| }, |
| { |
| "epoch": 555.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.22501277923584, |
| "eval_runtime": 4.4814, |
| "eval_samples_per_second": 64.935, |
| "eval_steps_per_second": 4.24, |
| "step": 57165 |
| }, |
| { |
| "epoch": 555.34, |
| "learning_rate": 8.893203883495145e-06, |
| "loss": 0.0843, |
| "step": 57200 |
| }, |
| { |
| "epoch": 556.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.196157932281494, |
| "eval_runtime": 4.4042, |
| "eval_samples_per_second": 66.073, |
| "eval_steps_per_second": 4.314, |
| "step": 57268 |
| }, |
| { |
| "epoch": 556.31, |
| "learning_rate": 8.873786407766992e-06, |
| "loss": 0.0887, |
| "step": 57300 |
| }, |
| { |
| "epoch": 557.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.1147990226745605, |
| "eval_runtime": 4.4748, |
| "eval_samples_per_second": 65.031, |
| "eval_steps_per_second": 4.246, |
| "step": 57371 |
| }, |
| { |
| "epoch": 557.28, |
| "learning_rate": 8.854368932038836e-06, |
| "loss": 0.0838, |
| "step": 57400 |
| }, |
| { |
| "epoch": 558.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.020167350769043, |
| "eval_runtime": 4.3534, |
| "eval_samples_per_second": 66.845, |
| "eval_steps_per_second": 4.364, |
| "step": 57474 |
| }, |
| { |
| "epoch": 558.25, |
| "learning_rate": 8.834951456310681e-06, |
| "loss": 0.0759, |
| "step": 57500 |
| }, |
| { |
| "epoch": 559.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 5.0678253173828125, |
| "eval_runtime": 4.3946, |
| "eval_samples_per_second": 66.218, |
| "eval_steps_per_second": 4.324, |
| "step": 57577 |
| }, |
| { |
| "epoch": 559.22, |
| "learning_rate": 8.815533980582525e-06, |
| "loss": 0.0934, |
| "step": 57600 |
| }, |
| { |
| "epoch": 560.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 4.955771446228027, |
| "eval_runtime": 4.4482, |
| "eval_samples_per_second": 65.42, |
| "eval_steps_per_second": 4.271, |
| "step": 57680 |
| }, |
| { |
| "epoch": 560.19, |
| "learning_rate": 8.79611650485437e-06, |
| "loss": 0.0858, |
| "step": 57700 |
| }, |
| { |
| "epoch": 561.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.016815185546875, |
| "eval_runtime": 4.3972, |
| "eval_samples_per_second": 66.178, |
| "eval_steps_per_second": 4.321, |
| "step": 57783 |
| }, |
| { |
| "epoch": 561.17, |
| "learning_rate": 8.776699029126215e-06, |
| "loss": 0.0873, |
| "step": 57800 |
| }, |
| { |
| "epoch": 562.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.045673847198486, |
| "eval_runtime": 4.3475, |
| "eval_samples_per_second": 66.936, |
| "eval_steps_per_second": 4.37, |
| "step": 57886 |
| }, |
| { |
| "epoch": 562.14, |
| "learning_rate": 8.75728155339806e-06, |
| "loss": 0.0902, |
| "step": 57900 |
| }, |
| { |
| "epoch": 563.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 5.046875, |
| "eval_runtime": 4.3618, |
| "eval_samples_per_second": 66.716, |
| "eval_steps_per_second": 4.356, |
| "step": 57989 |
| }, |
| { |
| "epoch": 563.11, |
| "learning_rate": 8.737864077669904e-06, |
| "loss": 0.0793, |
| "step": 58000 |
| }, |
| { |
| "epoch": 564.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 4.987062931060791, |
| "eval_runtime": 4.3493, |
| "eval_samples_per_second": 66.907, |
| "eval_steps_per_second": 4.368, |
| "step": 58092 |
| }, |
| { |
| "epoch": 564.08, |
| "learning_rate": 8.718446601941748e-06, |
| "loss": 0.0882, |
| "step": 58100 |
| }, |
| { |
| "epoch": 565.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.158361911773682, |
| "eval_runtime": 4.485, |
| "eval_samples_per_second": 64.883, |
| "eval_steps_per_second": 4.236, |
| "step": 58195 |
| }, |
| { |
| "epoch": 565.05, |
| "learning_rate": 8.699029126213593e-06, |
| "loss": 0.0984, |
| "step": 58200 |
| }, |
| { |
| "epoch": 566.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 5.074683666229248, |
| "eval_runtime": 4.5094, |
| "eval_samples_per_second": 64.532, |
| "eval_steps_per_second": 4.213, |
| "step": 58298 |
| }, |
| { |
| "epoch": 566.02, |
| "learning_rate": 8.679611650485438e-06, |
| "loss": 0.0818, |
| "step": 58300 |
| }, |
| { |
| "epoch": 566.99, |
| "learning_rate": 8.660194174757282e-06, |
| "loss": 0.0824, |
| "step": 58400 |
| }, |
| { |
| "epoch": 567.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 5.173541069030762, |
| "eval_runtime": 4.496, |
| "eval_samples_per_second": 64.723, |
| "eval_steps_per_second": 4.226, |
| "step": 58401 |
| }, |
| { |
| "epoch": 567.96, |
| "learning_rate": 8.640776699029127e-06, |
| "loss": 0.0794, |
| "step": 58500 |
| }, |
| { |
| "epoch": 568.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 5.1322712898254395, |
| "eval_runtime": 4.3776, |
| "eval_samples_per_second": 66.475, |
| "eval_steps_per_second": 4.34, |
| "step": 58504 |
| }, |
| { |
| "epoch": 568.93, |
| "learning_rate": 8.621359223300971e-06, |
| "loss": 0.0847, |
| "step": 58600 |
| }, |
| { |
| "epoch": 569.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 5.129234313964844, |
| "eval_runtime": 4.3766, |
| "eval_samples_per_second": 66.49, |
| "eval_steps_per_second": 4.341, |
| "step": 58607 |
| }, |
| { |
| "epoch": 569.9, |
| "learning_rate": 8.601941747572816e-06, |
| "loss": 0.0833, |
| "step": 58700 |
| }, |
| { |
| "epoch": 570.0, |
| "eval_accuracy": 0.32646048109965636, |
| "eval_loss": 5.070975303649902, |
| "eval_runtime": 4.3647, |
| "eval_samples_per_second": 66.671, |
| "eval_steps_per_second": 4.353, |
| "step": 58710 |
| }, |
| { |
| "epoch": 570.87, |
| "learning_rate": 8.58252427184466e-06, |
| "loss": 0.0831, |
| "step": 58800 |
| }, |
| { |
| "epoch": 571.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.120458126068115, |
| "eval_runtime": 4.3931, |
| "eval_samples_per_second": 66.24, |
| "eval_steps_per_second": 4.325, |
| "step": 58813 |
| }, |
| { |
| "epoch": 571.84, |
| "learning_rate": 8.563106796116507e-06, |
| "loss": 0.0922, |
| "step": 58900 |
| }, |
| { |
| "epoch": 572.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.100735187530518, |
| "eval_runtime": 4.3647, |
| "eval_samples_per_second": 66.672, |
| "eval_steps_per_second": 4.353, |
| "step": 58916 |
| }, |
| { |
| "epoch": 572.82, |
| "learning_rate": 8.54368932038835e-06, |
| "loss": 0.0906, |
| "step": 59000 |
| }, |
| { |
| "epoch": 573.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.19244384765625, |
| "eval_runtime": 4.3639, |
| "eval_samples_per_second": 66.683, |
| "eval_steps_per_second": 4.354, |
| "step": 59019 |
| }, |
| { |
| "epoch": 573.79, |
| "learning_rate": 8.524271844660194e-06, |
| "loss": 0.1079, |
| "step": 59100 |
| }, |
| { |
| "epoch": 574.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.193302154541016, |
| "eval_runtime": 4.3456, |
| "eval_samples_per_second": 66.964, |
| "eval_steps_per_second": 4.372, |
| "step": 59122 |
| }, |
| { |
| "epoch": 574.76, |
| "learning_rate": 8.504854368932039e-06, |
| "loss": 0.0943, |
| "step": 59200 |
| }, |
| { |
| "epoch": 575.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.155801773071289, |
| "eval_runtime": 4.3466, |
| "eval_samples_per_second": 66.949, |
| "eval_steps_per_second": 4.371, |
| "step": 59225 |
| }, |
| { |
| "epoch": 575.73, |
| "learning_rate": 8.485436893203885e-06, |
| "loss": 0.0877, |
| "step": 59300 |
| }, |
| { |
| "epoch": 576.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.157259941101074, |
| "eval_runtime": 4.4499, |
| "eval_samples_per_second": 65.395, |
| "eval_steps_per_second": 4.27, |
| "step": 59328 |
| }, |
| { |
| "epoch": 576.7, |
| "learning_rate": 8.46601941747573e-06, |
| "loss": 0.0977, |
| "step": 59400 |
| }, |
| { |
| "epoch": 577.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.031143665313721, |
| "eval_runtime": 4.3879, |
| "eval_samples_per_second": 66.319, |
| "eval_steps_per_second": 4.33, |
| "step": 59431 |
| }, |
| { |
| "epoch": 577.67, |
| "learning_rate": 8.446601941747573e-06, |
| "loss": 0.0751, |
| "step": 59500 |
| }, |
| { |
| "epoch": 578.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.158066272735596, |
| "eval_runtime": 4.4017, |
| "eval_samples_per_second": 66.111, |
| "eval_steps_per_second": 4.317, |
| "step": 59534 |
| }, |
| { |
| "epoch": 578.64, |
| "learning_rate": 8.427184466019419e-06, |
| "loss": 0.096, |
| "step": 59600 |
| }, |
| { |
| "epoch": 579.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.211477756500244, |
| "eval_runtime": 4.3934, |
| "eval_samples_per_second": 66.236, |
| "eval_steps_per_second": 4.325, |
| "step": 59637 |
| }, |
| { |
| "epoch": 579.61, |
| "learning_rate": 8.407766990291263e-06, |
| "loss": 0.0902, |
| "step": 59700 |
| }, |
| { |
| "epoch": 580.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.254421234130859, |
| "eval_runtime": 4.4012, |
| "eval_samples_per_second": 66.118, |
| "eval_steps_per_second": 4.317, |
| "step": 59740 |
| }, |
| { |
| "epoch": 580.58, |
| "learning_rate": 8.388349514563108e-06, |
| "loss": 0.1052, |
| "step": 59800 |
| }, |
| { |
| "epoch": 581.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 5.161226749420166, |
| "eval_runtime": 4.4919, |
| "eval_samples_per_second": 64.784, |
| "eval_steps_per_second": 4.23, |
| "step": 59843 |
| }, |
| { |
| "epoch": 581.55, |
| "learning_rate": 8.368932038834953e-06, |
| "loss": 0.0763, |
| "step": 59900 |
| }, |
| { |
| "epoch": 582.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.143395900726318, |
| "eval_runtime": 4.3936, |
| "eval_samples_per_second": 66.233, |
| "eval_steps_per_second": 4.325, |
| "step": 59946 |
| }, |
| { |
| "epoch": 582.52, |
| "learning_rate": 8.349514563106797e-06, |
| "loss": 0.0904, |
| "step": 60000 |
| }, |
| { |
| "epoch": 583.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.191125869750977, |
| "eval_runtime": 4.4669, |
| "eval_samples_per_second": 65.146, |
| "eval_steps_per_second": 4.254, |
| "step": 60049 |
| }, |
| { |
| "epoch": 583.5, |
| "learning_rate": 8.330097087378642e-06, |
| "loss": 0.0868, |
| "step": 60100 |
| }, |
| { |
| "epoch": 584.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.171573162078857, |
| "eval_runtime": 4.4419, |
| "eval_samples_per_second": 65.512, |
| "eval_steps_per_second": 4.277, |
| "step": 60152 |
| }, |
| { |
| "epoch": 584.47, |
| "learning_rate": 8.310679611650486e-06, |
| "loss": 0.091, |
| "step": 60200 |
| }, |
| { |
| "epoch": 585.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.17667818069458, |
| "eval_runtime": 4.4127, |
| "eval_samples_per_second": 65.946, |
| "eval_steps_per_second": 4.306, |
| "step": 60255 |
| }, |
| { |
| "epoch": 585.44, |
| "learning_rate": 8.291262135922331e-06, |
| "loss": 0.0936, |
| "step": 60300 |
| }, |
| { |
| "epoch": 586.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.180116176605225, |
| "eval_runtime": 4.3695, |
| "eval_samples_per_second": 66.599, |
| "eval_steps_per_second": 4.348, |
| "step": 60358 |
| }, |
| { |
| "epoch": 586.41, |
| "learning_rate": 8.271844660194175e-06, |
| "loss": 0.082, |
| "step": 60400 |
| }, |
| { |
| "epoch": 587.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.049594879150391, |
| "eval_runtime": 4.3508, |
| "eval_samples_per_second": 66.885, |
| "eval_steps_per_second": 4.367, |
| "step": 60461 |
| }, |
| { |
| "epoch": 587.38, |
| "learning_rate": 8.25242718446602e-06, |
| "loss": 0.0999, |
| "step": 60500 |
| }, |
| { |
| "epoch": 588.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.258527755737305, |
| "eval_runtime": 4.3735, |
| "eval_samples_per_second": 66.538, |
| "eval_steps_per_second": 4.344, |
| "step": 60564 |
| }, |
| { |
| "epoch": 588.35, |
| "learning_rate": 8.233009708737865e-06, |
| "loss": 0.0826, |
| "step": 60600 |
| }, |
| { |
| "epoch": 589.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.256552696228027, |
| "eval_runtime": 4.521, |
| "eval_samples_per_second": 64.367, |
| "eval_steps_per_second": 4.203, |
| "step": 60667 |
| }, |
| { |
| "epoch": 589.32, |
| "learning_rate": 8.21359223300971e-06, |
| "loss": 0.0949, |
| "step": 60700 |
| }, |
| { |
| "epoch": 590.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.301484107971191, |
| "eval_runtime": 4.3639, |
| "eval_samples_per_second": 66.684, |
| "eval_steps_per_second": 4.354, |
| "step": 60770 |
| }, |
| { |
| "epoch": 590.29, |
| "learning_rate": 8.194174757281554e-06, |
| "loss": 0.0828, |
| "step": 60800 |
| }, |
| { |
| "epoch": 591.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.1411333084106445, |
| "eval_runtime": 4.4822, |
| "eval_samples_per_second": 64.924, |
| "eval_steps_per_second": 4.239, |
| "step": 60873 |
| }, |
| { |
| "epoch": 591.26, |
| "learning_rate": 8.174757281553398e-06, |
| "loss": 0.0827, |
| "step": 60900 |
| }, |
| { |
| "epoch": 592.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.119908332824707, |
| "eval_runtime": 4.392, |
| "eval_samples_per_second": 66.256, |
| "eval_steps_per_second": 4.326, |
| "step": 60976 |
| }, |
| { |
| "epoch": 592.23, |
| "learning_rate": 8.155339805825243e-06, |
| "loss": 0.0943, |
| "step": 61000 |
| }, |
| { |
| "epoch": 593.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.10630989074707, |
| "eval_runtime": 4.4688, |
| "eval_samples_per_second": 65.119, |
| "eval_steps_per_second": 4.252, |
| "step": 61079 |
| }, |
| { |
| "epoch": 593.2, |
| "learning_rate": 8.135922330097088e-06, |
| "loss": 0.076, |
| "step": 61100 |
| }, |
| { |
| "epoch": 594.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.114058494567871, |
| "eval_runtime": 4.602, |
| "eval_samples_per_second": 63.233, |
| "eval_steps_per_second": 4.129, |
| "step": 61182 |
| }, |
| { |
| "epoch": 594.17, |
| "learning_rate": 8.116504854368932e-06, |
| "loss": 0.0917, |
| "step": 61200 |
| }, |
| { |
| "epoch": 595.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.14142370223999, |
| "eval_runtime": 4.3886, |
| "eval_samples_per_second": 66.308, |
| "eval_steps_per_second": 4.329, |
| "step": 61285 |
| }, |
| { |
| "epoch": 595.15, |
| "learning_rate": 8.097087378640778e-06, |
| "loss": 0.0976, |
| "step": 61300 |
| }, |
| { |
| "epoch": 596.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.144129753112793, |
| "eval_runtime": 4.3645, |
| "eval_samples_per_second": 66.674, |
| "eval_steps_per_second": 4.353, |
| "step": 61388 |
| }, |
| { |
| "epoch": 596.12, |
| "learning_rate": 8.077669902912621e-06, |
| "loss": 0.0804, |
| "step": 61400 |
| }, |
| { |
| "epoch": 597.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.168061256408691, |
| "eval_runtime": 4.4405, |
| "eval_samples_per_second": 65.533, |
| "eval_steps_per_second": 4.279, |
| "step": 61491 |
| }, |
| { |
| "epoch": 597.09, |
| "learning_rate": 8.058252427184466e-06, |
| "loss": 0.0923, |
| "step": 61500 |
| }, |
| { |
| "epoch": 598.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.133292198181152, |
| "eval_runtime": 4.3696, |
| "eval_samples_per_second": 66.596, |
| "eval_steps_per_second": 4.348, |
| "step": 61594 |
| }, |
| { |
| "epoch": 598.06, |
| "learning_rate": 8.038834951456312e-06, |
| "loss": 0.093, |
| "step": 61600 |
| }, |
| { |
| "epoch": 599.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.125970840454102, |
| "eval_runtime": 4.3619, |
| "eval_samples_per_second": 66.715, |
| "eval_steps_per_second": 4.356, |
| "step": 61697 |
| }, |
| { |
| "epoch": 599.03, |
| "learning_rate": 8.019417475728157e-06, |
| "loss": 0.0872, |
| "step": 61700 |
| }, |
| { |
| "epoch": 600.0, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.0926, |
| "step": 61800 |
| }, |
| { |
| "epoch": 600.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 5.156001567840576, |
| "eval_runtime": 4.4356, |
| "eval_samples_per_second": 65.606, |
| "eval_steps_per_second": 4.284, |
| "step": 61800 |
| }, |
| { |
| "epoch": 600.97, |
| "learning_rate": 7.980582524271844e-06, |
| "loss": 0.0844, |
| "step": 61900 |
| }, |
| { |
| "epoch": 601.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.193061828613281, |
| "eval_runtime": 4.3861, |
| "eval_samples_per_second": 66.346, |
| "eval_steps_per_second": 4.332, |
| "step": 61903 |
| }, |
| { |
| "epoch": 601.94, |
| "learning_rate": 7.96116504854369e-06, |
| "loss": 0.0847, |
| "step": 62000 |
| }, |
| { |
| "epoch": 602.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.086513996124268, |
| "eval_runtime": 4.4154, |
| "eval_samples_per_second": 65.906, |
| "eval_steps_per_second": 4.303, |
| "step": 62006 |
| }, |
| { |
| "epoch": 602.91, |
| "learning_rate": 7.941747572815535e-06, |
| "loss": 0.0822, |
| "step": 62100 |
| }, |
| { |
| "epoch": 603.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.086156368255615, |
| "eval_runtime": 4.3772, |
| "eval_samples_per_second": 66.48, |
| "eval_steps_per_second": 4.341, |
| "step": 62109 |
| }, |
| { |
| "epoch": 603.88, |
| "learning_rate": 7.92233009708738e-06, |
| "loss": 0.0771, |
| "step": 62200 |
| }, |
| { |
| "epoch": 604.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.047454833984375, |
| "eval_runtime": 4.4113, |
| "eval_samples_per_second": 65.967, |
| "eval_steps_per_second": 4.307, |
| "step": 62212 |
| }, |
| { |
| "epoch": 604.85, |
| "learning_rate": 7.902912621359224e-06, |
| "loss": 0.0885, |
| "step": 62300 |
| }, |
| { |
| "epoch": 605.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.088384628295898, |
| "eval_runtime": 4.3498, |
| "eval_samples_per_second": 66.9, |
| "eval_steps_per_second": 4.368, |
| "step": 62315 |
| }, |
| { |
| "epoch": 605.83, |
| "learning_rate": 7.883495145631069e-06, |
| "loss": 0.0809, |
| "step": 62400 |
| }, |
| { |
| "epoch": 606.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.215940475463867, |
| "eval_runtime": 4.3881, |
| "eval_samples_per_second": 66.316, |
| "eval_steps_per_second": 4.33, |
| "step": 62418 |
| }, |
| { |
| "epoch": 606.8, |
| "learning_rate": 7.864077669902913e-06, |
| "loss": 0.0892, |
| "step": 62500 |
| }, |
| { |
| "epoch": 607.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.086651802062988, |
| "eval_runtime": 4.4003, |
| "eval_samples_per_second": 66.132, |
| "eval_steps_per_second": 4.318, |
| "step": 62521 |
| }, |
| { |
| "epoch": 607.77, |
| "learning_rate": 7.844660194174758e-06, |
| "loss": 0.085, |
| "step": 62600 |
| }, |
| { |
| "epoch": 608.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.0848236083984375, |
| "eval_runtime": 4.4944, |
| "eval_samples_per_second": 64.747, |
| "eval_steps_per_second": 4.227, |
| "step": 62624 |
| }, |
| { |
| "epoch": 608.74, |
| "learning_rate": 7.825242718446603e-06, |
| "loss": 0.0828, |
| "step": 62700 |
| }, |
| { |
| "epoch": 609.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.234314918518066, |
| "eval_runtime": 4.3909, |
| "eval_samples_per_second": 66.274, |
| "eval_steps_per_second": 4.327, |
| "step": 62727 |
| }, |
| { |
| "epoch": 609.71, |
| "learning_rate": 7.805825242718447e-06, |
| "loss": 0.0978, |
| "step": 62800 |
| }, |
| { |
| "epoch": 610.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.1202826499938965, |
| "eval_runtime": 4.4024, |
| "eval_samples_per_second": 66.1, |
| "eval_steps_per_second": 4.316, |
| "step": 62830 |
| }, |
| { |
| "epoch": 610.68, |
| "learning_rate": 7.786407766990292e-06, |
| "loss": 0.0922, |
| "step": 62900 |
| }, |
| { |
| "epoch": 611.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.254323959350586, |
| "eval_runtime": 4.3525, |
| "eval_samples_per_second": 66.859, |
| "eval_steps_per_second": 4.365, |
| "step": 62933 |
| }, |
| { |
| "epoch": 611.65, |
| "learning_rate": 7.766990291262136e-06, |
| "loss": 0.091, |
| "step": 63000 |
| }, |
| { |
| "epoch": 612.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.122802734375, |
| "eval_runtime": 4.399, |
| "eval_samples_per_second": 66.151, |
| "eval_steps_per_second": 4.319, |
| "step": 63036 |
| }, |
| { |
| "epoch": 612.62, |
| "learning_rate": 7.747572815533981e-06, |
| "loss": 0.0926, |
| "step": 63100 |
| }, |
| { |
| "epoch": 613.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.306426525115967, |
| "eval_runtime": 4.4296, |
| "eval_samples_per_second": 65.695, |
| "eval_steps_per_second": 4.289, |
| "step": 63139 |
| }, |
| { |
| "epoch": 613.59, |
| "learning_rate": 7.728155339805825e-06, |
| "loss": 0.078, |
| "step": 63200 |
| }, |
| { |
| "epoch": 614.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.336696624755859, |
| "eval_runtime": 4.4235, |
| "eval_samples_per_second": 65.786, |
| "eval_steps_per_second": 4.295, |
| "step": 63242 |
| }, |
| { |
| "epoch": 614.56, |
| "learning_rate": 7.70873786407767e-06, |
| "loss": 0.0791, |
| "step": 63300 |
| }, |
| { |
| "epoch": 615.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.27379846572876, |
| "eval_runtime": 4.3721, |
| "eval_samples_per_second": 66.559, |
| "eval_steps_per_second": 4.346, |
| "step": 63345 |
| }, |
| { |
| "epoch": 615.53, |
| "learning_rate": 7.689320388349515e-06, |
| "loss": 0.0803, |
| "step": 63400 |
| }, |
| { |
| "epoch": 616.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.269800186157227, |
| "eval_runtime": 4.3964, |
| "eval_samples_per_second": 66.191, |
| "eval_steps_per_second": 4.322, |
| "step": 63448 |
| }, |
| { |
| "epoch": 616.5, |
| "learning_rate": 7.66990291262136e-06, |
| "loss": 0.0936, |
| "step": 63500 |
| }, |
| { |
| "epoch": 617.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.30620002746582, |
| "eval_runtime": 4.3979, |
| "eval_samples_per_second": 66.168, |
| "eval_steps_per_second": 4.32, |
| "step": 63551 |
| }, |
| { |
| "epoch": 617.48, |
| "learning_rate": 7.650485436893204e-06, |
| "loss": 0.0894, |
| "step": 63600 |
| }, |
| { |
| "epoch": 618.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.383390426635742, |
| "eval_runtime": 4.3878, |
| "eval_samples_per_second": 66.32, |
| "eval_steps_per_second": 4.33, |
| "step": 63654 |
| }, |
| { |
| "epoch": 618.45, |
| "learning_rate": 7.63106796116505e-06, |
| "loss": 0.0794, |
| "step": 63700 |
| }, |
| { |
| "epoch": 619.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 5.2768330574035645, |
| "eval_runtime": 4.3931, |
| "eval_samples_per_second": 66.24, |
| "eval_steps_per_second": 4.325, |
| "step": 63757 |
| }, |
| { |
| "epoch": 619.42, |
| "learning_rate": 7.611650485436893e-06, |
| "loss": 0.0885, |
| "step": 63800 |
| }, |
| { |
| "epoch": 620.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.2569475173950195, |
| "eval_runtime": 4.4091, |
| "eval_samples_per_second": 65.999, |
| "eval_steps_per_second": 4.309, |
| "step": 63860 |
| }, |
| { |
| "epoch": 620.39, |
| "learning_rate": 7.592233009708738e-06, |
| "loss": 0.0866, |
| "step": 63900 |
| }, |
| { |
| "epoch": 621.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.332491874694824, |
| "eval_runtime": 4.4767, |
| "eval_samples_per_second": 65.004, |
| "eval_steps_per_second": 4.244, |
| "step": 63963 |
| }, |
| { |
| "epoch": 621.36, |
| "learning_rate": 7.572815533980583e-06, |
| "loss": 0.079, |
| "step": 64000 |
| }, |
| { |
| "epoch": 622.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.279804229736328, |
| "eval_runtime": 4.452, |
| "eval_samples_per_second": 65.364, |
| "eval_steps_per_second": 4.268, |
| "step": 64066 |
| }, |
| { |
| "epoch": 622.33, |
| "learning_rate": 7.553398058252428e-06, |
| "loss": 0.084, |
| "step": 64100 |
| }, |
| { |
| "epoch": 623.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.460251331329346, |
| "eval_runtime": 4.4075, |
| "eval_samples_per_second": 66.025, |
| "eval_steps_per_second": 4.311, |
| "step": 64169 |
| }, |
| { |
| "epoch": 623.3, |
| "learning_rate": 7.533980582524273e-06, |
| "loss": 0.0886, |
| "step": 64200 |
| }, |
| { |
| "epoch": 624.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.292215347290039, |
| "eval_runtime": 4.5112, |
| "eval_samples_per_second": 64.507, |
| "eval_steps_per_second": 4.212, |
| "step": 64272 |
| }, |
| { |
| "epoch": 624.27, |
| "learning_rate": 7.514563106796117e-06, |
| "loss": 0.0726, |
| "step": 64300 |
| }, |
| { |
| "epoch": 625.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.195230960845947, |
| "eval_runtime": 4.4104, |
| "eval_samples_per_second": 65.98, |
| "eval_steps_per_second": 4.308, |
| "step": 64375 |
| }, |
| { |
| "epoch": 625.24, |
| "learning_rate": 7.495145631067961e-06, |
| "loss": 0.0893, |
| "step": 64400 |
| }, |
| { |
| "epoch": 626.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 5.411427974700928, |
| "eval_runtime": 4.3902, |
| "eval_samples_per_second": 66.285, |
| "eval_steps_per_second": 4.328, |
| "step": 64478 |
| }, |
| { |
| "epoch": 626.21, |
| "learning_rate": 7.475728155339807e-06, |
| "loss": 0.0881, |
| "step": 64500 |
| }, |
| { |
| "epoch": 627.0, |
| "eval_accuracy": 0.2508591065292096, |
| "eval_loss": 5.48668909072876, |
| "eval_runtime": 4.3905, |
| "eval_samples_per_second": 66.279, |
| "eval_steps_per_second": 4.328, |
| "step": 64581 |
| }, |
| { |
| "epoch": 627.18, |
| "learning_rate": 7.456310679611651e-06, |
| "loss": 0.079, |
| "step": 64600 |
| }, |
| { |
| "epoch": 628.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.483811855316162, |
| "eval_runtime": 4.4689, |
| "eval_samples_per_second": 65.117, |
| "eval_steps_per_second": 4.252, |
| "step": 64684 |
| }, |
| { |
| "epoch": 628.16, |
| "learning_rate": 7.436893203883496e-06, |
| "loss": 0.0933, |
| "step": 64700 |
| }, |
| { |
| "epoch": 629.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.521385192871094, |
| "eval_runtime": 4.4018, |
| "eval_samples_per_second": 66.109, |
| "eval_steps_per_second": 4.316, |
| "step": 64787 |
| }, |
| { |
| "epoch": 629.13, |
| "learning_rate": 7.41747572815534e-06, |
| "loss": 0.0795, |
| "step": 64800 |
| }, |
| { |
| "epoch": 630.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.425594806671143, |
| "eval_runtime": 4.4068, |
| "eval_samples_per_second": 66.034, |
| "eval_steps_per_second": 4.311, |
| "step": 64890 |
| }, |
| { |
| "epoch": 630.1, |
| "learning_rate": 7.398058252427185e-06, |
| "loss": 0.0882, |
| "step": 64900 |
| }, |
| { |
| "epoch": 631.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.362780570983887, |
| "eval_runtime": 4.4131, |
| "eval_samples_per_second": 65.94, |
| "eval_steps_per_second": 4.305, |
| "step": 64993 |
| }, |
| { |
| "epoch": 631.07, |
| "learning_rate": 7.37864077669903e-06, |
| "loss": 0.0826, |
| "step": 65000 |
| }, |
| { |
| "epoch": 632.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.2815680503845215, |
| "eval_runtime": 4.4008, |
| "eval_samples_per_second": 66.124, |
| "eval_steps_per_second": 4.317, |
| "step": 65096 |
| }, |
| { |
| "epoch": 632.04, |
| "learning_rate": 7.359223300970874e-06, |
| "loss": 0.0853, |
| "step": 65100 |
| }, |
| { |
| "epoch": 633.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.261467933654785, |
| "eval_runtime": 4.4513, |
| "eval_samples_per_second": 65.374, |
| "eval_steps_per_second": 4.268, |
| "step": 65199 |
| }, |
| { |
| "epoch": 633.01, |
| "learning_rate": 7.33980582524272e-06, |
| "loss": 0.0809, |
| "step": 65200 |
| }, |
| { |
| "epoch": 633.98, |
| "learning_rate": 7.3203883495145634e-06, |
| "loss": 0.0862, |
| "step": 65300 |
| }, |
| { |
| "epoch": 634.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.262171745300293, |
| "eval_runtime": 4.4171, |
| "eval_samples_per_second": 65.88, |
| "eval_steps_per_second": 4.301, |
| "step": 65302 |
| }, |
| { |
| "epoch": 634.95, |
| "learning_rate": 7.300970873786408e-06, |
| "loss": 0.0823, |
| "step": 65400 |
| }, |
| { |
| "epoch": 635.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.3122878074646, |
| "eval_runtime": 4.4674, |
| "eval_samples_per_second": 65.138, |
| "eval_steps_per_second": 4.253, |
| "step": 65405 |
| }, |
| { |
| "epoch": 635.92, |
| "learning_rate": 7.2815533980582534e-06, |
| "loss": 0.0915, |
| "step": 65500 |
| }, |
| { |
| "epoch": 636.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.248616695404053, |
| "eval_runtime": 4.4395, |
| "eval_samples_per_second": 65.549, |
| "eval_steps_per_second": 4.28, |
| "step": 65508 |
| }, |
| { |
| "epoch": 636.89, |
| "learning_rate": 7.262135922330098e-06, |
| "loss": 0.0776, |
| "step": 65600 |
| }, |
| { |
| "epoch": 637.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.264139175415039, |
| "eval_runtime": 4.5011, |
| "eval_samples_per_second": 64.651, |
| "eval_steps_per_second": 4.221, |
| "step": 65611 |
| }, |
| { |
| "epoch": 637.86, |
| "learning_rate": 7.242718446601942e-06, |
| "loss": 0.0799, |
| "step": 65700 |
| }, |
| { |
| "epoch": 638.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.432704448699951, |
| "eval_runtime": 4.4798, |
| "eval_samples_per_second": 64.959, |
| "eval_steps_per_second": 4.241, |
| "step": 65714 |
| }, |
| { |
| "epoch": 638.83, |
| "learning_rate": 7.223300970873786e-06, |
| "loss": 0.0925, |
| "step": 65800 |
| }, |
| { |
| "epoch": 639.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.366397380828857, |
| "eval_runtime": 4.4339, |
| "eval_samples_per_second": 65.63, |
| "eval_steps_per_second": 4.285, |
| "step": 65817 |
| }, |
| { |
| "epoch": 639.81, |
| "learning_rate": 7.203883495145632e-06, |
| "loss": 0.0865, |
| "step": 65900 |
| }, |
| { |
| "epoch": 640.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.30657958984375, |
| "eval_runtime": 4.4192, |
| "eval_samples_per_second": 65.849, |
| "eval_steps_per_second": 4.299, |
| "step": 65920 |
| }, |
| { |
| "epoch": 640.78, |
| "learning_rate": 7.184466019417476e-06, |
| "loss": 0.09, |
| "step": 66000 |
| }, |
| { |
| "epoch": 641.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.0984883308410645, |
| "eval_runtime": 4.401, |
| "eval_samples_per_second": 66.121, |
| "eval_steps_per_second": 4.317, |
| "step": 66023 |
| }, |
| { |
| "epoch": 641.75, |
| "learning_rate": 7.165048543689321e-06, |
| "loss": 0.0867, |
| "step": 66100 |
| }, |
| { |
| "epoch": 642.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.173170566558838, |
| "eval_runtime": 4.467, |
| "eval_samples_per_second": 65.145, |
| "eval_steps_per_second": 4.253, |
| "step": 66126 |
| }, |
| { |
| "epoch": 642.72, |
| "learning_rate": 7.1456310679611655e-06, |
| "loss": 0.084, |
| "step": 66200 |
| }, |
| { |
| "epoch": 643.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.233015537261963, |
| "eval_runtime": 4.3939, |
| "eval_samples_per_second": 66.229, |
| "eval_steps_per_second": 4.324, |
| "step": 66229 |
| }, |
| { |
| "epoch": 643.69, |
| "learning_rate": 7.12621359223301e-06, |
| "loss": 0.0806, |
| "step": 66300 |
| }, |
| { |
| "epoch": 644.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.209733486175537, |
| "eval_runtime": 4.4601, |
| "eval_samples_per_second": 65.245, |
| "eval_steps_per_second": 4.26, |
| "step": 66332 |
| }, |
| { |
| "epoch": 644.66, |
| "learning_rate": 7.106796116504855e-06, |
| "loss": 0.0821, |
| "step": 66400 |
| }, |
| { |
| "epoch": 645.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.3271918296813965, |
| "eval_runtime": 4.4348, |
| "eval_samples_per_second": 65.617, |
| "eval_steps_per_second": 4.284, |
| "step": 66435 |
| }, |
| { |
| "epoch": 645.63, |
| "learning_rate": 7.0873786407767e-06, |
| "loss": 0.0869, |
| "step": 66500 |
| }, |
| { |
| "epoch": 646.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.393039703369141, |
| "eval_runtime": 4.4132, |
| "eval_samples_per_second": 65.938, |
| "eval_steps_per_second": 4.305, |
| "step": 66538 |
| }, |
| { |
| "epoch": 646.6, |
| "learning_rate": 7.067961165048545e-06, |
| "loss": 0.0777, |
| "step": 66600 |
| }, |
| { |
| "epoch": 647.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.334554672241211, |
| "eval_runtime": 4.3932, |
| "eval_samples_per_second": 66.239, |
| "eval_steps_per_second": 4.325, |
| "step": 66641 |
| }, |
| { |
| "epoch": 647.57, |
| "learning_rate": 7.0485436893203884e-06, |
| "loss": 0.0822, |
| "step": 66700 |
| }, |
| { |
| "epoch": 648.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.216523170471191, |
| "eval_runtime": 4.4213, |
| "eval_samples_per_second": 65.817, |
| "eval_steps_per_second": 4.297, |
| "step": 66744 |
| }, |
| { |
| "epoch": 648.54, |
| "learning_rate": 7.029126213592233e-06, |
| "loss": 0.0967, |
| "step": 66800 |
| }, |
| { |
| "epoch": 649.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.228401184082031, |
| "eval_runtime": 4.43, |
| "eval_samples_per_second": 65.689, |
| "eval_steps_per_second": 4.289, |
| "step": 66847 |
| }, |
| { |
| "epoch": 649.51, |
| "learning_rate": 7.0097087378640785e-06, |
| "loss": 0.0792, |
| "step": 66900 |
| }, |
| { |
| "epoch": 650.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.392093181610107, |
| "eval_runtime": 4.4149, |
| "eval_samples_per_second": 65.913, |
| "eval_steps_per_second": 4.304, |
| "step": 66950 |
| }, |
| { |
| "epoch": 650.49, |
| "learning_rate": 6.990291262135923e-06, |
| "loss": 0.0849, |
| "step": 67000 |
| }, |
| { |
| "epoch": 651.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.52961540222168, |
| "eval_runtime": 4.4619, |
| "eval_samples_per_second": 65.219, |
| "eval_steps_per_second": 4.258, |
| "step": 67053 |
| }, |
| { |
| "epoch": 651.46, |
| "learning_rate": 6.970873786407768e-06, |
| "loss": 0.0854, |
| "step": 67100 |
| }, |
| { |
| "epoch": 652.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.479518890380859, |
| "eval_runtime": 4.4569, |
| "eval_samples_per_second": 65.291, |
| "eval_steps_per_second": 4.263, |
| "step": 67156 |
| }, |
| { |
| "epoch": 652.43, |
| "learning_rate": 6.951456310679612e-06, |
| "loss": 0.0796, |
| "step": 67200 |
| }, |
| { |
| "epoch": 653.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.333386421203613, |
| "eval_runtime": 4.4265, |
| "eval_samples_per_second": 65.74, |
| "eval_steps_per_second": 4.292, |
| "step": 67259 |
| }, |
| { |
| "epoch": 653.4, |
| "learning_rate": 6.932038834951457e-06, |
| "loss": 0.093, |
| "step": 67300 |
| }, |
| { |
| "epoch": 654.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.3139777183532715, |
| "eval_runtime": 4.421, |
| "eval_samples_per_second": 65.822, |
| "eval_steps_per_second": 4.298, |
| "step": 67362 |
| }, |
| { |
| "epoch": 654.37, |
| "learning_rate": 6.912621359223301e-06, |
| "loss": 0.076, |
| "step": 67400 |
| }, |
| { |
| "epoch": 655.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.306375980377197, |
| "eval_runtime": 4.5012, |
| "eval_samples_per_second": 64.65, |
| "eval_steps_per_second": 4.221, |
| "step": 67465 |
| }, |
| { |
| "epoch": 655.34, |
| "learning_rate": 6.893203883495147e-06, |
| "loss": 0.086, |
| "step": 67500 |
| }, |
| { |
| "epoch": 656.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.385765075683594, |
| "eval_runtime": 4.452, |
| "eval_samples_per_second": 65.363, |
| "eval_steps_per_second": 4.268, |
| "step": 67568 |
| }, |
| { |
| "epoch": 656.31, |
| "learning_rate": 6.873786407766991e-06, |
| "loss": 0.0856, |
| "step": 67600 |
| }, |
| { |
| "epoch": 657.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.320601940155029, |
| "eval_runtime": 4.4353, |
| "eval_samples_per_second": 65.61, |
| "eval_steps_per_second": 4.284, |
| "step": 67671 |
| }, |
| { |
| "epoch": 657.28, |
| "learning_rate": 6.854368932038835e-06, |
| "loss": 0.0826, |
| "step": 67700 |
| }, |
| { |
| "epoch": 658.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.273061275482178, |
| "eval_runtime": 4.4023, |
| "eval_samples_per_second": 66.102, |
| "eval_steps_per_second": 4.316, |
| "step": 67774 |
| }, |
| { |
| "epoch": 658.25, |
| "learning_rate": 6.83495145631068e-06, |
| "loss": 0.0972, |
| "step": 67800 |
| }, |
| { |
| "epoch": 659.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.310391902923584, |
| "eval_runtime": 4.4173, |
| "eval_samples_per_second": 65.877, |
| "eval_steps_per_second": 4.301, |
| "step": 67877 |
| }, |
| { |
| "epoch": 659.22, |
| "learning_rate": 6.815533980582525e-06, |
| "loss": 0.0828, |
| "step": 67900 |
| }, |
| { |
| "epoch": 660.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.32990026473999, |
| "eval_runtime": 4.4134, |
| "eval_samples_per_second": 65.935, |
| "eval_steps_per_second": 4.305, |
| "step": 67980 |
| }, |
| { |
| "epoch": 660.19, |
| "learning_rate": 6.79611650485437e-06, |
| "loss": 0.0792, |
| "step": 68000 |
| }, |
| { |
| "epoch": 661.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.461109161376953, |
| "eval_runtime": 4.4061, |
| "eval_samples_per_second": 66.045, |
| "eval_steps_per_second": 4.312, |
| "step": 68083 |
| }, |
| { |
| "epoch": 661.17, |
| "learning_rate": 6.776699029126214e-06, |
| "loss": 0.0839, |
| "step": 68100 |
| }, |
| { |
| "epoch": 662.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.407573699951172, |
| "eval_runtime": 4.4041, |
| "eval_samples_per_second": 66.075, |
| "eval_steps_per_second": 4.314, |
| "step": 68186 |
| }, |
| { |
| "epoch": 662.14, |
| "learning_rate": 6.757281553398059e-06, |
| "loss": 0.0816, |
| "step": 68200 |
| }, |
| { |
| "epoch": 663.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.33349609375, |
| "eval_runtime": 4.4, |
| "eval_samples_per_second": 66.136, |
| "eval_steps_per_second": 4.318, |
| "step": 68289 |
| }, |
| { |
| "epoch": 663.11, |
| "learning_rate": 6.7378640776699035e-06, |
| "loss": 0.0786, |
| "step": 68300 |
| }, |
| { |
| "epoch": 664.0, |
| "eval_accuracy": 0.25773195876288657, |
| "eval_loss": 5.388492107391357, |
| "eval_runtime": 4.4145, |
| "eval_samples_per_second": 65.919, |
| "eval_steps_per_second": 4.304, |
| "step": 68392 |
| }, |
| { |
| "epoch": 664.08, |
| "learning_rate": 6.718446601941748e-06, |
| "loss": 0.0958, |
| "step": 68400 |
| }, |
| { |
| "epoch": 665.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 5.482216835021973, |
| "eval_runtime": 4.4201, |
| "eval_samples_per_second": 65.835, |
| "eval_steps_per_second": 4.299, |
| "step": 68495 |
| }, |
| { |
| "epoch": 665.05, |
| "learning_rate": 6.6990291262135935e-06, |
| "loss": 0.0872, |
| "step": 68500 |
| }, |
| { |
| "epoch": 666.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.474820137023926, |
| "eval_runtime": 4.4131, |
| "eval_samples_per_second": 65.94, |
| "eval_steps_per_second": 4.305, |
| "step": 68598 |
| }, |
| { |
| "epoch": 666.02, |
| "learning_rate": 6.679611650485437e-06, |
| "loss": 0.0861, |
| "step": 68600 |
| }, |
| { |
| "epoch": 666.99, |
| "learning_rate": 6.660194174757282e-06, |
| "loss": 0.0823, |
| "step": 68700 |
| }, |
| { |
| "epoch": 667.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.3411993980407715, |
| "eval_runtime": 4.4496, |
| "eval_samples_per_second": 65.399, |
| "eval_steps_per_second": 4.27, |
| "step": 68701 |
| }, |
| { |
| "epoch": 667.96, |
| "learning_rate": 6.640776699029126e-06, |
| "loss": 0.0845, |
| "step": 68800 |
| }, |
| { |
| "epoch": 668.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.271579265594482, |
| "eval_runtime": 4.496, |
| "eval_samples_per_second": 64.724, |
| "eval_steps_per_second": 4.226, |
| "step": 68804 |
| }, |
| { |
| "epoch": 668.93, |
| "learning_rate": 6.621359223300972e-06, |
| "loss": 0.0882, |
| "step": 68900 |
| }, |
| { |
| "epoch": 669.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.405780792236328, |
| "eval_runtime": 4.3969, |
| "eval_samples_per_second": 66.183, |
| "eval_steps_per_second": 4.321, |
| "step": 68907 |
| }, |
| { |
| "epoch": 669.9, |
| "learning_rate": 6.601941747572816e-06, |
| "loss": 0.0794, |
| "step": 69000 |
| }, |
| { |
| "epoch": 670.0, |
| "eval_accuracy": 0.2542955326460481, |
| "eval_loss": 5.5217204093933105, |
| "eval_runtime": 4.4131, |
| "eval_samples_per_second": 65.94, |
| "eval_steps_per_second": 4.305, |
| "step": 69010 |
| }, |
| { |
| "epoch": 670.87, |
| "learning_rate": 6.58252427184466e-06, |
| "loss": 0.0876, |
| "step": 69100 |
| }, |
| { |
| "epoch": 671.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.354759693145752, |
| "eval_runtime": 4.4085, |
| "eval_samples_per_second": 66.009, |
| "eval_steps_per_second": 4.31, |
| "step": 69113 |
| }, |
| { |
| "epoch": 671.84, |
| "learning_rate": 6.5631067961165056e-06, |
| "loss": 0.0754, |
| "step": 69200 |
| }, |
| { |
| "epoch": 672.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.359265327453613, |
| "eval_runtime": 4.4009, |
| "eval_samples_per_second": 66.122, |
| "eval_steps_per_second": 4.317, |
| "step": 69216 |
| }, |
| { |
| "epoch": 672.82, |
| "learning_rate": 6.54368932038835e-06, |
| "loss": 0.0842, |
| "step": 69300 |
| }, |
| { |
| "epoch": 673.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.426083087921143, |
| "eval_runtime": 4.3984, |
| "eval_samples_per_second": 66.16, |
| "eval_steps_per_second": 4.32, |
| "step": 69319 |
| }, |
| { |
| "epoch": 673.79, |
| "learning_rate": 6.524271844660195e-06, |
| "loss": 0.0832, |
| "step": 69400 |
| }, |
| { |
| "epoch": 674.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.360762596130371, |
| "eval_runtime": 4.3989, |
| "eval_samples_per_second": 66.153, |
| "eval_steps_per_second": 4.319, |
| "step": 69422 |
| }, |
| { |
| "epoch": 674.76, |
| "learning_rate": 6.50485436893204e-06, |
| "loss": 0.0874, |
| "step": 69500 |
| }, |
| { |
| "epoch": 675.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.422213554382324, |
| "eval_runtime": 4.405, |
| "eval_samples_per_second": 66.062, |
| "eval_steps_per_second": 4.313, |
| "step": 69525 |
| }, |
| { |
| "epoch": 675.73, |
| "learning_rate": 6.485436893203884e-06, |
| "loss": 0.0822, |
| "step": 69600 |
| }, |
| { |
| "epoch": 676.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.25921630859375, |
| "eval_runtime": 4.4206, |
| "eval_samples_per_second": 65.828, |
| "eval_steps_per_second": 4.298, |
| "step": 69628 |
| }, |
| { |
| "epoch": 676.7, |
| "learning_rate": 6.4660194174757285e-06, |
| "loss": 0.0852, |
| "step": 69700 |
| }, |
| { |
| "epoch": 677.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.290493011474609, |
| "eval_runtime": 4.4465, |
| "eval_samples_per_second": 65.445, |
| "eval_steps_per_second": 4.273, |
| "step": 69731 |
| }, |
| { |
| "epoch": 677.67, |
| "learning_rate": 6.446601941747573e-06, |
| "loss": 0.0819, |
| "step": 69800 |
| }, |
| { |
| "epoch": 678.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.2874016761779785, |
| "eval_runtime": 4.4284, |
| "eval_samples_per_second": 65.712, |
| "eval_steps_per_second": 4.29, |
| "step": 69834 |
| }, |
| { |
| "epoch": 678.64, |
| "learning_rate": 6.4271844660194185e-06, |
| "loss": 0.0842, |
| "step": 69900 |
| }, |
| { |
| "epoch": 679.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.514050483703613, |
| "eval_runtime": 4.4431, |
| "eval_samples_per_second": 65.495, |
| "eval_steps_per_second": 4.276, |
| "step": 69937 |
| }, |
| { |
| "epoch": 679.61, |
| "learning_rate": 6.407766990291263e-06, |
| "loss": 0.0871, |
| "step": 70000 |
| }, |
| { |
| "epoch": 680.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.368432521820068, |
| "eval_runtime": 4.4109, |
| "eval_samples_per_second": 65.973, |
| "eval_steps_per_second": 4.307, |
| "step": 70040 |
| }, |
| { |
| "epoch": 680.58, |
| "learning_rate": 6.388349514563107e-06, |
| "loss": 0.0756, |
| "step": 70100 |
| }, |
| { |
| "epoch": 681.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.452810287475586, |
| "eval_runtime": 4.4231, |
| "eval_samples_per_second": 65.792, |
| "eval_steps_per_second": 4.296, |
| "step": 70143 |
| }, |
| { |
| "epoch": 681.55, |
| "learning_rate": 6.368932038834952e-06, |
| "loss": 0.0844, |
| "step": 70200 |
| }, |
| { |
| "epoch": 682.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.371203422546387, |
| "eval_runtime": 4.4399, |
| "eval_samples_per_second": 65.542, |
| "eval_steps_per_second": 4.279, |
| "step": 70246 |
| }, |
| { |
| "epoch": 682.52, |
| "learning_rate": 6.349514563106797e-06, |
| "loss": 0.0774, |
| "step": 70300 |
| }, |
| { |
| "epoch": 683.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.362085819244385, |
| "eval_runtime": 4.4613, |
| "eval_samples_per_second": 65.228, |
| "eval_steps_per_second": 4.259, |
| "step": 70349 |
| }, |
| { |
| "epoch": 683.5, |
| "learning_rate": 6.330097087378641e-06, |
| "loss": 0.0914, |
| "step": 70400 |
| }, |
| { |
| "epoch": 684.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.372140884399414, |
| "eval_runtime": 4.3903, |
| "eval_samples_per_second": 66.283, |
| "eval_steps_per_second": 4.328, |
| "step": 70452 |
| }, |
| { |
| "epoch": 684.47, |
| "learning_rate": 6.310679611650487e-06, |
| "loss": 0.0883, |
| "step": 70500 |
| }, |
| { |
| "epoch": 685.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.2809271812438965, |
| "eval_runtime": 4.4334, |
| "eval_samples_per_second": 65.638, |
| "eval_steps_per_second": 4.286, |
| "step": 70555 |
| }, |
| { |
| "epoch": 685.44, |
| "learning_rate": 6.2912621359223306e-06, |
| "loss": 0.0812, |
| "step": 70600 |
| }, |
| { |
| "epoch": 686.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.343222618103027, |
| "eval_runtime": 4.3898, |
| "eval_samples_per_second": 66.29, |
| "eval_steps_per_second": 4.328, |
| "step": 70658 |
| }, |
| { |
| "epoch": 686.41, |
| "learning_rate": 6.271844660194175e-06, |
| "loss": 0.0838, |
| "step": 70700 |
| }, |
| { |
| "epoch": 687.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.31311559677124, |
| "eval_runtime": 4.3958, |
| "eval_samples_per_second": 66.199, |
| "eval_steps_per_second": 4.322, |
| "step": 70761 |
| }, |
| { |
| "epoch": 687.38, |
| "learning_rate": 6.25242718446602e-06, |
| "loss": 0.081, |
| "step": 70800 |
| }, |
| { |
| "epoch": 688.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.305084228515625, |
| "eval_runtime": 4.4287, |
| "eval_samples_per_second": 65.708, |
| "eval_steps_per_second": 4.29, |
| "step": 70864 |
| }, |
| { |
| "epoch": 688.35, |
| "learning_rate": 6.233009708737865e-06, |
| "loss": 0.0785, |
| "step": 70900 |
| }, |
| { |
| "epoch": 689.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.239564418792725, |
| "eval_runtime": 4.452, |
| "eval_samples_per_second": 65.364, |
| "eval_steps_per_second": 4.268, |
| "step": 70967 |
| }, |
| { |
| "epoch": 689.32, |
| "learning_rate": 6.213592233009709e-06, |
| "loss": 0.0842, |
| "step": 71000 |
| }, |
| { |
| "epoch": 690.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.2474684715271, |
| "eval_runtime": 4.4012, |
| "eval_samples_per_second": 66.119, |
| "eval_steps_per_second": 4.317, |
| "step": 71070 |
| }, |
| { |
| "epoch": 690.29, |
| "learning_rate": 6.1941747572815535e-06, |
| "loss": 0.0956, |
| "step": 71100 |
| }, |
| { |
| "epoch": 691.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.349338531494141, |
| "eval_runtime": 4.4106, |
| "eval_samples_per_second": 65.978, |
| "eval_steps_per_second": 4.308, |
| "step": 71173 |
| }, |
| { |
| "epoch": 691.26, |
| "learning_rate": 6.174757281553399e-06, |
| "loss": 0.0823, |
| "step": 71200 |
| }, |
| { |
| "epoch": 692.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.211832046508789, |
| "eval_runtime": 4.489, |
| "eval_samples_per_second": 64.825, |
| "eval_steps_per_second": 4.233, |
| "step": 71276 |
| }, |
| { |
| "epoch": 692.23, |
| "learning_rate": 6.1553398058252435e-06, |
| "loss": 0.0841, |
| "step": 71300 |
| }, |
| { |
| "epoch": 693.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.162426471710205, |
| "eval_runtime": 4.505, |
| "eval_samples_per_second": 64.596, |
| "eval_steps_per_second": 4.218, |
| "step": 71379 |
| }, |
| { |
| "epoch": 693.2, |
| "learning_rate": 6.135922330097088e-06, |
| "loss": 0.078, |
| "step": 71400 |
| }, |
| { |
| "epoch": 694.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.222851753234863, |
| "eval_runtime": 4.393, |
| "eval_samples_per_second": 66.242, |
| "eval_steps_per_second": 4.325, |
| "step": 71482 |
| }, |
| { |
| "epoch": 694.17, |
| "learning_rate": 6.116504854368932e-06, |
| "loss": 0.0831, |
| "step": 71500 |
| }, |
| { |
| "epoch": 695.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.266942024230957, |
| "eval_runtime": 4.437, |
| "eval_samples_per_second": 65.585, |
| "eval_steps_per_second": 4.282, |
| "step": 71585 |
| }, |
| { |
| "epoch": 695.15, |
| "learning_rate": 6.097087378640777e-06, |
| "loss": 0.0863, |
| "step": 71600 |
| }, |
| { |
| "epoch": 696.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.276256561279297, |
| "eval_runtime": 4.4054, |
| "eval_samples_per_second": 66.055, |
| "eval_steps_per_second": 4.313, |
| "step": 71688 |
| }, |
| { |
| "epoch": 696.12, |
| "learning_rate": 6.077669902912622e-06, |
| "loss": 0.0957, |
| "step": 71700 |
| }, |
| { |
| "epoch": 697.0, |
| "eval_accuracy": 0.3333333333333333, |
| "eval_loss": 5.301415920257568, |
| "eval_runtime": 4.4203, |
| "eval_samples_per_second": 65.833, |
| "eval_steps_per_second": 4.298, |
| "step": 71791 |
| }, |
| { |
| "epoch": 697.09, |
| "learning_rate": 6.058252427184466e-06, |
| "loss": 0.0775, |
| "step": 71800 |
| }, |
| { |
| "epoch": 698.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.38198709487915, |
| "eval_runtime": 4.4349, |
| "eval_samples_per_second": 65.616, |
| "eval_steps_per_second": 4.284, |
| "step": 71894 |
| }, |
| { |
| "epoch": 698.06, |
| "learning_rate": 6.038834951456312e-06, |
| "loss": 0.0907, |
| "step": 71900 |
| }, |
| { |
| "epoch": 699.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.4358649253845215, |
| "eval_runtime": 4.4417, |
| "eval_samples_per_second": 65.515, |
| "eval_steps_per_second": 4.278, |
| "step": 71997 |
| }, |
| { |
| "epoch": 699.03, |
| "learning_rate": 6.0194174757281556e-06, |
| "loss": 0.0887, |
| "step": 72000 |
| }, |
| { |
| "epoch": 700.0, |
| "learning_rate": 6e-06, |
| "loss": 0.0802, |
| "step": 72100 |
| }, |
| { |
| "epoch": 700.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.401218414306641, |
| "eval_runtime": 4.4053, |
| "eval_samples_per_second": 66.056, |
| "eval_steps_per_second": 4.313, |
| "step": 72100 |
| }, |
| { |
| "epoch": 700.97, |
| "learning_rate": 5.980582524271845e-06, |
| "loss": 0.0799, |
| "step": 72200 |
| }, |
| { |
| "epoch": 701.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.378960132598877, |
| "eval_runtime": 4.4059, |
| "eval_samples_per_second": 66.047, |
| "eval_steps_per_second": 4.312, |
| "step": 72203 |
| }, |
| { |
| "epoch": 701.94, |
| "learning_rate": 5.96116504854369e-06, |
| "loss": 0.0822, |
| "step": 72300 |
| }, |
| { |
| "epoch": 702.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.3592753410339355, |
| "eval_runtime": 4.4023, |
| "eval_samples_per_second": 66.101, |
| "eval_steps_per_second": 4.316, |
| "step": 72306 |
| }, |
| { |
| "epoch": 702.91, |
| "learning_rate": 5.941747572815535e-06, |
| "loss": 0.0841, |
| "step": 72400 |
| }, |
| { |
| "epoch": 703.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.317993640899658, |
| "eval_runtime": 4.3917, |
| "eval_samples_per_second": 66.261, |
| "eval_steps_per_second": 4.326, |
| "step": 72409 |
| }, |
| { |
| "epoch": 703.88, |
| "learning_rate": 5.9223300970873785e-06, |
| "loss": 0.0883, |
| "step": 72500 |
| }, |
| { |
| "epoch": 704.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.275454998016357, |
| "eval_runtime": 4.46, |
| "eval_samples_per_second": 65.247, |
| "eval_steps_per_second": 4.26, |
| "step": 72512 |
| }, |
| { |
| "epoch": 704.85, |
| "learning_rate": 5.902912621359224e-06, |
| "loss": 0.0863, |
| "step": 72600 |
| }, |
| { |
| "epoch": 705.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.243884086608887, |
| "eval_runtime": 4.3982, |
| "eval_samples_per_second": 66.163, |
| "eval_steps_per_second": 4.32, |
| "step": 72615 |
| }, |
| { |
| "epoch": 705.83, |
| "learning_rate": 5.8834951456310685e-06, |
| "loss": 0.0776, |
| "step": 72700 |
| }, |
| { |
| "epoch": 706.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.292761325836182, |
| "eval_runtime": 4.4045, |
| "eval_samples_per_second": 66.069, |
| "eval_steps_per_second": 4.314, |
| "step": 72718 |
| }, |
| { |
| "epoch": 706.8, |
| "learning_rate": 5.864077669902913e-06, |
| "loss": 0.0854, |
| "step": 72800 |
| }, |
| { |
| "epoch": 707.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.34207820892334, |
| "eval_runtime": 4.3984, |
| "eval_samples_per_second": 66.16, |
| "eval_steps_per_second": 4.32, |
| "step": 72821 |
| }, |
| { |
| "epoch": 707.77, |
| "learning_rate": 5.8446601941747585e-06, |
| "loss": 0.0853, |
| "step": 72900 |
| }, |
| { |
| "epoch": 708.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.336627006530762, |
| "eval_runtime": 4.4249, |
| "eval_samples_per_second": 65.765, |
| "eval_steps_per_second": 4.294, |
| "step": 72924 |
| }, |
| { |
| "epoch": 708.74, |
| "learning_rate": 5.825242718446602e-06, |
| "loss": 0.0864, |
| "step": 73000 |
| }, |
| { |
| "epoch": 709.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.3050031661987305, |
| "eval_runtime": 4.4004, |
| "eval_samples_per_second": 66.131, |
| "eval_steps_per_second": 4.318, |
| "step": 73027 |
| }, |
| { |
| "epoch": 709.71, |
| "learning_rate": 5.805825242718447e-06, |
| "loss": 0.0802, |
| "step": 73100 |
| }, |
| { |
| "epoch": 710.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.309476375579834, |
| "eval_runtime": 4.4093, |
| "eval_samples_per_second": 65.997, |
| "eval_steps_per_second": 4.309, |
| "step": 73130 |
| }, |
| { |
| "epoch": 710.68, |
| "learning_rate": 5.786407766990291e-06, |
| "loss": 0.0868, |
| "step": 73200 |
| }, |
| { |
| "epoch": 711.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.308775424957275, |
| "eval_runtime": 4.4104, |
| "eval_samples_per_second": 65.981, |
| "eval_steps_per_second": 4.308, |
| "step": 73233 |
| }, |
| { |
| "epoch": 711.65, |
| "learning_rate": 5.766990291262137e-06, |
| "loss": 0.0817, |
| "step": 73300 |
| }, |
| { |
| "epoch": 712.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.284626007080078, |
| "eval_runtime": 4.4228, |
| "eval_samples_per_second": 65.795, |
| "eval_steps_per_second": 4.296, |
| "step": 73336 |
| }, |
| { |
| "epoch": 712.62, |
| "learning_rate": 5.747572815533981e-06, |
| "loss": 0.0848, |
| "step": 73400 |
| }, |
| { |
| "epoch": 713.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 5.321852207183838, |
| "eval_runtime": 4.4087, |
| "eval_samples_per_second": 66.006, |
| "eval_steps_per_second": 4.31, |
| "step": 73439 |
| }, |
| { |
| "epoch": 713.59, |
| "learning_rate": 5.728155339805825e-06, |
| "loss": 0.0891, |
| "step": 73500 |
| }, |
| { |
| "epoch": 714.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 5.370724678039551, |
| "eval_runtime": 4.3913, |
| "eval_samples_per_second": 66.267, |
| "eval_steps_per_second": 4.327, |
| "step": 73542 |
| }, |
| { |
| "epoch": 714.56, |
| "learning_rate": 5.708737864077671e-06, |
| "loss": 0.0829, |
| "step": 73600 |
| }, |
| { |
| "epoch": 715.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.340518474578857, |
| "eval_runtime": 4.4354, |
| "eval_samples_per_second": 65.608, |
| "eval_steps_per_second": 4.284, |
| "step": 73645 |
| }, |
| { |
| "epoch": 715.53, |
| "learning_rate": 5.689320388349515e-06, |
| "loss": 0.0882, |
| "step": 73700 |
| }, |
| { |
| "epoch": 716.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.187460422515869, |
| "eval_runtime": 4.399, |
| "eval_samples_per_second": 66.152, |
| "eval_steps_per_second": 4.319, |
| "step": 73748 |
| }, |
| { |
| "epoch": 716.5, |
| "learning_rate": 5.66990291262136e-06, |
| "loss": 0.0944, |
| "step": 73800 |
| }, |
| { |
| "epoch": 717.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.266665935516357, |
| "eval_runtime": 4.4415, |
| "eval_samples_per_second": 65.519, |
| "eval_steps_per_second": 4.278, |
| "step": 73851 |
| }, |
| { |
| "epoch": 717.48, |
| "learning_rate": 5.6504854368932035e-06, |
| "loss": 0.0713, |
| "step": 73900 |
| }, |
| { |
| "epoch": 718.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.291965961456299, |
| "eval_runtime": 4.3974, |
| "eval_samples_per_second": 66.176, |
| "eval_steps_per_second": 4.321, |
| "step": 73954 |
| }, |
| { |
| "epoch": 718.45, |
| "learning_rate": 5.631067961165049e-06, |
| "loss": 0.0855, |
| "step": 74000 |
| }, |
| { |
| "epoch": 719.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.172239303588867, |
| "eval_runtime": 4.4587, |
| "eval_samples_per_second": 65.266, |
| "eval_steps_per_second": 4.261, |
| "step": 74057 |
| }, |
| { |
| "epoch": 719.42, |
| "learning_rate": 5.6116504854368935e-06, |
| "loss": 0.0812, |
| "step": 74100 |
| }, |
| { |
| "epoch": 720.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.137197017669678, |
| "eval_runtime": 4.4382, |
| "eval_samples_per_second": 65.567, |
| "eval_steps_per_second": 4.281, |
| "step": 74160 |
| }, |
| { |
| "epoch": 720.39, |
| "learning_rate": 5.592233009708738e-06, |
| "loss": 0.0731, |
| "step": 74200 |
| }, |
| { |
| "epoch": 721.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.101325511932373, |
| "eval_runtime": 4.4604, |
| "eval_samples_per_second": 65.241, |
| "eval_steps_per_second": 4.26, |
| "step": 74263 |
| }, |
| { |
| "epoch": 721.36, |
| "learning_rate": 5.5728155339805835e-06, |
| "loss": 0.0845, |
| "step": 74300 |
| }, |
| { |
| "epoch": 722.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.105453968048096, |
| "eval_runtime": 4.4086, |
| "eval_samples_per_second": 66.007, |
| "eval_steps_per_second": 4.31, |
| "step": 74366 |
| }, |
| { |
| "epoch": 722.33, |
| "learning_rate": 5.553398058252427e-06, |
| "loss": 0.0857, |
| "step": 74400 |
| }, |
| { |
| "epoch": 723.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.216444969177246, |
| "eval_runtime": 4.4028, |
| "eval_samples_per_second": 66.094, |
| "eval_steps_per_second": 4.315, |
| "step": 74469 |
| }, |
| { |
| "epoch": 723.3, |
| "learning_rate": 5.533980582524272e-06, |
| "loss": 0.0843, |
| "step": 74500 |
| }, |
| { |
| "epoch": 724.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.302288055419922, |
| "eval_runtime": 4.4618, |
| "eval_samples_per_second": 65.22, |
| "eval_steps_per_second": 4.258, |
| "step": 74572 |
| }, |
| { |
| "epoch": 724.27, |
| "learning_rate": 5.514563106796117e-06, |
| "loss": 0.084, |
| "step": 74600 |
| }, |
| { |
| "epoch": 725.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.123310089111328, |
| "eval_runtime": 4.4177, |
| "eval_samples_per_second": 65.871, |
| "eval_steps_per_second": 4.301, |
| "step": 74675 |
| }, |
| { |
| "epoch": 725.24, |
| "learning_rate": 5.495145631067962e-06, |
| "loss": 0.0846, |
| "step": 74700 |
| }, |
| { |
| "epoch": 726.0, |
| "eval_accuracy": 0.26804123711340205, |
| "eval_loss": 5.316282272338867, |
| "eval_runtime": 4.435, |
| "eval_samples_per_second": 65.615, |
| "eval_steps_per_second": 4.284, |
| "step": 74778 |
| }, |
| { |
| "epoch": 726.21, |
| "learning_rate": 5.4757281553398064e-06, |
| "loss": 0.0838, |
| "step": 74800 |
| }, |
| { |
| "epoch": 727.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.224409580230713, |
| "eval_runtime": 4.4056, |
| "eval_samples_per_second": 66.052, |
| "eval_steps_per_second": 4.313, |
| "step": 74881 |
| }, |
| { |
| "epoch": 727.18, |
| "learning_rate": 5.45631067961165e-06, |
| "loss": 0.0815, |
| "step": 74900 |
| }, |
| { |
| "epoch": 728.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.161591529846191, |
| "eval_runtime": 4.4105, |
| "eval_samples_per_second": 65.979, |
| "eval_steps_per_second": 4.308, |
| "step": 74984 |
| }, |
| { |
| "epoch": 728.16, |
| "learning_rate": 5.436893203883496e-06, |
| "loss": 0.0849, |
| "step": 75000 |
| }, |
| { |
| "epoch": 729.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.15138578414917, |
| "eval_runtime": 4.412, |
| "eval_samples_per_second": 65.956, |
| "eval_steps_per_second": 4.306, |
| "step": 75087 |
| }, |
| { |
| "epoch": 729.13, |
| "learning_rate": 5.41747572815534e-06, |
| "loss": 0.0818, |
| "step": 75100 |
| }, |
| { |
| "epoch": 730.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.14281702041626, |
| "eval_runtime": 4.4541, |
| "eval_samples_per_second": 65.333, |
| "eval_steps_per_second": 4.266, |
| "step": 75190 |
| }, |
| { |
| "epoch": 730.1, |
| "learning_rate": 5.398058252427185e-06, |
| "loss": 0.0751, |
| "step": 75200 |
| }, |
| { |
| "epoch": 731.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.182039737701416, |
| "eval_runtime": 4.3971, |
| "eval_samples_per_second": 66.18, |
| "eval_steps_per_second": 4.321, |
| "step": 75293 |
| }, |
| { |
| "epoch": 731.07, |
| "learning_rate": 5.37864077669903e-06, |
| "loss": 0.0766, |
| "step": 75300 |
| }, |
| { |
| "epoch": 732.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.232609748840332, |
| "eval_runtime": 4.4105, |
| "eval_samples_per_second": 65.978, |
| "eval_steps_per_second": 4.308, |
| "step": 75396 |
| }, |
| { |
| "epoch": 732.04, |
| "learning_rate": 5.359223300970874e-06, |
| "loss": 0.0772, |
| "step": 75400 |
| }, |
| { |
| "epoch": 733.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.208255290985107, |
| "eval_runtime": 4.4325, |
| "eval_samples_per_second": 65.652, |
| "eval_steps_per_second": 4.287, |
| "step": 75499 |
| }, |
| { |
| "epoch": 733.01, |
| "learning_rate": 5.3398058252427185e-06, |
| "loss": 0.0871, |
| "step": 75500 |
| }, |
| { |
| "epoch": 733.98, |
| "learning_rate": 5.320388349514564e-06, |
| "loss": 0.0846, |
| "step": 75600 |
| }, |
| { |
| "epoch": 734.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.325695037841797, |
| "eval_runtime": 4.4062, |
| "eval_samples_per_second": 66.044, |
| "eval_steps_per_second": 4.312, |
| "step": 75602 |
| }, |
| { |
| "epoch": 734.95, |
| "learning_rate": 5.3009708737864085e-06, |
| "loss": 0.0811, |
| "step": 75700 |
| }, |
| { |
| "epoch": 735.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.345978736877441, |
| "eval_runtime": 4.451, |
| "eval_samples_per_second": 65.378, |
| "eval_steps_per_second": 4.269, |
| "step": 75705 |
| }, |
| { |
| "epoch": 735.92, |
| "learning_rate": 5.281553398058253e-06, |
| "loss": 0.089, |
| "step": 75800 |
| }, |
| { |
| "epoch": 736.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.300384521484375, |
| "eval_runtime": 4.4009, |
| "eval_samples_per_second": 66.124, |
| "eval_steps_per_second": 4.317, |
| "step": 75808 |
| }, |
| { |
| "epoch": 736.89, |
| "learning_rate": 5.262135922330097e-06, |
| "loss": 0.0711, |
| "step": 75900 |
| }, |
| { |
| "epoch": 737.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.24236536026001, |
| "eval_runtime": 4.4122, |
| "eval_samples_per_second": 65.954, |
| "eval_steps_per_second": 4.306, |
| "step": 75911 |
| }, |
| { |
| "epoch": 737.86, |
| "learning_rate": 5.242718446601942e-06, |
| "loss": 0.0852, |
| "step": 76000 |
| }, |
| { |
| "epoch": 738.0, |
| "eval_accuracy": 0.2611683848797251, |
| "eval_loss": 5.3143439292907715, |
| "eval_runtime": 4.3939, |
| "eval_samples_per_second": 66.229, |
| "eval_steps_per_second": 4.324, |
| "step": 76014 |
| }, |
| { |
| "epoch": 738.83, |
| "learning_rate": 5.223300970873787e-06, |
| "loss": 0.0798, |
| "step": 76100 |
| }, |
| { |
| "epoch": 739.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 5.32684326171875, |
| "eval_runtime": 4.4153, |
| "eval_samples_per_second": 65.907, |
| "eval_steps_per_second": 4.303, |
| "step": 76117 |
| }, |
| { |
| "epoch": 739.81, |
| "learning_rate": 5.2038834951456314e-06, |
| "loss": 0.0783, |
| "step": 76200 |
| }, |
| { |
| "epoch": 740.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.26964807510376, |
| "eval_runtime": 4.4134, |
| "eval_samples_per_second": 65.935, |
| "eval_steps_per_second": 4.305, |
| "step": 76220 |
| }, |
| { |
| "epoch": 740.78, |
| "learning_rate": 5.184466019417476e-06, |
| "loss": 0.086, |
| "step": 76300 |
| }, |
| { |
| "epoch": 741.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.274394989013672, |
| "eval_runtime": 4.3931, |
| "eval_samples_per_second": 66.24, |
| "eval_steps_per_second": 4.325, |
| "step": 76323 |
| }, |
| { |
| "epoch": 741.75, |
| "learning_rate": 5.165048543689321e-06, |
| "loss": 0.0778, |
| "step": 76400 |
| }, |
| { |
| "epoch": 742.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.327398300170898, |
| "eval_runtime": 4.3897, |
| "eval_samples_per_second": 66.291, |
| "eval_steps_per_second": 4.328, |
| "step": 76426 |
| }, |
| { |
| "epoch": 742.72, |
| "learning_rate": 5.145631067961165e-06, |
| "loss": 0.0832, |
| "step": 76500 |
| }, |
| { |
| "epoch": 743.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.329669952392578, |
| "eval_runtime": 4.4137, |
| "eval_samples_per_second": 65.931, |
| "eval_steps_per_second": 4.305, |
| "step": 76529 |
| }, |
| { |
| "epoch": 743.69, |
| "learning_rate": 5.126213592233011e-06, |
| "loss": 0.0826, |
| "step": 76600 |
| }, |
| { |
| "epoch": 744.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.285783767700195, |
| "eval_runtime": 4.4299, |
| "eval_samples_per_second": 65.689, |
| "eval_steps_per_second": 4.289, |
| "step": 76632 |
| }, |
| { |
| "epoch": 744.66, |
| "learning_rate": 5.106796116504855e-06, |
| "loss": 0.0792, |
| "step": 76700 |
| }, |
| { |
| "epoch": 745.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.336843967437744, |
| "eval_runtime": 4.5001, |
| "eval_samples_per_second": 64.665, |
| "eval_steps_per_second": 4.222, |
| "step": 76735 |
| }, |
| { |
| "epoch": 745.63, |
| "learning_rate": 5.087378640776699e-06, |
| "loss": 0.0787, |
| "step": 76800 |
| }, |
| { |
| "epoch": 746.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.3573713302612305, |
| "eval_runtime": 4.3936, |
| "eval_samples_per_second": 66.232, |
| "eval_steps_per_second": 4.324, |
| "step": 76838 |
| }, |
| { |
| "epoch": 746.6, |
| "learning_rate": 5.0679611650485435e-06, |
| "loss": 0.0732, |
| "step": 76900 |
| }, |
| { |
| "epoch": 747.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.346883773803711, |
| "eval_runtime": 4.4248, |
| "eval_samples_per_second": 65.766, |
| "eval_steps_per_second": 4.294, |
| "step": 76941 |
| }, |
| { |
| "epoch": 747.57, |
| "learning_rate": 5.048543689320389e-06, |
| "loss": 0.0857, |
| "step": 77000 |
| }, |
| { |
| "epoch": 748.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.297471523284912, |
| "eval_runtime": 4.3992, |
| "eval_samples_per_second": 66.148, |
| "eval_steps_per_second": 4.319, |
| "step": 77044 |
| }, |
| { |
| "epoch": 748.54, |
| "learning_rate": 5.0291262135922335e-06, |
| "loss": 0.07, |
| "step": 77100 |
| }, |
| { |
| "epoch": 749.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.337193489074707, |
| "eval_runtime": 4.4061, |
| "eval_samples_per_second": 66.045, |
| "eval_steps_per_second": 4.312, |
| "step": 77147 |
| }, |
| { |
| "epoch": 749.51, |
| "learning_rate": 5.009708737864078e-06, |
| "loss": 0.0829, |
| "step": 77200 |
| }, |
| { |
| "epoch": 750.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.252513408660889, |
| "eval_runtime": 4.4464, |
| "eval_samples_per_second": 65.447, |
| "eval_steps_per_second": 4.273, |
| "step": 77250 |
| }, |
| { |
| "epoch": 750.49, |
| "learning_rate": 4.990291262135923e-06, |
| "loss": 0.0794, |
| "step": 77300 |
| }, |
| { |
| "epoch": 751.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.331362247467041, |
| "eval_runtime": 4.4255, |
| "eval_samples_per_second": 65.756, |
| "eval_steps_per_second": 4.293, |
| "step": 77353 |
| }, |
| { |
| "epoch": 751.46, |
| "learning_rate": 4.970873786407767e-06, |
| "loss": 0.0781, |
| "step": 77400 |
| }, |
| { |
| "epoch": 752.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.331817626953125, |
| "eval_runtime": 4.4236, |
| "eval_samples_per_second": 65.783, |
| "eval_steps_per_second": 4.295, |
| "step": 77456 |
| }, |
| { |
| "epoch": 752.43, |
| "learning_rate": 4.951456310679612e-06, |
| "loss": 0.0914, |
| "step": 77500 |
| }, |
| { |
| "epoch": 753.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.265148162841797, |
| "eval_runtime": 4.423, |
| "eval_samples_per_second": 65.792, |
| "eval_steps_per_second": 4.296, |
| "step": 77559 |
| }, |
| { |
| "epoch": 753.4, |
| "learning_rate": 4.932038834951457e-06, |
| "loss": 0.0822, |
| "step": 77600 |
| }, |
| { |
| "epoch": 754.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.355736255645752, |
| "eval_runtime": 4.4337, |
| "eval_samples_per_second": 65.633, |
| "eval_steps_per_second": 4.285, |
| "step": 77662 |
| }, |
| { |
| "epoch": 754.37, |
| "learning_rate": 4.912621359223301e-06, |
| "loss": 0.0782, |
| "step": 77700 |
| }, |
| { |
| "epoch": 755.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.41204309463501, |
| "eval_runtime": 4.4469, |
| "eval_samples_per_second": 65.439, |
| "eval_steps_per_second": 4.273, |
| "step": 77765 |
| }, |
| { |
| "epoch": 755.34, |
| "learning_rate": 4.8932038834951465e-06, |
| "loss": 0.0828, |
| "step": 77800 |
| }, |
| { |
| "epoch": 756.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.419083118438721, |
| "eval_runtime": 4.4195, |
| "eval_samples_per_second": 65.845, |
| "eval_steps_per_second": 4.299, |
| "step": 77868 |
| }, |
| { |
| "epoch": 756.31, |
| "learning_rate": 4.87378640776699e-06, |
| "loss": 0.0747, |
| "step": 77900 |
| }, |
| { |
| "epoch": 757.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.410015106201172, |
| "eval_runtime": 4.4331, |
| "eval_samples_per_second": 65.642, |
| "eval_steps_per_second": 4.286, |
| "step": 77971 |
| }, |
| { |
| "epoch": 757.28, |
| "learning_rate": 4.854368932038836e-06, |
| "loss": 0.0765, |
| "step": 78000 |
| }, |
| { |
| "epoch": 758.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.383244514465332, |
| "eval_runtime": 4.4023, |
| "eval_samples_per_second": 66.101, |
| "eval_steps_per_second": 4.316, |
| "step": 78074 |
| }, |
| { |
| "epoch": 758.25, |
| "learning_rate": 4.834951456310679e-06, |
| "loss": 0.077, |
| "step": 78100 |
| }, |
| { |
| "epoch": 759.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.3800554275512695, |
| "eval_runtime": 4.4424, |
| "eval_samples_per_second": 65.505, |
| "eval_steps_per_second": 4.277, |
| "step": 78177 |
| }, |
| { |
| "epoch": 759.22, |
| "learning_rate": 4.815533980582525e-06, |
| "loss": 0.0751, |
| "step": 78200 |
| }, |
| { |
| "epoch": 760.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.327369213104248, |
| "eval_runtime": 4.4113, |
| "eval_samples_per_second": 65.967, |
| "eval_steps_per_second": 4.307, |
| "step": 78280 |
| }, |
| { |
| "epoch": 760.19, |
| "learning_rate": 4.796116504854369e-06, |
| "loss": 0.0821, |
| "step": 78300 |
| }, |
| { |
| "epoch": 761.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.391132354736328, |
| "eval_runtime": 4.3952, |
| "eval_samples_per_second": 66.208, |
| "eval_steps_per_second": 4.323, |
| "step": 78383 |
| }, |
| { |
| "epoch": 761.17, |
| "learning_rate": 4.776699029126214e-06, |
| "loss": 0.0854, |
| "step": 78400 |
| }, |
| { |
| "epoch": 762.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.411304473876953, |
| "eval_runtime": 4.3935, |
| "eval_samples_per_second": 66.234, |
| "eval_steps_per_second": 4.325, |
| "step": 78486 |
| }, |
| { |
| "epoch": 762.14, |
| "learning_rate": 4.7572815533980585e-06, |
| "loss": 0.0765, |
| "step": 78500 |
| }, |
| { |
| "epoch": 763.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.364217758178711, |
| "eval_runtime": 4.5151, |
| "eval_samples_per_second": 64.45, |
| "eval_steps_per_second": 4.208, |
| "step": 78589 |
| }, |
| { |
| "epoch": 763.11, |
| "learning_rate": 4.737864077669903e-06, |
| "loss": 0.0787, |
| "step": 78600 |
| }, |
| { |
| "epoch": 764.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.354491710662842, |
| "eval_runtime": 4.518, |
| "eval_samples_per_second": 64.408, |
| "eval_steps_per_second": 4.205, |
| "step": 78692 |
| }, |
| { |
| "epoch": 764.08, |
| "learning_rate": 4.718446601941748e-06, |
| "loss": 0.0842, |
| "step": 78700 |
| }, |
| { |
| "epoch": 765.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.398560047149658, |
| "eval_runtime": 4.5154, |
| "eval_samples_per_second": 64.447, |
| "eval_steps_per_second": 4.208, |
| "step": 78795 |
| }, |
| { |
| "epoch": 765.05, |
| "learning_rate": 4.699029126213593e-06, |
| "loss": 0.0856, |
| "step": 78800 |
| }, |
| { |
| "epoch": 766.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.403796672821045, |
| "eval_runtime": 4.4093, |
| "eval_samples_per_second": 65.997, |
| "eval_steps_per_second": 4.309, |
| "step": 78898 |
| }, |
| { |
| "epoch": 766.02, |
| "learning_rate": 4.679611650485437e-06, |
| "loss": 0.0777, |
| "step": 78900 |
| }, |
| { |
| "epoch": 766.99, |
| "learning_rate": 4.660194174757282e-06, |
| "loss": 0.082, |
| "step": 79000 |
| }, |
| { |
| "epoch": 767.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.3815436363220215, |
| "eval_runtime": 4.4585, |
| "eval_samples_per_second": 65.269, |
| "eval_steps_per_second": 4.262, |
| "step": 79001 |
| }, |
| { |
| "epoch": 767.96, |
| "learning_rate": 4.640776699029126e-06, |
| "loss": 0.0787, |
| "step": 79100 |
| }, |
| { |
| "epoch": 768.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.409327507019043, |
| "eval_runtime": 4.4203, |
| "eval_samples_per_second": 65.832, |
| "eval_steps_per_second": 4.298, |
| "step": 79104 |
| }, |
| { |
| "epoch": 768.93, |
| "learning_rate": 4.6213592233009715e-06, |
| "loss": 0.0731, |
| "step": 79200 |
| }, |
| { |
| "epoch": 769.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.396090984344482, |
| "eval_runtime": 4.4163, |
| "eval_samples_per_second": 65.893, |
| "eval_steps_per_second": 4.302, |
| "step": 79207 |
| }, |
| { |
| "epoch": 769.9, |
| "learning_rate": 4.601941747572816e-06, |
| "loss": 0.0762, |
| "step": 79300 |
| }, |
| { |
| "epoch": 770.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.37462043762207, |
| "eval_runtime": 4.4241, |
| "eval_samples_per_second": 65.776, |
| "eval_steps_per_second": 4.295, |
| "step": 79310 |
| }, |
| { |
| "epoch": 770.87, |
| "learning_rate": 4.582524271844661e-06, |
| "loss": 0.0874, |
| "step": 79400 |
| }, |
| { |
| "epoch": 771.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.398296356201172, |
| "eval_runtime": 4.419, |
| "eval_samples_per_second": 65.853, |
| "eval_steps_per_second": 4.3, |
| "step": 79413 |
| }, |
| { |
| "epoch": 771.84, |
| "learning_rate": 4.563106796116505e-06, |
| "loss": 0.0835, |
| "step": 79500 |
| }, |
| { |
| "epoch": 772.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.426390171051025, |
| "eval_runtime": 4.3989, |
| "eval_samples_per_second": 66.153, |
| "eval_steps_per_second": 4.319, |
| "step": 79516 |
| }, |
| { |
| "epoch": 772.82, |
| "learning_rate": 4.54368932038835e-06, |
| "loss": 0.0841, |
| "step": 79600 |
| }, |
| { |
| "epoch": 773.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.42516565322876, |
| "eval_runtime": 4.415, |
| "eval_samples_per_second": 65.911, |
| "eval_steps_per_second": 4.303, |
| "step": 79619 |
| }, |
| { |
| "epoch": 773.79, |
| "learning_rate": 4.524271844660194e-06, |
| "loss": 0.0792, |
| "step": 79700 |
| }, |
| { |
| "epoch": 774.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.373010158538818, |
| "eval_runtime": 4.4077, |
| "eval_samples_per_second": 66.021, |
| "eval_steps_per_second": 4.311, |
| "step": 79722 |
| }, |
| { |
| "epoch": 774.76, |
| "learning_rate": 4.504854368932039e-06, |
| "loss": 0.0816, |
| "step": 79800 |
| }, |
| { |
| "epoch": 775.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.383403778076172, |
| "eval_runtime": 4.4067, |
| "eval_samples_per_second": 66.036, |
| "eval_steps_per_second": 4.312, |
| "step": 79825 |
| }, |
| { |
| "epoch": 775.73, |
| "learning_rate": 4.4854368932038836e-06, |
| "loss": 0.0928, |
| "step": 79900 |
| }, |
| { |
| "epoch": 776.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.469430923461914, |
| "eval_runtime": 4.3928, |
| "eval_samples_per_second": 66.244, |
| "eval_steps_per_second": 4.325, |
| "step": 79928 |
| }, |
| { |
| "epoch": 776.7, |
| "learning_rate": 4.466019417475729e-06, |
| "loss": 0.0739, |
| "step": 80000 |
| }, |
| { |
| "epoch": 777.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.380051612854004, |
| "eval_runtime": 4.4336, |
| "eval_samples_per_second": 65.635, |
| "eval_steps_per_second": 4.285, |
| "step": 80031 |
| }, |
| { |
| "epoch": 777.67, |
| "learning_rate": 4.446601941747573e-06, |
| "loss": 0.0778, |
| "step": 80100 |
| }, |
| { |
| "epoch": 778.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.382711887359619, |
| "eval_runtime": 4.3971, |
| "eval_samples_per_second": 66.18, |
| "eval_steps_per_second": 4.321, |
| "step": 80134 |
| }, |
| { |
| "epoch": 778.64, |
| "learning_rate": 4.427184466019418e-06, |
| "loss": 0.0826, |
| "step": 80200 |
| }, |
| { |
| "epoch": 779.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.497971534729004, |
| "eval_runtime": 4.3998, |
| "eval_samples_per_second": 66.139, |
| "eval_steps_per_second": 4.318, |
| "step": 80237 |
| }, |
| { |
| "epoch": 779.61, |
| "learning_rate": 4.407766990291263e-06, |
| "loss": 0.0873, |
| "step": 80300 |
| }, |
| { |
| "epoch": 780.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.38844108581543, |
| "eval_runtime": 4.4096, |
| "eval_samples_per_second": 65.993, |
| "eval_steps_per_second": 4.309, |
| "step": 80340 |
| }, |
| { |
| "epoch": 780.58, |
| "learning_rate": 4.388349514563107e-06, |
| "loss": 0.0762, |
| "step": 80400 |
| }, |
| { |
| "epoch": 781.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.383063316345215, |
| "eval_runtime": 4.3973, |
| "eval_samples_per_second": 66.176, |
| "eval_steps_per_second": 4.321, |
| "step": 80443 |
| }, |
| { |
| "epoch": 781.55, |
| "learning_rate": 4.368932038834952e-06, |
| "loss": 0.0802, |
| "step": 80500 |
| }, |
| { |
| "epoch": 782.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.444866180419922, |
| "eval_runtime": 4.3986, |
| "eval_samples_per_second": 66.158, |
| "eval_steps_per_second": 4.32, |
| "step": 80546 |
| }, |
| { |
| "epoch": 782.52, |
| "learning_rate": 4.3495145631067965e-06, |
| "loss": 0.0832, |
| "step": 80600 |
| }, |
| { |
| "epoch": 783.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.402950286865234, |
| "eval_runtime": 4.3971, |
| "eval_samples_per_second": 66.18, |
| "eval_steps_per_second": 4.321, |
| "step": 80649 |
| }, |
| { |
| "epoch": 783.5, |
| "learning_rate": 4.330097087378641e-06, |
| "loss": 0.0716, |
| "step": 80700 |
| }, |
| { |
| "epoch": 784.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.450810432434082, |
| "eval_runtime": 4.3963, |
| "eval_samples_per_second": 66.192, |
| "eval_steps_per_second": 4.322, |
| "step": 80752 |
| }, |
| { |
| "epoch": 784.47, |
| "learning_rate": 4.310679611650486e-06, |
| "loss": 0.0885, |
| "step": 80800 |
| }, |
| { |
| "epoch": 785.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.386898517608643, |
| "eval_runtime": 4.4618, |
| "eval_samples_per_second": 65.221, |
| "eval_steps_per_second": 4.258, |
| "step": 80855 |
| }, |
| { |
| "epoch": 785.44, |
| "learning_rate": 4.29126213592233e-06, |
| "loss": 0.0685, |
| "step": 80900 |
| }, |
| { |
| "epoch": 786.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.369156360626221, |
| "eval_runtime": 4.4618, |
| "eval_samples_per_second": 65.22, |
| "eval_steps_per_second": 4.258, |
| "step": 80958 |
| }, |
| { |
| "epoch": 786.41, |
| "learning_rate": 4.271844660194175e-06, |
| "loss": 0.0797, |
| "step": 81000 |
| }, |
| { |
| "epoch": 787.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.388444423675537, |
| "eval_runtime": 4.4049, |
| "eval_samples_per_second": 66.062, |
| "eval_steps_per_second": 4.313, |
| "step": 81061 |
| }, |
| { |
| "epoch": 787.38, |
| "learning_rate": 4.252427184466019e-06, |
| "loss": 0.0748, |
| "step": 81100 |
| }, |
| { |
| "epoch": 788.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.32634973526001, |
| "eval_runtime": 4.4157, |
| "eval_samples_per_second": 65.901, |
| "eval_steps_per_second": 4.303, |
| "step": 81164 |
| }, |
| { |
| "epoch": 788.35, |
| "learning_rate": 4.233009708737865e-06, |
| "loss": 0.0741, |
| "step": 81200 |
| }, |
| { |
| "epoch": 789.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.352422714233398, |
| "eval_runtime": 4.5192, |
| "eval_samples_per_second": 64.392, |
| "eval_steps_per_second": 4.204, |
| "step": 81267 |
| }, |
| { |
| "epoch": 789.32, |
| "learning_rate": 4.213592233009709e-06, |
| "loss": 0.0767, |
| "step": 81300 |
| }, |
| { |
| "epoch": 790.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 5.262473106384277, |
| "eval_runtime": 4.419, |
| "eval_samples_per_second": 65.852, |
| "eval_steps_per_second": 4.3, |
| "step": 81370 |
| }, |
| { |
| "epoch": 790.29, |
| "learning_rate": 4.194174757281554e-06, |
| "loss": 0.0814, |
| "step": 81400 |
| }, |
| { |
| "epoch": 791.0, |
| "eval_accuracy": 0.32989690721649484, |
| "eval_loss": 5.266846656799316, |
| "eval_runtime": 4.4649, |
| "eval_samples_per_second": 65.176, |
| "eval_steps_per_second": 4.255, |
| "step": 81473 |
| }, |
| { |
| "epoch": 791.26, |
| "learning_rate": 4.1747572815533986e-06, |
| "loss": 0.0845, |
| "step": 81500 |
| }, |
| { |
| "epoch": 792.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.23559045791626, |
| "eval_runtime": 4.4615, |
| "eval_samples_per_second": 65.225, |
| "eval_steps_per_second": 4.259, |
| "step": 81576 |
| }, |
| { |
| "epoch": 792.23, |
| "learning_rate": 4.155339805825243e-06, |
| "loss": 0.076, |
| "step": 81600 |
| }, |
| { |
| "epoch": 793.0, |
| "eval_accuracy": 0.3230240549828179, |
| "eval_loss": 5.261579990386963, |
| "eval_runtime": 4.4103, |
| "eval_samples_per_second": 65.982, |
| "eval_steps_per_second": 4.308, |
| "step": 81679 |
| }, |
| { |
| "epoch": 793.2, |
| "learning_rate": 4.135922330097088e-06, |
| "loss": 0.0769, |
| "step": 81700 |
| }, |
| { |
| "epoch": 794.0, |
| "eval_accuracy": 0.3333333333333333, |
| "eval_loss": 5.304605960845947, |
| "eval_runtime": 4.4261, |
| "eval_samples_per_second": 65.746, |
| "eval_steps_per_second": 4.293, |
| "step": 81782 |
| }, |
| { |
| "epoch": 794.17, |
| "learning_rate": 4.116504854368932e-06, |
| "loss": 0.0866, |
| "step": 81800 |
| }, |
| { |
| "epoch": 795.0, |
| "eval_accuracy": 0.32989690721649484, |
| "eval_loss": 5.290163040161133, |
| "eval_runtime": 4.4172, |
| "eval_samples_per_second": 65.88, |
| "eval_steps_per_second": 4.301, |
| "step": 81885 |
| }, |
| { |
| "epoch": 795.15, |
| "learning_rate": 4.097087378640777e-06, |
| "loss": 0.0772, |
| "step": 81900 |
| }, |
| { |
| "epoch": 796.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.3077616691589355, |
| "eval_runtime": 4.4159, |
| "eval_samples_per_second": 65.898, |
| "eval_steps_per_second": 4.303, |
| "step": 81988 |
| }, |
| { |
| "epoch": 796.12, |
| "learning_rate": 4.0776699029126215e-06, |
| "loss": 0.079, |
| "step": 82000 |
| }, |
| { |
| "epoch": 797.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.288947105407715, |
| "eval_runtime": 4.4024, |
| "eval_samples_per_second": 66.101, |
| "eval_steps_per_second": 4.316, |
| "step": 82091 |
| }, |
| { |
| "epoch": 797.09, |
| "learning_rate": 4.058252427184466e-06, |
| "loss": 0.0797, |
| "step": 82100 |
| }, |
| { |
| "epoch": 798.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.215836524963379, |
| "eval_runtime": 4.402, |
| "eval_samples_per_second": 66.107, |
| "eval_steps_per_second": 4.316, |
| "step": 82194 |
| }, |
| { |
| "epoch": 798.06, |
| "learning_rate": 4.038834951456311e-06, |
| "loss": 0.0802, |
| "step": 82200 |
| }, |
| { |
| "epoch": 799.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.312952041625977, |
| "eval_runtime": 4.3928, |
| "eval_samples_per_second": 66.245, |
| "eval_steps_per_second": 4.325, |
| "step": 82297 |
| }, |
| { |
| "epoch": 799.03, |
| "learning_rate": 4.019417475728156e-06, |
| "loss": 0.0736, |
| "step": 82300 |
| }, |
| { |
| "epoch": 800.0, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0859, |
| "step": 82400 |
| }, |
| { |
| "epoch": 800.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.284284591674805, |
| "eval_runtime": 4.4141, |
| "eval_samples_per_second": 65.926, |
| "eval_steps_per_second": 4.304, |
| "step": 82400 |
| }, |
| { |
| "epoch": 800.97, |
| "learning_rate": 3.980582524271845e-06, |
| "loss": 0.0789, |
| "step": 82500 |
| }, |
| { |
| "epoch": 801.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.243020534515381, |
| "eval_runtime": 4.4114, |
| "eval_samples_per_second": 65.965, |
| "eval_steps_per_second": 4.307, |
| "step": 82503 |
| }, |
| { |
| "epoch": 801.94, |
| "learning_rate": 3.96116504854369e-06, |
| "loss": 0.0809, |
| "step": 82600 |
| }, |
| { |
| "epoch": 802.0, |
| "eval_accuracy": 0.3436426116838488, |
| "eval_loss": 5.216651916503906, |
| "eval_runtime": 4.3885, |
| "eval_samples_per_second": 66.31, |
| "eval_steps_per_second": 4.33, |
| "step": 82606 |
| }, |
| { |
| "epoch": 802.91, |
| "learning_rate": 3.941747572815534e-06, |
| "loss": 0.0787, |
| "step": 82700 |
| }, |
| { |
| "epoch": 803.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.220209121704102, |
| "eval_runtime": 4.4262, |
| "eval_samples_per_second": 65.745, |
| "eval_steps_per_second": 4.293, |
| "step": 82709 |
| }, |
| { |
| "epoch": 803.88, |
| "learning_rate": 3.922330097087379e-06, |
| "loss": 0.0878, |
| "step": 82800 |
| }, |
| { |
| "epoch": 804.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.356659889221191, |
| "eval_runtime": 4.4019, |
| "eval_samples_per_second": 66.107, |
| "eval_steps_per_second": 4.316, |
| "step": 82812 |
| }, |
| { |
| "epoch": 804.85, |
| "learning_rate": 3.902912621359224e-06, |
| "loss": 0.0772, |
| "step": 82900 |
| }, |
| { |
| "epoch": 805.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.398636341094971, |
| "eval_runtime": 4.4271, |
| "eval_samples_per_second": 65.731, |
| "eval_steps_per_second": 4.292, |
| "step": 82915 |
| }, |
| { |
| "epoch": 805.83, |
| "learning_rate": 3.883495145631068e-06, |
| "loss": 0.0809, |
| "step": 83000 |
| }, |
| { |
| "epoch": 806.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.357775688171387, |
| "eval_runtime": 4.4302, |
| "eval_samples_per_second": 65.685, |
| "eval_steps_per_second": 4.289, |
| "step": 83018 |
| }, |
| { |
| "epoch": 806.8, |
| "learning_rate": 3.864077669902913e-06, |
| "loss": 0.0815, |
| "step": 83100 |
| }, |
| { |
| "epoch": 807.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.314184188842773, |
| "eval_runtime": 4.4506, |
| "eval_samples_per_second": 65.384, |
| "eval_steps_per_second": 4.269, |
| "step": 83121 |
| }, |
| { |
| "epoch": 807.77, |
| "learning_rate": 3.844660194174757e-06, |
| "loss": 0.0762, |
| "step": 83200 |
| }, |
| { |
| "epoch": 808.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.285727500915527, |
| "eval_runtime": 4.4066, |
| "eval_samples_per_second": 66.038, |
| "eval_steps_per_second": 4.312, |
| "step": 83224 |
| }, |
| { |
| "epoch": 808.74, |
| "learning_rate": 3.825242718446602e-06, |
| "loss": 0.0732, |
| "step": 83300 |
| }, |
| { |
| "epoch": 809.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.2570672035217285, |
| "eval_runtime": 4.3998, |
| "eval_samples_per_second": 66.14, |
| "eval_steps_per_second": 4.318, |
| "step": 83327 |
| }, |
| { |
| "epoch": 809.71, |
| "learning_rate": 3.8058252427184465e-06, |
| "loss": 0.0779, |
| "step": 83400 |
| }, |
| { |
| "epoch": 810.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.28815221786499, |
| "eval_runtime": 4.3987, |
| "eval_samples_per_second": 66.156, |
| "eval_steps_per_second": 4.319, |
| "step": 83430 |
| }, |
| { |
| "epoch": 810.68, |
| "learning_rate": 3.7864077669902915e-06, |
| "loss": 0.0872, |
| "step": 83500 |
| }, |
| { |
| "epoch": 811.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.345547676086426, |
| "eval_runtime": 4.4198, |
| "eval_samples_per_second": 65.84, |
| "eval_steps_per_second": 4.299, |
| "step": 83533 |
| }, |
| { |
| "epoch": 811.65, |
| "learning_rate": 3.7669902912621365e-06, |
| "loss": 0.076, |
| "step": 83600 |
| }, |
| { |
| "epoch": 812.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.280517101287842, |
| "eval_runtime": 4.4067, |
| "eval_samples_per_second": 66.037, |
| "eval_steps_per_second": 4.312, |
| "step": 83636 |
| }, |
| { |
| "epoch": 812.62, |
| "learning_rate": 3.7475728155339807e-06, |
| "loss": 0.0894, |
| "step": 83700 |
| }, |
| { |
| "epoch": 813.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.292069435119629, |
| "eval_runtime": 4.4116, |
| "eval_samples_per_second": 65.963, |
| "eval_steps_per_second": 4.307, |
| "step": 83739 |
| }, |
| { |
| "epoch": 813.59, |
| "learning_rate": 3.7281553398058257e-06, |
| "loss": 0.0724, |
| "step": 83800 |
| }, |
| { |
| "epoch": 814.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.350996971130371, |
| "eval_runtime": 4.4025, |
| "eval_samples_per_second": 66.099, |
| "eval_steps_per_second": 4.316, |
| "step": 83842 |
| }, |
| { |
| "epoch": 814.56, |
| "learning_rate": 3.70873786407767e-06, |
| "loss": 0.0828, |
| "step": 83900 |
| }, |
| { |
| "epoch": 815.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.30106782913208, |
| "eval_runtime": 4.4018, |
| "eval_samples_per_second": 66.109, |
| "eval_steps_per_second": 4.316, |
| "step": 83945 |
| }, |
| { |
| "epoch": 815.53, |
| "learning_rate": 3.689320388349515e-06, |
| "loss": 0.0818, |
| "step": 84000 |
| }, |
| { |
| "epoch": 816.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 5.294423580169678, |
| "eval_runtime": 4.4503, |
| "eval_samples_per_second": 65.389, |
| "eval_steps_per_second": 4.269, |
| "step": 84048 |
| }, |
| { |
| "epoch": 816.5, |
| "learning_rate": 3.66990291262136e-06, |
| "loss": 0.0728, |
| "step": 84100 |
| }, |
| { |
| "epoch": 817.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.252551555633545, |
| "eval_runtime": 4.4458, |
| "eval_samples_per_second": 65.456, |
| "eval_steps_per_second": 4.274, |
| "step": 84151 |
| }, |
| { |
| "epoch": 817.48, |
| "learning_rate": 3.650485436893204e-06, |
| "loss": 0.0776, |
| "step": 84200 |
| }, |
| { |
| "epoch": 818.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.264585494995117, |
| "eval_runtime": 4.4128, |
| "eval_samples_per_second": 65.945, |
| "eval_steps_per_second": 4.306, |
| "step": 84254 |
| }, |
| { |
| "epoch": 818.45, |
| "learning_rate": 3.631067961165049e-06, |
| "loss": 0.0768, |
| "step": 84300 |
| }, |
| { |
| "epoch": 819.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.3151092529296875, |
| "eval_runtime": 4.3951, |
| "eval_samples_per_second": 66.209, |
| "eval_steps_per_second": 4.323, |
| "step": 84357 |
| }, |
| { |
| "epoch": 819.42, |
| "learning_rate": 3.611650485436893e-06, |
| "loss": 0.0725, |
| "step": 84400 |
| }, |
| { |
| "epoch": 820.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.304262161254883, |
| "eval_runtime": 4.469, |
| "eval_samples_per_second": 65.115, |
| "eval_steps_per_second": 4.251, |
| "step": 84460 |
| }, |
| { |
| "epoch": 820.39, |
| "learning_rate": 3.592233009708738e-06, |
| "loss": 0.077, |
| "step": 84500 |
| }, |
| { |
| "epoch": 821.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.353638172149658, |
| "eval_runtime": 4.4311, |
| "eval_samples_per_second": 65.673, |
| "eval_steps_per_second": 4.288, |
| "step": 84563 |
| }, |
| { |
| "epoch": 821.36, |
| "learning_rate": 3.5728155339805828e-06, |
| "loss": 0.0815, |
| "step": 84600 |
| }, |
| { |
| "epoch": 822.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.324342727661133, |
| "eval_runtime": 4.4038, |
| "eval_samples_per_second": 66.079, |
| "eval_steps_per_second": 4.314, |
| "step": 84666 |
| }, |
| { |
| "epoch": 822.33, |
| "learning_rate": 3.5533980582524273e-06, |
| "loss": 0.0753, |
| "step": 84700 |
| }, |
| { |
| "epoch": 823.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.3727874755859375, |
| "eval_runtime": 4.4068, |
| "eval_samples_per_second": 66.034, |
| "eval_steps_per_second": 4.312, |
| "step": 84769 |
| }, |
| { |
| "epoch": 823.3, |
| "learning_rate": 3.5339805825242724e-06, |
| "loss": 0.0837, |
| "step": 84800 |
| }, |
| { |
| "epoch": 824.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.35664176940918, |
| "eval_runtime": 4.4441, |
| "eval_samples_per_second": 65.48, |
| "eval_steps_per_second": 4.275, |
| "step": 84872 |
| }, |
| { |
| "epoch": 824.27, |
| "learning_rate": 3.5145631067961165e-06, |
| "loss": 0.0786, |
| "step": 84900 |
| }, |
| { |
| "epoch": 825.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.3486647605896, |
| "eval_runtime": 4.4202, |
| "eval_samples_per_second": 65.834, |
| "eval_steps_per_second": 4.298, |
| "step": 84975 |
| }, |
| { |
| "epoch": 825.24, |
| "learning_rate": 3.4951456310679615e-06, |
| "loss": 0.0897, |
| "step": 85000 |
| }, |
| { |
| "epoch": 826.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.384740829467773, |
| "eval_runtime": 4.4168, |
| "eval_samples_per_second": 65.884, |
| "eval_steps_per_second": 4.302, |
| "step": 85078 |
| }, |
| { |
| "epoch": 826.21, |
| "learning_rate": 3.475728155339806e-06, |
| "loss": 0.079, |
| "step": 85100 |
| }, |
| { |
| "epoch": 827.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.357576847076416, |
| "eval_runtime": 4.4063, |
| "eval_samples_per_second": 66.042, |
| "eval_steps_per_second": 4.312, |
| "step": 85181 |
| }, |
| { |
| "epoch": 827.18, |
| "learning_rate": 3.4563106796116507e-06, |
| "loss": 0.0791, |
| "step": 85200 |
| }, |
| { |
| "epoch": 828.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.343855857849121, |
| "eval_runtime": 4.4485, |
| "eval_samples_per_second": 65.415, |
| "eval_steps_per_second": 4.271, |
| "step": 85284 |
| }, |
| { |
| "epoch": 828.16, |
| "learning_rate": 3.4368932038834957e-06, |
| "loss": 0.0778, |
| "step": 85300 |
| }, |
| { |
| "epoch": 829.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.3456902503967285, |
| "eval_runtime": 4.4189, |
| "eval_samples_per_second": 65.853, |
| "eval_steps_per_second": 4.3, |
| "step": 85387 |
| }, |
| { |
| "epoch": 829.13, |
| "learning_rate": 3.41747572815534e-06, |
| "loss": 0.0732, |
| "step": 85400 |
| }, |
| { |
| "epoch": 830.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.346973896026611, |
| "eval_runtime": 4.4395, |
| "eval_samples_per_second": 65.549, |
| "eval_steps_per_second": 4.28, |
| "step": 85490 |
| }, |
| { |
| "epoch": 830.1, |
| "learning_rate": 3.398058252427185e-06, |
| "loss": 0.0752, |
| "step": 85500 |
| }, |
| { |
| "epoch": 831.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.329358100891113, |
| "eval_runtime": 4.4045, |
| "eval_samples_per_second": 66.068, |
| "eval_steps_per_second": 4.314, |
| "step": 85593 |
| }, |
| { |
| "epoch": 831.07, |
| "learning_rate": 3.3786407766990294e-06, |
| "loss": 0.0823, |
| "step": 85600 |
| }, |
| { |
| "epoch": 832.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.416337966918945, |
| "eval_runtime": 4.4167, |
| "eval_samples_per_second": 65.887, |
| "eval_steps_per_second": 4.302, |
| "step": 85696 |
| }, |
| { |
| "epoch": 832.04, |
| "learning_rate": 3.359223300970874e-06, |
| "loss": 0.0803, |
| "step": 85700 |
| }, |
| { |
| "epoch": 833.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.396190643310547, |
| "eval_runtime": 4.4054, |
| "eval_samples_per_second": 66.055, |
| "eval_steps_per_second": 4.313, |
| "step": 85799 |
| }, |
| { |
| "epoch": 833.01, |
| "learning_rate": 3.3398058252427186e-06, |
| "loss": 0.0769, |
| "step": 85800 |
| }, |
| { |
| "epoch": 833.98, |
| "learning_rate": 3.320388349514563e-06, |
| "loss": 0.0792, |
| "step": 85900 |
| }, |
| { |
| "epoch": 834.0, |
| "eval_accuracy": 0.3127147766323024, |
| "eval_loss": 5.394441604614258, |
| "eval_runtime": 4.4439, |
| "eval_samples_per_second": 65.483, |
| "eval_steps_per_second": 4.276, |
| "step": 85902 |
| }, |
| { |
| "epoch": 834.95, |
| "learning_rate": 3.300970873786408e-06, |
| "loss": 0.0701, |
| "step": 86000 |
| }, |
| { |
| "epoch": 835.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.410513877868652, |
| "eval_runtime": 4.415, |
| "eval_samples_per_second": 65.912, |
| "eval_steps_per_second": 4.304, |
| "step": 86005 |
| }, |
| { |
| "epoch": 835.92, |
| "learning_rate": 3.2815533980582528e-06, |
| "loss": 0.0853, |
| "step": 86100 |
| }, |
| { |
| "epoch": 836.0, |
| "eval_accuracy": 0.3161512027491409, |
| "eval_loss": 5.340237140655518, |
| "eval_runtime": 4.4626, |
| "eval_samples_per_second": 65.209, |
| "eval_steps_per_second": 4.258, |
| "step": 86108 |
| }, |
| { |
| "epoch": 836.89, |
| "learning_rate": 3.2621359223300974e-06, |
| "loss": 0.0753, |
| "step": 86200 |
| }, |
| { |
| "epoch": 837.0, |
| "eval_accuracy": 0.31958762886597936, |
| "eval_loss": 5.384557723999023, |
| "eval_runtime": 4.4269, |
| "eval_samples_per_second": 65.734, |
| "eval_steps_per_second": 4.292, |
| "step": 86211 |
| }, |
| { |
| "epoch": 837.86, |
| "learning_rate": 3.242718446601942e-06, |
| "loss": 0.0867, |
| "step": 86300 |
| }, |
| { |
| "epoch": 838.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.402867317199707, |
| "eval_runtime": 4.4242, |
| "eval_samples_per_second": 65.774, |
| "eval_steps_per_second": 4.295, |
| "step": 86314 |
| }, |
| { |
| "epoch": 838.83, |
| "learning_rate": 3.2233009708737865e-06, |
| "loss": 0.0722, |
| "step": 86400 |
| }, |
| { |
| "epoch": 839.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.361295223236084, |
| "eval_runtime": 4.4189, |
| "eval_samples_per_second": 65.854, |
| "eval_steps_per_second": 4.3, |
| "step": 86417 |
| }, |
| { |
| "epoch": 839.81, |
| "learning_rate": 3.2038834951456315e-06, |
| "loss": 0.0686, |
| "step": 86500 |
| }, |
| { |
| "epoch": 840.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.396561622619629, |
| "eval_runtime": 4.4714, |
| "eval_samples_per_second": 65.08, |
| "eval_steps_per_second": 4.249, |
| "step": 86520 |
| }, |
| { |
| "epoch": 840.78, |
| "learning_rate": 3.184466019417476e-06, |
| "loss": 0.0891, |
| "step": 86600 |
| }, |
| { |
| "epoch": 841.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.3979573249816895, |
| "eval_runtime": 4.414, |
| "eval_samples_per_second": 65.927, |
| "eval_steps_per_second": 4.305, |
| "step": 86623 |
| }, |
| { |
| "epoch": 841.75, |
| "learning_rate": 3.1650485436893207e-06, |
| "loss": 0.0826, |
| "step": 86700 |
| }, |
| { |
| "epoch": 842.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.337278366088867, |
| "eval_runtime": 4.4387, |
| "eval_samples_per_second": 65.56, |
| "eval_steps_per_second": 4.281, |
| "step": 86726 |
| }, |
| { |
| "epoch": 842.72, |
| "learning_rate": 3.1456310679611653e-06, |
| "loss": 0.0767, |
| "step": 86800 |
| }, |
| { |
| "epoch": 843.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.402046203613281, |
| "eval_runtime": 4.4158, |
| "eval_samples_per_second": 65.9, |
| "eval_steps_per_second": 4.303, |
| "step": 86829 |
| }, |
| { |
| "epoch": 843.69, |
| "learning_rate": 3.12621359223301e-06, |
| "loss": 0.0816, |
| "step": 86900 |
| }, |
| { |
| "epoch": 844.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.381257057189941, |
| "eval_runtime": 4.4224, |
| "eval_samples_per_second": 65.801, |
| "eval_steps_per_second": 4.296, |
| "step": 86932 |
| }, |
| { |
| "epoch": 844.66, |
| "learning_rate": 3.1067961165048544e-06, |
| "loss": 0.0775, |
| "step": 87000 |
| }, |
| { |
| "epoch": 845.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.396775722503662, |
| "eval_runtime": 4.4202, |
| "eval_samples_per_second": 65.834, |
| "eval_steps_per_second": 4.298, |
| "step": 87035 |
| }, |
| { |
| "epoch": 845.63, |
| "learning_rate": 3.0873786407766995e-06, |
| "loss": 0.0694, |
| "step": 87100 |
| }, |
| { |
| "epoch": 846.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.428651809692383, |
| "eval_runtime": 4.4207, |
| "eval_samples_per_second": 65.827, |
| "eval_steps_per_second": 4.298, |
| "step": 87138 |
| }, |
| { |
| "epoch": 846.6, |
| "learning_rate": 3.067961165048544e-06, |
| "loss": 0.0816, |
| "step": 87200 |
| }, |
| { |
| "epoch": 847.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.442520618438721, |
| "eval_runtime": 4.4107, |
| "eval_samples_per_second": 65.976, |
| "eval_steps_per_second": 4.308, |
| "step": 87241 |
| }, |
| { |
| "epoch": 847.57, |
| "learning_rate": 3.0485436893203886e-06, |
| "loss": 0.0697, |
| "step": 87300 |
| }, |
| { |
| "epoch": 848.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.404880046844482, |
| "eval_runtime": 4.4039, |
| "eval_samples_per_second": 66.078, |
| "eval_steps_per_second": 4.314, |
| "step": 87344 |
| }, |
| { |
| "epoch": 848.54, |
| "learning_rate": 3.029126213592233e-06, |
| "loss": 0.0771, |
| "step": 87400 |
| }, |
| { |
| "epoch": 849.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.404363632202148, |
| "eval_runtime": 4.4102, |
| "eval_samples_per_second": 65.984, |
| "eval_steps_per_second": 4.308, |
| "step": 87447 |
| }, |
| { |
| "epoch": 849.51, |
| "learning_rate": 3.0097087378640778e-06, |
| "loss": 0.0712, |
| "step": 87500 |
| }, |
| { |
| "epoch": 850.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.402904987335205, |
| "eval_runtime": 4.4059, |
| "eval_samples_per_second": 66.048, |
| "eval_steps_per_second": 4.312, |
| "step": 87550 |
| }, |
| { |
| "epoch": 850.49, |
| "learning_rate": 2.9902912621359224e-06, |
| "loss": 0.0806, |
| "step": 87600 |
| }, |
| { |
| "epoch": 851.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.3960394859313965, |
| "eval_runtime": 4.4669, |
| "eval_samples_per_second": 65.146, |
| "eval_steps_per_second": 4.253, |
| "step": 87653 |
| }, |
| { |
| "epoch": 851.46, |
| "learning_rate": 2.9708737864077674e-06, |
| "loss": 0.0766, |
| "step": 87700 |
| }, |
| { |
| "epoch": 852.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.387826919555664, |
| "eval_runtime": 4.4035, |
| "eval_samples_per_second": 66.084, |
| "eval_steps_per_second": 4.315, |
| "step": 87756 |
| }, |
| { |
| "epoch": 852.43, |
| "learning_rate": 2.951456310679612e-06, |
| "loss": 0.074, |
| "step": 87800 |
| }, |
| { |
| "epoch": 853.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.421268463134766, |
| "eval_runtime": 4.4094, |
| "eval_samples_per_second": 65.995, |
| "eval_steps_per_second": 4.309, |
| "step": 87859 |
| }, |
| { |
| "epoch": 853.4, |
| "learning_rate": 2.9320388349514565e-06, |
| "loss": 0.0779, |
| "step": 87900 |
| }, |
| { |
| "epoch": 854.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.402750492095947, |
| "eval_runtime": 4.4054, |
| "eval_samples_per_second": 66.055, |
| "eval_steps_per_second": 4.313, |
| "step": 87962 |
| }, |
| { |
| "epoch": 854.37, |
| "learning_rate": 2.912621359223301e-06, |
| "loss": 0.084, |
| "step": 88000 |
| }, |
| { |
| "epoch": 855.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.471996784210205, |
| "eval_runtime": 4.4141, |
| "eval_samples_per_second": 65.925, |
| "eval_steps_per_second": 4.304, |
| "step": 88065 |
| }, |
| { |
| "epoch": 855.34, |
| "learning_rate": 2.8932038834951457e-06, |
| "loss": 0.0757, |
| "step": 88100 |
| }, |
| { |
| "epoch": 856.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.4470367431640625, |
| "eval_runtime": 4.4149, |
| "eval_samples_per_second": 65.913, |
| "eval_steps_per_second": 4.304, |
| "step": 88168 |
| }, |
| { |
| "epoch": 856.31, |
| "learning_rate": 2.8737864077669903e-06, |
| "loss": 0.0763, |
| "step": 88200 |
| }, |
| { |
| "epoch": 857.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.443137168884277, |
| "eval_runtime": 4.4241, |
| "eval_samples_per_second": 65.776, |
| "eval_steps_per_second": 4.295, |
| "step": 88271 |
| }, |
| { |
| "epoch": 857.28, |
| "learning_rate": 2.8543689320388353e-06, |
| "loss": 0.0816, |
| "step": 88300 |
| }, |
| { |
| "epoch": 858.0, |
| "eval_accuracy": 0.27491408934707906, |
| "eval_loss": 5.41270112991333, |
| "eval_runtime": 4.4182, |
| "eval_samples_per_second": 65.864, |
| "eval_steps_per_second": 4.3, |
| "step": 88374 |
| }, |
| { |
| "epoch": 858.25, |
| "learning_rate": 2.83495145631068e-06, |
| "loss": 0.0761, |
| "step": 88400 |
| }, |
| { |
| "epoch": 859.0, |
| "eval_accuracy": 0.2646048109965636, |
| "eval_loss": 5.420130729675293, |
| "eval_runtime": 4.4118, |
| "eval_samples_per_second": 65.959, |
| "eval_steps_per_second": 4.307, |
| "step": 88477 |
| }, |
| { |
| "epoch": 859.22, |
| "learning_rate": 2.8155339805825245e-06, |
| "loss": 0.093, |
| "step": 88500 |
| }, |
| { |
| "epoch": 860.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.346418380737305, |
| "eval_runtime": 4.41, |
| "eval_samples_per_second": 65.987, |
| "eval_steps_per_second": 4.308, |
| "step": 88580 |
| }, |
| { |
| "epoch": 860.19, |
| "learning_rate": 2.796116504854369e-06, |
| "loss": 0.0729, |
| "step": 88600 |
| }, |
| { |
| "epoch": 861.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.3696441650390625, |
| "eval_runtime": 4.459, |
| "eval_samples_per_second": 65.262, |
| "eval_steps_per_second": 4.261, |
| "step": 88683 |
| }, |
| { |
| "epoch": 861.17, |
| "learning_rate": 2.7766990291262136e-06, |
| "loss": 0.0792, |
| "step": 88700 |
| }, |
| { |
| "epoch": 862.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.340893268585205, |
| "eval_runtime": 4.4118, |
| "eval_samples_per_second": 65.959, |
| "eval_steps_per_second": 4.307, |
| "step": 88786 |
| }, |
| { |
| "epoch": 862.14, |
| "learning_rate": 2.7572815533980586e-06, |
| "loss": 0.0742, |
| "step": 88800 |
| }, |
| { |
| "epoch": 863.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.372981071472168, |
| "eval_runtime": 4.4766, |
| "eval_samples_per_second": 65.004, |
| "eval_steps_per_second": 4.244, |
| "step": 88889 |
| }, |
| { |
| "epoch": 863.11, |
| "learning_rate": 2.7378640776699032e-06, |
| "loss": 0.0795, |
| "step": 88900 |
| }, |
| { |
| "epoch": 864.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.429410457611084, |
| "eval_runtime": 4.4651, |
| "eval_samples_per_second": 65.171, |
| "eval_steps_per_second": 4.255, |
| "step": 88992 |
| }, |
| { |
| "epoch": 864.08, |
| "learning_rate": 2.718446601941748e-06, |
| "loss": 0.0701, |
| "step": 89000 |
| }, |
| { |
| "epoch": 865.0, |
| "eval_accuracy": 0.27147766323024053, |
| "eval_loss": 5.41763973236084, |
| "eval_runtime": 4.4618, |
| "eval_samples_per_second": 65.221, |
| "eval_steps_per_second": 4.258, |
| "step": 89095 |
| }, |
| { |
| "epoch": 865.05, |
| "learning_rate": 2.6990291262135924e-06, |
| "loss": 0.087, |
| "step": 89100 |
| }, |
| { |
| "epoch": 866.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.433925151824951, |
| "eval_runtime": 4.4181, |
| "eval_samples_per_second": 65.866, |
| "eval_steps_per_second": 4.301, |
| "step": 89198 |
| }, |
| { |
| "epoch": 866.02, |
| "learning_rate": 2.679611650485437e-06, |
| "loss": 0.0749, |
| "step": 89200 |
| }, |
| { |
| "epoch": 866.99, |
| "learning_rate": 2.660194174757282e-06, |
| "loss": 0.0775, |
| "step": 89300 |
| }, |
| { |
| "epoch": 867.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.466894626617432, |
| "eval_runtime": 4.397, |
| "eval_samples_per_second": 66.182, |
| "eval_steps_per_second": 4.321, |
| "step": 89301 |
| }, |
| { |
| "epoch": 867.96, |
| "learning_rate": 2.6407766990291266e-06, |
| "loss": 0.0764, |
| "step": 89400 |
| }, |
| { |
| "epoch": 868.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.4773712158203125, |
| "eval_runtime": 4.4111, |
| "eval_samples_per_second": 65.971, |
| "eval_steps_per_second": 4.307, |
| "step": 89404 |
| }, |
| { |
| "epoch": 868.93, |
| "learning_rate": 2.621359223300971e-06, |
| "loss": 0.0827, |
| "step": 89500 |
| }, |
| { |
| "epoch": 869.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.422665119171143, |
| "eval_runtime": 4.4281, |
| "eval_samples_per_second": 65.717, |
| "eval_steps_per_second": 4.291, |
| "step": 89507 |
| }, |
| { |
| "epoch": 869.9, |
| "learning_rate": 2.6019417475728157e-06, |
| "loss": 0.0757, |
| "step": 89600 |
| }, |
| { |
| "epoch": 870.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.422026634216309, |
| "eval_runtime": 4.4452, |
| "eval_samples_per_second": 65.464, |
| "eval_steps_per_second": 4.274, |
| "step": 89610 |
| }, |
| { |
| "epoch": 870.87, |
| "learning_rate": 2.5825242718446603e-06, |
| "loss": 0.0761, |
| "step": 89700 |
| }, |
| { |
| "epoch": 871.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.395354747772217, |
| "eval_runtime": 4.5037, |
| "eval_samples_per_second": 64.614, |
| "eval_steps_per_second": 4.219, |
| "step": 89713 |
| }, |
| { |
| "epoch": 871.84, |
| "learning_rate": 2.5631067961165053e-06, |
| "loss": 0.0777, |
| "step": 89800 |
| }, |
| { |
| "epoch": 872.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.385969638824463, |
| "eval_runtime": 4.4447, |
| "eval_samples_per_second": 65.471, |
| "eval_steps_per_second": 4.275, |
| "step": 89816 |
| }, |
| { |
| "epoch": 872.82, |
| "learning_rate": 2.5436893203883495e-06, |
| "loss": 0.0737, |
| "step": 89900 |
| }, |
| { |
| "epoch": 873.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.362537860870361, |
| "eval_runtime": 4.4093, |
| "eval_samples_per_second": 65.997, |
| "eval_steps_per_second": 4.309, |
| "step": 89919 |
| }, |
| { |
| "epoch": 873.79, |
| "learning_rate": 2.5242718446601945e-06, |
| "loss": 0.0777, |
| "step": 90000 |
| }, |
| { |
| "epoch": 874.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.413704872131348, |
| "eval_runtime": 4.4462, |
| "eval_samples_per_second": 65.45, |
| "eval_steps_per_second": 4.273, |
| "step": 90022 |
| }, |
| { |
| "epoch": 874.76, |
| "learning_rate": 2.504854368932039e-06, |
| "loss": 0.0758, |
| "step": 90100 |
| }, |
| { |
| "epoch": 875.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.415232181549072, |
| "eval_runtime": 4.408, |
| "eval_samples_per_second": 66.016, |
| "eval_steps_per_second": 4.31, |
| "step": 90125 |
| }, |
| { |
| "epoch": 875.73, |
| "learning_rate": 2.4854368932038836e-06, |
| "loss": 0.0764, |
| "step": 90200 |
| }, |
| { |
| "epoch": 876.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.38121223449707, |
| "eval_runtime": 4.4095, |
| "eval_samples_per_second": 65.993, |
| "eval_steps_per_second": 4.309, |
| "step": 90228 |
| }, |
| { |
| "epoch": 876.7, |
| "learning_rate": 2.4660194174757286e-06, |
| "loss": 0.087, |
| "step": 90300 |
| }, |
| { |
| "epoch": 877.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.375702857971191, |
| "eval_runtime": 4.471, |
| "eval_samples_per_second": 65.086, |
| "eval_steps_per_second": 4.25, |
| "step": 90331 |
| }, |
| { |
| "epoch": 877.67, |
| "learning_rate": 2.4466019417475732e-06, |
| "loss": 0.0705, |
| "step": 90400 |
| }, |
| { |
| "epoch": 878.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.39950704574585, |
| "eval_runtime": 4.3946, |
| "eval_samples_per_second": 66.218, |
| "eval_steps_per_second": 4.323, |
| "step": 90434 |
| }, |
| { |
| "epoch": 878.64, |
| "learning_rate": 2.427184466019418e-06, |
| "loss": 0.0831, |
| "step": 90500 |
| }, |
| { |
| "epoch": 879.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.375514984130859, |
| "eval_runtime": 4.405, |
| "eval_samples_per_second": 66.062, |
| "eval_steps_per_second": 4.313, |
| "step": 90537 |
| }, |
| { |
| "epoch": 879.61, |
| "learning_rate": 2.4077669902912624e-06, |
| "loss": 0.0692, |
| "step": 90600 |
| }, |
| { |
| "epoch": 880.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.384296417236328, |
| "eval_runtime": 4.5292, |
| "eval_samples_per_second": 64.249, |
| "eval_steps_per_second": 4.195, |
| "step": 90640 |
| }, |
| { |
| "epoch": 880.58, |
| "learning_rate": 2.388349514563107e-06, |
| "loss": 0.0752, |
| "step": 90700 |
| }, |
| { |
| "epoch": 881.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.397815704345703, |
| "eval_runtime": 4.4681, |
| "eval_samples_per_second": 65.128, |
| "eval_steps_per_second": 4.252, |
| "step": 90743 |
| }, |
| { |
| "epoch": 881.55, |
| "learning_rate": 2.3689320388349516e-06, |
| "loss": 0.0732, |
| "step": 90800 |
| }, |
| { |
| "epoch": 882.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.387296676635742, |
| "eval_runtime": 4.3718, |
| "eval_samples_per_second": 66.564, |
| "eval_steps_per_second": 4.346, |
| "step": 90846 |
| }, |
| { |
| "epoch": 882.52, |
| "learning_rate": 2.3495145631067966e-06, |
| "loss": 0.0836, |
| "step": 90900 |
| }, |
| { |
| "epoch": 883.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.39614725112915, |
| "eval_runtime": 4.3772, |
| "eval_samples_per_second": 66.481, |
| "eval_steps_per_second": 4.341, |
| "step": 90949 |
| }, |
| { |
| "epoch": 883.5, |
| "learning_rate": 2.330097087378641e-06, |
| "loss": 0.0761, |
| "step": 91000 |
| }, |
| { |
| "epoch": 884.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.415928363800049, |
| "eval_runtime": 4.4665, |
| "eval_samples_per_second": 65.152, |
| "eval_steps_per_second": 4.254, |
| "step": 91052 |
| }, |
| { |
| "epoch": 884.47, |
| "learning_rate": 2.3106796116504857e-06, |
| "loss": 0.082, |
| "step": 91100 |
| }, |
| { |
| "epoch": 885.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.418325424194336, |
| "eval_runtime": 4.4755, |
| "eval_samples_per_second": 65.021, |
| "eval_steps_per_second": 4.245, |
| "step": 91155 |
| }, |
| { |
| "epoch": 885.44, |
| "learning_rate": 2.2912621359223303e-06, |
| "loss": 0.0729, |
| "step": 91200 |
| }, |
| { |
| "epoch": 886.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.443818092346191, |
| "eval_runtime": 4.3458, |
| "eval_samples_per_second": 66.961, |
| "eval_steps_per_second": 4.372, |
| "step": 91258 |
| }, |
| { |
| "epoch": 886.41, |
| "learning_rate": 2.271844660194175e-06, |
| "loss": 0.0908, |
| "step": 91300 |
| }, |
| { |
| "epoch": 887.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.458770275115967, |
| "eval_runtime": 4.3756, |
| "eval_samples_per_second": 66.505, |
| "eval_steps_per_second": 4.342, |
| "step": 91361 |
| }, |
| { |
| "epoch": 887.38, |
| "learning_rate": 2.2524271844660195e-06, |
| "loss": 0.0677, |
| "step": 91400 |
| }, |
| { |
| "epoch": 888.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.484026908874512, |
| "eval_runtime": 4.4708, |
| "eval_samples_per_second": 65.089, |
| "eval_steps_per_second": 4.25, |
| "step": 91464 |
| }, |
| { |
| "epoch": 888.35, |
| "learning_rate": 2.2330097087378645e-06, |
| "loss": 0.0821, |
| "step": 91500 |
| }, |
| { |
| "epoch": 889.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.466355323791504, |
| "eval_runtime": 4.3868, |
| "eval_samples_per_second": 66.335, |
| "eval_steps_per_second": 4.331, |
| "step": 91567 |
| }, |
| { |
| "epoch": 889.32, |
| "learning_rate": 2.213592233009709e-06, |
| "loss": 0.0812, |
| "step": 91600 |
| }, |
| { |
| "epoch": 890.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.5019025802612305, |
| "eval_runtime": 4.3644, |
| "eval_samples_per_second": 66.676, |
| "eval_steps_per_second": 4.353, |
| "step": 91670 |
| }, |
| { |
| "epoch": 890.29, |
| "learning_rate": 2.1941747572815537e-06, |
| "loss": 0.0849, |
| "step": 91700 |
| }, |
| { |
| "epoch": 891.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.478269577026367, |
| "eval_runtime": 4.3779, |
| "eval_samples_per_second": 66.47, |
| "eval_steps_per_second": 4.34, |
| "step": 91773 |
| }, |
| { |
| "epoch": 891.26, |
| "learning_rate": 2.1747572815533982e-06, |
| "loss": 0.079, |
| "step": 91800 |
| }, |
| { |
| "epoch": 892.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.493340969085693, |
| "eval_runtime": 4.3925, |
| "eval_samples_per_second": 66.249, |
| "eval_steps_per_second": 4.326, |
| "step": 91876 |
| }, |
| { |
| "epoch": 892.23, |
| "learning_rate": 2.155339805825243e-06, |
| "loss": 0.0703, |
| "step": 91900 |
| }, |
| { |
| "epoch": 893.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.519100189208984, |
| "eval_runtime": 4.441, |
| "eval_samples_per_second": 65.526, |
| "eval_steps_per_second": 4.278, |
| "step": 91979 |
| }, |
| { |
| "epoch": 893.2, |
| "learning_rate": 2.1359223300970874e-06, |
| "loss": 0.0777, |
| "step": 92000 |
| }, |
| { |
| "epoch": 894.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.517086982727051, |
| "eval_runtime": 4.5323, |
| "eval_samples_per_second": 64.206, |
| "eval_steps_per_second": 4.192, |
| "step": 92082 |
| }, |
| { |
| "epoch": 894.17, |
| "learning_rate": 2.1165048543689324e-06, |
| "loss": 0.0767, |
| "step": 92100 |
| }, |
| { |
| "epoch": 895.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.527967929840088, |
| "eval_runtime": 4.4907, |
| "eval_samples_per_second": 64.8, |
| "eval_steps_per_second": 4.231, |
| "step": 92185 |
| }, |
| { |
| "epoch": 895.15, |
| "learning_rate": 2.097087378640777e-06, |
| "loss": 0.0697, |
| "step": 92200 |
| }, |
| { |
| "epoch": 896.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.491966724395752, |
| "eval_runtime": 4.3722, |
| "eval_samples_per_second": 66.557, |
| "eval_steps_per_second": 4.346, |
| "step": 92288 |
| }, |
| { |
| "epoch": 896.12, |
| "learning_rate": 2.0776699029126216e-06, |
| "loss": 0.0831, |
| "step": 92300 |
| }, |
| { |
| "epoch": 897.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.458693981170654, |
| "eval_runtime": 4.3596, |
| "eval_samples_per_second": 66.75, |
| "eval_steps_per_second": 4.358, |
| "step": 92391 |
| }, |
| { |
| "epoch": 897.09, |
| "learning_rate": 2.058252427184466e-06, |
| "loss": 0.0715, |
| "step": 92400 |
| }, |
| { |
| "epoch": 898.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.484317779541016, |
| "eval_runtime": 4.3692, |
| "eval_samples_per_second": 66.602, |
| "eval_steps_per_second": 4.349, |
| "step": 92494 |
| }, |
| { |
| "epoch": 898.06, |
| "learning_rate": 2.0388349514563107e-06, |
| "loss": 0.0764, |
| "step": 92500 |
| }, |
| { |
| "epoch": 899.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.503616809844971, |
| "eval_runtime": 4.3713, |
| "eval_samples_per_second": 66.57, |
| "eval_steps_per_second": 4.347, |
| "step": 92597 |
| }, |
| { |
| "epoch": 899.03, |
| "learning_rate": 2.0194174757281553e-06, |
| "loss": 0.074, |
| "step": 92600 |
| }, |
| { |
| "epoch": 900.0, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.0785, |
| "step": 92700 |
| }, |
| { |
| "epoch": 900.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.4780683517456055, |
| "eval_runtime": 4.3991, |
| "eval_samples_per_second": 66.149, |
| "eval_steps_per_second": 4.319, |
| "step": 92700 |
| }, |
| { |
| "epoch": 900.97, |
| "learning_rate": 1.980582524271845e-06, |
| "loss": 0.0783, |
| "step": 92800 |
| }, |
| { |
| "epoch": 901.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.468466281890869, |
| "eval_runtime": 4.4156, |
| "eval_samples_per_second": 65.902, |
| "eval_steps_per_second": 4.303, |
| "step": 92803 |
| }, |
| { |
| "epoch": 901.94, |
| "learning_rate": 1.9611650485436895e-06, |
| "loss": 0.0791, |
| "step": 92900 |
| }, |
| { |
| "epoch": 902.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.443396091461182, |
| "eval_runtime": 4.3883, |
| "eval_samples_per_second": 66.313, |
| "eval_steps_per_second": 4.33, |
| "step": 92906 |
| }, |
| { |
| "epoch": 902.91, |
| "learning_rate": 1.941747572815534e-06, |
| "loss": 0.0714, |
| "step": 93000 |
| }, |
| { |
| "epoch": 903.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.470444679260254, |
| "eval_runtime": 4.3876, |
| "eval_samples_per_second": 66.324, |
| "eval_steps_per_second": 4.33, |
| "step": 93009 |
| }, |
| { |
| "epoch": 903.88, |
| "learning_rate": 1.9223300970873787e-06, |
| "loss": 0.0834, |
| "step": 93100 |
| }, |
| { |
| "epoch": 904.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.4543023109436035, |
| "eval_runtime": 4.3556, |
| "eval_samples_per_second": 66.811, |
| "eval_steps_per_second": 4.362, |
| "step": 93112 |
| }, |
| { |
| "epoch": 904.85, |
| "learning_rate": 1.9029126213592232e-06, |
| "loss": 0.0796, |
| "step": 93200 |
| }, |
| { |
| "epoch": 905.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.442955017089844, |
| "eval_runtime": 4.3847, |
| "eval_samples_per_second": 66.367, |
| "eval_steps_per_second": 4.333, |
| "step": 93215 |
| }, |
| { |
| "epoch": 905.83, |
| "learning_rate": 1.8834951456310683e-06, |
| "loss": 0.0741, |
| "step": 93300 |
| }, |
| { |
| "epoch": 906.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.462120532989502, |
| "eval_runtime": 4.5039, |
| "eval_samples_per_second": 64.611, |
| "eval_steps_per_second": 4.219, |
| "step": 93318 |
| }, |
| { |
| "epoch": 906.8, |
| "learning_rate": 1.8640776699029128e-06, |
| "loss": 0.0752, |
| "step": 93400 |
| }, |
| { |
| "epoch": 907.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.449808597564697, |
| "eval_runtime": 4.3965, |
| "eval_samples_per_second": 66.19, |
| "eval_steps_per_second": 4.322, |
| "step": 93421 |
| }, |
| { |
| "epoch": 907.77, |
| "learning_rate": 1.8446601941747574e-06, |
| "loss": 0.0776, |
| "step": 93500 |
| }, |
| { |
| "epoch": 908.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.455343246459961, |
| "eval_runtime": 4.4319, |
| "eval_samples_per_second": 65.66, |
| "eval_steps_per_second": 4.287, |
| "step": 93524 |
| }, |
| { |
| "epoch": 908.74, |
| "learning_rate": 1.825242718446602e-06, |
| "loss": 0.0795, |
| "step": 93600 |
| }, |
| { |
| "epoch": 909.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.415092945098877, |
| "eval_runtime": 4.403, |
| "eval_samples_per_second": 66.091, |
| "eval_steps_per_second": 4.315, |
| "step": 93627 |
| }, |
| { |
| "epoch": 909.71, |
| "learning_rate": 1.8058252427184466e-06, |
| "loss": 0.0771, |
| "step": 93700 |
| }, |
| { |
| "epoch": 910.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.396514415740967, |
| "eval_runtime": 4.4896, |
| "eval_samples_per_second": 64.816, |
| "eval_steps_per_second": 4.232, |
| "step": 93730 |
| }, |
| { |
| "epoch": 910.68, |
| "learning_rate": 1.7864077669902914e-06, |
| "loss": 0.0756, |
| "step": 93800 |
| }, |
| { |
| "epoch": 911.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.412069797515869, |
| "eval_runtime": 4.4395, |
| "eval_samples_per_second": 65.548, |
| "eval_steps_per_second": 4.28, |
| "step": 93833 |
| }, |
| { |
| "epoch": 911.65, |
| "learning_rate": 1.7669902912621362e-06, |
| "loss": 0.0769, |
| "step": 93900 |
| }, |
| { |
| "epoch": 912.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.40557861328125, |
| "eval_runtime": 4.441, |
| "eval_samples_per_second": 65.526, |
| "eval_steps_per_second": 4.278, |
| "step": 93936 |
| }, |
| { |
| "epoch": 912.62, |
| "learning_rate": 1.7475728155339808e-06, |
| "loss": 0.0799, |
| "step": 94000 |
| }, |
| { |
| "epoch": 913.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.387575149536133, |
| "eval_runtime": 4.3533, |
| "eval_samples_per_second": 66.845, |
| "eval_steps_per_second": 4.364, |
| "step": 94039 |
| }, |
| { |
| "epoch": 913.59, |
| "learning_rate": 1.7281553398058253e-06, |
| "loss": 0.0853, |
| "step": 94100 |
| }, |
| { |
| "epoch": 914.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.4021782875061035, |
| "eval_runtime": 4.4157, |
| "eval_samples_per_second": 65.901, |
| "eval_steps_per_second": 4.303, |
| "step": 94142 |
| }, |
| { |
| "epoch": 914.56, |
| "learning_rate": 1.70873786407767e-06, |
| "loss": 0.0726, |
| "step": 94200 |
| }, |
| { |
| "epoch": 915.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.43842887878418, |
| "eval_runtime": 4.4744, |
| "eval_samples_per_second": 65.037, |
| "eval_steps_per_second": 4.246, |
| "step": 94245 |
| }, |
| { |
| "epoch": 915.53, |
| "learning_rate": 1.6893203883495147e-06, |
| "loss": 0.0745, |
| "step": 94300 |
| }, |
| { |
| "epoch": 916.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.422259330749512, |
| "eval_runtime": 4.3587, |
| "eval_samples_per_second": 66.763, |
| "eval_steps_per_second": 4.359, |
| "step": 94348 |
| }, |
| { |
| "epoch": 916.5, |
| "learning_rate": 1.6699029126213593e-06, |
| "loss": 0.0688, |
| "step": 94400 |
| }, |
| { |
| "epoch": 917.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.429776191711426, |
| "eval_runtime": 4.3673, |
| "eval_samples_per_second": 66.632, |
| "eval_steps_per_second": 4.351, |
| "step": 94451 |
| }, |
| { |
| "epoch": 917.48, |
| "learning_rate": 1.650485436893204e-06, |
| "loss": 0.0743, |
| "step": 94500 |
| }, |
| { |
| "epoch": 918.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.422666072845459, |
| "eval_runtime": 4.3668, |
| "eval_samples_per_second": 66.639, |
| "eval_steps_per_second": 4.351, |
| "step": 94554 |
| }, |
| { |
| "epoch": 918.45, |
| "learning_rate": 1.6310679611650487e-06, |
| "loss": 0.0842, |
| "step": 94600 |
| }, |
| { |
| "epoch": 919.0, |
| "eval_accuracy": 0.30927835051546393, |
| "eval_loss": 5.38067626953125, |
| "eval_runtime": 4.3401, |
| "eval_samples_per_second": 67.049, |
| "eval_steps_per_second": 4.378, |
| "step": 94657 |
| }, |
| { |
| "epoch": 919.42, |
| "learning_rate": 1.6116504854368933e-06, |
| "loss": 0.0732, |
| "step": 94700 |
| }, |
| { |
| "epoch": 920.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.388149261474609, |
| "eval_runtime": 4.3924, |
| "eval_samples_per_second": 66.25, |
| "eval_steps_per_second": 4.326, |
| "step": 94760 |
| }, |
| { |
| "epoch": 920.39, |
| "learning_rate": 1.592233009708738e-06, |
| "loss": 0.0717, |
| "step": 94800 |
| }, |
| { |
| "epoch": 921.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.3828325271606445, |
| "eval_runtime": 4.4097, |
| "eval_samples_per_second": 65.991, |
| "eval_steps_per_second": 4.309, |
| "step": 94863 |
| }, |
| { |
| "epoch": 921.36, |
| "learning_rate": 1.5728155339805826e-06, |
| "loss": 0.084, |
| "step": 94900 |
| }, |
| { |
| "epoch": 922.0, |
| "eval_accuracy": 0.3024054982817869, |
| "eval_loss": 5.377039432525635, |
| "eval_runtime": 4.3946, |
| "eval_samples_per_second": 66.217, |
| "eval_steps_per_second": 4.323, |
| "step": 94966 |
| }, |
| { |
| "epoch": 922.33, |
| "learning_rate": 1.5533980582524272e-06, |
| "loss": 0.079, |
| "step": 95000 |
| }, |
| { |
| "epoch": 923.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.387304782867432, |
| "eval_runtime": 4.4138, |
| "eval_samples_per_second": 65.93, |
| "eval_steps_per_second": 4.305, |
| "step": 95069 |
| }, |
| { |
| "epoch": 923.3, |
| "learning_rate": 1.533980582524272e-06, |
| "loss": 0.0761, |
| "step": 95100 |
| }, |
| { |
| "epoch": 924.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.378848552703857, |
| "eval_runtime": 4.4188, |
| "eval_samples_per_second": 65.855, |
| "eval_steps_per_second": 4.3, |
| "step": 95172 |
| }, |
| { |
| "epoch": 924.27, |
| "learning_rate": 1.5145631067961166e-06, |
| "loss": 0.0777, |
| "step": 95200 |
| }, |
| { |
| "epoch": 925.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.393227577209473, |
| "eval_runtime": 4.3963, |
| "eval_samples_per_second": 66.193, |
| "eval_steps_per_second": 4.322, |
| "step": 95275 |
| }, |
| { |
| "epoch": 925.24, |
| "learning_rate": 1.4951456310679612e-06, |
| "loss": 0.0729, |
| "step": 95300 |
| }, |
| { |
| "epoch": 926.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.435247898101807, |
| "eval_runtime": 4.4184, |
| "eval_samples_per_second": 65.861, |
| "eval_steps_per_second": 4.3, |
| "step": 95378 |
| }, |
| { |
| "epoch": 926.21, |
| "learning_rate": 1.475728155339806e-06, |
| "loss": 0.0756, |
| "step": 95400 |
| }, |
| { |
| "epoch": 927.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.427146911621094, |
| "eval_runtime": 4.4068, |
| "eval_samples_per_second": 66.034, |
| "eval_steps_per_second": 4.311, |
| "step": 95481 |
| }, |
| { |
| "epoch": 927.18, |
| "learning_rate": 1.4563106796116506e-06, |
| "loss": 0.0699, |
| "step": 95500 |
| }, |
| { |
| "epoch": 928.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.408605098724365, |
| "eval_runtime": 4.4391, |
| "eval_samples_per_second": 65.554, |
| "eval_steps_per_second": 4.28, |
| "step": 95584 |
| }, |
| { |
| "epoch": 928.16, |
| "learning_rate": 1.4368932038834951e-06, |
| "loss": 0.0814, |
| "step": 95600 |
| }, |
| { |
| "epoch": 929.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.421037197113037, |
| "eval_runtime": 4.421, |
| "eval_samples_per_second": 65.822, |
| "eval_steps_per_second": 4.298, |
| "step": 95687 |
| }, |
| { |
| "epoch": 929.13, |
| "learning_rate": 1.41747572815534e-06, |
| "loss": 0.07, |
| "step": 95700 |
| }, |
| { |
| "epoch": 930.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.417635440826416, |
| "eval_runtime": 4.4051, |
| "eval_samples_per_second": 66.059, |
| "eval_steps_per_second": 4.313, |
| "step": 95790 |
| }, |
| { |
| "epoch": 930.1, |
| "learning_rate": 1.3980582524271845e-06, |
| "loss": 0.0736, |
| "step": 95800 |
| }, |
| { |
| "epoch": 931.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.434685230255127, |
| "eval_runtime": 4.4155, |
| "eval_samples_per_second": 65.904, |
| "eval_steps_per_second": 4.303, |
| "step": 95893 |
| }, |
| { |
| "epoch": 931.07, |
| "learning_rate": 1.3786407766990293e-06, |
| "loss": 0.0694, |
| "step": 95900 |
| }, |
| { |
| "epoch": 932.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.436407089233398, |
| "eval_runtime": 4.402, |
| "eval_samples_per_second": 66.106, |
| "eval_steps_per_second": 4.316, |
| "step": 95996 |
| }, |
| { |
| "epoch": 932.04, |
| "learning_rate": 1.359223300970874e-06, |
| "loss": 0.0771, |
| "step": 96000 |
| }, |
| { |
| "epoch": 933.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.446750640869141, |
| "eval_runtime": 4.442, |
| "eval_samples_per_second": 65.511, |
| "eval_steps_per_second": 4.277, |
| "step": 96099 |
| }, |
| { |
| "epoch": 933.01, |
| "learning_rate": 1.3398058252427185e-06, |
| "loss": 0.0701, |
| "step": 96100 |
| }, |
| { |
| "epoch": 933.98, |
| "learning_rate": 1.3203883495145633e-06, |
| "loss": 0.0718, |
| "step": 96200 |
| }, |
| { |
| "epoch": 934.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.452322959899902, |
| "eval_runtime": 4.4596, |
| "eval_samples_per_second": 65.253, |
| "eval_steps_per_second": 4.26, |
| "step": 96202 |
| }, |
| { |
| "epoch": 934.95, |
| "learning_rate": 1.3009708737864079e-06, |
| "loss": 0.0784, |
| "step": 96300 |
| }, |
| { |
| "epoch": 935.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.4216132164001465, |
| "eval_runtime": 4.3711, |
| "eval_samples_per_second": 66.574, |
| "eval_steps_per_second": 4.347, |
| "step": 96305 |
| }, |
| { |
| "epoch": 935.92, |
| "learning_rate": 1.2815533980582527e-06, |
| "loss": 0.087, |
| "step": 96400 |
| }, |
| { |
| "epoch": 936.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.415948390960693, |
| "eval_runtime": 4.3669, |
| "eval_samples_per_second": 66.637, |
| "eval_steps_per_second": 4.351, |
| "step": 96408 |
| }, |
| { |
| "epoch": 936.89, |
| "learning_rate": 1.2621359223300972e-06, |
| "loss": 0.0717, |
| "step": 96500 |
| }, |
| { |
| "epoch": 937.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.422750949859619, |
| "eval_runtime": 4.4213, |
| "eval_samples_per_second": 65.818, |
| "eval_steps_per_second": 4.297, |
| "step": 96511 |
| }, |
| { |
| "epoch": 937.86, |
| "learning_rate": 1.2427184466019418e-06, |
| "loss": 0.0714, |
| "step": 96600 |
| }, |
| { |
| "epoch": 938.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.401679992675781, |
| "eval_runtime": 4.3842, |
| "eval_samples_per_second": 66.375, |
| "eval_steps_per_second": 4.334, |
| "step": 96614 |
| }, |
| { |
| "epoch": 938.83, |
| "learning_rate": 1.2233009708737866e-06, |
| "loss": 0.0754, |
| "step": 96700 |
| }, |
| { |
| "epoch": 939.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.4021172523498535, |
| "eval_runtime": 4.359, |
| "eval_samples_per_second": 66.759, |
| "eval_steps_per_second": 4.359, |
| "step": 96717 |
| }, |
| { |
| "epoch": 939.81, |
| "learning_rate": 1.2038834951456312e-06, |
| "loss": 0.0733, |
| "step": 96800 |
| }, |
| { |
| "epoch": 940.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.395828723907471, |
| "eval_runtime": 4.4108, |
| "eval_samples_per_second": 65.974, |
| "eval_steps_per_second": 4.308, |
| "step": 96820 |
| }, |
| { |
| "epoch": 940.78, |
| "learning_rate": 1.1844660194174758e-06, |
| "loss": 0.0697, |
| "step": 96900 |
| }, |
| { |
| "epoch": 941.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.385928630828857, |
| "eval_runtime": 4.4078, |
| "eval_samples_per_second": 66.019, |
| "eval_steps_per_second": 4.31, |
| "step": 96923 |
| }, |
| { |
| "epoch": 941.75, |
| "learning_rate": 1.1650485436893206e-06, |
| "loss": 0.082, |
| "step": 97000 |
| }, |
| { |
| "epoch": 942.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.371447563171387, |
| "eval_runtime": 4.3935, |
| "eval_samples_per_second": 66.234, |
| "eval_steps_per_second": 4.325, |
| "step": 97026 |
| }, |
| { |
| "epoch": 942.72, |
| "learning_rate": 1.1456310679611652e-06, |
| "loss": 0.0696, |
| "step": 97100 |
| }, |
| { |
| "epoch": 943.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.369715690612793, |
| "eval_runtime": 4.4234, |
| "eval_samples_per_second": 65.786, |
| "eval_steps_per_second": 4.295, |
| "step": 97129 |
| }, |
| { |
| "epoch": 943.69, |
| "learning_rate": 1.1262135922330097e-06, |
| "loss": 0.0719, |
| "step": 97200 |
| }, |
| { |
| "epoch": 944.0, |
| "eval_accuracy": 0.27835051546391754, |
| "eval_loss": 5.396899700164795, |
| "eval_runtime": 4.3747, |
| "eval_samples_per_second": 66.519, |
| "eval_steps_per_second": 4.343, |
| "step": 97232 |
| }, |
| { |
| "epoch": 944.66, |
| "learning_rate": 1.1067961165048545e-06, |
| "loss": 0.0772, |
| "step": 97300 |
| }, |
| { |
| "epoch": 945.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.395821571350098, |
| "eval_runtime": 4.5669, |
| "eval_samples_per_second": 63.72, |
| "eval_steps_per_second": 4.16, |
| "step": 97335 |
| }, |
| { |
| "epoch": 945.63, |
| "learning_rate": 1.0873786407766991e-06, |
| "loss": 0.0759, |
| "step": 97400 |
| }, |
| { |
| "epoch": 946.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.41284704208374, |
| "eval_runtime": 4.4196, |
| "eval_samples_per_second": 65.843, |
| "eval_steps_per_second": 4.299, |
| "step": 97438 |
| }, |
| { |
| "epoch": 946.6, |
| "learning_rate": 1.0679611650485437e-06, |
| "loss": 0.074, |
| "step": 97500 |
| }, |
| { |
| "epoch": 947.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.428328514099121, |
| "eval_runtime": 4.4673, |
| "eval_samples_per_second": 65.139, |
| "eval_steps_per_second": 4.253, |
| "step": 97541 |
| }, |
| { |
| "epoch": 947.57, |
| "learning_rate": 1.0485436893203885e-06, |
| "loss": 0.0704, |
| "step": 97600 |
| }, |
| { |
| "epoch": 948.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.430525779724121, |
| "eval_runtime": 4.4255, |
| "eval_samples_per_second": 65.755, |
| "eval_steps_per_second": 4.293, |
| "step": 97644 |
| }, |
| { |
| "epoch": 948.54, |
| "learning_rate": 1.029126213592233e-06, |
| "loss": 0.069, |
| "step": 97700 |
| }, |
| { |
| "epoch": 949.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.429955005645752, |
| "eval_runtime": 4.3939, |
| "eval_samples_per_second": 66.228, |
| "eval_steps_per_second": 4.324, |
| "step": 97747 |
| }, |
| { |
| "epoch": 949.51, |
| "learning_rate": 1.0097087378640777e-06, |
| "loss": 0.0701, |
| "step": 97800 |
| }, |
| { |
| "epoch": 950.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.44457483291626, |
| "eval_runtime": 4.3896, |
| "eval_samples_per_second": 66.293, |
| "eval_steps_per_second": 4.328, |
| "step": 97850 |
| }, |
| { |
| "epoch": 950.49, |
| "learning_rate": 9.902912621359225e-07, |
| "loss": 0.087, |
| "step": 97900 |
| }, |
| { |
| "epoch": 951.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.436530590057373, |
| "eval_runtime": 4.3811, |
| "eval_samples_per_second": 66.421, |
| "eval_steps_per_second": 4.337, |
| "step": 97953 |
| }, |
| { |
| "epoch": 951.46, |
| "learning_rate": 9.70873786407767e-07, |
| "loss": 0.0837, |
| "step": 98000 |
| }, |
| { |
| "epoch": 952.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.426782131195068, |
| "eval_runtime": 4.3706, |
| "eval_samples_per_second": 66.581, |
| "eval_steps_per_second": 4.347, |
| "step": 98056 |
| }, |
| { |
| "epoch": 952.43, |
| "learning_rate": 9.514563106796116e-07, |
| "loss": 0.0754, |
| "step": 98100 |
| }, |
| { |
| "epoch": 953.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.425992488861084, |
| "eval_runtime": 4.4133, |
| "eval_samples_per_second": 65.937, |
| "eval_steps_per_second": 4.305, |
| "step": 98159 |
| }, |
| { |
| "epoch": 953.4, |
| "learning_rate": 9.320388349514564e-07, |
| "loss": 0.0778, |
| "step": 98200 |
| }, |
| { |
| "epoch": 954.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.405651092529297, |
| "eval_runtime": 4.4174, |
| "eval_samples_per_second": 65.876, |
| "eval_steps_per_second": 4.301, |
| "step": 98262 |
| }, |
| { |
| "epoch": 954.37, |
| "learning_rate": 9.12621359223301e-07, |
| "loss": 0.0643, |
| "step": 98300 |
| }, |
| { |
| "epoch": 955.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.39918851852417, |
| "eval_runtime": 4.522, |
| "eval_samples_per_second": 64.352, |
| "eval_steps_per_second": 4.202, |
| "step": 98365 |
| }, |
| { |
| "epoch": 955.34, |
| "learning_rate": 8.932038834951457e-07, |
| "loss": 0.0768, |
| "step": 98400 |
| }, |
| { |
| "epoch": 956.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.388579845428467, |
| "eval_runtime": 4.4701, |
| "eval_samples_per_second": 65.1, |
| "eval_steps_per_second": 4.251, |
| "step": 98468 |
| }, |
| { |
| "epoch": 956.31, |
| "learning_rate": 8.737864077669904e-07, |
| "loss": 0.0727, |
| "step": 98500 |
| }, |
| { |
| "epoch": 957.0, |
| "eval_accuracy": 0.29896907216494845, |
| "eval_loss": 5.384490489959717, |
| "eval_runtime": 4.3672, |
| "eval_samples_per_second": 66.633, |
| "eval_steps_per_second": 4.351, |
| "step": 98571 |
| }, |
| { |
| "epoch": 957.28, |
| "learning_rate": 8.54368932038835e-07, |
| "loss": 0.0859, |
| "step": 98600 |
| }, |
| { |
| "epoch": 958.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.3821940422058105, |
| "eval_runtime": 4.3548, |
| "eval_samples_per_second": 66.822, |
| "eval_steps_per_second": 4.363, |
| "step": 98674 |
| }, |
| { |
| "epoch": 958.25, |
| "learning_rate": 8.349514563106797e-07, |
| "loss": 0.0831, |
| "step": 98700 |
| }, |
| { |
| "epoch": 959.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.38521146774292, |
| "eval_runtime": 4.4018, |
| "eval_samples_per_second": 66.109, |
| "eval_steps_per_second": 4.316, |
| "step": 98777 |
| }, |
| { |
| "epoch": 959.22, |
| "learning_rate": 8.155339805825243e-07, |
| "loss": 0.0756, |
| "step": 98800 |
| }, |
| { |
| "epoch": 960.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.38844108581543, |
| "eval_runtime": 4.4092, |
| "eval_samples_per_second": 65.998, |
| "eval_steps_per_second": 4.309, |
| "step": 98880 |
| }, |
| { |
| "epoch": 960.19, |
| "learning_rate": 7.96116504854369e-07, |
| "loss": 0.0857, |
| "step": 98900 |
| }, |
| { |
| "epoch": 961.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.389212608337402, |
| "eval_runtime": 4.4817, |
| "eval_samples_per_second": 64.93, |
| "eval_steps_per_second": 4.239, |
| "step": 98983 |
| }, |
| { |
| "epoch": 961.17, |
| "learning_rate": 7.766990291262136e-07, |
| "loss": 0.0707, |
| "step": 99000 |
| }, |
| { |
| "epoch": 962.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.377591133117676, |
| "eval_runtime": 4.3914, |
| "eval_samples_per_second": 66.266, |
| "eval_steps_per_second": 4.327, |
| "step": 99086 |
| }, |
| { |
| "epoch": 962.14, |
| "learning_rate": 7.572815533980583e-07, |
| "loss": 0.0746, |
| "step": 99100 |
| }, |
| { |
| "epoch": 963.0, |
| "eval_accuracy": 0.30584192439862545, |
| "eval_loss": 5.37846565246582, |
| "eval_runtime": 4.4873, |
| "eval_samples_per_second": 64.85, |
| "eval_steps_per_second": 4.234, |
| "step": 99189 |
| }, |
| { |
| "epoch": 963.11, |
| "learning_rate": 7.37864077669903e-07, |
| "loss": 0.0745, |
| "step": 99200 |
| }, |
| { |
| "epoch": 964.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.377573013305664, |
| "eval_runtime": 4.3679, |
| "eval_samples_per_second": 66.623, |
| "eval_steps_per_second": 4.35, |
| "step": 99292 |
| }, |
| { |
| "epoch": 964.08, |
| "learning_rate": 7.184466019417476e-07, |
| "loss": 0.0827, |
| "step": 99300 |
| }, |
| { |
| "epoch": 965.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.370428562164307, |
| "eval_runtime": 4.4339, |
| "eval_samples_per_second": 65.631, |
| "eval_steps_per_second": 4.285, |
| "step": 99395 |
| }, |
| { |
| "epoch": 965.05, |
| "learning_rate": 6.990291262135923e-07, |
| "loss": 0.0774, |
| "step": 99400 |
| }, |
| { |
| "epoch": 966.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.365330219268799, |
| "eval_runtime": 4.3501, |
| "eval_samples_per_second": 66.895, |
| "eval_steps_per_second": 4.368, |
| "step": 99498 |
| }, |
| { |
| "epoch": 966.02, |
| "learning_rate": 6.79611650485437e-07, |
| "loss": 0.0752, |
| "step": 99500 |
| }, |
| { |
| "epoch": 966.99, |
| "learning_rate": 6.601941747572816e-07, |
| "loss": 0.0795, |
| "step": 99600 |
| }, |
| { |
| "epoch": 967.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.356910228729248, |
| "eval_runtime": 4.4051, |
| "eval_samples_per_second": 66.06, |
| "eval_steps_per_second": 4.313, |
| "step": 99601 |
| }, |
| { |
| "epoch": 967.96, |
| "learning_rate": 6.407766990291263e-07, |
| "loss": 0.0759, |
| "step": 99700 |
| }, |
| { |
| "epoch": 968.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.351494312286377, |
| "eval_runtime": 4.3759, |
| "eval_samples_per_second": 66.501, |
| "eval_steps_per_second": 4.342, |
| "step": 99704 |
| }, |
| { |
| "epoch": 968.93, |
| "learning_rate": 6.213592233009709e-07, |
| "loss": 0.0713, |
| "step": 99800 |
| }, |
| { |
| "epoch": 969.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.375247478485107, |
| "eval_runtime": 4.6323, |
| "eval_samples_per_second": 62.82, |
| "eval_steps_per_second": 4.102, |
| "step": 99807 |
| }, |
| { |
| "epoch": 969.9, |
| "learning_rate": 6.019417475728156e-07, |
| "loss": 0.0735, |
| "step": 99900 |
| }, |
| { |
| "epoch": 970.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.372783660888672, |
| "eval_runtime": 4.3885, |
| "eval_samples_per_second": 66.31, |
| "eval_steps_per_second": 4.33, |
| "step": 99910 |
| }, |
| { |
| "epoch": 970.87, |
| "learning_rate": 5.825242718446603e-07, |
| "loss": 0.0777, |
| "step": 100000 |
| }, |
| { |
| "epoch": 971.0, |
| "eval_accuracy": 0.29553264604810997, |
| "eval_loss": 5.368955612182617, |
| "eval_runtime": 4.4012, |
| "eval_samples_per_second": 66.119, |
| "eval_steps_per_second": 4.317, |
| "step": 100013 |
| }, |
| { |
| "epoch": 971.84, |
| "learning_rate": 5.631067961165049e-07, |
| "loss": 0.0844, |
| "step": 100100 |
| }, |
| { |
| "epoch": 972.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.3782057762146, |
| "eval_runtime": 4.3662, |
| "eval_samples_per_second": 66.649, |
| "eval_steps_per_second": 4.352, |
| "step": 100116 |
| }, |
| { |
| "epoch": 972.82, |
| "learning_rate": 5.436893203883496e-07, |
| "loss": 0.0758, |
| "step": 100200 |
| }, |
| { |
| "epoch": 973.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.382194995880127, |
| "eval_runtime": 4.3834, |
| "eval_samples_per_second": 66.388, |
| "eval_steps_per_second": 4.335, |
| "step": 100219 |
| }, |
| { |
| "epoch": 973.79, |
| "learning_rate": 5.242718446601942e-07, |
| "loss": 0.0735, |
| "step": 100300 |
| }, |
| { |
| "epoch": 974.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.389287948608398, |
| "eval_runtime": 4.3644, |
| "eval_samples_per_second": 66.676, |
| "eval_steps_per_second": 4.353, |
| "step": 100322 |
| }, |
| { |
| "epoch": 974.76, |
| "learning_rate": 5.048543689320388e-07, |
| "loss": 0.0698, |
| "step": 100400 |
| }, |
| { |
| "epoch": 975.0, |
| "eval_accuracy": 0.281786941580756, |
| "eval_loss": 5.388708591461182, |
| "eval_runtime": 4.394, |
| "eval_samples_per_second": 66.227, |
| "eval_steps_per_second": 4.324, |
| "step": 100425 |
| }, |
| { |
| "epoch": 975.73, |
| "learning_rate": 4.854368932038835e-07, |
| "loss": 0.0773, |
| "step": 100500 |
| }, |
| { |
| "epoch": 976.0, |
| "eval_accuracy": 0.2852233676975945, |
| "eval_loss": 5.390843868255615, |
| "eval_runtime": 4.8733, |
| "eval_samples_per_second": 59.713, |
| "eval_steps_per_second": 3.899, |
| "step": 100528 |
| }, |
| { |
| "epoch": 976.7, |
| "learning_rate": 4.660194174757282e-07, |
| "loss": 0.0695, |
| "step": 100600 |
| }, |
| { |
| "epoch": 977.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.390900135040283, |
| "eval_runtime": 4.418, |
| "eval_samples_per_second": 65.867, |
| "eval_steps_per_second": 4.301, |
| "step": 100631 |
| }, |
| { |
| "epoch": 977.67, |
| "learning_rate": 4.4660194174757285e-07, |
| "loss": 0.0786, |
| "step": 100700 |
| }, |
| { |
| "epoch": 978.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.393945693969727, |
| "eval_runtime": 4.4024, |
| "eval_samples_per_second": 66.1, |
| "eval_steps_per_second": 4.316, |
| "step": 100734 |
| }, |
| { |
| "epoch": 978.64, |
| "learning_rate": 4.271844660194175e-07, |
| "loss": 0.0784, |
| "step": 100800 |
| }, |
| { |
| "epoch": 979.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.383818626403809, |
| "eval_runtime": 4.4523, |
| "eval_samples_per_second": 65.36, |
| "eval_steps_per_second": 4.267, |
| "step": 100837 |
| }, |
| { |
| "epoch": 979.61, |
| "learning_rate": 4.0776699029126217e-07, |
| "loss": 0.078, |
| "step": 100900 |
| }, |
| { |
| "epoch": 980.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.389102458953857, |
| "eval_runtime": 4.4077, |
| "eval_samples_per_second": 66.02, |
| "eval_steps_per_second": 4.311, |
| "step": 100940 |
| }, |
| { |
| "epoch": 980.58, |
| "learning_rate": 3.883495145631068e-07, |
| "loss": 0.0721, |
| "step": 101000 |
| }, |
| { |
| "epoch": 981.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.387507915496826, |
| "eval_runtime": 4.4006, |
| "eval_samples_per_second": 66.128, |
| "eval_steps_per_second": 4.318, |
| "step": 101043 |
| }, |
| { |
| "epoch": 981.55, |
| "learning_rate": 3.689320388349515e-07, |
| "loss": 0.0779, |
| "step": 101100 |
| }, |
| { |
| "epoch": 982.0, |
| "eval_accuracy": 0.28865979381443296, |
| "eval_loss": 5.392478942871094, |
| "eval_runtime": 4.4664, |
| "eval_samples_per_second": 65.153, |
| "eval_steps_per_second": 4.254, |
| "step": 101146 |
| }, |
| { |
| "epoch": 982.52, |
| "learning_rate": 3.4951456310679613e-07, |
| "loss": 0.0706, |
| "step": 101200 |
| }, |
| { |
| "epoch": 983.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.400639057159424, |
| "eval_runtime": 4.4109, |
| "eval_samples_per_second": 65.973, |
| "eval_steps_per_second": 4.308, |
| "step": 101249 |
| }, |
| { |
| "epoch": 983.5, |
| "learning_rate": 3.300970873786408e-07, |
| "loss": 0.0808, |
| "step": 101300 |
| }, |
| { |
| "epoch": 984.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.402230262756348, |
| "eval_runtime": 4.4104, |
| "eval_samples_per_second": 65.98, |
| "eval_steps_per_second": 4.308, |
| "step": 101352 |
| }, |
| { |
| "epoch": 984.47, |
| "learning_rate": 3.1067961165048546e-07, |
| "loss": 0.071, |
| "step": 101400 |
| }, |
| { |
| "epoch": 985.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.4075798988342285, |
| "eval_runtime": 4.4611, |
| "eval_samples_per_second": 65.23, |
| "eval_steps_per_second": 4.259, |
| "step": 101455 |
| }, |
| { |
| "epoch": 985.44, |
| "learning_rate": 2.9126213592233014e-07, |
| "loss": 0.0743, |
| "step": 101500 |
| }, |
| { |
| "epoch": 986.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.4103875160217285, |
| "eval_runtime": 4.5223, |
| "eval_samples_per_second": 64.348, |
| "eval_steps_per_second": 4.201, |
| "step": 101558 |
| }, |
| { |
| "epoch": 986.41, |
| "learning_rate": 2.718446601941748e-07, |
| "loss": 0.0784, |
| "step": 101600 |
| }, |
| { |
| "epoch": 987.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.409285545349121, |
| "eval_runtime": 4.454, |
| "eval_samples_per_second": 65.335, |
| "eval_steps_per_second": 4.266, |
| "step": 101661 |
| }, |
| { |
| "epoch": 987.38, |
| "learning_rate": 2.524271844660194e-07, |
| "loss": 0.0793, |
| "step": 101700 |
| }, |
| { |
| "epoch": 988.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.407143592834473, |
| "eval_runtime": 4.4102, |
| "eval_samples_per_second": 65.983, |
| "eval_steps_per_second": 4.308, |
| "step": 101764 |
| }, |
| { |
| "epoch": 988.35, |
| "learning_rate": 2.330097087378641e-07, |
| "loss": 0.0838, |
| "step": 101800 |
| }, |
| { |
| "epoch": 989.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.402917385101318, |
| "eval_runtime": 4.4226, |
| "eval_samples_per_second": 65.798, |
| "eval_steps_per_second": 4.296, |
| "step": 101867 |
| }, |
| { |
| "epoch": 989.32, |
| "learning_rate": 2.1359223300970874e-07, |
| "loss": 0.0708, |
| "step": 101900 |
| }, |
| { |
| "epoch": 990.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.4035491943359375, |
| "eval_runtime": 4.4171, |
| "eval_samples_per_second": 65.88, |
| "eval_steps_per_second": 4.301, |
| "step": 101970 |
| }, |
| { |
| "epoch": 990.29, |
| "learning_rate": 1.941747572815534e-07, |
| "loss": 0.0742, |
| "step": 102000 |
| }, |
| { |
| "epoch": 991.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.4020819664001465, |
| "eval_runtime": 4.427, |
| "eval_samples_per_second": 65.733, |
| "eval_steps_per_second": 4.292, |
| "step": 102073 |
| }, |
| { |
| "epoch": 991.26, |
| "learning_rate": 1.7475728155339807e-07, |
| "loss": 0.0746, |
| "step": 102100 |
| }, |
| { |
| "epoch": 992.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.404983043670654, |
| "eval_runtime": 4.4417, |
| "eval_samples_per_second": 65.515, |
| "eval_steps_per_second": 4.278, |
| "step": 102176 |
| }, |
| { |
| "epoch": 992.23, |
| "learning_rate": 1.5533980582524273e-07, |
| "loss": 0.0756, |
| "step": 102200 |
| }, |
| { |
| "epoch": 993.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.405885219573975, |
| "eval_runtime": 4.413, |
| "eval_samples_per_second": 65.942, |
| "eval_steps_per_second": 4.306, |
| "step": 102279 |
| }, |
| { |
| "epoch": 993.2, |
| "learning_rate": 1.359223300970874e-07, |
| "loss": 0.0744, |
| "step": 102300 |
| }, |
| { |
| "epoch": 994.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.405316352844238, |
| "eval_runtime": 4.5201, |
| "eval_samples_per_second": 64.379, |
| "eval_steps_per_second": 4.203, |
| "step": 102382 |
| }, |
| { |
| "epoch": 994.17, |
| "learning_rate": 1.1650485436893205e-07, |
| "loss": 0.0741, |
| "step": 102400 |
| }, |
| { |
| "epoch": 995.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.407505512237549, |
| "eval_runtime": 4.4833, |
| "eval_samples_per_second": 64.907, |
| "eval_steps_per_second": 4.238, |
| "step": 102485 |
| }, |
| { |
| "epoch": 995.15, |
| "learning_rate": 9.70873786407767e-08, |
| "loss": 0.0757, |
| "step": 102500 |
| }, |
| { |
| "epoch": 996.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.4071760177612305, |
| "eval_runtime": 4.455, |
| "eval_samples_per_second": 65.32, |
| "eval_steps_per_second": 4.265, |
| "step": 102588 |
| }, |
| { |
| "epoch": 996.12, |
| "learning_rate": 7.766990291262136e-08, |
| "loss": 0.0735, |
| "step": 102600 |
| }, |
| { |
| "epoch": 997.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.408614635467529, |
| "eval_runtime": 4.3981, |
| "eval_samples_per_second": 66.165, |
| "eval_steps_per_second": 4.32, |
| "step": 102691 |
| }, |
| { |
| "epoch": 997.09, |
| "learning_rate": 5.8252427184466026e-08, |
| "loss": 0.0708, |
| "step": 102700 |
| }, |
| { |
| "epoch": 998.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.4088239669799805, |
| "eval_runtime": 4.3935, |
| "eval_samples_per_second": 66.234, |
| "eval_steps_per_second": 4.325, |
| "step": 102794 |
| }, |
| { |
| "epoch": 998.06, |
| "learning_rate": 3.883495145631068e-08, |
| "loss": 0.0812, |
| "step": 102800 |
| }, |
| { |
| "epoch": 999.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.408839702606201, |
| "eval_runtime": 4.4716, |
| "eval_samples_per_second": 65.077, |
| "eval_steps_per_second": 4.249, |
| "step": 102897 |
| }, |
| { |
| "epoch": 999.03, |
| "learning_rate": 1.941747572815534e-08, |
| "loss": 0.0746, |
| "step": 102900 |
| }, |
| { |
| "epoch": 1000.0, |
| "learning_rate": 0.0, |
| "loss": 0.0722, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1000.0, |
| "eval_accuracy": 0.2920962199312715, |
| "eval_loss": 5.409001350402832, |
| "eval_runtime": 4.4131, |
| "eval_samples_per_second": 65.94, |
| "eval_steps_per_second": 4.305, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1000.0, |
| "step": 103000, |
| "total_flos": 1.2743565272137728e+20, |
| "train_loss": 0.18406761223135643, |
| "train_runtime": 65091.9872, |
| "train_samples_per_second": 25.257, |
| "train_steps_per_second": 1.582 |
| } |
| ], |
| "max_steps": 103000, |
| "num_train_epochs": 1000, |
| "total_flos": 1.2743565272137728e+20, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|