| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.38278977185729596, |
| "eval_steps": 10, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001531159087429184, |
| "grad_norm": 270.11077880859375, |
| "learning_rate": 4.996172102281427e-06, |
| "loss": 1.2137, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001531159087429184, |
| "eval_accuracy": 0.5366216907106497, |
| "eval_loss": 1.2146648168563843, |
| "eval_runtime": 277.3281, |
| "eval_samples_per_second": 162.796, |
| "eval_steps_per_second": 20.351, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003062318174858368, |
| "grad_norm": 69.96483612060547, |
| "learning_rate": 4.9923442045628545e-06, |
| "loss": 0.8743, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.003062318174858368, |
| "eval_accuracy": 0.5232508865445948, |
| "eval_loss": 1.02613365650177, |
| "eval_runtime": 276.3955, |
| "eval_samples_per_second": 163.346, |
| "eval_steps_per_second": 20.42, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.004593477262287551, |
| "grad_norm": 120.09966278076172, |
| "learning_rate": 4.988516306844281e-06, |
| "loss": 1.0324, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.004593477262287551, |
| "eval_accuracy": 0.5467704523151039, |
| "eval_loss": 0.9261217713356018, |
| "eval_runtime": 277.0496, |
| "eval_samples_per_second": 162.96, |
| "eval_steps_per_second": 20.372, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.006124636349716736, |
| "grad_norm": 76.13516998291016, |
| "learning_rate": 4.984688409125709e-06, |
| "loss": 0.9529, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.006124636349716736, |
| "eval_accuracy": 0.5361401310932641, |
| "eval_loss": 0.8699733018875122, |
| "eval_runtime": 276.196, |
| "eval_samples_per_second": 163.464, |
| "eval_steps_per_second": 20.435, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.007655795437145919, |
| "grad_norm": 51.14548110961914, |
| "learning_rate": 4.980860511407135e-06, |
| "loss": 0.8279, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.007655795437145919, |
| "eval_accuracy": 0.5364695340501792, |
| "eval_loss": 0.8454414010047913, |
| "eval_runtime": 277.7047, |
| "eval_samples_per_second": 162.576, |
| "eval_steps_per_second": 20.324, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.009186954524575103, |
| "grad_norm": 29.365215301513672, |
| "learning_rate": 4.977032613688563e-06, |
| "loss": 0.6843, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.009186954524575103, |
| "eval_accuracy": 0.5485189652861546, |
| "eval_loss": 0.8400074243545532, |
| "eval_runtime": 276.2282, |
| "eval_samples_per_second": 163.445, |
| "eval_steps_per_second": 20.432, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.010718113612004287, |
| "grad_norm": 167.26617431640625, |
| "learning_rate": 4.9732047159699895e-06, |
| "loss": 0.8245, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.010718113612004287, |
| "eval_accuracy": 0.5589761867634938, |
| "eval_loss": 1.03753662109375, |
| "eval_runtime": 280.6426, |
| "eval_samples_per_second": 160.874, |
| "eval_steps_per_second": 20.111, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.012249272699433471, |
| "grad_norm": 104.5846939086914, |
| "learning_rate": 4.969376818251417e-06, |
| "loss": 0.8412, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.012249272699433471, |
| "eval_accuracy": 0.5760374802939805, |
| "eval_loss": 0.7869167923927307, |
| "eval_runtime": 281.2402, |
| "eval_samples_per_second": 160.532, |
| "eval_steps_per_second": 20.068, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.013780431786862656, |
| "grad_norm": 35.11406326293945, |
| "learning_rate": 4.965548920532844e-06, |
| "loss": 0.8279, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.013780431786862656, |
| "eval_accuracy": 0.5787423483583751, |
| "eval_loss": 0.724981963634491, |
| "eval_runtime": 277.5138, |
| "eval_samples_per_second": 162.687, |
| "eval_steps_per_second": 20.338, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.015311590874291839, |
| "grad_norm": 25.292316436767578, |
| "learning_rate": 4.961721022814271e-06, |
| "loss": 0.6736, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.015311590874291839, |
| "eval_accuracy": 0.5338034748948856, |
| "eval_loss": 0.8150990009307861, |
| "eval_runtime": 278.062, |
| "eval_samples_per_second": 162.367, |
| "eval_steps_per_second": 20.298, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.016842749961721023, |
| "grad_norm": 21.010047912597656, |
| "learning_rate": 4.957893125095698e-06, |
| "loss": 0.729, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.016842749961721023, |
| "eval_accuracy": 0.5677641824249166, |
| "eval_loss": 0.833014965057373, |
| "eval_runtime": 278.6412, |
| "eval_samples_per_second": 162.029, |
| "eval_steps_per_second": 20.255, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.018373909049150206, |
| "grad_norm": 29.881685256958008, |
| "learning_rate": 4.954065227377125e-06, |
| "loss": 0.771, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.018373909049150206, |
| "eval_accuracy": 0.5731047331765958, |
| "eval_loss": 0.7811030745506287, |
| "eval_runtime": 276.9087, |
| "eval_samples_per_second": 163.043, |
| "eval_steps_per_second": 20.382, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.01990506813657939, |
| "grad_norm": 15.595231056213379, |
| "learning_rate": 4.950237329658552e-06, |
| "loss": 0.7783, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.01990506813657939, |
| "eval_accuracy": 0.5626015898109594, |
| "eval_loss": 0.7167708873748779, |
| "eval_runtime": 277.1636, |
| "eval_samples_per_second": 162.893, |
| "eval_steps_per_second": 20.363, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.021436227224008574, |
| "grad_norm": 22.25090789794922, |
| "learning_rate": 4.946409431939979e-06, |
| "loss": 0.6794, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.021436227224008574, |
| "eval_accuracy": 0.5578002997964069, |
| "eval_loss": 0.7202938795089722, |
| "eval_runtime": 278.0355, |
| "eval_samples_per_second": 162.382, |
| "eval_steps_per_second": 20.3, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.022967386311437757, |
| "grad_norm": 37.13331604003906, |
| "learning_rate": 4.942581534221406e-06, |
| "loss": 0.7133, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.022967386311437757, |
| "eval_accuracy": 0.5772544642857143, |
| "eval_loss": 0.7133862972259521, |
| "eval_runtime": 277.8264, |
| "eval_samples_per_second": 162.504, |
| "eval_steps_per_second": 20.315, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.024498545398866943, |
| "grad_norm": 28.552654266357422, |
| "learning_rate": 4.9387536365028335e-06, |
| "loss": 0.7334, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.024498545398866943, |
| "eval_accuracy": 0.5714637421665174, |
| "eval_loss": 0.7050605416297913, |
| "eval_runtime": 277.1231, |
| "eval_samples_per_second": 162.917, |
| "eval_steps_per_second": 20.366, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.026029704486296126, |
| "grad_norm": 18.05495834350586, |
| "learning_rate": 4.93492573878426e-06, |
| "loss": 0.7056, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.026029704486296126, |
| "eval_accuracy": 0.5888312385382655, |
| "eval_loss": 0.6785433888435364, |
| "eval_runtime": 275.2348, |
| "eval_samples_per_second": 164.034, |
| "eval_steps_per_second": 20.506, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.02756086357372531, |
| "grad_norm": 14.354559898376465, |
| "learning_rate": 4.931097841065688e-06, |
| "loss": 0.6317, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.02756086357372531, |
| "eval_accuracy": 0.5905926454837558, |
| "eval_loss": 0.6833683252334595, |
| "eval_runtime": 278.3913, |
| "eval_samples_per_second": 162.175, |
| "eval_steps_per_second": 20.274, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.029092022661154494, |
| "grad_norm": 10.222257614135742, |
| "learning_rate": 4.927269943347114e-06, |
| "loss": 0.6975, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.029092022661154494, |
| "eval_accuracy": 0.5688126660860633, |
| "eval_loss": 0.6987228989601135, |
| "eval_runtime": 278.0516, |
| "eval_samples_per_second": 162.373, |
| "eval_steps_per_second": 20.298, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.030623181748583677, |
| "grad_norm": 19.388473510742188, |
| "learning_rate": 4.923442045628542e-06, |
| "loss": 0.675, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.030623181748583677, |
| "eval_accuracy": 0.5726792755463477, |
| "eval_loss": 0.7145602107048035, |
| "eval_runtime": 280.8737, |
| "eval_samples_per_second": 160.741, |
| "eval_steps_per_second": 20.094, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03215434083601286, |
| "grad_norm": 21.06436538696289, |
| "learning_rate": 4.919614147909968e-06, |
| "loss": 0.6576, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.03215434083601286, |
| "eval_accuracy": 0.5798941563639598, |
| "eval_loss": 0.7224599123001099, |
| "eval_runtime": 276.3195, |
| "eval_samples_per_second": 163.391, |
| "eval_steps_per_second": 20.426, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.033685499923442046, |
| "grad_norm": 14.679203987121582, |
| "learning_rate": 4.915786250191396e-06, |
| "loss": 0.631, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.033685499923442046, |
| "eval_accuracy": 0.5863426131815351, |
| "eval_loss": 0.6974319815635681, |
| "eval_runtime": 275.464, |
| "eval_samples_per_second": 163.898, |
| "eval_steps_per_second": 20.489, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.03521665901087123, |
| "grad_norm": 22.14579963684082, |
| "learning_rate": 4.9119583524728225e-06, |
| "loss": 0.7267, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.03521665901087123, |
| "eval_accuracy": 0.5820226223144368, |
| "eval_loss": 0.7019667625427246, |
| "eval_runtime": 276.7093, |
| "eval_samples_per_second": 163.16, |
| "eval_steps_per_second": 20.397, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.03674781809830041, |
| "grad_norm": 7.788635730743408, |
| "learning_rate": 4.908130454754249e-06, |
| "loss": 0.6079, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03674781809830041, |
| "eval_accuracy": 0.5955016145195412, |
| "eval_loss": 0.6954487562179565, |
| "eval_runtime": 279.6934, |
| "eval_samples_per_second": 161.42, |
| "eval_steps_per_second": 20.179, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0382789771857296, |
| "grad_norm": 10.69842529296875, |
| "learning_rate": 4.904302557035676e-06, |
| "loss": 0.6862, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0382789771857296, |
| "eval_accuracy": 0.5917668001780151, |
| "eval_loss": 0.7000778913497925, |
| "eval_runtime": 279.4881, |
| "eval_samples_per_second": 161.538, |
| "eval_steps_per_second": 20.194, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.03981013627315878, |
| "grad_norm": 11.247802734375, |
| "learning_rate": 4.900474659317103e-06, |
| "loss": 0.6138, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.03981013627315878, |
| "eval_accuracy": 0.5918217597225002, |
| "eval_loss": 0.7174475789070129, |
| "eval_runtime": 279.259, |
| "eval_samples_per_second": 161.671, |
| "eval_steps_per_second": 20.211, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.04134129536058796, |
| "grad_norm": 14.550101280212402, |
| "learning_rate": 4.89664676159853e-06, |
| "loss": 0.7057, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.04134129536058796, |
| "eval_accuracy": 0.5835486457590877, |
| "eval_loss": 0.7064123749732971, |
| "eval_runtime": 279.3626, |
| "eval_samples_per_second": 161.611, |
| "eval_steps_per_second": 20.203, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.04287245444801715, |
| "grad_norm": 10.610974311828613, |
| "learning_rate": 4.8928188638799574e-06, |
| "loss": 0.6402, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.04287245444801715, |
| "eval_accuracy": 0.565008347245409, |
| "eval_loss": 0.7342826128005981, |
| "eval_runtime": 276.5673, |
| "eval_samples_per_second": 163.244, |
| "eval_steps_per_second": 20.407, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.044403613535446335, |
| "grad_norm": 16.33481216430664, |
| "learning_rate": 4.888990966161384e-06, |
| "loss": 0.6989, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.044403613535446335, |
| "eval_accuracy": 0.5488992095744206, |
| "eval_loss": 0.7351524829864502, |
| "eval_runtime": 277.0818, |
| "eval_samples_per_second": 162.941, |
| "eval_steps_per_second": 20.369, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.045934772622875514, |
| "grad_norm": 7.8038530349731445, |
| "learning_rate": 4.8851630684428116e-06, |
| "loss": 0.8763, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.045934772622875514, |
| "eval_accuracy": 0.5477137400292102, |
| "eval_loss": 0.7000990509986877, |
| "eval_runtime": 276.7186, |
| "eval_samples_per_second": 163.155, |
| "eval_steps_per_second": 20.396, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0474659317103047, |
| "grad_norm": 7.672823429107666, |
| "learning_rate": 4.881335170724238e-06, |
| "loss": 0.6821, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0474659317103047, |
| "eval_accuracy": 0.5508064880495188, |
| "eval_loss": 0.6869771480560303, |
| "eval_runtime": 276.0483, |
| "eval_samples_per_second": 163.551, |
| "eval_steps_per_second": 20.446, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.048997090797733886, |
| "grad_norm": 6.58916711807251, |
| "learning_rate": 4.877507273005666e-06, |
| "loss": 0.6771, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.048997090797733886, |
| "eval_accuracy": 0.5899522937439237, |
| "eval_loss": 0.6720254421234131, |
| "eval_runtime": 276.7493, |
| "eval_samples_per_second": 163.137, |
| "eval_steps_per_second": 20.394, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.050528249885163065, |
| "grad_norm": 5.778668403625488, |
| "learning_rate": 4.873679375287092e-06, |
| "loss": 0.6747, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.050528249885163065, |
| "eval_accuracy": 0.59793675409615, |
| "eval_loss": 0.662046492099762, |
| "eval_runtime": 275.7574, |
| "eval_samples_per_second": 163.724, |
| "eval_steps_per_second": 20.467, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.05205940897259225, |
| "grad_norm": 6.639097690582275, |
| "learning_rate": 4.86985147756852e-06, |
| "loss": 0.6283, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.05205940897259225, |
| "eval_accuracy": 0.5966143497757848, |
| "eval_loss": 0.6647851467132568, |
| "eval_runtime": 274.2604, |
| "eval_samples_per_second": 164.617, |
| "eval_steps_per_second": 20.579, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.05359056806002144, |
| "grad_norm": 7.865772724151611, |
| "learning_rate": 4.8660235798499465e-06, |
| "loss": 0.6396, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.05359056806002144, |
| "eval_accuracy": 0.5950059134626113, |
| "eval_loss": 0.6750874519348145, |
| "eval_runtime": 275.118, |
| "eval_samples_per_second": 164.104, |
| "eval_steps_per_second": 20.515, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.05512172714745062, |
| "grad_norm": 12.208525657653809, |
| "learning_rate": 4.862195682131374e-06, |
| "loss": 0.6802, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.05512172714745062, |
| "eval_accuracy": 0.5931904836228232, |
| "eval_loss": 0.6721886992454529, |
| "eval_runtime": 276.9399, |
| "eval_samples_per_second": 163.025, |
| "eval_steps_per_second": 20.38, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.0566528862348798, |
| "grad_norm": 21.355411529541016, |
| "learning_rate": 4.858367784412801e-06, |
| "loss": 0.7074, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0566528862348798, |
| "eval_accuracy": 0.5865997770345597, |
| "eval_loss": 0.6712462306022644, |
| "eval_runtime": 277.2497, |
| "eval_samples_per_second": 162.842, |
| "eval_steps_per_second": 20.357, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.05818404532230899, |
| "grad_norm": 12.31411075592041, |
| "learning_rate": 4.854539886694228e-06, |
| "loss": 0.7007, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.05818404532230899, |
| "eval_accuracy": 0.5599766601584416, |
| "eval_loss": 0.6877785921096802, |
| "eval_runtime": 278.0283, |
| "eval_samples_per_second": 162.386, |
| "eval_steps_per_second": 20.3, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.059715204409738175, |
| "grad_norm": 8.385506629943848, |
| "learning_rate": 4.850711988975655e-06, |
| "loss": 0.6589, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.059715204409738175, |
| "eval_accuracy": 0.5800094486063305, |
| "eval_loss": 0.6738844513893127, |
| "eval_runtime": 279.2165, |
| "eval_samples_per_second": 161.695, |
| "eval_steps_per_second": 20.214, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.061246363497167354, |
| "grad_norm": 9.52385139465332, |
| "learning_rate": 4.846884091257082e-06, |
| "loss": 0.6144, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.061246363497167354, |
| "eval_accuracy": 0.5881171772160372, |
| "eval_loss": 0.6737349033355713, |
| "eval_runtime": 277.7475, |
| "eval_samples_per_second": 162.551, |
| "eval_steps_per_second": 20.321, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06277752258459654, |
| "grad_norm": 9.535078048706055, |
| "learning_rate": 4.843056193538509e-06, |
| "loss": 0.6653, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.06277752258459654, |
| "eval_accuracy": 0.5889365121885882, |
| "eval_loss": 0.6895773410797119, |
| "eval_runtime": 277.0672, |
| "eval_samples_per_second": 162.95, |
| "eval_steps_per_second": 20.371, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.06430868167202572, |
| "grad_norm": 13.276960372924805, |
| "learning_rate": 4.839228295819936e-06, |
| "loss": 0.6033, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.06430868167202572, |
| "eval_accuracy": 0.5839217088211637, |
| "eval_loss": 0.7118301391601562, |
| "eval_runtime": 277.1875, |
| "eval_samples_per_second": 162.879, |
| "eval_steps_per_second": 20.362, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.06583984075945491, |
| "grad_norm": 11.221186637878418, |
| "learning_rate": 4.835400398101363e-06, |
| "loss": 0.6102, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.06583984075945491, |
| "eval_accuracy": 0.5721997903049502, |
| "eval_loss": 0.7291567325592041, |
| "eval_runtime": 277.8978, |
| "eval_samples_per_second": 162.463, |
| "eval_steps_per_second": 20.31, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.06737099984688409, |
| "grad_norm": 12.54729175567627, |
| "learning_rate": 4.8315725003827905e-06, |
| "loss": 0.7269, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.06737099984688409, |
| "eval_accuracy": 0.5739306990338918, |
| "eval_loss": 0.7139677405357361, |
| "eval_runtime": 278.0873, |
| "eval_samples_per_second": 162.352, |
| "eval_steps_per_second": 20.296, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.06890215893431327, |
| "grad_norm": 7.917787075042725, |
| "learning_rate": 4.827744602664217e-06, |
| "loss": 0.625, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06890215893431327, |
| "eval_accuracy": 0.5735848215281029, |
| "eval_loss": 0.7001749873161316, |
| "eval_runtime": 274.8459, |
| "eval_samples_per_second": 164.267, |
| "eval_steps_per_second": 20.535, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.07043331802174246, |
| "grad_norm": 9.970056533813477, |
| "learning_rate": 4.823916704945645e-06, |
| "loss": 0.6168, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.07043331802174246, |
| "eval_accuracy": 0.5706444127097465, |
| "eval_loss": 0.7068008184432983, |
| "eval_runtime": 275.5547, |
| "eval_samples_per_second": 163.844, |
| "eval_steps_per_second": 20.482, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.07196447710917164, |
| "grad_norm": 14.022720336914062, |
| "learning_rate": 4.820088807227071e-06, |
| "loss": 0.5978, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.07196447710917164, |
| "eval_accuracy": 0.5733915328597199, |
| "eval_loss": 0.7220426797866821, |
| "eval_runtime": 276.0799, |
| "eval_samples_per_second": 163.532, |
| "eval_steps_per_second": 20.443, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.07349563619660082, |
| "grad_norm": 15.758445739746094, |
| "learning_rate": 4.816260909508498e-06, |
| "loss": 0.6583, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.07349563619660082, |
| "eval_accuracy": 0.5795259437643544, |
| "eval_loss": 0.7102298736572266, |
| "eval_runtime": 277.2435, |
| "eval_samples_per_second": 162.846, |
| "eval_steps_per_second": 20.358, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.07502679528403002, |
| "grad_norm": 9.483732223510742, |
| "learning_rate": 4.8124330117899254e-06, |
| "loss": 0.6455, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.07502679528403002, |
| "eval_accuracy": 0.5872728491919802, |
| "eval_loss": 0.6983802318572998, |
| "eval_runtime": 277.7989, |
| "eval_samples_per_second": 162.52, |
| "eval_steps_per_second": 20.317, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0765579543714592, |
| "grad_norm": 11.97518539428711, |
| "learning_rate": 4.808605114071352e-06, |
| "loss": 0.6796, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0765579543714592, |
| "eval_accuracy": 0.5854998659876709, |
| "eval_loss": 0.7021452188491821, |
| "eval_runtime": 278.7441, |
| "eval_samples_per_second": 161.969, |
| "eval_steps_per_second": 20.248, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07808911345888837, |
| "grad_norm": 10.056512832641602, |
| "learning_rate": 4.8047772163527796e-06, |
| "loss": 0.6509, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.07808911345888837, |
| "eval_accuracy": 0.57778125558934, |
| "eval_loss": 0.7093414664268494, |
| "eval_runtime": 286.7453, |
| "eval_samples_per_second": 157.45, |
| "eval_steps_per_second": 19.683, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.07962027254631757, |
| "grad_norm": 11.229554176330566, |
| "learning_rate": 4.800949318634206e-06, |
| "loss": 0.5777, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.07962027254631757, |
| "eval_accuracy": 0.5840340820377846, |
| "eval_loss": 0.7045831084251404, |
| "eval_runtime": 284.2635, |
| "eval_samples_per_second": 158.824, |
| "eval_steps_per_second": 19.855, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.08115143163374675, |
| "grad_norm": 16.119789123535156, |
| "learning_rate": 4.797121420915634e-06, |
| "loss": 0.6145, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.08115143163374675, |
| "eval_accuracy": 0.5854364178573018, |
| "eval_loss": 0.7088597416877747, |
| "eval_runtime": 279.337, |
| "eval_samples_per_second": 161.626, |
| "eval_steps_per_second": 20.205, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.08268259072117592, |
| "grad_norm": 14.363024711608887, |
| "learning_rate": 4.79329352319706e-06, |
| "loss": 0.6973, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.08268259072117592, |
| "eval_accuracy": 0.5904418635696169, |
| "eval_loss": 0.6892778277397156, |
| "eval_runtime": 279.0651, |
| "eval_samples_per_second": 161.783, |
| "eval_steps_per_second": 20.225, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.08421374980860512, |
| "grad_norm": 12.421643257141113, |
| "learning_rate": 4.789465625478488e-06, |
| "loss": 0.6444, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.08421374980860512, |
| "eval_accuracy": 0.5964966878584449, |
| "eval_loss": 0.6805678009986877, |
| "eval_runtime": 277.6804, |
| "eval_samples_per_second": 162.59, |
| "eval_steps_per_second": 20.326, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0857449088960343, |
| "grad_norm": 13.641290664672852, |
| "learning_rate": 4.7856377277599145e-06, |
| "loss": 0.6197, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.0857449088960343, |
| "eval_accuracy": 0.6008450077829665, |
| "eval_loss": 0.6836313605308533, |
| "eval_runtime": 278.3481, |
| "eval_samples_per_second": 162.2, |
| "eval_steps_per_second": 20.277, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.08727606798346348, |
| "grad_norm": 11.80357837677002, |
| "learning_rate": 4.781809830041342e-06, |
| "loss": 0.6241, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.08727606798346348, |
| "eval_accuracy": 0.601209668453003, |
| "eval_loss": 0.6760628819465637, |
| "eval_runtime": 279.0499, |
| "eval_samples_per_second": 161.792, |
| "eval_steps_per_second": 20.226, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.08880722707089267, |
| "grad_norm": 10.733393669128418, |
| "learning_rate": 4.777981932322769e-06, |
| "loss": 0.713, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.08880722707089267, |
| "eval_accuracy": 0.5967536955697755, |
| "eval_loss": 0.6692460775375366, |
| "eval_runtime": 279.8616, |
| "eval_samples_per_second": 161.323, |
| "eval_steps_per_second": 20.167, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.09033838615832185, |
| "grad_norm": 11.116392135620117, |
| "learning_rate": 4.774154034604196e-06, |
| "loss": 0.6109, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.09033838615832185, |
| "eval_accuracy": 0.5920902946621758, |
| "eval_loss": 0.674372673034668, |
| "eval_runtime": 276.8783, |
| "eval_samples_per_second": 163.061, |
| "eval_steps_per_second": 20.384, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.09186954524575103, |
| "grad_norm": 9.64384937286377, |
| "learning_rate": 4.770326136885623e-06, |
| "loss": 0.6704, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.09186954524575103, |
| "eval_accuracy": 0.586066763425254, |
| "eval_loss": 0.6863875389099121, |
| "eval_runtime": 279.7067, |
| "eval_samples_per_second": 161.412, |
| "eval_steps_per_second": 20.178, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.09340070433318022, |
| "grad_norm": 13.213354110717773, |
| "learning_rate": 4.76649823916705e-06, |
| "loss": 0.6605, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.09340070433318022, |
| "eval_accuracy": 0.5854859919317092, |
| "eval_loss": 0.6997817158699036, |
| "eval_runtime": 278.2489, |
| "eval_samples_per_second": 162.258, |
| "eval_steps_per_second": 20.284, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0949318634206094, |
| "grad_norm": 8.854043006896973, |
| "learning_rate": 4.762670341448477e-06, |
| "loss": 0.6467, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.0949318634206094, |
| "eval_accuracy": 0.5874072750022343, |
| "eval_loss": 0.6911128759384155, |
| "eval_runtime": 279.0402, |
| "eval_samples_per_second": 161.797, |
| "eval_steps_per_second": 20.226, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.09646302250803858, |
| "grad_norm": 8.668028831481934, |
| "learning_rate": 4.758842443729904e-06, |
| "loss": 0.653, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.09646302250803858, |
| "eval_accuracy": 0.5896617883276816, |
| "eval_loss": 0.683178186416626, |
| "eval_runtime": 278.9227, |
| "eval_samples_per_second": 161.866, |
| "eval_steps_per_second": 20.235, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.09799418159546777, |
| "grad_norm": 8.654230117797852, |
| "learning_rate": 4.755014546011331e-06, |
| "loss": 0.6292, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.09799418159546777, |
| "eval_accuracy": 0.5951244535641003, |
| "eval_loss": 0.6820477843284607, |
| "eval_runtime": 279.422, |
| "eval_samples_per_second": 161.576, |
| "eval_steps_per_second": 20.199, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.09952534068289695, |
| "grad_norm": 12.051166534423828, |
| "learning_rate": 4.7511866482927585e-06, |
| "loss": 0.6319, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.09952534068289695, |
| "eval_accuracy": 0.5965489637996976, |
| "eval_loss": 0.6887350678443909, |
| "eval_runtime": 279.2633, |
| "eval_samples_per_second": 161.668, |
| "eval_steps_per_second": 20.21, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.10105649977032613, |
| "grad_norm": 10.261021614074707, |
| "learning_rate": 4.747358750574185e-06, |
| "loss": 0.687, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.10105649977032613, |
| "eval_accuracy": 0.5923518675154699, |
| "eval_loss": 0.6835098266601562, |
| "eval_runtime": 279.8292, |
| "eval_samples_per_second": 161.341, |
| "eval_steps_per_second": 20.169, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.10258765885775532, |
| "grad_norm": 9.035223007202148, |
| "learning_rate": 4.743530852855613e-06, |
| "loss": 0.6705, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.10258765885775532, |
| "eval_accuracy": 0.5858986422906305, |
| "eval_loss": 0.6886019706726074, |
| "eval_runtime": 279.2751, |
| "eval_samples_per_second": 161.661, |
| "eval_steps_per_second": 20.209, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.1041188179451845, |
| "grad_norm": 7.328871726989746, |
| "learning_rate": 4.739702955137039e-06, |
| "loss": 0.565, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.1041188179451845, |
| "eval_accuracy": 0.5869302949061662, |
| "eval_loss": 0.7063195109367371, |
| "eval_runtime": 279.7163, |
| "eval_samples_per_second": 161.406, |
| "eval_steps_per_second": 20.178, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.10564997703261368, |
| "grad_norm": 21.045848846435547, |
| "learning_rate": 4.735875057418467e-06, |
| "loss": 0.6541, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.10564997703261368, |
| "eval_accuracy": 0.5889380826306538, |
| "eval_loss": 0.7606213092803955, |
| "eval_runtime": 280.23, |
| "eval_samples_per_second": 161.111, |
| "eval_steps_per_second": 20.141, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.10718113612004287, |
| "grad_norm": 12.267477989196777, |
| "learning_rate": 4.732047159699893e-06, |
| "loss": 0.7604, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10718113612004287, |
| "eval_accuracy": 0.5878412959789937, |
| "eval_loss": 0.7354863882064819, |
| "eval_runtime": 280.1519, |
| "eval_samples_per_second": 161.155, |
| "eval_steps_per_second": 20.146, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10871229520747205, |
| "grad_norm": 8.619697570800781, |
| "learning_rate": 4.72821926198132e-06, |
| "loss": 0.6401, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.10871229520747205, |
| "eval_accuracy": 0.5879179670084708, |
| "eval_loss": 0.699480414390564, |
| "eval_runtime": 282.5216, |
| "eval_samples_per_second": 159.804, |
| "eval_steps_per_second": 19.977, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.11024345429490125, |
| "grad_norm": 6.322849750518799, |
| "learning_rate": 4.724391364262747e-06, |
| "loss": 0.6129, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.11024345429490125, |
| "eval_accuracy": 0.5929760364139408, |
| "eval_loss": 0.688232958316803, |
| "eval_runtime": 278.9404, |
| "eval_samples_per_second": 161.855, |
| "eval_steps_per_second": 20.234, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.11177461338233043, |
| "grad_norm": 9.213967323303223, |
| "learning_rate": 4.720563466544174e-06, |
| "loss": 0.6502, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.11177461338233043, |
| "eval_accuracy": 0.5926983206583555, |
| "eval_loss": 0.6913579702377319, |
| "eval_runtime": 277.6463, |
| "eval_samples_per_second": 162.61, |
| "eval_steps_per_second": 20.328, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.1133057724697596, |
| "grad_norm": 7.4615349769592285, |
| "learning_rate": 4.716735568825601e-06, |
| "loss": 0.6199, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.1133057724697596, |
| "eval_accuracy": 0.5917493589028877, |
| "eval_loss": 0.6992406845092773, |
| "eval_runtime": 278.7175, |
| "eval_samples_per_second": 161.985, |
| "eval_steps_per_second": 20.25, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.1148369315571888, |
| "grad_norm": 11.835037231445312, |
| "learning_rate": 4.712907671107028e-06, |
| "loss": 0.5761, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.1148369315571888, |
| "eval_accuracy": 0.5893469260561813, |
| "eval_loss": 0.7284606099128723, |
| "eval_runtime": 279.7689, |
| "eval_samples_per_second": 161.376, |
| "eval_steps_per_second": 20.174, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.11636809064461798, |
| "grad_norm": 9.900166511535645, |
| "learning_rate": 4.709079773388455e-06, |
| "loss": 0.6017, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.11636809064461798, |
| "eval_accuracy": 0.5889316629208483, |
| "eval_loss": 0.7434907555580139, |
| "eval_runtime": 281.7435, |
| "eval_samples_per_second": 160.245, |
| "eval_steps_per_second": 20.032, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.11789924973204716, |
| "grad_norm": 12.274435997009277, |
| "learning_rate": 4.7052518756698825e-06, |
| "loss": 0.5757, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.11789924973204716, |
| "eval_accuracy": 0.5897960545337277, |
| "eval_loss": 0.7581047415733337, |
| "eval_runtime": 279.4034, |
| "eval_samples_per_second": 161.587, |
| "eval_steps_per_second": 20.2, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.11943040881947635, |
| "grad_norm": 10.7369384765625, |
| "learning_rate": 4.701423977951309e-06, |
| "loss": 0.6231, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.11943040881947635, |
| "eval_accuracy": 0.5952973044984236, |
| "eval_loss": 0.7496009469032288, |
| "eval_runtime": 279.5031, |
| "eval_samples_per_second": 161.529, |
| "eval_steps_per_second": 20.193, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.12096156790690553, |
| "grad_norm": 11.4940824508667, |
| "learning_rate": 4.697596080232737e-06, |
| "loss": 0.6995, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.12096156790690553, |
| "eval_accuracy": 0.5959564541213064, |
| "eval_loss": 0.7335057258605957, |
| "eval_runtime": 279.459, |
| "eval_samples_per_second": 161.555, |
| "eval_steps_per_second": 20.196, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.12249272699433471, |
| "grad_norm": 8.03961181640625, |
| "learning_rate": 4.693768182514163e-06, |
| "loss": 0.6434, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.12249272699433471, |
| "eval_accuracy": 0.5859463796215819, |
| "eval_loss": 0.728286623954773, |
| "eval_runtime": 280.7002, |
| "eval_samples_per_second": 160.841, |
| "eval_steps_per_second": 20.107, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.1240238860817639, |
| "grad_norm": 8.879143714904785, |
| "learning_rate": 4.689940284795591e-06, |
| "loss": 0.7005, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.1240238860817639, |
| "eval_accuracy": 0.5771607003457121, |
| "eval_loss": 0.7147245407104492, |
| "eval_runtime": 276.8558, |
| "eval_samples_per_second": 163.074, |
| "eval_steps_per_second": 20.386, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.12555504516919308, |
| "grad_norm": 7.983788967132568, |
| "learning_rate": 4.686112387077017e-06, |
| "loss": 0.6639, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.12555504516919308, |
| "eval_accuracy": 0.5777222309014216, |
| "eval_loss": 0.6992844939231873, |
| "eval_runtime": 278.3704, |
| "eval_samples_per_second": 162.187, |
| "eval_steps_per_second": 20.275, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.12708620425662226, |
| "grad_norm": 6.068845748901367, |
| "learning_rate": 4.682284489358445e-06, |
| "loss": 0.6211, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.12708620425662226, |
| "eval_accuracy": 0.5828836462560764, |
| "eval_loss": 0.689354419708252, |
| "eval_runtime": 278.7274, |
| "eval_samples_per_second": 161.979, |
| "eval_steps_per_second": 20.249, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.12861736334405144, |
| "grad_norm": 7.345738887786865, |
| "learning_rate": 4.6784565916398715e-06, |
| "loss": 0.6456, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.12861736334405144, |
| "eval_accuracy": 0.5853175045103236, |
| "eval_loss": 0.6859722137451172, |
| "eval_runtime": 279.9849, |
| "eval_samples_per_second": 161.252, |
| "eval_steps_per_second": 20.158, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.13014852243148062, |
| "grad_norm": 11.570878028869629, |
| "learning_rate": 4.674628693921299e-06, |
| "loss": 0.6255, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.13014852243148062, |
| "eval_accuracy": 0.5951967978652435, |
| "eval_loss": 0.6828535199165344, |
| "eval_runtime": 281.2947, |
| "eval_samples_per_second": 160.501, |
| "eval_steps_per_second": 20.064, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.13167968151890982, |
| "grad_norm": 10.029556274414062, |
| "learning_rate": 4.670800796202726e-06, |
| "loss": 0.5931, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.13167968151890982, |
| "eval_accuracy": 0.5980770938804512, |
| "eval_loss": 0.6795242428779602, |
| "eval_runtime": 280.5479, |
| "eval_samples_per_second": 160.928, |
| "eval_steps_per_second": 20.118, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.133210840606339, |
| "grad_norm": 16.333703994750977, |
| "learning_rate": 4.666972898484153e-06, |
| "loss": 0.7352, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.133210840606339, |
| "eval_accuracy": 0.5991164979577339, |
| "eval_loss": 0.6769992709159851, |
| "eval_runtime": 280.0752, |
| "eval_samples_per_second": 161.2, |
| "eval_steps_per_second": 20.152, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.13474199969376818, |
| "grad_norm": 6.884426593780518, |
| "learning_rate": 4.66314500076558e-06, |
| "loss": 0.6425, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.13474199969376818, |
| "eval_accuracy": 0.598591236334548, |
| "eval_loss": 0.6669920086860657, |
| "eval_runtime": 279.9453, |
| "eval_samples_per_second": 161.274, |
| "eval_steps_per_second": 20.161, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.13627315878119736, |
| "grad_norm": 9.960312843322754, |
| "learning_rate": 4.659317103047007e-06, |
| "loss": 0.6905, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.13627315878119736, |
| "eval_accuracy": 0.5981738203145828, |
| "eval_loss": 0.6663030385971069, |
| "eval_runtime": 279.6741, |
| "eval_samples_per_second": 161.431, |
| "eval_steps_per_second": 20.181, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.13780431786862654, |
| "grad_norm": 7.798278331756592, |
| "learning_rate": 4.655489205328434e-06, |
| "loss": 0.5681, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.13780431786862654, |
| "eval_accuracy": 0.6002266515565629, |
| "eval_loss": 0.6640587449073792, |
| "eval_runtime": 279.9048, |
| "eval_samples_per_second": 161.298, |
| "eval_steps_per_second": 20.164, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.13933547695605572, |
| "grad_norm": 10.989811897277832, |
| "learning_rate": 4.651661307609861e-06, |
| "loss": 0.5809, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.13933547695605572, |
| "eval_accuracy": 0.599174562318326, |
| "eval_loss": 0.6831759810447693, |
| "eval_runtime": 278.8518, |
| "eval_samples_per_second": 161.907, |
| "eval_steps_per_second": 20.24, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.14086663604348493, |
| "grad_norm": 13.403684616088867, |
| "learning_rate": 4.647833409891288e-06, |
| "loss": 0.5984, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.14086663604348493, |
| "eval_accuracy": 0.5973414996782282, |
| "eval_loss": 0.7047386765480042, |
| "eval_runtime": 277.7644, |
| "eval_samples_per_second": 162.541, |
| "eval_steps_per_second": 20.319, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.1423977951309141, |
| "grad_norm": 11.702314376831055, |
| "learning_rate": 4.6440055121727155e-06, |
| "loss": 0.631, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.1423977951309141, |
| "eval_accuracy": 0.5949097681018046, |
| "eval_loss": 0.7055184841156006, |
| "eval_runtime": 277.5329, |
| "eval_samples_per_second": 162.676, |
| "eval_steps_per_second": 20.336, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.14392895421834329, |
| "grad_norm": 10.92357063293457, |
| "learning_rate": 4.640177614454142e-06, |
| "loss": 0.6703, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.14392895421834329, |
| "eval_accuracy": 0.5971907868459593, |
| "eval_loss": 0.684637188911438, |
| "eval_runtime": 276.7646, |
| "eval_samples_per_second": 163.128, |
| "eval_steps_per_second": 20.393, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.14546011330577246, |
| "grad_norm": 9.321954727172852, |
| "learning_rate": 4.636349716735569e-06, |
| "loss": 0.6304, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.14546011330577246, |
| "eval_accuracy": 0.5969360568383659, |
| "eval_loss": 0.6831667423248291, |
| "eval_runtime": 276.3791, |
| "eval_samples_per_second": 163.355, |
| "eval_steps_per_second": 20.421, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.14699127239320164, |
| "grad_norm": 11.221244812011719, |
| "learning_rate": 4.632521819016996e-06, |
| "loss": 0.6373, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.14699127239320164, |
| "eval_accuracy": 0.5956472445145944, |
| "eval_loss": 0.6867417097091675, |
| "eval_runtime": 275.868, |
| "eval_samples_per_second": 163.658, |
| "eval_steps_per_second": 20.459, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.14852243148063085, |
| "grad_norm": 13.402386665344238, |
| "learning_rate": 4.628693921298423e-06, |
| "loss": 0.6338, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.14852243148063085, |
| "eval_accuracy": 0.595343976519767, |
| "eval_loss": 0.6871860027313232, |
| "eval_runtime": 276.6694, |
| "eval_samples_per_second": 163.184, |
| "eval_steps_per_second": 20.4, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.15005359056806003, |
| "grad_norm": 6.8687520027160645, |
| "learning_rate": 4.6248660235798504e-06, |
| "loss": 0.6541, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.15005359056806003, |
| "eval_accuracy": 0.5944526067405725, |
| "eval_loss": 0.6828967332839966, |
| "eval_runtime": 277.3111, |
| "eval_samples_per_second": 162.806, |
| "eval_steps_per_second": 20.353, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.1515847496554892, |
| "grad_norm": 8.383277893066406, |
| "learning_rate": 4.621038125861277e-06, |
| "loss": 0.6485, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.1515847496554892, |
| "eval_accuracy": 0.5881514159035716, |
| "eval_loss": 0.6898565292358398, |
| "eval_runtime": 278.3748, |
| "eval_samples_per_second": 162.184, |
| "eval_steps_per_second": 20.275, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.1531159087429184, |
| "grad_norm": 8.281054496765137, |
| "learning_rate": 4.617210228142705e-06, |
| "loss": 0.5877, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1531159087429184, |
| "eval_accuracy": 0.5914452307829261, |
| "eval_loss": 0.6997293829917908, |
| "eval_runtime": 277.7962, |
| "eval_samples_per_second": 162.522, |
| "eval_steps_per_second": 20.317, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.15464706783034757, |
| "grad_norm": 10.8377685546875, |
| "learning_rate": 4.613382330424131e-06, |
| "loss": 0.6585, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.15464706783034757, |
| "eval_accuracy": 0.5923300819872465, |
| "eval_loss": 0.6947582364082336, |
| "eval_runtime": 278.9872, |
| "eval_samples_per_second": 161.828, |
| "eval_steps_per_second": 20.23, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.15617822691777675, |
| "grad_norm": 12.618541717529297, |
| "learning_rate": 4.609554432705559e-06, |
| "loss": 0.6153, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.15617822691777675, |
| "eval_accuracy": 0.5965337184757249, |
| "eval_loss": 0.6904256939888, |
| "eval_runtime": 278.6853, |
| "eval_samples_per_second": 162.004, |
| "eval_steps_per_second": 20.252, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.15770938600520595, |
| "grad_norm": 15.610793113708496, |
| "learning_rate": 4.605726534986985e-06, |
| "loss": 0.6145, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.15770938600520595, |
| "eval_accuracy": 0.5957805907172996, |
| "eval_loss": 0.7072130441665649, |
| "eval_runtime": 278.4706, |
| "eval_samples_per_second": 162.128, |
| "eval_steps_per_second": 20.268, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.15924054509263513, |
| "grad_norm": 10.127962112426758, |
| "learning_rate": 4.601898637268413e-06, |
| "loss": 0.6019, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.15924054509263513, |
| "eval_accuracy": 0.5954627183733269, |
| "eval_loss": 0.6940288543701172, |
| "eval_runtime": 278.8001, |
| "eval_samples_per_second": 161.937, |
| "eval_steps_per_second": 20.244, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.1607717041800643, |
| "grad_norm": 18.335458755493164, |
| "learning_rate": 4.5980707395498395e-06, |
| "loss": 0.5354, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1607717041800643, |
| "eval_accuracy": 0.5993520757982559, |
| "eval_loss": 0.7147676348686218, |
| "eval_runtime": 278.6112, |
| "eval_samples_per_second": 162.047, |
| "eval_steps_per_second": 20.258, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1623028632674935, |
| "grad_norm": 13.370587348937988, |
| "learning_rate": 4.594242841831267e-06, |
| "loss": 0.6977, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.1623028632674935, |
| "eval_accuracy": 0.5989220600629908, |
| "eval_loss": 0.7047263979911804, |
| "eval_runtime": 279.4512, |
| "eval_samples_per_second": 161.56, |
| "eval_steps_per_second": 20.197, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.16383402235492267, |
| "grad_norm": 9.09716510772705, |
| "learning_rate": 4.590414944112694e-06, |
| "loss": 0.6039, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.16383402235492267, |
| "eval_accuracy": 0.5984739258700619, |
| "eval_loss": 0.6938444972038269, |
| "eval_runtime": 280.6964, |
| "eval_samples_per_second": 160.843, |
| "eval_steps_per_second": 20.107, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.16536518144235185, |
| "grad_norm": 11.401485443115234, |
| "learning_rate": 4.586587046394121e-06, |
| "loss": 0.6579, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.16536518144235185, |
| "eval_accuracy": 0.5967512870584059, |
| "eval_loss": 0.6896911263465881, |
| "eval_runtime": 279.247, |
| "eval_samples_per_second": 161.678, |
| "eval_steps_per_second": 20.212, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.16689634052978106, |
| "grad_norm": 10.442956924438477, |
| "learning_rate": 4.582759148675548e-06, |
| "loss": 0.5409, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.16689634052978106, |
| "eval_accuracy": 0.5923961292613636, |
| "eval_loss": 0.7205661535263062, |
| "eval_runtime": 278.5362, |
| "eval_samples_per_second": 162.09, |
| "eval_steps_per_second": 20.263, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.16842749961721024, |
| "grad_norm": 24.116500854492188, |
| "learning_rate": 4.578931250956975e-06, |
| "loss": 0.5717, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.16842749961721024, |
| "eval_accuracy": 0.5918675367336973, |
| "eval_loss": 0.7739020586013794, |
| "eval_runtime": 277.7642, |
| "eval_samples_per_second": 162.541, |
| "eval_steps_per_second": 20.319, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.16995865870463941, |
| "grad_norm": 17.19237518310547, |
| "learning_rate": 4.575103353238402e-06, |
| "loss": 0.7444, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.16995865870463941, |
| "eval_accuracy": 0.5971056953877569, |
| "eval_loss": 0.7259252667427063, |
| "eval_runtime": 280.0491, |
| "eval_samples_per_second": 161.215, |
| "eval_steps_per_second": 20.154, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.1714898177920686, |
| "grad_norm": 12.191926002502441, |
| "learning_rate": 4.571275455519829e-06, |
| "loss": 0.5495, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.1714898177920686, |
| "eval_accuracy": 0.5972413486739816, |
| "eval_loss": 0.7175703644752502, |
| "eval_runtime": 280.1557, |
| "eval_samples_per_second": 161.153, |
| "eval_steps_per_second": 20.146, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.17302097687949777, |
| "grad_norm": 18.153154373168945, |
| "learning_rate": 4.567447557801256e-06, |
| "loss": 0.6002, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.17302097687949777, |
| "eval_accuracy": 0.5982025962498613, |
| "eval_loss": 0.7397978901863098, |
| "eval_runtime": 280.0206, |
| "eval_samples_per_second": 161.231, |
| "eval_steps_per_second": 20.156, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.17455213596692695, |
| "grad_norm": 9.707260131835938, |
| "learning_rate": 4.5636196600826835e-06, |
| "loss": 0.648, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.17455213596692695, |
| "eval_accuracy": 0.5982077501497238, |
| "eval_loss": 0.7219535708427429, |
| "eval_runtime": 279.1283, |
| "eval_samples_per_second": 161.746, |
| "eval_steps_per_second": 20.22, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.17608329505435616, |
| "grad_norm": 13.713787078857422, |
| "learning_rate": 4.55979176236411e-06, |
| "loss": 0.7169, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.17608329505435616, |
| "eval_accuracy": 0.5967283703999645, |
| "eval_loss": 0.7080119848251343, |
| "eval_runtime": 280.0417, |
| "eval_samples_per_second": 161.219, |
| "eval_steps_per_second": 20.154, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.17761445414178534, |
| "grad_norm": 12.010796546936035, |
| "learning_rate": 4.555963864645538e-06, |
| "loss": 0.6007, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.17761445414178534, |
| "eval_accuracy": 0.593027131524565, |
| "eval_loss": 0.686759889125824, |
| "eval_runtime": 277.4036, |
| "eval_samples_per_second": 162.752, |
| "eval_steps_per_second": 20.346, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.17914561322921452, |
| "grad_norm": 11.684185028076172, |
| "learning_rate": 4.552135966926964e-06, |
| "loss": 0.5699, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.17914561322921452, |
| "eval_accuracy": 0.589081225033289, |
| "eval_loss": 0.6952749490737915, |
| "eval_runtime": 278.728, |
| "eval_samples_per_second": 161.979, |
| "eval_steps_per_second": 20.249, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.1806767723166437, |
| "grad_norm": 14.61754035949707, |
| "learning_rate": 4.548308069208391e-06, |
| "loss": 0.6718, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.1806767723166437, |
| "eval_accuracy": 0.5850660157550205, |
| "eval_loss": 0.7031010985374451, |
| "eval_runtime": 279.3758, |
| "eval_samples_per_second": 161.603, |
| "eval_steps_per_second": 20.202, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.18220793140407288, |
| "grad_norm": 8.807073593139648, |
| "learning_rate": 4.544480171489818e-06, |
| "loss": 0.6719, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.18220793140407288, |
| "eval_accuracy": 0.5842983840494343, |
| "eval_loss": 0.6897585988044739, |
| "eval_runtime": 278.4428, |
| "eval_samples_per_second": 162.145, |
| "eval_steps_per_second": 20.27, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.18373909049150206, |
| "grad_norm": 8.141523361206055, |
| "learning_rate": 4.540652273771245e-06, |
| "loss": 0.6139, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.18373909049150206, |
| "eval_accuracy": 0.5901683023224832, |
| "eval_loss": 0.6856178045272827, |
| "eval_runtime": 278.2363, |
| "eval_samples_per_second": 162.265, |
| "eval_steps_per_second": 20.285, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.18527024957893126, |
| "grad_norm": 8.22572135925293, |
| "learning_rate": 4.536824376052672e-06, |
| "loss": 0.6554, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.18527024957893126, |
| "eval_accuracy": 0.5936903334665423, |
| "eval_loss": 0.6899842619895935, |
| "eval_runtime": 279.6454, |
| "eval_samples_per_second": 161.447, |
| "eval_steps_per_second": 20.183, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.18680140866636044, |
| "grad_norm": 10.63383674621582, |
| "learning_rate": 4.532996478334099e-06, |
| "loss": 0.5281, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.18680140866636044, |
| "eval_accuracy": 0.5959264271926515, |
| "eval_loss": 0.7073134183883667, |
| "eval_runtime": 280.0671, |
| "eval_samples_per_second": 161.204, |
| "eval_steps_per_second": 20.152, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.18833256775378962, |
| "grad_norm": 17.710468292236328, |
| "learning_rate": 4.529168580615526e-06, |
| "loss": 0.6106, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.18833256775378962, |
| "eval_accuracy": 0.5961982077899033, |
| "eval_loss": 0.7480549812316895, |
| "eval_runtime": 279.3117, |
| "eval_samples_per_second": 161.64, |
| "eval_steps_per_second": 20.207, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.1898637268412188, |
| "grad_norm": 19.713132858276367, |
| "learning_rate": 4.525340682896953e-06, |
| "loss": 0.6344, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.1898637268412188, |
| "eval_accuracy": 0.5923722417084758, |
| "eval_loss": 0.7380005717277527, |
| "eval_runtime": 279.4642, |
| "eval_samples_per_second": 161.552, |
| "eval_steps_per_second": 20.196, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.19139488592864798, |
| "grad_norm": 17.92173957824707, |
| "learning_rate": 4.52151278517838e-06, |
| "loss": 0.5918, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.19139488592864798, |
| "eval_accuracy": 0.5899982226961699, |
| "eval_loss": 0.7242019772529602, |
| "eval_runtime": 279.4636, |
| "eval_samples_per_second": 161.552, |
| "eval_steps_per_second": 20.196, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.19292604501607716, |
| "grad_norm": 12.8328857421875, |
| "learning_rate": 4.5176848874598075e-06, |
| "loss": 0.6847, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.19292604501607716, |
| "eval_accuracy": 0.5958907152376721, |
| "eval_loss": 0.6952394843101501, |
| "eval_runtime": 276.2146, |
| "eval_samples_per_second": 163.453, |
| "eval_steps_per_second": 20.433, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.19445720410350636, |
| "grad_norm": 8.0042142868042, |
| "learning_rate": 4.513856989741234e-06, |
| "loss": 0.6312, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.19445720410350636, |
| "eval_accuracy": 0.5953926887841323, |
| "eval_loss": 0.6836423873901367, |
| "eval_runtime": 279.4491, |
| "eval_samples_per_second": 161.561, |
| "eval_steps_per_second": 20.197, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.19598836319093554, |
| "grad_norm": 10.186333656311035, |
| "learning_rate": 4.510029092022662e-06, |
| "loss": 0.6135, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.19598836319093554, |
| "eval_accuracy": 0.5948660962329148, |
| "eval_loss": 0.6878789067268372, |
| "eval_runtime": 276.736, |
| "eval_samples_per_second": 163.145, |
| "eval_steps_per_second": 20.395, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.19751952227836472, |
| "grad_norm": 9.086492538452148, |
| "learning_rate": 4.506201194304088e-06, |
| "loss": 0.6481, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.19751952227836472, |
| "eval_accuracy": 0.5944503735325507, |
| "eval_loss": 0.6821103692054749, |
| "eval_runtime": 274.4173, |
| "eval_samples_per_second": 164.523, |
| "eval_steps_per_second": 20.567, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.1990506813657939, |
| "grad_norm": 8.008011817932129, |
| "learning_rate": 4.502373296585516e-06, |
| "loss": 0.6022, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.1990506813657939, |
| "eval_accuracy": 0.5919674427913804, |
| "eval_loss": 0.6874357461929321, |
| "eval_runtime": 273.5543, |
| "eval_samples_per_second": 165.042, |
| "eval_steps_per_second": 20.632, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.20058184045322308, |
| "grad_norm": 9.115822792053223, |
| "learning_rate": 4.498545398866942e-06, |
| "loss": 0.5877, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.20058184045322308, |
| "eval_accuracy": 0.5949752993012595, |
| "eval_loss": 0.696998655796051, |
| "eval_runtime": 277.1667, |
| "eval_samples_per_second": 162.891, |
| "eval_steps_per_second": 20.363, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.20211299954065226, |
| "grad_norm": 14.700295448303223, |
| "learning_rate": 4.49471750114837e-06, |
| "loss": 0.6563, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.20211299954065226, |
| "eval_accuracy": 0.5916631504141775, |
| "eval_loss": 0.7209578156471252, |
| "eval_runtime": 277.3197, |
| "eval_samples_per_second": 162.801, |
| "eval_steps_per_second": 20.352, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.20364415862808147, |
| "grad_norm": 12.265641212463379, |
| "learning_rate": 4.4908896034297965e-06, |
| "loss": 0.6844, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.20364415862808147, |
| "eval_accuracy": 0.5921450151057401, |
| "eval_loss": 0.7018990516662598, |
| "eval_runtime": 275.9386, |
| "eval_samples_per_second": 163.616, |
| "eval_steps_per_second": 20.454, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.20517531771551065, |
| "grad_norm": 14.695290565490723, |
| "learning_rate": 4.487061705711224e-06, |
| "loss": 0.6242, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.20517531771551065, |
| "eval_accuracy": 0.5874938869870626, |
| "eval_loss": 0.7059697508811951, |
| "eval_runtime": 275.3787, |
| "eval_samples_per_second": 163.949, |
| "eval_steps_per_second": 20.495, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.20670647680293983, |
| "grad_norm": 17.197837829589844, |
| "learning_rate": 4.483233807992651e-06, |
| "loss": 0.6532, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.20670647680293983, |
| "eval_accuracy": 0.5832462130480237, |
| "eval_loss": 0.7054564952850342, |
| "eval_runtime": 278.9798, |
| "eval_samples_per_second": 161.833, |
| "eval_steps_per_second": 20.231, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.208237635890369, |
| "grad_norm": 10.455153465270996, |
| "learning_rate": 4.479405910274078e-06, |
| "loss": 0.6235, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.208237635890369, |
| "eval_accuracy": 0.5825216811207472, |
| "eval_loss": 0.7006902098655701, |
| "eval_runtime": 278.2863, |
| "eval_samples_per_second": 162.236, |
| "eval_steps_per_second": 20.281, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.20976879497779818, |
| "grad_norm": 11.930909156799316, |
| "learning_rate": 4.475578012555505e-06, |
| "loss": 0.5851, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.20976879497779818, |
| "eval_accuracy": 0.5853447126283504, |
| "eval_loss": 0.7129948139190674, |
| "eval_runtime": 277.2023, |
| "eval_samples_per_second": 162.87, |
| "eval_steps_per_second": 20.361, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.21129995406522736, |
| "grad_norm": 10.416621208190918, |
| "learning_rate": 4.471750114836932e-06, |
| "loss": 0.6387, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.21129995406522736, |
| "eval_accuracy": 0.5875080603917906, |
| "eval_loss": 0.7203475832939148, |
| "eval_runtime": 276.7485, |
| "eval_samples_per_second": 163.137, |
| "eval_steps_per_second": 20.394, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.21283111315265657, |
| "grad_norm": 14.316187858581543, |
| "learning_rate": 4.467922217118359e-06, |
| "loss": 0.5589, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.21283111315265657, |
| "eval_accuracy": 0.5918902114052229, |
| "eval_loss": 0.7276438474655151, |
| "eval_runtime": 277.5697, |
| "eval_samples_per_second": 162.655, |
| "eval_steps_per_second": 20.334, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.21436227224008575, |
| "grad_norm": 11.353260040283203, |
| "learning_rate": 4.4640943193997856e-06, |
| "loss": 0.5305, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.21436227224008575, |
| "eval_accuracy": 0.5941167335891918, |
| "eval_loss": 0.7376775145530701, |
| "eval_runtime": 277.7475, |
| "eval_samples_per_second": 162.551, |
| "eval_steps_per_second": 20.321, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.21589343132751493, |
| "grad_norm": 25.445398330688477, |
| "learning_rate": 4.460266421681213e-06, |
| "loss": 0.6585, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.21589343132751493, |
| "eval_accuracy": 0.5962537174308669, |
| "eval_loss": 0.7421597242355347, |
| "eval_runtime": 276.6197, |
| "eval_samples_per_second": 163.213, |
| "eval_steps_per_second": 20.403, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.2174245904149441, |
| "grad_norm": 12.295394897460938, |
| "learning_rate": 4.45643852396264e-06, |
| "loss": 0.6483, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.2174245904149441, |
| "eval_accuracy": 0.5987611837577426, |
| "eval_loss": 0.6953349709510803, |
| "eval_runtime": 278.2037, |
| "eval_samples_per_second": 162.284, |
| "eval_steps_per_second": 20.287, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.2189557495023733, |
| "grad_norm": 11.241786003112793, |
| "learning_rate": 4.452610626244067e-06, |
| "loss": 0.5395, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.2189557495023733, |
| "eval_accuracy": 0.5976324790121263, |
| "eval_loss": 0.6999543309211731, |
| "eval_runtime": 279.3688, |
| "eval_samples_per_second": 161.607, |
| "eval_steps_per_second": 20.203, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.2204869085898025, |
| "grad_norm": 14.92603874206543, |
| "learning_rate": 4.448782728525494e-06, |
| "loss": 0.619, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.2204869085898025, |
| "eval_accuracy": 0.5938706670809107, |
| "eval_loss": 0.7095398306846619, |
| "eval_runtime": 280.0278, |
| "eval_samples_per_second": 161.227, |
| "eval_steps_per_second": 20.155, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.22201806767723167, |
| "grad_norm": 20.692684173583984, |
| "learning_rate": 4.444954830806921e-06, |
| "loss": 0.4735, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.22201806767723167, |
| "eval_accuracy": 0.5908092395766303, |
| "eval_loss": 0.734937310218811, |
| "eval_runtime": 279.9051, |
| "eval_samples_per_second": 161.298, |
| "eval_steps_per_second": 20.164, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.22354922676466085, |
| "grad_norm": 17.677717208862305, |
| "learning_rate": 4.441126933088348e-06, |
| "loss": 0.6086, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.22354922676466085, |
| "eval_accuracy": 0.595049395049395, |
| "eval_loss": 0.7369093894958496, |
| "eval_runtime": 280.5701, |
| "eval_samples_per_second": 160.915, |
| "eval_steps_per_second": 20.116, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.22508038585209003, |
| "grad_norm": 15.074790954589844, |
| "learning_rate": 4.4372990353697755e-06, |
| "loss": 0.5995, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.22508038585209003, |
| "eval_accuracy": 0.5964959030044634, |
| "eval_loss": 0.71633380651474, |
| "eval_runtime": 280.9412, |
| "eval_samples_per_second": 160.703, |
| "eval_steps_per_second": 20.09, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.2266115449395192, |
| "grad_norm": 14.004373550415039, |
| "learning_rate": 4.433471137651202e-06, |
| "loss": 0.6036, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.2266115449395192, |
| "eval_accuracy": 0.5984582574310214, |
| "eval_loss": 0.7075589895248413, |
| "eval_runtime": 279.8252, |
| "eval_samples_per_second": 161.344, |
| "eval_steps_per_second": 20.17, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.2281427040269484, |
| "grad_norm": 12.327754974365234, |
| "learning_rate": 4.42964323993263e-06, |
| "loss": 0.6168, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.2281427040269484, |
| "eval_accuracy": 0.5992044974779459, |
| "eval_loss": 0.692619800567627, |
| "eval_runtime": 280.1157, |
| "eval_samples_per_second": 161.176, |
| "eval_steps_per_second": 20.149, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.2296738631143776, |
| "grad_norm": 10.927477836608887, |
| "learning_rate": 4.425815342214056e-06, |
| "loss": 0.5584, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2296738631143776, |
| "eval_accuracy": 0.5985860696738623, |
| "eval_loss": 0.7029620409011841, |
| "eval_runtime": 278.9125, |
| "eval_samples_per_second": 161.872, |
| "eval_steps_per_second": 20.236, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.23120502220180678, |
| "grad_norm": 21.215038299560547, |
| "learning_rate": 4.421987444495484e-06, |
| "loss": 0.6836, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.23120502220180678, |
| "eval_accuracy": 0.5978149842341343, |
| "eval_loss": 0.7012072205543518, |
| "eval_runtime": 279.942, |
| "eval_samples_per_second": 161.276, |
| "eval_steps_per_second": 20.161, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.23273618128923595, |
| "grad_norm": 18.26300621032715, |
| "learning_rate": 4.41815954677691e-06, |
| "loss": 0.5803, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.23273618128923595, |
| "eval_accuracy": 0.593573744282098, |
| "eval_loss": 0.7073465585708618, |
| "eval_runtime": 278.8484, |
| "eval_samples_per_second": 161.909, |
| "eval_steps_per_second": 20.24, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.23426734037666513, |
| "grad_norm": 15.730330467224121, |
| "learning_rate": 4.414331649058338e-06, |
| "loss": 0.6735, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.23426734037666513, |
| "eval_accuracy": 0.5931864173097022, |
| "eval_loss": 0.694299578666687, |
| "eval_runtime": 278.339, |
| "eval_samples_per_second": 162.205, |
| "eval_steps_per_second": 20.277, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.2357984994640943, |
| "grad_norm": 10.599174499511719, |
| "learning_rate": 4.4105037513397645e-06, |
| "loss": 0.6482, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.2357984994640943, |
| "eval_accuracy": 0.5938021401081177, |
| "eval_loss": 0.6790253520011902, |
| "eval_runtime": 279.1453, |
| "eval_samples_per_second": 161.737, |
| "eval_steps_per_second": 20.219, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.2373296585515235, |
| "grad_norm": 9.95355224609375, |
| "learning_rate": 4.406675853621192e-06, |
| "loss": 0.6667, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.2373296585515235, |
| "eval_accuracy": 0.5938640206460799, |
| "eval_loss": 0.6704154014587402, |
| "eval_runtime": 279.4535, |
| "eval_samples_per_second": 161.558, |
| "eval_steps_per_second": 20.197, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.2388608176389527, |
| "grad_norm": 9.302884101867676, |
| "learning_rate": 4.402847955902619e-06, |
| "loss": 0.604, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.2388608176389527, |
| "eval_accuracy": 0.5950828790744243, |
| "eval_loss": 0.6687915921211243, |
| "eval_runtime": 277.3792, |
| "eval_samples_per_second": 162.766, |
| "eval_steps_per_second": 20.348, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.24039197672638188, |
| "grad_norm": 8.783987998962402, |
| "learning_rate": 4.399020058184046e-06, |
| "loss": 0.5914, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.24039197672638188, |
| "eval_accuracy": 0.5949302294527408, |
| "eval_loss": 0.6737338304519653, |
| "eval_runtime": 277.8845, |
| "eval_samples_per_second": 162.47, |
| "eval_steps_per_second": 20.311, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.24192313581381106, |
| "grad_norm": 8.757774353027344, |
| "learning_rate": 4.395192160465473e-06, |
| "loss": 0.629, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.24192313581381106, |
| "eval_accuracy": 0.5952777963049423, |
| "eval_loss": 0.6752948760986328, |
| "eval_runtime": 279.1759, |
| "eval_samples_per_second": 161.719, |
| "eval_steps_per_second": 20.217, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.24345429490124024, |
| "grad_norm": 8.354512214660645, |
| "learning_rate": 4.3913642627469e-06, |
| "loss": 0.6632, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.24345429490124024, |
| "eval_accuracy": 0.5962355663336819, |
| "eval_loss": 0.6745610237121582, |
| "eval_runtime": 279.0496, |
| "eval_samples_per_second": 161.792, |
| "eval_steps_per_second": 20.226, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.24498545398866942, |
| "grad_norm": 13.983068466186523, |
| "learning_rate": 4.387536365028327e-06, |
| "loss": 0.6018, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.24498545398866942, |
| "eval_accuracy": 0.5935825309643993, |
| "eval_loss": 0.687160849571228, |
| "eval_runtime": 280.0465, |
| "eval_samples_per_second": 161.216, |
| "eval_steps_per_second": 20.154, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.2465166130760986, |
| "grad_norm": 8.103803634643555, |
| "learning_rate": 4.383708467309754e-06, |
| "loss": 0.6217, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.2465166130760986, |
| "eval_accuracy": 0.5935380578595094, |
| "eval_loss": 0.6901026368141174, |
| "eval_runtime": 280.8048, |
| "eval_samples_per_second": 160.781, |
| "eval_steps_per_second": 20.099, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.2480477721635278, |
| "grad_norm": 9.161907196044922, |
| "learning_rate": 4.379880569591181e-06, |
| "loss": 0.6106, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.2480477721635278, |
| "eval_accuracy": 0.5978512323160423, |
| "eval_loss": 0.6946441531181335, |
| "eval_runtime": 278.4057, |
| "eval_samples_per_second": 162.166, |
| "eval_steps_per_second": 20.273, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.24957893125095698, |
| "grad_norm": 7.822539329528809, |
| "learning_rate": 4.3760526718726085e-06, |
| "loss": 0.693, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.24957893125095698, |
| "eval_accuracy": 0.598020462633452, |
| "eval_loss": 0.6881946921348572, |
| "eval_runtime": 277.0846, |
| "eval_samples_per_second": 162.939, |
| "eval_steps_per_second": 20.369, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.25111009033838616, |
| "grad_norm": 8.115804672241211, |
| "learning_rate": 4.372224774154035e-06, |
| "loss": 0.6638, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.25111009033838616, |
| "eval_accuracy": 0.5966775781058632, |
| "eval_loss": 0.6835174560546875, |
| "eval_runtime": 277.1947, |
| "eval_samples_per_second": 162.875, |
| "eval_steps_per_second": 20.361, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.25264124942581534, |
| "grad_norm": 8.402555465698242, |
| "learning_rate": 4.368396876435462e-06, |
| "loss": 0.5649, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.25264124942581534, |
| "eval_accuracy": 0.5948628917378918, |
| "eval_loss": 0.6932902336120605, |
| "eval_runtime": 278.2801, |
| "eval_samples_per_second": 162.239, |
| "eval_steps_per_second": 20.282, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.2541724085132445, |
| "grad_norm": 9.621747970581055, |
| "learning_rate": 4.3645689787168885e-06, |
| "loss": 0.6463, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.2541724085132445, |
| "eval_accuracy": 0.593183788710789, |
| "eval_loss": 0.6967864036560059, |
| "eval_runtime": 277.2248, |
| "eval_samples_per_second": 162.857, |
| "eval_steps_per_second": 20.359, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.2557035676006737, |
| "grad_norm": 17.633258819580078, |
| "learning_rate": 4.360741080998316e-06, |
| "loss": 0.5943, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.2557035676006737, |
| "eval_accuracy": 0.591132348038671, |
| "eval_loss": 0.7154887318611145, |
| "eval_runtime": 277.3036, |
| "eval_samples_per_second": 162.811, |
| "eval_steps_per_second": 20.353, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.2572347266881029, |
| "grad_norm": 16.508804321289062, |
| "learning_rate": 4.356913183279743e-06, |
| "loss": 0.5856, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.2572347266881029, |
| "eval_accuracy": 0.5927422936839299, |
| "eval_loss": 0.7325928211212158, |
| "eval_runtime": 279.5671, |
| "eval_samples_per_second": 161.493, |
| "eval_steps_per_second": 20.188, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.25876588577553206, |
| "grad_norm": 25.668621063232422, |
| "learning_rate": 4.35308528556117e-06, |
| "loss": 0.6454, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.25876588577553206, |
| "eval_accuracy": 0.5932041424063291, |
| "eval_loss": 0.7432768940925598, |
| "eval_runtime": 281.3082, |
| "eval_samples_per_second": 160.493, |
| "eval_steps_per_second": 20.063, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.26029704486296124, |
| "grad_norm": 16.12009620666504, |
| "learning_rate": 4.349257387842597e-06, |
| "loss": 0.597, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.26029704486296124, |
| "eval_accuracy": 0.5940829190340909, |
| "eval_loss": 0.7180017232894897, |
| "eval_runtime": 281.0285, |
| "eval_samples_per_second": 160.653, |
| "eval_steps_per_second": 20.083, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.26182820395039047, |
| "grad_norm": 17.72113609313965, |
| "learning_rate": 4.345429490124024e-06, |
| "loss": 0.624, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.26182820395039047, |
| "eval_accuracy": 0.5941739381424987, |
| "eval_loss": 0.7116958498954773, |
| "eval_runtime": 279.9549, |
| "eval_samples_per_second": 161.269, |
| "eval_steps_per_second": 20.16, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.26335936303781965, |
| "grad_norm": 14.417743682861328, |
| "learning_rate": 4.341601592405451e-06, |
| "loss": 0.5733, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.26335936303781965, |
| "eval_accuracy": 0.5937305745493295, |
| "eval_loss": 0.7162705063819885, |
| "eval_runtime": 278.2449, |
| "eval_samples_per_second": 162.26, |
| "eval_steps_per_second": 20.284, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.26489052212524883, |
| "grad_norm": 18.933935165405273, |
| "learning_rate": 4.337773694686878e-06, |
| "loss": 0.5191, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.26489052212524883, |
| "eval_accuracy": 0.5937839937839938, |
| "eval_loss": 0.7459293603897095, |
| "eval_runtime": 280.6883, |
| "eval_samples_per_second": 160.847, |
| "eval_steps_per_second": 20.108, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.266421681212678, |
| "grad_norm": 21.37299346923828, |
| "learning_rate": 4.333945796968305e-06, |
| "loss": 0.6065, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.266421681212678, |
| "eval_accuracy": 0.5947049555047602, |
| "eval_loss": 0.7559405565261841, |
| "eval_runtime": 279.6788, |
| "eval_samples_per_second": 161.428, |
| "eval_steps_per_second": 20.18, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.2679528403001072, |
| "grad_norm": 17.455568313598633, |
| "learning_rate": 4.3301178992497325e-06, |
| "loss": 0.641, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.2679528403001072, |
| "eval_accuracy": 0.5933846529272134, |
| "eval_loss": 0.7480175495147705, |
| "eval_runtime": 277.648, |
| "eval_samples_per_second": 162.609, |
| "eval_steps_per_second": 20.328, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.26948399938753637, |
| "grad_norm": 14.3558349609375, |
| "learning_rate": 4.326290001531159e-06, |
| "loss": 0.6186, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.26948399938753637, |
| "eval_accuracy": 0.5932022659113628, |
| "eval_loss": 0.7287299633026123, |
| "eval_runtime": 281.2354, |
| "eval_samples_per_second": 160.535, |
| "eval_steps_per_second": 20.069, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.27101515847496555, |
| "grad_norm": 10.249687194824219, |
| "learning_rate": 4.322462103812587e-06, |
| "loss": 0.6375, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.27101515847496555, |
| "eval_accuracy": 0.5906849680170576, |
| "eval_loss": 0.7209318280220032, |
| "eval_runtime": 279.922, |
| "eval_samples_per_second": 161.288, |
| "eval_steps_per_second": 20.163, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.2725463175623947, |
| "grad_norm": 13.502520561218262, |
| "learning_rate": 4.318634206094013e-06, |
| "loss": 0.6078, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.2725463175623947, |
| "eval_accuracy": 0.590238919975131, |
| "eval_loss": 0.713438868522644, |
| "eval_runtime": 281.1596, |
| "eval_samples_per_second": 160.578, |
| "eval_steps_per_second": 20.074, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.2740774766498239, |
| "grad_norm": 8.710155487060547, |
| "learning_rate": 4.314806308375441e-06, |
| "loss": 0.6112, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.2740774766498239, |
| "eval_accuracy": 0.5918866080156403, |
| "eval_loss": 0.7061217427253723, |
| "eval_runtime": 278.6715, |
| "eval_samples_per_second": 162.012, |
| "eval_steps_per_second": 20.253, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.2756086357372531, |
| "grad_norm": 12.963603973388672, |
| "learning_rate": 4.310978410656867e-06, |
| "loss": 0.6836, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.2756086357372531, |
| "eval_accuracy": 0.589742449179307, |
| "eval_loss": 0.7048377394676208, |
| "eval_runtime": 280.3976, |
| "eval_samples_per_second": 161.014, |
| "eval_steps_per_second": 20.129, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.27713979482468226, |
| "grad_norm": 18.37137794494629, |
| "learning_rate": 4.307150512938295e-06, |
| "loss": 0.5662, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.27713979482468226, |
| "eval_accuracy": 0.5890812901504879, |
| "eval_loss": 0.7051539421081543, |
| "eval_runtime": 280.7367, |
| "eval_samples_per_second": 160.82, |
| "eval_steps_per_second": 20.104, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.27867095391211144, |
| "grad_norm": 8.255058288574219, |
| "learning_rate": 4.3033226152197215e-06, |
| "loss": 0.6022, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.27867095391211144, |
| "eval_accuracy": 0.5886953430501244, |
| "eval_loss": 0.7059171199798584, |
| "eval_runtime": 278.1826, |
| "eval_samples_per_second": 162.296, |
| "eval_steps_per_second": 20.289, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.2802021129995407, |
| "grad_norm": 12.834601402282715, |
| "learning_rate": 4.299494717501149e-06, |
| "loss": 0.5255, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.2802021129995407, |
| "eval_accuracy": 0.5897788828700826, |
| "eval_loss": 0.724184513092041, |
| "eval_runtime": 281.9681, |
| "eval_samples_per_second": 160.117, |
| "eval_steps_per_second": 20.016, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.28173327208696985, |
| "grad_norm": 13.296520233154297, |
| "learning_rate": 4.295666819782576e-06, |
| "loss": 0.5974, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.28173327208696985, |
| "eval_accuracy": 0.5901501208506109, |
| "eval_loss": 0.7438974380493164, |
| "eval_runtime": 279.7112, |
| "eval_samples_per_second": 161.409, |
| "eval_steps_per_second": 20.178, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.28326443117439903, |
| "grad_norm": 14.873211860656738, |
| "learning_rate": 4.291838922064003e-06, |
| "loss": 0.6871, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.28326443117439903, |
| "eval_accuracy": 0.5945105702611476, |
| "eval_loss": 0.7173364162445068, |
| "eval_runtime": 278.1451, |
| "eval_samples_per_second": 162.318, |
| "eval_steps_per_second": 20.292, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2847955902618282, |
| "grad_norm": 11.980530738830566, |
| "learning_rate": 4.28801102434543e-06, |
| "loss": 0.5518, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.2847955902618282, |
| "eval_accuracy": 0.5945075210522808, |
| "eval_loss": 0.7088351845741272, |
| "eval_runtime": 281.4397, |
| "eval_samples_per_second": 160.418, |
| "eval_steps_per_second": 20.054, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.2863267493492574, |
| "grad_norm": 14.939533233642578, |
| "learning_rate": 4.2841831266268565e-06, |
| "loss": 0.5496, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.2863267493492574, |
| "eval_accuracy": 0.5940158599702348, |
| "eval_loss": 0.7212331295013428, |
| "eval_runtime": 279.5218, |
| "eval_samples_per_second": 161.519, |
| "eval_steps_per_second": 20.192, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.28785790843668657, |
| "grad_norm": 15.159697532653809, |
| "learning_rate": 4.280355228908284e-06, |
| "loss": 0.5738, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.28785790843668657, |
| "eval_accuracy": 0.5914114513981358, |
| "eval_loss": 0.7385027408599854, |
| "eval_runtime": 279.6403, |
| "eval_samples_per_second": 161.45, |
| "eval_steps_per_second": 20.183, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.28938906752411575, |
| "grad_norm": 10.097131729125977, |
| "learning_rate": 4.276527331189711e-06, |
| "loss": 0.5, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.28938906752411575, |
| "eval_accuracy": 0.5934275634055961, |
| "eval_loss": 0.7404712438583374, |
| "eval_runtime": 278.9074, |
| "eval_samples_per_second": 161.875, |
| "eval_steps_per_second": 20.236, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.29092022661154493, |
| "grad_norm": 17.089492797851562, |
| "learning_rate": 4.272699433471138e-06, |
| "loss": 0.6033, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.29092022661154493, |
| "eval_accuracy": 0.5967577397321032, |
| "eval_loss": 0.7266111373901367, |
| "eval_runtime": 280.8228, |
| "eval_samples_per_second": 160.77, |
| "eval_steps_per_second": 20.098, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2924513856989741, |
| "grad_norm": 14.520054817199707, |
| "learning_rate": 4.268871535752565e-06, |
| "loss": 0.5852, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.2924513856989741, |
| "eval_accuracy": 0.5944566495794776, |
| "eval_loss": 0.7083961367607117, |
| "eval_runtime": 278.472, |
| "eval_samples_per_second": 162.128, |
| "eval_steps_per_second": 20.268, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.2939825447864033, |
| "grad_norm": 16.736730575561523, |
| "learning_rate": 4.265043638033992e-06, |
| "loss": 0.6374, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.2939825447864033, |
| "eval_accuracy": 0.5979578246392897, |
| "eval_loss": 0.6861377358436584, |
| "eval_runtime": 279.2609, |
| "eval_samples_per_second": 161.67, |
| "eval_steps_per_second": 20.21, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.29551370387383247, |
| "grad_norm": 9.897313117980957, |
| "learning_rate": 4.261215740315419e-06, |
| "loss": 0.5925, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.29551370387383247, |
| "eval_accuracy": 0.5983366600133068, |
| "eval_loss": 0.6827172636985779, |
| "eval_runtime": 278.8073, |
| "eval_samples_per_second": 161.933, |
| "eval_steps_per_second": 20.243, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.2970448629612617, |
| "grad_norm": 7.9534478187561035, |
| "learning_rate": 4.257387842596846e-06, |
| "loss": 0.5634, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.2970448629612617, |
| "eval_accuracy": 0.5988369512140986, |
| "eval_loss": 0.684248685836792, |
| "eval_runtime": 277.3928, |
| "eval_samples_per_second": 162.758, |
| "eval_steps_per_second": 20.347, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.2985760220486909, |
| "grad_norm": 13.70839786529541, |
| "learning_rate": 4.253559944878273e-06, |
| "loss": 0.5783, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.2985760220486909, |
| "eval_accuracy": 0.597880548042389, |
| "eval_loss": 0.705771267414093, |
| "eval_runtime": 277.0321, |
| "eval_samples_per_second": 162.97, |
| "eval_steps_per_second": 20.373, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.30010718113612006, |
| "grad_norm": 18.95427703857422, |
| "learning_rate": 4.2497320471597005e-06, |
| "loss": 0.7029, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.30010718113612006, |
| "eval_accuracy": 0.5943931866572036, |
| "eval_loss": 0.7076370716094971, |
| "eval_runtime": 279.5179, |
| "eval_samples_per_second": 161.521, |
| "eval_steps_per_second": 20.192, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.30163834022354924, |
| "grad_norm": 12.317983627319336, |
| "learning_rate": 4.245904149441127e-06, |
| "loss": 0.562, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.30163834022354924, |
| "eval_accuracy": 0.5903159950292917, |
| "eval_loss": 0.6966370344161987, |
| "eval_runtime": 278.3564, |
| "eval_samples_per_second": 162.195, |
| "eval_steps_per_second": 20.276, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.3031694993109784, |
| "grad_norm": 18.507949829101562, |
| "learning_rate": 4.242076251722555e-06, |
| "loss": 0.6133, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.3031694993109784, |
| "eval_accuracy": 0.5898846495119787, |
| "eval_loss": 0.697861909866333, |
| "eval_runtime": 276.7732, |
| "eval_samples_per_second": 163.123, |
| "eval_steps_per_second": 20.392, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.3047006583984076, |
| "grad_norm": 10.3158597946167, |
| "learning_rate": 4.238248354003981e-06, |
| "loss": 0.5549, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.3047006583984076, |
| "eval_accuracy": 0.5933229813664597, |
| "eval_loss": 0.6916565299034119, |
| "eval_runtime": 279.0507, |
| "eval_samples_per_second": 161.791, |
| "eval_steps_per_second": 20.226, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.3062318174858368, |
| "grad_norm": 17.062057495117188, |
| "learning_rate": 4.234420456285409e-06, |
| "loss": 0.6238, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3062318174858368, |
| "eval_accuracy": 0.5943655723158828, |
| "eval_loss": 0.7041603326797485, |
| "eval_runtime": 280.3627, |
| "eval_samples_per_second": 161.034, |
| "eval_steps_per_second": 20.131, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.30776297657326596, |
| "grad_norm": 7.667088985443115, |
| "learning_rate": 4.230592558566835e-06, |
| "loss": 0.6945, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.30776297657326596, |
| "eval_accuracy": 0.5923155464796236, |
| "eval_loss": 0.695047914981842, |
| "eval_runtime": 282.0476, |
| "eval_samples_per_second": 160.072, |
| "eval_steps_per_second": 20.011, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.30929413566069514, |
| "grad_norm": 13.864084243774414, |
| "learning_rate": 4.226764660848263e-06, |
| "loss": 0.6421, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.30929413566069514, |
| "eval_accuracy": 0.5927388930806444, |
| "eval_loss": 0.6951669454574585, |
| "eval_runtime": 282.3074, |
| "eval_samples_per_second": 159.925, |
| "eval_steps_per_second": 19.992, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.3108252947481243, |
| "grad_norm": 9.97375202178955, |
| "learning_rate": 4.2229367631296895e-06, |
| "loss": 0.5758, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.3108252947481243, |
| "eval_accuracy": 0.5917714488825698, |
| "eval_loss": 0.6952547430992126, |
| "eval_runtime": 281.5694, |
| "eval_samples_per_second": 160.344, |
| "eval_steps_per_second": 20.045, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.3123564538355535, |
| "grad_norm": 7.828521251678467, |
| "learning_rate": 4.219108865411117e-06, |
| "loss": 0.6181, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.3123564538355535, |
| "eval_accuracy": 0.5886520097712636, |
| "eval_loss": 0.6984680891036987, |
| "eval_runtime": 278.5618, |
| "eval_samples_per_second": 162.075, |
| "eval_steps_per_second": 20.261, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.3138876129229827, |
| "grad_norm": 10.627179145812988, |
| "learning_rate": 4.215280967692544e-06, |
| "loss": 0.6605, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.3138876129229827, |
| "eval_accuracy": 0.5845493371296379, |
| "eval_loss": 0.6960271000862122, |
| "eval_runtime": 278.4801, |
| "eval_samples_per_second": 162.123, |
| "eval_steps_per_second": 20.267, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.3154187720104119, |
| "grad_norm": 6.945221424102783, |
| "learning_rate": 4.211453069973971e-06, |
| "loss": 0.6138, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.3154187720104119, |
| "eval_accuracy": 0.5852839088643645, |
| "eval_loss": 0.6904491782188416, |
| "eval_runtime": 276.524, |
| "eval_samples_per_second": 163.27, |
| "eval_steps_per_second": 20.411, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.3169499310978411, |
| "grad_norm": 13.37806224822998, |
| "learning_rate": 4.207625172255398e-06, |
| "loss": 0.5744, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.3169499310978411, |
| "eval_accuracy": 0.5887589069679682, |
| "eval_loss": 0.6954379677772522, |
| "eval_runtime": 275.6555, |
| "eval_samples_per_second": 163.784, |
| "eval_steps_per_second": 20.475, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.31848109018527027, |
| "grad_norm": 11.931571006774902, |
| "learning_rate": 4.203797274536825e-06, |
| "loss": 0.5473, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.31848109018527027, |
| "eval_accuracy": 0.589274223967694, |
| "eval_loss": 0.7085046172142029, |
| "eval_runtime": 276.9102, |
| "eval_samples_per_second": 163.042, |
| "eval_steps_per_second": 20.382, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.32001224927269944, |
| "grad_norm": 17.946001052856445, |
| "learning_rate": 4.199969376818252e-06, |
| "loss": 0.6201, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.32001224927269944, |
| "eval_accuracy": 0.5832519747936452, |
| "eval_loss": 0.7224695086479187, |
| "eval_runtime": 278.3015, |
| "eval_samples_per_second": 162.227, |
| "eval_steps_per_second": 20.28, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.3215434083601286, |
| "grad_norm": 9.482304573059082, |
| "learning_rate": 4.1961414790996794e-06, |
| "loss": 0.5663, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.3215434083601286, |
| "eval_accuracy": 0.5839268676917615, |
| "eval_loss": 0.7226927876472473, |
| "eval_runtime": 280.269, |
| "eval_samples_per_second": 161.088, |
| "eval_steps_per_second": 20.138, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.3230745674475578, |
| "grad_norm": 10.172694206237793, |
| "learning_rate": 4.192313581381106e-06, |
| "loss": 0.612, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.3230745674475578, |
| "eval_accuracy": 0.5900024405937299, |
| "eval_loss": 0.7088232040405273, |
| "eval_runtime": 280.1784, |
| "eval_samples_per_second": 161.14, |
| "eval_steps_per_second": 20.144, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.324605726534987, |
| "grad_norm": 11.057249069213867, |
| "learning_rate": 4.188485683662533e-06, |
| "loss": 0.5937, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.324605726534987, |
| "eval_accuracy": 0.5903734771320152, |
| "eval_loss": 0.7097996473312378, |
| "eval_runtime": 281.3342, |
| "eval_samples_per_second": 160.478, |
| "eval_steps_per_second": 20.062, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.32613688562241616, |
| "grad_norm": 12.521862030029297, |
| "learning_rate": 4.184657785943959e-06, |
| "loss": 0.6988, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.32613688562241616, |
| "eval_accuracy": 0.5909282466452257, |
| "eval_loss": 0.6956667900085449, |
| "eval_runtime": 280.7428, |
| "eval_samples_per_second": 160.816, |
| "eval_steps_per_second": 20.104, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.32766804470984534, |
| "grad_norm": 13.895928382873535, |
| "learning_rate": 4.180829888225387e-06, |
| "loss": 0.4822, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.32766804470984534, |
| "eval_accuracy": 0.5896343627973021, |
| "eval_loss": 0.7213166356086731, |
| "eval_runtime": 281.166, |
| "eval_samples_per_second": 160.574, |
| "eval_steps_per_second": 20.074, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.3291992037972745, |
| "grad_norm": 11.10944938659668, |
| "learning_rate": 4.1770019905068135e-06, |
| "loss": 0.5878, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.3291992037972745, |
| "eval_accuracy": 0.5907275953859805, |
| "eval_loss": 0.742756724357605, |
| "eval_runtime": 281.9419, |
| "eval_samples_per_second": 160.132, |
| "eval_steps_per_second": 20.018, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.3307303628847037, |
| "grad_norm": 12.602340698242188, |
| "learning_rate": 4.173174092788241e-06, |
| "loss": 0.5722, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.3307303628847037, |
| "eval_accuracy": 0.590145030380982, |
| "eval_loss": 0.7571865320205688, |
| "eval_runtime": 279.9038, |
| "eval_samples_per_second": 161.298, |
| "eval_steps_per_second": 20.164, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.3322615219721329, |
| "grad_norm": 18.790254592895508, |
| "learning_rate": 4.169346195069668e-06, |
| "loss": 0.6094, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.3322615219721329, |
| "eval_accuracy": 0.5902217294900222, |
| "eval_loss": 0.7526936531066895, |
| "eval_runtime": 280.4762, |
| "eval_samples_per_second": 160.969, |
| "eval_steps_per_second": 20.123, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.3337926810595621, |
| "grad_norm": 13.405548095703125, |
| "learning_rate": 4.165518297351095e-06, |
| "loss": 0.693, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.3337926810595621, |
| "eval_accuracy": 0.5901581176679307, |
| "eval_loss": 0.7200701832771301, |
| "eval_runtime": 281.5673, |
| "eval_samples_per_second": 160.345, |
| "eval_steps_per_second": 20.045, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.3353238401469913, |
| "grad_norm": 10.354043006896973, |
| "learning_rate": 4.161690399632522e-06, |
| "loss": 0.499, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.3353238401469913, |
| "eval_accuracy": 0.5892896756732774, |
| "eval_loss": 0.721836507320404, |
| "eval_runtime": 279.1391, |
| "eval_samples_per_second": 161.74, |
| "eval_steps_per_second": 20.219, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.33685499923442047, |
| "grad_norm": 8.689166069030762, |
| "learning_rate": 4.157862501913949e-06, |
| "loss": 0.594, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.33685499923442047, |
| "eval_accuracy": 0.5895724296992815, |
| "eval_loss": 0.7207421064376831, |
| "eval_runtime": 279.1316, |
| "eval_samples_per_second": 161.744, |
| "eval_steps_per_second": 20.22, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.33838615832184965, |
| "grad_norm": 12.664347648620605, |
| "learning_rate": 4.154034604195376e-06, |
| "loss": 0.5292, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.33838615832184965, |
| "eval_accuracy": 0.5918439794990127, |
| "eval_loss": 0.7299882173538208, |
| "eval_runtime": 281.451, |
| "eval_samples_per_second": 160.412, |
| "eval_steps_per_second": 20.053, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.33991731740927883, |
| "grad_norm": 14.595951080322266, |
| "learning_rate": 4.150206706476803e-06, |
| "loss": 0.5728, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.33991731740927883, |
| "eval_accuracy": 0.5933771015392805, |
| "eval_loss": 0.7359711527824402, |
| "eval_runtime": 281.6141, |
| "eval_samples_per_second": 160.319, |
| "eval_steps_per_second": 20.042, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.341448476496708, |
| "grad_norm": 16.81365203857422, |
| "learning_rate": 4.14637880875823e-06, |
| "loss": 0.6216, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.341448476496708, |
| "eval_accuracy": 0.5928677563150074, |
| "eval_loss": 0.7266600728034973, |
| "eval_runtime": 281.4751, |
| "eval_samples_per_second": 160.398, |
| "eval_steps_per_second": 20.052, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.3429796355841372, |
| "grad_norm": 9.753067016601562, |
| "learning_rate": 4.1425509110396575e-06, |
| "loss": 0.5759, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.3429796355841372, |
| "eval_accuracy": 0.5927989522519923, |
| "eval_loss": 0.7114787697792053, |
| "eval_runtime": 281.2888, |
| "eval_samples_per_second": 160.504, |
| "eval_steps_per_second": 20.065, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.34451079467156637, |
| "grad_norm": 10.276047706604004, |
| "learning_rate": 4.138723013321084e-06, |
| "loss": 0.621, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.34451079467156637, |
| "eval_accuracy": 0.5948861366360367, |
| "eval_loss": 0.7070339918136597, |
| "eval_runtime": 280.8031, |
| "eval_samples_per_second": 160.782, |
| "eval_steps_per_second": 20.099, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.34604195375899555, |
| "grad_norm": 11.647406578063965, |
| "learning_rate": 4.134895115602512e-06, |
| "loss": 0.6023, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.34604195375899555, |
| "eval_accuracy": 0.5949940087871123, |
| "eval_loss": 0.7148999571800232, |
| "eval_runtime": 280.6819, |
| "eval_samples_per_second": 160.851, |
| "eval_steps_per_second": 20.108, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.3475731128464247, |
| "grad_norm": 9.785872459411621, |
| "learning_rate": 4.131067217883938e-06, |
| "loss": 0.578, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.3475731128464247, |
| "eval_accuracy": 0.59318833174113, |
| "eval_loss": 0.7126178741455078, |
| "eval_runtime": 281.5251, |
| "eval_samples_per_second": 160.369, |
| "eval_steps_per_second": 20.048, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.3491042719338539, |
| "grad_norm": 11.013738632202148, |
| "learning_rate": 4.127239320165366e-06, |
| "loss": 0.5701, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.3491042719338539, |
| "eval_accuracy": 0.5925876549793361, |
| "eval_loss": 0.7025783061981201, |
| "eval_runtime": 278.114, |
| "eval_samples_per_second": 162.336, |
| "eval_steps_per_second": 20.294, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.3506354310212831, |
| "grad_norm": 9.779340744018555, |
| "learning_rate": 4.1234114224467924e-06, |
| "loss": 0.6761, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.3506354310212831, |
| "eval_accuracy": 0.5935654336338203, |
| "eval_loss": 0.6881637573242188, |
| "eval_runtime": 281.0803, |
| "eval_samples_per_second": 160.623, |
| "eval_steps_per_second": 20.08, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.3521665901087123, |
| "grad_norm": 13.62732219696045, |
| "learning_rate": 4.11958352472822e-06, |
| "loss": 0.5771, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3521665901087123, |
| "eval_accuracy": 0.5967165834719911, |
| "eval_loss": 0.6921752691268921, |
| "eval_runtime": 278.472, |
| "eval_samples_per_second": 162.128, |
| "eval_steps_per_second": 20.268, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3536977491961415, |
| "grad_norm": 13.277196884155273, |
| "learning_rate": 4.1157556270096466e-06, |
| "loss": 0.6241, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.3536977491961415, |
| "eval_accuracy": 0.5976616231086658, |
| "eval_loss": 0.6972672939300537, |
| "eval_runtime": 279.3077, |
| "eval_samples_per_second": 161.643, |
| "eval_steps_per_second": 20.207, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.3552289082835707, |
| "grad_norm": 11.036153793334961, |
| "learning_rate": 4.111927729291074e-06, |
| "loss": 0.6102, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.3552289082835707, |
| "eval_accuracy": 0.5959098571555319, |
| "eval_loss": 0.6897289752960205, |
| "eval_runtime": 280.1189, |
| "eval_samples_per_second": 161.174, |
| "eval_steps_per_second": 20.149, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.35676006737099986, |
| "grad_norm": 16.50404167175293, |
| "learning_rate": 4.108099831572501e-06, |
| "loss": 0.5876, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.35676006737099986, |
| "eval_accuracy": 0.595568665720369, |
| "eval_loss": 0.6913372874259949, |
| "eval_runtime": 279.8966, |
| "eval_samples_per_second": 161.302, |
| "eval_steps_per_second": 20.165, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.35829122645842904, |
| "grad_norm": 10.642626762390137, |
| "learning_rate": 4.104271933853927e-06, |
| "loss": 0.651, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.35829122645842904, |
| "eval_accuracy": 0.5946874792133212, |
| "eval_loss": 0.6878921389579773, |
| "eval_runtime": 280.9394, |
| "eval_samples_per_second": 160.704, |
| "eval_steps_per_second": 20.09, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.3598223855458582, |
| "grad_norm": 13.040077209472656, |
| "learning_rate": 4.100444036135355e-06, |
| "loss": 0.5587, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.3598223855458582, |
| "eval_accuracy": 0.5935343584281879, |
| "eval_loss": 0.6936639547348022, |
| "eval_runtime": 282.096, |
| "eval_samples_per_second": 160.045, |
| "eval_steps_per_second": 20.007, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.3613535446332874, |
| "grad_norm": 10.807535171508789, |
| "learning_rate": 4.0966161384167815e-06, |
| "loss": 0.6514, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.3613535446332874, |
| "eval_accuracy": 0.5977954711792233, |
| "eval_loss": 0.6872532963752747, |
| "eval_runtime": 279.4925, |
| "eval_samples_per_second": 161.536, |
| "eval_steps_per_second": 20.194, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.3628847037207166, |
| "grad_norm": 10.98725700378418, |
| "learning_rate": 4.092788240698209e-06, |
| "loss": 0.6015, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.3628847037207166, |
| "eval_accuracy": 0.5974941789555384, |
| "eval_loss": 0.6847018003463745, |
| "eval_runtime": 281.2012, |
| "eval_samples_per_second": 160.554, |
| "eval_steps_per_second": 20.071, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.36441586280814575, |
| "grad_norm": 12.160524368286133, |
| "learning_rate": 4.088960342979636e-06, |
| "loss": 0.5671, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.36441586280814575, |
| "eval_accuracy": 0.598935344349562, |
| "eval_loss": 0.6907532811164856, |
| "eval_runtime": 281.9981, |
| "eval_samples_per_second": 160.1, |
| "eval_steps_per_second": 20.014, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.36594702189557493, |
| "grad_norm": 12.533185005187988, |
| "learning_rate": 4.085132445261063e-06, |
| "loss": 0.6757, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.36594702189557493, |
| "eval_accuracy": 0.5970619563287769, |
| "eval_loss": 0.6963858008384705, |
| "eval_runtime": 281.702, |
| "eval_samples_per_second": 160.269, |
| "eval_steps_per_second": 20.035, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.3674781809830041, |
| "grad_norm": 11.481986045837402, |
| "learning_rate": 4.08130454754249e-06, |
| "loss": 0.6244, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3674781809830041, |
| "eval_accuracy": 0.5956671480946562, |
| "eval_loss": 0.6951790452003479, |
| "eval_runtime": 281.7788, |
| "eval_samples_per_second": 160.225, |
| "eval_steps_per_second": 20.03, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.36900934007043334, |
| "grad_norm": 15.283388137817383, |
| "learning_rate": 4.077476649823917e-06, |
| "loss": 0.5761, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.36900934007043334, |
| "eval_accuracy": 0.5921616520484068, |
| "eval_loss": 0.7129482626914978, |
| "eval_runtime": 280.9999, |
| "eval_samples_per_second": 160.669, |
| "eval_steps_per_second": 20.085, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.3705404991578625, |
| "grad_norm": 14.590538024902344, |
| "learning_rate": 4.073648752105344e-06, |
| "loss": 0.5847, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.3705404991578625, |
| "eval_accuracy": 0.5910292582142936, |
| "eval_loss": 0.7289432287216187, |
| "eval_runtime": 281.5113, |
| "eval_samples_per_second": 160.377, |
| "eval_steps_per_second": 20.049, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.3720716582452917, |
| "grad_norm": 14.669201850891113, |
| "learning_rate": 4.069820854386771e-06, |
| "loss": 0.5957, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.3720716582452917, |
| "eval_accuracy": 0.5891025356365736, |
| "eval_loss": 0.7361324429512024, |
| "eval_runtime": 278.169, |
| "eval_samples_per_second": 162.304, |
| "eval_steps_per_second": 20.29, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.3736028173327209, |
| "grad_norm": 9.489580154418945, |
| "learning_rate": 4.065992956668198e-06, |
| "loss": 0.5718, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.3736028173327209, |
| "eval_accuracy": 0.5889370209930024, |
| "eval_loss": 0.7279490828514099, |
| "eval_runtime": 277.7775, |
| "eval_samples_per_second": 162.533, |
| "eval_steps_per_second": 20.318, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.37513397642015006, |
| "grad_norm": 15.029380798339844, |
| "learning_rate": 4.0621650589496255e-06, |
| "loss": 0.6081, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.37513397642015006, |
| "eval_accuracy": 0.5909000155289837, |
| "eval_loss": 0.72515469789505, |
| "eval_runtime": 280.1896, |
| "eval_samples_per_second": 161.134, |
| "eval_steps_per_second": 20.143, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.37666513550757924, |
| "grad_norm": 12.974061965942383, |
| "learning_rate": 4.058337161231052e-06, |
| "loss": 0.5805, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.37666513550757924, |
| "eval_accuracy": 0.5923000110950849, |
| "eval_loss": 0.7263885736465454, |
| "eval_runtime": 277.833, |
| "eval_samples_per_second": 162.501, |
| "eval_steps_per_second": 20.314, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.3781962945950084, |
| "grad_norm": 17.26422119140625, |
| "learning_rate": 4.05450926351248e-06, |
| "loss": 0.6574, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.3781962945950084, |
| "eval_accuracy": 0.5921569497769591, |
| "eval_loss": 0.7078375816345215, |
| "eval_runtime": 278.2251, |
| "eval_samples_per_second": 162.271, |
| "eval_steps_per_second": 20.286, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.3797274536824376, |
| "grad_norm": 13.827315330505371, |
| "learning_rate": 4.050681365793906e-06, |
| "loss": 0.6347, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.3797274536824376, |
| "eval_accuracy": 0.5945813901843215, |
| "eval_loss": 0.700303316116333, |
| "eval_runtime": 280.284, |
| "eval_samples_per_second": 161.079, |
| "eval_steps_per_second": 20.137, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.3812586127698668, |
| "grad_norm": 12.102642059326172, |
| "learning_rate": 4.046853468075334e-06, |
| "loss": 0.6385, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.3812586127698668, |
| "eval_accuracy": 0.5984858576439768, |
| "eval_loss": 0.6862630844116211, |
| "eval_runtime": 277.6537, |
| "eval_samples_per_second": 162.605, |
| "eval_steps_per_second": 20.327, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.38278977185729596, |
| "grad_norm": 8.007050514221191, |
| "learning_rate": 4.04302557035676e-06, |
| "loss": 0.5878, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.38278977185729596, |
| "eval_accuracy": 0.6000088768558177, |
| "eval_loss": 0.6816014647483826, |
| "eval_runtime": 278.6731, |
| "eval_samples_per_second": 162.011, |
| "eval_steps_per_second": 20.253, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 13062, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|